Docker-in-Docker (DinD) capabilities of public runners deactivated. More info

Commit 60b70d49 authored by Clément Haëck
Browse files

Upgrade xarray_regex to filefinder

parent a68107d3
......@@ -4,7 +4,7 @@ import logging
import numpy as np
import xarray as xr
from xarray_regex import FileFinder, library
from filefinder import Finder, library
import lib.data.images
from lib import root_data, get_args
......@@ -27,8 +27,8 @@ def merge_pigments(args):
odir = path.join(root_data, args['region'], 'SOM', 'Pig',
'{:d}days'.format(1), *args['date_str'][:2])
idir = path.join(odir, 'tmp')
pregex = r'%(prefix)_%(time:x)_image_%(image:custom=\d\d\d:)\.nc%(var:char)\.nc'
finder = FileFinder(idir, pregex, prefix='Pig_BMUS_A')
pregex = r'%(prefix)_%(time:x)_image_%(image:fmt=03d).nc%(var:char).nc'
finder = Finder(idir, pregex, prefix='Pig_BMUS_A')
files = finder.get_files()
log.info('found %d pigment files', len(files))
......
......@@ -5,7 +5,7 @@ import matplotlib.pyplot as plt
import numpy as np
import tol_colors as tc
import xarray as xr
from xarray_regex.library import get_date
from filefinder.library import get_date
from lib import progressbar
import lib.data.images
......@@ -123,8 +123,7 @@ if __name__ == '__main__':
images_day = lib.data.images.regroup_by_date(images_all)
finder = lib.data.hi.get_finder(args)
hi_files = {get_date(finder.get_matches(f, False)): f
for f in finder.get_files()}
hi_files = {get_date(m): f for f, m in finder.files}
zone = lib.zones.zones['gyre']
......
......@@ -3,7 +3,7 @@ import logging
import subprocess
import tempfile
from xarray_regex import library
from filefinder.library import get_date
from lib import get_args, check_output_dir
import lib.data.modis
......@@ -66,7 +66,7 @@ def write_filelist(args, filename):
with open(filename, 'w') as filelist:
filelist.write("{}\n".format(len(files)))
for f in files:
date = library.get_date(finder.get_matches(f, relative=False))
date = get_date(finder.get_matches(f, relative=False))
filelist.write("{}\n".format(f))
filelist.write("{}\n".format(lib.data.images.nc2ext(f, 'txt')))
filelist.write("{}\n".format(date.strftime('%Y-%m-%d')))
......
......@@ -5,7 +5,7 @@ import warnings
import numpy as np
import pandas as pd
import xarray as xr
from xarray_regex.library import get_date
from filefinder.library import get_date
from lib import get_args
......@@ -55,8 +55,7 @@ def main():
def get_files(finder):
files = {get_date(finder.get_matches(f, False)): f
for f in finder.get_files()}
files = {get_date(m): f for f, m in finder.files}
return files
......
......@@ -5,7 +5,7 @@ import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import Polygon, Point
import xarray as xr
from xarray_regex.library import get_date
from filefinder.library import get_date
import lib
import lib.data.ostia
......@@ -134,8 +134,8 @@ def write_parameters(args, finder, ds, cells):
with open('Compute/parameters.txt', 'w') as f:
f.write('# Files\n')
f.write('{}\n'.format(len(finder.get_files())))
for ifile in finder.get_files():
date = get_date(finder.get_matches(ifile, False))
for ifile, matches in finder.files:
date = get_date(matches)
ofile = lib.data.jet_regions.get_filename(
args, Y=date.year, m=date.month, d=date.day)
f.write(ifile + '\n')
......
......@@ -9,7 +9,7 @@ from dask.distributed import Client
import numpy as np
import xarray as xr
import pandas as pd
from xarray_regex import FileFinder, library
from filefinder import Finder, library
from global_land_mask import globe
from lib import root_data, get_args, check_output_dir
......@@ -101,8 +101,8 @@ def get_dataset(data_type, args):
return ds
root = path.join(get_data_dir(data_type+'_MODIS', args), 'daily')
pregex = r'A_%(data_type)_%(x)\.nc'
finder = FileFinder(root, pregex, data_type=data_type)
pregex = r'A_%(data_type)_%(x).nc'
finder = Finder(root, pregex, data_type=data_type)
ds = xr.open_mfdataset(finder.get_files(), parallel=True,
preprocess=finder.get_func_process_filename(process))
......@@ -128,14 +128,14 @@ def get_files(data_type, args):
def get_finder(data_type, args):
if data_type == 'OC':
pregex = r'A%(Y)%(j)%(X)\.L2_LAC_OC\.nc'
pregex = r'A%(Y)%(j)%(X).L2_LAC_OC.nc'
elif data_type == 'SST':
pregex = r'AQUA_MODIS\.%(Y)%(m)%(d)T%(X)\.L2\.SST\.nc'
pregex = r'AQUA_MODIS.%(Y)%(m)%(d)T%(X).L2.SST.nc'
else:
raise KeyError
root = path.join(get_data_dir(data_type+'_MODIS', args), 'maps')
finder = FileFinder(root, pregex)
finder = Finder(root, pregex)
return finder
......
......@@ -6,7 +6,7 @@ import logging
import netCDF4 as nc
import xarray as xr
from xarray_regex import FileFinder
from filefinder import Finder
from lib import root_data, get_args, check_output_dir
from l2mapgen import getGeoExtent
......@@ -68,21 +68,18 @@ def l2_to_map(args):
fix_coords(ofile)
def get_l2_filelist(args) -> FileFinder:
def get_l2_filelist(args) -> Finder:
data_type = args['data_type']
if data_type == 'OC':
pregex = r'A%(Y)%(j)%(X)\.L2_LAC_OC\.nc'
pregex = r'A%(Y)%(j)%(X).L2_LAC_OC.nc'
elif data_type == 'SST':
pregex = r'AQUA_MODIS\.%(Y)%(m)%(d)T%(X)\.L2\.SST\.nc'
pregex = r'AQUA_MODIS.%(Y)%(m)%(d)T%(X).L2.SST.nc'
else:
raise KeyError(f"data_type not recognized ({data_type})")
swath_dir = path.join(get_data_dir(args), 'swaths')
finder = FileFinder(swath_dir, pregex)
if 'fix' in args:
for f in args['fix'].items():
finder.fix_matcher(*f)
finder.find_files()
finder = Finder(swath_dir, pregex)
finder.fix_matchers(args['fixes'])
return finder
......
......@@ -45,7 +45,6 @@ def download_data(args):
np.savetxt(link_file, np.array(links), fmt='%s')
finder = get_l2_filelist(args)
finder.create_regex()
# We try to download 10 times max. We start again if we do not have
# the same number of files found and downloaded.
......@@ -62,7 +61,6 @@ def download_data(args):
]
subprocess.call(clo)
finder.find_files()
if n_links == len(finder.get_files()):
log.info('successfully downloaded %d files', n_links)
break
......
......@@ -3,7 +3,7 @@ from datetime import datetime
from os import path
import xarray as xr
from xarray_regex.library import get_date
from filefinder.library import get_date
from lib import root_plot
import lib.data.images
......@@ -23,8 +23,7 @@ def get_odir(folder):
def get_data(image, finder, large=False, by_value=False):
files = {get_date(finder.get_matches(f, relative=False)): f
for f in finder.get_files()}
files = {get_date(m): f for f, m in finder.files}
ifile = files[image.date]
ds = xr.open_dataset(ifile)
if 'time' in ds.dims:
......
......@@ -27,5 +27,5 @@ root_data: /...
scipy
shapely
xarray
xarray-regex
filefinder
......@@ -80,7 +80,7 @@ def get_args(args, description='', add_args=None):
There can be any number of 'fix' arguments. Each is a pair of
key (int/str) and value (str). Each is appended to the dictionary of fixes
in args['fixes'].
See xarray_regex.FileFinder.fix_matchers().
See filefinder.Finder.fix_matchers().
add_args: callable
Function that take an argparse.ArgumentParser and add arguments to it.
Arguments can be overwritten.
......
......@@ -2,7 +2,7 @@
import logging
from typing import Any, Dict, Callable, List
from xarray_regex import FileFinder
from filefinder import Finder
import lib
......@@ -12,11 +12,11 @@ logging.basicConfig()
def get_finder_base(pregex: str, get_root: Callable,
args_names: List[str],
args=None, replace_defaults=None, **kwargs) -> FileFinder:
args=None, replace_defaults=None, **kwargs) -> Finder:
"""Get finder."""
args = process_args(args_names, args, replace_defaults, **kwargs)
root = get_root(args)
finder = FileFinder(root, pregex)
finder = Finder(root, pregex)
finder.fix_matchers(args['fixes'])
return finder
......
......@@ -12,7 +12,7 @@ ARGS_SEL = ['zone', 'data']
def get_finder(args=None, **kwargs):
pregex = r'ds_%(data:fmt=s)_%(zone:fmt=s)_%(Y)\.nc'
pregex = r'ds_%(data:fmt=s)_%(zone:fmt=s)_%(Y).nc'
finder = lib.data.get_finder_base(pregex, get_root, ARGS_DIR, **kwargs)
return finder
......
......@@ -26,9 +26,9 @@ def get_data(args=None, **kwargs):
def get_finder(args=None, **kwargs):
args = lib.data.process_args(ARGS, args, **kwargs)
if args['climato'] is None:
pregex = r"CHL_%(Y)%(m)%(d)\.nc"
pregex = r"CHL_%(Y)%(m)%(d).nc"
else:
pregex = r"{:d}_%(m)%d\.nc".format(args['climato'])
pregex = r"{:d}_%(m)%d.nc".format(args['climato'])
finder = lib.data.get_finder_base(pregex, get_root, ARGS, args)
return finder
......
......@@ -29,7 +29,7 @@ def get_data(args=None, **kwargs):
def get_finder(args=None, **kwargs):
pregex = r'HI_%(Y)%(m)%(d)\.nc'
pregex = r'HI_%(Y)%(m)%(d).nc'
finder = lib.data.get_finder_base(pregex, get_root, ARGS, args, **kwargs)
return finder
......
......@@ -18,14 +18,14 @@ def get_data(args=None, **kwargs):
def get_finder(args=None, **kwargs):
pregex = (r"number_%(number:fmt=d)/"
pregex = (r"number_%(number:fmt=d:rgx=%I)/"
r"scale_%(scale:fmt=.1f)/"
r"coef_%(coef:fmt=d)/"
r"hist_%(time:Y)"
r"_zone_%(zone:fmt=s)"
r"_thrmin_%(thrmin:fmt=.2f)"
r"-thrmax_%(thrmax:fmt=.2f)"
r"\.nc")
r".nc")
finder = lib.data.get_finder_base(pregex, get_root, ARGS_BASE, **kwargs)
return finder
......
......@@ -5,7 +5,7 @@ from os import path
import matplotlib.pyplot as plt
from xarray_regex import FileFinder
from filefinder import Finder
import lib
import lib.data
......@@ -128,7 +128,7 @@ def get_images_text_files(args=None, **kwargs):
if args['days'] == 1:
pregex = '%(m)/' + pregex
root = get_root(args)
finder = FileFinder(root, pregex)
finder = Finder(root, pregex)
finder.fix_matchers(args['fixes'])
return finder.get_files()
......
......@@ -6,7 +6,7 @@ import xarray as xr
import lib
import lib.data
ARGS_DIR = ['region', 'days']
ARGS_DIR = ['region', 'days', 'fixes']
ARGS_FIX = ['Y', 'm', 'd']
grid = '4km_EPSG4326'
......
......@@ -25,7 +25,7 @@ def get_data(args=None, **kwargs):
def get_finder(args=None, **kwargs):
pregex = r'%(m)/A_%(Y)%(time:m)%(d)\.nc'
pregex = r'%(m)/A_%(Y)%(time:m)%(d).nc'
finder = lib.data.get_finder_base(pregex, get_root, ARGS, args, **kwargs)
return finder
......
......@@ -6,7 +6,7 @@ https://podaac.jpl.nasa.gov/dataset/MUR-JPL-L4-GLOB-v4.1
from os import path
import xarray as xr
from xarray_regex import FileFinder
from filefinder import Finder
from lib import root_data
......@@ -21,10 +21,10 @@ def get_data(region='GS', days=1, year=2007, fixes=None):
def get_finder(region='GS', days=1, year=2007, fixes=None):
root = path.join(root_data, region, 'MUR',
'{:d}days'.format(days), str(year))
pregex = r'%(Y)%(m)%(d)%(time:X)-%(suffix)\.nc'
pregex = r'%(Y)%(m)%(d)%(time:X)-%(suffix).nc'
replacements = {
'suffix': r'JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02\.0-fv04\.1_subset'}
finder = FileFinder(root, pregex, **replacements)
'suffix': r'JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_subset'}
finder = Finder(root, pregex, **replacements)
finder.fix_matchers(fixes)
return finder
......@@ -32,8 +32,8 @@ def get_finder(region='GS', days=1, year=2007, fixes=None):
def get_data_climato(region='GS', climato=1):
root = path.join(root_data,
'{:s}/MUR/climato'.format(region))
f = FileFinder(root, r'%(climato)_%(time:m)%(time:d)\.nc',
climato=str(climato))
f = Finder(root, r'%(climato)_%(time:m)%(time:d).nc',
climato=str(climato))
ds = xr.open_mfdataset(f.get_files(), parallel=True)
ds = ds.rename(analysed_sst='SST')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment