Commit 3442e731 authored by Clément Haëck's avatar Clément Haëck
Browse files

Clean up

Remove some unused files.
Readapt some older scripts; minor refactoring.
parent a1a8f2ef
......@@ -7,7 +7,7 @@ import xarray as xr
from filefinder import Finder, library
import lib.data.images
from lib import root_data, get_args
import lib
logging.basicConfig()
......@@ -15,16 +15,13 @@ log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
def main():
args = get_args(['region', 'date', 'fixes'],
description="Merge pigments images in daily files.")
def main(args):
merge_pigments(args)
def merge_pigments(args):
log.info('finding pigments')
odir = path.join(root_data, args['region'], 'SOM', 'Pig',
odir = path.join(lib.root_data, args['region'], 'SOM', 'Pig',
'{:d}days'.format(1), *args['date_str'][:2])
idir = path.join(odir, 'tmp')
pregex = r'%(prefix)_%(time:x)_image_%(image:fmt=03d).nc%(var:char).nc'
......@@ -78,4 +75,5 @@ def merge_pigments(args):
if __name__ == '__main__':
main()
args = lib.get_args(['region', 'date', 'fixes'])
main(args)
import xarray as xr
import lib
import lib.data.hi
def main():
    """Rewrite HI files with packed, compressed int16 encodings.

    Loads each file found by the HI finder fully into memory, then
    overwrites it in place with zlib-compressed int16 variables.
    """
    args = lib.get_args(['region', 'days', 'scale', 'number', 'fixes'])
    finder = lib.data.hi.get_finder(args)

    # Packed encoding: stored = (value - add_offset) / scale_factor.
    common = dict(dtype='int16', zlib=True, _FillValue=-999.)
    encoding = {
        'S': dict(common, scale_factor=0.003, add_offset=98.304/2.),
        'V': dict(common, scale_factor=0.002),
        'B': dict(common, scale_factor=0.002),
    }

    for ofile in finder.get_files():
        # Load everything into memory and close the handle first, so the
        # same file can safely be overwritten below.
        ds = xr.open_dataset(ofile)
        ds.load()
        ds.close()
        ds.to_netcdf(ofile, encoding=encoding)
if __name__ == '__main__':
    # Script entry point.
    main()
......@@ -6,9 +6,10 @@ import logging
import numpy as np
import xarray as xr
import lib
import lib.data.images
import lib.data.hi
from lib import root_data, compute_hi_f
from lib import compute_hi_f
from Compute.compute_hi import get_scale
......@@ -28,7 +29,7 @@ images_all = lib.data.images.get_images_list(region=region, year=year,
days=days, fixes=fixes)
images_grp = lib.data.images.regroup_by_file(images_all)
root = path.join(root_data,
root = path.join(lib.root_data,
'{:s}/HI/HI_{:.1f}_{:d}/HI/{:d}days/{:d}'.format(
region, scale, number, days, year))
......
"""Compute histograms of variables for ostia data"""
import numpy as np
import xarray as xr
import lib
import lib.box
import lib.data.hists
import lib.data.hi
import lib.data.globcolour
import lib.data.ostia
import lib.zones
# (min, max) value range per variable; used as histogram bin bounds.
VAR_RANGE = {
    'sst': (-10, 40),
    'CHL': (0, 3)
}
def main():
    """Compute front/background histograms of SST and CHL for ostia data.

    Loads HI, GlobColour and OSTIA datasets, aligns them in time,
    applies HI coefficients, computes per-box histograms and writes the
    result to a netCDF file. Returns the histogram dataset.
    """
    def add_args(parser):
        # Script-specific command-line arguments.
        parser.add_argument('-threshold', type=float, default=5.)
        parser.add_argument('-nbins', type=int, default=3000)
        parser.add_argument('-grid_file', type=str,
                            default='boxgrid_5.0_5.0')
    args = lib.get_args(['region', 'year', 'days',
                         'scale', 'number', 'coef', 'fixes'],
                        add_args)
    args['fixes']['Y'] = args['year']

    # Retrieve datasets
    print('Load HI', flush=True)
    hi = lib.data.hi.get_data(args)
    print('Load GC', flush=True)
    gc = lib.data.globcolour.get_data(args)
    print('Load SST', flush=True)
    st = lib.data.ostia.get_data(args)

    # Make sure the date format is the same
    print('Align time', flush=True)
    hi = lib.fix_time_daily(hi)
    gc = lib.fix_time_daily(gc)
    st = lib.fix_time_daily(st)
    # join='exact' raises on mismatched time coordinates; the return value
    # is deliberately discarded -- this is only a consistency check.
    xr.align(*[hi, gc, st], join='exact', copy=False, exclude=['lon', 'lat'])

    # Apply coef to find HI.
    print('Apply HI coef', flush=True)
    coef = lib.data.hi.get_coef(args)
    hi['HI'] = lib.data.hi.apply_coef(hi, coef)

    # Place HI on SST and GC grids
    print('Merge HI on SST and GC grids', flush=True)
    st = xr.merge([st, hi], join='exact')
    gc['HI'] = st['HI'].interp(lon=gc.lon, lat=gc.lat, method='linear')

    # Prepare histogram computation (set bins, ranges, apply_ufunc)
    variables_hi = ['sst']
    variables_gc = ['CHL']
    boxes = lib.box.IndexBox.ReadGridText(lib.box.IndexBox.GetGridFile(args))
    args['boxes'] = boxes
    args['grid'] = [b.idx_str for b in boxes]
    args['zone'] = args['grid_file']
    h_st = prepare_ds(st, args, variables_hi)
    h_gc = prepare_ds(gc, args, variables_gc)
    # compute_hist fills h_st / h_gc in place.
    compute_hist(st, h_st, variables_hi, args)
    compute_hist(gc, h_gc, variables_gc, args)

    # Merge all histograms in one dataset and write to disk
    ds = xr.concat([h_st, h_gc], 'variable')
    ds = ds.assign_coords(zone=[idx for idx in args['grid']])

    # Write to disk
    args['Y'] = args['year']
    ofile = lib.data.hists.get_filename(args)
    lib.check_output_dir(ofile, file=True)
    ds.to_netcdf(ofile, encoding={v: {'zlib': True} for v in ds.data_vars})
    return ds
def prepare_ds(ds, args, variables):
    """Set dimensions and coordinates for parameters and histograms.

    Returns a dataset with a 'bins' variable (bin edges per variable)
    and an all-zero 'hist' variable to be filled by compute_hist.
    """
    # Scalar run parameters kept as size-1 dimensions so outputs from
    # different runs can later be concatenated along them.
    args_dims = ['threshold', 'scale', 'number', 'coef']
    h = xr.Dataset(
        coords=dict(**{c: [args[c]] for c in args_dims},
                    variable=variables,
                    mask=['frt', 'bkg'],
                    time=ds.time.copy())
    )
    # Add bins variable
    # nbins_p = nbins + 1: these are bin *edges*.
    bins = xr.DataArray(np.zeros((h['variable'].size, args['nbins']+1)),
                        dims=['variable', 'nbins_p'])
    h['bins'] = bins
    for var in variables:
        h['bins'].loc[dict(variable=var)] = np.linspace(*VAR_RANGE[var],
                                                        bins.nbins_p.size)
    # Add hist variable
    hist_dims = args_dims + ['variable', 'mask', 'time']
    hist = xr.DataArray(
        np.zeros((*[h[c].size for c in hist_dims],
                  args['nbins'], len(args['grid']))),
        dims=hist_dims + ['nbins', 'zone'])
    h['hist'] = hist
    return h
def compute_hist(ds, hist, variables, args):
    """Set up an apply_ufunc.

    Fills hist['hist'] with front ('frt') and background ('bkg')
    histograms of each variable, one histogram per box in args['boxes'].
    """
    # Front mask: HI above the (single) threshold value.
    ds['frt'] = ds['HI'] > hist['threshold'][0].values
    # One (time, lat, lon) index tuple per box.
    slices = [b.get_isel(ds) for b in args['boxes']]
    slices = [(slice(None), slc['lat'], slc['lon']) for slc in slices]
    for var in variables:
        hists = xr.apply_ufunc(
            u_comp_hist, ds['frt'], ds[var],
            input_core_dims=[['lat', 'lon'],
                             ['lat', 'lon']],
            output_core_dims=[['zone', 'nbins'], ['zone', 'nbins']],
            dask='parallelized',
            dask_gufunc_kwargs={'output_sizes': {'nbins': hist.nbins.size,
                                                 'zone': hist.zone.size}},
            kwargs=dict(slices=slices,
                        range=VAR_RANGE[var],
                        bins=hist['nbins'].size)
        )
        # u_comp_hist returns (front, background); write each into the
        # matching (mask, variable) slot of the output array.
        for h, mask in zip(hists, ['frt', 'bkg']):
            hist['hist'] = xr.where(
                ((hist.coords['mask'] == mask) &
                 (hist.coords['variable'] == var)),
                h, hist['hist']
            )
    return hist
def u_comp_hist(mask, values, slices=None, **kwargs):
    """Compute per-zone histograms of `values` inside and outside `mask`.

    Parameters
    ----------
    mask: boolean array, same shape as `values`; True marks front pixels.
    values: data array; each entry of `slices` indexes into it
        (first slice element spans the leading, non-spatial axis).
    slices: list of index tuples, one per zone.
    kwargs: forwarded to np.histogram (e.g. `range`, `bins`).

    Returns
    -------
    (front, background) histogram arrays, each with a leading singleton
    axis added so xr.apply_ufunc can vectorize over them.
    """
    def comp(values, mask):
        sel = values[mask]               # Apply mask (flattens selection)
        val = sel[np.isfinite(sel)]      # Drop NaN/Inf before binning
        h, _ = np.histogram(val, **kwargs)
        return h

    res_frt = []
    res_bkg = []
    for slc in slices:
        res_frt.append(comp(values[slc], mask[slc]))
        res_bkg.append(comp(values[slc], ~mask[slc]))
    # Add dimension for vectorization.
    res_frt = np.expand_dims(np.array(res_frt), 0)
    res_bkg = np.expand_dims(np.array(res_bkg), 0)
    return res_frt, res_bkg
if __name__ == '__main__':
    # Script entry point; result kept in `ds` (useful with python -i).
    ds = main()
"""Compute histograms of variables for modis data."""
import warnings
import numpy as np
import pandas as pd
import xarray as xr
from filefinder.library import get_date
from lib import get_args
import lib.data.images
import lib.data.hi
import lib.data.hists
import lib.data.modis
import lib.zones
# (min, max) value range per variable; used as histogram bin bounds.
VAR_RANGE = {
    'sst': (-10, 40),
    'chl_ocx': (0, 3)
}
def main():
    """Compute front/background histograms of modis variables per zone.

    Returns the histogram dataset after writing it to disk.
    """
    def add_args(parser):
        # Two thresholds: HI > threshold_max -> front,
        # HI < threshold_min -> background; pixels in between are ignored.
        parser.add_argument('-threshold_min', type=float, default=5.)
        parser.add_argument('-threshold_max', type=float, default=6.)
        parser.add_argument('-nbins', type=int, default=700)
    args = get_args(['region', 'year', 'days',
                     'scale', 'number', 'coef',
                     'fixes', 'zone'],
                    add_args)
    zone = lib.zones.zones[args['zone']]
    coef = lib.data.hi.get_coef(args)

    # Retrieve images
    images_all = lib.data.images.get_images_list(args)
    images_day = lib.data.images.regroup_by_date(images_all)
    hi_files = get_files(lib.data.hi.get_finder(args))
    mo_files = get_files(lib.data.modis.get_finder(args))

    variables = ['sst', 'chl_ocx']
    hist = prepare_ds(images_day, args, variables)
    # Fills hist['hist'] in place.
    compute_hist(hist, hi_files, mo_files, images_day, coef, zone)

    ofile = lib.data.hists.get_filename(args)
    hist.to_netcdf(ofile, encoding={v: {'zlib': True} for v in hist.data_vars})
    return hist
def get_files(finder):
    """Map each matched file's date to its file name.

    NOTE(review): unlike the variant in the SN_separation script, this
    does not join `finder.root` to the file name -- confirm the finder
    here yields paths that are usable as-is.
    """
    files = {}
    for fname, matches in finder.files:
        files[get_date(matches)] = fname
    return files
def prepare_ds(images_day, args, variables):
    """Set dimensions and coordinates for parameters and histograms.

    Returns a dataset with a 'bins' variable (bin edges per variable)
    and an all-zero 'hist' variable, one time step per image date.
    """
    # Scalar run parameters kept as size-1 dimensions.
    args_dims = ['threshold_min', 'threshold_max', 'zone',
                 'scale', 'number', 'coef']
    coords = {c: [(args[c])] for c in args_dims}
    coords['variable'] = variables
    coords['time'] = pd.to_datetime(list(images_day.keys()))
    coords['mask'] = ['frt', 'bkg']
    hist = xr.Dataset(coords={c: (c, v) for c, v in coords.items()})
    # nbins_p = nbins + 1: these are bin *edges*.
    bins = xr.DataArray(np.zeros((hist['variable'].size, args['nbins']+1)),
                        dims=['variable', 'nbins_p'])
    hist['bins'] = bins
    for var in variables:
        hist['bins'].loc[dict(variable=var)] = np.linspace(*VAR_RANGE[var],
                                                           bins.nbins_p.size)
    # Note: 'time' is not a dimension of 'hist' here; compute_hist's
    # xr.where broadcasts it in when filling.
    hist_dims = args_dims + ['variable', 'mask']
    hist_a = xr.DataArray(
        np.zeros((*[hist[c].size for c in hist_dims], args['nbins'])),
        dims=hist_dims + ['nbins'])
    hist['hist'] = hist_a
    return hist
def compute_hist(hist, hi_files, mo_files, images_day, coef, zone):
    """Fill hist['hist'] day by day from modis and HI files.

    For each date, histograms of each variable are accumulated over all
    images intersecting `zone`, separately for front
    (HI > threshold_max) and background (HI < threshold_min) pixels.
    """
    nbins = hist.nbins.size
    # Per-day accumulators, reset at each date.
    h_frt_day = np.zeros((hist.variable.size, nbins))
    h_bkg_day = np.zeros((hist.variable.size, nbins))
    for date, images in lib.progressbar(images_day.items(), size=30):
        if date not in hi_files or date not in mo_files:
            warnings.warn("%s image not in data" % date.strftime('%F'))
            continue
        mo = xr.open_dataset(mo_files[date])
        hi = xr.open_dataset(hi_files[date])
        h_frt_day[:] = 0
        h_bkg_day[:] = 0
        for image in images:
            # Skip images that do not overlap the requested zone.
            if not zone.intersects(image.lon_bounds, image.lat_bounds):
                continue
            mo_i = image.extract(mo, by_value=True)
            hi_i = image.extract(hi, by_value=True)
            hi_i['HI'] = lib.data.hi.apply_coef(hi_i, coef)
            ds = xr.merge([mo_i, hi_i], join='inner')
            mask_frt = ds['HI'] > hist['threshold_max'][0].values
            mask_bkg = ds['HI'] < hist['threshold_min'][0].values
            for i, var in enumerate(hist.variable.values):
                h_frt = get_hist(ds, mask_frt, var, nbins)
                h_bkg = get_hist(ds, mask_bkg, var, nbins)
                h_frt_day[i, :] = h_frt_day[i, :] + h_frt
                h_bkg_day[i, :] = h_bkg_day[i, :] + h_bkg
        # Write the day's totals into the output; xr.where broadcasts over
        # 'time' and 'mask' to select the right slot.
        sel = (hist.coords['time'] == pd.to_datetime(date))
        hist['hist'] = xr.where(sel & (hist.coords['mask'] == 'frt'),
                                h_frt_day, hist['hist'])
        hist['hist'] = xr.where(sel & (hist.coords['mask'] == 'bkg'),
                                h_bkg_day, hist['hist'])
    return hist
def get_hist(ds, mask, variable, nbins):
    """Histogram of ds[variable] restricted to `mask` (NaNs dropped)."""
    masked = ds[variable].where(mask).to_masked_array()
    counts, _edges = np.histogram(masked.compressed(),
                                  bins=nbins, range=VAR_RANGE[variable])
    return counts
if __name__ == '__main__':
    # Silence RuntimeWarnings raised from dask modules during computation.
    # warnings.simplefilter("ignore")
    warnings.filterwarnings("ignore", category=RuntimeWarning,
                            module=".*dask")
    ds = main()
from os import path
import warnings
from filefinder.library import get_date
import numpy as np
import pandas as pd
import xarray as xr
import lib
import lib.data.hi
import lib.data.hists
import lib.data.images
import lib.data.SN_separation
import lib.data.modis
import lib.zones
# (min, max) value range per variable; used as histogram bin bounds.
VAR_RANGE = {
    'sst': (-10, 40),
    'chl_ocx': (0, 3)
}
def main():
    """Compute front/background histograms of modis variables.

    Uses gridded zone and land masks plus a time-dependent SST threshold
    (SN_separation) to restrict HI to the GS_S or GS_N side before
    splitting pixels into front/background. Returns the histogram
    dataset after writing it to disk.
    """
    def add_args(parser):
        parser.add_argument('-threshold', type=float, default=5.)
        parser.add_argument('-nbins', type=int, default=3000)
    args = lib.get_args(['region', 'year', 'days',
                         'scale', 'number', 'coef',
                         'fixes', 'zone'],
                        add_args)
    args['fixes']['Y'] = args['year']

    images_all = lib.data.images.get_images_list(args)
    images_day = lib.data.images.regroup_by_date(images_all)
    hi_files = get_files(lib.data.hi.get_finder(args))
    mo_files = get_files(lib.data.modis.get_finder(args))

    # Get Zone (and land mask) on the modis grid; persisted for reuse.
    zone = lib.zones.get_data(args, grid=lib.data.modis.grid)['total'].persist()
    land = lib.zones.get_land(args, grid=lib.data.modis.grid)['land_large'].persist()

    # Get threshold: SST value separating the two zones, smoothed in time.
    thr = lib.data.SN_separation.get_data(args)
    thr = lib.fix_time_daily(thr)
    threshold = lib.data.SN_separation.smooth(thr, 8)

    coef = lib.data.hi.get_coef(args)
    variables = ['sst', 'chl_ocx']
    hist = prepare_ds(list(images_day.keys()), args, variables)

    nbins = hist.nbins.size
    # Per-day accumulators, reset at each date.
    h_frt_day = np.zeros((hist.variable.size, nbins))
    h_bkg_day = np.zeros((hist.variable.size, nbins))
    for date, images in lib.progressbar(images_day.items(), size=30):
        if date not in hi_files or date not in mo_files:
            warnings.warn("{} image not in data".format(date.strftime('%F')))
            continue
        mo = xr.open_dataset(mo_files[date])
        hi = xr.open_dataset(hi_files[date])
        hi = hi.isel(time=0)
        sst_thr = threshold.sel(time=date).values
        h_frt_day[:] = 0
        h_bkg_day[:] = 0
        for image in images:
            mo_i = image.extract(mo, by_value=True)
            hi_i = image.extract(hi, by_value=True)
            zone_i = image.extract(zone, by_value=True)
            land_i = image.extract(land, by_value=True)
            hi_i['HI'] = lib.data.hi.apply_coef(hi_i, coef)
            ds = xr.merge([mo_i, hi_i, zone_i], join='exact')
            # Keep HI only on the requested side of the SST separation.
            if args['zone'] == 'GS_S':
                ds['HI'] = ds.HI.where(ds['sst'] > sst_thr)
            elif args['zone'] == 'GS_N':
                ds['HI'] = ds.HI.where(ds['sst'] < sst_thr)
            else:
                raise KeyError("Zone not supported")
            mask = (ds['HI'] > hist['threshold'][0].values).persist()
            # Valid pixels: inside the zone and not on land.
            zone_i = (zone_i*~land_i).persist()
            for i, var in enumerate(variables):
                h_frt = compute_hist(hist, ds, mask*zone_i, var)
                h_bkg = compute_hist(hist, ds, ~mask*zone_i, var)
                h_frt_day[i, :] = h_frt_day[i, :] + h_frt
                h_bkg_day[i, :] = h_bkg_day[i, :] + h_bkg
        # Write the day's totals; xr.where broadcasts over 'mask'/'time'.
        sel = (hist.coords['time'] == pd.to_datetime(date))
        hist['hist'] = xr.where(sel & (hist.coords['mask'] == 'frt'),
                                h_frt_day, hist['hist'])
        hist['hist'] = xr.where(sel & (hist.coords['mask'] == 'bkg'),
                                h_bkg_day, hist['hist'])

    args['Y'] = args['year']
    ofile = lib.data.hists.get_filename(args)
    lib.check_output_dir(ofile, file=True)
    hist.to_netcdf(ofile, encoding={v: {'zlib': True} for v in hist.data_vars})
    return hist
def prepare_ds(dates, args, variables):
    """Set dimensions and coordinates for parameters and histograms.

    Returns a dataset with a 'bins' variable (bin edges per variable)
    and an all-zero 'hist' variable, one time step per date in `dates`.
    """
    # Scalar run parameters kept as size-1 dimensions.
    args_dims = ['threshold', 'zone', 'scale', 'number', 'coef']
    h = xr.Dataset(
        coords=dict(**{c: [args[c]] for c in args_dims},
                    variable=variables,
                    mask=['frt', 'bkg'],
                    time=pd.to_datetime(dates))
    )
    # Add bins variable
    # nbins_p = nbins + 1: these are bin *edges*.
    bins = xr.DataArray(np.zeros((h['variable'].size, args['nbins']+1)),
                        dims=['variable', 'nbins_p'])
    h['bins'] = bins
    for var in variables:
        h['bins'].loc[dict(variable=var)] = np.linspace(*VAR_RANGE[var],
                                                        bins.nbins_p.size)
    # Add hist variable
    hist_dims = args_dims + ['variable', 'mask', 'time']
    hist = xr.DataArray(
        np.zeros((*[h[c].size for c in hist_dims], args['nbins'])),
        dims=hist_dims + ['nbins'])
    h['hist'] = hist
    return h
def compute_hist(hist, ds, mask, variable):
    """Histogram of ds[variable] under `mask`, with this file's VAR_RANGE.

    Bin count is taken from the 'nbins' dimension of `hist`.
    """
    data = ds[variable].where(mask).to_masked_array().compressed()
    counts, _edges = np.histogram(data, bins=hist.nbins.size,
                                  range=VAR_RANGE[variable])
    return counts
def get_files(finder):
    """Map each matched file's date to its full path under finder.root."""
    out = {}
    for fname, matches in finder.files:
        out[get_date(matches)] = path.join(finder.root, fname)
    return out
if __name__ == '__main__':
    # Script entry point; result kept in `ds` (useful with python -i).
    ds = main()
"""Compute histograms of variables for ostia data"""
from dask.diagnostics import ProgressBar
import numpy as np
import xarray as xr
import lib
import lib.data.hists
import lib.data.hi
import lib.data.globcolour
import lib.data.ostia
import lib.zones
# (min, max) value range per variable; used as histogram bin bounds.
VAR_RANGE = {
    'sst': (-10, 40),
    'CHL': (0, 3)
}
def main():
def add_args(parser):
parser.add_argument('-threshold', type=float, default=5.)
parser.add_argument('-nbins', type=int, default=3000)
args = lib.get_args(['region', 'year', 'days',
'scale', 'number', 'coef',
'fixes', 'zone'],