Commit 8c987a6a authored by Clément Haëck's avatar Clément Haëck
Browse files

Update modis related scripts

parent 7a50288e
from os import path
import lib
import lib.data.hi as lhi
import lib.data.modis as lmo
import lib.zones as lz
import Compute.coefs.compute_coef_ostia as cc
import matplotlib.pyplot as plt
import numpy as np
import tol_colors as tc
import xarray as xr
from filefinder.library import get_date
from lib import progressbar
import lib.data.images
import lib.data.hi
import lib.zones
REDUCE = False
Q_TARGET = 0.95
HI_TARGET = 9.5
def comp_hists(hi_files, images_day, zone, variables, coef=None):
hists = {v: np.zeros(n_bins) for v in variables}
bins = {}
for date, images in progressbar(images_day.items()):
if date not in hi_files:
continue
hi_file = hi_files[date]
def main(args):
lgr = lib.Logger("Getting data")
ds = lhi.get_data(args)
lgr.end()
hi = xr.open_dataset(hi_file)
if REDUCE:
print("DATASET REDUCED FOR TESTING")
ds = ds.isel(time=slice(None, None, 70))
for i, image in enumerate(images):
if not zone.intersects(image.lon_bounds, image.lat_bounds):
continue
hi_i = image.extract(hi, by_value=True)
for var in variables:
if var == 'HI':
hi_i['HI'] = lib.data.hi.apply_coef(hi_i, coef)
values = np.fabs(hi_i[var]).to_masked_array().compressed()
h, b = np.histogram(values, bins=n_bins, range=bounds[var],
density=False)
hists[var] = hists[var] + h
bins[var] = b
lgr.msg("Getting static masks")
args['grid'] = lmo.grid
zone = lz.get_data(args)['total']
land = lz.get_land(args)['land_large']
static = zone*~land
ds = ds.where(static)
lgr.end()
return hists, bins
coef = cc.get_components_coef(ds)
print('components:', coef)
coef_HI = cc.coef_hi(ds, coef)
print('HI:', coef_HI)
coef.update(coef_HI)
def components(hi_files, images_day, zone):
avgs = {}
stds = {}
hists, bins = comp_hists(hi_files, images_day, zone, ['S', 'V', 'B'])
for variable, hist in hists.items():
b = bins[variable]
if variable == 'B':
hist[0] = 0
s = cc.get_coef_json(coef)
print()
print(s)
hist /= np.sum(hist)
avg = get_avg(b, hist)
std = get_std(b, hist, avg)
bins[variable] = b
avgs[variable] = avg
stds[variable] = std
for variable, std in stds.items():
print('{}: {:.4f}'.format(variable, 1./std))
plot_components(bins, hists, avgs, stds)
return hists, bins
def get_avg(bins, hist):
return np.sum(hist * bins[:-1])
def get_std(bins, hist, avg):
return np.sqrt(np.sum(hist*(bins[:-1]-avg)**2))
def plot_components(bins, hists, avgs, stds):
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
cset = tc.tol_cset('bright')
for var, ax in zip(['S', 'V', 'B'], axes.flat):
ax.plot(bins[var][:-1], hists[var])
ax.axvline(avgs[var], color=cset.red)
ax.axvspan(avgs[var]-2*stds[var],
avgs[var]+2*stds[var], color=cset.red, alpha=0.3)
def coef_hi(hi_files, images_day, zone, coef):
coef['HI'] = 1.
hist, bins = comp_hists(hi_files, images_day, zone, ['HI'], coef)
hist = hist['HI']
bins = bins['HI']
hist /= np.sum(hist)
mode_i = np.argmax(hist)
mode = bins[mode_i]
fig, ax = plt.subplots(figsize=(5, 5))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
cset = tc.tol_cset('bright')
ax.plot(bins[:-1]/mode, hist)
ax.axvline(1., color=cset.red)
fig.savefig(path.join(lib.root_plot, 'coefs',
'hi_{:.1f}_{}_{}.png'.format(args['scale'],
args['number'],
args['coef'])))
print('HI: {:.4f}'.format(1./mode))
return hist, bins
return coef
if __name__ == '__main__':
args = lib.get_args(['region', 'days', 'year', 'fixes',
'scale', 'number', 'coef'])
images_all = lib.data.images.get_images_list(args)
images_day = lib.data.images.regroup_by_date(images_all)
finder = lib.data.hi.get_finder(args)
hi_files = {get_date(m): f for f, m in finder.files}
zone = lib.zones.zones['gyre']
bounds = {'V': [0., 5.],
'B': [0., 5.],
'S': [0., 10.],
'HI': [0., 20.]}
n_bins = 1000
# S, V, B
# hists, bins = components(hi_files, images_day, zone)
'scale', 'coef'])
args['number'] = 1
# HI
coef = lib.data.hi.get_coef(args)
hists, bins = coef_hi(hi_files, images_day, zone, coef)
coef = main(args)
......@@ -62,6 +62,13 @@ def main(args):
coef.update(coef_HI)
s = get_coef_json(coef)
print()
print(s)
return s
def get_coef_json(coef):
params = ['number', 'scale', 'coef']
s = '- parameters: {{{}}}'.format(
', '.join(['{}: {}'.format(p, args[p]) for p in params]))
......@@ -69,9 +76,6 @@ def main(args):
variables = ['S', 'V', 'B', 'HI']
s += ' value: {{{}}}'.format(
', '.join(['{}: {:.4f}'.format(v, coef[v]) for v in variables]))
print()
print(s)
return s
......
import itertools
import warnings
import dask_histogram as dh
import dask_histogram.boost as dhb
import numpy as np
import xarray as xr
import lib
import lib.box
import lib.data.modis as lmo
import lib.data.hi as lhi
import lib.data.hists as lh
import lib.zones as lz
import lib.data.images as limg
# import lib.data.SN_separation as lthr
# Bins extent
VAR_RANGE = dict(
sst=[-5., 40.],
# chl_ocx=[5e-3, 20.]
chl_ocx=[0, 3.]
)
# Width of SST bins
SST_NBINS = int((VAR_RANGE['sst'][1]-VAR_RANGE['sst'][0])/0.1)
# Number of Chl bins
CHL_NBINS = 600
VARS = ['chl_ocx', 'sst']
MASKS = ['frt', 'bkg']
MASKS_VAR = ['mask_' + m for m in MASKS]
INFILE_PARAMS = ['threshold', 'scale', 'number', 'coef', 'zone']
def main(args):
lgr = lib.Logger("Fetching images")
images_all = limg.get_images_list(args)
images_day = limg.regroup_by_date(images_all)
lgr.msg("Getting datafiles")
hi_files = limg.get_files(lhi.get_finder(args))
mo_files = limg.get_files(lmo.get_finder(args))
lgr.msg("Getting static masks")
args['grid'] = lmo.grid
zone = lz.get_data(args)['total'].persist()
land = lz.get_land(args)['land_large'].persist()
static = zone*~land
static = static.rename('static').reset_coords(drop=True)
# thr = lthr.smooth(lthr.get_data(args))
# thr = lib.fix_time_daily(thr)
coef = lhi.get_coef(args)
lgr.msg("Creating histograms")
axis = dict(
chl_ocx=dh.axis.Regular(CHL_NBINS, *VAR_RANGE['chl_ocx']),
# transform=dh.axis.transform.log),
sst=dh.axis.Regular(SST_NBINS, *VAR_RANGE['sst'])
)
hists_bh = {v: {m: dhb.Histogram(axis[v]) for m in MASKS} for v in VARS}
hists_xr = {v: {m: [] for m in MASKS} for v in VARS}
lgr.msg("Setting up fills")
for date, images in images_day.items():
lgr.progress()
if date not in hi_files or date not in mo_files:
warnings.warn("{} image not in data".format(date.strftime('%F')))
continue
for v, m in itertools.product(VARS, MASKS):
hists_bh[v][m].reset()
mo = xr.open_dataset(mo_files[date])
hi = xr.open_dataset(hi_files[date])
hi = hi.isel(time=0)
ds = xr.merge([mo, hi, static], join='exact')
# thr_date = float(thr.sel(time=date).values)
for image in images:
if image.lat_min > 32:
continue
image.lat_max = min(image.lat_max, 32)
ds_i = image.extract(ds, by_value=True)
ds_i['HI'] = lhi.apply_coef(ds_i, coef)
ds_i['HI'] = ds_i.HI.where(ds_i.static)
mask = dict(
frt=ds_i.HI > args['threshold'],
bkg=ds_i.HI < args['threshold']
)
for v, m in itertools.product(VARS, MASKS):
hists_bh[v][m].fill(ds_i[v].where(mask[m]).data.ravel())
for v, m in itertools.product(VARS, MASKS):
dk_arr, _ = hists_bh[v][m].to_dask_array()
# NEED to project to numpy array, otherwise computation is lazy
# and there are interferences between different dates
da = xr.DataArray(np.array(dk_arr), dims=['bins'], name='hist')
da = da.expand_dims(time=[date], mask=[m])
hists_xr[v][m].append(da)
hists_v = []
for v in VARS:
hists_m = []
for m in MASKS:
hists_m.append(xr.concat(hists_xr[v][m], dim='time'))
hist = xr.concat(hists_m, dim='mask')
hist = hist.assign_coords(bins=axis[v].edges[:-1])
hist.bins.attrs['right_edge'] = axis[v].edges[-1]
hist.bins.attrs['VAR'] = v
hist = hist.rename(bins=lh.var_name('bins', v))
hist = hist.rename(lh.var_name('hist', v))
hist.attrs['VAR'] = v
hists_v.append(hist)
hists = xr.merge(hists_v)
hists.attrs.pop('VAR')
hists.attrs['VARS'] = VARS
hists = hists.expand_dims({d: [args[d]] for d in INFILE_PARAMS})
# We have at most every pixel of an image in one bin,
# so approx 1000*1000 = 1e6 values. Uint32 stores up to 2**32~4e9.
encoding = {lh.var_name('hist', v): {'dtype': 'uint32',
'_FillValue': 2**30-1}
for v in VARS}
lgr.msg("Writing to disk")
ofile = lh.get_filename(args)
lib.check_output_dir(ofile, file=True)
lib.setup_metadata(hists, args)
hists.to_netcdf(ofile, encoding=encoding)
lgr.end()
return hists
if __name__ == '__main__':
def add_args(parser):
# Zone·s to compute
parser.add_argument('-zones', type=str, default='INS')
args = lib.get_args(['region', 'year', 'days', 'scale',
'coef', 'fixes', 'threshold'], add_args)
args['kind'] = '1thr'
args['fixes']['Y'] = args['year']
args['Y'] = args['year']
args['zone'] = 'GS3_S'
args['number'] = 1
args['mask'] = False
hists = main(args)
"""Import DataBase for H-Index."""
from os import path
import yaml
from filefinder import Finder
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment