Commit 9fca3ea0 authored by Clément Haëck's avatar Clément Haëck
Browse files

Add script to easily find threshold

for sensitivity test
Exp2203b

Also fix 'kind' argument (which should be fixed for all 3 scripts)
parent 28e7deb9
......@@ -195,14 +195,15 @@ def get_bins(variable):
return bins
if __name__ == '__main__':
def add_args(parser):
# Zone·s to compute
parser.add_argument('-zones', type=str, default='INS')
def add_args(parser):
# Zone·s to compute
parser.add_argument('-zones', type=str, default='INS')
if __name__ == '__main__':
args = lib.get_args(['region', 'year', 'days', 'scale', 'number',
'coef', 'fixes', 'threshold', 'mask'], add_args)
args['kind'] = '1thr'
args['fixes']['Y'] = args['year']
args['Y'] = args['year']
......
......@@ -178,6 +178,7 @@ if __name__ == '__main__':
args = lib.get_args(['region', 'year', 'days', 'scale', 'number',
'coef', 'fixes'], add_args)
args['kind'] = '2d'
args['fixes']['Y'] = args['year']
args['Y'] = args['year']
......
......@@ -204,6 +204,7 @@ if __name__ == '__main__':
args = lib.get_args(['region', 'year', 'days', 'scale', 'number',
'coef', 'fixes', 'thr_lo', 'thr_hi', 'mask'],
add_args)
args['kind'] = '2thr'
args['fixes']['Y'] = args['year']
args['Y'] = args['year']
......
import dask_histogram as dh
import xarray as xr
import lib
import lib.data.globcolour
import lib.data.hi
import lib.data.hists
import lib.data.ostia
import lib.data.p_frt_mask
import lib.data.SN_separation
import lib.zones
from lib import m_start, m_end, m_next
# Number of Chl bins in the output histogram
CHL_NBINS = 500
# Chlorophyll histogram range; bins are log-spaced over this interval
# (presumably mg/m3 — confirm against the GlobColour CHL product)
CHL_RANGE = [5e-3, 20.]
# Variables a histogram is computed for
VARS = ['CHL']
# HI-based pixel classes: 'frt' (front) and 'bkg' (background)
MASKS = ['frt', 'bkg']
# Names of the corresponding boolean mask variables in the dataset
MASKS_VAR = ['mask_' + m for m in MASKS]
# Script arguments encoded as singleton dimensions in the output file
INFILE_PARAMS = ['threshold', 'scale', 'number', 'coef']
def main(args):
    """Compute per-day CHL histograms split by a single HI threshold.

    Pipeline: load all inputs, smooth the South/North separation
    temperature, compute the heterogeneity index (HI), mask out land /
    out-of-zone (and optionally low front-probability) pixels, classify
    each pixel as front (HI > threshold) or background (HI < threshold),
    restrict to the South zone (lat >= 32), then accumulate a log-binned
    CHL histogram per time step and per mask, and write it to netCDF.

    Parameters
    ----------
    args: dict
        Script arguments as returned by lib.get_args; keys used here
        include 'threshold', 'mask', and the INFILE_PARAMS entries.

    Returns
    -------
    The histogram dataset that was written to disk.
    """
    m_start("Loading data")
    ds = get_data(args)

    m_next("Smoothing SN separation temperature")
    # Smooth SN separation
    ds['threshold'] = lib.data.SN_separation.smooth(ds, time_step=8)

    m_next("Computing HI")
    # Compute HI from its S/V/B components, then drop them
    ds['HI'] = lib.data.hi.apply_coef(ds, lib.data.hi.get_coef(args))
    ds = ds.drop_vars(['S', 'V', 'B'])

    m_next("Applying static masks")
    # Apply masks: land (enlarged), total zone, min front proba
    static = ~ds.land_large * ds.total
    if args['mask']:
        static = static * ds.p_frt
    ds['HI'] = ds.HI.where(static)
    ds = ds.drop_vars(['land_large', 'total', 'p_frt'])

    m_next("Computing HI masks")
    # Front/background masks; pixels exactly at the threshold (or NaN)
    # belong to neither mask
    ds['mask_frt'] = ds.HI > args['threshold']
    ds['mask_bkg'] = ds.HI < args['threshold']
    ds = ds.drop_vars(['HI'])

    m_next("Selecting South zone")
    ds = ds.sel(lat=slice(32, None))
    m_end()

    print("Setting up histogram computations", end='', flush=True)
    var = 'CHL'
    bins_name = 'bins_' + var
    # Log-transformed regular axis: bins are evenly spaced in log(CHL)
    bins = dh.axis.Regular(CHL_NBINS, *CHL_RANGE,
                           transform=dh.axis.transform.log)
    hists = []
    for m, mask in zip(MASKS, MASKS_VAR):
        print('.', end='', flush=True)
        # One histogram per time step, restricted to the current mask
        h = ds[var].where(ds[mask]).groupby('time').map(
            hist_grp, shortcut=True, args=[bins, bins_name]
        )
        h = h.expand_dims(mask=[m])
        hists.append(h)
    hist = xr.combine_by_coords(hists)
    hist = hist[var].rename('hist_' + var)
    # Label bins by their left edge; keep the right edge as an attribute
    hist = hist.assign_coords({bins_name: bins.edges[:-1]})
    hist[bins_name].attrs['right_edge'] = CHL_RANGE[1]
    hist[bins_name].attrs['VAR'] = var
    hist.attrs['VAR'] = var

    # Record the script parameters as singleton dimensions
    hist = hist.expand_dims({d: [args[d]] for d in INFILE_PARAMS})
    hist = hist.expand_dims(dict(zone=['S']))
    hist = hist.to_dataset()

    # We have at most every pixel of an image in one bin,
    # so approx 1000*1000 = 1e6 values. Uint32 stores up to 2**32~4e9.
    encoding = {v: {'dtype': 'uint32', '_FillValue': 2**30-1}
                for v in ['hist_' + v for v in VARS]}

    m_next("Executing computations / Writing to disk")
    # .load() triggers the whole dask graph before writing
    hist = hist.load()
    ofile = lib.data.hists.get_filename(args)
    lib.check_output_dir(ofile, file=True)
    hist.to_netcdf(ofile, encoding=encoding)
    m_end()

    return hist
def hist_grp(da, bins: dh.axis.Axis, bins_name, **kwargs):
    """Histogram one group of values along a single bins dimension.

    The whole array is flattened before binning; the actual counting
    is delegated to dask_histogram.

    Parameters
    ----------
    da: xr.DataArray
        Values to bin (typically one time step of CHL).
    bins: dh.axis.Axis
        Pre-built histogram axis.
    bins_name: str
        Name of the output bins dimension.

    Returns
    -------
    xr.DataArray of counts along `bins_name`.
    """
    flat_values = da.data.ravel()
    histogram = dh.factory(flat_values, axes=[bins], **kwargs)
    counts, _ = histogram.to_dask_array()
    return xr.DataArray(counts, dims=[bins_name])
def get_data(args):
    """Load all data in a single dataset.

    Gathers SST (OSTIA), chlorophyll (GlobColour), HI components and
    the SN separation, aligns them to a daily time axis, adds the zone,
    land and front-probability masks, and inner-merges everything.

    Raises
    ------
    IndexError
        If any dimension is empty after the inner merge, which signals
        incompatible input datasets.
    """
    sources = [
        lib.data.ostia.get_data(args),
        lib.data.globcolour.get_data(args),
        lib.data.hi.get_data(args),
        lib.data.SN_separation.get_data(args),
    ]
    sources = [lib.fix_time_daily(src) for src in sources]

    # Masks are generated on the OSTIA grid
    args['grid'] = lib.data.ostia.grid
    sources.append(lib.zones.get_data(args).total)
    sources.append(lib.zones.get_land(args).land_large)
    frt_fixes = dict(threshold=lib.data.p_frt_mask.default_threshold)
    sources.append(
        lib.data.p_frt_mask.get_data(args, fixes=frt_fixes)
        .rename(mask='p_frt'))

    ds = xr.merge(sources, join='inner')
    ds = ds.drop_vars(['CHL_error', 'CHL_flags'])

    # Check we don't have incompatibility between datasets
    for dim in ds.dims:
        if ds.sizes[dim] == 0:
            raise IndexError(f"'{dim}' empty after merging.")
    return ds
if __name__ == '__main__':
    args = lib.get_args(['region', 'year', 'days', 'scale', 'number',
                         'coef', 'fixes', 'threshold'])
    # Single-threshold run, without the front-probability mask
    args['kind'] = '1thr'
    args['mask'] = False
    args['fixes']['Y'] = args['year']
    args['Y'] = args['year']

    hist = main(args)

    # Total pixel count per time step and per mask (front/background)
    N = hist.hist_CHL.sum('bins_CHL').squeeze()
    # Daily fraction (%) of classified pixels that fall on the front side
    frac = N.sel(mask='frt') / N.sum('mask') * 100.
    # Same fraction aggregated over 15-day windows to smooth daily noise
    N_f = N.resample(time='15D').sum()
    frac_f = N_f.sel(mask='frt') / N_f.sum('mask') * 100.
    frac.plot.line(x='time')
    frac_f.plot.line(x='time')

    # Summary used to pick a threshold for the sensitivity test
    print("Threshold:", args['threshold'])
    print("Maximal surface fraction:", float(frac_f.max()))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment