Commit b0c64cb9 authored by Clément Haëck

Merge local and ciclad repos

Two repos were manually kept in sync by copy-pasting
(one private for work on my PC, and one for ciclad).
To avoid this overhead, I will now work only on this repo,
possibly with multiple branches.
parent d3984481
@@ -8,7 +8,15 @@ __pycache__/
*.o
*.o.d
*.so
*.x
*.exx
*.log
Process/find_images/filelist.txt
Process/find_images/input.txt
Archive
Compute/find_images/filelist.txt
Compute/find_images/input.txt
lib/_build_ds
lib/_build_hi
Test/
from os import path
import logging
import numpy as np
import xarray as xr
from xarray_regex import FileFinder, library
import lib.data.images
from lib import root_data, get_args
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
def main():
args = get_args(['region', 'date', 'fix'],
"Merge pigments images in daily files.")
merge_pigments(args)
def merge_pigments(args):
log.info('finding pigments')
odir = path.join(root_data, args['region'], 'SOM', 'Pig',
'{:d}days'.format(1), *args['date_str'])
idir = path.join(odir, 'tmp')
# Pre-regex for xarray_regex: matches filenames built from a fixed prefix,
# a date, a three-digit image index, and a variable identifier.
pregex = r'%(prefix)_%(time:x)_image_%(image:custom=\d\d\d:)\.nc%(var:char)\.nc'
finder = FileFinder(idir, pregex, prefix='Pig_BMUS_A')
files = finder.get_files()
log.info('found %d pigment files', len(files))
images_all = lib.data.images.get_images_list(region=args['region'],
year=args['date'][0],
fixes=args['fix'])
images_grp = lib.data.images.regroup_by_file(images_all)
log.info('found %d images', len(images_all))
modis_dir = path.dirname(images_all[0].filename)
log.info('opening modis file to get template')
ds = xr.open_dataset(images_all[0].filename)
ds = ds.drop_vars(ds.data_vars)
ds.attrs = {}
log.info('regrouping pigments images by date')
files_date = {}
variables = set()
for f in files:
matches = finder.get_matches(f, relative=False)
date = library.get_date(matches)
image_idx = int(matches['image']['match']) - 1
var = matches['char']['match']
variables.add(var)
files_date.setdefault(date, []).append((image_idx, var, f))
encoding = {v: {'zlib': True} for v in variables}
for var in variables:
ds[var] = (['lat', 'lon'],
np.full((ds.lat.size, ds.lon.size), np.nan))
log.info('copying data into daily files')
for date, entries in files_date.items():
ofile = path.join(odir, 'Pig_{}.nc'.format(date.strftime('%Y%m%d')))
log.info('file %s', ofile)
# Reset the template grid before pasting this date's image tiles
for var in variables:
ds[var][:] = np.nan
for image_idx, var, f in entries:
modis_file = path.join(modis_dir,
                       'A_{}.nc'.format(date.strftime('%Y%m%d')))
image = images_grp[modis_file][image_idx]
pig = xr.open_dataset(f)[var]
# Paste each image tile at its slice in the full grid
ds[var][dict(lon=image.slice_x, lat=image.slice_y)] = pig.values
ds.to_netcdf(ofile, encoding=encoding)
if __name__ == '__main__':
main()
"""Add H-Index to files.
Normalize each component by min/max values.
"""
from lib.data.hi import get_data
from lib import compute_hi, get_args
args = get_args(['region', 'days', 'year', 'scale', 'number'])
region = str(args['region'])
days = int(args['days'])
year = int(args['year'])
scale = float(args['scale'])
number = int(args['number'])
db = get_data(region, days, year, scale, number)
extremas = {'S': [0.0000, 8.6694],
'V': [0.0000, 1.0369],
'B': [0.0340, 1.4108]}
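# Hedged sketch (an assumption, not confirmed library behaviour): the extrema
# above are presumably used by compute_hi.compute_hi to rescale each component
# onto [0, 1] before combining them. The helper below is illustrative only and
# is not called by this script:
def _rescale(x, vmin, vmax):
    # Map x linearly so that vmin -> 0 and vmax -> 1
    return (x - vmin) / (vmax - vmin)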
## Compute
for time_slice in db.avail.iter_slices('time', 3):
db.load(time=time_slice)
compute_hi.compute_hi(db, extremas)
db.write_add_variable('HI', 'V', kwargs={'zlib': True})
#!/usr/bin/env bash
export PYTHONPATH="/home/chaeck/Fronts/:$PYTHONPATH"
. "$HOME/.setup-shell.sh"
conda activate py38
python "$HOME/Fronts/Compute/add_hi.py" \
--kwargs region:"'GS'" days:8 year:2003 \
scale:10. number:1
@@ -8,19 +8,16 @@
# The averaged files are put in
# "${wd}/${rootdir}/climato/${varlist}/${n_clim}_${date}.\nc"
module load nco/4.7.x
typ="PFT"
typ="PFT_concentration"
wd="/data/chaeck"
wd=$HOME/Documents/Work/Data
if [[ "$typ" == "PFT"* ]]; then
varlist=(dtom dinflg galgae \
picoeuk pico prchlcus \
prokrt prymnsio)
prefixes=(dtom_ dinflg_ galgae_ picoeuk_ pico_ prchlcus_ prokrt_ \
prymnsio_)
rootdir="GS/SOM/PFT"
fi
if [[ "$typ" == "PFT_concentration" ]]; then
@@ -44,7 +41,7 @@ if [[ "$typ" == "CHL_L4" ]]; then
fi
if [[ "$typ" == "SST" ]]; then
varlist=(SST)
rootdir="GS/SST"
rootdir="AVHRR"
fi
@@ -74,10 +71,14 @@ for ivar in "${!varlist[@]}"; do
done
done
echo "$fileout"
echo "$mean_files"
mkdir -p "${wd}/${rootdir}/climato/${var_2}"
nces -O ${mean_files} ${fileout}
done
# NOTE: early exit, the monthly climatology below is never reached
exit 1
# -- MONTHLY --
files=($(ls "${wd}/${rootdir}/climato/${var_2}/46_"*.nc))
"""Compute cloud coverage for SST and CHL-L3."""
from os import path
import numpy as np
import netCDF4 as nc
import lib.data.sst
import lib.data.chl
from lib import root_data, get_args
args = get_args(['level'])
level = int(args['level'])
db_sst = lib.data.sst.get_data()
db_chl = lib.data.chl.get_data(level=level)
box_lat = [20, 52]
box_lon = [-82, -45]
range_time = [[2007, 1, 1], [2007, 12, 31]]
def get_land_mask_sst(db):
db.load_selected(var='SST_mask', time=0)
# Any pixel whose mask value differs from 1 is treated as land
mask = db.view(var=0, time=0) != 1
db.unload_data()
return mask
def get_land_mask_chl(db):
db.load_selected(var='CHL_flags', time=0)
# Flag value 9 marks land in the CHL product
mask = db.view(var=0, time=0) == 9
db.unload_data()
return mask
def compute_cloud_cover(db, var, get_land_mask, var_load=None):
if var_load is None:
var_load = var
db.select_by_value('avail', time=slice(*range_time),
lat=slice(*box_lat), lon=slice(*box_lon), by_day=True)
land_mask = get_land_mask(db)
# Number of ocean pixels: the denominator of the cover fraction
total = np.sum(~land_mask)
time = db.selected.time
axis = (1, 2)
cloud = np.zeros(time.size)
for time_slice in db.selected.iter_slices('time', 3):
db.load_selected(var=var_load, time=time_slice)
# The fill-value mask flags pixels without data (mostly clouds)
data = db[var].mask
# Cloud cover = fraction of ocean pixels that are masked
cloud[time_slice] = np.sum(data*~land_mask, axis=axis) / total
# Write to disk
filename = path.join(db.filegroups[0].root, 'cloud_cover.nc')
with nc.Dataset(filename, 'w') as f:
f.createDimension('time', time.size)
vnc = f.createVariable('time', 'f', ['time'])
vnc[:] = time[:]
vnc.setncattr('units', time.units)
vnc = f.createVariable('cloud', 'f', ['time'])
vnc[:] = cloud[:]
compute_cloud_cover(db_sst, 'SST', get_land_mask_sst, ['SST', 'SST_error'])
compute_cloud_cover(db_chl, 'CHL', get_land_mask_chl)
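# Hypothetical read-back helper (not part of the original script), showing how
# the file written above can be consumed:
def read_cloud_cover(root):
    """Return the (time, cloud) arrays from a cloud_cover.nc under `root`."""
    with nc.Dataset(path.join(root, 'cloud_cover.nc')) as f:
        return f['time'][:], f['cloud'][:]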
#!/usr/bin/env bash
export PYTHONPATH="/home/chaeck/Fronts/:$PYTHONPATH"
. "$HOME/.setup-shell.sh"
conda activate py38
python "$HOME/Fronts/Compute/compute_cloud_cover.py" \
--kwargs level:3
from os import path
import matplotlib.pyplot as plt
import tol_colors as tc
import numpy as np
import pandas as pd
import xarray as xr
import lib.data.images
from lib import root_data
plt.switch_backend('agg')
region = 'GS'
year = 2007
days = 1
fixes = dict()
scale = 30.
number = 1
n_bins = 800
bounds = [0., 20.]
cset = tc.tol_cset('high-contrast')
hi_dir = path.join(root_data, region, 'HI',
'HI_{:2.1f}_{:d}'.format(scale, number), 'HI',
'{:d}days'.format(days), str(year))
def get_extremas(bins, hist):
mins = {}
maxs = {}
for var in hist.keys():
mins[var], maxs[var] = get_extremas_(bins, hist[var])
return mins, maxs
def get_extremas_(bins, hist):
# Locate the first and last non-empty bins of the histogram
mask = hist != 0
vmin = bins[mask.argmax(axis=0)]
# argmax on the flipped mask finds the last non-empty bin from the end
imin = hist.shape[0] - np.flip(mask, axis=0).argmax(axis=0) - 1
vmax = bins[imin]
return vmin, vmax
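# Illustrative self-check (hypothetical, not called by the script): the pair
# returned by get_extremas_ bounds the populated range of a toy histogram.
def _demo_get_extremas_():
    bins = np.arange(8)
    hist = np.array([0, 0, 3, 5, 0, 2, 0])
    # First non-empty bin is at value 2, last one at value 5
    assert get_extremas_(bins, hist) == (2, 5)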
def plot_hist(bins, hist, mins, maxs, ofile):
fig, axes = plt.subplots(2, 2, figsize=(5, 5))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.92)
for var, ax in zip(['S', 'V', 'B'], axes.flat):
ax.plot(bins[:-1], hist[var], ds='steps-post', color='k')
ax.axvline(mins[var], color=cset.blue)
ax.axvline(maxs[var], color=cset.red)
# ax.set_xlim(bins[0], maxs[var]*1.2)
ax.set_xlim(bins[0], 5)
ax.set_title(var)
fig.canvas.draw()
fig.savefig(ofile, dpi=150)
def apply_coef(ds, coef):
# HI is a weighted sum of the three components (S enters as |S|)
ds['HI'] = (coef['S'] * np.fabs(ds.S)
            + coef['V'] * ds.V
            + coef['B'] * ds.B)
return ds
def get_hists(images_mth, variables, bins, coef=None, plot=False):
hist = {v: np.zeros(n_bins) for v in variables}
for i, (m, im_day) in enumerate(images_mth.groupby(pd.Grouper(freq='1M'))):
hist_mth = {v: np.zeros(n_bins) for v in variables}
ofile = path.join(hi_dir, 'HIST_VSB_{}.png'.format(m.strftime('%F')))
for day, images in im_day.items():
print(day, end=': ')
hi_file = path.join(hi_dir, 'HI_{}.nc'.format(day.strftime('%Y%m%d')))
hi = xr.open_dataset(hi_file)
for j, image in enumerate(images):
print(j, end=', ')
hii = image.extract(hi)
if 'HI' in variables and coef is not None:
hii = apply_coef(hii, coef)
# TODO: image rejection could happen here
for var in variables:
h, _ = np.histogram(np.fabs(hii[var]),
bins=n_bins, range=bounds)
hist_mth[var] += h
print('')
for var in variables:
hist[var] += hist_mth[var]
hist_mth[var] /= np.sum(hist[var]*np.diff(bins))
mins, maxs = get_extremas(bins, hist_mth)
if plot:
plot_hist(bins, hist_mth, mins, maxs, ofile)
mins_mth[i] = mins
maxs_mth[i] = maxs
mins, maxs = get_extremas(bins, hist)
if plot:
plot_hist(bins, hist, mins, maxs, path.join(hi_dir, 'HIST_TOTAL.png'))
np.save(path.join(hi_dir, 'bins.npy'), bins)
for v in variables:
np.save(path.join(hi_dir, f'hist_{v}.npy'), hist[v])
return hist
images_all = lib.data.images.get_images_list(region=region, year=year,
days=days, fixes=fixes)
images_day = lib.data.images.regroup_by_date(images_all)
images_mth = pd.Series(list(images_day.values()), index=list(images_day.keys()))
groups = images_mth.groupby(pd.Grouper(freq='1M'))
mins_mth = [{} for _ in range(len(groups))]
maxs_mth = [{} for _ in range(len(groups))]
bins = np.linspace(*bounds, n_bins+1)
## Method 1
def get_coef(bins, hist):
vmin, vmax = get_extremas_(bins, hist)
fx = hist
bx = bins
by = (vmax-vmin)*bins + vmin
fy = hist / np.sum(hist*np.diff(by))
plot_fit(bx, by, fx, fy)
a = get_linear_fit_hist(bx, by, fx, fy)
return a
def get_linear_fit_hist(bx, by, fx, fy):
# Placeholder: the linear fit is not implemented yet
return 0
def plot_fit(bx, by, fx, fy):
fig, ax = plt.subplots(figsize=(5, 5))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
# log1p keeps marker sizes finite for empty bins
ax.scatter(by[:-1], bx[:-1], s=np.log1p(np.fabs(fx)))
hist = {}
for v in ['S', 'V', 'B']:
hist[v] = np.load(path.join(hi_dir, f'hist_{v}.npy'))
bins = np.load(path.join(hi_dir, 'bins.npy'))
get_coef(bins, hist['V'])
## Method 2
def get_coef(bins, hist):
std = get_std(bins[:-1], hist)
return 1./std
def get_std(bins, hist):
N = get_N(hist)
mean = get_mean(bins, hist, N)
return np.sqrt(np.sum(hist*(bins-mean)**2)/N)
def get_mean(bins, hist, N):
return np.sum(bins*hist)/N
def get_N(hist):
return np.sum(hist)
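# Hypothetical sanity check (an assumption, not part of the processing chain):
# for samples of known spread, the coefficient should approximate 1/std.
def _check_coef():
    rng = np.random.default_rng(0)
    x = np.fabs(rng.normal(0., 2., 100000))
    h, b = np.histogram(x, bins=n_bins, range=bounds)
    return get_coef(b, h)  # close to 1. / np.std(x)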
hist = {}
for v in ['S', 'V', 'B']:
hist[v] = np.load(path.join(hi_dir, f'hist_{v}.npy'))
bins = np.load(path.join(hi_dir, 'bins.npy'))
for v in ['S', 'V', 'B']:
print(v, get_coef(bins, hist[v]))
## Compute HI hist
coef = dict(S=2.384/2., V=5.874, B=8.599)
hist = get_hists(images_mth, ['HI'], bins, coef)
## Reload
hist = np.load(path.join(hi_dir, 'hist_HI.npy'))
"""Compute stats. """
from os import path
import xarray as xr
import numpy as np
import pandas as pd
from scipy import ndimage
from lib import root_data, get_args
from lib.dask_client import make_client
import lib.zones
import lib.data.mask
import lib.data.sst
import lib.data.chl
def get_data():
# Assemble the 'lo' dataset: fronts mask (from CHL), CHL and its zones
mask_lo = lib.data.mask.get_data(region, days, year, scale, number, chl=True)
mask_lo = mask_lo.assign_coords(time=mask_lo.time.dt.floor("D"))
chl = lib.data.chl.get_data(region, days, year)
zone_lo = lib.zones.get_data(region, 'lo')
ds_lo = xr.merge([mask_lo, chl, zone_lo], join='inner')
# Assemble the 'hi' dataset: fronts mask, SST and its zones
mask_hi = lib.data.mask.get_data(region, days, year, scale, number, chl=False)
sst = lib.data.sst.get_data(region, days, year)
sst = sst.rename(mask='sst_mask')
zone_hi = lib.zones.get_data(region, 'hi')
ds_hi = xr.merge([mask_hi, sst, zone_hi], join='inner')
ds_hi = ds_hi.assign_coords(time=ds_hi.time.dt.floor("D"))
ds_lo = add_land('lo', ds_lo)
ds_hi = add_land('hi', ds_hi)
ds_lo, ds_hi = xr.align(ds_lo, ds_hi, join='inner',
exclude=['lat', 'lon'])
ds_lo = to_pd_datetime(ds_lo)
ds_hi = to_pd_datetime(ds_hi)
return ds_lo, ds_hi
def get_stats(time):
# Allocate an all-NaN stats dataset indexed by (variable, zone, mask, time)
variables = ['mean', 'q10', 'q25', 'q50', 'q75', 'q90', 'n']
coords = {
'variable': ['analysed_sst', 'CHL'],
'zone': zones_list,
'mask': ['front', 'background'],
'time': time
}
stats = xr.Dataset(data_vars={v: xr.DataArray(np.nan, coords, coords.keys())
for v in variables},
coords=coords)
return stats
def add_land(kind, ds):
filename = path.join(root_data, 'land_mask_{}.nc'.format(kind))
land = xr.open_dataset(filename)['land']
land, _ = xr.align(land, ds, join='right')
land = land.astype('bool')
# Remove Bermuda from the land mask
bermudes_lat = (32.20, 32.45)
bermudes_lon = (-64.95, -64.60)
land.loc[dict(lat=slice(*bermudes_lat),
              lon=slice(*bermudes_lon))] = False
ds['land'] = land
return ds
def to_pd_datetime(ds):
ds = ds.assign_coords(time=pd.to_datetime(ds.time.values))
return ds
def extend_mask(mask, neighbors, repeat):
# Build a disc-shaped kernel of diameter 2*neighbors + 1
n = 2*neighbors+1
kernel = np.zeros((n, n))
for i in range(n):
for j in range(n):
kernel[i, j] = (i-(n-1)/2)**2 + (j-(n-1)/2)**2 <= (n/2)**2
# Dilate the mask: convolve, then clip values back into [0, 1]
for _ in range(repeat):
mask = np.clip(ndimage.convolve(mask, kernel), 0, 1)
return mask
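# Illustrative sketch (hypothetical, not called by the script): one pass with
# neighbors=1 grows an isolated pixel into a 3x3 block.
def _demo_extend_mask():
    m = np.zeros((7, 7))
    m[3, 3] = 1.
    return extend_mask(m, neighbors=1, repeat=1)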
def compute_stats(stats, loc, da, mask):
# Evaluate `func` on the masked field and store the result at `loc`
def add(var, func, *args):
stats[var].loc[loc] = getattr(da.where(mask), func)(*args, dim=['lat', 'lon'])
add('n', 'count')
add('mean', 'mean')
add('q10', 'quantile', 0.10)
add('q25', 'quantile', 0.25)
add('q50', 'quantile', 0.50)
add('q75', 'quantile', 0.75)
add('q90', 'quantile', 0.90)
return stats
def compute(ds, var, stats):
print(var)
for m in ['front', 'background']:
print(m)
mask = ds[m] * ~ds['land']
for zone in zones_list:
print(zone)
if zone == 'total':
mask_ = mask
else:
mask_ = mask * ds[zone]