
Commit b0c64cb9 authored by Clément Haëck

Merge local and ciclad repos

Two repos were manually kept in sync by copy-pasting
(one private for work on my PC, and one for ciclad).
To avoid this overhead, I will only work on this repo,
possibly with multiple branches.
parent d3984481
@@ -8,7 +8,15 @@ __pycache__/
*.o
*.o.d
*.so
*.x
*.exx
*.log
Process/find_images/filelist.txt
Process/find_images/input.txt
Archive
Compute/find_images/filelist.txt
Compute/find_images/input.txt
lib/_build_ds
lib/_build_hi
Test/
from os import path
import logging
import numpy as np
import xarray as xr
from xarray_regex import FileFinder, library
import lib.data.images
from lib import root_data, get_args
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
def main():
args = get_args(['region', 'date', 'fix'],
"Merge pigments images in daily files.")
merge_pigments(args)
def merge_pigments(args):
log.info('finding pigments')
odir = path.join(root_data, args['region'], 'SOM', 'Pig',
'{:d}days'.format(1), *args['date_str'])
idir = path.join(odir, 'tmp')
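    # Pattern of the temporary pigment files, in xarray_regex matcher syntax:
    # a fixed prefix, a date, a 3-digit image index, and a variable-name group.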
pregex = r'%(prefix)_%(time:x)_image_%(image:custom=\d\d\d:)\.nc%(var:char)\.nc'
finder = FileFinder(idir, pregex, prefix='Pig_BMUS_A')
files = finder.get_files()
log.info('found %d pigment files', len(files))
images_all = lib.data.images.get_images_list(region=args['region'],
year=args['date'][0],
fixes=args['fix'])
images_grp = lib.data.images.regroup_by_file(images_all)
log.info('found %d images', len(images_all))
modis_dir = path.dirname(images_all[0].filename)
log.info('opening modis file to get template')
ds = xr.open_dataset(images_all[0].filename)
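    # drop all variables and attributes, keeping only the coordinates as a template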
ds = ds.drop_vars(ds.data_vars)
ds.attrs = {}
log.info('regrouping pigments images by date')
files_date = {}
variables = set()
for f in files:
matches = finder.get_matches(f, relative=False)
date = library.get_date(matches)
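        # image indices are 1-based in the filenames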
image_idx = int(matches['image']['match']) - 1
var = matches['char']['match']
variables.add(var)
if date not in files_date:
files_date[date] = []
files_date[date].append((image_idx, var, f))
encoding = {v: {'zlib': True} for v in variables}
for var in variables:
ds[var] = (['lat', 'lon'],
np.full((ds.lat.size, ds.lon.size), np.nan))
log.info('reporting data')
for date, k in files_date.items():
ofile = path.join(odir, 'Pig_{}.nc'.format(date.strftime('%Y%m%d')))
log.info('file %s', ofile)
for var in variables:
ds[var][:] = np.nan
for image_idx, var, f in k:
modis_file = path.join(modis_dir,
'A_{}.nc'.format(date.strftime('%Y%m%d')))
image = images_grp[modis_file][image_idx]
pig = xr.open_dataset(f)[var]
ds[var][dict(lon=image.slice_x, lat=image.slice_y)] = pig.values
ds.to_netcdf(ofile, encoding=encoding)
if __name__ == '__main__':
main()
"""Add H-Index to files.
Normalize each component by min/max values.
"""
from lib.data.hi import get_data
from lib import compute_hi, get_args
args = get_args(['region', 'days', 'year', 'scale', 'number'])
region = str(args['region'])
days = int(args['days'])
year = int(args['year'])
scale = float(args['scale'])
number = int(args['number'])
db = get_data(region, days, year, scale, number)
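# Min/max of each HI component (S, V, B), used for the normalization
# described in the module docstring.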
extremas = {'S': [0.0000, 8.6694],
'V': [0.0000, 1.0369],
'B': [0.0340, 1.4108]}
## Compute
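# Process 3 time steps at a time to limit memory use; the resulting HI
# variable is then written back to disk with zlib compression.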
for time_slice in db.avail.iter_slices('time', 3):
db.load(time=time_slice)
compute_hi.compute_hi(db, extremas)
db.write_add_variable('HI', 'V', kwargs={'zlib': True})
#!/usr/bin/env bash
export PYTHONPATH="/home/chaeck/Fronts/:$PYTHONPATH"
. "$HOME/.setup-shell.sh"
conda activate py38
python "$HOME/Fronts/Compute/add_hi.py" \
--kwargs region:"'GS'" days:8 year:2003 \
scale:10. number:1
@@ -8,19 +8,16 @@
# The averaged files are put in
# "${wd}/${rootdir}/climato/${varlist}/${n_clim}_${date}.\nc"
module load nco/4.7.x
typ="PFT"
typ="PFT_concentration"
wd="/data/chaeck"
wd=$HOME/Documents/Work/Data
if [[ "$typ" == "PFT"* ]]; then
varlist=(dtom dinflg galgae \
picoeuk pico prchlcus \
prokrt prymnsio)
prefixes=(dtom_ dinflg_ galgae_ picoeuk_ pico_ prchlcus_ prokrt_ \
prymnsio_)
rootdir="GS/SOM/PFT"
fi
if [[ "$typ" == "PFT_concentration" ]]; then
@@ -44,7 +41,7 @@ if [[ "$typ" == "CHL_L4" ]]; then
fi
if [[ "$typ" == "SST" ]]; then
varlist=(SST)
rootdir="GS/SST"
rootdir="AVHRR"
fi
@@ -74,10 +71,14 @@ for ivar in "${!varlist[@]}"; do
done
done
echo "$fileout"
echo "$mean_files"
mkdir -p "${wd}/${rootdir}/climato/${var_2}"
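# nces (NCO ensemble statistics) averages the records of mean_files into fileout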
nces -O ${mean_files} ${fileout}
done
exit 1  # stop before the monthly climatology section below
# -- MONTHLY --
files=($(ls "${wd}/${rootdir}/climato/${var_2}/46_"*.nc))
"""Compute cloud coverage for SST and CHL-L3."""
from os import path
import numpy as np
import netCDF4 as nc
import lib.data.sst
import lib.data.chl
from lib import root_data, get_args
args = get_args(['level'])
level = int(args['level'])
db_sst = lib.data.sst.get_data()
db_chl = lib.data.chl.get_data(level=level)
box_lat = [20, 52]
box_lon = [-82, -45]
range_time = [[2007, 1, 1], [2007, 12, 31]]
def get_land_mask_sst(db):
db.load_selected(var='SST_mask', time=0)
mask = db.view(var=0, time=0) != 1
db.unload_data()
return mask
def get_land_mask_chl(db):
db.load_selected(var='CHL_flags', time=0)
mask = db.view(var=0, time=0) == 9
db.unload_data()
return mask
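# Cloud cover = fraction of cloud-masked pixels over non-land pixels,
# computed for each time step (data loaded in chunks of 3 to limit memory).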
def compute_cloud_cover(db, var, get_land_mask, var_load=None):
if var_load is None:
var_load = var
db.select_by_value('avail', time=slice(*range_time),
lat=slice(*box_lat), lon=slice(*box_lon), by_day=True)
land_mask = get_land_mask(db)
total = np.sum(~land_mask)
time = db.selected.time
axis = (1, 2)
cloud = np.zeros(time.size)
for time_slice in db.selected.iter_slices('time', 3):
db.load_selected(var=var_load, time=time_slice)
data = db[var].mask
cloud[time_slice] = np.sum(data*~land_mask, axis=axis) / total
# Write to disk
filename = path.join(db.filegroups[0].root, 'cloud_cover.nc')
with nc.Dataset(filename, 'w') as f:
f.createDimension('time', time.size)
vnc = f.createVariable('time', 'f', ['time'])
vnc[:] = time[:]
vnc.setncattr('units', time.units)
vnc = f.createVariable('cloud', 'f', ['time'])
vnc[:] = cloud[:]
compute_cloud_cover(db_sst, 'SST', get_land_mask_sst, ['SST', 'SST_error'])
compute_cloud_cover(db_chl, 'CHL', get_land_mask_chl)
#!/usr/bin/env bash
export PYTHONPATH="/home/chaeck/Fronts/:$PYTHONPATH"
. "$HOME/.setup-shell.sh"
conda activate py38
python "$HOME/Fronts/Compute/compute_cloud_cover.py" \
--kwargs level:3
from os import path
import matplotlib.pyplot as plt
import tol_colors as tc
import numpy as np
import pandas as pd
import xarray as xr
import lib.data.images
from lib import root_data
plt.switch_backend('agg')
region = 'GS'
year = 2007
days = 1
fixes = dict()
scale = 30.
number = 1
n_bins = 800
bounds = [0., 20.]
cset = tc.tol_cset('high-contrast')
hi_dir = path.join(root_data, region, 'HI',
'HI_{:2.1f}_{:d}'.format(scale, number), 'HI',
'{:d}days'.format(days), str(year))
def get_extremas(bins, hist):
mins = {}
maxs = {}
for var in hist.keys():
mins[var], maxs[var] = get_extremas_(bins, hist[var])
return mins, maxs
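# The extremas are the values of the first and last non-empty bins.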
def get_extremas_(bins, hist):
mask = hist != 0
vmin = bins[mask.argmax(axis=0)]
imin = hist.shape[0] - np.flip(mask, axis=0).argmax(axis=0) - 1
vmax = bins[imin]
return vmin, vmax
def plot_hist(bins, hist, mins, maxs, ofile):
fig, axes = plt.subplots(2, 2, figsize=(5, 5))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.92)
for var, ax in zip(['S', 'V', 'B'], axes.flat):
ax.plot(bins[:-1], hist[var], ds='steps-post', color='k')
ax.axvline(mins[var], color=cset.blue)
ax.axvline(maxs[var], color=cset.red)
# ax.set_xlim(bins[0], maxs[var]*1.2)
ax.set_xlim(bins[0], 5)
ax.set_title(var)
fig.canvas.draw()
fig.savefig(ofile, dpi=150)
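# HI is a weighted sum of the components: coef_S*|S| + coef_V*V + coef_B*B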
def apply_coef(ds, coef):
ds['HI'] = (coef['S'] * np.fabs(ds.S)
+ coef['V'] * ds.V
+ coef['B'] * ds.B)
return ds
def get_hists(images_mth, variables, bins, coef=None, plot=False):
hist = {v: np.zeros(n_bins) for v in variables}
    for i, (m, im_day) in enumerate(images_mth.groupby(pd.Grouper(freq='1M'))):
hist_mth = {v: np.zeros(n_bins) for v in variables}
ofile = path.join(hi_dir, 'HIST_VSB_{}.png'.format(m.strftime('%F')))
for day, images in im_day.items():
print(day, end=': ')
hi_file = path.join(hi_dir, 'HI_{}.nc'.format(day.strftime('%Y%m%d')))
hi = xr.open_dataset(hi_file)
for j, image in enumerate(images):
print(j, end=', ')
hii = image.extract(hi)
if 'HI' in variables and coef is not None:
hii = apply_coef(hii, coef)
# Reject image
for var in variables:
h, _ = np.histogram(np.fabs(hii[var]),
bins=n_bins, range=bounds)
hist_mth[var] += h
print('')
        for var in variables:
            hist[var] += hist_mth[var]
            # normalize the monthly histogram to a density
            hist_mth[var] /= np.sum(hist_mth[var]*np.diff(bins))
mins, maxs = get_extremas(bins, hist_mth)
if plot:
plot_hist(bins, hist_mth, mins, maxs, ofile)
mins_mth[i] = mins
maxs_mth[i] = maxs
mins, maxs = get_extremas(bins, hist)
if plot:
plot_hist(bins, hist, mins, maxs, path.join(hi_dir, 'HIST_TOTAL.png'))
np.save(path.join(hi_dir, 'bins.npy'), bins)
for v in variables:
np.save(path.join(hi_dir, f'hist_{v}.npy'), hist[v])
return hist
images_all = lib.data.images.get_images_list(region=region, year=year,
days=days, fixes=fixes)
images_day = lib.data.images.regroup_by_date(images_all)
images_mth = pd.Series(list(images_day.values()), index=list(images_day.keys()))
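# group the daily image lists by calendar month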
groups = images_mth.groupby(pd.Grouper(freq='1M'))
mins_mth = [{} for _ in range(len(groups))]
maxs_mth = [{} for _ in range(len(groups))]
bins = np.linspace(*bounds, n_bins+1)
## Method 1
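# Rescale the bins linearly onto the [vmin, vmax] range of the histogram
# and fit the rescaled density (the fit itself is still a stub below).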
def get_coef(bins, hist):
vmin, vmax = get_extremas_(bins, hist)
fx = hist
bx = bins
by = (vmax-vmin)*bins + vmin
fy = hist / np.sum(hist*np.diff(by))
plot_fit(bx, by, fx, fy)
a = get_linear_fit_hist(bx, by, fx, fy)
return a
def get_linear_fit_hist(bx, by, fx, fy):
    # placeholder: linear fit between the two rescaled histograms, not implemented yet
    return 0
def plot_fit(bx, by, fx, fy):
fig, ax = plt.subplots(figsize=(5, 5))
fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
    ax.scatter(by[:-1], bx[:-1], s=np.log1p(np.fabs(fx)))  # log1p avoids -inf on empty bins
hist = {}
for v in ['S', 'V', 'B']:
hist[v] = np.load(path.join(hi_dir, f'hist_{v}.npy'))
bins = np.load(path.join(hi_dir, 'bins.npy'))
get_coef(bins, hist['V'])
## Method 2
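# Normalize each component by the inverse of its histogram's standard deviation.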
def get_coef(bins, hist):
std = get_std(bins[:-1], hist)
return 1./std
def get_std(bins, hist):
N = get_N(hist)
mean = get_mean(bins, hist, N)
return np.sqrt(np.sum(hist*(bins-mean)**2)/N)
def get_mean(bins, hist, N):
return np.sum(bins*hist)/N
def get_N(hist):
return np.sum(hist)
hist = {}
for v in ['S', 'V', 'B']:
hist[v] = np.load(path.join(hi_dir, f'hist_{v}.npy'))
bins = np.load(path.join(hi_dir, 'bins.npy'))
for v in ['S', 'V', 'B']:
print(v, get_coef(bins, hist[v]))
## Compute HI hist
coef = dict(S=2.384/2., V=5.874, B=8.599)
hist = get_hists(images_mth, ['HI'], bins, coef)
## Reload
hist = np.load(path.join(hi_dir, 'hist_HI.npy'))
"""Compute stats. """
from os import path
import xarray as xr
import numpy as np
import pandas as pd
from scipy import ndimage
from lib import root_data, get_args
from lib.dask_client import make_client
import lib.zones
import lib.data.mask
import lib.data.sst
import lib.data.chl
def get_data():
mask_lo = lib.data.mask.get_data(region, days, year, scale, number, chl=True)
mask_lo = mask_lo.assign_coords(time=mask_lo.time.dt.floor("D"))
chl = lib.data.chl.get_data(region, days, year)
zone_lo = lib.zones.get_data(region, 'lo')
ds_lo = xr.merge([mask_lo, chl, zone_lo], join='inner')
mask_hi = lib.data.mask.get_data(region, days, year, scale, number, chl=False)
sst = lib.data.sst.get_data(region, days, year)
sst = sst.rename(mask='sst_mask')
zone_hi = lib.zones.get_data(region, 'hi')
ds_hi = xr.merge([mask_hi, sst, zone_hi], join='inner')
ds_hi = ds_hi.assign_coords(time=ds_hi.time.dt.floor("D"))
ds_lo = add_land('lo', ds_lo)
ds_hi = add_land('hi', ds_hi)
ds_lo, ds_hi = xr.align(ds_lo, ds_hi, join='inner',
exclude=['lat', 'lon'])
ds_lo = to_pd_datetime(ds_lo)
ds_hi = to_pd_datetime(ds_hi)
return ds_lo, ds_hi
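# Pre-allocate an all-NaN dataset for the statistics, indexed by
# (variable, zone, mask, time).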
def get_stats(time):
variables = ['mean', 'q10', 'q25', 'q50', 'q75', 'q90', 'n']
coords = {
'variable': ['analysed_sst', 'CHL'],
'zone': zones_list,
'mask': ['front', 'background'],
'time': time
}
stats = xr.Dataset(data_vars={v: xr.DataArray(np.nan, coords, coords.keys())
for v in variables},
coords=coords)
return stats
def add_land(kind, ds):
filename = path.join(root_data, 'land_mask_{}.nc'.format(kind))
land = xr.open_dataset(filename)['land']
land, _ = xr.align(land, ds, join='right')
land = land.astype('bool')
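    # mark the Bermuda box as sea so the islands are not counted as land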
bermudes_lat = (32.20, 32.45)
bermudes_lon = (-64.95, -64.60)
land.loc[dict(lat=slice(*bermudes_lat),
lon=slice(*bermudes_lon))] = False
ds['land'] = land
return ds
def to_pd_datetime(ds):
ds = ds.assign_coords(time=pd.to_datetime(ds.time.values))
return ds
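# Dilate a binary mask: convolve with a disk-shaped kernel of radius
# ~neighbors and clip to [0, 1], repeating `repeat` times.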
def extend_mask(mask, neighbors, repeat):
n = 2*neighbors+1
kernel = np.zeros((n, n))
for i in range(n):
for j in range(n):
kernel[i, j] = (i-(n-1)/2)**2 + (j-(n-1)/2)**2 <= (n/2)**2
for _ in range(repeat):
mask = np.clip(ndimage.convolve(mask, kernel), 0, 1)
return mask
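# Fill one (variable, zone, mask, time) slot of the stats dataset with the
# pixel count, mean and quantiles of `da` over lat/lon, under `mask`.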
def compute_stats(stats, loc, da, mask):
def add(var, func, *args):
stats[var].loc[loc] = getattr(da.where(mask), func)(*args, dim=['lat', 'lon'])
add('n', 'count')
add('mean', 'mean')
add('q10', 'quantile', 0.10)
add('q25', 'quantile', 0.25)
add('q50', 'quantile', 0.50)
add('q75', 'quantile', 0.75)
add('q90', 'quantile', 0.90)
return stats
def compute(ds, var, stats):
print(var)
for m in ['front', 'background']:
print(m)
mask = ds[m] * ~ds['land']
for zone in zones_list:
print(zone)
if zone == 'total':
mask_ = mask
else:
mask_ = mask * ds[zone]