Commit 85a14b96 authored by Clément Haëck's avatar Clément Haëck
Browse files

Add script to compute n-days mean

uses XArray (simple resample, since it is yearly, no big overhead)
parent ff74dd4c
#!/usr/bin/env bash
# Compute 8 days mean
module load nco/4.7.x
wd="/data/chaeck"
variable="SST"
year="2017"
precision=".3"
region="GS"
if [[ "$variable" == "SST" ]]; then
prefix=""
suffix="090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1_subset.nc"
keep_var="time,lat,lon,analysed_sst,analysis_error,mask,sea_ice_fraction"
rootdir="SST"
format="%Y%m%d"
fi
if [[ "$variable" == "CHL_L3" ]]; then
prefix="CHL_"
suffix=".nc"
rootdir="CHL/L3"
format="%Y-%m-%d"
fi
if [[ "$variable" == "CHL_L4" ]]; then
prefix="CHL_"
suffix=".nc"
rootdir="CHL/L4"
format="%Y-%m-%d"
fi
currentdir="${wd}/${region}/${rootdir}/1days/${year}"
targetdir="${wd}/${region}/${rootdir}/8days/${year}"
if ! files=("${currentdir}/${prefix}"*"${suffix}"); then
echo "No files found"
exit 1
fi
if [[ ! -d "${targetdir}" ]]; then
echo "Target directory does not exist ($targetdir)."
exit 1
fi
for ((i=29; i<30; i++)); do
date_start=$(date -d "${year}-01-01 +$((8*$i)) day" "+$format")
echo "$i $date_start"
fileout="${targetdir}/${prefix}${date_start}${suffix}"
if [[ -f "${fileout}" ]]; then
echo "Out file already exists ($fileout)."
exit 1
fi
if [[ "${fileout}" == "" ]]; then
echo "Out file is empty."
exit 1
fi
# Files to take to average
mean_files=()
for ((j=0; j<8; j++)); do
d="$(date -d "${date_start} + $j day" "+${format}")"
if [[ "$(date -d "$d" +%Y)" -ne "$year" ]]; then
break
fi
file="${currentdir}/${prefix}${d}${suffix}"
if [[ ! -f "$file" ]]; then
echo "$file does not exist"
exit 1
fi
ncks -L4 --ppc default="$precision" -v "${keep_var}" "${file}" "${file}.reduced"
mean_files=("${mean_files[@]}" "${file}.reduced")
done
echo "$fileout"
echo "${mean_files[@]}"
nces ${mean_files[@]} "${fileout}.big"
for file in "${mean_files[@]}"; do
if [[ "$file" == *"${suffix}.reduced" ]]; then
echo "rm ${file}"
rm ${file}
fi
done
ncks -L4 --ppc default="$precision" "${fileout}.big" "${fileout}"
echo "rm ${fileout}.big"
rm "${fileout}.big"
done
"""Compute n-days mean of multiple datasets.
from os import path
from datetime import datetime
import logging
import numpy as np
import pandas as pd
Specify dataset using its `lib.data.<module name>`
import lib.data.modis
from lib import root_data, check_output_dir
Uses XArray. Yearly script.
"""
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
import importlib
from os import path
import lib
from lib.xarray_utils import save_chunks_by_date
region = 'GS'
year = 2007
fixes = {}
ndays = 8
def main(args):
mod = importlib.import_module('lib.data.{}'.format(args['dataset']))
def get_root(region, days, year):
root = path.join(root_data, region, 'MODIS',
'{:d}days'.format(days), str(year))
return root
args['fixes'] = dict(Y=args['year'])
ds = mod.get_data(args)
resample = ds.resample(time='{:d}D'.format(args['ndays']))
ds = resample.mean('time', keep_attrs=True)
finder = mod.get_finder(args, days=args['ndays'])
finder.fix_matchers(Y='%Y', m='%m', d='%d')
lib.check_output_dir(path.join(finder.root, '{:d}'.format(args['year'])))
filename = finder.get_filename()
odir = get_root(region, ndays, year)
check_output_dir(odir, log)
save_chunks_by_date(ds, filename,
encoding={'_all': {'zlib': True}})
ds = lib.data.modis.get_data(region=region, days=1, year=year, fixes=fixes)
return ds
dates = pd.date_range(datetime(year, 1, 1), datetime(year+1, 1, 1),
freq="{:d}D".format(ndays), closed='left')
encoding = {v: {'zlib': True, '_FillValue': -999.}
for v in ds.data_vars}
for v in ds.data_vars:
encoding[v]['dtype'] = ds[v].encoding['dtype']
for d in ['scale_factor', 'add_offset']:
if d in ds[v].encoding:
encoding[v][d] = ds[v].encoding[d]
if __name__ == '__main__':
for v in ds.data_vars:
ds[v] = ds[v].where(ds.qual_sst.fillna(0) < 2, np.nan)
def add_args(parser):
parser.add_argument('-dataset', type=str, default='ostia')
parser.add_argument('-ndays', type=int, default=8)
for d, dst in ds.groupby_bins('time', dates):
date_str = d.left.strftime('%Y%m%d')
ofile = path.join(odir, 'A_{:s}.nc'.format(d.left.strftime('%Y%m%d')))
dst = dst.mean(['time'], skipna=True)
dst = dst.assign_coords(time=pd.to_datetime(date_str))
dst.to_netcdf(ofile, encoding=encoding)
args = lib.get_args(['region', 'year', 'level'], add_args)
ds = main(args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment