-
Lionel GUEZ authored
Variables `id_anti_cell` and `id_cyclo_cell` contain node indices, not eddy indices. So we check that the first value is consistent with `d`, `d_init` and `e_overestim`. To decide whether we need to read `node_id_param.json` and filter ghost eddies, we look into the first Matlab file to see whether there is a variable `id_cyclo_cell`. So we modify the structure of the script a little: we process the first Matlab file before the time loop.
Lionel GUEZ authored — Variables `id_anti_cell` and `id_cyclo_cell` contain node indices, not eddy indices. So we check that the first value is consistent with `d`, `d_init` and `e_overestim`. To decide whether we need to read `node_id_param.json` and filter ghost eddies, we look into the first Matlab file to see whether there is a variable `id_cyclo_cell`. So we modify the structure of the script a little: we process the first Matlab file before the time loop.
inst_eddies_v6.py 9.28 KiB
#!/usr/bin/env python3
"""Converts adt_YYYY-MM-DD.mat files to two collections of shapefiles,
SHPC_anti and SHPC_cyclo. The mat files are assumed to be in v7.3
(HDF5) format and will be converted to v6 format before being read in
Python. The data in each input file is the set of detected
instantaneous eddies at a given date. The second, optional argument,
is the final date processed. The script inst_eddies.m must be in the
current directory at run-time.
"""
import shapefile
import numpy as np
from os import path
import scipy.io as sio
import datetime
import f90nml
import sys
import os
from dateutil import parser
import subprocess
from numpy import ma
import itertools
import json
def define_fields(writers):
    """Declare the attribute table of each shapefile layer.

    writers is a dictionary of shapefile.Writer objects, keyed by
    layer name. Each field is declared as (name, type, size[, decimal
    places]); "N" is the numeric field type.
    """
    field_specs = {
        "extr": [("ssh", "N", 13, 6), ("days_1950", "N", 5),
                 ("eddy_index", "N", 5), ("valid", "N", 1),
                 ("speed", "N", 13, 6)],
        "centroid": [("days_1950", "N", 5), ("eddy_index", "N", 5)],
        "outer": [("r_eq_area", "N", 10, 4), ("ssh", "N", 13, 6),
                  ("days_1950", "N", 5), ("eddy_index", "N", 5)],
        "max_speed": [("r_eq_area", "N", 10, 4), ("ssh", "N", 13, 6),
                      ("days_1950", "N", 5), ("eddy_index", "N", 5)],
    }
    for layer, fields in field_specs.items():
        for args in fields:
            writers[layer].field(*args)
def write_to_shp(writers, cyclone, d, cell = None, X = None, Y = None,
                 ADT = None, cell_mask = None):
    """Write to shapefiles eddies with a given orientation, at a given
    date.

    writers is a dictionary. cyclone is a boolean. d is an int. cell
    is a numpy.ndarray with shape (number of eddies including ghosts,
    21). X and Y are numpy one-dimensional arrays containing
    longitudes and latitudes of the SSH grid. ADT is a numpy
    two-dimensional array containing SSH. cell_mask is a numpy boolean
    array with shape (number of eddies including ghosts,). The eddy
    identification numbers start at 1 and increment by 1 for each
    non-ghost eddy in cell. If cell is None (missing input file), no
    shape is written, but ishape_last is still updated.
    """
    if cell is not None:
        idx = 0

        for eddy, ghost in zip(cell, cell_mask):
            if ghost:
                continue

            # eddy is a numpy.ndarray with shape (21,). The
            # description of each element of eddy is given by
            # matlab_data["Fields"] and is repeated below in
            # comments. Note that amplitudes in eddy[7] and eddy[11]
            # are positive regardless of eddy orientation.
            idx += 1
            lon, lat = eddy[0], eddy[1] # [XY]center
            writers["extr"].point(lon, lat)
            # The extremum is a grid point, so we can locate it by
            # exact comparison with the coordinate arrays:
            i = np.argwhere(X == lon).item()
            j = np.argwhere(Y == lat).item()
            # Azimuthal Vit, with a large sentinel value replacing NaN:
            speed = eddy[12]

            if np.isnan(speed):
                speed = 1e4

            writers["extr"].record(ssh = ADT[i, j], valid = 1,
                                   days_1950 = d, eddy_index = idx,
                                   speed = speed)
            # Amplitudes are added to the central SSH for cyclones and
            # subtracted for anticyclones:
            sign = 1 if cyclone else -1
            writers["centroid"].point(eddy[2], eddy[3]) # [XY]centroid
            writers["centroid"].record(days_1950 = d, eddy_index = idx)
            # eddy[6] is Equivalent Radius Out, eddy[7] is Amplitude Out:
            writers["outer"].record(r_eq_area = eddy[6],
                                    ssh = ADT[i, j] + sign * eddy[7],
                                    days_1950 = d, eddy_index = idx)
            # [XY]contour Out:
            writers["outer"].poly([np.stack((eddy[4], eddy[5]),
                                            axis = 1).tolist()])
            # eddy[10] is Equivalent Radius Vit, eddy[11] is Amplitude
            # Vit:
            writers["max_speed"].record(r_eq_area = eddy[10],
                                        ssh = ADT[i, j] + sign * eddy[11],
                                        days_1950 = d, eddy_index = idx)
            # [XY]contour Vit:
            writers["max_speed"].poly([np.stack((eddy[8], eddy[9]),
                                                axis = 1).tolist()])

    n_shapes = len(writers["extr"])
    writers["ishape_last"].write(str(n_shapes - 1) + "\n")
if len(sys.argv) == 1: sys.exit("Required argument: first input .mat file")
adt_file = sys.argv[1]
dirname, basename = path.split(adt_file)
# The date of the first file is parsed from the file name. Two naming
# conventions are accepted:
valid_date_formats = ["adt_%Y-%m-%d.mat", "adt_%Y%m%d.mat"]
for date_format in valid_date_formats:
    try:
        my_date = datetime.datetime.strptime(basename, date_format).date()
    except ValueError:
        pass
    else:
        break
else:
    # No format matched: fail with a clear message instead of the
    # NameError on my_date that would otherwise follow.
    sys.exit("Cannot parse a date from file name: " + basename)
# d is the number of days since 1950-01-01, the time coordinate
# written to the shapefiles:
d = my_date - datetime.date(1950, 1, 1)
d = d.days
if len(sys.argv) == 3:
    # Second, optional argument: final date processed (year first):
    final_date = parser.parse(sys.argv[2], yearfirst = True,
                              dayfirst = False).date()
else:
    final_date = my_date
# First date is special: we do not allow the file to be missing and we
# use it to determine whether we are going to filter ghost eddies.
print("Processing", my_date, "...")
# inst_eddies.m reads its input under the fixed name adt.mat and
# produces adt_v6.mat in v6 format, which scipy.io can read:
os.symlink(adt_file, "adt.mat")
subprocess.run(["matlab", "-batch", "inst_eddies"], check = True)
matlab_data = sio.loadmat("adt_v6.mat", squeeze_me = True)
# (matlab_data is a dictionary.)
# The presence of variable id_cyclo_cell means the Matlab file
# contains ghost eddies, which we will have to filter out:
filter_ghosts = "id_cyclo_cell" in matlab_data
if filter_ghosts:
    with open("node_id_param.json") as f: node_id_param = json.load(f)
    # n1 is the node id expected for the first non-ghost eddy at date
    # d, given the node numbering parameters d_init and e_overestim:
    n1 = (d - node_id_param["d_init"]) * node_id_param["e_overestim"] + 1
    # (same starting node id for both orientations)
# There are two factories, one for cyclones and one for anticyclones.
# Each factory bundles everything orientation-specific: the shapefile
# writers, the name of the Matlab cell variable, the output directory
# (SHPC) and the name of the Matlab node-id variable.
factories = [dict(writers = {}, cell_name = "Anticyclonic_Cell",
                  cyclone = False, SHPC = "SHPC_anti", id = "id_anti_cell"),
             dict(writers = {}, cell_name = "Cyclonic_Cell", cyclone = True,
                  SHPC = "SHPC_cyclo", id = "id_cyclo_cell")]
for factory in factories:
    # Create the four shapefile layers of the collection.
    # NOTE(review): the SHPC directories are assumed to exist already
    # -- confirm.
    fname = path.join(factory["SHPC"], "extremum")
    factory["writers"]["extr"] = shapefile.Writer(fname,
                                                  shapeType = shapefile.POINT)
    fname = path.join(factory["SHPC"], "centroid")
    factory["writers"]["centroid"] \
        = shapefile.Writer(fname, shapeType = shapefile.POINT)
    fname = path.join(factory["SHPC"], "outermost_contour")
    factory["writers"]["outer"] \
        = shapefile.Writer(fname, shapeType = shapefile.POLYGON)
    fname = path.join(factory["SHPC"], "max_speed_contour")
    factory["writers"]["max_speed"] \
        = shapefile.Writer(fname, shapeType = shapefile.POLYGON)
    define_fields(factory["writers"])
    # Plain-text file recording, for each date, the index of the last
    # shape written so far. It is kept open for the whole run and
    # closed at the end of the script together with the writers.
    fname = path.join(factory["SHPC"], "ishape_last.txt")
    factory["writers"]["ishape_last"] = open(fname, "w")
# Loop on dates:
while True:
    if os.access(adt_file, os.R_OK):
        # The v6 file for the current date was already produced
        # (before the loop for the first date, at the bottom of this
        # loop for the following dates) and loaded into matlab_data,
        # so the file itself is no longer needed:
        os.remove("adt_v6.mat")
        # Consistency check on the date stored in the Matlab file.
        # Matlab's datenum is offset by 366 days from Python's
        # proleptic Gregorian ordinal:
        assert datetime.date.fromordinal(matlab_data["date_num"] - 366) \
            == my_date
        for factory in factories:
            if filter_ghosts:
                # Ghost eddies have an invalid node id: mask them out
                # and check that the valid ids are exactly the
                # expected range of node indices for this date.
                id_array = ma.fix_invalid(matlab_data[factory["id"]])
                id_array = np.rint(id_array).astype(int)
                n_eddies = id_array.count() # excluding ghosts
                equal_range \
                    = id_array.compressed() == np.arange(n1, n1 + n_eddies)
                assert np.all(equal_range)
                cell_mask = id_array.mask
            else:
                cell_mask = itertools.repeat(False) # no ghost
            write_to_shp(factory["writers"], factory["cyclone"], d,
                         matlab_data[factory["cell_name"]], matlab_data["X"],
                         matlab_data["Y"], matlab_data["ADT"], cell_mask)
        os.remove("adt.mat")
    else:
        # Missing input file: write no shape for this date, but still
        # update ishape_last (write_to_shp without a cell argument):
        print("Missing file:", adt_file)
        for factory in factories:
            write_to_shp(factory["writers"], factory["cyclone"], d)
    my_date += datetime.timedelta(1)
    if my_date > final_date: break
    d += 1
    # Node ids of the next date start e_overestim further on:
    if filter_ghosts: n1 += node_id_param["e_overestim"]
    # date_format is the format that matched the first file name:
    basename = my_date.strftime(date_format)
    adt_file = path.join(dirname, basename)
    if os.access(adt_file, os.R_OK):
        print("Processing", my_date, "...")
        # Same conversion step as for the first date: inst_eddies.m
        # reads adt.mat and writes adt_v6.mat:
        os.symlink(adt_file, "adt.mat")
        subprocess.run(["matlab", "-batch", "inst_eddies"], check = True)
        matlab_data = sio.loadmat("adt_v6.mat", squeeze_me = True)
# Write the grid description, common to both collections, derived from
# the coordinate arrays of the last Matlab file read. The grid is
# assumed to be regular, with at least two points in each direction:
nml = f90nml.Namelist()
nml["grid_nml"] = dict(corner_deg = [matlab_data["X"][0], matlab_data["Y"][0]],
                       nlon = matlab_data["X"].size,
                       nlat = matlab_data["Y"].size,
                       step_deg = [matlab_data["X"][1] - matlab_data["X"][0],
                                   matlab_data["Y"][1] - matlab_data["Y"][0]])
for factory in factories:
    fname = path.join(factory["SHPC"], "grid_nml.txt")
    nml.write(fname, force = True)
    # Record the orientation of the collection:
    fname = path.join(factory["SHPC"], "orientation.txt")
    with open(fname, "w") as f:
        if factory["cyclone"]:
            f.write("cyclones\n")
        else:
            f.write("anticyclones\n")
    # Close the shapefile writers and the ishape_last text file. (The
    # keys are not needed here, so iterate over the values only.)
    for writer in factory["writers"].values(): writer.close()