Skip to content
Snippets Groups Projects
  • Lionel GUEZ's avatar
    4689dd21
    Bug fix: take into account node indices · 4689dd21
    Lionel GUEZ authored
    Variables `id_anti_cell` and `id_cyclo_cell` contain node indices, not
    eddy indices. So we check that the first value is consistent with `d`,
    `d_init` and `e_overestim`. To decide whether we need to read
    `node_id_param.json` and filter ghost eddies, we need to check in the
    first Matlab file whether there is a variable `id_cyclo_cell`. So we
    modify the structure of the script a little: we process the first
    Matlab file before the time loop.
    4689dd21
    History
    Bug fix: take into account node indices
    Lionel GUEZ authored
    Variables `id_anti_cell` and `id_cyclo_cell` contain node indices, not
    eddy indices. So we check that the first value is consistent with `d`,
    `d_init` and `e_overestim`. To decide whether we need to read
    `node_id_param.json` and filter ghost eddies, we need to check in the
    first Matlab file whether there is a variable `id_cyclo_cell`. So we
    modify the structure of the script a little: we process the first
    Matlab file before the time loop.
inst_eddies_v6.py 9.28 KiB
#!/usr/bin/env python3

"""Converts adt_YYYY-MM-DD.mat files to two collections of shapefiles,
SHPC_anti and SHPC_cyclo. The mat files are assumed to be in v7.3
(HDF5) format and will be converted to v6 format before being read in
Python. The data in each input file is the set of detected
instantaneous eddies at a given date. The second, optional argument,
is the final date processed. The script inst_eddies.m must be in the
current directory at run-time.

"""

import shapefile
import numpy as np
from os import path
import scipy.io as sio
import datetime
import f90nml
import sys
import os
from dateutil import parser
import subprocess
from numpy import ma
import itertools
import json

def define_fields(writers):
    """Declare the attribute-table fields of each shapefile writer.

    writers is a dictionary of shapefile.Writer objects, keyed by
    "extr", "centroid", "outer" and "max_speed".

    """

    # For each writer key, the positional arguments of each call to
    # shapefile.Writer.field (name, type, size[, decimal]):
    layout = {"extr": [("ssh", "N", 13, 6), ("days_1950", "N", 5),
                       ("eddy_index", "N", 5), ("valid", "N", 1),
                       ("speed", "N", 13, 6)],
              "centroid": [("days_1950", "N", 5), ("eddy_index", "N", 5)],
              "outer": [("r_eq_area", "N", 10, 4), ("ssh", "N", 13, 6),
                        ("days_1950", "N", 5), ("eddy_index", "N", 5)],
              "max_speed": [("r_eq_area", "N", 10, 4), ("ssh", "N", 13, 6),
                            ("days_1950", "N", 5), ("eddy_index", "N", 5)]}

    for key, field_list in layout.items():
        for field_args in field_list:
            writers[key].field(*field_args)

def write_to_shp(writers, cyclone, d, cell = None, X = None, Y = None,
                 ADT = None, cell_mask = None):
    """Write to shapefiles eddies with a given orientation, at a given
    date.

    writers is a dictionary. cyclone is a boolean. d is an int. cell
    is a numpy.ndarray with shape (number of eddies including ghosts,
    21). X and Y are numpy one-dimensional arrays containing
    longitudes and latitudes of the SSH grid. ADT is a numpy
    two-dimensional array containing SSH. cell_mask is a numpy boolean
    array with shape (number of eddies including ghosts,). The eddy
    identification numbers start at 1 and increment by 1 for each
    non-ghost eddy in cell.

    """
    if cell is not None:
        # Amplitudes in eddy[7] and eddy[11] are positive regardless
        # of eddy orientation, so the sign of the offset from the SSH
        # at the extremum depends on the orientation:
        sign = 1. if cyclone else -1.
        i_eddy = 0

        for eddy, ghost in zip(cell, cell_mask):
            if ghost: continue

            # eddy is a numpy.ndarray with shape (21,). The
            # description of each element of eddy is given by
            # matlab_data["Fields"] and is repeated below in comments.
            i_eddy += 1
            i = np.argwhere(X == eddy[0]).item() # Xcenter
            j = np.argwhere(Y == eddy[1]).item() # Ycenter
            ssh_center = ADT[i, j]

            writers["extr"].point(eddy[0], eddy[1]) # [XY]center
            # (eddy[12] is Azimuthal Vit)
            writers["extr"].record(ssh = ssh_center, valid = 1,
                                   days_1950 = d, eddy_index = i_eddy,
                                   speed = 1e4 if np.isnan(eddy[12])
                                   else eddy[12])

            writers["centroid"].point(eddy[2], eddy[3]) # [XY]centroid
            writers["centroid"].record(days_1950 = d, eddy_index = i_eddy)

            # Outermost contour. eddy[6] is Equivalent Radius Out,
            # eddy[7] is Amplitude Out, eddy[4] and eddy[5] are
            # [XY]contour Out:
            writers["outer"].record(r_eq_area = eddy[6],
                                    ssh = ssh_center + sign * eddy[7],
                                    days_1950 = d, eddy_index = i_eddy)
            writers["outer"].poly([np.stack((eddy[4], eddy[5]),
                                            axis = 1).tolist()])

            # Max-speed contour. eddy[10] is Equivalent Radius Vit,
            # eddy[11] is Amplitude Vit, eddy[8] and eddy[9] are
            # [XY]contour Vit:
            writers["max_speed"].record(r_eq_area = eddy[10],
                                        ssh = ssh_center + sign * eddy[11],
                                        days_1950 = d, eddy_index = i_eddy)
            writers["max_speed"].poly([np.stack((eddy[8], eddy[9]),
                                                axis = 1).tolist()])

    # Record the index of the last shape written so far, even at a
    # date without eddies:
    n_shapes = len(writers["extr"])
    writers["ishape_last"].write(str(n_shapes - 1) + "\n")

if len(sys.argv) == 1: sys.exit("Required argument: first input .mat file")
adt_file = sys.argv[1]
dirname, basename = path.split(adt_file)
valid_date_formats = ["adt_%Y-%m-%d.mat", "adt_%Y%m%d.mat"]

# Get the date of the first file from its name. After the loop,
# date_format is the format that matched: it is reused below to
# construct the names of the following files.
for date_format in valid_date_formats:
    try:
        my_date = datetime.datetime.strptime(basename, date_format).date()
    except ValueError:
        pass
    else:
        break
else:
    # No format matched. Fail now with a clear message rather than
    # with a NameError on my_date further down.
    sys.exit("Bad input file name: " + basename + ". Expected one of: "
             + ", ".join(valid_date_formats))

# Number of days since 1950-01-01, written to the shapefiles as the
# date attribute:
d = my_date - datetime.date(1950, 1, 1)
d = d.days

if len(sys.argv) == 3:
    # Optional second argument: the final date processed (parsed
    # leniently, e.g. "2010-07-14"):
    final_date = parser.parse(sys.argv[2], yearfirst=True, dayfirst = False)\
                       .date()
else:
    # Without a second argument, process only the first date:
    final_date = my_date

# First date is special: we do not allow the file to be missing and we
# use it to determine whether we are going to filter ghost eddies.

print("Processing", my_date, "...")
# inst_eddies.m presumably reads "adt.mat" and writes the v6-format
# copy "adt_v6.mat" (cf. module docstring):
os.symlink(adt_file, "adt.mat")
subprocess.run(["matlab", "-batch", "inst_eddies"], check = True)

matlab_data = sio.loadmat("adt_v6.mat", squeeze_me = True)
# (matlab_data is a dictionary.)

# Variables id_anti_cell and id_cyclo_cell, when present, contain node
# indices rather than eddy indices, with invalid values for ghost
# eddies. Their presence in the first file tells us whether ghost
# filtering is needed for the whole run:
filter_ghosts = "id_cyclo_cell" in matlab_data

if filter_ghosts:
    with open("node_id_param.json") as f: node_id_param = json.load(f)
    # n1 is the expected node id of the first non-ghost eddy at the
    # current date, computed from d, d_init and e_overestim:
    n1 = (d - node_id_param["d_init"]) * node_id_param["e_overestim"] + 1
    # (same starting node id for both orientations)
    # (same starting node id for both orientations)
# There are two factories, one for cyclones and one for anticyclones:
factories = [dict(writers = {}, cell_name = "Anticyclonic_Cell",
                  cyclone = False, SHPC = "SHPC_anti", id = "id_anti_cell"),
             dict(writers = {}, cell_name = "Cyclonic_Cell", cyclone = True,
                  SHPC = "SHPC_cyclo", id = "id_cyclo_cell")]

# (writer key, shapefile base name, shape type) for the four
# shapefiles of each collection:
layer_specs = [("extr", "extremum", shapefile.POINT),
               ("centroid", "centroid", shapefile.POINT),
               ("outer", "outermost_contour", shapefile.POLYGON),
               ("max_speed", "max_speed_contour", shapefile.POLYGON)]

for factory in factories:
    for key, base_name, shape_type in layer_specs:
        fname = path.join(factory["SHPC"], base_name)
        factory["writers"][key] = shapefile.Writer(fname,
                                                   shapeType = shape_type)

    define_fields(factory["writers"])

    # Text file accumulating, for each date, the index of the last
    # shape written:
    fname = path.join(factory["SHPC"], "ishape_last.txt")
    factory["writers"]["ishape_last"] = open(fname, "w")


# Loop on dates. At the top of each iteration, if the input file for
# my_date is readable then it has already been converted and loaded
# into matlab_data (before the loop for the first date, at the bottom
# of the previous iteration otherwise).
while True:
    if os.access(adt_file, os.R_OK):
        # We are done with the v6 copy:
        os.remove("adt_v6.mat")
        # Sanity check on the date stored in the Matlab file. Matlab
        # datenums count days from year 0 while Python ordinals count
        # from year 1, hence the 366-day offset:
        assert datetime.date.fromordinal(matlab_data["date_num"] - 366) \
            == my_date

        for factory in factories:
            if filter_ghosts:
                # The id variable contains node ids, with invalid
                # values (NaN) marking ghost eddies:
                id_array = ma.fix_invalid(matlab_data[factory["id"]])
                id_array = np.rint(id_array).astype(int)
                n_eddies = id_array.count() # excluding ghosts
                # Check that the node ids of non-ghost eddies are
                # consecutive starting at n1, i.e. consistent with d,
                # d_init and e_overestim:
                equal_range \
                    = id_array.compressed() == np.arange(n1, n1 + n_eddies)
                assert np.all(equal_range)
                cell_mask = id_array.mask
            else:
                cell_mask = itertools.repeat(False) # no ghost

            write_to_shp(factory["writers"], factory["cyclone"], d,
                         matlab_data[factory["cell_name"]], matlab_data["X"],
                         matlab_data["Y"], matlab_data["ADT"], cell_mask)

        os.remove("adt.mat")
    else:
        # Missing input file: write no shapes but still record
        # ishape_last so the per-date indices stay in step:
        print("Missing file:", adt_file)
        
        for factory in factories:
            write_to_shp(factory["writers"], factory["cyclone"], d)
        
    # Advance to the next date, keeping d and n1 in step with my_date:
    my_date += datetime.timedelta(1)
    if my_date > final_date: break
    d += 1
    if filter_ghosts: n1 += node_id_param["e_overestim"]
    basename = my_date.strftime(date_format)
    adt_file = path.join(dirname, basename)
    
    if os.access(adt_file, os.R_OK):
        # Convert and load the file for the next iteration:
        print("Processing", my_date, "...")
        os.symlink(adt_file, "adt.mat")
        subprocess.run(["matlab", "-batch", "inst_eddies"], check = True)
        matlab_data = sio.loadmat("adt_v6.mat", squeeze_me = True)

# Describe the SSH grid (from the last Matlab file read) in a Fortran
# namelist, then write the per-collection metadata files and close all
# the writers:
lon = matlab_data["X"]
lat = matlab_data["Y"]
nml = f90nml.Namelist()
nml["grid_nml"] = dict(corner_deg = [lon[0], lat[0]], nlon = lon.size,
                       nlat = lat.size,
                       step_deg = [lon[1] - lon[0], lat[1] - lat[0]])

for factory in factories:
    nml.write(path.join(factory["SHPC"], "grid_nml.txt"), force = True)

    # Record the orientation of the collection:
    orientation = "cyclones\n" if factory["cyclone"] else "anticyclones\n"

    with open(path.join(factory["SHPC"], "orientation.txt"), "w") as f:
        f.write(orientation)

    for writer in factory["writers"].values(): writer.close()