From 6e52178bb19a3f06f649daecfe0b31f74a33d672 Mon Sep 17 00:00:00 2001 From: Lionel GUEZ <guez@lmd.ens.fr> Date: Wed, 18 Aug 2021 18:17:15 +0200 Subject: [PATCH] Split `id_child.mat` Integrating work done by Mevludin Isic. We add the possibility to split `id_child` because it takes more than 2 GiB for the global domain and thus we cannot save the whole variable to Matlab v6 format. We split `id_child` along the time dimension. As the Matlab code for this is the same for anticyclonic and cyclonic eddies, we extract it from the script `overlap.m` to a new script `id_child.m`. --- Convert_Matlab/id_child.m | 30 ++++++++++++++++++++++++++++++ Convert_Matlab/overlap.m | 23 +---------------------- Convert_Matlab/overlap_v6.py | 28 ++++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 26 deletions(-) create mode 100644 Convert_Matlab/id_child.m diff --git a/Convert_Matlab/id_child.m b/Convert_Matlab/id_child.m new file mode 100644 index 00000000..c2c0b94e --- /dev/null +++ b/Convert_Matlab/id_child.m @@ -0,0 +1,30 @@ +% This is a Matlab script. This script converts to Matlab v6 format +% some output of TOEddies: what is necessary for the script +% overlap_v6.py. 
+ +clear +disp('Loading Association_eddies_max...') +tic +load('Association_eddies_max.mat', 'id_child') +toc +disp('Saving id_child...') +n_dates = size(id_child, 2) +n_files = 10 +chunck = ceil(n_dates / n_files) +last_date = 0 +tic + +for i = 0:n_files - 2 + first_date = last_date + 1; + last_date = last_date + chunck + part_id_child = id_child(:, first_date:last_date); + destination = strcat('id_child_', int2str(i)); + save(destination, 'part_id_child', '-v6') +end + +first_date = last_date + 1; +part_id_child = id_child(:, first_date:end); +destination = strcat('id_child_', int2str(n_files - 1)); +save(destination, 'part_id_child', '-v6') +toc +exit diff --git a/Convert_Matlab/overlap.m b/Convert_Matlab/overlap.m index adf026de..6a4ba8e6 100644 --- a/Convert_Matlab/overlap.m +++ b/Convert_Matlab/overlap.m @@ -1,28 +1,7 @@ % This is a Matlab script. This script converts to Matlab v6 format -% some output of TOEddies: what is necessary for the script +% some output of TOEddies: part of what is necessary for the script % overlap_v6.py. -clear -disp('Loading Association_eddies_Anti_max...') -tic -load('Association_eddies_Anti_max.mat', 'id_child') -toc -disp('Saving id_child...') -tic -save('id_child_anti', 'id_child', '-v6') -toc - -clear -disp('Loading Association_eddies_Cyclo_max...') -tic -load('Association_eddies_Cyclo_max.mat', 'id_child') -toc -disp('Saving id_child...') -tic -save('id_child_cyclo', 'id_child', '-v6') -toc - -clear disp('Processing Parameters_Anticyclonic_Eddies...') load('Parameters_Anticyclonic_Eddies.mat', 'Nanti', 'date_num') N_eddies = Nanti; diff --git a/Convert_Matlab/overlap_v6.py b/Convert_Matlab/overlap_v6.py index c10ba2ba..ced9e185 100755 --- a/Convert_Matlab/overlap_v6.py +++ b/Convert_Matlab/overlap_v6.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""This script reads the eddy graph from Matlab v6 files id_child.mat - and N_eddies.mat, for a given orientation and writes the graph to - a new file in edgelist format. 
+"""This script reads the eddy graph from Matlab v6 files + id_child_*.mat and N_eddies.mat, for a given orientation and + writes the graph to a new file in edgelist format. """ @@ -11,6 +11,7 @@ import csv import sys import json import datetime +import numpy as np def to_days_1950(date_num): """Convert Matlab serial date number to number of days since Jan 1st @@ -26,7 +27,26 @@ def adjust_n(n_Matlab): return n_Matlab + (k - k_Matlab) * e_overestim if __name__ == "__main__": - id_child = sio.loadmat("id_child.mat", squeeze_me=True)["id_child"] + n_files = input("Number of files = ? ") + n_files = int(n_files) + id_child_list = [] + + for i in range(n_files): + print(f"Loading id_child_{i}.mat...") + part_id_child \ + = sio.loadmat(f"id_child_{i}.mat", squeeze_me=True)["part_id_child"] + # part_id_child is a numpy array with shape (e_overestim, chunk) + # where chunk is lower than or equal to the number of dates in the + # original Matlab files. + + id_child_list.append(part_id_child) + + if n_files >= 2: + id_child = np.concatenate(id_child_list, axis = 1) + del id_child_list, part_id_child # can be a lot of memory + else: + id_child = part_id_child + n_eddies = sio.loadmat("N_eddies.mat", squeeze_me = True)["N_eddies"]\ .astype(int, casting = "safe", copy = False) date_num = sio.loadmat("date_num.mat", squeeze_me = True)["date_num"] -- GitLab