Commit 0fc00d1f authored by Guillaume
Browse files

improve get_facets_from_filename

parents 51932e28 c1171bc9
......@@ -12,7 +12,7 @@ import pyessv
from pyessv import TemplateParsingError
from TimeRange import TimeRange
from constants import VOCAB
from constants import VOCAB, FILENAME_TABLE_ID
class DRSParser(object):
......@@ -35,6 +35,9 @@ class DRSParser(object):
self.dir_drs = VOCAB[project]['directory_format']
self.file_drs = VOCAB[project]['filename_format']
# Get fixed tables list.
self.tables = [i.raw_name for i in pyessv.load(FILENAME_TABLE_ID[project]).terms if 'fx' in i.raw_name]
# DRS keys.
self.dir_keys = [pyessv.load(i).raw_name for i in self.dir_drs]
self.file_keys = [pyessv.load(i).raw_name for i in self.file_drs]
......@@ -71,8 +74,8 @@ class DRSParser(object):
print(e)
return None
# Key error handling. Due to wrong number of facets in comparison with the path parts.
except KeyError as e:
# Catch any other exception.
except Exception as e:
print(e)
return None
......@@ -82,42 +85,39 @@ class DRSParser(object):
"""
# Initialize tstart & tend
tstart, tend = None, None
tstart, tend, clim = 'none', 'none', 'False'
# Set clim to True and rename basename to match usual template in case of climatology file.
if basename.endswith('-clim.nc'):
basename, clim = basename.replace('-clim', ''), 'True'
# Check vocabulary.
try:
self.file_parser.parse(basename)
# Deserialize time range in date format.
timerange = TimeRange(basename.split('_')[-1])
terms = self.file_parser.parse(basename)
timerange = TimeRange(terms[[term.collection.raw_name for term in terms].index('time_range')])
tstart, tend = timerange.start, timerange.end
# Vocabulary error handling.
# Parsing error handling.
except TemplateParsingError:
# Try checking vocabulary with fixed variable template.
# Try parsing with "fixed" template.
try:
self.fx_file_parser.parse(basename)
# No timerange.
tstart, tend = "none", "none"
# Set no timerange and no climatology.
tstart, tend, clim = 'none', 'none', 'False'
# Vocabulary error handling.
except TemplateParsingError as e:
# Catch any other exception.
except Exception as e:
print(e)
# Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e:
print(e)
return None
# Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e:
# Catch any other exception.
except Exception as e:
print(e)
return None
# Deserialize filename and add time range facets.
facets = dict(zip(self.file_keys[:-1], basename.split('_')[:-1]))
facets['period_start'] = tstart
facets['period_end'] = tend
facets['climatology'] = clim
return facets
......@@ -22,21 +22,22 @@ outcat = "OutputCatalog"
threads = 1
#entryBug = "/bdd/CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/.paths.txt"
# entryBug = "/bdd/CMIP6/CMIP/MIROC/MIROC-ES2L/abrupt-4xCO2/.paths.txt"
# # Simulation du traitement (INIT)
# ctx = Context(ARG(project,outcat,threads))
entryBug = "/bdd/CMIP6/PMIP/IPSL/IPSL-CM6A-LR/lig127k/.paths.txt"
# # Simulation du traitement (GO)
# p=Process(ctx)
# p(entryBug) # pour un fichier complet .paths.txt
# Simulation du traitement (INIT)
ctx = Context(ARG(project,outcat,threads))
# Simulation du traitement (GO)
p=Process(ctx)
p(entryBug) # pour un fichier complet .paths.txt
#Pour juste une ligne du fichier ...
drs = DRSParser(project)
# drs = DRSParser(project)
pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
# pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
print(fa)
\ No newline at end of file
# pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
# pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
# fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
# print(fa)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 28 10:47:06 2022
@author: ltroussellier
"""
import time
from glob import iglob
# Directory holding the generated CSV catalogs to monitor.
path = "OutputCatalog/CMIP6"
# Reference time used to compute the average rate.
start = time.time()
# Number of CSV catalogs seen at the previous check.
lastnb = len(list(iglob(path + '/*.csv')))
# Interactive loop: each press of Enter prints a progress report (stop with Ctrl-C).
while True:
    input("Press Enter to check")
    # Count the CSV catalogs currently present.
    nbFile = len(list(iglob(path + '/*.csv')))
    now = time.time()
    # NOTE(review): this ratio uses every catalog currently present, including
    # any that already existed when the script started.
    mean = nbFile / (now - start)
    # Catalogs created since the last check: current count minus previous count
    # (the reverse subtraction reported a negative number for new catalogs).
    print(nbFile - lastnb, "catalog crées depuis le dernier check")
    lastnb = nbFile
    print(mean, "en moyenne /s depuis le début")
......@@ -7,7 +7,7 @@ class TimeRange(object):
def __init__(self, timerange):
# Split time range into start and end digits.
self.start, self.end = map(self.iso_format, timerange.split('-'))
self.start, self.end, = map(self.iso_format, timerange.split('-')[:2])
@staticmethod
def iso_format(timestamp):
......@@ -18,6 +18,7 @@ class TimeRange(object):
at January 1st and monthly dates starts at first day of the month.
"""
if len(timestamp) == 4:
# Start year at january 1st
timestamp = (timestamp + '0101').ljust(14, '0')
......
......@@ -19,6 +19,10 @@ JSON_EXTENSION = ".json"
CATALOG_DESCRIPTION_TEMPLATE = "CLIMERI-France {} data catalog."
FILENAME_TABLE_ID = {'CMIP6': 'wcrp:cmip6:table_id',
'CMIP5': 'wcrp:cmip5:cmor_table',
'CORDEX': 'wcrp:cordex:time_frequency'}
VOCAB = {
'CMIP6': {
'directory_format': (
......
#!bin/python
# -*- coding: utf-8 -*-
import os
from argparse import ArgumentParser
from glob import iglob
from multiprocessing import Pool
......@@ -33,7 +32,10 @@ class Process(object):
# Build CSV header.
self.header = ['path', 'project']
self.header.extend(drs.dir_keys)
self.header.extend(['period_start', 'period_end', 'latest'])
self.header.extend(['period_start',
'period_end',
'climatology',
'latest'])
# Set CSV entries list.
self.entries = list()
......@@ -72,7 +74,10 @@ class Process(object):
entry_facets = [facets[i] for i in drs.dir_keys]
entry = [IPSL_DATA_ROOT + path.as_posix(), self.project]
entry.extend(entry_facets)
entry.extend([facets['period_start'], facets['period_end'], latest])
entry.extend([facets['period_start'],
facets['period_end'],
facets['climatology'],
latest])
# Ensure header and entry have same length.
assert len(self.header) == len(entry)
......@@ -109,7 +114,7 @@ def get_args():
# Argument parser.
parser = ArgumentParser(
prog='mkesmcat',
description='Generates "intake-esm" catalog (JSON + CSV) from CLIMERI-France climate data archives.',
description='Generates "intake-esm" catalog (JSON + CSV) from ClimERI-France climate data archives.',
add_help=True
)
......@@ -200,7 +205,10 @@ def main():
with Pool(processes=args.processes) as pool:
# Instantiate pool iterator with progress bar.
processes = pool.imap(Process(ctx), get_sources(args.project))
processes = pool.imap(Process(ctx), get_sources(args.project), chunksize=4)
# Run processes in a dummy variable
_ = [x for x in processes]
# If processes is 1, use basic map function.
else:
......@@ -208,8 +216,8 @@ def main():
# Instantiate processes iterator with progress bar.
processes = map(Process(ctx), get_sources(args.project))
# Run processes in a dummy variable.
_ = [x for x in processes]
# Run processes in a dummy variable
_ = [x for x in processes]
def get_sources(project):
......
......@@ -21,7 +21,7 @@ def make_csv(catpath, header, entries):
# Create directory if not exists.
if not os.path.exists(os.path.dirname(catpath)):
os.makedirs(os.path.dirname(catpath))
with open(catpath, 'w+') as f:
f.write(','.join(header) + '\n')
for line in entries:
......@@ -33,6 +33,7 @@ def make_json(catpath, project, header):
ESM JSON catalog templating based on Jinja2.
"""
# Get catalog name.
catname = os.path.basename(catpath)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment