Commit c1171bc9 authored by TROUSSELLIER Laurent
Browse files

debug : clim, details..

parent f3d193da
...@@ -69,10 +69,12 @@ class DRSParser(object): ...@@ -69,10 +69,12 @@ class DRSParser(object):
# Vocabulary error handling. # Vocabulary error handling.
except TemplateParsingError as e: except TemplateParsingError as e:
print(e) print(e)
return None
# Key error handling. Due to wrong number of facets in comparison with the path parts. # Key error handling. Due to wrong number of facets in comparison with the path parts.
except KeyError as e: except KeyError as e:
print(e) print(e)
return None
def get_facets_from_filename(self, basename): def get_facets_from_filename(self, basename):
""" """
...@@ -80,15 +82,13 @@ class DRSParser(object): ...@@ -80,15 +82,13 @@ class DRSParser(object):
""" """
# Initialize tstart & tend # Initialize tstart & tend
tstart, tend = None, None tstart, tend, clim = 'none', 'none', 'False'
# Check vocabulary. # Check vocabulary.
try: try:
self.file_parser.parse(basename) self.file_parser.parse(basename)
timerange = TimeRange( basename.split('_')[-1])
# Deserialize time range in date format. tstart, tend, clim = timerange.start, timerange.end, timerange.clim
timerange = TimeRange(basename.split('_')[-1])
tstart, tend = timerange.start, timerange.end
# Vocabulary error handling. # Vocabulary error handling.
except TemplateParsingError: except TemplateParsingError:
...@@ -96,24 +96,29 @@ class DRSParser(object): ...@@ -96,24 +96,29 @@ class DRSParser(object):
# Try checking vocabulary with fixed variable template. # Try checking vocabulary with fixed variable template.
try: try:
self.fx_file_parser.parse(basename) self.fx_file_parser.parse(basename)
# No timerange. # No timerange.
tstart, tend = "none", "none" tstart, tend, clim = "none", "none", "False"
# Vocabulary error handling. # Vocabulary error handling.
except TemplateParsingError as e: except TemplateParsingError as e:
if 'is 8, expected 6' in str(e):
try:
return self.get_facets_from_filename(basename[::-1].replace("_","-",1)[::-1])
except:
print(e)
print(e) print(e)
# Key error handling. Due to wrong number of facets in comparison with the filename parts. # Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e: except KeyError as e:
print(e) print(e)
# Key error handling. Due to wrong number of facets in comparison with the filename parts. # # Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e: # except KeyError as e:
print(e) # print(e)
# Deserialize filename and add time range facets. # Deserialize filename and add time range facets.
facets = dict(zip(self.file_keys[:-1], basename.split('_')[:-1])) facets = dict(zip(self.file_keys[:-1], basename.split('_')[:-1]))
facets['period_start'] = tstart facets['period_start'] = tstart
facets['period_end'] = tend facets['period_end'] = tend
facets['climatology'] = clim
return facets return facets
...@@ -22,21 +22,22 @@ outcat = "OutputCatalog" ...@@ -22,21 +22,22 @@ outcat = "OutputCatalog"
threads = 1 threads = 1
#entryBug = "/bdd/CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/.paths.txt" #entryBug = "/bdd/CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/.paths.txt"
# entryBug = "/bdd/CMIP6/CMIP/MIROC/MIROC-ES2L/abrupt-4xCO2/.paths.txt" entryBug = "/bdd/CMIP6/PMIP/IPSL/IPSL-CM6A-LR/lig127k/.paths.txt"
# # Simulation du traitement (INIT)
# ctx = Context(ARG(project,outcat,threads))
# # Simulation du traitement (GO) # Simulation du traitement (INIT)
# p=Process(ctx) ctx = Context(ARG(project,outcat,threads))
# p(entryBug) # pour un fichier complet .paths.txt
# Simulation du traitement (GO)
p=Process(ctx)
p(entryBug) # pour un fichier complet .paths.txt
#Pour juste une ligne du fichier ... #Pour juste une ligne du fichier ...
drs = DRSParser(project) # drs = DRSParser(project)
pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc") # pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc" # pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug) # pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
fa = drs.get_facets_from_path(pathCompletDeLaLigneBug) # fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
print(fa) # print(fa)
\ No newline at end of file \ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 28 10:47:06 2022
@author: ltroussellier
"""
import time
from glob import iglob
# Interactive progress monitor: each time the operator presses Enter, report
# how many catalog CSV files appeared since the previous check and the overall
# files-per-second figure.

# Directory polled for generated catalog CSV files.
path = "OutputCatalog/CMIP6"
# Reference time for the running average.
start = time.time()
# Number of CSV files present when monitoring begins (counted lazily, without
# materializing the list of paths).
lastnb = sum(1 for _ in iglob(path + '/*.csv'))
while True:
    # Block until a refresh is requested; leave quietly on Ctrl-C / Ctrl-D
    # instead of ending with a traceback.
    try:
        input("Press Enter to check")
    except (EOFError, KeyboardInterrupt):
        break
    nbFile = sum(1 for _ in iglob(path + '/*.csv'))
    now = time.time()
    # Total CSV count divided by elapsed seconds.
    # NOTE(review): files already present at start are included in this
    # average -- confirm that is intended.
    mean = nbFile / (now - start)
    # Files created since the previous check: current count minus previous
    # count (the operands were previously swapped, giving a negative number).
    print(nbFile - lastnb, "catalog crées depuis le dernier check")
    lastnb = nbFile
    print(mean, "en moyenne /s depuis le début")
...@@ -7,7 +7,8 @@ class TimeRange(object): ...@@ -7,7 +7,8 @@ class TimeRange(object):
def __init__(self, timerange): def __init__(self, timerange):
# Split time range into start and end digits. # Split time range into start and end digits.
self.start, self.end = map(self.iso_format, timerange.split('-')) self.clim = str("-clim" in timerange)
self.start, self.end, = map(self.iso_format, timerange.split('-')[:2])
@staticmethod @staticmethod
def iso_format(timestamp): def iso_format(timestamp):
...@@ -18,6 +19,7 @@ class TimeRange(object): ...@@ -18,6 +19,7 @@ class TimeRange(object):
at January 1st and monthly dates starts at first day of the month. at January 1st and monthly dates starts at first day of the month.
""" """
if len(timestamp) == 4: if len(timestamp) == 4:
# Start year at january 1st # Start year at january 1st
timestamp = (timestamp + '0101').ljust(14, '0') timestamp = (timestamp + '0101').ljust(14, '0')
......
...@@ -33,7 +33,7 @@ class Process(object): ...@@ -33,7 +33,7 @@ class Process(object):
# Build CSV header. # Build CSV header.
self.header = ['path', 'project'] self.header = ['path', 'project']
self.header.extend(drs.dir_keys) self.header.extend(drs.dir_keys)
self.header.extend(['period_start', 'period_end', 'latest']) self.header.extend(['period_start', 'period_end','climatology', 'latest'])
# Set CSV entries list. # Set CSV entries list.
self.entries = list() self.entries = list()
...@@ -50,50 +50,43 @@ class Process(object): ...@@ -50,50 +50,43 @@ class Process(object):
with open(source, 'r') as f: with open(source, 'r') as f:
for line in f: for line in f:
# Split entry into full file path and latest boolean. # Split entry into full file path and latest boolean.
path, latest = map(str.strip, line.split()) path, latest = map(str.strip, line.split())
# Convert path into pathlib.Path object. # Convert path into pathlib.Path object.
path = Path(path) path = Path(path)
# Get facets from path. # Get facets from path.
# Sets empty dict in the case of parsing error raised by DRSParser. # Sets empty dict in the case of parsing error raised by DRSParser.
facets = drs.get_facets_from_path(path) or dict() facets = drs.get_facets_from_path(path) or dict()
# Update facets from filename. # Update facets from filename.
facets.update(drs.get_facets_from_filename(path.stem)) facets.update(drs.get_facets_from_filename(path.stem))
# If facet dict is empty, go to next line/path. # If facet dict is empty, go to next line/path.
if not facets: if not facets:
continue continue
# Build CSV entry. # Build CSV entry.
entry_facets = [facets[i] for i in drs.dir_keys] entry_facets = [facets[i] for i in drs.dir_keys]
entry = [IPSL_DATA_ROOT + path.as_posix(), self.project] entry = [IPSL_DATA_ROOT + path.as_posix(), self.project]
entry.extend(entry_facets) entry.extend(entry_facets)
entry.extend([facets['period_start'], facets['period_end'], latest]) entry.extend([facets['period_start'], facets['period_end'],facets['climatology'], latest])
# Ensure header and entry have same length. # Ensure header and entry have same length.
assert len(self.header) == len(entry) assert len(self.header) == len(entry)
# Append entry to final list. # Append entry to final list.
self.entries.append(entry) self.entries.append(entry)
catname = '_'.join(source.split('/')[2:-1]) catname = '_'.join(source.split('/')[2:-1])
catpath = os.path.join(self.outcat, self.project, catname) catpath = os.path.join(self.outcat, self.project, catname)
# Remove existing catalog files. # Remove existing catalog files.
try: try:
for ext in [CSV_EXTENSION, JSON_EXTENSION]: for ext in [CSV_EXTENSION, JSON_EXTENSION]:
os.remove(catpath + ext) os.remove(catpath + ext)
except OSError: except OSError:
pass pass
# Write JSON catalog. # Write JSON catalog.
make_json(catpath + JSON_EXTENSION, self.project, self.header) make_json(catpath + JSON_EXTENSION, self.project, self.header)
# Write CSV catalog. # Write CSV catalog.
make_csv(catpath + CSV_EXTENSION, self.header, self.entries) make_csv(catpath + CSV_EXTENSION, self.header, self.entries)
except Exception as e: except Exception as e:
...@@ -172,7 +165,7 @@ class Context(object): ...@@ -172,7 +165,7 @@ class Context(object):
drs = None drs = None
#drs = DRSParser("CMIP6")
def main(): def main():
""" """
...@@ -200,16 +193,18 @@ def main(): ...@@ -200,16 +193,18 @@ def main():
with Pool(processes=args.processes) as pool: with Pool(processes=args.processes) as pool:
# Instantiate pool iterator with progress bar. # Instantiate pool iterator with progress bar.
processes = pool.imap(Process(ctx), get_sources(args.project)) processes = pool.imap(Process(ctx), get_sources(args.project),chunksize=4)
# Run processes in a dummy variable
_ = [x for x in processes]
# If processes is 1, use basic map function. # If processes is 1, use basic map function.
else: else:
# Instantiate processes iterator with progress bar. # Instantiate processes iterator with progress bar.
processes = map(Process(ctx), get_sources(args.project)) processes = map(Process(ctx), get_sources(args.project))
# Run processes in a dummy variable
# Run processes in a dummy variable. _ = [x for x in processes]
_ = [x for x in processes]
def get_sources(project): def get_sources(project):
......
...@@ -12,7 +12,6 @@ from jinja2 import Template ...@@ -12,7 +12,6 @@ from jinja2 import Template
from constants import CATALOG_DESCRIPTION_TEMPLATE from constants import CATALOG_DESCRIPTION_TEMPLATE
def make_csv(catpath, header, entries): def make_csv(catpath, header, entries):
""" """
Write ESM CSV catalog file. Write ESM CSV catalog file.
...@@ -21,7 +20,7 @@ def make_csv(catpath, header, entries): ...@@ -21,7 +20,7 @@ def make_csv(catpath, header, entries):
# Create directory if not exists. # Create directory if not exists.
if not os.path.exists(os.path.dirname(catpath)): if not os.path.exists(os.path.dirname(catpath)):
os.makedirs(os.path.dirname(catpath)) os.makedirs(os.path.dirname(catpath))
with open(catpath, 'w+') as f: with open(catpath, 'w+') as f:
f.write(','.join(header) + '\n') f.write(','.join(header) + '\n')
for line in entries: for line in entries:
...@@ -33,6 +32,7 @@ def make_json(catpath, project, header): ...@@ -33,6 +32,7 @@ def make_json(catpath, project, header):
ESM JSON catalog templating based on Jinja2. ESM JSON catalog templating based on Jinja2.
""" """
# Get catalog name. # Get catalog name.
catname = os.path.basename(catpath) catname = os.path.basename(catpath)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment