Commit c1171bc9 authored by TROUSSELLIER Laurent
Browse files

debug : clim, details..

parent f3d193da
......@@ -69,10 +69,12 @@ class DRSParser(object):
# Vocabulary error handling.
except TemplateParsingError as e:
print(e)
return None
# Key error handling. Due to wrong number of facets in comparison with the path parts.
except KeyError as e:
print(e)
return None
def get_facets_from_filename(self, basename):
"""
......@@ -80,15 +82,13 @@ class DRSParser(object):
"""
# Initialize tstart & tend
tstart, tend = None, None
tstart, tend, clim = 'none', 'none', 'False'
# Check vocabulary.
try:
self.file_parser.parse(basename)
# Deserialize time range in date format.
timerange = TimeRange(basename.split('_')[-1])
tstart, tend = timerange.start, timerange.end
timerange = TimeRange( basename.split('_')[-1])
tstart, tend, clim = timerange.start, timerange.end, timerange.clim
# Vocabulary error handling.
except TemplateParsingError:
......@@ -96,24 +96,29 @@ class DRSParser(object):
# Try checking vocabulary with fixed variable template.
try:
self.fx_file_parser.parse(basename)
# No timerange.
tstart, tend = "none", "none"
tstart, tend, clim = "none", "none", "False"
# Vocabulary error handling.
except TemplateParsingError as e:
if 'is 8, expected 6' in str(e):
try:
return self.get_facets_from_filename(basename[::-1].replace("_","-",1)[::-1])
except:
print(e)
# Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e:
print(e)
# Key error handling. Due to wrong number of facets in comparison with the filename parts.
except KeyError as e:
print(e)
# # Key error handling. Due to wrong number of facets in comparison with the filename parts.
# except KeyError as e:
# print(e)
# Deserialize filename and add time range facets.
facets = dict(zip(self.file_keys[:-1], basename.split('_')[:-1]))
facets['period_start'] = tstart
facets['period_end'] = tend
facets['climatology'] = clim
return facets
......@@ -22,21 +22,22 @@ outcat = "OutputCatalog"
threads = 1
#entryBug = "/bdd/CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/.paths.txt"
# entryBug = "/bdd/CMIP6/CMIP/MIROC/MIROC-ES2L/abrupt-4xCO2/.paths.txt"
# # Simulation du traitement (INIT)
# ctx = Context(ARG(project,outcat,threads))
entryBug = "/bdd/CMIP6/PMIP/IPSL/IPSL-CM6A-LR/lig127k/.paths.txt"
# # Simulation du traitement (GO)
# p=Process(ctx)
# p(entryBug) # pour un fichier complet .paths.txt
# Simulation du traitement (INIT)
ctx = Context(ARG(project,outcat,threads))
# Simulation du traitement (GO)
p=Process(ctx)
p(entryBug) # pour un fichier complet .paths.txt
#Pour juste une ligne du fichier ...
drs = DRSParser(project)
# drs = DRSParser(project)
pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
# pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
print(fa)
\ No newline at end of file
# pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
# pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
# fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
# print(fa)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 28 10:47:06 2022
@author: ltroussellier
"""
import time
from glob import iglob
# Directory whose CSV catalogs are being counted.
path = "OutputCatalog/CMIP6"


def count_catalogs(directory):
    """Return the number of ``*.csv`` files located directly in *directory*."""
    return sum(1 for _ in iglob(directory + '/*.csv'))


def mean_rate(created, elapsed):
    """
    Return the average number of items created per second.

    :param created: number of items created over the period
    :param elapsed: length of the period in seconds
    :returns: ``created / elapsed``, or ``0.0`` when no time has elapsed
    """
    return created / elapsed if elapsed > 0 else 0.0


def main():
    """
    Interactively report catalog creation progress.

    Each time Enter is pressed, prints how many CSV catalogs appeared in
    ``path`` since the previous check and the mean creation rate since the
    script was started. Stops cleanly on Ctrl-C or end of input.
    """
    start = time.time()
    # Catalogs already present at startup are the baseline: they must not be
    # counted as "created since the beginning".
    initial = count_catalogs(path)
    lastnb = initial
    while True:
        try:
            input("Press Enter to check")
        except (EOFError, KeyboardInterrupt):
            # No more input or user interruption: leave without a traceback.
            break
        nbFile = count_catalogs(path)
        # New minus previous count, so newly created catalogs show up as a
        # positive number.
        print(nbFile - lastnb, "catalog crées depuis le dernier check")
        lastnb = nbFile
        mean = mean_rate(nbFile - initial, time.time() - start)
        print(mean, "en moyenne /s depuis le début")


if __name__ == "__main__":
    main()
......@@ -7,7 +7,8 @@ class TimeRange(object):
def __init__(self, timerange):
# Split time range into start and end digits.
self.start, self.end = map(self.iso_format, timerange.split('-'))
self.clim = str("-clim" in timerange)
self.start, self.end, = map(self.iso_format, timerange.split('-')[:2])
@staticmethod
def iso_format(timestamp):
......@@ -18,6 +19,7 @@ class TimeRange(object):
at January 1st and monthly dates starts at first day of the month.
"""
if len(timestamp) == 4:
# Start year at january 1st
timestamp = (timestamp + '0101').ljust(14, '0')
......
......@@ -33,7 +33,7 @@ class Process(object):
# Build CSV header.
self.header = ['path', 'project']
self.header.extend(drs.dir_keys)
self.header.extend(['period_start', 'period_end', 'latest'])
self.header.extend(['period_start', 'period_end','climatology', 'latest'])
# Set CSV entries list.
self.entries = list()
......@@ -50,30 +50,24 @@ class Process(object):
with open(source, 'r') as f:
for line in f:
# Split entry into full file path and latest boolean.
path, latest = map(str.strip, line.split())
# Convert path into pathlib.Path object.
path = Path(path)
# Get facets from path.
# Sets empty dict in the case of parsing error raised by DRSParser.
facets = drs.get_facets_from_path(path) or dict()
# Update facets from filename.
facets.update(drs.get_facets_from_filename(path.stem))
# If facet dict is empty, go to next line/path.
if not facets:
continue
# Build CSV entry.
entry_facets = [facets[i] for i in drs.dir_keys]
entry = [IPSL_DATA_ROOT + path.as_posix(), self.project]
entry.extend(entry_facets)
entry.extend([facets['period_start'], facets['period_end'], latest])
entry.extend([facets['period_start'], facets['period_end'],facets['climatology'], latest])
# Ensure header and entry have same length.
assert len(self.header) == len(entry)
......@@ -82,18 +76,17 @@ class Process(object):
catname = '_'.join(source.split('/')[2:-1])
catpath = os.path.join(self.outcat, self.project, catname)
# Remove existing catalog files.
try:
for ext in [CSV_EXTENSION, JSON_EXTENSION]:
os.remove(catpath + ext)
except OSError:
pass
# Write JSON catalog.
make_json(catpath + JSON_EXTENSION, self.project, self.header)
# Write CSV catalog.
make_csv(catpath + CSV_EXTENSION, self.header, self.entries)
except Exception as e:
......@@ -172,7 +165,7 @@ class Context(object):
drs = None
#drs = DRSParser("CMIP6")
def main():
"""
......@@ -200,18 +193,20 @@ def main():
with Pool(processes=args.processes) as pool:
# Instantiate pool iterator with progress bar.
processes = pool.imap(Process(ctx), get_sources(args.project))
processes = pool.imap(Process(ctx), get_sources(args.project),chunksize=4)
# Run processes in a dummy variable
_ = [x for x in processes]
# If processes is 1, use basic map function.
else:
# Instantiate processes iterator with progress bar.
processes = map(Process(ctx), get_sources(args.project))
# Run processes in a dummy variable.
# Run processes in a dummy variable
_ = [x for x in processes]
def get_sources(project):
"""
Returns an iterator over list of input sources following pattern of the project.
......
......@@ -12,7 +12,6 @@ from jinja2 import Template
from constants import CATALOG_DESCRIPTION_TEMPLATE
def make_csv(catpath, header, entries):
"""
Write ESM CSV catalog file.
......@@ -33,6 +32,7 @@ def make_json(catpath, project, header):
ESM JSON catalog templating based on Jinja2.
"""
# Get catalog name.
catname = os.path.basename(catpath)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment