Commit 31eb0f27 authored by Guillaume's avatar Guillaume
Browse files

clean up

parent ae0a7952
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import pyessv
from pyessv import TemplateParsingError
from glob import iglob
from pathlib import Path
from TimeRange import TimeRange
from vocabulary import VOCAB
def main():
    """Walk a CMIP6 DRS directory tree, validate each netCDF path against
    the project vocabulary (pyessv) and print the extracted DRS facets.

    NOTE(review): ``root`` and ``project`` should come from CLI args
    (original TODO); currently hard-coded.
    """
    # TODO: From CLI args.
    root = '/home/ltrousse/Bureau/ModCatAdmin/catalog/bdd'
    project = 'CMIP6'

    # Get DRS collections (directory and filename grammars).
    dir_drs = VOCAB[project]['directory_format']
    file_drs = VOCAB[project]['filename_format']

    # Add time range collection (registers the collection as a side effect).
    pyessv.create_collection(
        pyessv.load('wcrp:{}'.format(project)),
        "time_range",
        description="Time Range",
        term_regex=r'[0-9]+\-[0-9]+'
    )

    # Override version collection with "latest" pattern.
    pyessv.create_collection(
        pyessv.load('wcrp:{}'.format(project)),
        "version",
        description="Version",
        term_regex=r'^v[0-9]{8}|latest$'
    )

    # DRS keys (raw facet names, in template order).
    dir_keys = [pyessv.load(i).raw_name for i in dir_drs]
    file_keys = [pyessv.load(i).raw_name for i in file_drs]

    # Set path template for vocabulary check.
    dir_template = os.path.join(project, '/'.join(['{}'] * len(dir_drs)))
    dir_parser = pyessv.create_template_parser(dir_template, dir_drs, strictness=1, seperator='/')

    # Set file template for vocabulary check (time-varying variables).
    file_template = '_'.join(['{}'] * len(file_drs))
    file_parser = pyessv.create_template_parser(file_template, file_drs, strictness=1, seperator='_')

    # Set file template for fixed-frequency variables (no trailing time range).
    fx_template = '_'.join(['{}'] * (len(file_drs) - 1))
    fx_file_parser = pyessv.create_template_parser(fx_template, file_drs[:-1], strictness=1, seperator='_')

    # Globbing pattern: one '**/' level per directory facet.
    pattern = os.path.join(root, project, '**/' * len(dir_drs), '*.nc')

    for path in iglob(pattern):
        # Path relative to the root directory.
        p = Path(path).relative_to(root)
        # Final dictionary of DRS facets.
        facets = dict()
        # BUG FIX: original `tstart, tend = None` raised TypeError (cannot
        # unpack None); also guarantees both names are bound for fx files.
        tstart = tend = None

        # Deserialize path.
        try:
            # Check vocabulary.
            dir_parser.parse(p.parent.as_posix())
            # Deserialize p.parent in dict excluding project (first part).
            facets = dict(zip(dir_keys, p.parent.parts[1:]))
        except TemplateParsingError as e:
            print(e)
        except KeyError as e:
            # Wrong number of facets vs. the path parts.
            print(e)

        # Deserialize filename.
        try:
            try:
                # Check vocabulary.
                file_parser.parse(p.stem)
                # Deserialize time range in date format.
                timerange = TimeRange(p.stem.split('_')[-1])
                tstart, tend = timerange.start, timerange.end
            except TemplateParsingError:
                # Try checking vocabulary with fixed variable template
                # (fx files carry no time range; tstart/tend stay None).
                try:
                    fx_file_parser.parse(p.stem)
                except TemplateParsingError as e:
                    print(e)
                except KeyError as e:
                    print(e)
            except KeyError as e:
                # Wrong number of facets vs. the filename parts.
                print(e)
            # Deserialize p.name and update dict.
            facets.update(dict(zip(file_keys[:-1], p.name.split('_')[:-1])))
            facets['period_start'] = tstart
            facets['period_end'] = tend
            print(facets)
            # TODO: Write in CSV.
        except Exception as e:
            # BUG FIX: original bare `except: pass` silently hid all errors;
            # keep best-effort behavior but report the failure.
            print(e)


if __name__ == "__main__":
    main()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 29 17:05:56 2021
@author: ltrousse
"""
import json # pour écrire le json du catalogue
import csv # pour écrire le csv du catalogue
class ESMCatFabric():
    """Build and persist an ESM catalog: a CSV data file plus the JSON
    descriptor pointing at it (esmcat_version 0.1.0 layout).

    Attributes:
        header: list of column names for the CSV.
        data: list of rows (one list per catalog entry).
        name: catalog identifier; also the output file stem.
        path: output directory (may be set later via CreateCat).
        description: free-text catalog description.
    """

    def __init__(self, name, header, data, path=None, description=None):
        self.header = header
        self.data = data
        self.name = name
        self.path = path
        self.description = description

    @classmethod
    def from_esmCat(cls, catalog):
        """Alternate constructor from an intake-esm datastore object.

        NOTE(review): assumes `catalog` exposes `esmcol_data`, `df` and
        `esmcol_path` like an intake-esm `esm_datastore` — confirm.
        """
        return cls(
            catalog.esmcol_data["id"],
            list(catalog.df.columns),
            catalog.df.values.tolist(),
            catalog.esmcol_path[:catalog.esmcol_path.rfind("/")],
            catalog.esmcol_data["description"],
        )

    def Add(self, data):
        """Append one row and rewrite both output files."""
        self.data.append(data)
        self.Update()

    def Remove(self, data):
        """Remove one row (by value) and rewrite both output files."""
        self.data.remove(data)
        self.Update()

    def Update(self):
        """Rewrite the CSV and JSON files from current state."""
        self.SaveCSV()
        self.SaveJSON()

    def CreateCat(self, pathToSave, description):
        """Set the output directory and description, then write both files."""
        self.path = pathToSave
        self.description = description
        self.SaveCSV()
        self.SaveJSON()

    # Almost the same as catalog.serialize(name=..., catalog_type="file"),
    # except this writes an uncompressed CSV whereas esm_datastore uses gzip
    # (which makes the CSV impossible to inspect directly).
    def SaveCSV(self):
        """Write header + rows to <path>/<name>.csv."""
        # BUG FIX: use a context manager (no handle leak on writer error)
        # and newline='' as the csv module documentation requires.
        with open(self.path + "/" + self.name + ".csv", 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(self.header)
            writer.writerows(self.data)

    def SaveJSON(self):
        """Write the ESM collection descriptor to <path>/<name>.json."""
        # BUG FIX: original had a stray non-Python line here
        # ("--> Template Jinja2", a syntax error); kept as a TODO note:
        # TODO: could be generated via a Jinja2 template instead.
        dicToSave = {}
        dicToSave["esmcat_version"] = "0.1.0"
        dicToSave["id"] = self.name
        dicToSave["description"] = self.description
        dicToSave["catalog_file"] = self.name + ".csv"
        dicToSave["attributes"] = []
        dicAsset = {}
        dicAsset["column_name"] = self.header
        dicAsset["format"] = "netcdf"
        dicToSave["assets"] = dicAsset
        with open(self.path + "/" + self.name + ".json", 'w') as fp:
            json.dump(dicToSave, fp, indent=4)

    def __repr__(self):
        res = ""
        res += str(self.header) + "\n"
        res += str(self.name) + "\n"
        res += str(self.path) + "\n"
        res += str(self.description) + "\n"
        res += str(self.data) + "\n"
        return res
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 24 09:29:29 2022
@author: ltroussellier
"""
import os
class temp():
    """One-off helper: copy the first 20000 lines of the CMIP6 paths
    listing into a local fake-paths file.

    NOTE(review): the copy runs at class-definition time (import side
    effect) against a hard-coded /bdd path — confirm this is intended.
    """
    CMIP6PathsPath = "/bdd/CMIP6/C4MIP/MOHC/.paths.txt"
    CMIP6PathOutput = "FakePath.txt"
    # Start from a clean output file.
    if os.path.exists(CMIP6PathOutput):
        os.remove(CMIP6PathOutput)
    # BUG FIX: open the output once, instead of re-opening it in append
    # mode for every single line as the original did.
    with open(CMIP6PathsPath) as fi, open(CMIP6PathOutput, "a+") as fr:
        for i, path in enumerate(fi, start=1):
            if i > 20000:
                break
            fr.write(path)
def createFilePaths(paths_file="/bdd/CMIP6/C4MIP/MOHC/.paths.txt",
                    out_dir="InputFakeFile", limit=200000):
    """Split the first *limit* lines of *paths_file* into one file per
    experiment, written as <out_dir>/fPath<n>.txt.

    The experiment is the 5th '/'-separated component of each line
    (index 4). Consecutive lines with the same experiment are grouped;
    each group is written to its own numbered file.

    Args (new, backward-compatible defaults preserve the original
    hard-coded behavior):
        paths_file: input listing, one path per line.
        out_dir: directory receiving the fPath<n>.txt files (must exist).
        limit: maximum number of input lines to read.

    BUG FIX vs. original: the last experiment group is now flushed too
    (it was silently dropped when the input ended).
    """
    # Sentinel carried over from the original: if the first experiment
    # is not "lig127k", an empty fPath1.txt is written (kept for
    # behavioral compatibility).
    crtexp = "lig127k"
    crtlistpath = []
    nbfile = 1

    def _flush(lines, idx):
        # Write one experiment group to its numbered output file.
        with open(out_dir + "/fPath" + str(idx) + ".txt", "w") as fo:
            fo.writelines(lines)

    with open(paths_file) as fi:
        for i, path in enumerate(fi, start=1):
            if i > limit:
                break
            exp = path.split("/")[4]
            if crtexp != exp:
                # Experiment changed: dump the previous group.
                _flush(crtlistpath, nbfile)
                nbfile = nbfile + 1
                crtexp = exp
                crtlistpath = [path]
            else:
                crtlistpath.append(path)
    # Flush the final group (missing in the original implementation).
    if crtlistpath:
        _flush(crtlistpath, nbfile)
createFilePaths()
\ No newline at end of file
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 29 17:11:56 2021
@author: ltrousse
"""
import os
class FileCatcher():
    """Recursively collect every file path under a root directory.

    After construction, ``lFile`` holds the paths (strings) of all
    regular files found below *path*. Directories are descended into;
    symlinks are followed, as ``os.DirEntry.is_file``/``is_dir`` do by
    default.
    """

    def __init__(self, path):
        self.lFile = []
        self.getAll(path)

    def getAll(self, path):
        """Append files of *path* to ``lFile``; recurse into subdirectories."""
        # BUG FIX: close the scandir iterator deterministically (the
        # original leaked it via a throwaway list comprehension).
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_file():
                    self.lFile.append(entry.path)
                elif entry.is_dir():
                    self.getAll(entry.path)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 27 15:03:29 2022
@author: ltroussellier
"""
from esm_cat_generator import Context,Process
from DRSParser import DRSParser
from pathlib import Path
class ARG():
    """Minimal stand-in for parsed CLI arguments.

    Exposes the three attributes (project / outcat / threads) that the
    real argument namespace would carry.
    """

    def __init__(self, p, o, t):
        self.project, self.outcat, self.threads = p, o, t
# Info needed to reproduce the error.
project = "CMIP6"
outcat = "OutputCatalog"
threads = 1
#entryBug = "/bdd/CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/.paths.txt"
entryBug = "/bdd/CMIP6/PMIP/IPSL/IPSL-CM6A-LR/lig127k/.paths.txt"
# Simulate the processing (INIT).
ctx = Context(ARG(project,outcat,threads))
# Simulate the processing (GO).
p=Process(ctx)
p(entryBug) # for a whole .paths.txt file
# For just a single line of the file...
# drs = DRSParser(project)
# pathCompletDeLaLigneBug = Path("CMIP6/PAMIP/CNRM-CERFACS/CNRM-CM6-1/pdSST-pdSIC/r116i1p1f2/AppldayPlev/va/gr/v20210409/va_AppldayPlev_CNRM-CM6-1_pdSST-pdSIC_r116i1p1f2_gr_20000401-20010531.nc")
# pathCompletDeLaLigneBug="CMIP6/ScenarioMIP/EC-Earth-Consortium/EC-Earth3/ssp119/r102i1p1f1/Amon/pr/gr/v20200412/pr_Amon_EC-Earth3_ssp119_r102i1p1f1_gr_205901-205912.nc"
# pathCompletDeLaLigneBug=Path(pathCompletDeLaLigneBug)
# fa = drs.get_facets_from_path(pathCompletDeLaLigneBug)
# print(fa)
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment