Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
IPSL
E
ESPRI
ESPRI-Mod
catalog
Commits
ea909ff9
Commit
ea909ff9
authored
Feb 02, 2022
by
Guillaume
Browse files
Bugfixes
parent
fd178ba6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
78 additions
and
75 deletions
+78
-75
DRSParser.py
DRSParser.py
+56
-49
esm_cat_generator.py
esm_cat_generator.py
+3
-8
utils.py
utils.py
+19
-18
No files found.
DRSParser.py
View file @
ea909ff9
...
...
@@ -22,7 +22,6 @@ class DRSParser(object):
"""
def
__init__
(
self
,
project
):
# Add time range collection.
pyessv
.
create_collection
(
pyessv
.
load
(
'wcrp:{}'
.
format
(
project
)),
...
...
@@ -30,6 +29,12 @@ class DRSParser(object):
description
=
"Time Range"
,
term_regex
=
r
'[0-9]+\-[0-9]+'
)
pyessv
.
create_term
(
pyessv
.
load
(
'wcrp:cmip5:product'
),
name
=
'output'
,
description
=
'output'
,
label
=
'output'
)
# Get DRS collections.
self
.
dir_drs
=
VOCAB
[
project
][
'directory_format'
]
...
...
@@ -55,69 +60,71 @@ class DRSParser(object):
self
.
fx_file_parser
=
pyessv
.
create_template_parser
(
file_template
,
self
.
file_drs
[:
-
1
],
strictness
=
1
,
seperator
=
'_'
)
def
get_facets_from_path
(
self
,
path
):
"""
Deserialize pathlib.Path object against a DRS.
"""
# Check vocabulary.
try
:
self
.
dir_parser
.
parse
(
path
.
parent
.
as_posix
())
def
get_facets_from_path
(
self
,
path
):
"""
Deserialize pathlib.Path object against a DRS.
# Deserialize p.parent in dict excluding project.
facets
=
dict
((
zip
(
self
.
dir_keys
,
path
.
parent
.
parts
[
1
:])))
"""
# Check vocabulary.
try
:
self
.
dir_parser
.
parse
(
path
.
parent
.
as_posix
())
return
facets
# Deserialize p.parent in dict excluding project.
facets
=
dict
((
zip
(
self
.
dir_keys
,
path
.
parent
.
parts
[
1
:])))
# Vocabulary error handling.
except
TemplateParsingError
as
e
:
print
(
e
)
return
None
return
facets
#
Catch any other exception
.
except
Exception
as
e
:
print
(
e
)
return
None
#
Vocabulary error handling
.
except
TemplateParsingError
as
e
:
print
(
e
)
return
None
def
get_facets_from_filename
(
self
,
basename
):
"""
Deserialize a filename string against a DRS.
# Catch any other exception.
except
Exception
as
e
:
print
(
e
)
return
None
"""
# Initialize tstart & tend
tstart
,
tend
,
clim
=
''
,
''
,
'False'
# Set clim to True and rename basename to match usual template in case of climatology file.
if
basename
.
endswith
(
'-clim.nc'
):
basename
,
clim
=
basename
.
replace
(
'-clim'
,
''
),
'True'
def
get_facets_from_filename
(
self
,
basename
):
"""
Deserialize a filename string against a DRS.
# Check vocabulary.
try
:
terms
=
self
.
file_parser
.
parse
(
basename
)
timerange
=
TimeRange
(
basename
.
split
(
'_'
)[
-
1
])
tstart
,
tend
=
timerange
.
start
,
timerange
.
end
"""
# Initialize tstart & tend
tstart
,
tend
,
clim
=
''
,
''
,
'False'
# Parsing error handling.
except
TemplateParsingError
:
# Set clim to True and rename basename to match usual template in case of climatology file.
if
basename
.
endswith
(
'-clim.nc'
):
basename
,
clim
=
basename
.
replace
(
'-clim'
,
''
),
'True'
# Try parsing with "fixed" template.
try
:
self
.
fx_file_parser
.
parse
(
basename
)
# Check vocabulary.
try
:
terms
=
self
.
file_parser
.
parse
(
basename
)
timerange
=
TimeRange
(
basename
.
split
(
'_'
)[
-
1
])
tstart
,
tend
=
timerange
.
start
,
timerange
.
end
# Set no timerange and no climatology
.
tstart
,
tend
,
clim
=
''
,
''
,
'False'
# Parsing error handling
.
except
TemplateParsingError
:
# Catch any other exception.
except
Exception
as
e
:
print
(
e
)
# Try parsing with "fixed" template.
try
:
self
.
fx_file_parser
.
parse
(
basename
)
# Set no timerange and no climatology.
tstart
,
tend
,
clim
=
''
,
''
,
'False'
# Catch any other exception.
except
Exception
as
e
:
print
(
e
)
# Deserialize filename and add time range facets.
facets
=
dict
(
zip
(
self
.
file_keys
[:
-
1
],
basename
.
split
(
'_'
)[:
-
1
]))
facets
[
'period_start'
]
=
tstart
facets
[
'period_end'
]
=
tend
facets
[
'climatology'
]
=
clim
return
facets
# Catch any other exception.
except
Exception
as
e
:
print
(
e
)
# Deserialize filename and add time range facets.
facets
=
dict
(
zip
(
self
.
file_keys
[:
-
1
],
basename
.
split
(
'_'
)[:
-
1
]))
facets
[
'period_start'
]
=
tstart
facets
[
'period_end'
]
=
tend
facets
[
'climatology'
]
=
clim
return
facets
esm_cat_generator.py
View file @
ea909ff9
...
...
@@ -65,14 +65,9 @@ class Process(object):
# Update facets from filename.
facets
.
update
(
drs
.
get_facets_from_filename
(
path
.
stem
))
# Deserialize member/ensemble facet.
if
self
.
project
==
'CMIP6'
:
member_key
=
AGGREGATION_MEMBER
[
self
.
project
]
assert
member_key
in
facets
pattern
=
re
.
compile
(
INIT_YEAR_REGEX
)
facets
[
'init_year'
]
=
re
.
match
(
pattern
,
member_key
).
groupdict
()[
'init_year'
]
else
:
facets
[
'init_year'
]
=
''
# Extract CMIP6 DCPP initial year.
regex
=
re
.
match
(
re
.
compile
(
INIT_YEAR_REGEX
),
AGGREGATION_MEMBER
[
self
.
project
])
facets
[
'init_year'
]
=
regex
.
groupdict
()[
'init_year'
]
if
regex
else
''
# If facet dict is empty, go to next line/path.
if
not
facets
:
...
...
utils.py
View file @
ea909ff9
...
...
@@ -49,33 +49,34 @@ def make_json(catpath, project, header):
content
[
'description'
]
=
CATALOG_DESCRIPTION_TEMPLATE
.
format
(
project
)
content
[
'catalog_file'
]
=
os
.
path
.
splitext
(
catname
)[
0
]
+
CSV_EXTENSION
content
[
'attributes'
]
=
list
()
for
facet
in
header
[
1
:]:
for
facet
in
header
[
2
:]:
attr
=
dict
()
attr
[
'column_name'
]
=
facet
attr
[
'vocabulary'
]
=
VOCAB_URLS
[
project
][
facet
]
attr
[
'vocabulary'
]
=
VOCAB_URLS
[
project
][
facet
]
if
facet
in
VOCAB_URLS
[
project
]
else
''
content
[
'attributes'
].
append
(
attr
)
content
[
'assets'
]
=
dict
()
content
[
'assets'
][
'column_name'
]
=
header
[
0
]
content
[
'assets'
][
'format'
]
=
'netcdf'
content
[
'aggregation_control'
]
=
dict
()
content
[
'aggregation_control'
][
'variable_column_name'
]
=
AGGREGATION_VARIABLE
[
project
]
content
[
'aggregation_control'
][
'groupby_attrs'
]
=
AGGREGATION_GROUP
[
project
]
content
[
'aggregations'
]
=
list
()
content
[
'aggregations'
].
append
({
'type'
:
'union'
,
'attribute_name'
:
'variable_id'
})
content
[
'aggregations'
].
append
({
'type'
:
'join_existing'
,
'attribute_name'
:
'period_start'
,
'options'
:
{
'dim'
:
'time'
,
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
content
[
'aggregations'
].
append
({
'type'
:
'join_new'
,
'attribute_name'
:
AGGREGATION_MEMBER
[
project
],
'options'
:
{
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
content
[
'
aggregation_control'
][
'
aggregations'
]
=
list
()
content
[
'
aggregation_control'
][
'
aggregations'
].
append
({
'type'
:
'union'
,
'attribute_name'
:
'variable_id'
})
content
[
'
aggregation_control'
][
'
aggregations'
].
append
({
'type'
:
'join_existing'
,
'attribute_name'
:
'period_start'
,
'options'
:
{
'dim'
:
'time'
,
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
content
[
'
aggregation_control'
][
'
aggregations'
].
append
({
'type'
:
'join_new'
,
'attribute_name'
:
AGGREGATION_MEMBER
[
project
],
'options'
:
{
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
if
project
==
'CMIP6'
:
content
[
'aggregations'
].
append
({
'type'
:
'join_new'
,
'attribute_name'
:
'init_year'
,
'options'
:
{
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
content
[
'
aggregation_control'
][
'
aggregations'
].
append
({
'type'
:
'join_new'
,
'attribute_name'
:
'init_year'
,
'options'
:
{
'coords'
:
'minimal'
,
'compat'
:
'override'
}})
# Create directory if not exists.
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
catpath
)):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment