IPSL / ESPRI / ESPRI-Mod / catalog · Commits

Commit 0db2a772, authored Jan 27, 2022 by Guillaume

    resolve conflict

Parents: 9d2083fe, 225e0f75
Showing 7 changed files with 90 additions and 174 deletions (+90, -174):

    README.md                               +2    -0
    __pycache__/DRSParser.cpython-38.pyc    +0    -0
    __pycache__/TimeRange.cpython-38.pyc    +0    -0
    __pycache__/constants.cpython-38.pyc    +0    -0
    __pycache__/utils.cpython-38.pyc        +0    -0
    esm_cat_generator.py                    +81   -174
    utils.py                                +7    -0
README.md  (+2, -0)

```diff
@@ -73,3 +73,5 @@ Pour faire le 2/ ...
 -----------------------------------------------------------------
+=======
+Test
```
__pycache__/DRSParser.cpython-38.pyc  (new file, 0 → 100644): File added
__pycache__/TimeRange.cpython-38.pyc  (new file, 0 → 100644): File added
__pycache__/constants.cpython-38.pyc  (new file, 0 → 100644): File added
__pycache__/utils.cpython-38.pyc      (new file, 0 → 100644): File added
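The four files above are Python bytecode caches picked up by the merge. If they are not meant to be tracked, a standard ignore rule keeps them out of future commits (a suggestion, not part of this commit):

```gitignore
# Suggested rule (not part of this commit): ignore Python bytecode caches.
__pycache__/
*.pyc
```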
esm_cat_generator.py  (+81, -174)

The side-by-side diff interleaves the two revisions; disentangled, they read as follows.

Removed (old revision):

```python
#!bin/python
# -*- coding: utf-8 -*-
import signal
import subprocess
from argparse import ArgumentParser
from multiprocessing.dummy import Pool
from pathlib import Path
import lockfile
from tqdm import tqdm
from constants import *
from utils import *
from DRSParser import DRSParser


class Process(object):
    """
    Child process.
    """

    def __init__(self, ctx):
        """
        Processing context passed to each process.
        """
        self.drs = ctx.drs
        self.outcat = ctx.outcat
        self.depth = ctx.depth
        self.project = ctx.project

    def __call__(self, entry):
        """
        Any error switches to the next child process.
        It does not stop the main process at all.
        """
        # Escape in case of error.
        try:
            # Split entry into full file path and latest boolean.
            path, latest = map(str.strip, entry.split())
            # Convert path into pathlib.Path object.
            path = Path(path)
            # Get facets from path.
            facets = self.drs.get_facets_from_path(path)
            # Update facets from filename.
            facets.update(self.drs.get_facets_from_filename(path.stem))
            # Build CSV entry.
            entry_facets = [facets[i] for i in self.drs.dir_keys]
            entry = [IPSL_DATA_ROOT + path.as_posix(), self.project]
            entry.extend(entry_facets)
            entry.extend([facets['period_start'], facets['period_end'], latest])
            # Build catalog filename.
            if self.depth == 'project':
                catpath = os.path.join(self.outcat, self.project, self.project)
            else:
                catdepth = entry_facets[:self.drs.dir_keys.index(self.depth) + 1]
                catpath = os.path.join(self.outcat, self.project, '_'.join(catdepth))
            # Lock catalog file to avoid multiprocessing concurrent access.
            lock = lockfile.LockFile(catpath + CSV_EXTENSION)
            with lock:
                # Create catalog files (CSV + JSON) if not exists.
                if not os.path.isfile(catpath + CSV_EXTENSION):
                    # Build CSV header.
                    header = ['path', 'project']
                    header.extend(self.drs.dir_keys)
                    header.extend(['period_start', 'period_end', 'latest'])
                    # Ensure header and entry have same length.
                    assert len(header) == len(entry)
                    # Write CSV header.
                    make_csv(catpath + CSV_EXTENSION, header)
                    # Write JSON catalog in the same time.
                    make_json(catpath + JSON_EXTENSION, self.project, header)
                # Write catalog entry.
                with open(catpath + CSV_EXTENSION, 'a+') as f:
                    f.write(','.join(entry) + '\n')
        except Exception:
            raise


class Runner(object):

    def __init__(self, threads):
        # Initialize the pool.
        self.pool = None
        if threads != 1:
            self.pool = Pool(processes=threads)

    def _handle_sigterm(self):
        # Properly kill the pool in case of SIGTERM.
        if self.pool:
            self.pool.terminate()
        exit(1)

    def run(self, sources, ctx):
        # Instantiate signal handler.
        sig_handler = signal.signal(signal.SIGTERM, self._handle_sigterm)
        # Read sources.
        for source in sources:
            # Get total entires in a fast way for beautiful progress bar.
            total = int(subprocess.check_output(["wc", "-l", source]).split()[0])
            # Instantiate pool of processes.
            if self.pool:
                # Instantiate pool iterator with progress bar.
                processes = tqdm(self.pool.imap(Process(ctx), self.get_entries(source)),
                                 desc='Catalog generation',
                                 total=total)
            # Sequential processing use basic map function.
            else:
                # Instantiate processes iterator with progress bar.
                processes = tqdm(map(Process(ctx), self.get_entries(source)),
                                 desc='Catalog generation',
                                 total=total)
            # Run processes in a dummy variable.
            _ = [x for x in processes]
        # Terminate pool in case of SIGTERM signal.
        signal.signal(signal.SIGTERM, sig_handler)
        # Close the pool.
        if self.pool:
            self.pool.close()
            self.pool.join()

    @staticmethod
    def get_entries(source):
        # Iterate over each line of the source file.
        with open(source, 'r+') as f:
            for entry in f:
                yield entry


def get_args():
    # ... (unchanged lines elided; hunk @@ -193,14 @@)
    parser.add_argument(
        '-t', '--threads',
        metavar='1',
        type=int,
        default=1,
        help="""
        Number of threads.
        Set to "1" seems pure sequential processing (default).
        Set to "-1" seems all available threads as returned by "multiprocessing.cpu_count()".
        """
    )
    # ... (unchanged lines elided; hunk @@ -215,57 @@)
    return parser.prog, parser.parse_args()


class Context(object):
    """
    Base class for processing context manager.
    """

    def __init__(self, args):
        # Set project.
        self.project = args.project
        # Set DRS parser.
        self.drs = DRSParser(args.project)
        # Set output catalog directory.
        self.outcat = args.outcat
        # Set max pool processes.
        self.threads = args.threads
        # Set catalog depth.
        self.depth = args.depth
        # Set sources
        self.sources = INPUT_SOURCES[args.project]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


def main():
    """
    Run main program
    """
    # Get command-line arguments.
    prog, args = get_args()
    # Add program name as argument.
    setattr(args, 'prog', prog)
    # Instantiate processing context
    with Context(args) as ctx:
        # Instantiate the runner.
        r = Runner(ctx.threads)
        # Run the pool.
        r.run(ctx.sources, ctx)


if __name__ == "__main__":
    main()
```

Added (new revision):

```python
#!bin/python
# -*- coding: utf-8 -*-
import subprocess
from argparse import ArgumentParser
from multiprocessing import Pool
from pathlib import Path
import lockfile
from tqdm import tqdm
from DRSParser import DRSParser
from constants import *
from utils import *


def decode_path(p):
    """
    Child process.
    """
    # Split entry into full file path and latest boolean.
    path, latest = map(str.strip, p.split())
    # Convert path into pathlib.Path object.
    path = Path(path)
    # Get facets from path.
    facets = drs.get_facets_from_path(path)
    # Update facets from filename.
    facets.update(drs.get_facets_from_filename(path.stem))
    # Build CSV entry.
    entry_facets = [facets[i] for i in drs.dir_keys]
    entry = [IPSL_DATA_ROOT + path.as_posix(), args.project]
    entry.extend(entry_facets)
    entry.extend([facets['period_start'], facets['period_end'], latest])
    # Build catalog filename.
    if args.depth == 'project':
        catpath = os.path.join(args.outcat, args.project, args.project)
    else:
        catdepth = entry_facets[:drs.dir_keys.index(args.depth) + 1]
        catpath = os.path.join(args.outcat, args.project, '_'.join(catdepth))
    # Lock catalog file to avoid multiprocessing concurrent access.
    lock = lockfile.LockFile(catpath + CSV_EXTENSION)
    with lock:
        # Create catalog files (CSV + JSON) if not exists.
        if not os.path.isfile(catpath + CSV_EXTENSION):
            # Build CSV header.
            header = ['path', 'project']
            header.extend(drs.dir_keys)
            header.extend(['period_start', 'period_end', 'latest'])
            # Ensure header and entry have same length.
            assert len(header) == len(entry)
            # Write CSV header.
            make_csv(catpath + CSV_EXTENSION, header)
            # Write JSON catalog in the same time.
            make_json(catpath + JSON_EXTENSION, args.project, header)
        # Write catalog entry.
        with open(catpath + CSV_EXTENSION, 'a+') as f:
            f.write(','.join(entry) + '\n')


def get_args():
    # ... (unchanged lines elided; hunk @@ +109,14 @@)
    parser.add_argument(
        '-c', '--processes',
        metavar='1',
        type=int,
        default=1,
        help="""
        Number of processes.
        Set to "1" seems pure sequential processing (default).
        Set to "-1" seems all available processes as returned by "multiprocessing.cpu_count()".
        """
    )
    # ... (unchanged lines elided; hunk @@ +131,48 @@)
    return parser.prog, parser.parse_args()


"""
Run main program
"""
# Get command-line arguments.
prog, args = get_args()
# Add program name as argument.
setattr(args, 'prog', prog)
# Set DRS parser.
drs = DRSParser(args.project)
# Read sources.
for source in INPUT_SOURCES[args.project]:
    # Get total entries in a fast way for beautiful progress bar.
    total = int(subprocess.check_output(["wc", "-l", source]).split()[0])
    if args.processes != 1:
        # Instantiate pool of workers.
        pool = Pool(processes=args.processes)
        # Instantiate pool iterator with progress bar.
        processes = tqdm(pool.imap(decode_path, get_entries(source)),
                         desc='Catalog generation',
                         total=total)
    # Sequential processing use basic map function.
    else:
        # Instantiate processes iterator with progress bar.
        processes = tqdm(map(decode_path, get_entries(source)),
                         desc='Catalog generation',
                         total=total)
    # Run processes in a dummy variable.
    _ = [x for x in processes]
    if args.processes != 1:
        pool.close()
        pool.join()
```
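The new revision maps a module-level function over a lazy line generator with `multiprocessing.Pool.imap`, wrapping the iterator in `tqdm` for progress reporting. Switching from the thread-backed `multiprocessing.dummy.Pool` to a process-backed `Pool` matches the CLI rename from `--threads` to `--processes`; with real processes the mapped callable is pickled to the workers, which a top-level function like `decode_path` satisfies, and module globals such as `drs` and `args` are inherited by the children under fork-based start methods (the Linux default). A minimal standalone sketch of that pattern, not taken from the repository (the worker body and the file name `sources.txt` are made up):

```python
# Standalone sketch (not from this repository) of the Pool.imap +
# generator + tqdm pattern used by the new revision.
from multiprocessing import Pool

from tqdm import tqdm


def work(line):
    # Stand-in for decode_path(): process one catalog entry per line.
    return len(line.strip())


def entries(path):
    # Lazy line generator, equivalent to get_entries() added to utils.py.
    with open(path) as f:
        for line in f:
            yield line


if __name__ == "__main__":
    with Pool(processes=4) as pool:
        # imap() pulls from the generator lazily and yields results in
        # order, so tqdm advances as each worker finishes an entry.
        totals = list(tqdm(pool.imap(work, entries("sources.txt")),
                           desc="Catalog generation"))
    print(sum(totals))
```

Unlike `Pool.map`, `imap` does not materialize the whole input iterable up front, so very large source files stream through the workers while the progress bar advances.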
utils.py  (+7, -0)

```diff
@@ -13,6 +13,13 @@ from jinja2 import Template
 from constants import CATALOG_DESCRIPTION_TEMPLATE
 
 
+def get_entries(source):
+    # Iterate over each line of the source file.
+    with open(source, 'r') as f:
+        for entry in f:
+            yield entry
+
+
 def make_csv(catpath, header):
     """
     Write header into CSV catalog file.
     ...
```
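`make_csv` and `make_json` produce a CSV/JSON catalog pair. Assuming that pair follows the intake-esm ESM-collection convention, which the generator's name suggests but this diff does not show, reading a catalog back might look like the following (the path is illustrative):

```python
# Hypothetical usage, assuming the JSON/CSV pair is an intake-esm
# collection; the path below is illustrative, not from the repository.
import intake  # requires the intake-esm plugin to be installed

cat = intake.open_esm_datastore("CMIP6.json")
print(cat.df.head())  # the CSV entries load as a pandas DataFrame
```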