Commit 8c987a6a in submeso-color, authored Apr 06, 2022 by Clément Haëck
Update modis related scripts
Parent: 7a50288e

Showing 4 changed files with 203 additions and 129 deletions (+203 -129)
Compute/coefs/compute_coef_modis.py            +34  -125
Compute/coefs/compute_coef_ostia.py             +7    -3
Compute/histogram/compute_hists_modis_gs3.py  +162    -0
lib/data/hi.py                                  +0    -1
Compute/coefs/compute_coef_modis.py (+34 -125; inline diff, removed and added lines are interleaved)
from os import path

import lib
import lib.data.hi as lhi
import lib.data.modis as lmo
import lib.zones as lz
import Compute.coefs.compute_coef_ostia as cc
import matplotlib.pyplot as plt
import numpy as np
import tol_colors as tc
import xarray as xr
from filefinder.library import get_date
from lib import progressbar
import lib.data.images
import lib.data.hi
import lib.zones

REDUCE = False
Q_TARGET = 0.95
HI_TARGET = 9.5
def comp_hists(hi_files, images_day, zone, variables, coef=None):
    hists = {v: np.zeros(n_bins) for v in variables}
    bins = {}
    for date, images in progressbar(images_day.items()):
        if date not in hi_files:
            continue
        hi_file = hi_files[date]

def main(args):
    lgr = lib.Logger("Getting data")
    ds = lhi.get_data(args)
    lgr.end()

        hi = xr.open_dataset(hi_file)

    if REDUCE:
        print("DATASET REDUCED FOR TESTING")
        ds = ds.isel(time=slice(None, None, 70))

        for i, image in enumerate(images):
            if not zone.intersects(image.lon_bounds, image.lat_bounds):
                continue
            hi_i = image.extract(hi, by_value=True)
            for var in variables:
                if var == 'HI':
                    hi_i['HI'] = lib.data.hi.apply_coef(hi_i, coef)
                values = np.fabs(hi_i[var]).to_masked_array().compressed()
                h, b = np.histogram(values, bins=n_bins,
                                    range=bounds[var], density=False)
                hists[var] = hists[var] + h
                bins[var] = b
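Summing per-image counts this way is only sound because every np.histogram call uses the same n_bins and the same fixed range (bounds[var]), so all images share identical bin edges and their counts can be added directly. A minimal, self-contained sketch of the same accumulation pattern (sample values below are invented, not from the data):

import numpy as np

n_bins = 1000
bounds = (0., 10.)            # fixed range shared by every image
total = np.zeros(n_bins)

for values in ([0.5, 1.2, 3.3], [2.2, 2.3, 9.9]):   # stand-ins for per-image pixel values
    h, edges = np.histogram(values, bins=n_bins, range=bounds)
    total += h                # identical edges, so counts can simply be summed

print(total.sum())            # 6 samples in total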
    lgr.msg("Getting static masks")
    args['grid'] = lmo.grid
    zone = lz.get_data(args)['total']
    land = lz.get_land(args)['land_large']
    static = zone * ~land
    ds = ds.where(static)
    lgr.end()

    return hists, bins

    coef = cc.get_components_coef(ds)
    print('components:', coef)
    coef_HI = cc.coef_hi(ds, coef)
    print('HI:', coef_HI)
    coef.update(coef_HI)
def components(hi_files, images_day, zone):
    avgs = {}
    stds = {}
    hists, bins = comp_hists(hi_files, images_day, zone, ['S', 'V', 'B'])
    for variable, hist in hists.items():
        b = bins[variable]
        if variable == 'B':
            hist[0] = 0

    s = cc.get_coef_json(coef)
    print()
    print(s)

        hist /= np.sum(hist)
        avg = get_avg(b, hist)
        std = get_std(b, hist, avg)
        bins[variable] = b
        avgs[variable] = avg
        stds[variable] = std

    for variable, std in stds.items():
        print('{}: {:.4f}'.format(variable, 1. / std))

    plot_components(bins, hists, avgs, stds)
    return hists, bins
def get_avg(bins, hist):
    return np.sum(hist * bins[:-1])


def get_std(bins, hist, avg):
    return np.sqrt(np.sum(hist * (bins[:-1] - avg)**2))
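With hist normalized to sum to 1, these two helpers compute the histogram-weighted mean and standard deviation over the left bin edges, i.e. avg = sum_i h_i * b_i and std = sqrt(sum_i h_i * (b_i - avg)**2). A quick numpy-only sanity check (synthetic data, not from the scripts):

import numpy as np

rng = np.random.default_rng(0)
samples = rng.normal(loc=2.0, scale=0.5, size=100_000)
hist, edges = np.histogram(samples, bins=500, range=(0., 4.))
hist = hist / hist.sum()                               # normalized, as in components()

avg = np.sum(hist * edges[:-1])                        # same expression as get_avg
std = np.sqrt(np.sum(hist * (edges[:-1] - avg)**2))    # same expression as get_std
print(avg, std)                                        # close to 2.0 and 0.5, up to bin-width bias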
def plot_components(bins, hists, avgs, stds):
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
    cset = tc.tol_cset('bright')
    for var, ax in zip(['S', 'V', 'B'], axes.flat):
        ax.plot(bins[var][:-1], hists[var])
        ax.axvline(avgs[var], color=cset.red)
        ax.axvspan(avgs[var] - 2*stds[var], avgs[var] + 2*stds[var],
                   color=cset.red, alpha=0.3)
def coef_hi(hi_files, images_day, zone, coef):
    coef['HI'] = 1.
    hist, bins = comp_hists(hi_files, images_day, zone, ['HI'], coef)
    hist = hist['HI']
    bins = bins['HI']
    hist /= np.sum(hist)
    mode_i = np.argmax(hist)
    mode = bins[mode_i]

    fig, ax = plt.subplots(figsize=(5, 5))
    fig.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.98)
    cset = tc.tol_cset('bright')
    ax.plot(bins[:-1] / mode, hist)
    ax.axvline(1., color=cset.red)
    fig.savefig(path.join(lib.root_plot, 'coefs',
                          'hi_{:.1f}_{}_{}.png'.format(
                              args['scale'], args['number'], args['coef'])))

    print('HI: {:.4f}'.format(1. / mode))
    return hist, bins

    return coef
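The HI coefficient is chosen so that the mode of the |HI| distribution lands at 1: the histogram is built with coef['HI'] = 1., its peak bin gives the mode, and 1/mode is reported as the coefficient. A minimal numeric sketch of that normalization (the HI values are synthetic stand-ins):

import numpy as np

rng = np.random.default_rng(1)
hi_values = np.abs(rng.normal(loc=4.0, scale=1.0, size=50_000))  # stand-in for |HI|

hist, bins = np.histogram(hi_values, bins=1000, range=(0., 20.))
hist = hist / hist.sum()
mode = bins[np.argmax(hist)]        # location of the distribution peak
coef_hi = 1. / mode                 # rescaling HI by this puts the peak near 1
print(mode, coef_hi)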
if __name__ == '__main__':
    args = lib.get_args(['region', 'days', 'year', 'fixes',
                         'scale', 'number', 'coef'])

    images_all = lib.data.images.get_images_list(args)
    images_day = lib.data.images.regroup_by_date(images_all)

    finder = lib.data.hi.get_finder(args)
    hi_files = {get_date(m): f for f, m in finder.files}

    zone = lib.zones.zones['gyre']

    bounds = {'V': [0., 5.], 'B': [0., 5.], 'S': [0., 10.], 'HI': [0., 20.]}
    n_bins = 1000

    # S, V, B
    # hists, bins = components(hi_files, images_day, zone)

                         'scale', 'coef'])
    args['number'] = 1

    # HI
    coef = lib.data.hi.get_coef(args)
    hists, bins = coef_hi(hi_files, images_day, zone, coef)

    coef = main(args)
Compute/coefs/compute_coef_ostia.py (+7 -3)

...
@@ -62,6 +62,13 @@ def main(args):
    coef.update(coef_HI)

    s = get_coef_json(coef)
    print()
    print(s)
    return s


def get_coef_json(coef):
    params = ['number', 'scale', 'coef']
    s = '- parameters: {{{}}}'.format(
        ', '.join(['{}: {}'.format(p, args[p]) for p in params]))
...
@@ -69,9 +76,6 @@ def main(args):
    variables = ['S', 'V', 'B', 'HI']
    s += ' value: {{{}}}'.format(
        ', '.join(['{}: {:.4f}'.format(v, coef[v]) for v in variables]))

    print()
    print(s)
    return s
...
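For context, get_coef_json assembles a one-line, YAML-like record of the run parameters followed by the fitted coefficients. A sketch with made-up values (none of the numbers or parameter values below come from the commit):

# Illustrative only: stand-in values, not taken from the commit.
args = {'number': 1, 'scale': 30, 'coef': 'std'}
coef = {'S': 0.1234, 'V': 0.5678, 'B': 0.9101, 'HI': 0.1112}

params = ['number', 'scale', 'coef']
variables = ['S', 'V', 'B', 'HI']
s = '- parameters: {{{}}}'.format(', '.join('{}: {}'.format(p, args[p]) for p in params))
s += ' value: {{{}}}'.format(', '.join('{}: {:.4f}'.format(v, coef[v]) for v in variables))
print(s)
# - parameters: {number: 1, scale: 30, coef: std} value: {S: 0.1234, V: 0.5678, B: 0.9101, HI: 0.1112}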
Compute/histogram/compute_hists_modis_gs3.py (+162 -0, new file, mode 100644)
import itertools
import warnings

import dask_histogram as dh
import dask_histogram.boost as dhb
import numpy as np
import xarray as xr

import lib
import lib.box
import lib.data.modis as lmo
import lib.data.hi as lhi
import lib.data.hists as lh
import lib.zones as lz
import lib.data.images as limg
# import lib.data.SN_separation as lthr

# Bins extent
VAR_RANGE = dict(
    sst=[-5., 40.],
    # chl_ocx=[5e-3, 20.]
    chl_ocx=[0, 3.]
)
# Width of SST bins
SST_NBINS = int((VAR_RANGE['sst'][1] - VAR_RANGE['sst'][0]) / 0.1)
# Number of Chl bins
CHL_NBINS = 600

VARS = ['chl_ocx', 'sst']
MASKS = ['frt', 'bkg']
MASKS_VAR = ['mask_' + m for m in MASKS]

INFILE_PARAMS = ['threshold', 'scale', 'number', 'coef', 'zone']
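With these ranges, SST_NBINS works out to int((40. - (-5.)) / 0.1) = 450 bins of width 0.1, while the 600 chl_ocx bins over [0, 3] are 0.005 wide. A quick check of the arithmetic:

# Quick check of the bin arithmetic above.
VAR_RANGE = dict(sst=[-5., 40.], chl_ocx=[0, 3.])
SST_NBINS = int((VAR_RANGE['sst'][1] - VAR_RANGE['sst'][0]) / 0.1)
CHL_NBINS = 600
print(SST_NBINS)                                                         # 450
print((VAR_RANGE['chl_ocx'][1] - VAR_RANGE['chl_ocx'][0]) / CHL_NBINS)   # 0.005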
def main(args):
    lgr = lib.Logger("Fetching images")
    images_all = limg.get_images_list(args)
    images_day = limg.regroup_by_date(images_all)

    lgr.msg("Getting datafiles")
    hi_files = limg.get_files(lhi.get_finder(args))
    mo_files = limg.get_files(lmo.get_finder(args))

    lgr.msg("Getting static masks")
    args['grid'] = lmo.grid
    zone = lz.get_data(args)['total'].persist()
    land = lz.get_land(args)['land_large'].persist()
    static = zone * ~land
    static = static.rename('static').reset_coords(drop=True)

    # thr = lthr.smooth(lthr.get_data(args))
    # thr = lib.fix_time_daily(thr)

    coef = lhi.get_coef(args)

    lgr.msg("Creating histograms")
    axis = dict(
        chl_ocx=dh.axis.Regular(CHL_NBINS, *VAR_RANGE['chl_ocx']),
        # transform=dh.axis.transform.log),
        sst=dh.axis.Regular(SST_NBINS, *VAR_RANGE['sst'])
    )
    hists_bh = {v: {m: dhb.Histogram(axis[v]) for m in MASKS} for v in VARS}
    hists_xr = {v: {m: [] for m in MASKS} for v in VARS}
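Both containers are keyed first by variable, then by mask: hists_bh holds one reusable dask_histogram Histogram per (variable, mask) pair, while hists_xr collects, for the same pair, a list of per-date histograms. A stripped-down sketch of that layout (plain dicts and strings only; the real code stores Histogram objects and xarray.DataArray counts):

# Layout sketch only.
VARS = ['chl_ocx', 'sst']
MASKS = ['frt', 'bkg']
hists_xr = {v: {m: [] for m in MASKS} for v in VARS}
hists_xr['sst']['frt'].append('histogram for 2007-06-01')   # one entry appended per date
print(list(hists_xr), list(hists_xr['sst']))                 # ['chl_ocx', 'sst'] ['frt', 'bkg']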
    lgr.msg("Setting up fills")
    for date, images in images_day.items():
        lgr.progress()
        if date not in hi_files or date not in mo_files:
            warnings.warn("{} image not in data".format(date.strftime('%F')))
            continue

        for v, m in itertools.product(VARS, MASKS):
            hists_bh[v][m].reset()

        mo = xr.open_dataset(mo_files[date])
        hi = xr.open_dataset(hi_files[date])
        hi = hi.isel(time=0)
        ds = xr.merge([mo, hi, static], join='exact')

        # thr_date = float(thr.sel(time=date).values)

        for image in images:
            if image.lat_min > 32:
                continue
            image.lat_max = min(image.lat_max, 32)
            ds_i = image.extract(ds, by_value=True)
            ds_i['HI'] = lhi.apply_coef(ds_i, coef)
            ds_i['HI'] = ds_i.HI.where(ds_i.static)

            mask = dict(
                frt=ds_i.HI > args['threshold'],
                bkg=ds_i.HI < args['threshold']
            )

            for v, m in itertools.product(VARS, MASKS):
                hists_bh[v][m].fill(ds_i[v].where(mask[m]).data.ravel())
        for v, m in itertools.product(VARS, MASKS):
            dk_arr, _ = hists_bh[v][m].to_dask_array()
            # Need to materialize to a numpy array here: otherwise the
            # computation stays lazy and counts from different dates
            # interfere once the histogram is reset for the next day.
            da = xr.DataArray(np.array(dk_arr), dims=['bins'], name='hist')
            da = da.expand_dims(time=[date], mask=[m])
            hists_xr[v][m].append(da)
    hists_v = []
    for v in VARS:
        hists_m = []
        for m in MASKS:
            hists_m.append(xr.concat(hists_xr[v][m], dim='time'))
        hist = xr.concat(hists_m, dim='mask')
        hist = hist.assign_coords(bins=axis[v].edges[:-1])
        hist.bins.attrs['right_edge'] = axis[v].edges[-1]
        hist.bins.attrs['VAR'] = v
        hist = hist.rename(bins=lh.var_name('bins', v))
        hist = hist.rename(lh.var_name('hist', v))
        hist.attrs['VAR'] = v
        hists_v.append(hist)

    hists = xr.merge(hists_v)
    hists.attrs.pop('VAR')
    hists.attrs['VARS'] = VARS
    hists = hists.expand_dims({d: [args[d]] for d in INFILE_PARAMS})
    # We have at most every pixel of an image in one bin,
    # so approx 1000*1000 = 1e6 values. Uint32 stores up to 2**32~4e9.
    encoding = {lh.var_name('hist', v): {'dtype': 'uint32',
                                         '_FillValue': 2**30 - 1}
                for v in VARS}
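Spelling out the comment's arithmetic: a single image contributes at most about 1000 * 1000 = 1e6 counts to any one bin, while uint32 tops out at 2**32 - 1, roughly 4.29e9, and the chosen fill value 2**30 - 1 also fits comfortably. A quick check (the 1000 x 1000 image size is the comment's approximation, not a measured value):

per_image = 1000 * 1000      # upper bound on counts landing in one bin for one image
print(per_image)             # 1_000_000
print(2**32 - 1)             # 4_294_967_295, uint32 maximum
print(2**30 - 1)             # 1_073_741_823, the _FillValue chosen above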
    lgr.msg("Writing to disk")
    ofile = lh.get_filename(args)
    lib.check_output_dir(ofile, file=True)
    lib.setup_metadata(hists, args)
    hists.to_netcdf(ofile, encoding=encoding)
    lgr.end()

    return hists
if __name__ == '__main__':
    def add_args(parser):
        # Zone·s to compute
        parser.add_argument('-zones', type=str, default='INS')

    args = lib.get_args(['region', 'year', 'days', 'scale', 'coef',
                         'fixes', 'threshold'], add_args)

    args['kind'] = '1thr'
    args['fixes']['Y'] = args['year']
    args['Y'] = args['year']
    args['zone'] = 'GS3_S'
    args['number'] = 1
    args['mask'] = False

    hists = main(args)
lib/data/hi.py (+0 -1)
"""Import DataBase for H-Index."""
from
os
import
path
import
yaml
from
filefinder
import
Finder
...
...