Skip to content
Snippets Groups Projects
Commit 70c78606 authored by JOSSOUD Olivier
Browse files

Explo. Iceblocks. Which one is melting.

parent d0ae9879
No related branches found
No related tags found
No related merge requests found
......@@ -70,7 +70,7 @@ class Dataset:
def explore_dataset(self) -> None:
for filename in os.listdir(self.full_directory_name):
try:
inst_and_type = re.search(self.directory_name + '_(.+?).log', filename).group(1)
inst_and_type = re.search("^" + self.directory_name + '_(.+?).log$', filename).group(1)
except AttributeError:
# The found file does not match normal instrument's log file pattern
print("File [" + filename + "] does not appear to be a valid CFA log file")
......@@ -81,7 +81,10 @@ class Dataset:
if len(inst_and_type.split("_")) == 2:
log_type = inst_and_type.split("_")[1]
if log_type == "instant":
instrument_log = InstrumentInstantLog(self.full_directory_name, filename, instrument_name)
if instrument_name == "ICBKCTRL":
instrument_log = IceblockInstantLog(self.full_directory_name, filename, instrument_name)
else:
instrument_log = InstrumentInstantLog(self.full_directory_name, filename, instrument_name)
elif log_type == "periodic":
instrument_log = InstrumentPeriodicLog(self.full_directory_name, filename, instrument_name)
self.last_data_datetime = max(self.last_data_datetime,
......@@ -135,6 +138,51 @@ class InstrumentInstantLog(InstrumentLog):
return timeseries_df
class IceblockInstantLog(InstrumentLog):
    """Instant log of the iceblock controller, exposing which iceblock is melting.

    The log file is read as a comma-separated file. This class uses its
    ``datetime``, ``id``, ``name`` and ``status`` columns.

    NOTE(review): ``self.full_file_name`` and ``self.df`` are not set in this
    class; they are presumably provided by ``InstrumentLog`` (with ``self.df``
    presumably built from ``__get_df__``) -- confirm against the base class.
    """

    def __init__(self, full_directory_name: str, filename: str, instrument_name: str):
        """Forward the log's directory, file name and instrument name to ``InstrumentLog``."""
        super().__init__(full_directory_name, filename, instrument_name)

    def __get_df__(self) -> pd.DataFrame:
        """Read the log file and return it with a UTC-localized ``datetime`` column."""
        df = pd.read_csv(self.full_file_name, sep=",", parse_dates=["datetime"])
        # The parsed timestamps are naive; they are declared to be UTC here.
        df["datetime"] = df["datetime"].dt.tz_localize('UTC')
        return df

    def get_variables(self):
        """Return the names of the variables accepted by ``get_timeseries``."""
        return ["melting"]

    def get_timeseries(self, variable: str) -> pd.DataFrame:
        """Return the timeseries of ``variable``.

        Raises:
            ValueError: if ``variable`` is anything other than ``"melting"``.
        """
        if variable != "melting":
            raise ValueError("Variable name [" + variable + "] not yet managed.")
        return self.__get_melting_timeseries__()

    def __get_melting_timeseries__(self) -> pd.DataFrame:
        """Build the "which iceblock is melting" timeseries.

        Returns:
            A DataFrame with columns ``datetime`` (first "Melting" row of each
            iceblock), ``value_int`` (iceblock id) and ``value`` (iceblock
            name). When at least one iceblock has a "Done" row, a final row
            with ``value_int`` 0 / ``value`` "None" is added at the datetime
            of the last such "Done" row. When there is no "Done" row, no
            closing row is added.
        """
        # Map each iceblock id to its name, assuming that the last recorded
        # name for an id is the right one. Id 0 stands for "no iceblock".
        last_names = self.df.groupby("id")[["id", "name"]].tail(1)
        mapping_dict = last_names.set_index("id")["name"].to_dict()
        mapping_dict[0] = "None"

        status_df = self.df[["datetime", "id", "status"]]

        # First "Melting" row of each iceblock: the beginning of its melting.
        is_melting = status_df["status"] == "Melting"
        start_df = status_df[is_melting].groupby("id")[["datetime", "id"]].head(1)

        # First "Done" row of each iceblock; the last of these rows (in file
        # order) marks the point after which no block is melting.
        is_done = status_df["status"] == "Done"
        done_df = status_df[is_done].groupby("id").head(1)

        if done_df.empty:
            # No iceblock is finished yet: there is no closing row to add.
            melting_df = start_df.reset_index(drop=True)
        else:
            # pd.concat replaces DataFrame.append, which no longer exists in
            # pandas >= 2.0.
            end_row = pd.DataFrame({"datetime": [done_df.iloc[-1]["datetime"]], "id": [0]})
            melting_df = pd.concat([start_df, end_row], ignore_index=True)

        # value_int is the coded value (iceblock id), value is the iceblock's name.
        melting_df = melting_df.rename(columns={"id": "value_int"})
        melting_df["value"] = melting_df["value_int"].map(mapping_dict)
        return melting_df
class InstrumentPeriodicLog(InstrumentLog):
def __init__(self, full_directory_name: str, filename: str, instrument_name: str):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment