diff --git a/src/dataprovider/exploprovider.py b/src/dataprovider/exploprovider.py
index 35387b42c60867b796667763e5c6881f0c02d45d..9f0f9b274e39ba2c545726de4f5fc0f218c65b21 100644
--- a/src/dataprovider/exploprovider.py
+++ b/src/dataprovider/exploprovider.py
@@ -70,7 +70,7 @@ class Dataset:
     def explore_dataset(self) -> None:
         for filename in os.listdir(self.full_directory_name):
             try:
-                inst_and_type = re.search(self.directory_name + '_(.+?).log', filename).group(1)
+                inst_and_type = re.search("^" + re.escape(self.directory_name) + r'_(.+?)\.log$', filename).group(1)
             except AttributeError:
                 # The found file does not match normal instrument's log file pattern
                 print("File [" + filename + "] does not appear to be a valid CFA log file")
@@ -81,7 +81,10 @@ class Dataset:
             if len(inst_and_type.split("_")) == 2:
                 log_type = inst_and_type.split("_")[1]
                 if log_type == "instant":
-                    instrument_log = InstrumentInstantLog(self.full_directory_name, filename, instrument_name)
+                    if instrument_name == "ICBKCTRL":
+                        instrument_log = IceblockInstantLog(self.full_directory_name, filename, instrument_name)
+                    else:
+                        instrument_log = InstrumentInstantLog(self.full_directory_name, filename, instrument_name)
                 elif log_type == "periodic":
                     instrument_log = InstrumentPeriodicLog(self.full_directory_name, filename, instrument_name)
                 self.last_data_datetime = max(self.last_data_datetime,
@@ -135,6 +138,51 @@ class InstrumentInstantLog(InstrumentLog):
         return timeseries_df
 
 
+class IceblockInstantLog(InstrumentLog):
+
+    def __init__(self, full_directory_name: str, filename: str, instrument_name: str):
+        InstrumentLog.__init__(self, full_directory_name, filename, instrument_name)
+
+    def __get_df__(self) -> pd.DataFrame:
+        df = pd.read_csv(self.full_file_name, sep=",", parse_dates=["datetime"])
+        df["datetime"] = df["datetime"].dt.tz_localize('UTC')
+        return df
+
+    def get_variables(self):
+        return ["melting"]
+
+    def get_timeseries(self, variable: str) -> pd.DataFrame:
+        if variable == "melting":
+            timeseries_df = self.__get_melting_timeseries__()
+        else:
+            raise ValueError("Variable name [" + variable + "] not yet managed.")
+
+        return timeseries_df
+
+    def __get_melting_timeseries__(self) -> pd.DataFrame:
+        # Get the mapping between iceblock id and iceblock name (assuming that the last name's modification is the
+        # good one).
+        mapping_df = self.df[["datetime", "id", "name"]].copy()
+        mapping_df = mapping_df.groupby("id")[["id", "name"]].tail(1)
+        mapping_df = pd.concat([mapping_df, pd.DataFrame([{"id": 0, "name": "None"}])], ignore_index=True)
+        mapping_df = mapping_df.set_index("id")
+        mapping_dict = mapping_df["name"].to_dict()
+
+        # Get the datetime of the beginning of each iceblock's melting
+        melting_df = self.df[["datetime", "id", "status"]].copy()
+        start_df = melting_df[melting_df["status"] == "Melting"].groupby("id")[["datetime", "id"]].head(1)
+
+        # Get the end of the last iceblock's melting, and set that after that the current melting block is 0/None.
+        end_df = melting_df[melting_df["status"] == "Done"].groupby("id").head(1)
+        melting_df = pd.concat([start_df, pd.DataFrame([{"datetime": end_df.iloc[-1]["datetime"], "id": 0}])],
+                               ignore_index=True)
+
+        # Get the value (iceblocks name) and value_int (coded value, iceblock id in this case).
+        melting_df.rename(columns={"id": 'value_int'}, inplace=True)
+        melting_df["value"] = melting_df["value_int"].map(mapping_dict)
+
+        return melting_df
+
 class InstrumentPeriodicLog(InstrumentLog):
 
     def __init__(self, full_directory_name: str, filename: str, instrument_name: str):