Commit 503488ea authored by JOSSOUD Olivier's avatar JOSSOUD Olivier
Browse files

Iceblock Processor. Get clean iceblock_df

parent 01d71d16
Pipeline #119360 passed with stages
in 44 seconds
......@@ -444,12 +444,12 @@ def get_absolute_melted_height(encoder_df: pd.DataFrame,
encoder_df: pd.DataFrame
Encoder's dataset.
``ENCODER_periodic``'s dataset.
stacked_iceblocks_df: pd.DataFrame
Datetime-indexed DataFrame containing the total height of ice blocks stacked. In other words, the height of all
the ice blocks stacked at the same time should be summed.
moveup_event_df: pd.DataFrame
Output of :func:`~processor.encoder.get_moveup_events` function.
Output of :func:`cfatools.processor.encoder.get_moveup_events` function.
......@@ -2,7 +2,7 @@
The iceblock processor module provides functions to manipulate ice block controller's raw data (from
``yyyymmdd_dataset_name_ICBKCTRL_instant.log``-like log files).
import numpy as np
import pandas as pd
......@@ -33,3 +33,41 @@ def get_melting_timeseries(iceblock_df: pd.DataFrame) -> pd.DataFrame:
melting_df = melting_df.set_index("datetime")
return melting_df
def get_clean_iceblock_df(iceblock_df: pd.DataFrame) -> pd.DataFrame:
    """Get "cleaned" iceblocks' information, keeping only relevant data.

    Each iceblock can appear several times in the raw ``ICBKCTRL_instant`` log
    (one row per data modification). This keeps, for each block ``id``:
    the first appearance (used to compute stacking groups), the first row where
    melting started (``datetime_start``) and the last data modification (assumed
    to hold the final, valid values).

    Parameters
    ----------
    iceblock_df: pd.DataFrame
        ``ICBKCTRL_instant`` dataset, indexed by datetime.

    Returns
    -------
    pd.DataFrame
        Same as input DataFrame, without the lines with the intermediate data
        modification. Adds ``datetime_stacked`` (datetime of the block's first
        appearance) and ``stack_id`` (blocks first seen within 30 s of each
        other share the same stack id).
    """
    # Find first appearance to determine which blocks were stacked at the same time.
    first_df = iceblock_df.groupby("id").head(1).copy()
    first_df["time_diff"] = first_df.index.to_series().diff().dt.total_seconds()
    # A gap of more than 30 s between first appearances starts a new stack.
    first_df["new_stack"] = 0
    first_df.loc[first_df["time_diff"] > 30, "new_stack"] = 1
    first_df["stack_id"] = first_df["new_stack"].cumsum()
    # The datetime index becomes the block's stacking datetime.
    # NOTE(review): original line was garbled; assumes the index column is
    # named "datetime" (as elsewhere in this module) — confirm against data.
    first_df = first_df.reset_index().rename(columns={"datetime": "datetime_stacked"})
    first_df = first_df[["id", "datetime_stacked", "stack_id"]]

    iceblock_df = iceblock_df.reset_index(drop=True)

    # Find datetime when the block started to melt (first row with a non-null
    # datetime_start for each block).
    start_df = iceblock_df[iceblock_df["datetime_start"].notna()].groupby("id").head(1)
    start_df = start_df[["id", "datetime_start"]]

    # Find the last data modification for each iceblock as it is supposed to be
    # the valid one; drop its datetime_start in favour of start_df's.
    final_df = iceblock_df.groupby("id").tail(1)
    final_df = final_df.drop(columns="datetime_start")

    # Outer merges keep blocks that never started melting (datetime_start = NaT).
    clean_iceblock_df = pd.merge(start_df, final_df, on="id", how="outer")
    clean_iceblock_df = pd.merge(clean_iceblock_df, first_df, on="id", how="outer")
    return clean_iceblock_df
......@@ -14,8 +14,16 @@ class TestIceblock(TestCase):
def test_get_melting_timeseries(self):
reader = InstrumentReader(self.base_path)
iceblock_df = reader.get_timeseries("20191104_test_temperature_10", "ICBKCTRL_instant")
iceblock_df = reader.get_timeseries("20210507_ASUMA2016_8_14", "ICBKCTRL_instant")
except:"Exception raised!")
def test_get_clean_iceblock_df(self):
reader = InstrumentReader(self.base_path)
iceblock_df = reader.get_timeseries("20210506_test_encoder_mock_core4", "ICBKCTRL_instant")
except:"Exception raised!")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment