Skip to content
Snippets Groups Projects
Commit 08a77984 authored by JOSSOUD Olivier's avatar JOSSOUD Olivier
Browse files

Explo. Picarro OK

parent d9547158
No related branches found
No related tags found
No related merge requests found
[DATA_SOURCE]
# Absolute path of the base directory where the data files produced by the CFA are stored.
absolute_root_dir = /homel/ojossoud/_temp/data_cfa
# Absolute path of the Picarro data
picarro_root_dir = /media/ojossoud/DATA/PicarroData
picarro_id = HIDS2334
\ No newline at end of file
......@@ -4,14 +4,16 @@ import os
import re
import xmltodict
import utils
from dataprovider.picarroprovider import PicarroProvider
class ExploProvider:
def __init__(self):
def __init__(self, picarro_prvd: PicarroProvider):
self.datasets_root_directory = ""
self.datasets = {}
pass
self.picarro_prvd = picarro_prvd
def explore_root_directory(self, root_directory: str) -> list:
"""Get the names of the datasets directories.
......@@ -44,7 +46,7 @@ class ExploProvider:
self.datasets_root_directory = root_directory
self.datasets_dirs = dataset_directories
for directory in dataset_directories:
dataset = Dataset(root_directory, directory)
dataset = Dataset(root_directory, directory, self.picarro_prvd)
self.datasets[directory] = dataset
return dataset_directories
......@@ -52,13 +54,16 @@ class ExploProvider:
class Dataset:
def __init__(self, root_directory: str, directory_name: str):
def __init__(self, root_directory: str, directory_name: str, picarro_prvd: PicarroProvider):
self.root_directory = root_directory
self.directory_name = directory_name
self.full_directory_name = root_directory + "/" + directory_name
self.picarro_prvd = picarro_prvd
# Get dataset name
self.dataset_text = directory_name[-9:]
self.first_data_datetime = datetime.datetime.now(tz=datetime.timezone.utc)
self.last_data_datetime = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
self.instlogs = {}
......@@ -67,7 +72,9 @@ class Dataset:
self.explore_dataset()
def explore_dataset(self) -> None:
for filename in os.listdir(self.full_directory_name):
filenames = os.listdir(self.full_directory_name)
for filename in filenames:
try:
inst_and_type = re.search("^" + self.directory_name + '_(.+?).log$', filename).group(1)
except AttributeError:
......@@ -86,14 +93,36 @@ class Dataset:
instrument_log = InstrumentInstantLog(self.full_directory_name, filename, instrument_name)
elif log_type == "periodic":
instrument_log = InstrumentPeriodicLog(self.full_directory_name, filename, instrument_name)
self.last_data_datetime = max(self.last_data_datetime,
instrument_log.df["datetime"].max())
self.first_data_datetime = min(self.first_data_datetime, instrument_log.df["datetime"].min())
self.last_data_datetime = max(self.last_data_datetime, instrument_log.df["datetime"].max())
else:
raise ValueError("Unknown log type: [" + log_type + "]")
self.instlogs[inst_and_type] = instrument_log
elif instrument_name == "manual-event":
self.manual_event_log = ManualEventLog(self.full_directory_name, filename, instrument_name)
# Picarro data are not logged the same way as the other instruments' data: they are logged directly by the Picarro
# instrument itself. In order to have comparable data files, create an "artificial" PICARRO_periodic log file from
# the Picarro log files.
picarro_filename = self.directory_name + "_PICARRO_periodic.log"
if picarro_filename not in filenames:
try:
picarro_df = self.picarro_prvd.get_df(self.first_data_datetime,
self.last_data_datetime,
["H2O", "Delta_D_H", "Delta_18_16"])
except:
print("Failed to get Picarro data")
return
picarro_df.to_csv(path_or_buf=self.full_directory_name + "/" + picarro_filename,
sep="\t",
index=False,
mode='w', # Always override file content
date_format=utils.datetime_format
)
picarro_log = InstrumentPeriodicLog(self.full_directory_name, picarro_filename, "PICARRO")
self.instlogs["PICARRO_periodic"] = picarro_log
class InstrumentLog:
......
import pandas as pd
import numpy as np
import datetime
import os
import re
from config import Config
class PicarroProvider:
    """Read the data files written by a Picarro instrument.

    Files are looked up in ``<picarro_root_dir>/<picarro_id>/<YYYY>/<MM>/<DD>/``,
    where ``picarro_root_dir`` and ``picarro_id`` are read from the
    ``DATA_SOURCE`` section of the configuration.
    """

    def __init__(self, config: "Config"):
        """Build the instrument's root directory from the configuration.

        :param config: Configuration object; its ``read(section, key)`` method
            is used to get ``picarro_id`` and ``picarro_root_dir``.
        """
        self.picarro_id = config.read("DATA_SOURCE", "picarro_id")
        self.root_dir = config.read("DATA_SOURCE", "picarro_root_dir") + "/" + self.picarro_id + "/"

    def get_df(self, first_datetime: datetime.datetime, last_datetime: datetime.datetime,
               columns: list) -> pd.DataFrame:
        """Get the Picarro data recorded between two datetimes (both included).

        Every file of every day directory from ``first_datetime``'s day to
        ``last_datetime``'s day is read, then the rows are filtered on the
        requested time span.

        :param first_datetime: Timezone-aware start of the requested period.
        :param last_datetime: Timezone-aware end of the requested period.
        :param columns: Names of the data columns to keep from the Picarro files.
        :return: DataFrame made of the requested ``columns`` followed by a
            UTC-localized ``datetime`` column built from the files' ``DATE``
            and ``TIME`` columns.
        :raises ValueError: If ``first_datetime`` is later than ``last_datetime``.
        :raises FileNotFoundError: If a day directory does not exist, or if the
            day directories do not contain any file.
        """
        if first_datetime > last_datetime:
            raise ValueError("first_datetime must not be later than last_datetime")

        # TODO: Read only the relevant files, not all the files of each day.
        step_dfs = []
        day = first_datetime.date()
        last_day = last_datetime.date()
        while day <= last_day:
            directory = self.root_dir + str(day.year) \
                        + "/" + str(day.month).zfill(2) \
                        + "/" + str(day.day).zfill(2)
            # os.listdir() returns names in arbitrary order: sort them so that
            # the rows always come out in the same order.
            for filename in sorted(os.listdir(directory)):
                step_df = pd.read_csv(directory + "/" + filename, sep=r"\s+")
                step_dfs.append(step_df[["DATE", "TIME"] + list(columns)])
            day += datetime.timedelta(days=1)

        if not step_dfs:
            raise FileNotFoundError("No Picarro data file found between "
                                    + str(first_datetime) + " and " + str(last_datetime))

        # Concatenate once: DataFrame.append() copied the whole frame at each
        # iteration and no longer exists in pandas >= 2.0.
        picarro_df = pd.concat(step_dfs, ignore_index=True)

        # Localize as UTC so the column can be compared with the timezone-aware bounds.
        picarro_df["datetime"] = pd.to_datetime(picarro_df["DATE"] + " " + picarro_df["TIME"]).dt.tz_localize('UTC')
        picarro_df = picarro_df.drop(columns=["DATE", "TIME"])

        in_period = (picarro_df["datetime"] >= first_datetime) & (picarro_df["datetime"] <= last_datetime)
        return picarro_df[in_period]
......@@ -9,6 +9,7 @@ from gui.mainwindow import MainWindow
from dataprovider.conductcalibprovider import ConductCalibProvider
from dataprovider.exploprovider import ExploProvider
from dataprovider.picarroprovider import PicarroProvider
from uim.conductcalibuim import ConductCalibUim
from uim.explouim import ExploUim
......@@ -30,7 +31,8 @@ main_window_ui = main_window.main_ui
# DATA PROVIDERS
########################################################################################################################
conduct_prvd = ConductCalibProvider()
explo_prvd = ExploProvider()
picarro_prvd = PicarroProvider(config)
explo_prvd = ExploProvider(picarro_prvd)
########################################################################################################################
# GUI MANAGERS
......
......@@ -6,6 +6,7 @@ import re
from PyQt5.QtWidgets import QTableWidget
from PyQt5.QtGui import QColor
datetime_format = "%Y-%m-%dT%H:%M:%S.%fZ"
def pd_time_to_epoch_ms(series: pd.Series) -> list:
"""Convert a Pandas' Series containing Timestamp data into 1D array containing seconds (as float) since epoch."""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment