# Source code for LOGS_solutions.GenerateStatistics.StatisticNMR.StatisticsTypesOfExperiments

import csv
import logging
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Tuple
from pathvalidate import sanitize_filename

from LOGS.Entities import DatasetRequestParameter
from LOGS.LOGS import LOGS

from .StatisticHandlerNMR import StatisticHandlerNMR


class StatisticsTypesOfExperiments(StatisticHandlerNMR):
    """Create statistics for the different types of NMR experiments.

    For every instrument the datasets with format "BrukerNMR" are grouped by
    the value of their "General creation parameters/Dimension" parameter and
    one report per instrument is generated (saved as HTML or PDF files by the
    inherited report machinery).
    """

    # Parameter key that identifies the type of an NMR experiment.
    _DIMENSION_KEY = "General creation parameters/Dimension"
    # Bucket used for datasets that do not carry the dimension parameter.
    _NO_DIMENSION = "No dimension"

    def __init__(
        self,
        logs: LOGS,
        begin_date: datetime = None,
        end_date: datetime = None,
        target_path: str = None,
    ):
        """Initialization.

        :param logs: LOGS object to access the LOGS web API,
        :param begin_date: Lowest date limit for statistics to be created.
            If None, the creation date of the first dataset (sorted by
            creation date) is used.
        :param end_date: Highest date limit for statistics to be created.
            If None, the day after the creation date of the last dataset
            (sorted by creation date) is used.
        :param target_path: Path where all datasets should be saved.
        """
        self._logger_instruments = logging.getLogger("StatisticsTypesInstruments")
        self._logger_instruments.setLevel(logging.INFO)

        logfile_folder = Path(__file__).resolve().parent / "logfiles"
        logfile_folder.mkdir(parents=True, exist_ok=True)

        # Only attach a console handler once, otherwise every new instance
        # would duplicate each log line.
        if not self._logger_instruments.hasHandlers():
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            logconsole_handler = logging.StreamHandler(sys.stdout)
            logconsole_handler.setLevel(logging.INFO)
            logconsole_handler.setFormatter(formatter)
            self._logger_instruments.addHandler(logconsole_handler)

        super().__init__(
            logs, begin_date, end_date, target_path, self._logger_instruments
        )

        self.__instrument_path = self._target_path / "types_of_instruments"

        # NOTE(review): the midnight normalisation below is applied only to
        # dates derived from the datasets; dates passed in by the caller are
        # assumed to be handled by the base class -- confirm.
        if self._begin_date is None:
            first_dataset = self._logs.datasets(
                DatasetRequestParameter(sortBy="CREATION_DATE")
            ).first()
            # Fall back to "now" when there is no dataset (or it has no
            # creation date) instead of failing with an AttributeError.
            earliest = getattr(first_dataset, "creationDate", None) or datetime.now()
            self._begin_date = earliest.replace(
                hour=0, minute=0, second=0, microsecond=0
            )

        if self._end_date is None:
            # Walk the result once and keep only the last entry; building a
            # list of every dataset just to read its tail wastes memory.
            last_dataset = None
            for last_dataset in self._logs.datasets(
                DatasetRequestParameter(sortBy="CREATION_DATE")
            ):
                pass
            latest = getattr(last_dataset, "creationDate", None) or datetime.now()
            # The upper limit is midnight of the following day.
            self._end_date = (latest + timedelta(days=1)).replace(
                hour=0, minute=0, second=0, microsecond=0
            )

    def _bruker_nmr_request(self) -> DatasetRequestParameter:
        """Build the request for "BrukerNMR" datasets within the time frame."""
        return DatasetRequestParameter(
            creationDateFrom=self._begin_date,
            creationDateTo=self._end_date,
            formatIds=["BrukerNMR"],
        )

    def _record_dataset_without_dimension(
        self, dataset_id, instrument_id, instrument_name
    ) -> None:
        """Append a dataset that has no dimension parameter to "no_dimension.csv"."""
        self.__instrument_path.mkdir(parents=True, exist_ok=True)
        csv_path = self.__instrument_path / "no_dimension.csv"
        file_exists = csv_path.is_file()

        # newline="" is required by the csv module to avoid blank rows on
        # Windows; the explicit encoding keeps non-ASCII instrument names
        # writable regardless of the platform default.
        with open(csv_path, "a", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            if not file_exists:
                writer.writerow(["Dataset ID", "Instrument ID", "Instrument Name"])
            writer.writerow([dataset_id, instrument_id, instrument_name])

    def get_dataset_instruments(self) -> Dict[int, Tuple[str, Dict[str, int]]]:
        """Get all instruments of the datasets and count the number of each
        type of experiment per instrument.

        Datasets without an instrument are collected under instrument ID 0
        with the name "No instrument". Datasets without a dimension parameter
        are counted as "No dimension" and additionally listed in
        "no_dimension.csv" inside the instrument report folder.

        :return: Dictionary with instrument ID as key and a tuple with
            instrument name and a dictionary with experiment type as key and
            count as value. Empty if no matching dataset exists.
        """
        # Total number of datasets with the format "BrukerNMR" in the given
        # time frame (only used for the early exit and progress messages).
        datasets_total = self._logs.datasets(self._bruker_nmr_request()).count

        if datasets_total == 0:
            self._logger_instruments.warning(
                "No datasets with format 'BrukerNMR' found in the given time frame."
            )
            return {}

        self._logger_instruments.info(
            "Processing instruments with format 'BrukerNMR' in the given time frame: begin date: %s - end date: %s.",
            self._begin_date.strftime("%d/%B/%Y"),
            self._end_date.strftime("%d/%B/%Y"),
        )

        instruments = {}  # {instrument_id: (instrument_name, {experiment_type: count})}
        count = 0

        for dataset in self._logs.datasets(self._bruker_nmr_request()):
            # Skip datasets with a missing or implausible creation date. The
            # None check must come first: reading .tzinfo on None would raise.
            creation_date = dataset.creationDate
            if creation_date is not None:
                tz = creation_date.tzinfo
                # NOTE(review): the limits look like the range of 64-bit
                # nanosecond timestamps (e.g. pandas) -- confirm.
                date_is_valid = (
                    datetime(1677, 9, 21, tzinfo=tz)
                    < creation_date
                    < datetime(2262, 4, 11, tzinfo=tz)
                )
            else:
                date_is_valid = False

            if not date_is_valid:
                self._logger_instruments.warning(
                    "Dataset %s has invalid creation date.: %s. Dataset will not be included in the statistics.",
                    dataset.id,
                    dataset.creationDate,
                )
                continue

            if dataset.instrument is None:
                dataset_instrument_id = 0
                dataset_instrument_name = "No instrument"
            else:
                dataset_instrument_id = dataset.instrument.id
                dataset_instrument_name = dataset.instrument.name

            if dataset_instrument_id not in instruments:
                instruments[dataset_instrument_id] = (dataset_instrument_name, {})
            experiment_counts = instruments[dataset_instrument_id][1]

            dataset.fetchParameters()
            # Treat a dataset without any parameters like one without a
            # dimension instead of failing on None.
            parameters = dataset.parameters or {}
            dimension = parameters.get(self._DIMENSION_KEY)

            if dimension is None:
                self._record_dataset_without_dimension(
                    dataset.id, dataset_instrument_id, dataset_instrument_name
                )
                dimension = self._NO_DIMENSION

            experiment_counts[dimension] = experiment_counts.get(dimension, 0) + 1

            if count % 10000 == 0 and count != 0:
                self._logger_instruments.info(
                    "%d/%d datasets processed.", count, datasets_total
                )
            count += 1

        self._logger_instruments.info(
            "Finished getting all datasets with format 'BrukerNMR' in the given date range."
        )
        return instruments

    def create_statistic(self) -> None:
        """Create the statistics of the different types of NMR experiments of
        each instrument.

        One report is created per instrument found by
        :meth:`get_dataset_instruments`; nothing is created if no dataset
        matches.
        """
        self._logger_instruments.info(
            "Starting to generate a statistical analysis of the different types of NMR experiments of each instrument."
        )

        instruments = self.get_dataset_instruments()

        self._logger_instruments.info(
            "Creating reports with a statistical analysis of the different types of NMR experiments."
        )

        for instrument_id, (instrument_name, experiment_counts) in instruments.items():
            # The instrument name becomes part of a file name, so strip
            # characters that are not allowed there.
            safe_name = sanitize_filename(instrument_name)
            self.create_report(
                self.__instrument_path,
                True,
                False,
                # The original name ended in a stray ")" which was a typo.
                f"Types_of_NMR_experiments_of_{safe_name}_ID{instrument_id}",
                self.create_plot_instrument_num(
                    instrument_id,
                    instrument_name,
                    experiment_counts,
                ),
            )

        self._logger_instruments.info(
            "Finished generating reports with a statistical analysis of the different types of NMR experiments."
        )
        self._logger_instruments.info(
            "Finished generating a statistical analysis of the different types of NMR experiments of each instrument."
        )