# Source code for LOGS_solutions.GenerateStatistics.StatisticNMR.StatisticsTypesOfExperiments

import csv
import logging
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Optional, Tuple

from LOGS.Entities import DatasetRequestParameter
from LOGS.LOGS import LOGS

from ..Common.FileHandler import FileHandler
from .StatisticHandlerNMR import StatisticHandlerNMR


class StatisticsTypesOfExperiments(StatisticHandlerNMR):
    """This class provides methods to create statistics for the different
    types of NMR experiments and save them as HTML or PDF files.

    Datasets with the format "BrukerNMR" are grouped by instrument and, per
    instrument, counted by the value of their
    "General acquisition parameters/Dimension" parameter.
    """

    # Parameter whose value is used as the "type of experiment".
    _DIMENSION_PARAMETER = "General acquisition parameters/Dimension"
    # Bucket used for datasets that do not provide the dimension parameter.
    _NO_DIMENSION_LABEL = "No dimension"
    # A progress message is logged every time this many datasets were counted.
    _PROGRESS_LOG_INTERVAL = 10000

    def __init__(
        self,
        logs: LOGS,
        begin_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        target_path: Optional[str] = None,
    ):
        """Initialization.

        :param logs: LOGS object to access the LOGS web API,
        :param begin_date: Lowest date limit for statistics to be created.
            If None, the acquisition date of the earliest dataset is used.
        :param end_date: Highest date limit for statistics to be created.
            If None, the day after the acquisition date of the latest
            dataset is used.
        :param target_path: Path where all datasets should be saved.
        """
        self._logger_instruments = logging.getLogger("StatisticsTypesInstruments")
        self._logger_instruments.setLevel(logging.INFO)

        logfile_folder = Path(__file__).resolve().parent / "logfiles"
        logfile_folder.mkdir(parents=True, exist_ok=True)
        logfile_path = logfile_folder / "StatisticsTypesInstruments.log"

        # Only attach handlers once, otherwise every new instance would
        # duplicate the output. Note that hasHandlers() also reports handlers
        # configured on ancestor loggers (e.g. the root logger).
        if not self._logger_instruments.hasHandlers():
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

            logfile_handler = logging.FileHandler(logfile_path, mode="w")
            logfile_handler.setFormatter(formatter)
            self._logger_instruments.addHandler(logfile_handler)

            logconsole_handler = logging.StreamHandler(sys.stdout)
            logconsole_handler.setLevel(logging.INFO)
            logconsole_handler.setFormatter(formatter)
            self._logger_instruments.addHandler(logconsole_handler)

        super().__init__(
            logs, begin_date, end_date, target_path, self._logger_instruments
        )

        self.__instrument_path = self._target_path / "types_of_instruments"

        # NOTE(review): the indentation of this file was lost during
        # extraction. The midnight normalisation below is assumed to belong to
        # the fallback branches only (dates passed by the caller are used as
        # given) - confirm against the original file.
        # NOTE(review): assumes that at least one dataset with an acquisition
        # date exists; otherwise the attribute access below fails - confirm.
        if self._begin_date is None:
            earliest = (
                self._logs.datasets(
                    DatasetRequestParameter(orderby="ACQUISITION_DATE_ASC")
                )
                .first()
                .acquisitionDate
            )
            # Start at the beginning of the day of the earliest dataset.
            self._begin_date = earliest.replace(
                hour=0, minute=0, second=0, microsecond=0
            )

        if self._end_date is None:
            latest = (
                self._logs.datasets(
                    DatasetRequestParameter(orderby="ACQUISITION_DATE_DESC")
                )
                .first()
                .acquisitionDate
            )
            # End at midnight of the following day so that the latest dataset
            # itself lies inside the time frame.
            self._end_date = (latest + timedelta(days=1)).replace(
                hour=0, minute=0, second=0, microsecond=0
            )

    def _bruker_nmr_request(self) -> DatasetRequestParameter:
        """Build the request for all "BrukerNMR" datasets in the time frame.

        A fresh object is created on every call, as the original code did for
        each of its requests.

        :return: Request parameter restricted to the format "BrukerNMR" and to
            acquisition dates between begin date and end date.
        """
        return DatasetRequestParameter(
            acquisitionDateFrom=self._begin_date,
            acquisitionDateTo=self._end_date,
            formatIds=["BrukerNMR"],
        )

    @staticmethod
    def _has_valid_acquisition_date(acquisition_date: Optional[datetime]) -> bool:
        """Check whether an acquisition date can be used for the statistics.

        :param acquisition_date: Acquisition date of a dataset, may be None.
        :return: True if the date is set and lies strictly between
            1677-09-21 and 2262-04-11, otherwise False.
        """
        # The None check has to come first: reading ``tzinfo`` from a missing
        # date would raise an AttributeError.
        if acquisition_date is None:
            return False

        # The limits are built with the time zone of the date itself so that
        # aware and naive datetimes are never compared with each other.
        # Presumably the limits mirror the range of nanosecond-resolution
        # pandas timestamps - TODO confirm.
        tz = acquisition_date.tzinfo
        lower_limit = datetime(1677, 9, 21, tzinfo=tz)
        upper_limit = datetime(2262, 4, 11, tzinfo=tz)
        return lower_limit < acquisition_date < upper_limit

    def _append_no_dimension_row(
        self, dataset_id, instrument_id, instrument_name
    ) -> None:
        """Append a dataset without dimension parameter to "no_dimension.csv".

        The file is created in the "types_of_instruments" folder; the header
        row is written only when the file does not exist yet.

        :param dataset_id: ID of the dataset without dimension.
        :param instrument_id: ID of the instrument of the dataset.
        :param instrument_name: Name of the instrument of the dataset.
        """
        self.__instrument_path.mkdir(parents=True, exist_ok=True)
        csv_path = self.__instrument_path / "no_dimension.csv"
        write_header = not csv_path.is_file()

        # newline="" is required by the csv module; without it additional
        # blank rows are written on platforms that translate line endings.
        with open(csv_path, "a", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            if write_header:
                writer.writerow(["Dataset ID", "Instrument ID", "Instrument Name"])
            writer.writerow([dataset_id, instrument_id, instrument_name])

    def get_dataset_instruments(self) -> Dict[int, Tuple[str, Dict[str, int]]]:
        """Get all instruments of the datasets and count the number of each
        type of experiment per instrument.

        Datasets without instrument are collected under the ID 0 with the name
        "No instrument". Datasets without dimension parameter are counted as
        "No dimension" and additionally listed in "no_dimension.csv". Datasets
        with a missing or out-of-range acquisition date are skipped.

        :return: Dictionary with instrument ID as key and a tuple with
            instrument name and a dictionary with experiment type as key and
            count as value. Empty if no dataset was found.
        """
        # Get the total number of datasets with the format "BrukerNMR" in the
        # given time frame
        datasets_total = self._logs.datasets(self._bruker_nmr_request()).count

        if datasets_total == 0:
            self._logger_instruments.warning(
                "No datasets with format 'BrukerNMR' found in the given time frame."
            )
            return {}

        self._logger_instruments.info(
            "Processing instruments with format 'BrukerNMR' in the given time frame: begin date: %s - end date: %s.",
            self._begin_date.strftime("%d/%B/%Y"),
            self._end_date.strftime("%d/%B/%Y"),
        )

        # {instrument_id: (instrument_name, {experiment_type: count})}
        instruments: Dict[int, Tuple[str, Dict[str, int]]] = {}
        count = 0

        for dataset in self._logs.datasets(self._bruker_nmr_request()):
            # Skip datasets with invalid acquisition date
            if not self._has_valid_acquisition_date(dataset.acquisitionDate):
                self._logger_instruments.warning(
                    "Dataset %s has invalid acquisition date.: %s. Dataset will not be included in the statistics.",
                    dataset.id,
                    dataset.acquisitionDate,
                )
                continue

            if dataset.instrument is None:
                instrument_id = 0
                instrument_name = "No instrument"
            else:
                instrument_id = dataset.instrument.id
                instrument_name = dataset.instrument.name

            # The name of the first dataset seen for an instrument is kept.
            _, type_counts = instruments.setdefault(
                instrument_id, (instrument_name, {})
            )

            dataset.fetchParameters()
            dimension = dataset.parameters.get(self._DIMENSION_PARAMETER)
            if dimension is None:
                # Keep track of the affected datasets so they can be checked
                # manually, and count them in a bucket of their own.
                self._append_no_dimension_row(
                    dataset.id, instrument_id, instrument_name
                )
                dimension = self._NO_DIMENSION_LABEL

            type_counts[dimension] = type_counts.get(dimension, 0) + 1

            # Only datasets that were actually counted advance the progress.
            count += 1
            if count % self._PROGRESS_LOG_INTERVAL == 0:
                self._logger_instruments.info(
                    "%d/%d datasets processed.", count, datasets_total
                )

        self._logger_instruments.info(
            "Finished getting all datasets with format 'BrukerNMR' in the given date range."
        )

        return instruments

    def create_statistic(self):
        """Create the statistics of the different types of NMR experiments of
        each instrument.

        One report per instrument returned by ``get_dataset_instruments`` is
        created in the "types_of_instruments" folder.
        """
        self._logger_instruments.info(
            "Starting to generate a statistical analysis of the different types of NMR experiments of each instrument."
        )

        instruments = self.get_dataset_instruments()

        self._logger_instruments.info(
            "Creating reports with a statistical analysis of the different types of NMR experiments."
        )

        for instrument_id, (instrument_name, type_counts) in instruments.items():
            # The cleaned name is only used for the file name; the plot gets
            # the unmodified instrument name.
            file_safe_name = FileHandler.clean_filename(instrument_name)
            self.create_report(
                self.__instrument_path,
                True,
                False,
                # The original name ended with a stray ")".
                f"Types_of_NMR_experiments_of_{file_safe_name}_ID{instrument_id}",
                self.create_plot_instrument_num(
                    instrument_id,
                    instrument_name,
                    type_counts,
                ),
            )

        self._logger_instruments.info(
            "Finished generating reports with a statistical analysis of the different types of NMR experiments."
        )
        self._logger_instruments.info(
            "Finished generating a statistical analysis of the different types of NMR experiments of each instrument."
        )