Source code for LOGS_solutions.GenerateStatistics.StatisticNMR.StatisticsTypesOfExperiments

import csv
import logging
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Tuple
from pathvalidate import sanitize_filename

from LOGS.Entities import DatasetRequestParameter
from LOGS.LOGS import LOGS

from .StatisticHandlerNMR import StatisticHandlerNMR



[docs]
class StatisticsTypesOfExperiments(StatisticHandlerNMR):
    """Count the types of NMR experiments per instrument and create one
    report per instrument from these counts.

    Only datasets with the format ``BrukerNMR`` whose creation date lies in
    the configured time frame are considered. The experiment type of a
    dataset is read from its ``General creation parameters/Dimension``
    parameter.
    """

    # Dataset parameter that holds the experiment type of a dataset.
    _DIMENSION_PARAMETER = "General creation parameters/Dimension"
    # Experiment type used for datasets that do not carry that parameter.
    _NO_DIMENSION_LABEL = "No dimension"
    # A progress message is logged every time this many datasets were handled.
    _PROGRESS_INTERVAL = 10000

    def __init__(
        self,
        logs: LOGS,
        begin_date: datetime = None,
        end_date: datetime = None,
        target_path: str = None,
    ):
        """Initialization.

        :param logs: LOGS object to access the LOGS web API,
        :param begin_date: Lowest date limit for statistics to be
            created. If omitted, midnight of the day of the first dataset
            (sorted by creation date) is used.
        :param end_date: Highest date limit for statistics to be
            created. If omitted, midnight after the day of the last dataset
            (sorted by creation date) is used, or midnight after today if
            there are no datasets.
        :param target_path: Path where all datasets should be saved.
        """

        self._logger_instruments = logging.getLogger("StatisticsTypesInstruments")
        self._logger_instruments.setLevel(logging.INFO)

        # NOTE(review): this folder is created but nothing in this class
        # writes to it; kept because other code may rely on it existing.
        logfile_folder = Path(__file__).resolve().parent / "logfiles"
        logfile_folder.mkdir(parents=True, exist_ok=True)

        # Attach the console handler only once, so that creating several
        # instances does not duplicate every log line.
        if not self._logger_instruments.hasHandlers():
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

            logconsole_handler = logging.StreamHandler(sys.stdout)
            logconsole_handler.setLevel(logging.INFO)
            logconsole_handler.setFormatter(formatter)
            self._logger_instruments.addHandler(logconsole_handler)

        super().__init__(
            logs, begin_date, end_date, target_path, self._logger_instruments
        )
        self.__instrument_path = self._target_path / "types_of_instruments"

        if self._begin_date is None:
            first_dataset = self._logs.datasets(
                DatasetRequestParameter(sortBy="CREATION_DATE")
            ).first()
            self._begin_date = first_dataset.creationDate.replace(
                hour=0, minute=0, second=0, microsecond=0
            )

        if self._end_date is None:
            # Walk the iterator and keep only the last dataset instead of
            # holding every dataset in memory at once.
            last_dataset = None
            for last_dataset in self._logs.datasets(
                DatasetRequestParameter(sortBy="CREATION_DATE")
            ):
                pass
            latest = (
                last_dataset.creationDate
                if last_dataset is not None
                else datetime.now()
            )
            # Move to the following midnight so the whole last day is inside
            # the time frame.
            self._end_date = (latest + timedelta(days=1)).replace(
                hour=0, minute=0, second=0, microsecond=0
            )

    def get_dataset_instruments(self) -> Dict[int, Tuple[str, Dict[str, int]]]:
        """Get all instruments of the datasets and count the number of each
        type of experiment per instrument.

        Datasets without an instrument are collected under the ID ``0`` with
        the name ``"No instrument"``. Datasets without a dimension parameter
        are counted as ``"No dimension"`` and additionally listed in
        ``no_dimension.csv`` inside the instrument report folder. Datasets
        without a creation date, or with one outside the accepted range, are
        skipped with a warning.

        :return: Dictionary with instrument ID as key and a tuple with
            instrument name and a dictionary with experiment type as key
            and count as value. Empty if no matching dataset exists.
        """

        datasets_total = self._logs.datasets(self._bruker_nmr_request()).count

        if datasets_total == 0:
            self._logger_instruments.warning(
                "No datasets with format 'BrukerNMR' found in the given time frame."
            )
            return {}

        self._logger_instruments.info(
            "Processing instruments with format 'BrukerNMR' in the given time frame: begin date: %s - end date: %s.",
            self._begin_date.strftime("%d/%B/%Y"),
            self._end_date.strftime("%d/%B/%Y"),
        )

        # {instrument_id: (instrument_name, {experiment_type: count})}
        instruments: Dict[int, Tuple[str, Dict[str, int]]] = {}

        datasets = self._logs.datasets(self._bruker_nmr_request())
        for processed, dataset in enumerate(datasets, start=1):
            if self._has_valid_creation_date(dataset.creationDate):
                self._count_dataset(dataset, instruments)
            else:
                self._logger_instruments.warning(
                    "Dataset %s has invalid creation date.: %s. Dataset will not be included in the statistics.",
                    dataset.id,
                    dataset.creationDate,
                )

            # Skipped datasets count as processed too, because they are part
            # of datasets_total.
            if processed % self._PROGRESS_INTERVAL == 0:
                self._logger_instruments.info(
                    "%d/%d datasets processed.", processed, datasets_total
                )

        self._logger_instruments.info(
            "Finished getting all datasets with format 'BrukerNMR' in the given date range."
        )

        return instruments

    def create_statistic(self):
        """Create the statistics of the different types of NMR experiments of
        each instrument.

        One report is created per instrument returned by
        :meth:`get_dataset_instruments`. The report name contains the
        instrument name (sanitized for use as a file name) and the
        instrument ID.
        """

        self._logger_instruments.info(
            "Starting to generate a statistical analysis of the different types of NMR experiments of each instrument."
        )

        instruments = self.get_dataset_instruments()

        self._logger_instruments.info(
            "Creating reports with a statistical analysis of the different types of NMR experiments."
        )

        for instrument_id, (instrument_name, experiment_counts) in instruments.items():
            safe_name = sanitize_filename(instrument_name)
            plot = self.create_plot_instrument_num(
                instrument_id,
                instrument_name,
                experiment_counts,
            )
            # NOTE(review): the two boolean flags are passed positionally;
            # presumably they select the output formats - confirm against
            # StatisticHandlerNMR.create_report.
            self.create_report(
                self.__instrument_path,
                True,
                False,
                f"Types_of_NMR_experiments_of_{safe_name}_ID{instrument_id}",
                plot,
            )

        self._logger_instruments.info(
            "Finished generating reports with a statistical analysis of the different types of NMR experiments."
        )
        self._logger_instruments.info(
            "Finished generating a statistical analysis of the different types of NMR experiments of each instrument."
        )

    def _bruker_nmr_request(self):
        """Build a fresh request for all ``BrukerNMR`` datasets created in
        the configured time frame."""

        return DatasetRequestParameter(
            creationDateFrom=self._begin_date,
            creationDateTo=self._end_date,
            formatIds=["BrukerNMR"],
        )

    @staticmethod
    def _has_valid_creation_date(creation_date) -> bool:
        """Tell whether a creation date is set and inside the accepted range.

        :param creation_date: Creation date of a dataset, may be ``None``.
        :return: ``True`` if the date lies strictly between 1677-09-21 and
            2262-04-11 (in the date's own time zone), ``False`` otherwise.
        """

        # Check for None before touching tzinfo; a missing date must be
        # reported as invalid, not raise an AttributeError.
        if creation_date is None:
            return False

        # NOTE(review): the bounds presumably mirror the range of a
        # nanosecond-resolution pandas timestamp - confirm.
        tz = creation_date.tzinfo
        lower = datetime(1677, 9, 21, tzinfo=tz)
        upper = datetime(2262, 4, 11, tzinfo=tz)
        return lower < creation_date < upper

    def _count_dataset(self, dataset, instruments) -> None:
        """Add one dataset to the per-instrument experiment type counts.

        :param dataset: Dataset to count.
        :param instruments: Mapping that is updated in place, see
            :meth:`get_dataset_instruments` for its layout.
        """

        if dataset.instrument is None:
            instrument_id = 0
            instrument_name = "No instrument"
        else:
            instrument_id = dataset.instrument.id
            instrument_name = dataset.instrument.name

        if instrument_id not in instruments:
            instruments[instrument_id] = (instrument_name, {})
        experiment_counts = instruments[instrument_id][1]

        dataset.fetchParameters()
        dimension = dataset.parameters.get(self._DIMENSION_PARAMETER)
        if dimension is None:
            self._record_missing_dimension(
                dataset.id, instrument_id, instrument_name
            )
            dimension = self._NO_DIMENSION_LABEL

        experiment_counts[dimension] = experiment_counts.get(dimension, 0) + 1

    def _record_missing_dimension(
        self, dataset_id, instrument_id, instrument_name
    ) -> None:
        """Append a dataset without dimension parameter to
        ``no_dimension.csv``, writing the header row if the file is new."""

        self.__instrument_path.mkdir(parents=True, exist_ok=True)
        csv_path = self.__instrument_path / "no_dimension.csv"
        write_header = not csv_path.is_file()

        # newline="" is required by the csv module to avoid blank rows on
        # Windows; an explicit encoding keeps non-ASCII instrument names
        # writable regardless of the platform default.
        with open(csv_path, "a", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            if write_header:
                writer.writerow(
                    [
                        "Dataset ID",
                        "Instrument ID",
                        "Instrument Name",
                    ]
                )
            writer.writerow([dataset_id, instrument_id, instrument_name])