Source code for LOGS_solutions.GenerateStatistics.StatisticNMR.StatisticHandlerNMR

import logging
import sys
from abc import abstractmethod
from datetime import datetime
from typing import Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from LOGS.LOGS import LOGS
from matplotlib.figure import Figure

from ..Common.CommonHandler import CommonHandler
from ..Common.DateHandler import DateHandler



[docs]
class StatisticHandlerNMR(CommonHandler):
    """Abstract class for creating statistics."""

    def __init__(
        self,
        logs: LOGS,
        begin_date: datetime,
        end_date: datetime,
        target_path: str,
        logger: logging.Logger,
    ) -> None:
        """Initialization.

        :param logs: LOGS object to access the LOGS web API,
        :param begin_date: Lowest date limit for statistics to be
            created.
        :param end_date: Highest date limit for statistics to be
            created.
        :param target_path: Path where all datasets should be saved.
        :param logger: Logger used by the plot methods of this class to
            report missing or invalid input data.
        """

        # LOGS connection, date range and target path are forwarded to
        # CommonHandler; only the logger is stored by this class itself.
        super().__init__(logs, begin_date, end_date, target_path)
        self._logger = logger


[docs]
    @abstractmethod
    def create_statistic(self):
        """Method for generating the statistics.

        Declared abstract: this class provides no implementation, so
        concrete statistic handlers are expected to override it.
        """



[docs]
    def create_plot_comparison_heatmap_duration(
        self, entity_dict: Dict, entity_type: str
    ) -> Optional[Figure]:
        """Creates a heatmap with the totalized duration times of the different
        entities. (Intended for measurement times of instruments)

        The durations are summed per instrument and year and displayed as a
        percentage of one year (31557600 seconds, i.e. 365.25 days). Values
        are clipped at 101 % so that everything above 100 % shares one colour
        and is labelled ">100%".

        :param entity_dict: dict mapping an entity ID to a list whose first
            element is the entity name and whose remaining elements are
            ``(duration, acq_time, operators)`` tuples, e.g.
            ``{id1: [name1, (duration, acq_time, operators), ...], ...}``.
            ``acq_time`` must provide a ``year`` attribute; durations are
            treated as seconds.
        :param entity_type: type of the entity (e.g. 'datasets for instrument
            x'); only used in log messages.
        :return: the heatmap figure, or ``None`` if there is no data to plot
            or the records could not be converted into a table.
        """
        if not entity_dict:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        # A single entity without any entry aborts the whole heatmap (the
        # name lookup below would otherwise fail with an IndexError).
        for entity, dates in entity_dict.items():
            if not dates:
                self._logger.warning(
                    "No data available for the statistics of the following entity: %s.",
                    entity,
                )
                return None

        try:
            df = pd.DataFrame(
                [
                    {
                        "Instrument": f"{records[0]} (ID: {key})",
                        "Year": acq_time.year,
                        "Time": duration,
                    }
                    for key, records in entity_dict.items()
                    for duration, acq_time, _operators in records[1:]
                ]
            )
        except ValueError as e:
            # Raised when a record does not unpack into exactly three values.
            self._logger.error("Error creating DataFrame for %s: %s", entity_type, e)
            return None

        # Entities that only carry a name yield a frame without columns;
        # pivot_table would raise a KeyError on it.
        if df.empty:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        heatmap_instrument_pivot_table = df.pivot_table(
            index="Year",
            columns="Instrument",
            values="Time",
            aggfunc="sum",
            fill_value=0,
        )

        # Seconds per year: 31557600.0 corresponds to 365.25 days.
        total_time_year = 31557600.0

        heatmap_instrument_pivot_table = (
            (heatmap_instrument_pivot_table / total_time_year * 100)
            .clip(upper=101)
            .round(3)
        )

        instruments = heatmap_instrument_pivot_table.columns
        years = heatmap_instrument_pivot_table.index

        # Grow the figure with the table, but never below 10 x 6 inches.
        fig, ax = plt.subplots(figsize=(max(10, len(instruments)), max(6, len(years))))
        image = ax.matshow(heatmap_instrument_pivot_table, cmap="coolwarm")

        for (row, col), val in np.ndenumerate(heatmap_instrument_pivot_table):
            if pd.isna(val):
                cell_label = ""
            elif val > 0 and float(f"{val:.1f}") < 0.1:
                # Positive but would be displayed as 0.0 %.
                cell_label = "<0.1%"
            elif val > 100:
                cell_label = ">100%"
            else:
                cell_label = f"{val:.1f}%"
            ax.text(col, row, cell_label, ha="center", va="center", color="black")

        ax.set_xticks(np.arange(len(instruments)))
        ax.set_xticklabels(instruments, rotation=90)
        ax.set_yticks(np.arange(len(years)))
        ax.set_yticklabels(years)

        fig.colorbar(image)
        ax.set_title(
            f"Utilization of instruments [aggregated experiment durations/year in %] of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}",
            loc="center",
        )
        ax.set_xlabel("Instrument")
        ax.set_ylabel("Year")

        # Remove the figure from pyplot's registry; the Figure object itself
        # stays usable for the caller.
        plt.close(fig)

        return fig



[docs]
    def create_plot_year_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates one pie chart per year showing which share of the year is
        covered by the summed durations, based on a year of 365.25 days
        (31557600 seconds).

        A year whose summed duration is zero gets a figure containing only an
        explanatory text instead of a pie chart.

        :param dates_list: list of ``(duration, acq_time)`` tuples of the
            entity type; durations are treated as seconds.
        :param entity_type: E.g. 'datasets for instrument x'.
        :return: dict mapping the year (as string) to its figure, or ``None``
            if ``dates_list`` is empty or does not consist of tuples.
        """

        if not dates_list:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        if not isinstance(dates_list[0], tuple):
            # Invalid input is reported and skipped like missing input; a
            # plotting helper must not terminate the whole process.
            self._logger.error("No data to plot for duration per year.")
            return None

        seconds_per_year = 31557600.0  # 365.25 days
        in_use_color = "#a71e69"
        no_use_color = "#83929c"

        # TODO: use the columns ["duration", "acq_time", "operators"] again
        # once the operators are part of the records.
        df = pd.DataFrame(dates_list, columns=["duration", "acq_time"])
        df["acq_time"] = pd.to_datetime(df["acq_time"])
        df = df.sort_values(by="acq_time")
        df["Year"] = df["acq_time"].dt.year
        duration_per_year = df.groupby("Year").agg({"duration": "sum"}).reset_index()

        fig_dict = {}
        for year, value in zip(
            duration_per_year["Year"].astype(str),
            duration_per_year["duration"].values,
        ):
            remaining = seconds_per_year - value
            fig, ax = plt.subplots()

            if remaining == seconds_per_year:
                # Nothing was recorded for this year.
                ax.text(
                    0.5,
                    0.5,
                    f"There is no recorded usage for {entity_type} in the year {year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                    horizontalalignment="center",
                    verticalalignment="center",
                    transform=ax.transAxes,
                )
                ax.axis("off")
            else:
                if remaining < 0:
                    labels = ["In use (over 100%)"]
                    sizes = [value]
                elif remaining == 0:
                    labels = ["In use"]
                    sizes = [value]
                else:
                    labels = ["In use", "No use"]
                    sizes = [value, remaining]
                colors = [
                    no_use_color if label == "No use" else in_use_color
                    for label in labels
                ]

                _, texts, _ = ax.pie(
                    sizes,
                    labels=labels,
                    autopct="%1.1f%%",
                    startangle=90,
                    colors=colors,
                    textprops={"color": "white"},
                )
                ax.axis("equal")

                # textprops paints every text white; give the outer labels
                # the colour of their wedge so they stay readable.
                for text, color in zip(texts, colors):
                    text.set_color(color)

                # NOTE(review): matplotlib renders "<br>" literally; it only
                # acts as a line break if the figure is converted to HTML
                # downstream - confirm.
                ax.set_title(
                    f"Utilization of {entity_type} in {year} [aggregated experiment duration/year, in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(value)}"
                )

            fig_dict[year] = fig
            plt.close(fig)

        return fig_dict



[docs]
    def create_plot_year_month_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates a pie chart of the duration time for each month in a year,
        based on the standard number of seconds per month, without considering
        leap years.

        A month whose summed duration is zero gets a figure containing only an
        explanatory text instead of a pie chart.

        :param dates_list: list of ``(duration, acq_time)`` tuples of the
            entity type; durations are treated as seconds.
        :param entity_type: E.g. 'datasets for instrument x'
        :return: dict mapping ``"YYYY-MM"`` to the figure of that month, or
            ``None`` if ``dates_list`` is empty or does not consist of tuples.
        """

        if not dates_list:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        if not isinstance(dates_list[0], tuple):
            # Invalid input is reported and skipped like missing input; a
            # plotting helper must not terminate the whole process.
            self._logger.error("ERROR: No data to plot for duration per month.")
            return None

        # Days per month of a non-leap year (February always counts 28 days).
        days_per_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
        seconds_per_day = 86400
        in_use_color = "#a71e69"
        no_use_color = "#83929c"

        # TODO: use the columns ["duration", "acq_time", "operators"] again
        # once the operators are part of the records.
        df = pd.DataFrame(dates_list, columns=["duration", "acq_time"])
        df["acq_time"] = pd.to_datetime(df["acq_time"])
        df = df.sort_values(by="acq_time")
        df["Year"] = df["acq_time"].dt.year
        df["Month"] = df["acq_time"].dt.month
        duration_per_month = (
            df.groupby(["Year", "Month"]).agg({"duration": "sum"}).reset_index()
        )

        fig_dict = {}
        # The key columns are read directly instead of via a row-wise apply:
        # with float durations each row would be upcast to float and the
        # integer format of the month would raise a ValueError.
        for year_number, month_number, value in zip(
            duration_per_month["Year"],
            duration_per_month["Month"],
            duration_per_month["duration"].values,
        ):
            year = str(int(year_number))
            month = f"{int(month_number):02d}"
            month_seconds = days_per_month[int(month_number) - 1] * seconds_per_day
            remaining = month_seconds - value

            fig, ax = plt.subplots()

            if remaining == month_seconds:
                # Nothing was recorded for this month.
                ax.text(
                    0.5,
                    0.5,
                    f"There is no recorded usage for {entity_type} in {month}/{year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                    horizontalalignment="center",
                    verticalalignment="center",
                    transform=ax.transAxes,
                )
                ax.axis("off")
            else:
                if remaining < 0:
                    labels = ["In use (over 100%)"]
                    sizes = [value]
                elif remaining == 0:
                    labels = ["In use"]
                    sizes = [value]
                else:
                    labels = ["In use", "No use"]
                    sizes = [value, remaining]
                colors = [
                    no_use_color if label == "No use" else in_use_color
                    for label in labels
                ]

                _, texts, _ = ax.pie(
                    sizes,
                    labels=labels,
                    autopct="%1.1f%%",
                    startangle=90,
                    colors=colors,
                    textprops={"color": "white"},
                )
                ax.axis("equal")

                # textprops paints every text white; give the outer labels
                # the colour of their wedge so they stay readable.
                for text, color in zip(texts, colors):
                    text.set_color(color)

                # NOTE(review): matplotlib renders "<br>" literally; it only
                # acts as a line break if the figure is converted to HTML
                # downstream - confirm.
                ax.set_title(
                    f"Utilization of {entity_type} in {month}/{year} [aggregated experiment duration/month per year in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(value)}"
                )

            fig_dict[f"{year}-{month}"] = fig
            plt.close(fig)

        return fig_dict



[docs]
    def create_plot_year_calendarWeek_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates a pie chart with the duration time within a calendar week
        per year, based on a standardized week length (without considering
        daylight saving time or partial weeks).

        Year and week are taken from ``isocalendar()`` of the acquisition
        time. A week whose summed duration is zero gets a figure containing
        only an explanatory text instead of a pie chart.

        :param dates_list: list of ``(duration, acq_time)`` tuples of the
            entity type; durations are treated as seconds.
        :param entity_type: E.g. 'datasets for instrument x'
        :return: dict mapping ``"YYYY-WW"`` to the figure of that calendar
            week, or ``None`` if ``dates_list`` is empty or does not consist
            of tuples.
        """

        if not dates_list:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        if not isinstance(dates_list[0], tuple):
            # Invalid input is reported and skipped like missing input; a
            # plotting helper must not terminate the whole process.
            self._logger.error("ERROR: No data to plot for duration per calendar week.")
            return None

        seconds_per_week = 604800  # 7 days
        in_use_color = "#a71e69"
        no_use_color = "#83929c"

        # TODO: use the columns ["duration", "acq_time", "operators"] again
        # once the operators are part of the records.
        df = pd.DataFrame(dates_list, columns=["duration", "acq_time"])
        df["acq_time"] = pd.to_datetime(df["acq_time"])
        df = df.sort_values(by="acq_time")
        iso_dates = [acq_time.isocalendar() for acq_time in df["acq_time"]]
        df["Year"] = [iso_date[0] for iso_date in iso_dates]
        df["CalendarWeek"] = [iso_date[1] for iso_date in iso_dates]

        duration_per_week = (
            df.groupby(["Year", "CalendarWeek"]).agg({"duration": "sum"}).reset_index()
        )

        fig_dict = {}
        # The key columns are read directly instead of via a row-wise apply:
        # with float durations each row would be upcast to float and the
        # integer format of the week would raise a ValueError.
        for year_number, week_number, value in zip(
            duration_per_week["Year"],
            duration_per_week["CalendarWeek"],
            duration_per_week["duration"].values,
        ):
            year = str(int(year_number))
            calendar_week = f"{int(week_number):02d}"
            remaining = seconds_per_week - value

            fig, ax = plt.subplots()

            if remaining == seconds_per_week:
                # Nothing was recorded for this calendar week.
                ax.text(
                    0.5,
                    0.5,
                    f"There is no recorded usage for {entity_type} in CW{calendar_week}/{year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                    horizontalalignment="center",
                    verticalalignment="center",
                    transform=ax.transAxes,
                )
                ax.axis("off")
            else:
                if remaining < 0:
                    labels = ["In use (over 100%)"]
                    sizes = [value]
                elif remaining == 0:
                    labels = ["In use"]
                    sizes = [value]
                else:
                    labels = ["In use", "No use"]
                    sizes = [value, remaining]
                colors = [
                    no_use_color if label == "No use" else in_use_color
                    for label in labels
                ]

                _, texts, _ = ax.pie(
                    sizes,
                    labels=labels,
                    autopct="%1.1f%%",
                    startangle=90,
                    colors=colors,
                    textprops={"color": "white"},
                )
                ax.axis("equal")

                # textprops paints every text white; give the outer labels
                # the colour of their wedge so they stay readable.
                for text, color in zip(texts, colors):
                    text.set_color(color)

                # NOTE(review): matplotlib renders "<br>" literally; it only
                # acts as a line break if the figure is converted to HTML
                # downstream - confirm.
                ax.set_title(
                    f"Utilization of {entity_type} in CW{calendar_week}/{year} [aggregated experiment duration/calendar week per year in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(value)}"
                )

            fig_dict[f"{year}-{calendar_week}"] = fig
            plt.close(fig)

        return fig_dict


    ### Plot functions for StatisticsInstrumentsNMR ###


[docs]
    def create_plot_instrument_num(
        self,
        instrument_id: int,
        instrument_name: str,
        data,
    ) -> Figure:
        """Creates a pie chart for the distribution of the data.

        If ``data`` is empty, the returned figure contains only a text saying
        that no data is available for the instrument in the configured date
        range.

        :param instrument_id: ID of the instrument.
        :param instrument_name: Name of the instrument.
        :param data: Dict-like object providing ``keys()`` and ``values()``;
            the values are the wedge sizes and each key/value pair becomes
            one legend entry (presumably experiment type -> number of
            datasets; verify against caller).
        :return: Figure with the pie chart.
        """

        # Exactly one figure is created per call, so none is left behind in
        # pyplot's figure registry.
        fig, ax = plt.subplots()

        if not data:
            information_text = f"No data available for the distribution of different types of NMR experiments of {instrument_name} (ID: {instrument_id}) of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}."
            ax.text(
                0.5,
                0.5,
                information_text,
                horizontalalignment="center",
                verticalalignment="center",
                transform=ax.transAxes,
            )
            ax.axis("off")
            plt.close(fig)
            return fig

        # Materialise the views: pie() converts its input to an array, which
        # is only well-defined for real sequences.
        labels = list(data.keys())
        sizes = list(data.values())
        legend_labels = [f"{label}: {size}" for label, size in zip(labels, sizes)]

        # The wedges carry only their percentage; names and absolute values
        # are shown in the legend below the chart.
        ax.pie(
            sizes,
            autopct="%1.1f%%",
            startangle=90,
        )
        ax.axis("equal")
        ax.legend(
            legend_labels,
            loc="center",
            bbox_to_anchor=(0.5, -0.15),
            ncol=2,
            fontsize=10,
        )

        ax.set_title(
            f"Types of NMR experiments (Instrument: {instrument_name} , ID: {instrument_id}) of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}",
        )

        plt.close(fig)
        return fig