# Source code for LOGS_solutions.GenerateStatistics.StatisticNMR.StatisticHandlerNMR

import logging
import sys
from abc import abstractmethod
from datetime import datetime
from typing import Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from LOGS.LOGS import LOGS
from matplotlib.figure import Figure

from ..Common.CommonHandler import CommonHandler
from ..Common.DateHandler import DateHandler


class StatisticHandlerNMR(CommonHandler):
    """Abstract class for creating statistics.

    Besides the abstract :meth:`create_statistic` hook, the class bundles the
    Matplotlib plot builders shared by the NMR statistics: a utilization
    heatmap, utilization pie charts per year / month / calendar week and a
    pie chart of experiment types per instrument.

    Every plot builder closes its figure in pyplot before returning it, so the
    returned :class:`~matplotlib.figure.Figure` objects are not kept alive by
    the pyplot figure registry.
    """

    # Reference period lengths in seconds, used to express an aggregated
    # duration as a share of the period.
    # 31557600 s = 365.25 days * 86400 s.
    _SECONDS_PER_YEAR = 31557600.0
    # 7 days * 86400 s; daylight saving time and partial weeks are ignored.
    _SECONDS_PER_WEEK = 604800
    # Calendar month -> seconds; February is always counted with 28 days.
    _SECONDS_PER_MONTH = {
        1: 2678400,
        2: 2419200,  # normal, no leap year
        3: 2678400,
        4: 2592000,
        5: 2678400,
        6: 2592000,
        7: 2678400,
        8: 2678400,
        9: 2592000,
        10: 2678400,
        11: 2592000,
        12: 2678400,
    }

    # Wedge / label colors of the utilization pie charts.
    _COLOR_IN_USE = "#a71e69"
    _COLOR_NO_USE = "#83929c"

    def __init__(
        self,
        logs: LOGS,
        begin_date: datetime,
        end_date: datetime,
        target_path: str,
        logger: logging.Logger,
    ):
        """Initialization.

        :param logs: LOGS object to access the LOGS web API.
        :param begin_date: Lowest date limit for statistics to be created.
        :param end_date: Highest date limit for statistics to be created.
        :param target_path: Path where all datasets should be saved.
        :param logger: Logger used for the warnings and errors of this class.
        """
        super().__init__(logs, begin_date, end_date, target_path)
        self._logger = logger

    @abstractmethod
    def create_statistic(self):
        """Method for generating the statistics."""

    # ------------------------------------------------------------------ #
    # Private helpers shared by the plot builders
    # ------------------------------------------------------------------ #

    @staticmethod
    def _format_utilization_label(percent) -> str:
        """Return the text written into one heatmap cell.

        :param percent: Utilization of the cell in percent.
        :return: ``""`` for missing values, ``">100%"`` above 100,
            ``"<0.1%"`` for positive values that would be displayed as 0.0,
            otherwise the value with one decimal place and a percent sign.
        """
        if pd.isna(percent):
            return ""
        if percent > 100:
            return ">100%"
        # A positive value that rounds to "0.0" would look like "no usage".
        if percent > 0 and float(f"{percent:.1f}") < 0.1:
            return "<0.1%"
        return f"{percent:.1f}%"

    def _prepare_duration_frame(
        self, dates_list: List, entity_type: str, invalid_data_message: str
    ) -> Optional[pd.DataFrame]:
        """Validate ``dates_list`` and turn it into a DataFrame sorted by time.

        :param dates_list: List of ``(duration, acq_time, operators)`` tuples.
        :param entity_type: E.g. 'datasets for instrument x' (used for logging).
        :param invalid_data_message: Error message logged when the first
            element of ``dates_list`` is not a tuple.
        :return: DataFrame with the columns ``duration``, ``acq_time``
            (converted with :func:`pandas.to_datetime`) and ``operators``,
            sorted by ``acq_time``; ``None`` if ``dates_list`` is empty.
        :raises SystemExit: If the first element of ``dates_list`` is not a tuple.
        """
        if not dates_list:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        if not isinstance(dates_list[0], tuple):
            self._logger.error(invalid_data_message)
            # NOTE(review): terminating the interpreter with exit status 0 on
            # an error is kept for backward compatibility - confirm whether an
            # exception would suit the callers better.
            sys.exit(0)

        frame = pd.DataFrame(dates_list, columns=["duration", "acq_time", "operators"])
        frame["acq_time"] = pd.to_datetime(frame["acq_time"])
        return frame.sort_values(by="acq_time")

    def _create_utilization_figure(
        self, used_seconds, total_seconds, no_usage_text: str, title: str
    ) -> Figure:
        """Create one "in use" / "no use" pie chart for a single period.

        :param used_seconds: Aggregated duration within the period in seconds.
        :param total_seconds: Length of the period in seconds.
        :param no_usage_text: Text displayed instead of the pie chart when
            there is no usage in the period.
        :param title: Title of the figure.
        :return: The figure, already closed in pyplot.
        """
        unused_seconds = total_seconds - used_seconds
        fig, ax = plt.subplots()

        if unused_seconds == total_seconds:
            # Nothing was used: a pie chart cannot be drawn, show a note instead.
            ax.text(
                0.5,
                0.5,
                no_usage_text,
                horizontalalignment="center",
                verticalalignment="center",
                transform=ax.transAxes,
            )
            ax.axis("off")
        else:
            if unused_seconds < 0:
                # More usage than the period is long: a single full wedge.
                labels = ["In use (over 100%)"]
                data = [used_seconds]
                colors = [self._COLOR_IN_USE]
            elif unused_seconds == 0:
                labels = ["In use"]
                data = [used_seconds]
                colors = [self._COLOR_IN_USE]
            else:
                labels = ["In use", "No use"]
                data = [used_seconds, unused_seconds]
                colors = [self._COLOR_IN_USE, self._COLOR_NO_USE]

            _wedges, label_texts, _autotexts = ax.pie(
                data,
                labels=labels,
                autopct="%1.1f%%",
                startangle=90,
                colors=colors,
                textprops={"color": "white"},
            )
            ax.axis("equal")

            # ``textprops`` colors every text white (needed for the percentages
            # inside the wedges); the outer labels get the wedge color back.
            for label_text, label in zip(label_texts, labels):
                if label == "No use":
                    label_text.set_color(self._COLOR_NO_USE)
                else:
                    label_text.set_color(self._COLOR_IN_USE)

        ax.set_title(title)
        plt.close(fig)
        return fig

    # ------------------------------------------------------------------ #
    # Duration / utilization plots
    # ------------------------------------------------------------------ #

    def create_plot_comparison_heatmap_duration(
        self, entity_dict: Dict, entity_type: str
    ) -> Optional[Figure]:
        """Creates a heatmap with the totalized duration times of the different entities.

        (Intended for measurement times of instruments.) The durations are summed
        per entity and year and expressed as a percentage of a year of
        31557600 seconds (365.25 days).

        :param entity_dict: dict with all entities and their duration times. Each
            value is a list whose first element is the display name of the entity
            and whose remaining elements are ``(duration, acq_time, operators)``
            tuples, e.g. ``{id1: [name1, (duration, acq_time, operators), ...]}``.
            ``duration`` is interpreted as seconds and ``acq_time`` must provide a
            ``year`` attribute.
        :param entity_type: type of the entity (e.g. 'datasets for instrument x')
        :return: The heatmap figure, or ``None`` if there is no data to plot or
            the records could not be turned into a DataFrame.
        """
        if not entity_dict:
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        for entity, records in entity_dict.items():
            if not records:
                self._logger.warning(
                    "No data available for the statistics of the following entity: %s.",
                    entity,
                )
                return None

        try:
            # A record tuple of unexpected length raises ValueError on unpacking.
            df = pd.DataFrame(
                [
                    {
                        "Instrument": f"{records[0]} (ID: {key})",
                        "Year": acq_time.year,
                        "Time": duration,
                    }
                    for key, records in entity_dict.items()
                    for duration, acq_time, _operators in records[1:]
                ]
            )
        except ValueError as e:
            self._logger.error("Error creating DataFrame for %s: %s", entity_type, e)
            return None

        if df.empty:
            # Every entity only carried its name: without rows the frame has no
            # columns and pivot_table would fail with a KeyError.
            self._logger.warning(
                "No data available for the statistics of the following entity type: %s.",
                entity_type,
            )
            return None

        duration_per_year = df.pivot_table(
            index="Year",
            columns="Instrument",
            values="Time",
            aggfunc="sum",
            fill_value=0,
        )
        # Clip slightly above 100 so that over-utilization stays detectable
        # (it is displayed as ">100%") without stretching the color scale.
        utilization = (
            (duration_per_year / self._SECONDS_PER_YEAR * 100)
            .clip(upper=101)
            .round(3)
        )

        dynamic_width = max(10, len(utilization.columns))
        dynamic_height = max(6, len(utilization.index))
        fig, ax = plt.subplots(figsize=(dynamic_width, dynamic_height))
        heat_image = ax.matshow(utilization, cmap="coolwarm")

        for (row, col), percent in np.ndenumerate(utilization.to_numpy()):
            cell_label = self._format_utilization_label(percent)
            if cell_label:
                ax.text(
                    col,
                    row,
                    cell_label,
                    ha="center",
                    va="center",
                    color="black",
                )

        ax.set_xticks(np.arange(len(utilization.columns)))
        ax.set_xticklabels(utilization.columns, rotation=90)
        ax.set_yticks(np.arange(len(utilization.index)))
        ax.set_yticklabels(utilization.index)
        fig.colorbar(heat_image)
        ax.set_title(
            f"Utilization of instruments [aggregated experiment durations/year in %] of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}",
            loc="center",
        )
        ax.set_xlabel("Instrument")
        ax.set_ylabel("Year")
        plt.close(fig)
        return fig

    def create_plot_year_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates a pie chart of the duration time for each year.

        The utilization refers to a year of 31557600 seconds (365.25 days).

        :param dates_list: list of ``(duration, acq_time, operators)`` tuples of
            the entity type.
        :param entity_type: E.g. 'datasets for instrument x'.
        :return: dict mapping the year (as string) to its figure, or ``None`` if
            ``dates_list`` is empty.
        :raises SystemExit: If the first element of ``dates_list`` is not a tuple.
        """
        frame = self._prepare_duration_frame(
            dates_list, entity_type, "No data to plot for duration per year."
        )
        if frame is None:
            return None

        frame["Year"] = frame["acq_time"].dt.year
        totals = frame.groupby("Year").agg({"duration": "sum"}).reset_index()

        fig_dict = {}
        # NOTE(review): the "<br>" in the titles is kept verbatim; Matplotlib
        # does not interpret HTML, so it is presumably shown literally - confirm
        # whether a downstream consumer relies on it.
        for year, used_seconds in zip(
            totals["Year"].astype(str), totals["duration"].values
        ):
            fig_dict[year] = self._create_utilization_figure(
                used_seconds,
                self._SECONDS_PER_YEAR,
                no_usage_text=f"There is no recorded usage for {entity_type} in the year {year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                title=f"Utilization of {entity_type} in {year} [aggregated experiment duration/year, in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(used_seconds)}",
            )
        return fig_dict

    def create_plot_year_month_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates a pie chart of the duration time for each month in a year.

        The utilization is based on the standard number of seconds per month,
        without considering leap years.

        :param dates_list: list of ``(duration, acq_time, operators)`` tuples of
            the entity type
        :param entity_type: E.g. 'datasets for instrument x'
        :return: dict mapping ``"YYYY-MM"`` to the figure of that month, or
            ``None`` if ``dates_list`` is empty.
        :raises SystemExit: If the first element of ``dates_list`` is not a tuple.
        """
        frame = self._prepare_duration_frame(
            dates_list, entity_type, "ERROR: No data to plot for duration per month."
        )
        if frame is None:
            return None

        frame["Year"] = frame["acq_time"].dt.year
        frame["Month"] = frame["acq_time"].dt.month
        totals = (
            frame.groupby(["Year", "Month"]).agg({"duration": "sum"}).reset_index()
        )

        fig_dict = {}
        # The keys are built from the integer columns directly: a row-wise
        # ``apply`` would upcast year and month to float as soon as the
        # durations are floats, which breaks the ``:02d`` formatting.
        for year_number, month_number, used_seconds in zip(
            totals["Year"], totals["Month"], totals["duration"].values
        ):
            year = str(int(year_number))
            month = f"{int(month_number):02d}"
            fig_dict[f"{year}-{month}"] = self._create_utilization_figure(
                used_seconds,
                self._SECONDS_PER_MONTH[int(month_number)],
                no_usage_text=f"There is no recorded usage for {entity_type} in {month}/{year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                title=f"Utilization of {entity_type} in {month}/{year} [aggregated experiment duration/month per year in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(used_seconds)}",
            )
        return fig_dict

    def create_plot_year_calendarWeek_duration(
        self, dates_list: List, entity_type: str
    ) -> Optional[Dict[str, Figure]]:
        """Creates a pie chart with the duration time within a calendar week per year.

        The utilization is based on a standardized week length of 604800 seconds
        (without considering daylight saving time or partial weeks). Year and
        week are taken from ``isocalendar()`` of the acquisition time.

        :param dates_list: list of ``(duration, acq_time, operators)`` tuples of
            the entity type
        :param entity_type: E.g. 'datasets for instrument x'
        :return: dict mapping ``"YYYY-WW"`` to the figure of that calendar week,
            or ``None`` if ``dates_list`` is empty.
        :raises SystemExit: If the first element of ``dates_list`` is not a tuple.
        """
        frame = self._prepare_duration_frame(
            dates_list,
            entity_type,
            "ERROR: No data to plot for duration per calendar week.",
        )
        if frame is None:
            return None

        iso_dates = [timestamp.isocalendar() for timestamp in frame["acq_time"]]
        frame["Year"] = [iso_date[0] for iso_date in iso_dates]
        frame["CalendarWeek"] = [iso_date[1] for iso_date in iso_dates]
        totals = (
            frame.groupby(["Year", "CalendarWeek"])
            .agg({"duration": "sum"})
            .reset_index()
        )

        fig_dict = {}
        # See create_plot_year_month_duration: no row-wise ``apply`` here, so
        # float durations cannot turn year and week into floats.
        for year_number, week_number, used_seconds in zip(
            totals["Year"], totals["CalendarWeek"], totals["duration"].values
        ):
            year = str(int(year_number))
            calendar_week = f"{int(week_number):02d}"
            fig_dict[f"{year}-{calendar_week}"] = self._create_utilization_figure(
                used_seconds,
                self._SECONDS_PER_WEEK,
                no_usage_text=f"There is no recorded usage for {entity_type} in CW{calendar_week}/{year}. This may be due to the duration being zero, not specified, or no activity occurring during that period.",
                title=f"Utilization of {entity_type} in CW{calendar_week}/{year} [aggregated experiment duration/calendar week per year in %] <br> Aggregated utilization: {DateHandler.seconds_to_dhms(used_seconds)}",
            )
        return fig_dict

    ### Plot functions for StatisticsInstrumentsNMR ###

    def create_plot_instrument_num(
        self,
        instrument_id: int,
        instrument_name: str,
        data,
    ) -> Figure:
        """Creates a pie chart for the distribution of the data.

        :param instrument_id: ID of the instrument.
        :param instrument_name: Name of the instrument.
        :param data: Mapping of the experiment type to its number of
            occurrences (anything providing ``keys()``, ``values()`` and
            ``items()``).
        :return: Figure with the pie chart, or a figure that only contains an
            information text if ``data`` is empty.
        """
        if not data:
            information_text = f"No data available for the distribution of different types of NMR experiments of {instrument_name} (ID: {instrument_id}) of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}."
            fig, ax = plt.subplots()
            ax.text(
                0.5,
                0.5,
                information_text,
                horizontalalignment="center",
                verticalalignment="center",
                transform=ax.transAxes,
            )
            ax.axis("off")
            plt.close(fig)
            return fig

        # Materialize the dict views: they are not sequences, so they cannot be
        # relied on to convert into a numeric array inside Matplotlib.
        sizes = list(data.values())
        legend_labels = [f"{label}: {size}" for label, size in data.items()]

        fig, ax = plt.subplots()
        ax.pie(
            sizes,
            autopct="%1.1f%%",
            startangle=90,
        )
        ax.axis("equal")
        # The wedges stay unlabeled; names and counts go into a legend below
        # the chart.
        ax.legend(
            legend_labels,
            loc="center",
            bbox_to_anchor=(0.5, -0.15),
            ncol=2,
            fontsize=10,
        )
        ax.set_title(
            f"Types of NMR experiments (Instrument: {instrument_name} , ID: {instrument_id}) of datasets acquired between {self._begin_date.strftime('%d/%B/%Y')} and {self._end_date.strftime('%d/%B/%Y')}",
        )
        plt.close(fig)
        return fig