Source code for LOGS_solutions.CreateExportEntities.CreateExportSamples.SampleManager

#!/usr/bin/env python3

import csv
import json
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Set, Tuple

import numpy as np
import openpyxl
import pandas as pd
from LOGS.Auxiliary.Exceptions import LOGSException
from LOGS.Entities import (
    CustomType,
    CustomTypeRequestParameter,
    Person,
    PersonRequestParameter,
    Project,
    ProjectRequestParameter,
    Sample,
    SampleRequestParameter,
)
from LOGS.LOGS import LOGS

from ...Utils.Exceptions import CsvReadError, ExcelReadError

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class SampleManager:
    """Create samples in a LOGS instance from a CSV/Excel file, or export
    samples from a LOGS instance into a CSV/Excel (plus JSON) file.
    """

    def __init__(
        self,
        logs: LOGS,
        source_path: Optional[str] = None,
        target_path: Optional[str] = None,
        export_format: Optional[str] = ".csv",
        log_path: Optional[str] = None,
    ) -> None:
        """Initialization.

        :param logs: LOGS object to access the LOGS web API
        :param source_path: Source path of the sample file to import, defaults to None
        :param target_path: Target path (file or directory) for the exported
            samples; a directory gets a default ``samples_export`` file name,
            defaults to None
        :param export_format: Should be set to ".xlsx" if the export format
            should be an Excel table instead of a CSV file; default: ".csv"
        :param log_path: Path for the log file, defaults to None
            (``SampleManager.log`` in the current working directory)
        """
        self.__logs = logs
        # BUG FIX: the parameters are documented as str, but `.suffix` was
        # called on them directly, which raises AttributeError for plain
        # strings. Coerce to Path so both str and Path inputs work.
        self.__source_path = Path(source_path) if source_path is not None else None
        self.__target_path = Path(target_path) if target_path is not None else None

        # Configure file logging
        if log_path is None:
            log_path = Path.cwd() / "SampleManager.log"
        log_path = Path(log_path)

        # Remove any previous file handler from this logger so repeated
        # instantiation neither duplicates log lines nor leaks descriptors.
        for h in logger.handlers[:]:
            if isinstance(h, logging.FileHandler):
                logger.removeHandler(h)
                h.close()

        file_handler = logging.FileHandler(log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s:%(levelname)s:%(name)s:%(message)s")
        )
        logger.addHandler(file_handler)

        # A suffix-less target is treated as a directory and gets a
        # default export file name in the requested format.
        if self.__target_path is not None and self.__target_path.suffix == "":
            self.__target_path = self.__target_path / f"samples_export{export_format}"

        self.__export_format = export_format
        self.__source_format = self.__source_path.suffix if self.__source_path else None
[docs] def check_customtypes(self, inventories_data) -> pd.DataFrame: """Removes rows whose Custom Type ID does not exist in the LOGS instance. :param inventories_data: DataFrame with inventories data. :return: DataFrame with valid Custom Type IDs. """ logs_customtype_ids = { customtype.id for customtype in self.__logs.customTypes(CustomTypeRequestParameter()) } invalid_types = ( set(inventories_data["Custom Type ID"].dropna().unique()) - logs_customtype_ids ) for customtype in invalid_types: if customtype != "": rows = inventories_data.index[ inventories_data["Custom Type ID"] == customtype ].tolist() logger.warning( "The custom type %s in line(s) %s does not exist in this LOGS instance and will be skipped.", customtype, [r + 2 for r in rows], ) inventories_data = inventories_data[ inventories_data["Custom Type ID"].isin(logs_customtype_ids) ] return inventories_data
[docs] def check_projects(self, inventories_data) -> pd.DataFrame: """Removes rows whose project IDs do not exist in the LOGS instance. :param inventories_data: DataFrame with inventories data. :return: DataFrame with valid project IDs.""" logs_project_ids = { str(project.id) for project in self.__logs.projects(ProjectRequestParameter()) } def row_is_valid(row): project_list = row["Projects"] csv_line = row.name + 2 if project_list is None: return True if isinstance(project_list, float) and pd.isna(project_list): return True for project in project_list: if str(project) not in logs_project_ids: logger.warning( "The project %s in line %s does not exist in this LOGS instance and will be skipped.", project, csv_line, ) return False return True inventories_data = inventories_data[ inventories_data.apply(row_is_valid, axis=1) ] return inventories_data
[docs] def check_persons(self, inventories_data) -> pd.DataFrame: """Removes rows whose person IDs do not exist in the LOGS instance. :param inventories_data: DataFrame with inventories data. :return: DataFrame with valid person IDs. """ logs_person_ids = { str(person.id) for person in self.__logs.persons(PersonRequestParameter()) } def row_is_valid(row): person_list = row["Prepared By"] csv_line = row.name + 2 if person_list is None: return True for person in person_list: if str(person) not in logs_person_ids: logger.warning( "The person %s in line %s does not exist in this LOGS instance and will be skipped.", person, csv_line, ) return False return True inventories_data = inventories_data[ inventories_data.apply(row_is_valid, axis=1) ] return inventories_data
[docs] def create_attribute_list( self, attribute_str: str, attr_obj_list: List, check_person: bool = False ) -> List: """Creates a list of attributes. :param attribute_str: List of attributes of one class type :param attribute_class: Class of the attributes :param check_person: Should be True, if the attr_obj_list is a list of persons :return: List of all attributes in attribute_str. """ attr_str_list = str(attribute_str).split(",") attribute_list = [] for attr_obj in attr_obj_list: if attr_obj is not None and attr_obj != "": if attr_obj.id in attr_str_list: attribute_list.append(attr_obj) continue if check_person: if attr_obj.login in attr_str_list: attribute_list.append(attr_obj) return attribute_list
def _ensure_header(self, df: pd.DataFrame) -> None: """Ensure that the DataFrame has the correct header. Supports either: - Name, Custom Type, Projects - Name, Custom Type, Projects, Prepared At, Prepared By (Order-sensitive; adjust if you want order-insensitive behavior.) :param df: DataFrame to check. """ required_columns_1 = ["Name", "Custom Type ID", "Projects"] required_columns_2 = [ "Name", "Custom Type ID", "Projects", "Prepared At", "Prepared By", ] cols = [str(c).strip() for c in df.columns] if cols == required_columns_1 or cols == required_columns_2: return raise ValueError( f"Header does not match.\nExpected: {required_columns_1} or {required_columns_2}\nFound: {cols}" ) def _split_int_list_cell(self, v: object) -> Optional[List[int]]: """Parse '2, 1' -> [2,1]. Empty/None/NA -> None. :param v: Cell value to parse. :return: List of integers or None. """ if v is None: return None if isinstance(v, float) and pd.isna(v): return None s = str(v).strip() if s == "" or s.lower() in {"none", "nan"}: return None parts = [p.strip() for p in s.split(",")] out: List[int] = [] for p in parts: if not p: continue try: out.append(int(p)) except ValueError: continue return out if out else None
[docs] def post_process_data(self, sample_data: pd.DataFrame) -> pd.DataFrame: """Post-processes the sample data after reading from file. :param sample_data: DataFrame containing the sample data. :return: Processed DataFrame. """ self._ensure_header(sample_data) df = sample_data.copy() df = df.replace(r"^\s*$", pd.NA, regex=True) # Always parse Projects (required) if "Projects" in df.columns: df["Projects"] = df["Projects"].map(self._split_int_list_cell) # Optional columns: only process if present if "Prepared By" in df.columns: df["Prepared By"] = df["Prepared By"].map(self._split_int_list_cell) else: # keep downstream code robust if it expects the column optionally df["Prepared By"] = None if "Prepared At" in df.columns: ts = pd.to_datetime(df["Prepared At"], errors="coerce") df["Prepared At"] = ts.map( lambda x: x.to_pydatetime() if pd.notna(x) else None ) else: df["Prepared At"] = None # Custom Type ID: nullable int if "Custom Type ID" in df.columns: df["Custom Type ID"] = pd.to_numeric( df["Custom Type ID"], errors="coerce" ).astype("Int64") df = df.dropna(how="all") return df
[docs] def read_file(self) -> pd.DataFrame: """Reads the sample data file and returns a DataFrame. :return: DataFrame containing the sample data. """ logger.info("Reading sample data from file: %s", self.__source_path) if self.__source_format == ".csv": try: sample_data = pd.read_csv( self.__source_path, delimiter=";", dtype=str, keep_default_na=False, quotechar='"', skip_blank_lines=True, ) sample_data = self.post_process_data(sample_data) except Exception as e: message = f"Error reading CSV file with the samples: {e}" logger.exception(message) raise CsvReadError(message) from e elif self.__source_format in [".xlsx"]: try: sample_data = pd.read_excel( self.__source_path, keep_default_na=False, engine="openpyxl", dtype=str, ) sample_data = self.post_process_data(sample_data) except Exception as e: message = f"Error reading Excel file with the samples: {e}" logger.exception(message) raise ExcelReadError(message) from e else: raise ValueError( f"Unsupported source format: {self.__source_format}. Supported formats are: .csv, .xlsx" ) return sample_data
    def create_samples(self) -> None:
        """Creates a sample by the given csv-file.

        Reads the source file, filters out rows with unknown person,
        project, or custom-type IDs, then creates one LOGS sample per
        remaining row via the web API.

        :return: None
        """
        sample_data = self.read_file()
        logger.info("Creating samples in LOGS instance.")
        # Remember which optional columns exist before the check_* filters run.
        bool_prepared_by = "Prepared By" in sample_data.columns
        bool_prepared_at = "Prepared At" in sample_data.columns
        # Drop rows referencing IDs that do not exist in this LOGS instance.
        sample_data = self.check_persons(sample_data)
        sample_data = self.check_projects(sample_data)
        sample_data = self.check_customtypes(sample_data)
        # Create each sample
        for idx, sample in sample_data.iterrows():
            csv_line = idx + 2  # +2 for header and 0-based index
            projects = sample["Projects"]
            # Resolve the custom-type entity; None when the cell is empty.
            sample_customtype = (
                self.__logs.customTypes(
                    CustomTypeRequestParameter(ids=[int(sample["Custom Type ID"])])
                ).first()
                if not pd.isna(sample["Custom Type ID"])
                else None
            )
            logger.info(
                "Custom Type Name in line %s: %s",
                csv_line,
                sample_customtype.name if sample_customtype else "None",
            )
            log_sample = self.__logs.newSample(entityOrCustomTypeOrId=sample_customtype)
            log_sample.name = str(sample["Name"]).strip()
            log_sample.projects = projects
            # Only these legacy custom types carry 'Prepared by/on' fields
            # in their custom values.
            if (log_sample.customType is not None) and (
                sample_customtype.name
                in [
                    "Basic",
                    "Sample (LOGS 3.1)",
                ]
            ):
                if bool_prepared_by:
                    if sample["Prepared By"] is not None:
                        log_sample.customValues.Legacy_fields.Prepared_by = sample[
                            "Prepared By"
                        ]
                if bool_prepared_at:
                    val = sample["Prepared At"]
                    if pd.notna(val):
                        # Excel reads may yield pandas Timestamps; the API
                        # expects plain datetimes.
                        if hasattr(val, "to_pydatetime"):
                            val = val.to_pydatetime()
                        log_sample.customValues.Legacy_fields.Prepared_on = val
            try:
                self.__logs.create(log_sample)
                logger.info("The sample in line %s has been created.", csv_line)
            except LOGSException:
                # Best-effort: log and continue with the remaining rows.
                logger.exception(
                    "The sample in line %s could not be created.", csv_line
                )
[docs] def export_samples_json(self) -> None: """Exports samples from the LOGS instance to JSON files. :return: None""" target_dir = os.path.dirname(self.__target_path) for sample in self.__logs.samples(SampleRequestParameter()): sample_json = sample.toJson() json_filename = f"sample_{sample.id}.json" json_path = os.path.join(target_dir, json_filename) with open(json_path, "w", encoding="utf-8") as json_file: json.dump(sample_json, json_file, ensure_ascii=False, indent=2)
[docs] def export_samples_csv(self) -> None: """Export Samples from logs to a csv file. :return: None """ heading = [ "Name", "Custom Type ID", "Projects", ] with open(self.__target_path, "w", newline="", encoding="utf-8") as file: writer = csv.writer( file, delimiter=";", quotechar='"', quoting=csv.QUOTE_ALL ) writer.writerow(heading) for sample in self.__logs.samples(SampleRequestParameter()): projects_str = "" if sample.projects is not None: projects_str = ",".join( str(project.id) for project in sample.projects ) sample_data = [ sample.name, sample.customType.id if sample.customType else "", projects_str, ] writer.writerow(sample_data)
[docs] def export_samples_excel(self) -> None: """Export Samples from logs to an excel file. :return: None """ heading = [ "Name", "Custom Type ID", "Projects", ] wb = openpyxl.Workbook() ws = wb.active ws.append(heading) for sample in self.__logs.samples(SampleRequestParameter()): projects_str = "" if sample.projects is not None: projects_str = ",".join(str(project.id) for project in sample.projects) sample_data = [ sample.name, sample.customType.id if sample.customType else "", projects_str, ] ws.append(sample_data) wb.save(self.__target_path)
[docs] def export_samples(self) -> None: """Exports samples from the LOGS instance to a CSV file or Excel file. :return: None """ if self.__export_format == ".csv": self.export_samples_csv() elif self.__export_format == ".xlsx": self.export_samples_excel() else: raise ValueError( f"Invalid export format: {self.__export_format}. Supported formats are: .csv, .xlsx" ) self.export_samples_json()