Source code for hypex.reporters.abstract

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any

from ..dataset import Dataset, ExperimentData
from ..dataset.roles import InfoRole, ReportRole, TreatmentRole
from ..utils import ID_SPLIT_SYMBOL, ExperimentDataEnum
from ..utils.errors import AbstractMethodError


[docs] class Reporter(ABC): @abstractmethod def report(self, data: ExperimentData): raise AbstractMethodError
[docs] class DictReporter(Reporter, ABC): def __init__(self, front=True): self.front = front @staticmethod def extract_from_one_row_dataset(data: Dataset) -> dict[str, Any]: return {k: v[0] for k, v in data.to_dict()["data"]["data"].items()} def _extract_from_comparator(self, data: ExperimentData, comparator_id: str): result = {} field = comparator_id[comparator_id.rfind(ID_SPLIT_SYMBOL) + 1 :] executor_name = comparator_id[: comparator_id.find(ID_SPLIT_SYMBOL)] sep = " " if self.front else ID_SPLIT_SYMBOL analysis_dict = data.analysis_tables[comparator_id].to_dict()["data"] for i, index_value in enumerate(analysis_dict["index"]): for k, v in analysis_dict["data"].items(): key = sep.join( [field, executor_name, k, str(index_value)] if field else [executor_name, k, str(index_value)] ) result[key] = v[i] return result def _extract_from_comparators( self, data: ExperimentData, comparator_ids: list[str] ) -> dict[str, Any]: result = {} for comparator_id in comparator_ids: result.update(self._extract_from_comparator(data, comparator_id)) return result @abstractmethod def report(self, data: ExperimentData) -> dict: raise AbstractMethodError
class OnDictReporter(Reporter, ABC): def __init__(self, dict_reporter: DictReporter) -> None: self.dict_reporter = dict_reporter
[docs] class DatasetReporter(OnDictReporter): def report(self, data: ExperimentData) -> dict[str, Dataset] | Dataset: dict_result = self.dict_reporter.report(data) return self.convert_to_dataset( dict_result ) # TODO: change to DatasetAdapter.to_dataset() @staticmethod def convert_to_dataset(data: dict) -> dict[str, Dataset] | Dataset: return Dataset.from_dict(roles={k: ReportRole() for k in data}, data=[data])
class TestDictReporter(DictReporter): @staticmethod def _get_struct_dict(data: dict): dict_result = {} for key, value in data.items(): if ID_SPLIT_SYMBOL in key: key_split = key.split(ID_SPLIT_SYMBOL) if key_split[2] in ( "pass", "p-value", "difference", "difference %", "control mean", "test mean", ): if key_split[0] not in dict_result: dict_result[key_split[0]] = { key_split[3]: {key_split[1]: {key_split[2]: value}} } elif key_split[3] not in dict_result[key_split[0]]: dict_result[key_split[0]][key_split[3]] = { key_split[1]: {key_split[2]: value} } elif key_split[1] not in dict_result[key_split[0]][key_split[3]]: dict_result[key_split[0]][key_split[3]][key_split[1]] = { key_split[2]: value } else: dict_result[key_split[0]][key_split[3]][key_split[1]][ key_split[2] ] = value return dict_result @staticmethod def _convert_struct_dict_to_dataset(data: dict) -> Dataset: def _is_truthy(v) -> bool: """Return True iff v represents a truthy pass value. Handles the case where transpose().to_records() stringifies booleans (e.g. numpy.bool_(False) → "False"), so a plain bool() check would incorrectly treat the non-empty string "False" as True. """ if v is None: return False if isinstance(v, str): return v.lower() == "true" return bool(v) def rename_passed(data: dict[str, bool]): return { c: (("NOT OK" if _is_truthy(v) else "OK") if "pass" in c else v) for c, v in data.items() } result = [] for feature, groups in data.items(): for group, tests in groups.items(): t_values = {"feature": feature, "group": group} for test, values in tests.items(): if test == "GroupDifference": t_values["control mean"] = values.get("control mean") t_values["test mean"] = values.get("test mean") t_values["difference"] = values.get("difference") t_values["difference %"] = values.get("difference %") else: t_values[f"{test} pass"] = values.get("pass") t_values[f"{test} p-value"] = values.get("p-value") result.append(t_values) result = [rename_passed(d) for d in result] return Dataset.from_dict( result, roles={"feature": InfoRole(), "group": TreatmentRole()}, ) def extract_tests(self, data: ExperimentData) -> dict[str, Any]: test_ids = data.get_ids( self.tests, searched_space=ExperimentDataEnum.analysis_tables ) result = {} for class_, ids in test_ids.items(): result.update( self._extract_from_comparators( data, ids[ExperimentDataEnum.analysis_tables.value] ) ) return {k: v for k, v in result.items() if "pass" in k or "p-value" in k}