Source code for hypex.transformers.cuped

from __future__ import annotations

from copy import deepcopy
from typing import Any

from ..dataset.dataset import Dataset, ExperimentData
from ..dataset.roles import StatisticRole, TargetRole
from .abstract import Transformer


class CUPEDTransformer(Transformer):
    """Transformer that adds CUPED-adjusted copies of target features.

    For every ``{target_feature: pre_target_feature}`` pair a new column
    ``"<target_feature>_cuped"`` is added with the value
    ``Y - theta * (X - mean(X))`` where ``theta = cov(Y, X) / var(X)``.
    """

    def __init__(
        self,
        cuped_features: dict[str, str],
        key: Any = "",
    ):
        """
        Transformer that applies the CUPED adjustment to target features.

        Args:
            cuped_features (dict[str, str]): A mapping
                {target_feature: pre_target_feature}.
            key: Forwarded unchanged to the parent ``Transformer``.
        """
        super().__init__(key=key)
        self.cuped_features = cuped_features

    @staticmethod
    def _inner_function(
        data: Dataset,
        cuped_features: dict[str, str],
    ) -> Dataset:
        """Return a deep copy of ``data`` extended with CUPED-adjusted columns.

        Args:
            data: Dataset holding both the target and the pre-period columns.
            cuped_features: A mapping {target_feature: pre_target_feature}.

        Returns:
            The copied dataset with one ``"<target_feature>_cuped"`` column
            (role ``TargetRole``) added per mapping entry.
        """
        result = deepcopy(data)
        for target_feature, pre_target_feature in cuped_features.items():
            target = result[target_feature]
            covariate = result[pre_target_feature]

            covariate_centered = covariate - covariate.mean()
            target_centered = target - target.mean()

            # Both moments come from the same centred data with the same
            # normalisation, so the normalisation cancels in the ratio.
            cov_xy = (target_centered * covariate_centered).mean()
            var_x = (covariate_centered * covariate_centered).mean()

            std_y = target.std()
            std_x = covariate.std()

            # Zero spread or NaN (e.g. a single observation) means there is
            # nothing to adjust. ``v != v`` is the NaN test; ``not var_x > 0``
            # is also true for NaN.
            degenerate = (
                std_y == 0
                or std_x == 0
                or std_y != std_y
                or std_x != std_x
                or not var_x > 0
            )

            # CUPED coefficient: theta = cov(Y, X) / var(X). Dividing by
            # std_y * std_x instead would give the correlation, which is only
            # the right coefficient when both columns have equal spread.
            theta = 0 if degenerate else cov_xy / var_x

            adjusted = target - covariate_centered * theta
            result = result.add_column(
                data=adjusted,
                role={f"{target_feature}_cuped": TargetRole()},
            )
        return result

    @classmethod
    def calc(cls, data: Dataset, cuped_features: dict[str, str], **kwargs) -> Dataset:
        """Apply the CUPED adjustment to ``data``.

        Extra keyword arguments are accepted and ignored.
        """
        return cls._inner_function(data, cuped_features)

    def execute(self, data: ExperimentData) -> ExperimentData:
        """Add CUPED columns to ``data.ds`` and record the variance reduction.

        For every target feature the percentage variance reduction
        ``(1 - var(adjusted) / var(original)) * 100`` is stored in
        ``data.additional_fields`` under
        ``"<target_feature>_cuped_variance_reduction"`` with role
        ``StatisticRole``; it is ``0.0`` when the original variance is not
        positive.

        Returns:
            ``data.copy(data=new_ds)`` where ``new_ds`` is the dataset
            extended with the CUPED columns.
        """
        new_ds = self.calc(data=data.ds, cuped_features=self.cuped_features)

        # Compute every reduction before touching ``additional_fields`` so a
        # failure here leaves ``data`` unmodified.
        variance_reductions = {}
        for target_feature in self.cuped_features:
            cuped_name = f"{target_feature}_cuped"
            original_var = data.ds[target_feature].var()
            adjusted_var = new_ds[cuped_name].var()
            variance_reductions[cuped_name] = (
                (1 - adjusted_var / original_var) * 100 if original_var > 0 else 0.0
            )

        # Save variance reductions to additional_fields
        for metric, reduction in variance_reductions.items():
            data.additional_fields = data.additional_fields.add_column(
                data=[reduction],
                role={f"{metric}_variance_reduction": StatisticRole()},
            )

        return data.copy(data=new_ds)