"""Scalers for numeric pandas data.

Reconstructed from a diff that deletes the ``PredefinedMinMaxScaler`` file and
adds ``linear_scaler.py`` (``LinearScalerDto`` and ``LinearScaler``).
"""
from typing import TypedDict

from numpy import number
from pandas import DataFrame, Series


class PredefinedMinMaxScaler:
    """Min-max scaler with user-supplied bounds.

    Maps ``min_val`` to 0 and ``max_val`` to 1 via
    ``(x - min_val) / (max_val - min_val)``.

    This class comes from the file the diff deletes. It is reproduced here
    with its reference to the never-imported ``np`` replaced by the
    ``number`` name imported above.

    NOTE: ``max_val == min_val`` is not rejected; the resulting division by
    zero is left to pandas.
    """

    def __init__(self, min_val: float, max_val: float) -> None:
        """Store the bounds used by ``transform`` and ``inverse_transform``."""
        self.min_val = min_val
        self.max_val = max_val

    def scale_column(
        self, column: Series, min_val: float, max_val: float
    ) -> Series:
        """Return ``(column - min_val) / (max_val - min_val)``.

        Uses the bounds passed as arguments, not the ones stored on the
        instance.
        """
        return (column - min_val) / (max_val - min_val)

    def unscale_column(
        self, column: Series, min_val: float, max_val: float
    ) -> Series:
        """Return ``column * (max_val - min_val) + min_val``.

        Inverse of ``scale_column`` for the same bounds.
        """
        return column * (max_val - min_val) + min_val

    def transform(self, data: DataFrame) -> DataFrame:
        """Return a copy of ``data`` with every numeric column scaled.

        Non-numeric columns are left as they are; ``data`` is not modified.
        """
        data = data.copy()
        for column in data.select_dtypes(include=[number]).columns:
            data[column] = self.scale_column(
                data[column], self.min_val, self.max_val
            )
        return data

    def inverse_transform(self, data: DataFrame) -> DataFrame:
        """Return a copy of ``data`` with every numeric column unscaled.

        Non-numeric columns are left as they are; ``data`` is not modified.
        """
        data = data.copy()
        for column in data.select_dtypes(include=[number]).columns:
            data[column] = self.unscale_column(
                data[column], self.min_val, self.max_val
            )
        return data


class LinearScalerDto(TypedDict):
    """Dictionary form of a ``LinearScaler`` (see ``toDto`` / ``fromDto``)."""

    offset: float
    divisor: float


class LinearScaler:
    """Scale numeric data with ``(x + offset) / divisor`` and undo it again."""

    def __init__(self, offset: float, divisor: float) -> None:
        """
        Initializes the LinearScaler.
        E.g. To scale data from -100 to 100 to 0 to 1, the offset would be
        100.0 and the divisor would be 200.0.

        Parameters:
            offset (float):
                The offset to be applied to the data
            divisor (float):
                The divisor to be applied to the data

        Raises:
            ValueError: If ``divisor`` is 0.
        """
        if divisor == 0:
            raise ValueError("Divisor cannot be 0")
        self.offset = offset
        self.divisor = divisor

    def __repr__(self) -> str:
        """Return a constructor-style representation for debugging."""
        return (
            f"{type(self).__name__}"
            f"(offset={self.offset!r}, divisor={self.divisor!r})"
        )

    def transform_series(self, series: Series) -> Series:
        """Return ``(series + offset) / divisor`` computed on float values.

        The input is cast with ``astype(float)`` first, so the result is a
        new object and ``series`` itself is not modified.
        """
        float_series = series.astype(float)
        return (float_series + self.offset) / self.divisor

    def inverse_transform_series(self, series: Series) -> Series:
        """Return ``series * divisor - offset`` computed on float values.

        Inverse of ``transform_series``.
        """
        float_series = series.astype(float)
        return (float_series * self.divisor) - self.offset

    def _map_numeric_columns(self, dataframe: DataFrame, transform) -> DataFrame:
        """Apply ``transform`` to each numeric column of a copy of ``dataframe``."""
        dataframe_copy = dataframe.copy()
        for column in dataframe_copy.select_dtypes(include=[number]).columns:
            dataframe_copy[column] = transform(dataframe_copy[column])
        return dataframe_copy

    def transform_dataframe(self, dataframe: DataFrame) -> DataFrame:
        """Return a copy of ``dataframe`` with every numeric column scaled.

        Non-numeric columns are left as they are; ``dataframe`` is not
        modified.
        """
        return self._map_numeric_columns(dataframe, self.transform_series)

    def inverse_transform_dataframe(self, dataframe: DataFrame) -> DataFrame:
        """Return a copy of ``dataframe`` with every numeric column unscaled.

        Non-numeric columns are left as they are; ``dataframe`` is not
        modified.
        """
        return self._map_numeric_columns(
            dataframe, self.inverse_transform_series
        )

    def toDto(self) -> LinearScalerDto:
        """Return the scaler's parameters as a ``LinearScalerDto``.

        Both values are converted with ``float()`` so the dictionary holds
        plain Python floats even when the scaler was built from ints or
        numpy scalars.
        """
        return {"offset": float(self.offset), "divisor": float(self.divisor)}

    @classmethod
    def fromDto(cls, dto: LinearScalerDto) -> "LinearScaler":
        """Build a scaler from a ``LinearScalerDto``.

        A classmethod so that calling it on a subclass returns an instance
        of that subclass.

        Raises:
            ValueError: If ``dto["divisor"]`` is 0.
        """
        return cls(dto["offset"], dto["divisor"])