# Source code for BaseITS.custom_transform
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
class CustomTransform(BaseEstimator, TransformerMixin):
    """Select columns and optionally append harmonic (sine/cosine) terms.

    The transformer keeps ``columns`` from the input dataframe and, when
    ``seasonally_adjusted`` is true, appends ``2 * nfreq`` harmonic columns
    computed from the ``var_name`` column (plus a constant column when
    ``fit_intercept`` is true). The class docstring of the original states the
    result is intended as input for a Poisson regression.

    The harmonic frequencies are ``(2 ** i) * 2 * pi / period`` for
    ``i = 0 .. nfreq - 1``; all sine columns come first, followed by all
    cosine columns.
    """

    def __init__(
        self,
        columns: list,
        seasonally_adjusted: bool = True,
        var_name: str = "month",
        nfreq: int = 2,
        period: int = 12,
        fit_intercept: bool = False,
    ):
        """Store the configuration; no validation is performed here.

        Args:
            columns (list): Columns of the input dataframe to keep in the output.
            seasonally_adjusted (bool, optional): Whether to append the harmonic
                columns. Defaults to True.
            var_name (str, optional): Name of the column the harmonics are
                computed from. Defaults to "month".
            nfreq (int, optional): Number of sine/cosine frequency pairs; must be
                greater than 0 for ``transform`` to produce output. Defaults to 2.
            period (int, optional): Divisor of the base frequency
                ``2 * pi / period``. Defaults to 12.
            fit_intercept (bool, optional): Whether to prepend a constant column
                of ones to the harmonic columns. Defaults to False.
        """
        self.columns = columns
        self.seasonally_adjusted = seasonally_adjusted
        self.var_name = var_name
        self.nfreq = nfreq
        self.period = period
        self.fit_intercept = fit_intercept
        # TODO: Implement validate_inputs()

    def fit(self, X: pd.DataFrame, y: pd.Series = None):
        """Do nothing and return the transformer itself.

        The transformation is stateless, so nothing is learned from the data.

        Args:
            X (pd.DataFrame): Input dataframe (unused).
            y (pd.Series, optional): Outcome variable (unused). Defaults to None.

        Returns:
            CustomTransform: This instance, unchanged.
        """
        return self

    def transform(self, X: pd.DataFrame, y: pd.Series = None):
        """Return the selected columns, optionally with harmonic columns appended.

        Args:
            X (pd.DataFrame): Dataframe containing ``columns`` and, when
                ``seasonally_adjusted`` is true, the ``var_name`` column.
            y (pd.Series, optional): Outcome variable (unused). Defaults to None.

        Returns:
            pd.DataFrame: ``X[columns]`` when ``seasonally_adjusted`` is false;
            otherwise ``X[columns]`` with the harmonic columns concatenated on
            the right, sharing the index of ``X``. ``None`` is returned (after
            printing a message) when ``var_name`` is missing from ``X``, when
            ``nfreq`` is not positive, or when the ``var_name`` column holds a
            non-numeric value.
        """
        if not self.seasonally_adjusted:
            return X[self.columns]

        # Validation failures are reported with print + None rather than an
        # exception; this is kept so existing callers see the same behaviour.
        if self.var_name not in X.columns:
            print("Harmonic variable not found in the data")
            return None
        if self.nfreq <= 0:
            print("nfreq > 0 is not true")
            return None

        values = X[self.var_name]
        # NumPy scalar types are listed explicitly: np.int64 is not a subclass
        # of int, so object-dtype columns holding NumPy integers would
        # otherwise be rejected.
        numeric_types = (int, float, np.integer, np.floating)
        if not all(isinstance(item, numeric_types) for item in values):
            print("All values must be numeric")
            return None

        harmonic_X = self._harmonic_frame(values, X.index)
        return pd.concat([X[self.columns], harmonic_X], axis=1)

    def _harmonic_frame(self, values, index) -> pd.DataFrame:
        """Build the dataframe of harmonic columns for ``values`` on ``index``."""
        frequencies = [
            (2**i) * 2 * np.pi / self.period for i in range(self.nfreq)
        ]
        # One row per observation, one column per frequency.
        phase = np.outer(values, frequencies)
        blocks = [np.sin(phase), np.cos(phase)]

        label = "harmonic({},{},{})".format(
            self.var_name, self.nfreq, self.period
        )
        names = []
        first_number = 1
        if self.fit_intercept:
            blocks.insert(0, np.ones(len(phase)))
            names.append(label + " intercept")
            # With an intercept the numbered columns start at 2, not 1; this
            # naming is preserved for backward compatibility.
            first_number = 2
        names.extend(
            "{} {}".format(label, number)
            for number in range(first_number, first_number + 2 * self.nfreq)
        )

        # Reusing the caller's index keeps the axis=1 concat row-aligned; a
        # default RangeIndex would misalign (and introduce NaN rows) whenever
        # X does not itself carry a 0..n-1 index.
        return pd.DataFrame(np.column_stack(blocks), index=index, columns=names)