# Source code for BaseITS.model_tuning

import pandas as pd
import numpy as np
from datetime import datetime
import itertools

from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

from BaseITS.pre_processing import str_date_validate


class ModelTuning:
    """Class for tuning hyperparameters for the Prophet model.

    No implementation is provided for Poisson Regression because it's a
    basic linear regression model.

    Args:
        cutoff_start (str, optional): Start date for the tuning data, as a
            ``YYYY-MM-DD`` string. Defaults to "2019-02-28".
        cutoff_end (str, optional): End date for the tuning data, as a
            ``YYYY-MM-DD`` string. Defaults to "2019-10-31".
        param_grid (dict, optional): Dictionary with the parameters to be
            tuned, mapping a Prophet parameter name to the list of candidate
            values. Defaults to None, which selects::

                {
                    "changepoint_prior_scale": [0.001, 0.05],
                    "seasonality_prior_scale": [0.1, 10.0],
                    "seasonality_mode": ["additive", "multiplicative"],
                }
    """

    # Only these Prophet parameters may appear as keys of a parameter grid.
    _ALLOWED_PARAMS = frozenset(
        ("changepoint_prior_scale", "seasonality_prior_scale", "seasonality_mode")
    )

    def __init__(
        self,
        cutoff_start: str = "2019-02-28",
        cutoff_end: str = "2019-10-31",
        param_grid: dict = None,
    ):
        """Store and validate the tuning parameters.

        Args:
            cutoff_start (str, optional): Start date for the tuning data.
                Defaults to "2019-02-28".
            cutoff_end (str, optional): End date for the tuning data.
                Defaults to "2019-10-31".
            param_grid (dict, optional): Dictionary with the parameters to be
                tuned. Defaults to None, in which case the default grid
                described in the class docstring is used.

        Raises:
            TypeError: If a cut-off date is not a ``str`` or ``param_grid``
                is not a ``dict``.
            ValueError: If a cut-off date is empty/blank, or ``param_grid``
                is empty or contains unexpected keys.
        """
        if param_grid is None:
            # Build the default grid per instance: a dict literal used as the
            # parameter default would be shared (and mutable) across instances.
            param_grid = {
                "changepoint_prior_scale": [0.001, 0.05],
                "seasonality_prior_scale": [0.1, 10.0],
                "seasonality_mode": ["additive", "multiplicative"],
            }

        self.param_grid_ = param_grid
        self.model_ = Prophet()
        self.cutoff_start_ = cutoff_start
        # NOTE: the attribute name is misspelled ("cuttoff"); it is kept as-is
        # because code outside this class may already read it.
        self.cuttoff_end_ = cutoff_end

        self.__validate_inputs()

    def __validate_inputs(self):
        """Validate the inputs provided to this class.

        Confirms that no null values or wrongly typed values were passed.

        Raises:
            TypeError: If a cut-off date is not a ``str`` or the parameter
                grid is not a ``dict``.
            ValueError: If a cut-off date is empty/blank, or the parameter
                grid is empty or has keys outside the allowed set.
        """
        if not isinstance(self.cutoff_start_, str) or not isinstance(
            self.cuttoff_end_, str
        ):
            raise TypeError(
                '""cutoff_start" and "cutoff_end" date parameters provided must be of type "str"'
            )

        # Each message names the date that actually failed the check.
        if not self.cutoff_start_ or self.cutoff_start_.isspace():
            raise ValueError("The cut-off start dates cannot be None or empty")
        if not self.cuttoff_end_ or self.cuttoff_end_.isspace():
            raise ValueError("The cut-off end dates cannot be None or empty")

        if not isinstance(self.param_grid_, dict):
            raise TypeError(
                'Make sure the "param_grid parameter" passed is of type dictionary.'
            )
        if len(self.param_grid_) == 0:
            raise ValueError('"param_grid" parameter cannot be empty')
        if not set(self.param_grid_) <= self._ALLOWED_PARAMS:
            raise ValueError(
                'Make sure the values in the "param_grid" are the ones expected by the Prophet() model '
            )

    def tune_hyperparameters(self, df: pd.DataFrame, param_grid: dict = None):
        """Tune the hyperparameters with cross-validation over a parameter grid.

        One Prophet model is fitted and cross-validated for every combination
        of values in the grid; combinations are ranked by RMSE.

        Args:
            df (pd.DataFrame): Dataframe with the data to be tuned. It is
                passed unchanged to ``Prophet.fit``.
            param_grid (dict, optional): Parameters to be tuned. If None or
                empty, the grid provided in ``__init__`` is used. A grid
                passed here is not validated. Defaults to None.

        Returns:
            tuple: ``(tuning_results, best_params)`` where ``tuning_results``
            is a ``pd.DataFrame`` with one row per parameter combination plus
            an ``rmse`` column, sorted by ascending RMSE, and ``best_params``
            is the ``dict`` of the combination with the lowest RMSE.

        Raises:
            ValueError: If a stored cut-off date does not match ``%Y-%m-%d``.
        """
        cutoff_start = datetime.strptime(self.cutoff_start_, "%Y-%m-%d")
        cutoff_end = datetime.strptime(self.cuttoff_end_, "%Y-%m-%d")
        # "MS" = month-start frequency: one cut-off per month start in range.
        cutoffs = pd.date_range(start=cutoff_start, end=cutoff_end, freq="MS")

        if param_grid is None or len(param_grid) == 0:
            grid = self.param_grid_
        else:
            grid = param_grid

        # Generate all combinations of parameters.
        all_params = [
            dict(zip(grid.keys(), values))
            for values in itertools.product(*grid.values())
        ]

        rmses = []  # RMSE of each combination, in the same order as all_params

        # Use cross validation to evaluate all parameters.
        for params in all_params:
            m = Prophet(
                interval_width=0.95,
                growth="linear",
                yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
                **params
            ).add_seasonality(name="yearly", period=365, fourier_order=5)
            m.fit(df)

            df_cv = cross_validation(
                model=m, horizon="90 days", cutoffs=cutoffs, parallel="processes"
            )
            # Only the first row of the metrics table is used as the score.
            df_p = performance_metrics(df_cv, rolling_window=1)
            rmses.append(df_p["rmse"].values[0])

        # Find the best parameters.
        tuning_results = pd.DataFrame(all_params)
        tuning_results["rmse"] = rmses
        tuning_results = tuning_results.sort_values("rmse")

        best_params = all_params[np.argmin(rmses)]

        return tuning_results, best_params