# Source code for BaseITS.model_tuning

import pandas as pd
import numpy as np
from datetime import datetime
import itertools

from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

from BaseITS.pre_processing import str_date_validate


class ModelTuning:
    """Tune hyperparameters for a Prophet model via cross-validation.

    No implementation is provided for Poisson Regression because it's a basic
    linear regression model.

    Args:
        cutoff_start (str, optional): Start date for tuning data, formatted as
            "%Y-%m-%d". Defaults to "2019-02-28".
        cutoff_end (str, optional): End date for tuning data, formatted as
            "%Y-%m-%d". Defaults to "2019-10-31".
        param_grid (dict, optional): Dictionary mapping each parameter to be
            tuned to the list of candidate values. When None, defaults to
            { "changepoint_prior_scale": [0.001, 0.05],
            "seasonality_prior_scale": [0.1, 10.0],
            "seasonality_mode": ["additive", "multiplicative"], }.

    Raises:
        TypeError: If a cut-off date is not a ``str`` or ``param_grid`` is not
            a ``dict``.
        ValueError: If a cut-off date is empty/blank, or ``param_grid`` is
            empty or contains a key outside the supported set.
    """

    # The only grid keys accepted by the input validation.
    _ALLOWED_PARAMS = frozenset(
        ("changepoint_prior_scale", "seasonality_prior_scale", "seasonality_mode")
    )

    def __init__(
        self,
        cutoff_start: str = "2019-02-28",
        cutoff_end: str = "2019-10-31",
        param_grid: dict = None,
    ):
        """Store the tuning configuration and validate it.

        Args:
            cutoff_start (str, optional): Start date for tuning data. Defaults to "2019-02-28".
            cutoff_end (str, optional): End date for tuning data. Defaults to "2019-10-31".
            param_grid (dict, optional): Dictionary with the parameters to be tuned.
                When None, a fresh copy of the default grid is used.
        """
        if param_grid is None:
            # Build the default per instance: a dict literal in the signature
            # would be a single object shared (and mutable) across instances.
            param_grid = {
                "changepoint_prior_scale": [0.001, 0.05],
                "seasonality_prior_scale": [0.1, 10.0],
                "seasonality_mode": ["additive", "multiplicative"],
            }

        self.param_grid_ = param_grid
        self.cutoff_start_ = cutoff_start
        # NOTE: the misspelled attribute name is kept so existing code that
        # reads ``cuttoff_end_`` keeps working.
        self.cuttoff_end_ = cutoff_end

        # Validate before building the model so bad input fails fast.
        self.__validate_inputs()
        self.model_ = Prophet()

    def __validate_inputs(self):
        """Validate the values stored on the instance by ``__init__``.

        Raises:
            TypeError: If either cut-off date is not a ``str`` or the
                parameter grid is not a ``dict``.
            ValueError: If either cut-off date is empty or whitespace only,
                or the parameter grid is empty or has unsupported keys.
        """
        if not isinstance(self.cutoff_start_, str) or not isinstance(
            self.cuttoff_end_, str
        ):
            raise TypeError(
                '""cutoff_start" and "cutoff_end" date parameters provided must be of type "str"'
            )

        # Each message now names the date that was actually checked.
        if not self.cutoff_start_ or self.cutoff_start_.isspace():
            raise ValueError("The cut-off start dates cannot be None or empty")

        if not self.cuttoff_end_ or self.cuttoff_end_.isspace():
            raise ValueError("The cut-off end dates cannot be None or empty")

        if not isinstance(self.param_grid_, dict):
            raise TypeError(
                'Make sure the "param_grid parameter" passed is of type dictionary.'
            )

        if len(self.param_grid_) == 0:
            raise ValueError('"param_grid" parameter cannot be empty')

        if not set(self.param_grid_).issubset(self._ALLOWED_PARAMS):
            raise ValueError(
                'Make sure the values in the "param_grid" are the ones expected by the Prophet() model '
            )

    def tune_hyperparameters(self, df: pd.DataFrame, param_grid: dict = None):
        """Evaluate every parameter combination and report the best one.

        For each combination in the grid a Prophet model is fitted on ``df``
        and scored with cross-validation (90 day horizon) at month-start
        cutoffs between the configured start and end dates. The score is the
        RMSE computed over the whole horizon (``rolling_window=1``).

        Args:
            df (pd.DataFrame): Dataframe with the data to be tuned; it is
                passed unchanged to ``Prophet.fit``.
            param_grid (dict, optional): Parameters to be tuned. If None or
                empty, the grid provided in init() is used. Defaults to None.

        Returns:
            tuple: ``(tuning_results, best_params)`` where ``tuning_results``
            is a pd.DataFrame with one row per parameter combination plus an
            "rmse" column, sorted by ascending RMSE, and ``best_params`` is
            the dict of the combination with the lowest RMSE.

        Raises:
            ValueError: If a stored cut-off date does not match "%Y-%m-%d".
        """
        cutoff_start = datetime.strptime(self.cutoff_start_, "%Y-%m-%d")
        cutoff_end = datetime.strptime(self.cuttoff_end_, "%Y-%m-%d")
        cutoffs = pd.date_range(start=cutoff_start, end=cutoff_end, freq="MS")

        # Fall back to the grid given at construction when none is supplied.
        grid = param_grid if param_grid else self.param_grid_

        # Generate all combinations of parameters
        names = list(grid)
        all_params = [
            dict(zip(names, combo)) for combo in itertools.product(*grid.values())
        ]
        rmses = []  # Store the RMSEs for each params here

        # Use cross validation to evaluate all parameters
        for params in all_params:
            m = Prophet(
                interval_width=0.95,
                growth="linear",
                yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
                **params
            ).add_seasonality(name="yearly", period=365, fourier_order=5)

            m.fit(df)

            df_cv = cross_validation(
                model=m, horizon="90 days", cutoffs=cutoffs, parallel="processes"
            )
            df_p = performance_metrics(df_cv, rolling_window=1)
            rmses.append(df_p["rmse"].values[0])

        # Find the best parameters
        tuning_results = pd.DataFrame(all_params)
        tuning_results["rmse"] = rmses
        tuning_results = tuning_results.sort_values("rmse")
        best_params = all_params[np.argmin(rmses)]

        return tuning_results, best_params