`utils`¶

`save_forecaster(forecaster, file_name, save_custom_functions=True, verbose=True)` ¶

Save forecaster model using joblib. If custom functions are used to create predictors or weights, they are saved as .py files.

Parameters:

Name	Type	Description	Default
`forecaster`	`Forecaster`	Forecaster created with skforecast library.	required
`file_name`	`str`	File name given to the object.	required
`save_custom_functions`	`bool`	If True, save custom functions used in the forecaster (fun_predictors and weight_func) as .py files. Custom functions need to be available in the environment where the forecaster is going to be loaded.	`True`
`verbose`	`bool`	Print summary about the forecaster saved.	`True`

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def save_forecaster(
    forecaster: object, 
    file_name: str,
    save_custom_functions: bool=True, 
    verbose: bool=True
) -> None:
    """
    Save forecaster model using joblib. If custom functions are used to create
    predictors or weights, they are saved as .py files.

    Parameters
    ----------
    forecaster : Forecaster
        Forecaster created with skforecast library.
    file_name : str
        File name given to the object.
    save_custom_functions : bool, default `True`
        If True, save custom functions used in the forecaster (fun_predictors and
        weight_func) as .py files. Custom functions need to be available in the
        environment where the forecaster is going to be loaded.
    verbose : bool, default `True`
        Print summary about the forecaster saved.

    Returns
    -------
    None

    """

    # Save forecaster
    joblib.dump(forecaster, filename=file_name)

    if save_custom_functions:
        # Save custom functions to create predictors
        if hasattr(forecaster, 'fun_predictors') and forecaster.fun_predictors is not None:
            file_name = forecaster.fun_predictors.__name__ + '.py'
            with open(file_name, 'w') as file:
                file.write(inspect.getsource(forecaster.fun_predictors))

        # Save custom functions to create weights
        if hasattr(forecaster, 'weight_func') and forecaster.weight_func is not None:
            if isinstance(forecaster.weight_func, dict):
                for fun in set(forecaster.weight_func.values()):
                    file_name = fun.__name__ + '.py'
                    with open(file_name, 'w') as file:
                        file.write(inspect.getsource(fun))
            else:
                file_name = forecaster.weight_func.__name__ + '.py'
                with open(file_name, 'w') as file:
                    file.write(inspect.getsource(forecaster.weight_func))
    else:
        if ((hasattr(forecaster, 'fun_predictors') and forecaster.fun_predictors is not None)
          or (hasattr(forecaster, 'weight_func') and forecaster.weight_func is not None)):
            warnings.warn(
                ("Custom functions used to create predictors or weights are not saved. "
                 "To save them, set `save_custom_functions` to `True`.")
            )

    if verbose:
        forecaster.summary()

`load_forecaster(file_name, verbose=True)` ¶

Load forecaster model using joblib. If the forecaster was saved with custom functions to create predictors or weights, these functions must be available in the environment where the forecaster is going to be loaded.

Parameters:

Name	Type	Description	Default
`file_name`	`str`	Object file name.	required
`verbose`	`bool`	Print summary about the forecaster loaded.	`True`

Returns:

Name	Type	Description
`forecaster`	`Forecaster`	Forecaster created with skforecast library.

Source code in skforecast\utils\utils.py

def load_forecaster(
    file_name: str,
    verbose: bool=True
) -> object:
    """
    Load forecaster model using joblib. If the forecaster was saved with custom
    functions to create predictors or weights, these functions must be available
    in the environment where the forecaster is going to be loaded.

    Parameters
    ----------
    file_name: str
        Object file name.
    verbose: bool, default `True`
        Print summary about the forecaster loaded.

    Returns
    -------
    forecaster: Forecaster
        Forecaster created with skforecast library.

    """

    forecaster = joblib.load(filename=file_name)

    skforecast_v = skforecast.__version__
    forecaster_v = forecaster.skforecast_version

    if forecaster_v != skforecast_v:
        warnings.warn(
            (f"The skforecast version installed in the environment differs "
             f"from the version used to create the forecaster.\n"
             f"    Installed Version  : {skforecast_v}\n"
             f"    Forecaster Version : {forecaster_v}\n"
             f"This may create incompatibilities when using the library."),
             SkforecastVersionWarning
        )

    if verbose:
        forecaster.summary()

    return forecaster

`initialize_lags(forecaster_name, lags)` ¶

Check lags argument input and generate the corresponding numpy ndarray.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.	required
`lags`	`Any`	Lags used as predictors.	required

Returns:

Name	Type	Description
`lags`	`numpy ndarray`	Lags used as predictors.

Source code in skforecast\utils\utils.py

def initialize_lags(
    forecaster_name: str,
    lags: Any
) -> np.ndarray:
    """
    Check lags argument input and generate the corresponding numpy ndarray.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.
    lags : Any
        Lags used as predictors.

    Returns
    -------
    lags : numpy ndarray
        Lags used as predictors.

    """

    if isinstance(lags, int):
        if lags < 1:
            raise ValueError("Minimum value of lags allowed is 1.")
        lags = np.arange(1, lags + 1)

    if isinstance(lags, (list, tuple, range)):
        lags = np.array(lags)

    if isinstance(lags, np.ndarray):
        if lags.ndim != 1:
            raise ValueError("`lags` must be a 1-dimensional array.")
        if lags.size == 0:
            raise ValueError("Argument `lags` must contain at least one value.")
        if not np.issubdtype(lags.dtype, np.integer):
            raise TypeError("All values in `lags` must be integers.")
        if np.any(lags < 1):
            raise ValueError("Minimum value of lags allowed is 1.")
    else:
        if forecaster_name != 'ForecasterAutoregMultiVariate':
            raise TypeError(
                (f"`lags` argument must be an int, 1d numpy ndarray, range, tuple or list. "
                 f"Got {type(lags)}.")
            )
        else:
            raise TypeError(
                ("`lags` argument must be a dict, int, 1d numpy ndarray, range, tuple or list. "
                 f"Got {type(lags)}.")
            )

    return lags

`initialize_weights(forecaster_name, regressor, weight_func, series_weights)` ¶

Check weights arguments, weight_func and series_weights for the different forecasters. Create source_code_weight_func, source code of the custom function(s) used to create weights.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.	required
`regressor`	`regressor or pipeline compatible with the scikit-learn API`	Regressor of the forecaster.	required
`weight_func`	`(Callable, dict)`	Argument `weight_func` of the forecaster.	required
`series_weights`	`dict`	Argument `series_weights` of the forecaster.	required

Returns:

Name	Type	Description
`weight_func`	`(Callable, dict)`	Argument `weight_func` of the forecaster.
`source_code_weight_func`	`(str, dict)`	Argument `source_code_weight_func` of the forecaster.
`series_weights`	`dict`	Argument `series_weights` of the forecaster.

Source code in skforecast\utils\utils.py

def initialize_weights(
    forecaster_name: str,
    regressor: object,
    weight_func: Union[Callable, dict],
    series_weights: dict
) -> Tuple[Union[Callable, dict], Union[str, dict], dict]:
    """
    Check weights arguments, `weight_func` and `series_weights` for the different 
    forecasters. Create `source_code_weight_func`, source code of the custom 
    function(s) used to create weights.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.
    regressor : regressor or pipeline compatible with the scikit-learn API
        Regressor of the forecaster.
    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.
    series_weights : dict
        Argument `series_weights` of the forecaster.

    Returns
    -------
    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.
    source_code_weight_func : str, dict
        Argument `source_code_weight_func` of the forecaster.
    series_weights : dict
        Argument `series_weights` of the forecaster.

    """

    source_code_weight_func = None

    if weight_func is not None:

        if forecaster_name in ['ForecasterAutoregMultiSeries', 
                               'ForecasterAutoregMultiSeriesCustom']:
            if not isinstance(weight_func, (Callable, dict)):
                raise TypeError(
                    (f"Argument `weight_func` must be a Callable or a dict of "
                     f"Callables. Got {type(weight_func)}.")
                )
        elif not isinstance(weight_func, Callable):
            raise TypeError(
                f"Argument `weight_func` must be a Callable. Got {type(weight_func)}."
            )

        if isinstance(weight_func, dict):
            source_code_weight_func = {}
            for key in weight_func:
                source_code_weight_func[key] = inspect.getsource(weight_func[key])
        else:
            source_code_weight_func = inspect.getsource(weight_func)

        if 'sample_weight' not in inspect.signature(regressor.fit).parameters:
            warnings.warn(
                (f"Argument `weight_func` is ignored since regressor {regressor} "
                 f"does not accept `sample_weight` in its `fit` method."),
                 IgnoredArgumentWarning
            )
            weight_func = None
            source_code_weight_func = None

    if series_weights is not None:
        if not isinstance(series_weights, dict):
            raise TypeError(
                (f"Argument `series_weights` must be a dict of floats or ints."
                 f"Got {type(series_weights)}.")
            )
        if 'sample_weight' not in inspect.signature(regressor.fit).parameters:
            warnings.warn(
                (f"Argument `series_weights` is ignored since regressor {regressor} "
                 f"does not accept `sample_weight` in its `fit` method."),
                 IgnoredArgumentWarning
            )
            series_weights = None

    return weight_func, source_code_weight_func, series_weights

`initialize_transformer_series(series_col_names, transformer_series=None)` ¶

Initialize transformer_series_ attribute for the Forecasters Multiseries.

If transformer_series is None, no transformation is applied.
If transformer_series is a scikit-learn transformer (object), the same transformer is applied to all series (series_col_names).
If transformer_series is a dict, a different transformer can be applied to each series. The keys of the dictionary must be the same as the names of the series in series_col_names.

Parameters:

Name	Type	Description	Default
`series_col_names`	`list`	Names of the series (levels) used during training.	required
`transformer_series`	`(object, dict)`	An instance of a transformer (preprocessor) compatible with the scikit-learn preprocessing API with methods: fit, transform, fit_transform and inverse_transform.	`None`

Returns:

Name	Type	Description
`transformer_series_`	`dict`	Dictionary with the transformer for each series. It is created cloning the objects in `transformer_series` and is used internally to avoid overwriting.

Source code in skforecast\utils\utils.py

def initialize_transformer_series(
    series_col_names: list,
    transformer_series: Optional[Union[object, dict]]=None
) -> dict:
    """
    Initialize `transformer_series_` attribute for the Forecasters Multiseries.

    - If `transformer_series` is `None`, no transformation is applied.
    - If `transformer_series` is a scikit-learn transformer (object), the same 
    transformer is applied to all series (`series_col_names`).
    - If `transformer_series` is a `dict`, a different transformer can be
    applied to each series. The keys of the dictionary must be the same as the
    names of the series in `series_col_names`.

    Parameters
    ----------
    series_col_names : list
        Names of the series (levels) used during training.
    transformer_series : object, dict, default `None`
        An instance of a transformer (preprocessor) compatible with the scikit-learn
        preprocessing API with methods: fit, transform, fit_transform and 
        inverse_transform. 

    Returns
    -------
    transformer_series_ : dict
        Dictionary with the transformer for each series. It is created cloning the 
        objects in `transformer_series` and is used internally to avoid overwriting.

    """

    if transformer_series is None:
        transformer_series_ = {serie: None for serie in series_col_names}
    elif not isinstance(transformer_series, dict):
        transformer_series_ = {serie: clone(transformer_series) 
                               for serie in series_col_names}
    else:
        transformer_series_ = {serie: None for serie in series_col_names}
        # Only elements already present in transformer_series_ are updated
        transformer_series_.update(
            (k, v) for k, v in deepcopy(transformer_series).items() 
            if k in transformer_series_
        )
        series_not_in_transformer_series = (
            set(series_col_names) - set(transformer_series.keys())
        )
        if series_not_in_transformer_series:
            warnings.warn(
                (f"{series_not_in_transformer_series} not present in `transformer_series`."
                 f" No transformation is applied to these series."),
                 IgnoredArgumentWarning
            )

    return transformer_series_

`initialize_lags_grid(forecaster, lags_grid=None)` ¶

Initialize lags grid and lags label for model selection.

Parameters:

Name	Type	Description	Default
`forecaster`	`Forecaster`	Forecaster model. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.	required
`lags_grid`	`(list, dict)`	Lists of lags to try, containing int, lists, numpy ndarray, or range objects. If `dict`, the keys are used as labels in the `results` DataFrame, and the values are used as the lists of lags to try. Ignored if the forecaster is an instance of `ForecasterAutoregCustom` or `ForecasterAutoregMultiSeriesCustom`.	`None`

Returns:

Name	Type	Description
`lags_grid`	`dict`	Dictionary with lags configuration for each iteration.
`lags_label`	`str`	Label for lags representation in the results object.

Source code in skforecast\utils\utils.py

def initialize_lags_grid(
    forecaster: object, 
    lags_grid: Optional[Union[list, dict]]=None
) -> Tuple[dict, str]:
    """
    Initialize lags grid and lags label for model selection. 

    Parameters
    ----------
    forecaster : Forecaster
        Forecaster model. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.
    lags_grid : list, dict, default `None`
        Lists of lags to try, containing int, lists, numpy ndarray, or range 
        objects. If `dict`, the keys are used as labels in the `results` 
        DataFrame, and the values are used as the lists of lags to try. Ignored 
        if the forecaster is an instance of `ForecasterAutoregCustom` or 
        `ForecasterAutoregMultiSeriesCustom`.

    Returns
    -------
    lags_grid : dict
        Dictionary with lags configuration for each iteration.
    lags_label : str
        Label for lags representation in the results object.

    """

    if not isinstance(lags_grid, (list, dict, type(None))):
        raise TypeError(
            (f"`lags_grid` argument must be a list, dict or None. "
             f"Got {type(lags_grid)}.")
        )

    if type(forecaster).__name__ in ['ForecasterAutoregCustom', 
                                     'ForecasterAutoregMultiSeriesCustom']:
        if lags_grid is not None:
            warnings.warn(
                (f"`lags_grid` ignored if forecaster is an instance of "
                 f"`{type(forecaster).__name__}`."),
                IgnoredArgumentWarning
            )
        lags_grid = ['custom predictors']

    lags_label = 'values'
    if isinstance(lags_grid, list):
        lags_grid = {f'{lags}': lags for lags in lags_grid}
    elif lags_grid is None:
        lags_grid = {f'{list(forecaster.lags)}': list(forecaster.lags)}
    else:
        lags_label = 'keys'

    return lags_grid, lags_label

`check_select_fit_kwargs(regressor, fit_kwargs=None)` ¶

Check if fit_kwargs is a dict and select only the keys that are used by the fit method of the regressor.

Parameters:

Name	Type	Description	Default
`regressor`	`object`	Regressor object.	required
`fit_kwargs`	`dict`	Dictionary with the arguments to pass to the `fit' method of the forecaster.	`None`

Returns:

Name	Type	Description
`fit_kwargs`	`dict`	Dictionary with the arguments to be passed to the `fit` method of the regressor after removing the unused keys.

Source code in skforecast\utils\utils.py

def check_select_fit_kwargs(
    regressor: object,
    fit_kwargs: Optional[dict]=None
) -> dict:
    """
    Check if `fit_kwargs` is a dict and select only the keys that are used by
    the `fit` method of the regressor.

    Parameters
    ----------
    regressor : object
        Regressor object.
    fit_kwargs : dict, default `None`
        Dictionary with the arguments to pass to the `fit' method of the forecaster.

    Returns
    -------
    fit_kwargs : dict
        Dictionary with the arguments to be passed to the `fit` method of the 
        regressor after removing the unused keys.

    """

    if fit_kwargs is None:
        fit_kwargs = {}
    else:
        if not isinstance(fit_kwargs, dict):
            raise TypeError(
                f"Argument `fit_kwargs` must be a dict. Got {type(fit_kwargs)}."
            )

        # Non used keys
        non_used_keys = [k for k in fit_kwargs.keys()
                         if k not in inspect.signature(regressor.fit).parameters]
        if non_used_keys:
            warnings.warn(
                (f"Argument/s {non_used_keys} ignored since they are not used by the "
                 f"regressor's `fit` method."),
                 IgnoredArgumentWarning
            )

        if 'sample_weight' in fit_kwargs.keys():
            warnings.warn(
                ("The `sample_weight` argument is ignored. Use `weight_func` to pass "
                 "a function that defines the individual weights for each sample "
                 "based on its index."),
                 IgnoredArgumentWarning
            )
            del fit_kwargs['sample_weight']

        # Select only the keyword arguments allowed by the regressor's `fit` method.
        fit_kwargs = {k:v for k, v in fit_kwargs.items()
                      if k in inspect.signature(regressor.fit).parameters}

    return fit_kwargs

check_y(y, series_id='`y`') ¶

Raise Exception if y is not pandas Series or if it has missing values.

Parameters:

Name	Type	Description	Default
`y`	`Any`	Time series values.	required
`series_id`	`str`	Identifier of the series used in the warning message.	'`y`'

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_y(
    y: Any,
    series_id: str="`y`"
) -> None:
    """
    Raise Exception if `y` is not pandas Series or if it has missing values.

    Parameters
    ----------
    y : Any
        Time series values.
    series_id : str, default '`y`'
        Identifier of the series used in the warning message.

    Returns
    -------
    None

    """

    if not isinstance(y, pd.Series):
        raise TypeError(f"{series_id} must be a pandas Series.")

    if y.isnull().any():
        raise ValueError(f"{series_id} has missing values.")

    return

check_exog(exog, allow_nan=True, series_id='`exog`') ¶

Raise Exception if exog is not pandas Series or pandas DataFrame. If allow_nan = True, issue a warning if exog contains NaN values.

Parameters:

Name	Type	Description	Default
`exog`	`Any`	Exogenous variable/s included as predictor/s.	required
`allow_nan`	`bool`	If True, allows the presence of NaN values in `exog`. If False (default), issue a warning if `exog` contains NaN values.	`True`
`series_id`	`str`	Identifier of the series for which the exogenous variable/s are used in the warning message.	'`exog`'

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_exog(
    exog: Any,
    allow_nan: bool=True,
    series_id: str="`exog`"
) -> None:
    """
    Raise Exception if `exog` is not pandas Series or pandas DataFrame.
    If `allow_nan = True`, issue a warning if `exog` contains NaN values.

    Parameters
    ----------
    exog : Any
        Exogenous variable/s included as predictor/s.
    allow_nan : bool, default `True`
        If True, allows the presence of NaN values in `exog`. If False (default),
        issue a warning if `exog` contains NaN values.
    series_id : str, default '`exog`'
        Identifier of the series for which the exogenous variable/s are used
        in the warning message.

    Returns
    -------
    None

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(f"{series_id} must be a pandas Series or DataFrame.")

    if isinstance(exog, pd.Series) and exog.name is None:
        raise ValueError(f"When {series_id} is a pandas Series, it must have a name.")

    if not allow_nan:
        if exog.isnull().any().any():
            warnings.warn(
                (f"{series_id} has missing values. Most machine learning models "
                 f"do not allow missing values. Fitting the forecaster may fail."), 
                 MissingValuesWarning
            )

    return

`get_exog_dtypes(exog)` ¶

Store dtypes of exog.

Parameters:

Name	Type	Description	Default
`exog`	`pandas DataFrame, pandas Series`	Exogenous variable/s included as predictor/s.	required

Returns:

Name	Type	Description
`exog_dtypes`	`dict`	Dictionary with the dtypes in `exog`.

Source code in skforecast\utils\utils.py

def get_exog_dtypes(
    exog: Union[pd.DataFrame, pd.Series]
) -> dict:
    """
    Store dtypes of `exog`.

    Parameters
    ----------
    exog : pandas DataFrame, pandas Series
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    exog_dtypes : dict
        Dictionary with the dtypes in `exog`.

    """

    if isinstance(exog, pd.Series):
        exog_dtypes = {exog.name: exog.dtypes}
    else:
        exog_dtypes = exog.dtypes.to_dict()

    return exog_dtypes

check_exog_dtypes(exog, call_check_exog=True, series_id='`exog`') ¶

Raise Exception if exog has categorical columns with non integer values. This is needed when using machine learning regressors that allow categorical features. Issue a Warning if exog has columns that are not init, float, or category.

Parameters:

Name	Type	Description	Default
`exog`	`pandas DataFrame, pandas Series`	Exogenous variable/s included as predictor/s.	required
`call_check_exog`	`bool`	If `True`, call `check_exog` function.	`True`
`series_id`	`str`	Identifier of the series for which the exogenous variable/s are used in the warning message.	'`exog`'

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_exog_dtypes(
    exog: Union[pd.DataFrame, pd.Series],
    call_check_exog: bool=True,
    series_id: str="`exog`"
) -> None:
    """
    Raise Exception if `exog` has categorical columns with non integer values.
    This is needed when using machine learning regressors that allow categorical
    features.
    Issue a Warning if `exog` has columns that are not `init`, `float`, or `category`.

    Parameters
    ----------
    exog : pandas DataFrame, pandas Series
        Exogenous variable/s included as predictor/s.
    call_check_exog : bool, default `True`
        If `True`, call `check_exog` function.
    series_id : str, default '`exog`'
        Identifier of the series for which the exogenous variable/s are used
        in the warning message.

    Returns
    -------
    None

    """

    if call_check_exog:
        check_exog(exog=exog, allow_nan=False, series_id=series_id)

    if isinstance(exog, pd.DataFrame):
        if not exog.select_dtypes(exclude=[np.number, 'category']).columns.empty:
            warnings.warn(
                (f"{series_id} may contain only `int`, `float` or `category` dtypes. "
                 f"Most machine learning models do not allow other types of values. "
                 f"Fitting the forecaster may fail."), 
                 DataTypeWarning
            )
        for col in exog.select_dtypes(include='category'):
            if exog[col].cat.categories.dtype not in [int, np.int32, np.int64]:
                raise TypeError(
                    ("Categorical columns in exog must contain only integer values. "
                     "See skforecast docs for more info about how to include "
                     "categorical features https://skforecast.org/"
                     "latest/user_guides/categorical-features.html")
                )
    else:
        if exog.dtype.name not in ['int', 'int8', 'int16', 'int32', 'int64', 'float', 
        'float16', 'float32', 'float64', 'uint8', 'uint16', 'uint32', 'uint64', 'category']:
            warnings.warn(
                (f"{series_id} may contain only `int`, `float` or `category` dtypes. Most "
                 f"machine learning models do not allow other types of values. "
                 f"Fitting the forecaster may fail."), 
                 DataTypeWarning
            )
        if exog.dtype.name == 'category' and exog.cat.categories.dtype not in [int,
        np.int32, np.int64]:
            raise TypeError(
                ("If exog is of type category, it must contain only integer values. "
                 "See skforecast docs for more info about how to include "
                 "categorical features https://skforecast.org/"
                 "latest/user_guides/categorical-features.html")
            )

    return

`check_interval(interval=None, quantiles=None, alpha=None)` ¶

Check provided confidence interval sequence is valid.

Parameters:

Name	Type	Description	Default
`interval`	`list`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as `interval = [2.5, 97.5]`.	`None`
`quantiles`	`list`	Sequence of quantiles to compute, which must be between 0 and 1 inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as `quantiles = [0.05, 0.5, 0.95]`.	`None`
`alpha`	`float`	The confidence intervals used in ForecasterSarimax are (1 - alpha) %.	`None`

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_interval(
    interval: list=None,
    quantiles: float=None,
    alpha: float=None
) -> None:
    """
    Check provided confidence interval sequence is valid.

    Parameters
    ----------
    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.
    quantiles : list, default `None`
        Sequence of quantiles to compute, which must be between 0 and 1 
        inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as 
        `quantiles = [0.05, 0.5, 0.95]`.
    alpha : float, default `None`
        The confidence intervals used in ForecasterSarimax are (1 - alpha) %.

    Returns
    -------
    None

    """

    if interval is not None:
        if not isinstance(interval, list):
            raise TypeError(
                ("`interval` must be a `list`. For example, interval of 95% "
                 "should be as `interval = [2.5, 97.5]`.")
            )

        if len(interval) != 2:
            raise ValueError(
                ("`interval` must contain exactly 2 values, respectively the "
                 "lower and upper interval bounds. For example, interval of 95% "
                 "should be as `interval = [2.5, 97.5]`.")
            )

        if (interval[0] < 0.) or (interval[0] >= 100.):
            raise ValueError(
                f"Lower interval bound ({interval[0]}) must be >= 0 and < 100."
            )

        if (interval[1] <= 0.) or (interval[1] > 100.):
            raise ValueError(
                f"Upper interval bound ({interval[1]}) must be > 0 and <= 100."
            )

        if interval[0] >= interval[1]:
            raise ValueError(
                (f"Lower interval bound ({interval[0]}) must be less than the "
                 f"upper interval bound ({interval[1]}).")
            )

    if quantiles is not None:
        if not isinstance(quantiles, list):
            raise TypeError(
                ("`quantiles` must be a `list`. For example, quantiles 0.05, "
                 "0.5, and 0.95 should be as `quantiles = [0.05, 0.5, 0.95]`.")
            )

        for q in quantiles:
            if (q < 0.) or (q > 1.):
                raise ValueError(
                    ("All elements in `quantiles` must be >= 0 and <= 1.")
                )

    if alpha is not None:
        if not isinstance(alpha, float):
            raise TypeError(
                ("`alpha` must be a `float`. For example, interval of 95% "
                 "should be as `alpha = 0.05`.")
            )

        if (alpha <= 0.) or (alpha >= 1):
            raise ValueError(
                f"`alpha` must have a value between 0 and 1. Got {alpha}."
            )

    return

`check_predict_input(forecaster_name, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, last_window_exog=None, exog=None, exog_type=None, exog_col_names=None, interval=None, alpha=None, max_steps=None, levels=None, levels_forecaster=None, series_col_names=None)` ¶

Check all inputs of predict method. This is a helper function to validate that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate, ForecasterRnn	required
`steps`	`(int, list)`	Number of future steps predicted.	required
`fitted`	`bool`	Tag to identify if the regressor has been fitted (trained).	required
`included_exog`	`bool`	If the forecaster has been trained using exogenous variable/s.	required
`index_type`	`type`	Type of index of the input used in training.	required
`index_freq`	`str`	Frequency of Index of the input used in training.	required
`window_size`	`int`	Size of the window needed to create the predictors. It is equal to `max_lag`.	required
`last_window`	`pandas Series, pandas DataFrame`	Values of the series used to create the predictors (lags) need in the first iteration of prediction (t + 1).	`None`
`last_window_exog`	`pandas Series, pandas DataFrame`	Values of the exogenous variables aligned with `last_window` in ForecasterSarimax predictions.	`None`
`exog`	`pandas Series, pandas DataFrame`	Exogenous variable/s included as predictor/s.	`None`
`exog_type`	`type`	Type of exogenous variable/s used in training.	`None`
`exog_col_names`	`list`	Names of the exogenous variables used during training.	`None`
`interval`	`list`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as `interval = [2.5, 97.5]`.	`None`
`alpha`	`float`	The confidence intervals used in ForecasterSarimax are (1 - alpha) %.	`None`
`max_steps`	`Optional[int]`	Maximum number of steps allowed (`ForecasterAutoregDirect` and `ForecasterAutoregMultiVariate`).	`None`
`levels`	`(str, list)`	Time series to be predicted (`ForecasterAutoregMultiSeries`, `ForecasterAutoregMultiSeriesCustom` and `ForecasterRnn).	`None`
`levels_forecaster`	`(str, list)`	Time series used as output data of a multiseries problem in a RNN problem (`ForecasterRnn`).	`None`
`series_col_names`	`list`	Names of the columns used during fit (`ForecasterAutoregMultiSeries`, `ForecasterAutoregMultiSeriesCustom`, `ForecasterAutoregMultiVariate` and `ForecasterRnn`).	`None`

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_predict_input(
    forecaster_name: str,
    steps: Union[int, list],
    fitted: bool,
    included_exog: bool,
    index_type: type,
    index_freq: str,
    window_size: int,
    last_window: Optional[Union[pd.Series, pd.DataFrame]]=None,
    last_window_exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    exog_type: Optional[type]=None,
    exog_col_names: Optional[list]=None,
    interval: Optional[list]=None,
    alpha: Optional[float]=None,
    max_steps: Optional[int]=None,
    levels: Optional[Union[str, list]]=None,
    levels_forecaster: Optional[Union[str, list]]=None,
    series_col_names: Optional[list]=None
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate,
        ForecasterRnn
    steps : int, list
        Number of future steps predicted.
    fitted: bool
        Tag to identify if the regressor has been fitted (trained).
    included_exog : bool
        If the forecaster has been trained using exogenous variable/s.
    index_type : type
        Type of index of the input used in training.
    index_freq : str
        Frequency of Index of the input used in training.
    window_size: int
        Size of the window needed to create the predictors. It is equal to 
        `max_lag`.
    last_window : pandas Series, pandas DataFrame, default `None`
        Values of the series used to create the predictors (lags) need in the 
        first iteration of prediction (t + 1).
    last_window_exog : pandas Series, pandas DataFrame, default `None`
        Values of the exogenous variables aligned with `last_window` in 
        ForecasterSarimax predictions.
    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s.
    exog_type : type, default `None`
        Type of exogenous variable/s used in training.
    exog_col_names : list, default `None`
        Names of the exogenous variables used during training.
    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.
    alpha : float, default `None`
        The confidence intervals used in ForecasterSarimax are (1 - alpha) %.
    max_steps: int, default `None`
        Maximum number of steps allowed (`ForecasterAutoregDirect` and 
        `ForecasterAutoregMultiVariate`).
    levels : str, list, default `None`
        Time series to be predicted (`ForecasterAutoregMultiSeries`,
        `ForecasterAutoregMultiSeriesCustom` and `ForecasterRnn).
    levels_forecaster : str, list, default `None`
        Time series used as output data of a multiseries problem in a RNN problem
        (`ForecasterRnn`).
    series_col_names : list, default `None`
        Names of the columns used during fit (`ForecasterAutoregMultiSeries`, 
        `ForecasterAutoregMultiSeriesCustom`, `ForecasterAutoregMultiVariate`
        and `ForecasterRnn`).

    Returns
    -------
    None

    """

    if not fitted:
        raise NotFittedError(
            ("This Forecaster instance is not fitted yet. Call `fit` with "
             "appropriate arguments before using predict.")
        )

    if isinstance(steps, (int, np.integer)) and steps < 1:
        raise ValueError(
            f"`steps` must be an integer greater than or equal to 1. Got {steps}."
        )

    if isinstance(steps, list) and min(steps) < 1:
        raise ValueError(
           (f"The minimum value of `steps` must be equal to or greater than 1. "
            f"Got {min(steps)}.")
        )

    if max_steps is not None:
        if max(steps) > max_steps:
            raise ValueError(
                (f"The maximum value of `steps` must be less than or equal to "
                 f"the value of steps defined when initializing the forecaster. "
                 f"Got {max(steps)}, but the maximum is {max_steps}.")
            )

    if interval is not None or alpha is not None:
        check_interval(interval=interval, alpha=alpha)

    if forecaster_name in ['ForecasterAutoregMultiSeries', 
                           'ForecasterAutoregMultiSeriesCustom',
                           'ForecasterRnn']:
        if not isinstance(levels, (type(None), str, list)):
            raise TypeError(
                ("`levels` must be a `list` of column names, a `str` of a "
                 "column name or `None`.")
            )

        levels_to_check = levels_forecaster if forecaster_name == 'ForecasterRnn' else series_col_names
        if len(set(levels) - set(levels_to_check)) != 0:
            raise ValueError(
                (f"`levels` names must be included in the series used during fit "
                 f"({levels_to_check}). Got {levels}.")
            )

    if exog is None and included_exog:
        raise ValueError(
            ("Forecaster trained with exogenous variable/s. "
             "Same variable/s must be provided when predicting.")
        )

    if exog is not None and not included_exog:
        raise ValueError(
            ("Forecaster trained without exogenous variable/s. "
             "`exog` must be `None` when predicting.")
        )

    # Checks last_window
    # Check last_window type (pd.Series or pd.DataFrame according to forecaster)
    if isinstance(last_window, type(None)) and forecaster_name not in [
        'ForecasterAutoregMultiSeries', 
        'ForecasterAutoregMultiSeriesCustom',
        'ForecasterRnn'
    ]:
        raise ValueError(
            ("`last_window` was not stored during training. If you don't want "
             "to retrain the Forecaster, provide `last_window` as argument.")
        )

    if forecaster_name in ['ForecasterAutoregMultiSeries', 
                           'ForecasterAutoregMultiSeriesCustom',
                           'ForecasterAutoregMultiVariate',
                           'ForecasterRnn']:
        if not isinstance(last_window, pd.DataFrame):
            raise TypeError(
                f"`last_window` must be a pandas DataFrame. Got {type(last_window)}."
            )

        last_window_cols = last_window.columns.to_list()

        if forecaster_name in ['ForecasterAutoregMultiSeries', 
                               'ForecasterAutoregMultiSeriesCustom',
                               'ForecasterRnn'] and \
            len(set(levels) - set(last_window_cols)) != 0:
            raise ValueError(
                (f"`last_window` must contain a column(s) named as the level(s) "
                 f"to be predicted.\n"
                 f"    `levels` : {levels}\n"
                 f"    `last_window` columns : {last_window_cols}")
            )

        if forecaster_name == 'ForecasterAutoregMultiVariate':
            if len(set(series_col_names) - set(last_window_cols)) > 0:
                raise ValueError(
                    (f"`last_window` columns must be the same as the `series` "
                     f"column names used to create the X_train matrix.\n"
                     f"    `last_window` columns    : {last_window_cols}\n"
                     f"    `series` columns X train : {series_col_names}")
                )
    else:
        if not isinstance(last_window, pd.Series):
            raise TypeError(
                f"`last_window` must be a pandas Series. Got {type(last_window)}."
            )

    # Check last_window len, nulls and index (type and freq)
    if len(last_window) < window_size:
        raise ValueError(
            (f"`last_window` must have as many values as needed to "
             f"generate the predictors. For this forecaster it is {window_size}.")
        )
    if last_window.isnull().any().all():
        warnings.warn(
            ("`last_window` has missing values. Most of machine learning models do "
             "not allow missing values. `predict` method may fail."), 
             MissingValuesWarning
        )
    _, last_window_index = preprocess_last_window(
                               last_window   = last_window.iloc[:0],
                               return_values = False
                           ) 
    if not isinstance(last_window_index, index_type):
        raise TypeError(
            (f"Expected index of type {index_type} for `last_window`. "
             f"Got {type(last_window_index)}.")
        )
    if isinstance(last_window_index, pd.DatetimeIndex):
        if not last_window_index.freqstr == index_freq:
            raise TypeError(
                (f"Expected frequency of type {index_freq} for `last_window`. "
                 f"Got {last_window_index.freqstr}.")
            )

    # Checks exog
    if exog is not None:

        # Check type, nulls and expected type
        if forecaster_name in ['ForecasterAutoregMultiSeries', 
                               'ForecasterAutoregMultiSeriesCustom']:
            if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
                raise TypeError(
                    f"`exog` must be a pandas Series, DataFrame or dict. Got {type(exog)}."
                )

        if not isinstance(exog, exog_type):
            raise TypeError(
                f"Expected type for `exog`: {exog_type}. Got {type(exog)}."
            )

        if isinstance(exog, dict):
            exogs_to_check = [(f"`exog` for series '{k}'", v) 
                              for k, v in exog.items() if v is not None]
        else:
            exogs_to_check = [('`exog`', exog)]

        for exog_name, exog_to_check in exogs_to_check:

            if not isinstance(exog_to_check, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    f"{exog_name} must be a pandas Series or DataFrame. Got {type(exog_to_check)}"
                )

            if exog_to_check.isnull().any().any():
                warnings.warn(
                    (f"{exog_name} has missing values. Most of machine learning models "
                     f"do not allow missing values. `predict` method may fail."), 
                     MissingValuesWarning
                )

            # Check exog has many values as distance to max step predicted
            last_step = max(steps) if isinstance(steps, list) else steps
            if len(exog_to_check) < last_step:
                if forecaster_name in ['ForecasterAutoregMultiSeries', 
                                       'ForecasterAutoregMultiSeriesCustom']:
                    warnings.warn(
                        (f"{exog_name} doesn't have as many values as steps "
                         f"predicted, {last_step}. Missing values are filled "
                         f"with NaN. Most of machine learning models do not "
                         f"allow missing values. `predict` method may fail."),
                         MissingValuesWarning
                    )
                else: 
                    raise ValueError(
                        (f"{exog_name} must have at least as many values as "
                         f"steps predicted, {last_step}.")
                    )

            # Check name/columns are in exog_col_names
            if isinstance(exog_to_check, pd.DataFrame):
                col_missing = set(exog_col_names).difference(set(exog_to_check.columns))
                if col_missing:
                    if forecaster_name in ['ForecasterAutoregMultiSeries', 
                                           'ForecasterAutoregMultiSeriesCustom']:
                        warnings.warn(
                            (f"{col_missing} not present in {exog_name}. All "
                             f"values will be NaN."),
                             MissingExogWarning
                        ) 
                    else:
                        raise ValueError(
                            (f"Missing columns in {exog_name}. Expected {exog_col_names}. "
                             f"Got {exog_to_check.columns.to_list()}.")
                        )
            else:
                if exog_to_check.name is None:
                    raise ValueError(
                        (f"When {exog_name} is a pandas Series, it must have a name. Got None.")
                    )

                if exog_to_check.name not in exog_col_names:
                    if forecaster_name in ['ForecasterAutoregMultiSeries', 
                                           'ForecasterAutoregMultiSeriesCustom']:
                        warnings.warn(
                            (f"'{exog_to_check.name}' was not observed during training. "
                             f"{exog_name} is ignored. Exogenous variables must be one "
                             f"of: {exog_col_names}."),
                             IgnoredArgumentWarning
                        )
                    else:
                        raise ValueError(
                            (f"'{exog_to_check.name}' was not observed during training. "
                             f"Exogenous variables must be: {exog_col_names}.")
                        )

            # Check index dtype and freq
            _, exog_index = preprocess_exog(
                                exog          = exog_to_check.iloc[:0, ],
                                return_values = False
                            )
            if not isinstance(exog_index, index_type):
                raise TypeError(
                    (f"Expected index of type {index_type} for {exog_name}. "
                     f"Got {type(exog_index)}.")
                )
            if forecaster_name not in ['ForecasterAutoregMultiSeries', 
                                       'ForecasterAutoregMultiSeriesCustom']:
                if isinstance(exog_index, pd.DatetimeIndex):
                    if not exog_index.freqstr == index_freq:
                        raise TypeError(
                            (f"Expected frequency of type {index_freq} for {exog_name}. "
                             f"Got {exog_index.freqstr}.")
                        )

            # Check exog starts one step ahead of last_window end.
            expected_index = expand_index(last_window.index, 1)[0]
            if expected_index != exog_to_check.index[0]:
                if forecaster_name in ['ForecasterAutoregMultiSeries', 
                                       'ForecasterAutoregMultiSeriesCustom']:
                    warnings.warn(
                        (f"To make predictions {exog_name} must start one step "
                         f"ahead of `last_window`. Missing values are filled "
                         f"with NaN.\n"
                         f"    `last_window` ends at : {last_window.index[-1]}.\n"
                         f"    {exog_name} starts at : {exog_to_check.index[0]}.\n"
                         f"     Expected index       : {expected_index}."),
                         MissingValuesWarning
                    )  
                else:
                    raise ValueError(
                        (f"To make predictions {exog_name} must start one step "
                         f"ahead of `last_window`.\n"
                         f"    `last_window` ends at : {last_window.index[-1]}.\n"
                         f"    {exog_name} starts at : {exog_to_check.index[0]}.\n"
                         f"     Expected index : {expected_index}.")
                    )

    # Checks ForecasterSarimax
    if forecaster_name == 'ForecasterSarimax':
        # Check last_window_exog type, len, nulls and index (type and freq)
        if last_window_exog is not None:
            if not included_exog:
                raise ValueError(
                    ("Forecaster trained without exogenous variable/s. "
                     "`last_window_exog` must be `None` when predicting.")
                )

            if not isinstance(last_window_exog, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    (f"`last_window_exog` must be a pandas Series or a "
                     f"pandas DataFrame. Got {type(last_window_exog)}.")
                )
            if len(last_window_exog) < window_size:
                raise ValueError(
                    (f"`last_window_exog` must have as many values as needed to "
                     f"generate the predictors. For this forecaster it is {window_size}.")
                )
            if last_window_exog.isnull().any().all():
                warnings.warn(
                ("`last_window_exog` has missing values. Most of machine learning "
                 "models do not allow missing values. `predict` method may fail."),
                MissingValuesWarning
            )
            _, last_window_exog_index = preprocess_last_window(
                                            last_window   = last_window_exog.iloc[:0],
                                            return_values = False
                                        ) 
            if not isinstance(last_window_exog_index, index_type):
                raise TypeError(
                    (f"Expected index of type {index_type} for `last_window_exog`. "
                     f"Got {type(last_window_exog_index)}.")
                )
            if isinstance(last_window_exog_index, pd.DatetimeIndex):
                if not last_window_exog_index.freqstr == index_freq:
                    raise TypeError(
                        (f"Expected frequency of type {index_freq} for "
                         f"`last_window_exog`. Got {last_window_exog_index.freqstr}.")
                    )

            # Check all columns are in the pd.DataFrame, last_window_exog
            if isinstance(last_window_exog, pd.DataFrame):
                col_missing = set(exog_col_names).difference(set(last_window_exog.columns))
                if col_missing:
                    raise ValueError(
                        (f"Missing columns in `last_window_exog`. Expected {exog_col_names}. "
                         f"Got {last_window_exog.columns.to_list()}.") 
                    )
            else:
                if last_window_exog.name is None:
                    raise ValueError(
                        (
                            "When `last_window_exog` is a pandas Series, it must have a "
                            "name. Got None."
                        )
                    )

                if last_window_exog.name not in exog_col_names:
                    raise ValueError(
                        (f"'{last_window_exog.name}' was not observed during training. "
                         f"Exogenous variables must be: {exog_col_names}.")
                    )

    return

`preprocess_y(y, return_values=True)` ¶

Return values and index of series separately. Index is overwritten according to the next rules:

If index is of type DatetimeIndex and has frequency, nothing is changed.
If index is of type RangeIndex, nothing is changed.
If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`y`	`pandas Series, pandas DataFrame`	Time series.	required
`return_values`	`bool`	If `True` return the values of `y` as numpy ndarray. This option is intended to avoid copying data when it is not necessary.	`True`

Returns:

Name	Type	Description
`y_values`	`None, numpy ndarray`	Numpy array with values of `y`.
`y_index`	`pandas Index`	Index of `y` modified according to the rules.

Source code in skforecast\utils\utils.py

def preprocess_y(
    y: Union[pd.Series, pd.DataFrame],
    return_values: bool=True
) -> Tuple[Union[None, np.ndarray], pd.Index]:
    """
    Return values and index of series separately. Index is overwritten 
    according to the next rules:

    - If index is of type `DatetimeIndex` and has frequency, nothing is 
    changed.
    - If index is of type `RangeIndex`, nothing is changed.
    - If index is of type `DatetimeIndex` but has no frequency, a 
    `RangeIndex` is created.
    - If index is not of type `DatetimeIndex`, a `RangeIndex` is created.

    Parameters
    ----------
    y : pandas Series, pandas DataFrame
        Time series.
    return_values : bool, default `True`
        If `True` return the values of `y` as numpy ndarray. This option is 
        intended to avoid copying data when it is not necessary.

    Returns
    -------
    y_values : None, numpy ndarray
        Numpy array with values of `y`.
    y_index : pandas Index
        Index of `y` modified according to the rules.

    """

    if isinstance(y.index, pd.DatetimeIndex) and y.index.freq is not None:
        y_index = y.index
    elif isinstance(y.index, pd.RangeIndex):
        y_index = y.index
    elif isinstance(y.index, pd.DatetimeIndex) and y.index.freq is None:
        warnings.warn(
            ("Series has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        y_index = pd.RangeIndex(
                      start = 0,
                      stop  = len(y),
                      step  = 1
                  )
    else:
        warnings.warn(
            ("Series has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        y_index = pd.RangeIndex(
                      start = 0,
                      stop  = len(y),
                      step  = 1
                  )

    y_values = y.to_numpy(copy=True) if return_values else None

    return y_values, y_index

`preprocess_last_window(last_window, return_values=True)` ¶

Return values and index of series separately. Index is overwritten according to the next rules:

If index is of type DatetimeIndex and has frequency, nothing is changed.
If index is of type RangeIndex, nothing is changed.
If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`last_window`	`pandas Series, pandas DataFrame`	Time series values.	required
`return_values`	`bool`	If `True` return the values of `last_window` as numpy ndarray. This option is intended to avoid copying data when it is not necessary.	`True`

Returns:

Name	Type	Description
`last_window_values`	`numpy ndarray`	Numpy array with values of `last_window`.
`last_window_index`	`pandas Index`	Index of `last_window` modified according to the rules.

Source code in skforecast\utils\utils.py

def preprocess_last_window(
    last_window: Union[pd.Series, pd.DataFrame],
    return_values: bool=True
 ) -> Tuple[np.ndarray, pd.Index]:
    """
    Return values and index of series separately. Index is overwritten 
    according to the next rules:

    - If index is of type `DatetimeIndex` and has frequency, nothing is 
    changed.
    - If index is of type `RangeIndex`, nothing is changed.
    - If index is of type `DatetimeIndex` but has no frequency, a 
    `RangeIndex` is created.
    - If index is not of type `DatetimeIndex`, a `RangeIndex` is created.

    Parameters
    ----------
    last_window : pandas Series, pandas DataFrame
        Time series values.
    return_values : bool, default `True`
        If `True` return the values of `last_window` as numpy ndarray. This option 
        is intended to avoid copying data when it is not necessary.

    Returns
    -------
    last_window_values : numpy ndarray
        Numpy array with values of `last_window`.
    last_window_index : pandas Index
        Index of `last_window` modified according to the rules.

    """

    if isinstance(last_window.index, pd.DatetimeIndex) and last_window.index.freq is not None:
        last_window_index = last_window.index
    elif isinstance(last_window.index, pd.RangeIndex):
        last_window_index = last_window.index
    elif isinstance(last_window.index, pd.DatetimeIndex) and last_window.index.freq is None:
        warnings.warn(
            ("`last_window` has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        last_window_index = pd.RangeIndex(
                                start = 0,
                                stop  = len(last_window),
                                step  = 1
                            )
    else:
        warnings.warn(
            ("`last_window` has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        last_window_index = pd.RangeIndex(
                                start = 0,
                                stop  = len(last_window),
                                step  = 1
                            )

    last_window_values = last_window.to_numpy(copy=True) if return_values else None

    return last_window_values, last_window_index

`preprocess_exog(exog, return_values=True)` ¶

Return values and index of series or data frame separately. Index is overwritten according to the next rules:

If index is of type DatetimeIndex and has frequency, nothing is changed.
If index is of type RangeIndex, nothing is changed.
If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables.	required
`return_values`	`bool`	If `True` return the values of `exog` as numpy ndarray. This option is intended to avoid copying data when it is not necessary.	`True`

Returns:

Name	Type	Description
`exog_values`	`None, numpy ndarray`	Numpy array with values of `exog`.
`exog_index`	`pandas Index`	Index of `exog` modified according to the rules.

Source code in skforecast\utils\utils.py

def preprocess_exog(
    exog: Union[pd.Series, pd.DataFrame],
    return_values: bool=True
) -> Tuple[Union[None, np.ndarray], pd.Index]:
    """
    Return values and index of series or data frame separately. Index is
    overwritten  according to the next rules:

    - If index is of type `DatetimeIndex` and has frequency, nothing is 
    changed.
    - If index is of type `RangeIndex`, nothing is changed.
    - If index is of type `DatetimeIndex` but has no frequency, a 
    `RangeIndex` is created.
    - If index is not of type `DatetimeIndex`, a `RangeIndex` is created.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.
    return_values : bool, default `True`
        If `True` return the values of `exog` as numpy ndarray. This option is 
        intended to avoid copying data when it is not necessary.

    Returns
    -------
    exog_values : None, numpy ndarray
        Numpy array with values of `exog`.
    exog_index : pandas Index
        Index of `exog` modified according to the rules.

    """

    if isinstance(exog.index, pd.DatetimeIndex) and exog.index.freq is not None:
        exog_index = exog.index
    elif isinstance(exog.index, pd.RangeIndex):
        exog_index = exog.index
    elif isinstance(exog.index, pd.DatetimeIndex) and exog.index.freq is None:
        warnings.warn(
            ("`exog` has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        exog_index = pd.RangeIndex(
                         start = 0,
                         stop  = len(exog),
                         step  = 1
                     )

    else:
        warnings.warn(
            ("`exog` has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        exog_index = pd.RangeIndex(
                         start = 0,
                         stop  = len(exog),
                         step  = 1
                     )

    exog_values = exog.to_numpy(copy=True) if return_values else None

    return exog_values, exog_index

`cast_exog_dtypes(exog, exog_dtypes)` ¶

Cast exog to a specified types. This is done because, for a forecaster to accept a categorical exog, it must contain only integer values. Due to the internal modifications of numpy, the values may be casted to float, so they have to be re-converted to int.

If exog is a pandas Series, exog_dtypes must be a dict with a single value.
If exog_dtypes is category but the current type of exog is float, then the type is cast to int and then to category.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables.	required
`exog_dtypes`	`dict`	Dictionary with name and type of the series or data frame columns.	required

Returns:

Name	Type	Description
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables casted to the indicated dtypes.

Source code in skforecast\utils\utils.py

def cast_exog_dtypes(
    exog: Union[pd.Series, pd.DataFrame],
    exog_dtypes: dict,
) -> Union[pd.Series, pd.DataFrame]: # pragma: no cover
    """
    Cast `exog` to a specified types. This is done because, for a forecaster to 
    accept a categorical exog, it must contain only integer values. Due to the 
    internal modifications of numpy, the values may be casted to `float`, so 
    they have to be re-converted to `int`.

    - If `exog` is a pandas Series, `exog_dtypes` must be a dict with a 
    single value.
    - If `exog_dtypes` is `category` but the current type of `exog` is `float`, 
    then the type is cast to `int` and then to `category`. 

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.
    exog_dtypes: dict
        Dictionary with name and type of the series or data frame columns.

    Returns
    -------
    exog : pandas Series, pandas DataFrame
        Exogenous variables casted to the indicated dtypes.

    """

    # Remove keys from exog_dtypes not in exog.columns
    exog_dtypes = {k:v for k, v in exog_dtypes.items() if k in exog.columns}

    if isinstance(exog, pd.Series) and exog.dtypes != list(exog_dtypes.values())[0]:
        exog = exog.astype(list(exog_dtypes.values())[0])
    elif isinstance(exog, pd.DataFrame):
        for col, initial_dtype in exog_dtypes.items():
            if exog[col].dtypes != initial_dtype:
                if initial_dtype == "category" and exog[col].dtypes==float:
                    exog[col] = exog[col].astype(int).astype("category")
                else:
                    exog[col] = exog[col].astype(initial_dtype)

    return exog

`exog_to_direct(exog, steps)` ¶

Transforms exog to a pandas DataFrame with the shape needed for Direct forecasting.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables.	required
`steps`	`int.`	Number of steps that will be predicted using exog.	required

Returns:

Name	Type	Description
`exog_transformed`	`pandas DataFrame`	Exogenous variables transformed.

Source code in skforecast\utils\utils.py

def exog_to_direct(
    exog: Union[pd.Series, pd.DataFrame],
    steps: int
)-> pd.DataFrame:
    """
    Transforms `exog` to a pandas DataFrame with the shape needed for Direct
    forecasting.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.
    steps : int.
        Number of steps that will be predicted using exog.

    Returns
    -------
    exog_transformed : pandas DataFrame
        Exogenous variables transformed.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}.")

    if isinstance(exog, pd.Series):
        exog = exog.to_frame()

    n_rows = len(exog)
    exog_idx = exog.index
    exog_transformed = []

    for i in range(steps):
        exog_column_transformed = exog.iloc[i : n_rows - (steps - 1 - i), ]
        exog_column_transformed.index = pd.RangeIndex(len(exog_column_transformed))
        exog_column_transformed.columns = [f"{col}_step_{i+1}" 
                                           for col in exog_column_transformed.columns]
        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = pd.concat(exog_transformed, axis=1, copy=False)
    else:
        exog_transformed = exog_column_transformed

    exog_transformed.index = exog_idx[-len(exog_transformed):]

    return exog_transformed

`exog_to_direct_numpy(exog, steps)` ¶

Transforms exog to numpy ndarray with the shape needed for Direct forecasting.

Parameters:

Name	Type	Description	Default
`exog`	`numpy ndarray, shape(samples,)`	Exogenous variables.	required
`steps`	`int.`	Number of steps that will be predicted using exog.	required

Returns:

Name	Type	Description
`exog_transformed`	`numpy ndarray`	Exogenous variables transformed.

Source code in skforecast\utils\utils.py

def exog_to_direct_numpy(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to numpy ndarray with the shape needed for Direct
    forecasting.

    Parameters
    ----------
    exog : numpy ndarray, shape(samples,)
        Exogenous variables.
    steps : int.
        Number of steps that will be predicted using exog.

    Returns
    -------
    exog_transformed : numpy ndarray
        Exogenous variables transformed.

    """

    if not isinstance(exog, np.ndarray):
        raise TypeError(f"`exog` must be a numpy ndarray. Got {type(exog)}.")

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    n_rows = len(exog)
    exog_transformed = []

    for i in range(steps):
        exog_column_transformed = exog[i : n_rows - (steps - 1 - i)]
        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = np.concatenate(exog_transformed, axis=1)
    else:
        exog_transformed = exog_column_transformed.copy()

    return exog_transformed

`expand_index(index, steps)` ¶

Create a new index of length steps starting at the end of the index.

Parameters:

Name	Type	Description	Default
`index`	`pandas Index, None`	Original index.	required
`steps`	`int`	Number of steps to expand.	required

Returns:

Name	Type	Description
`new_index`	`pandas Index`	New index.

Source code in skforecast\utils\utils.py

def expand_index(
    index: Union[pd.Index, None], 
    steps: int
) -> pd.Index:
    """
    Create a new index of length `steps` starting at the end of the index.

    Parameters
    ----------
    index : pandas Index, None
        Original index.
    steps : int
        Number of steps to expand.

    Returns
    -------
    new_index : pandas Index
        New index.

    """

    if isinstance(index, pd.Index):

        if isinstance(index, pd.DatetimeIndex):
            new_index = pd.date_range(
                            start   = index[-1] + index.freq,
                            periods = steps,
                            freq    = index.freq
                        )
        elif isinstance(index, pd.RangeIndex):
            new_index = pd.RangeIndex(
                            start = index[-1] + 1,
                            stop  = index[-1] + 1 + steps
                        )
        else:
            raise TypeError(
                "Argument `index` must be a pandas DatetimeIndex or RangeIndex."
            )
    else:
        new_index = pd.RangeIndex(
                        start = 0,
                        stop  = steps
                    )

    return new_index

`transform_series(series, transformer, fit=False, inverse_transform=False)` ¶

Transform raw values of pandas Series with a scikit-learn alike transformer (preprocessor). The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name	Type	Description	Default
`series`	`pandas Series`	Series to be transformed.	required
`transformer`	`scikit-learn alike transformer (preprocessor).`	scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation.	`False`

Returns:

Name	Type	Description
`series_transformed`	`pandas Series, pandas DataFrame`	Transformed Series. Depending on the transformer used, the output may be a Series or a DataFrame.

Source code in skforecast\utils\utils.py

def transform_series(
    series: pd.Series,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> Union[pd.Series, pd.DataFrame]:
    """      
    Transform raw values of pandas Series with a scikit-learn alike transformer
    (preprocessor). The transformer used must have the following methods: fit, 
    transform, fit_transform and inverse_transform. ColumnTransformers are not 
    allowed since they do not have inverse_transform method.

    Parameters
    ----------
    series : pandas Series
        Series to be transformed.
    transformer : scikit-learn alike transformer (preprocessor).
        scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform. ColumnTransformers are not allowed 
        since they do not have inverse_transform method.
    fit : bool, default `False`
        Train the transformer before applying it.
    inverse_transform : bool, default `False`
        Transform back the data to the original representation.

    Returns
    -------
    series_transformed : pandas Series, pandas DataFrame
        Transformed Series. Depending on the transformer used, the output may 
        be a Series or a DataFrame.

    """

    if not isinstance(series, pd.Series):
        raise TypeError(
            (f"`series` argument must be a pandas Series. Got {type(series)}.")
        )

    if transformer is None:
        return series

    if series.name is None:
        series.name = 'no_name'

    data = series.to_frame()

    if fit and hasattr(transformer, 'fit'):
        transformer.fit(data)

    # If argument feature_names_in_ exits, is overwritten to allow using the 
    # transformer on other series than those that were passed during fit.
    if hasattr(transformer, 'feature_names_in_') and transformer.feature_names_in_[0] != data.columns[0]:
        transformer = deepcopy(transformer)
        transformer.feature_names_in_ = np.array([data.columns[0]], dtype=object)

    if inverse_transform:
        values_transformed = transformer.inverse_transform(data)
    else:
        values_transformed = transformer.transform(data)   

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense array.
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, np.ndarray) and values_transformed.shape[1] == 1:
        series_transformed = pd.Series(
                                 data  = values_transformed.flatten(),
                                 index = data.index,
                                 name  = data.columns[0]
                             )
    elif isinstance(values_transformed, pd.DataFrame) and values_transformed.shape[1] == 1:
        series_transformed = values_transformed.squeeze()
    else:
        series_transformed = pd.DataFrame(
                                 data    = values_transformed,
                                 index   = data.index,
                                 columns = transformer.get_feature_names_out()
                             )

    return series_transformed

`transform_dataframe(df, transformer, fit=False, inverse_transform=False)` ¶

Transform raw values of pandas DataFrame with a scikit-learn alike transformer, preprocessor or ColumnTransformer. inverse_transform is not available when using ColumnTransformers.

Parameters:

Name	Type	Description	Default
`df`	`pandas DataFrame`	DataFrame to be transformed.	required
`transformer`	`scikit-learn alike transformer, preprocessor or ColumnTransformer.`	scikit-learn alike transformer, preprocessor or ColumnTransformer.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.	`False`

Returns:

Name	Type	Description
`df_transformed`	`pandas DataFrame`	Transformed DataFrame.

Source code in skforecast\utils\utils.py

def transform_dataframe(
    df: pd.DataFrame,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> pd.DataFrame:
    """      
    Transform raw values of pandas DataFrame with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer. `inverse_transform` is not 
    available when using ColumnTransformers.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame to be transformed.
    transformer : scikit-learn alike transformer, preprocessor or ColumnTransformer.
        scikit-learn alike transformer, preprocessor or ColumnTransformer.
    fit : bool, default `False`
        Train the transformer before applying it.
    inverse_transform : bool, default `False`
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    df_transformed : pandas DataFrame
        Transformed DataFrame.

    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            f"`df` argument must be a pandas DataFrame. Got {type(df)}"
        )

    if transformer is None:
        return df

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise Exception(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    if not inverse_transform:
        if fit:
            values_transformed = transformer.fit_transform(df)
        else:
            values_transformed = transformer.transform(df)
    else:
        values_transformed = transformer.inverse_transform(df)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if hasattr(transformer, 'get_feature_names_out'):
        feature_names_out = transformer.get_feature_names_out()
    elif hasattr(transformer, 'categories_'):   
        feature_names_out = transformer.categories_
    else:
        feature_names_out = df.columns

    df_transformed = pd.DataFrame(
                         data    = values_transformed,
                         index   = df.index,
                         columns = feature_names_out
                     )

    return df_transformed

`check_optional_dependency(package_name)` ¶

Check if an optional dependency is installed, if not raise an ImportError
with installation instructions.

Parameters:

Name	Type	Description	Default
`package_name`	`str`	Name of the package to check.	required

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_optional_dependency(
    package_name: str
) -> None:
    """
    Check if an optional dependency is installed, if not raise an ImportError  
    with installation instructions.

    Parameters
    ----------
    package_name : str
        Name of the package to check.

    Returns
    -------
    None

    """

    if importlib.util.find_spec(package_name) is None:
        try:
            extra, package_version = _find_optional_dependency(package_name=package_name)
            msg = (
                f"\n'{package_name}' is an optional dependency not included in the default "
                f"skforecast installation. Please run: `pip install \"{package_version}\"` to install it."
                f"\n\nAlternately, you can install it by running `pip install skforecast[{extra}]`"
            )
        except:
            msg = f"\n'{package_name}' is needed but not installed. Please install it."

        raise ImportError(msg)

`multivariate_time_series_corr(time_series, other, lags, method='pearson')` ¶

Compute correlation between a time_series and the lagged values of other time series.

Parameters:

Name	Type	Description	Default
`time_series`	`pandas Series`	Target time series.	required
`other`	`pandas DataFrame`	Time series whose lagged values are correlated to `time_series`.	required
`lags`	`int, list, numpy ndarray`	Lags to be included in the correlation analysis.	required
`method`	`str`	'pearson': standard correlation coefficient. 'kendall': Kendall Tau correlation coefficient. 'spearman': Spearman rank correlation.	`'pearson'`

Returns:

Name	Type	Description
`corr`	`pandas DataFrame`	Correlation values.

Source code in skforecast\utils\utils.py

def multivariate_time_series_corr(
    time_series: pd.Series,
    other: pd.DataFrame,
    lags: Union[int, list, np.array],
    method: str='pearson'
)-> pd.DataFrame:
    """
    Compute correlation between a time_series and the lagged values of other 
    time series. 

    Parameters
    ----------
    time_series : pandas Series
        Target time series.
    other : pandas DataFrame
        Time series whose lagged values are correlated to `time_series`.
    lags : int, list, numpy ndarray
        Lags to be included in the correlation analysis.
    method : str, default 'pearson'
        - 'pearson': standard correlation coefficient.
        - 'kendall': Kendall Tau correlation coefficient.
        - 'spearman': Spearman rank correlation.

    Returns
    -------
    corr : pandas DataFrame
        Correlation values.

    """

    if not len(time_series) == len(other):
        raise ValueError("`time_series` and `other` must have the same length.")

    if not (time_series.index == other.index).all():
        raise ValueError("`time_series` and `other` must have the same index.")

    if isinstance(lags, int):
        lags = range(lags)

    corr = {}
    for col in other.columns:
        lag_values = {}
        for lag in lags:
            lag_values[lag] = other[col].shift(lag)

        lag_values = pd.DataFrame(lag_values)
        lag_values.insert(0, None, time_series)
        corr[col] = lag_values.corr(method=method).iloc[1:, 0]

    corr = pd.DataFrame(corr)
    corr.index = corr.index.astype('int64')
    corr.index.name = "lag"

    return corr

`check_backtesting_input(forecaster, steps, metric, y=None, series=None, exog=None, initial_train_size=None, fixed_train_size=True, gap=0, allow_incomplete_fold=True, refit=False, interval=None, alpha=None, n_boot=500, random_state=123, in_sample_residuals=True, n_jobs='auto', verbose=False, show_progress=True, suppress_warnings=False)` ¶

This is a helper function to check most inputs of backtesting functions in modules model_selection, model_selection_multiseries and model_selection_sarimax.

Parameters:

Name	Type	Description	Default
`forecaster`	`Forecaster`	Forecaster model.	required
`steps`	`(int, list)`	Number of future steps predicted.	required
`metric`	`(str, Callable, list)`	Metric used to quantify the goodness of fit of the model.	required
`y`	`pandas Series`	Training time series for uni-series forecasters.	`None`
`series`	`pandas DataFrame, dict`	Training time series for multi-series forecasters.	`None`
`exog`	`pandas Series, pandas DataFrame, dict`	Exogenous variables.	`None`
`initial_train_size`	`int`	Number of samples in the initial train split. If `None` and `forecaster` is already trained, no initial train is done and all data is used to evaluate the model.	`None`
`fixed_train_size`	`bool`	If True, train size doesn't increase but moves by `steps` in each iteration.	`True`
`gap`	`int`	Number of samples to be excluded after the end of each training set and before the test set.	`0`
`allow_incomplete_fold`	`bool`	Last fold is allowed to have a smaller number of samples than the `test_size`. If `False`, the last fold is excluded.	`True`
`refit`	`(bool, int)`	Whether to re-fit the forecaster in each iteration. If `refit` is an integer, the Forecaster will be trained every that number of iterations.	`False`
`interval`	`list`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive.	`None`
`alpha`	`float`	The confidence intervals used in ForecasterSarimax are (1 - alpha) %.	`None`
`n_boot`	`int`	Number of bootstrapping iterations used to estimate prediction intervals.	`500`
`random_state`	`int`	Sets a seed to the random generator, so that boot intervals are always deterministic.	`123`
`in_sample_residuals`	`bool`	If `True`, residuals from the training data are used as proxy of prediction error to create prediction intervals. If `False`, out_sample_residuals are used if they are already stored inside the forecaster.	`True`
`n_jobs`	`(int, auto)`	The number of jobs to run in parallel. If `-1`, then the number of jobs is set to the number of cores. If 'auto', `n_jobs` is set using the fuction skforecast.utils.select_n_jobs_fit_forecaster. New in version 0.9.0	`'auto'`
`verbose`	`bool`	Print number of folds and index of training and validation sets used for backtesting.	`False`
`show_progress`	`bool`	Whether to show a progress bar.	`True`
`suppress_warnings`	`bool`	If `True`, skforecast warnings will be suppressed during the backtesting process. See skforecast.exceptions.warn_skforecast_categories for more information.	`False`

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def check_backtesting_input(
    forecaster: object,
    steps: int,
    metric: Union[str, Callable, list],
    y: Optional[pd.Series]=None,
    series: Optional[Union[pd.DataFrame, dict]]=None,
    exog: Optional[Union[pd.Series, pd.DataFrame, dict]]=None,
    initial_train_size: Optional[int]=None,
    fixed_train_size: bool=True,
    gap: int=0,
    allow_incomplete_fold: bool=True,
    refit: Union[bool, int]=False,
    interval: Optional[list]=None,
    alpha: Optional[float]=None,
    n_boot: int=500,
    random_state: int=123,
    in_sample_residuals: bool=True,
    n_jobs: Union[int, str]='auto',
    verbose: bool=False,
    show_progress: bool=True,
    suppress_warnings: bool=False
) -> None:
    """
    This is a helper function to check most inputs of backtesting functions in 
    modules `model_selection`, `model_selection_multiseries` and 
    `model_selection_sarimax`.

    Parameters
    ----------
    forecaster : Forecaster
        Forecaster model.
    steps : int, list
        Number of future steps predicted.
    metric : str, Callable, list
        Metric used to quantify the goodness of fit of the model.
    y : pandas Series, default `None`
        Training time series for uni-series forecasters.
    series : pandas DataFrame, dict, default `None`
        Training time series for multi-series forecasters.
    exog : pandas Series, pandas DataFrame, dict, default `None`
        Exogenous variables.
    initial_train_size : int, default `None`
        Number of samples in the initial train split. If `None` and `forecaster` 
        is already trained, no initial train is done and all data is used to 
        evaluate the model.
    fixed_train_size : bool, default `True`
        If True, train size doesn't increase but moves by `steps` in each iteration.
    gap : int, default `0`
        Number of samples to be excluded after the end of each training set and 
        before the test set.
    allow_incomplete_fold : bool, default `True`
        Last fold is allowed to have a smaller number of samples than the 
        `test_size`. If `False`, the last fold is excluded.
    refit : bool, int, default `False`
        Whether to re-fit the forecaster in each iteration. If `refit` is an 
        integer, the Forecaster will be trained every that number of iterations.
    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive.
    alpha : float, default `None`
        The confidence intervals used in ForecasterSarimax are (1 - alpha) %. 
    n_boot : int, default `500`
        Number of bootstrapping iterations used to estimate prediction
        intervals.
    random_state : int, default `123`
        Sets a seed to the random generator, so that boot intervals are always 
        deterministic.
    in_sample_residuals : bool, default `True`
        If `True`, residuals from the training data are used as proxy of prediction 
        error to create prediction intervals.  If `False`, out_sample_residuals 
        are used if they are already stored inside the forecaster.
    n_jobs : int, 'auto', default `'auto'`
        The number of jobs to run in parallel. If `-1`, then the number of jobs is 
        set to the number of cores. If 'auto', `n_jobs` is set using the fuction
        skforecast.utils.select_n_jobs_fit_forecaster.
        **New in version 0.9.0**
    verbose : bool, default `False`
        Print number of folds and index of training and validation sets used 
        for backtesting.
    show_progress : bool, default `True`
        Whether to show a progress bar.
    suppress_warnings: bool, default `False`
        If `True`, skforecast warnings will be suppressed during the backtesting 
        process. See skforecast.exceptions.warn_skforecast_categories for more
        information.

    Returns
    -------
    None

    """

    forecasters_uni = [
        "ForecasterAutoreg",
        "ForecasterAutoregCustom",
        "ForecasterAutoregDirect",
        "ForecasterSarimax",
        "ForecasterEquivalentDate",
    ]
    forecasters_multi = [
        "ForecasterAutoregMultiVariate",
        "ForecasterRnn",
    ]
    forecasters_multi_dict = [
        "ForecasterAutoregMultiSeries",
        "ForecasterAutoregMultiSeriesCustom",
    ]

    forecaster_name = type(forecaster).__name__

    if forecaster_name in forecasters_uni:
        if not isinstance(y, pd.Series):
            raise TypeError("`y` must be a pandas Series.")
        data_name = 'y'
        data_length = len(y)

    elif forecaster_name in forecasters_multi:
        if not isinstance(series, pd.DataFrame):
            raise TypeError("`series` must be a pandas DataFrame.")
        data_name = 'series'
        data_length = len(series)

    elif forecaster_name in forecasters_multi_dict:
        if not isinstance(series, (pd.DataFrame, dict)):
            raise TypeError(
                (f"`series` must be a pandas DataFrame or a dict of DataFrames or Series. "
                 f"Got {type(series)}.")
            )

        data_name = 'series'
        if isinstance(series, dict):
            not_valid_series = [
                k 
                for k, v in series.items()
                if not isinstance(v, (pd.Series, pd.DataFrame))
            ]
            if not_valid_series:
                raise TypeError(
                    (f"If `series` is a dictionary, all series must be a named "
                     f"pandas Series or a pandas DataFrame with a single column. "
                     f"Review series: {not_valid_series}")
                )
            not_valid_index = [
                k 
                for k, v in series.items()
                if not isinstance(v.index, pd.DatetimeIndex)
            ]
            if not_valid_index:
                raise ValueError(
                    (f"If `series` is a dictionary, all series must have a Pandas "
                     f"DatetimeIndex as index with the same frequency. "
                     f"Review series: {not_valid_index}")
                )

            indexes_freq = [f'{v.index.freq}' for v in series.values()]
            indexes_freq = sorted(set(indexes_freq))
            if not len(indexes_freq) == 1:
                raise ValueError(
                    (f"If `series` is a dictionary, all series must have a Pandas "
                     f"DatetimeIndex as index with the same frequency. "
                     f"Found frequencies: {indexes_freq}")
                )
            data_length = max([len(series[serie]) for serie in series])
        else:
            data_length = len(series)

    if exog is not None:
        if forecaster_name in forecasters_multi_dict:
            if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
                raise TypeError(
                    (f"`exog` must be a pandas Series, DataFrame, dictionary of pandas "
                     f"Series/DataFrames or None. Got {type(exog)}.")
                )
            if isinstance(exog, dict):
                not_valid_exog = [
                    k 
                    for k, v in exog.items()
                    if not isinstance(v, (pd.Series, pd.DataFrame, type(None)))
                ]
                if not_valid_exog:
                    raise TypeError(
                        (f"If `exog` is a dictionary, All exog must be a named pandas "
                         f"Series, a pandas DataFrame or None. Review exog: {not_valid_exog}")
                    )
        else:
            if not isinstance(exog, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    (f"`exog` must be a pandas Series, DataFrame or None. Got {type(exog)}.")
                )

    if not isinstance(steps, (int, np.integer)) or steps < 1:
        raise TypeError(
            f"`steps` must be an integer greater than or equal to 1. Got {steps}."
        )
    if not isinstance(gap, (int, np.integer)) or gap < 0:
        raise TypeError(
            f"`gap` must be an integer greater than or equal to 0. Got {gap}."
        )
    if not isinstance(metric, (str, Callable, list)):
        raise TypeError(
            (f"`metric` must be a string, a callable function, or a list containing "
             f"multiple strings and/or callables. Got {type(metric)}.")
        )

    if forecaster_name == "ForecasterEquivalentDate" and isinstance(
        forecaster.offset, pd.tseries.offsets.DateOffset
    ):
        pass
    elif initial_train_size is not None:
        if not isinstance(initial_train_size, (int, np.integer)):
            raise TypeError(
                (f"If used, `initial_train_size` must be an integer greater than the "
                 f"window_size of the forecaster. Got type {type(initial_train_size)}.")
            )
        if initial_train_size >= data_length:
            raise ValueError(
                (f"If used, `initial_train_size` must be an integer smaller "
                 f"than the length of `{data_name}` ({data_length}).")
            )    
        if initial_train_size < forecaster.window_size:
            raise ValueError(
                (f"If used, `initial_train_size` must be an integer greater than "
                 f"the window_size of the forecaster ({forecaster.window_size}).")
            )
        if initial_train_size + gap >= data_length:
            raise ValueError(
                (f"The combination of initial_train_size {initial_train_size} and "
                 f"gap {gap} cannot be greater than the length of `{data_name}` "
                 f"({data_length}).")
            )
    else:
        if forecaster_name == 'ForecasterSarimax':
            raise ValueError(
                (f"`initial_train_size` must be an integer smaller than the "
                 f"length of `{data_name}` ({data_length}).")
            )
        else:
            if not forecaster.fitted:
                raise NotFittedError(
                    ("`forecaster` must be already trained if no `initial_train_size` "
                     "is provided.")
                )
            if refit:
                raise ValueError(
                    "`refit` is only allowed when `initial_train_size` is not `None`."
                )

    if not isinstance(fixed_train_size, bool):
        raise TypeError("`fixed_train_size` must be a boolean: `True`, `False`.")
    if not isinstance(allow_incomplete_fold, bool):
        raise TypeError("`allow_incomplete_fold` must be a boolean: `True`, `False`.")
    if not isinstance(refit, (bool, int, np.integer)) or refit < 0:
        raise TypeError(f"`refit` must be a boolean or an integer greater than 0. Got {refit}.")
    if not isinstance(n_boot, (int, np.integer)) or n_boot < 0:
        raise TypeError(f"`n_boot` must be an integer greater than 0. Got {n_boot}.")
    if not isinstance(random_state, (int, np.integer)) or random_state < 0:
        raise TypeError(f"`random_state` must be an integer greater than 0. Got {random_state}.")
    if not isinstance(in_sample_residuals, bool):
        raise TypeError("`in_sample_residuals` must be a boolean: `True`, `False`.")
    if not isinstance(n_jobs, int) and n_jobs != 'auto':
        raise TypeError(f"`n_jobs` must be an integer or `'auto'`. Got {n_jobs}.")
    if not isinstance(verbose, bool):
        raise TypeError("`verbose` must be a boolean: `True`, `False`.")
    if not isinstance(show_progress, bool):
        raise TypeError("`show_progress` must be a boolean: `True`, `False`.")
    if not isinstance(suppress_warnings, bool):
        raise TypeError("`suppress_warnings` must be a boolean: `True`, `False`.")

    if interval is not None or alpha is not None:
        check_interval(interval=interval, alpha=alpha)

    if not allow_incomplete_fold and data_length - (initial_train_size + gap) < steps:
        raise ValueError(
            (f"There is not enough data to evaluate {steps} steps in a single "
             f"fold. Set `allow_incomplete_fold` to `True` to allow incomplete folds.\n"
             f"    Data available for test : {data_length - (initial_train_size + gap)}\n"
             f"    Steps                   : {steps}")
        )

    return

`select_n_jobs_backtesting(forecaster, refit)` ¶

Select the optimal number of jobs to use in the backtesting process. This selection is based on heuristics and is not guaranteed to be optimal.

The number of jobs is chosen as follows:

If refit is an integer, then n_jobs=1. This is because parallelization doesn't work with intermittent refit.
If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom' and regressor is a linear regressor, then n_jobs=1.
If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom', regressor is not a linear regressor and refit=True, then n_jobs=cpu_count().
If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom', regressor is not a linear regressor and refit=False, then n_jobs=1.
If forecaster is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate' and refit=True, then n_jobs=cpu_count().
If forecaster is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate' and refit=False, then n_jobs=1.
If forecaster is 'ForecasterAutoregMultiSeries' or 'ForecasterAutoregMultiSeriesCustom', then n_jobs=cpu_count().
If forecaster is 'ForecasterSarimax' or 'ForecasterEquivalentDate', then n_jobs=1.

Parameters:

Name	Type	Description	Default
`forecaster`	`Forecaster`	Forecaster model. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.	required
`refit`	`(bool, int)`	If the forecaster is refitted during the backtesting process.	required

Returns:

Name	Type	Description
`n_jobs`	`int`	The number of jobs to run in parallel.

Source code in skforecast\utils\utils.py

def select_n_jobs_backtesting(
    forecaster: object,
    refit: Union[bool, int]
) -> int:
    """
    Select the optimal number of jobs to use in the backtesting process. This
    selection is based on heuristics and is not guaranteed to be optimal.

    The number of jobs is chosen as follows:

    - If `refit` is an integer, then n_jobs=1. This is because parallelization doesn't 
    work with intermittent refit.
    - If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom' and
    regressor is a linear regressor, then n_jobs=1.
    - If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom',
    regressor is not a linear regressor and refit=`True`, then
    n_jobs=cpu_count().
    - If forecaster is 'ForecasterAutoreg' or 'ForecasterAutoregCustom',
    regressor is not a linear regressor and refit=`False`, then
    n_jobs=1.
    - If forecaster is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate'
    and refit=`True`, then n_jobs=cpu_count().
    - If forecaster is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate'
    and refit=`False`, then n_jobs=1.
    - If forecaster is 'ForecasterAutoregMultiSeries' or 
    'ForecasterAutoregMultiSeriesCustom', then n_jobs=cpu_count().
    - If forecaster is 'ForecasterSarimax' or 'ForecasterEquivalentDate', 
    then n_jobs=1.

    Parameters
    ----------
    forecaster : Forecaster
        Forecaster model. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiSeriesCustom, ForecasterAutoregMultiVariate.
    refit : bool, int
        If the forecaster is refitted during the backtesting process.

    Returns
    -------
    n_jobs : int
        The number of jobs to run in parallel.

    """

    forecaster_name = type(forecaster).__name__

    if isinstance(forecaster.regressor, Pipeline):
        regressor_name = type(forecaster.regressor[-1]).__name__
    else:
        regressor_name = type(forecaster.regressor).__name__

    linear_regressors = [
        regressor_name
        for regressor_name in dir(sklearn.linear_model)
        if not regressor_name.startswith('_')
    ]

    refit = False if refit == 0 else refit
    if not isinstance(refit, bool) and refit != 1:
        n_jobs = 1
    else:
        if forecaster_name in ['ForecasterAutoreg', 'ForecasterAutoregCustom']:
            if regressor_name in linear_regressors:
                n_jobs = 1
            else:
                n_jobs = joblib.cpu_count() if refit else 1
        elif forecaster_name in ['ForecasterAutoregDirect', 'ForecasterAutoregMultiVariate']:
            n_jobs = 1
        elif forecaster_name in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiSeriesCustom']:
            n_jobs = joblib.cpu_count()
        elif forecaster_name in ['ForecasterSarimax', 'ForecasterEquivalentDate']:
            n_jobs = 1
        else:
            n_jobs = 1

    return n_jobs

`select_n_jobs_fit_forecaster(forecaster_name, regressor_name)` ¶

Select the optimal number of jobs to use in the fitting process. This selection is based on heuristics and is not guaranteed to be optimal.

The number of jobs is chosen as follows:

If forecaster_name is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate' and regressor_name is a linear regressor, then n_jobs=1, otherwise n_jobs=cpu_count().

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	The type of Forecaster.	required
`regressor_name`	`str`	The type of regressor.	required

Returns:

Name	Type	Description
`n_jobs`	`int`	The number of jobs to run in parallel.

Source code in skforecast\utils\utils.py

def select_n_jobs_fit_forecaster(
    forecaster_name: str,
    regressor_name: str,
) -> int:
    """
    Select the optimal number of jobs to use in the fitting process. This
    selection is based on heuristics and is not guaranteed to be optimal. 

    The number of jobs is chosen as follows:

    - If forecaster_name is 'ForecasterAutoregDirect' or 'ForecasterAutoregMultiVariate'
    and regressor_name is a linear regressor, then n_jobs=1, otherwise n_jobs=cpu_count().

    Parameters
    ----------
    forecaster_name : str
        The type of Forecaster.
    regressor_name : str
        The type of regressor.

    Returns
    -------
    n_jobs : int
        The number of jobs to run in parallel.

    """

    linear_regressors = [
        regressor_name
        for regressor_name in dir(sklearn.linear_model)
        if not regressor_name.startswith('_')
    ]

    if forecaster_name in ['ForecasterAutoregDirect', 
                           'ForecasterAutoregMultiVariate']:
        if regressor_name in linear_regressors:
            n_jobs = 1
        else:
            n_jobs = joblib.cpu_count()
    else:
        n_jobs = 1

    return n_jobs

`check_preprocess_series(series)` ¶

Check and preprocess series argument in ForecasterAutoregMultiSeries and ForecasterAutoregMultiSeriesCustom classes.

If series is a pandas DataFrame, it is converted to a dict of pandas Series and index is overwritten according to the rules of preprocess_y.
If series is a dict, all values are converted to pandas Series. Checks if all index are pandas DatetimeIndex and, at least, one Series has a non-null frequency. No multiple frequency is allowed.

Parameters:

Name	Type	Description	Default
`series`	`pandas DataFrame, dict`	Training time series.	required

Returns:

Name	Type	Description
`series_dict`	`dict`	Dictionary with the series used during training.
`series_indexes`	`dict`	Dictionary with the index of each series.

Source code in skforecast\utils\utils.py

def check_preprocess_series(
    series: Union[pd.DataFrame, dict],
) -> Tuple[dict, pd.Index]:
    """
    Check and preprocess `series` argument in `ForecasterAutoregMultiSeries` and
    `ForecasterAutoregMultiSeriesCustom` classes.

    - If `series` is a pandas DataFrame, it is converted to a dict of pandas 
    Series and index is overwritten according to the rules of preprocess_y.
    - If `series` is a dict, all values are converted to pandas Series. Checks
    if all index are pandas DatetimeIndex and, at least, one Series has a non-null
    frequency. No multiple frequency is allowed.

    Parameters
    ----------
    series : pandas DataFrame, dict
        Training time series.

    Returns
    -------
    series_dict : dict
        Dictionary with the series used during training.
    series_indexes : dict
        Dictionary with the index of each series.

    """

    if isinstance(series, pd.DataFrame):

        _, series_index = preprocess_y(y=series, return_values=False)
        series = series.copy()
        series.index = series_index
        series_dict = series.to_dict("series")

    elif isinstance(series, dict):

        not_valid_series = [
            k 
            for k, v in series.items()
            if not isinstance(v, (pd.Series, pd.DataFrame))
        ]
        if not_valid_series:
            raise TypeError(
                (f"If `series` is a dictionary, all series must be a named "
                 f"pandas Series or a pandas DataFrame with a single column. "
                 f"Review series: {not_valid_series}")
            )

        series_dict = {
            k: v.copy()
            for k, v in series.items()
        }

        for k, v in series_dict.items():
            if isinstance(v, pd.DataFrame):
                if v.shape[1] != 1:
                    raise ValueError(
                        (f"If `series` is a dictionary, all series must be a named "
                         f"pandas Series or a pandas DataFrame with a single column. "
                         f"Review series: '{k}'")
                    )
                series_dict[k] = v.iloc[:, 0]

            series_dict[k].name = k

        not_valid_index = [
            k 
            for k, v in series_dict.items()
            if not isinstance(v.index, pd.DatetimeIndex)
        ]
        if not_valid_index:
            raise TypeError(
                (f"If `series` is a dictionary, all series must have a Pandas "
                 f"DatetimeIndex as index with the same frequency. "
                 f"Review series: {not_valid_index}")
            )

        indexes_freq = [f"{v.index.freq}" for v in series_dict.values()]
        indexes_freq = sorted(set(indexes_freq))
        if not len(indexes_freq) == 1:
            raise ValueError(
                (f"If `series` is a dictionary, all series must have a Pandas "
                 f"DatetimeIndex as index with the same frequency. "
                 f"Found frequencies: {indexes_freq}")
            )
    else:
        raise TypeError(
            (f"`series` must be a pandas DataFrame or a dict of DataFrames or Series. "
             f"Got {type(series)}.")
        )

    for k, v in series_dict.items():
        if np.isnan(v).all():
            raise ValueError(f"All values of series '{k}' are NaN.")

    series_indexes = {
        k: v.index
        for k, v in series_dict.items()
    }

    return series_dict, series_indexes

`check_preprocess_exog_multiseries(input_series_is_dict, series_indexes, series_col_names, exog, exog_dict)` ¶

Check and preprocess exog argument in ForecasterAutoregMultiSeries and ForecasterAutoregMultiSeriesCustom classes.

If input series is a pandas DataFrame (input_series_is_dict = False),
checks that input exog (pandas Series, DataFrame or dict) has the same index (type, length and frequency). Index is overwritten according to the rules of preprocess_exog. Create a dict of exog with the same keys as series.
If input series is a dict (input_series_is_dict = True), then input exog must be a dict. Check exog has a pandas DatetimeIndex and convert all values to pandas DataFrames.

Parameters:

Name	Type	Description	Default
`input_series_is_dict`	`bool`	Indicates if input series argument is a dict.	required
`series_indexes`	`dict`	Dictionary with the index of each series.	required
`series_col_names`	`list`	Names of the series (levels) used during training.	required
`exog`	`pandas Series, pandas DataFrame, dict`	Exogenous variable/s used during training.	required
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.	required

Returns:

Name	Type	Description
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.
`exog_col_names`	`list`	Names of the exogenous variables used during training.

Source code in skforecast\utils\utils.py

def check_preprocess_exog_multiseries(
    input_series_is_dict: bool,
    series_indexes: dict,
    series_col_names: list,
    exog: Union[pd.Series, pd.DataFrame, dict],
    exog_dict: dict,
) -> Tuple[dict, list]:
    """
    Check and preprocess `exog` argument in `ForecasterAutoregMultiSeries` and
    `ForecasterAutoregMultiSeriesCustom` classes.

    - If input series is a pandas DataFrame (input_series_is_dict = False),  
    checks that input exog (pandas Series, DataFrame or dict) has the same index 
    (type, length and frequency). Index is overwritten according to the rules 
    of preprocess_exog. Create a dict of exog with the same keys as series.
    - If input series is a dict (input_series_is_dict = True), then input 
    exog must be a dict. Check exog has a pandas DatetimeIndex and convert all
    values to pandas DataFrames.

    Parameters
    ----------
    input_series_is_dict : bool
        Indicates if input series argument is a dict.
    series_indexes : dict
        Dictionary with the index of each series.
    series_col_names : list
        Names of the series (levels) used during training.
    exog : pandas Series, pandas DataFrame, dict
        Exogenous variable/s used during training.
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training.

    Returns
    -------
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training.
    exog_col_names : list
        Names of the exogenous variables used during training.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
        raise TypeError(
            (f"`exog` must be a pandas Series, DataFrame, dictionary of pandas "
             f"Series/DataFrames or None. Got {type(exog)}.")
        )

    if not input_series_is_dict:
        # If input series is a pandas DataFrame, all index are the same.
        # Select the first index to check exog
        series_index = series_indexes[series_col_names[0]]

    if isinstance(exog, (pd.Series, pd.DataFrame)): 

        if input_series_is_dict:
            raise TypeError(
                (f"`exog` must be a dict of DataFrames or Series if "
                 f"`series` is a dict. Got {type(exog)}.")
            )

        _, exog_index = preprocess_exog(exog=exog, return_values=False)
        exog = exog.copy().to_frame() if isinstance(exog, pd.Series) else exog.copy()
        exog.index = exog_index

        if len(exog) != len(series_index):
            raise ValueError(
                (f"`exog` must have same number of samples as `series`. "
                 f"length `exog`: ({len(exog)}), length `series`: ({len(series_index)})")
            )

        if not (exog_index == series_index).all():
            raise ValueError(
                ("Different index for `series` and `exog`. They must be equal "
                 "to ensure the correct alignment of values.")
            )

        exog_dict = {serie: exog for serie in series_col_names}

    else:

        not_valid_exog = [
            k 
            for k, v in exog.items()
            if not isinstance(v, (pd.Series, pd.DataFrame, type(None)))
        ]
        if not_valid_exog:
            raise TypeError(
                (f"If `exog` is a dictionary, all exog must be a named pandas "
                 f"Series, a pandas DataFrame or None. Review exog: {not_valid_exog}")
            )

        # Only elements already present in exog_dict are updated
        exog_dict.update(
            (k, v.copy())
            for k, v in exog.items() 
            if k in exog_dict and v is not None
        )

        series_not_in_exog = set(series_col_names) - set(exog.keys())
        if series_not_in_exog:
            warnings.warn(
                (f"{series_not_in_exog} not present in `exog`. All values "
                 f"of the exogenous variables for these series will be NaN."),
                 MissingExogWarning
            )

        for k, v in exog_dict.items():
            if v is not None:
                check_exog(exog=v, allow_nan=True)
                if isinstance(v, pd.Series):
                    v = v.to_frame()
                exog_dict[k] = v

        if not input_series_is_dict:
            for k, v in exog_dict.items():
                if v is not None:
                    if len(v) != len(series_index):
                        raise ValueError(
                            (f"`exog` for series '{k}' must have same number of "
                             f"samples as `series`. length `exog`: ({len(v)}), "
                             f"length `series`: ({len(series_index)})")
                        )

                    _, v_index = preprocess_exog(exog=v, return_values=False)
                    exog_dict[k].index = v_index
                    if not (exog_dict[k].index == series_index).all():
                        raise ValueError(
                            (f"Different index for series '{k}' and its exog. "
                             f"When `series` is a pandas DataFrame, they must be "
                             f"equal to ensure the correct alignment of values.")
                        )
        else:
            not_valid_index = [
                k
                for k, v in exog_dict.items()
                if v is not None and not isinstance(v.index, pd.DatetimeIndex)
            ]
            if not_valid_index:
                raise TypeError(
                    (f"All exog must have a Pandas DatetimeIndex as index with the "
                     f"same frequency. Check exog for series: {not_valid_index}")
                )

    exog_col_names = list(
        set(
            column
            for df in exog_dict.values()
            if df is not None
            for column in df.columns.to_list()
        )
    )

    # Check that all exog have the same dtypes for common columns
    exog_dtype_dict = {col_name: set() 
                       for col_name in exog_col_names}
    for v in exog_dict.values():
        if v is not None:
            for col_name in v.columns:
                exog_dtype_dict[col_name].add(v[col_name].dtype.name)

    for col_name, dtypes in exog_dtype_dict.items():
        if len(dtypes) > 1:
            raise TypeError(
                (f"Column '{col_name}' has different dtypes in different exog "
                 f"DataFrames or Series.")
            )

    if len(set(exog_col_names) - set(series_col_names)) != len(exog_col_names):
        raise ValueError(
            (f"`exog` cannot contain a column named the same as one of the "
             f"series (column names of series).\n"
             f"    `series` columns : {series_col_names}.\n"
             f"    `exog`   columns : {exog_col_names}.")
        )

    return exog_dict, exog_col_names

`align_series_and_exog_multiseries(series_dict, input_series_is_dict, exog_dict=None)` ¶

Align series and exog according to their index. If needed, reindexing is applied. Heading and trailing NaNs are removed from all series in series_dict.

If input series is a pandas DataFrame (input_series_is_dict = False),
input exog (pandas Series, DataFrame or dict) must have the same index (type, length and frequency). Reindexing is not applied.
If input series is a dict (input_series_is_dict = True), then input exog must be a dict. Both must have a pandas DatetimeIndex, but can have different lengths. Reindexing is applied.

Parameters:

Name	Type	Description	Default
`series_dict`	`dict`	Dictionary with the series used during training.	required
`input_series_is_dict`	`bool`	Indicates if input series argument is a dict.	required
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.	`None`

Returns:

Name	Type	Description
`series_dict`	`dict`	Dictionary with the series used during training.
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.

Source code in skforecast\utils\utils.py

def align_series_and_exog_multiseries(
    series_dict: dict,
    input_series_is_dict: bool,
    exog_dict: dict=None
) -> Tuple[Union[pd.Series, pd.DataFrame], Union[pd.Series, pd.DataFrame]]:
    """
    Align series and exog according to their index. If needed, reindexing is
    applied. Heading and trailing NaNs are removed from all series in 
    `series_dict`.

    - If input series is a pandas DataFrame (input_series_is_dict = False),  
    input exog (pandas Series, DataFrame or dict) must have the same index 
    (type, length and frequency). Reindexing is not applied.
    - If input series is a dict (input_series_is_dict = True), then input 
    exog must be a dict. Both must have a pandas DatetimeIndex, but can have 
    different lengths. Reindexing is applied.

    Parameters
    ----------
    series_dict : dict
        Dictionary with the series used during training.
    input_series_is_dict : bool
        Indicates if input series argument is a dict.
    exog_dict : dict, default `None`
        Dictionary with the exogenous variable/s used during training.

    Returns
    -------
    series_dict : dict
        Dictionary with the series used during training.
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training.

    """

    for k in series_dict.keys():

        first_valid_index = series_dict[k].first_valid_index()
        last_valid_index = series_dict[k].last_valid_index()

        series_dict[k] = series_dict[k].loc[first_valid_index : last_valid_index]

        if exog_dict[k] is not None:
            if input_series_is_dict:
                index_intersection = (
                    series_dict[k].index.intersection(exog_dict[k].index)
                )
                if len(index_intersection) == 0:
                    warnings.warn(
                        (f"Series '{k}' and its `exog` do not have the same index. "
                         f"All exog values will be NaN for the period of the series."),
                         MissingValuesWarning
                    )
                elif len(index_intersection) != len(series_dict[k]):
                    warnings.warn(
                        (f"Series '{k}' and its `exog` do not have the same length. "
                         f"Exog values will be NaN for the not matched period of the series."),
                         MissingValuesWarning
                    )  
                exog_dict[k] = exog_dict[k].loc[index_intersection]
                if len(index_intersection) != len(series_dict[k]):
                    exog_dict[k] = exog_dict[k].reindex(
                                       series_dict[k].index, 
                                       fill_value = np.nan
                                   )
            else:
                exog_dict[k] = exog_dict[k].loc[first_valid_index : last_valid_index]

    return series_dict, exog_dict

`set_skforecast_warnings(suppress_warnings, action='default')` ¶

Set skforecast warnings action.

Parameters:

Name	Type	Description	Default
`suppress_warnings`	`bool`	If `True`, skforecast warnings will be suppressed. If `False`, skforecast warnings will be shown as default. See skforecast.exceptions.warn_skforecast_categories for more information.	required
`action`	`str`	Action to be taken when a warning is raised. See the warnings module for more information.	`'default'`

Returns:

Type	Description
`None`

Source code in skforecast\utils\utils.py

def set_skforecast_warnings(
    suppress_warnings: bool,
    action: str='default'
) -> None:
    """
    Set skforecast warnings action.

    Parameters
    ----------
    suppress_warnings : bool
        If `True`, skforecast warnings will be suppressed. If `False`, skforecast
        warnings will be shown as default. See 
        skforecast.exceptions.warn_skforecast_categories for more information.
    action : str, default `'default'`
        Action to be taken when a warning is raised. See the warnings module
        for more information.

    Returns
    -------
    None

    """

    if suppress_warnings:
        for category in warn_skforecast_categories:
            warnings.filterwarnings(action, category=category)

utils¶

save_forecaster(forecaster, file_name, save_custom_functions=True, verbose=True) ¶

load_forecaster(file_name, verbose=True) ¶

initialize_lags(forecaster_name, lags) ¶

initialize_weights(forecaster_name, regressor, weight_func, series_weights) ¶

initialize_transformer_series(series_col_names, transformer_series=None) ¶

initialize_lags_grid(forecaster, lags_grid=None) ¶

check_select_fit_kwargs(regressor, fit_kwargs=None) ¶

check_y(y, series_id='`y`') ¶

check_exog(exog, allow_nan=True, series_id='`exog`') ¶

get_exog_dtypes(exog) ¶

check_exog_dtypes(exog, call_check_exog=True, series_id='`exog`') ¶

check_interval(interval=None, quantiles=None, alpha=None) ¶

check_predict_input(forecaster_name, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, last_window_exog=None, exog=None, exog_type=None, exog_col_names=None, interval=None, alpha=None, max_steps=None, levels=None, levels_forecaster=None, series_col_names=None) ¶

preprocess_y(y, return_values=True) ¶

preprocess_last_window(last_window, return_values=True) ¶

preprocess_exog(exog, return_values=True) ¶

cast_exog_dtypes(exog, exog_dtypes) ¶

exog_to_direct(exog, steps) ¶

exog_to_direct_numpy(exog, steps) ¶

expand_index(index, steps) ¶

transform_series(series, transformer, fit=False, inverse_transform=False) ¶

transform_dataframe(df, transformer, fit=False, inverse_transform=False) ¶

check_optional_dependency(package_name) ¶

multivariate_time_series_corr(time_series, other, lags, method='pearson') ¶

select_n_jobs_backtesting(forecaster, refit) ¶

select_n_jobs_fit_forecaster(forecaster_name, regressor_name) ¶

check_preprocess_series(series) ¶

check_preprocess_exog_multiseries(input_series_is_dict, series_indexes, series_col_names, exog, exog_dict) ¶

align_series_and_exog_multiseries(series_dict, input_series_is_dict, exog_dict=None) ¶

set_skforecast_warnings(suppress_warnings, action='default') ¶

`utils`¶

`save_forecaster(forecaster, file_name, save_custom_functions=True, verbose=True)` ¶

`load_forecaster(file_name, verbose=True)` ¶

`initialize_lags(forecaster_name, lags)` ¶

`initialize_weights(forecaster_name, regressor, weight_func, series_weights)` ¶

`initialize_transformer_series(series_col_names, transformer_series=None)` ¶

`initialize_lags_grid(forecaster, lags_grid=None)` ¶

`check_select_fit_kwargs(regressor, fit_kwargs=None)` ¶

`get_exog_dtypes(exog)` ¶

`check_interval(interval=None, quantiles=None, alpha=None)` ¶

`preprocess_y(y, return_values=True)` ¶

`preprocess_last_window(last_window, return_values=True)` ¶

`preprocess_exog(exog, return_values=True)` ¶

`cast_exog_dtypes(exog, exog_dtypes)` ¶

`exog_to_direct(exog, steps)` ¶

`exog_to_direct_numpy(exog, steps)` ¶

`expand_index(index, steps)` ¶

`transform_series(series, transformer, fit=False, inverse_transform=False)` ¶

`transform_dataframe(df, transformer, fit=False, inverse_transform=False)` ¶

`check_optional_dependency(package_name)` ¶

`multivariate_time_series_corr(time_series, other, lags, method='pearson')` ¶

`select_n_jobs_backtesting(forecaster, refit)` ¶

`select_n_jobs_fit_forecaster(forecaster_name, regressor_name)` ¶

`check_preprocess_series(series)` ¶

`check_preprocess_exog_multiseries(input_series_is_dict, series_indexes, series_col_names, exog, exog_dict)` ¶

`align_series_and_exog_multiseries(series_dict, input_series_is_dict, exog_dict=None)` ¶

`set_skforecast_warnings(suppress_warnings, action='default')` ¶