`utils`¶

skforecast.utils.utils.save_forecaster ¶


save_forecaster(
    forecaster,
    file_name,
    save_custom_functions=True,
    verbose=True,
    suppress_warnings=False,
)

Save forecaster model using joblib. If custom functions are used to create weights, they are saved as .py files.

Parameters:

Name	Type	Description	Default
`forecaster`	`Forecaster`	Forecaster created with skforecast library.	required
`file_name`	`str`	File name given to the object. The save extension will be .joblib.	required
`save_custom_functions`	`bool`	If True, save custom functions used in the forecaster (weight_func) as .py files. Custom functions need to be available in the environment where the forecaster is going to be loaded.	`True`
`verbose`	`bool`	Print summary about the forecaster saved.	`True`
`suppress_warnings`	`bool`	If `True`, skforecast warnings will be suppressed. See skforecast.exceptions.warn_skforecast_categories for more information.	`False`

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def save_forecaster(
    forecaster: object, 
    file_name: str,
    save_custom_functions: bool = True, 
    verbose: bool = True,
    suppress_warnings: bool = False
) -> None:
    """
    Save forecaster model using joblib. If custom functions are used to create
    weights, they are saved as .py files.

    Parameters
    ----------
    forecaster : Forecaster
        Forecaster created with skforecast library.
    file_name : str
        File name given to the object. The save extension will be .joblib.
    save_custom_functions : bool, default True
        If True, save custom functions used in the forecaster (weight_func) as 
        .py files. Custom functions need to be available in the environment 
        where the forecaster is going to be loaded.
    verbose : bool, default True
        Print summary about the forecaster saved.
    suppress_warnings : bool, default False
        If `True`, skforecast warnings will be suppressed. See 
        skforecast.exceptions.warn_skforecast_categories for more information.

    Returns
    -------
    None

    Notes
    -----
    Each custom weight function is written to `<function name>.py` using a
    relative path, i.e. in the current working directory, not next to
    `file_name`.

    """

    set_skforecast_warnings(suppress_warnings, action='ignore')
    try:
        # Any existing suffix of `file_name` is replaced by `.joblib`.
        joblib_path = Path(file_name).with_suffix('.joblib')

        # Save forecaster
        joblib.dump(forecaster, filename=joblib_path)

        weight_func = getattr(forecaster, 'weight_func', None)
        if weight_func is not None:
            if save_custom_functions:
                # Save custom functions to create weights. A dict may map
                # several series to the same function: write each one once.
                if isinstance(weight_func, dict):
                    functions = set(weight_func.values())
                else:
                    functions = [weight_func]
                for fun in functions:
                    # Python source files are UTF-8 by default, so the encoding
                    # is set explicitly instead of relying on the locale.
                    with open(fun.__name__ + '.py', 'w', encoding='utf-8') as file:
                        file.write(inspect.getsource(fun))
            else:
                warnings.warn(
                    "Custom function(s) used to create weights are not saved. To save them, "
                    "set `save_custom_functions` to `True`.",
                    SaveLoadSkforecastWarning
                )

        if getattr(forecaster, 'window_features', None) is not None:
            skforecast_classes = {'RollingFeatures'}
            custom_classes = set(forecaster.window_features_class_names) - skforecast_classes
            if custom_classes:
                # Sorted so that the message is deterministic between runs.
                warnings.warn(
                    "The Forecaster includes custom user-defined classes in the "
                    "`window_features` argument. These classes are not saved automatically "
                    "when saving the Forecaster. Please ensure you save these classes "
                    "manually and import them before loading the Forecaster.\n"
                    "    Custom classes: " + ', '.join(sorted(custom_classes)) + "\n"
                    "Visit the documentation for more information: "
                    "https://skforecast.org/latest/user_guides/save-load-forecaster.html#saving-and-loading-a-forecaster-model-with-custom-features",
                    SaveLoadSkforecastWarning
                )

        if verbose:
            forecaster.summary()
    finally:
        # Always undo the 'ignore' call above, even if saving fails part-way.
        set_skforecast_warnings(suppress_warnings, action='default')

skforecast.utils.utils.load_forecaster ¶


load_forecaster(
    file_name, verbose=True, suppress_warnings=False
)

Load forecaster model using joblib. If the forecaster was saved with custom user-defined classes as window features or custom functions to create weights, these objects must be available in the environment where the forecaster is going to be loaded.

Parameters:

Name	Type	Description	Default
`file_name`	`str`	Object file name.	required
`verbose`	`bool`	Print summary about the forecaster loaded.	`True`
`suppress_warnings`	`bool`	If `True`, skforecast warnings will be suppressed. See skforecast.exceptions.warn_skforecast_categories for more information.	`False`

Returns:

Name	Type	Description
`forecaster`	`Forecaster`	Forecaster created with skforecast library.

Source code in skforecast/utils/utils.py

def load_forecaster(
    file_name: str,
    verbose: bool = True,
    suppress_warnings: bool = False
) -> object:
    """
    Load forecaster model using joblib. If the forecaster was saved with 
    custom user-defined classes as window features or custom
    functions to create weights, these objects must be available
    in the environment where the forecaster is going to be loaded.

    Parameters
    ----------
    file_name: str
        Object file name.
    verbose: bool, default True
        Print summary about the forecaster loaded.
    suppress_warnings : bool, default False
        If `True`, skforecast warnings will be suppressed. See 
        skforecast.exceptions.warn_skforecast_categories for more information.

    Returns
    -------
    forecaster: Forecaster
        Forecaster created with skforecast library.

    Notes
    -----
    A `SkforecastVersionWarning` is issued when the `skforecast_version`
    stored in the forecaster differs from the installed `__version__`.

    """

    set_skforecast_warnings(suppress_warnings, action='ignore')
    try:
        # SECURITY NOTE: joblib persistence is pickle-based, so loading a file
        # can execute arbitrary code. Only load files from trusted sources.
        forecaster = joblib.load(filename=Path(file_name))
        forecaster_v = forecaster.skforecast_version

        if forecaster_v != __version__:
            warnings.warn(
                f"The skforecast version installed in the environment differs "
                f"from the version used to create the forecaster.\n"
                f"    Installed Version  : {__version__}\n"
                f"    Forecaster Version : {forecaster_v}\n"
                f"This may create incompatibilities when using the library.",
                SkforecastVersionWarning
            )

        if verbose:
            forecaster.summary()
    finally:
        # Always undo the 'ignore' call above, even if loading fails.
        set_skforecast_warnings(suppress_warnings, action='default')

    return forecaster

skforecast.utils.utils.initialize_lags ¶


initialize_lags(forecaster_name, lags)

Check lags argument input and generate the corresponding numpy ndarray.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`lags`	`Any`	Lags used as predictors.	required

Returns:

Name	Type	Description
`lags`	`numpy ndarray, None`	Lags used as predictors.
`lags_names`	`(list, None)`	Names of the lags used as predictors.
`max_lag`	`(int, None)`	Maximum value of the lags.

Source code in skforecast/utils/utils.py

def initialize_lags(
    forecaster_name: str,
    lags: Any
) -> tuple[np.ndarray[int] | None, list[str] | None, int | None]:
    """
    Check lags argument input and generate the corresponding numpy ndarray.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    lags : Any
        Lags used as predictors.

    Returns
    -------
    lags : numpy ndarray, None
        Lags used as predictors.
    lags_names : list, None
        Names of the lags used as predictors.
    max_lag : int, None
        Maximum value of the lags.

    """

    lags_names = None
    max_lag = None
    if lags is not None:
        if isinstance(lags, int):
            if lags < 1:
                raise ValueError("Minimum value of lags allowed is 1.")
            lags = np.arange(1, lags + 1)

        if isinstance(lags, (list, tuple, range)):
            lags = np.array(lags)

        if isinstance(lags, np.ndarray):
            if lags.size == 0:
                return None, None, None
            if lags.ndim != 1:
                raise ValueError("`lags` must be a 1-dimensional array.")
            if not np.issubdtype(lags.dtype, np.integer):
                raise TypeError("All values in `lags` must be integers.")
            if np.any(lags < 1):
                raise ValueError("Minimum value of lags allowed is 1.")
        else:
            if forecaster_name == 'ForecasterDirectMultiVariate':
                raise TypeError(
                    f"`lags` argument must be a dict, int, 1d numpy ndarray, range, "
                    f"tuple or list. Got {type(lags)}."
                )
            else:
                raise TypeError(
                    f"`lags` argument must be an int, 1d numpy ndarray, range, "
                    f"tuple or list. Got {type(lags)}."
                )

        lags = np.sort(lags)
        lags_names = [f'lag_{i}' for i in lags]
        max_lag = max(lags)

    return lags, lags_names, max_lag

skforecast.utils.utils.initialize_weights ¶


initialize_weights(
    forecaster_name, estimator, weight_func, series_weights
)

Check weights arguments, weight_func and series_weights for the different forecasters. Create source_code_weight_func, source code of the custom function(s) used to create weights.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`estimator`	`estimator or pipeline compatible with the scikit-learn API`	Estimator of the forecaster.	required
`weight_func`	`(Callable, dict)`	Argument `weight_func` of the forecaster.	required
`series_weights`	`dict`	Argument `series_weights` of the forecaster.	required

Returns:

Name	Type	Description
`weight_func`	`(Callable, dict)`	Argument `weight_func` of the forecaster.
`source_code_weight_func`	`(str, dict)`	Argument `source_code_weight_func` of the forecaster.
`series_weights`	`dict`	Argument `series_weights` of the forecaster. Only ForecasterRecursiveMultiSeries.

Source code in skforecast/utils/utils.py

def initialize_weights(
    forecaster_name: str,
    estimator: object,
    weight_func: Callable | dict[str, Callable],
    series_weights: dict[str, float]
) -> tuple[Callable | dict[str, Callable] | None, str | dict[str, str] | None, dict[str, float] | None]:
    """
    Check weights arguments, `weight_func` and `series_weights` for the different 
    forecasters. Create `source_code_weight_func`, source code of the custom 
    function(s) used to create weights.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    estimator : estimator or pipeline compatible with the scikit-learn API
        Estimator of the forecaster.
    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.
    series_weights : dict
        Argument `series_weights` of the forecaster.

    Returns
    -------
    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.
    source_code_weight_func : str, dict
        Argument `source_code_weight_func` of the forecaster.
    series_weights : dict
        Argument `series_weights` of the forecaster. Only ForecasterRecursiveMultiSeries.

    """

    source_code_weight_func = None

    if weight_func is not None:

        if forecaster_name in ['ForecasterRecursiveMultiSeries']:
            if not isinstance(weight_func, (Callable, dict)):
                raise TypeError(
                    f"Argument `weight_func` must be a Callable or a dict of "
                    f"Callables. Got {type(weight_func)}."
                )
        elif not isinstance(weight_func, Callable):
            raise TypeError(
                f"Argument `weight_func` must be a Callable. Got {type(weight_func)}."
            )

        if isinstance(weight_func, dict):
            source_code_weight_func = {}
            for key in weight_func:
                source_code_weight_func[key] = inspect.getsource(weight_func[key])
        else:
            source_code_weight_func = inspect.getsource(weight_func)

        if 'sample_weight' not in inspect.signature(estimator.fit).parameters:
            warnings.warn(
                f"Argument `weight_func` is ignored since estimator {estimator} "
                f"does not accept `sample_weight` in its `fit` method.",
                IgnoredArgumentWarning
            )
            weight_func = None
            source_code_weight_func = None

    if series_weights is not None:
        if not isinstance(series_weights, dict):
            raise TypeError(
                f"Argument `series_weights` must be a dict of floats or ints."
                f"Got {type(series_weights)}."
            )
        if 'sample_weight' not in inspect.signature(estimator.fit).parameters:
            warnings.warn(
                f"Argument `series_weights` is ignored since estimator {estimator} "
                f"does not accept `sample_weight` in its `fit` method.",
                IgnoredArgumentWarning
            )
            series_weights = None

    return weight_func, source_code_weight_func, series_weights

skforecast.utils.utils.initialize_transformer_series ¶


initialize_transformer_series(
    forecaster_name,
    series_names_in_,
    encoding=None,
    transformer_series=None,
)

Initialize transformer_series_ attribute for the Forecasters Multiseries.

If transformer_series is None, no transformation is applied.
If transformer_series is a scikit-learn transformer (object), the same transformer is applied to all series (series_names_in_).
If transformer_series is a dict, a different transformer can be applied to each series. The keys of the dictionary must be the same as the names of the series in series_names_in_.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`series_names_in_`	`list`	Names of the series (levels) used during training.	required
`encoding`	`str`	Encoding used to identify the different series (`ForecasterRecursiveMultiSeries`).	`None`
`transformer_series`	`(object, dict)`	An instance of a transformer (preprocessor) compatible with the scikit-learn preprocessing API with methods: fit, transform, fit_transform and inverse_transform.	`None`

Returns:

Name	Type	Description
`transformer_series_`	`dict`	Dictionary with the transformer for each series. It is created cloning the objects in `transformer_series` and is used internally to avoid overwriting.

Source code in skforecast/utils/utils.py

def initialize_transformer_series(
    forecaster_name: str,
    series_names_in_: list[str],
    encoding: str | None = None,
    transformer_series: object | dict[str, object | None] | None = None
) -> dict[str, object | None]:
    """
    Initialize `transformer_series_` attribute for the Forecasters Multiseries.

    - If `transformer_series` is `None`, no transformation is applied.
    - If `transformer_series` is a scikit-learn transformer (object), the same 
    transformer is applied to all series (`series_names_in_`).
    - If `transformer_series` is a `dict`, a different transformer can be
    applied to each series. The keys of the dictionary must be the same as the
    names of the series in `series_names_in_`.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    series_names_in_ : list
        Names of the series (levels) used during training.
    encoding : str, default None
        Encoding used to identify the different series (`ForecasterRecursiveMultiSeries`).
    transformer_series : object, dict, default None
        An instance of a transformer (preprocessor) compatible with the scikit-learn
        preprocessing API with methods: fit, transform, fit_transform and 
        inverse_transform. 

    Returns
    -------
    transformer_series_ : dict
        Dictionary with the transformer for each series. It is created cloning the 
        objects in `transformer_series` and is used internally to avoid overwriting.

    """

    if forecaster_name == 'ForecasterRecursiveMultiSeries':
        if encoding is None:
            series_names_in_ = ['_unknown_level']
        else:
            series_names_in_ = series_names_in_ + ['_unknown_level']

    if transformer_series is None:
        transformer_series_ = {serie: None for serie in series_names_in_}
    elif not isinstance(transformer_series, dict):
        transformer_series_ = {
            serie: clone(transformer_series) 
            for serie in series_names_in_
        }
    else:
        transformer_series_ = {serie: None for serie in series_names_in_}
        # Only elements already present in transformer_series_ are updated
        transformer_series_.update(
            {
                k: deepcopy(v)
                for k, v in transformer_series.items()
                if k in transformer_series_
            }
        )

        series_not_in_transformer_series = (
            set(series_names_in_) - set(transformer_series.keys())
        ) - {'_unknown_level'}
        if series_not_in_transformer_series:
            warnings.warn(
                f"{series_not_in_transformer_series} not present in `transformer_series`."
                f" No transformation is applied to these series.",
                IgnoredArgumentWarning
            )

    return transformer_series_

skforecast.utils.utils.check_select_fit_kwargs ¶


check_select_fit_kwargs(estimator, fit_kwargs=None)

Check if fit_kwargs is a dict and select only the keys that are used by the fit method of the estimator.

Parameters:

Name	Type	Description	Default
`estimator`	`object`	Estimator object.	required
`fit_kwargs`	`dict`	Dictionary with the arguments to pass to the `fit` method of the estimator.	`None`

Returns:

Name	Type	Description
`fit_kwargs`	`dict`	Dictionary with the arguments to be passed to the `fit` method of the estimator after removing the unused keys.

Source code in skforecast/utils/utils.py

def check_select_fit_kwargs(
    estimator: object,
    fit_kwargs: dict[str, object] | None = None
) -> dict[str, object]:
    """
    Check if `fit_kwargs` is a dict and select only the keys that are used by
    the `fit` method of the estimator.

    Parameters
    ----------
    estimator : object
        Estimator object.
    fit_kwargs : dict, default None
        Dictionary with the arguments to pass to the `fit' method of the forecaster.

    Returns
    -------
    fit_kwargs : dict
        Dictionary with the arguments to be passed to the `fit` method of the 
        estimator after removing the unused keys.

    """

    if fit_kwargs is None:
        fit_kwargs = {}
    else:
        if not isinstance(fit_kwargs, dict):
            raise TypeError(
                f"Argument `fit_kwargs` must be a dict. Got {type(fit_kwargs)}."
            )

        fit_params = inspect.signature(estimator.fit).parameters

        # Non used keys
        non_used_keys = [
            k for k in fit_kwargs.keys() if k not in fit_params
        ]
        if non_used_keys:
            warnings.warn(
                f"Argument/s {non_used_keys} ignored since they are not used by the "
                f"estimator's `fit` method.",
                IgnoredArgumentWarning
            )

        if 'sample_weight' in fit_kwargs.keys():
            warnings.warn(
                "The `sample_weight` argument is ignored. Use `weight_func` to pass "
                "a function that defines the individual weights for each sample "
                "based on its index.",
                IgnoredArgumentWarning
            )
            del fit_kwargs['sample_weight']

        # Select only the keyword arguments allowed by the estimator's `fit` method.
        fit_kwargs = {
            k: v for k, v in fit_kwargs.items() if k in fit_params
        }

    return fit_kwargs

skforecast.utils.utils.check_y ¶


check_y(y, series_id='`y`')

Raise Exception if y is not pandas Series or if it has missing values.

Parameters:

Name	Type	Description	Default
`y`	`Any`	Time series values.	required
`series_id`	`str`	Identifier of the series used in the warning message.	'`y`'

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_y(
    y: Any,
    series_id: str = "`y`"
) -> None:
    """
    Validate that `y` is a pandas Series without missing values.

    Parameters
    ----------
    y : Any
        Time series values.
    series_id : str, default '`y`'
        Identifier of the series used in the exception messages.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `y` is not a pandas Series.
    ValueError
        If `y` contains missing values.

    """

    if isinstance(y, pd.Series):
        has_missing = y.isna().to_numpy().any()
        if has_missing:
            raise ValueError(f"{series_id} has missing values.")
        return None

    raise TypeError(
        f"{series_id} must be a pandas Series with a DatetimeIndex or a RangeIndex. "
        f"Found {type(y)}."
    )

skforecast.utils.utils.check_exog ¶


check_exog(exog, allow_nan=True, series_id='`exog`')

Raise Exception if exog is not pandas Series or pandas DataFrame. If allow_nan = False, issue a warning if exog contains NaN values.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variable/s included as predictor/s.	required
`allow_nan`	`bool`	If True (default), allows the presence of NaN values in `exog`. If False, issue a warning if `exog` contains NaN values.	`True`
`series_id`	`str`	Identifier of the series for which the exogenous variable/s are used in the warning message.	'`exog`'

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_exog(
    exog: pd.Series | pd.DataFrame,
    allow_nan: bool = True,
    series_id: str = "`exog`"
) -> None:
    """
    Raise Exception if `exog` is not pandas Series or pandas DataFrame.
    If `allow_nan = True`, issue a warning if `exog` contains NaN values.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variable/s included as predictor/s.
    allow_nan : bool, default True
        If True, allows the presence of NaN values in `exog`. If False (default),
        issue a warning if `exog` contains NaN values.
    series_id : str, default '`exog`'
        Identifier of the series for which the exogenous variable/s are used
        in the warning message.

    Returns
    -------
    None

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(
            f"{series_id} must be a pandas Series or DataFrame. Got {type(exog)}."
        )

    if isinstance(exog, pd.Series) and exog.name is None:
        raise ValueError(f"When {series_id} is a pandas Series, it must have a name.")

    if not allow_nan:
        if exog.isna().to_numpy().any():
            warnings.warn(
                f"{series_id} has missing values. Most machine learning models "
                f"do not allow missing values. Fitting the forecaster may fail.", 
                MissingValuesWarning
            )

    return

skforecast.utils.utils.get_exog_dtypes ¶


get_exog_dtypes(exog)

Store dtypes of exog.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variable/s included as predictor/s.	required

Returns:

Name	Type	Description
`exog_dtypes`	`dict`	Dictionary with the dtypes in `exog`.

Source code in skforecast/utils/utils.py

def get_exog_dtypes(
    exog: pd.Series | pd.DataFrame, 
) -> dict[str, type]:
    """
    Store dtypes of `exog`.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    exog_dtypes : dict
        Dictionary with the dtypes in `exog`.

    """

    if isinstance(exog, pd.Series):
        exog_dtypes = {exog.name: exog.dtypes}
    else:
        exog_dtypes = exog.dtypes.to_dict()

    return exog_dtypes

skforecast.utils.utils.check_exog_dtypes ¶


check_exog_dtypes(
    exog, call_check_exog=True, series_id="`exog`"
)

Raise Exception if exog has categorical columns with non integer values. This is needed when using machine learning estimators that allow categorical features. Issue a Warning if exog has columns that are not int, float, or category.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variable/s included as predictor/s.	required
`call_check_exog`	`bool`	If `True`, call `check_exog` function.	`True`
`series_id`	`str`	Identifier of the series for which the exogenous variable/s are used in the warning message.	'`exog`'

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_exog_dtypes(
    exog: pd.Series | pd.DataFrame,
    call_check_exog: bool = True,
    series_id: str = "`exog`"
) -> None:
    """
    Raise Exception if `exog` has categorical columns with non integer values.
    This is needed when using machine learning estimators that allow categorical
    features.
    Issue a Warning if `exog` has columns that are not `init`, `float`, or `category`.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variable/s included as predictor/s.
    call_check_exog : bool, default True
        If `True`, call `check_exog` function.
    series_id : str, default '`exog`'
        Identifier of the series for which the exogenous variable/s are used
        in the warning message.

    Returns
    -------
    None

    """

    if call_check_exog:
        check_exog(exog=exog, allow_nan=False, series_id=series_id)

    valid_dtypes = ("int", "Int", "float", "Float", "uint")

    if isinstance(exog, pd.DataFrame):

        for dtype_name in set(exog.dtypes.astype(str)):
            if not (dtype_name.startswith(valid_dtypes) or dtype_name == "category"):
                warnings.warn(
                    f"{series_id} may contain only `int`, `float` or `category` dtypes. "
                    f"Most machine learning models do not allow other types of values. "
                    f"Fitting the forecaster may fail.", 
                    DataTypeWarning
                )
                break

        for col in exog.columns:
            if isinstance(exog[col].dtype, pd.CategoricalDtype):
                if not np.issubdtype(exog[col].cat.categories.dtype, np.integer):
                    raise TypeError(
                        "Categorical dtypes in exog must contain only integer values. "
                        "See skforecast docs for more info about how to include "
                        "categorical features https://skforecast.org/"
                        "latest/user_guides/categorical-features.html"
                    )

    else:

        dtype_name = str(exog.dtypes)
        if not (dtype_name.startswith(valid_dtypes) or dtype_name == "category"):
            warnings.warn(
                f"{series_id} may contain only `int`, `float` or `category` dtypes. Most "
                f"machine learning models do not allow other types of values. "
                f"Fitting the forecaster may fail.", 
                DataTypeWarning
            )

        if isinstance(exog.dtype, pd.CategoricalDtype):
            if not np.issubdtype(exog.cat.categories.dtype, np.integer):
                raise TypeError(
                    "Categorical dtypes in exog must contain only integer values. "
                    "See skforecast docs for more info about how to include "
                    "categorical features https://skforecast.org/"
                    "latest/user_guides/categorical-features.html"
                )

skforecast.utils.utils.check_interval ¶


check_interval(
    interval=None,
    ensure_symmetric_intervals=False,
    quantiles=None,
    alpha=None,
    alpha_literal="alpha",
)

Check provided confidence interval sequence is valid.

Parameters:

Name	Type	Description	Default
`interval`	`(list, tuple)`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as `interval = [2.5, 97.5]`.	`None`
`ensure_symmetric_intervals`	`bool`	If True, ensure that the intervals are symmetric.	`False`
`quantiles`	`(list, tuple)`	Sequence of quantiles to compute, which must be between 0 and 1 inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as `quantiles = [0.05, 0.5, 0.95]`.	`None`
`alpha`	`float`	The confidence intervals used in ForecasterStats are (1 - alpha) %.	`None`
`alpha_literal`	`str`	Literal used in the exception message when `alpha` is provided.	`'alpha'`

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_interval(
    interval: list[float] | tuple[float] | None = None,
    ensure_symmetric_intervals: bool = False,
    quantiles: list[float] | tuple[float] | None = None,
    alpha: float = None,
    alpha_literal: str | None = 'alpha'
) -> None:
    """
    Check provided confidence interval sequence is valid.

    Parameters
    ----------
    interval : list, tuple, default None
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.
    ensure_symmetric_intervals : bool, default False
        If True, ensure that the intervals are symmetric.
    quantiles : list, tuple, default None
        Sequence of quantiles to compute, which must be between 0 and 1 
        inclusive. For example, quantiles of 0.05, 0.5 and 0.95 should be as 
        `quantiles = [0.05, 0.5, 0.95]`.
    alpha : float, default None
        The confidence intervals used in ForecasterStats are (1 - alpha) %.
    alpha_literal : str, default 'alpha'
        Literal used in the exception message when `alpha` is provided.

    Returns
    -------
    None

    """

    if interval is not None:
        if not isinstance(interval, (list, tuple)):
            raise TypeError(
                "`interval` must be a `list` or `tuple`. For example, interval of 95% "
                "should be as `interval = [2.5, 97.5]`."
            )

        if len(interval) != 2:
            raise ValueError(
                "`interval` must contain exactly 2 values, respectively the "
                "lower and upper interval bounds. For example, interval of 95% "
                "should be as `interval = [2.5, 97.5]`."
            )

        if (interval[0] < 0.) or (interval[0] >= 100.):
            raise ValueError(
                f"Lower interval bound ({interval[0]}) must be >= 0 and < 100."
            )

        if (interval[1] <= 0.) or (interval[1] > 100.):
            raise ValueError(
                f"Upper interval bound ({interval[1]}) must be > 0 and <= 100."
            )

        if interval[0] >= interval[1]:
            raise ValueError(
                f"Lower interval bound ({interval[0]}) must be less than the "
                f"upper interval bound ({interval[1]})."
            )

        if ensure_symmetric_intervals and interval[0] + interval[1] != 100:
            raise ValueError(
                f"Interval must be symmetric, the sum of the lower, ({interval[0]}), "
                f"and upper, ({interval[1]}), interval bounds must be equal to "
                f"100. Got {interval[0] + interval[1]}."
            )

    if quantiles is not None:
        if not isinstance(quantiles, (list, tuple)):
            raise TypeError(
                "`quantiles` must be a `list` or `tuple`. For example, quantiles "
                "0.05, 0.5, and 0.95 should be as `quantiles = [0.05, 0.5, 0.95]`."
            )

        for q in quantiles:
            if (q < 0.) or (q > 1.):
                raise ValueError(
                    "All elements in `quantiles` must be >= 0 and <= 1."
                )

    if alpha is not None:
        if not isinstance(alpha, float):
            raise TypeError(
                f"`{alpha_literal}` must be a `float`. For example, interval of 95% "
                f"should be as `alpha = 0.05`."
            )

        if (alpha <= 0.) or (alpha >= 1):
            raise ValueError(
                f"`{alpha_literal}` must have a value between 0 and 1. Got {alpha}."
            )

skforecast.utils.utils.check_predict_input ¶


check_predict_input(
    forecaster_name,
    steps,
    is_fitted,
    exog_in_,
    index_type_,
    index_freq_,
    window_size,
    last_window,
    last_window_exog=None,
    exog=None,
    exog_names_in_=None,
    interval=None,
    alpha=None,
    max_step=None,
    levels=None,
    levels_forecaster=None,
    series_names_in_=None,
    encoding=None,
)

Check all inputs of predict method. This is a helper function to validate that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`steps`	`(int, list)`	Number of future steps predicted.	required
`is_fitted`	`bool`	Tag to identify if the estimator has been fitted (trained).	required
`exog_in_`	`bool`	If the forecaster has been trained using exogenous variable/s.	required
`index_type_`	`type`	Type of index of the input used in training.	required
`index_freq_`	`str`	Frequency of Index of the input used in training.	required
`window_size`	`int`	Size of the window needed to create the predictors. It is equal to `max_lag`.	required
`last_window`	`pandas Series, pandas DataFrame, None`	Values of the series used to create the predictors (lags) needed in the first iteration of prediction (t + 1).	required
`last_window_exog`	`pandas Series, pandas DataFrame`	Values of the exogenous variables aligned with `last_window` in ForecasterStats predictions.	`None`
`exog`	`pandas Series, pandas DataFrame, dict`	Exogenous variable/s included as predictor/s.	`None`
`exog_names_in_`	`list`	Names of the exogenous variables used during training.	`None`
`interval`	`(list, tuple)`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as `interval = [2.5, 97.5]`.	`None`
`alpha`	`float`	The confidence intervals used in ForecasterStats are (1 - alpha) %.	`None`
`max_step`	`int \| None`	Maximum number of steps allowed (`ForecasterDirect` and `ForecasterDirectMultiVariate`).	`None`
`levels`	`(str, list)`	Time series to be predicted (`ForecasterRecursiveMultiSeries` and `ForecasterRnn`).	`None`
`levels_forecaster`	`(str, list)`	Time series used as output data of a multiseries problem in a RNN problem (`ForecasterRnn`).	`None`
`series_names_in_`	`list`	Names of the columns used during fit (`ForecasterRecursiveMultiSeries`, `ForecasterDirectMultiVariate` and `ForecasterRnn`).	`None`
`encoding`	`str`	Encoding used to identify the different series (`ForecasterRecursiveMultiSeries`).	`None`

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_predict_input(
    forecaster_name: str,
    steps: int | list[int],
    is_fitted: bool,
    exog_in_: bool,
    index_type_: type,
    index_freq_: str,
    window_size: int,
    last_window: pd.Series | pd.DataFrame | None,
    last_window_exog: pd.Series | pd.DataFrame | None = None,
    exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame] | None = None,
    exog_names_in_: list[str] | None = None,
    interval: list[float] | None = None,
    alpha: float | None = None,
    max_step: int | None = None,
    levels: str | list[str] | None = None,
    levels_forecaster: str | list[str] | None = None,
    series_names_in_: list[str] | None = None,
    encoding: str | None = None
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    steps : int, list
        Number of future steps predicted.
    is_fitted: bool
        Tag to identify if the estimator has been fitted (trained).
    exog_in_ : bool
        If the forecaster has been trained using exogenous variable/s.
    index_type_ : type
        Type of index of the input used in training.
    index_freq_ : str
        Frequency of Index of the input used in training.
    window_size: int
        Size of the window needed to create the predictors. It is equal to 
        `max_lag`.
    last_window : pandas Series, pandas DataFrame, None
        Values of the series used to create the predictors (lags) need in the 
        first iteration of prediction (t + 1).
    last_window_exog : pandas Series, pandas DataFrame, default None
        Values of the exogenous variables aligned with `last_window` in 
        ForecasterStats predictions.
    exog : pandas Series, pandas DataFrame, dict, default None
        Exogenous variable/s included as predictor/s.
    exog_names_in_ : list, default None
        Names of the exogenous variables used during training.
    interval : list, tuple, default None
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.
    alpha : float, default None
        The confidence intervals used in ForecasterStats are (1 - alpha) %.
    max_step: int, default None
        Maximum number of steps allowed (`ForecasterDirect` and 
        `ForecasterDirectMultiVariate`).
    levels : str, list, default None
        Time series to be predicted (`ForecasterRecursiveMultiSeries`
        and `ForecasterRnn).
    levels_forecaster : str, list, default None
        Time series used as output data of a multiseries problem in a RNN problem
        (`ForecasterRnn`).
    series_names_in_ : list, default None
        Names of the columns used during fit (`ForecasterRecursiveMultiSeries`, 
        `ForecasterDirectMultiVariate` and `ForecasterRnn`).
    encoding : str, default None
        Encoding used to identify the different series (`ForecasterRecursiveMultiSeries`).

    Returns
    -------
    None

    """

    if not is_fitted:
        raise NotFittedError(
            "This Forecaster instance is not fitted yet. Call `fit` with "
            "appropriate arguments before using predict."
        )

    if isinstance(steps, (int, np.integer)) and steps < 1:
        raise ValueError(
            f"`steps` must be an integer greater than or equal to 1. Got {steps}."
        )

    if isinstance(steps, list) and min(steps) < 1:
        raise ValueError(
           f"The minimum value of `steps` must be equal to or greater than 1. "
           f"Got {min(steps)}."
        )

    if max_step is not None:
        if max(steps) > max_step:
            raise ValueError(
                f"The maximum value of `steps` must be less than or equal to "
                f"the value of steps defined when initializing the forecaster. "
                f"Got {max(steps)}, but the maximum is {max_step}."
            )

    if interval is not None or alpha is not None:
        check_interval(interval=interval, alpha=alpha)

    if forecaster_name in ['ForecasterRecursiveMultiSeries', 'ForecasterRnn']:
        if not isinstance(levels, (type(None), str, list)):
            raise TypeError(
                "`levels` must be a `list` of column names, a `str` of a "
                "column name or `None`."
            )

        levels_to_check = (
            levels_forecaster if forecaster_name == 'ForecasterRnn'
            else series_names_in_
        )
        unknown_levels = set(levels) - set(levels_to_check)
        if forecaster_name == 'ForecasterRnn':
            if len(unknown_levels) != 0:
                raise ValueError(
                    f"`levels` names must be included in the series used during fit "
                    f"({levels_to_check}). Got {levels}."
                )
        else:
            if len(unknown_levels) != 0 and last_window is not None and encoding is not None:
                if encoding == 'onehot':
                    warnings.warn(
                        f"`levels` {unknown_levels} were not included in training. The resulting "
                        f"one-hot encoded columns for this feature will be all zeros.",
                        UnknownLevelWarning
                    )
                else:
                    warnings.warn(
                        f"`levels` {unknown_levels} were not included in training. "
                        f"Unknown levels are encoded as NaN, which may cause the "
                        f"prediction to fail if the estimator does not accept NaN values.",
                        UnknownLevelWarning
                    )

    if exog is None and exog_in_:
        raise ValueError(
            "Forecaster trained with exogenous variable/s. "
            "Same variable/s must be provided when predicting."
        )

    if exog is not None and not exog_in_:
        raise ValueError(
            "Forecaster trained without exogenous variable/s. "
            "`exog` must be `None` when predicting."
        )

    # Checks last_window
    # Check last_window type (pd.Series or pd.DataFrame according to forecaster)
    if isinstance(last_window, type(None)) and forecaster_name not in [
        'ForecasterRecursiveMultiSeries', 
        'ForecasterRnn'
    ]:
        raise ValueError(
            "`last_window` was not stored during training. If you don't want "
            "to retrain the Forecaster, provide `last_window` as argument."
        )

    if forecaster_name in [
        'ForecasterRecursiveMultiSeries', 
        'ForecasterDirectMultiVariate',
        'ForecasterRnn'
    ]:
        if not isinstance(last_window, pd.DataFrame):
            raise TypeError(
                f"`last_window` must be a pandas DataFrame. Got {type(last_window)}."
            )

        last_window_cols = last_window.columns.to_list()

        if (
            forecaster_name in ["ForecasterRecursiveMultiSeries", "ForecasterRnn"]
            and len(set(levels) - set(last_window_cols)) != 0
        ):
            missing_levels = set(levels) - set(last_window_cols)
            raise ValueError(
                f"`last_window` must contain a column(s) named as the level(s) to be predicted. "
                f"The following `levels` are missing in `last_window`: {missing_levels}\n"
                f"Ensure that `last_window` contains all the necessary columns "
                f"corresponding to the `levels` being predicted.\n"
                f"    Argument `levels`     : {levels}\n"
                f"    `last_window` columns : {last_window_cols}\n"
                f"Example: If `levels = ['series_1', 'series_2']`, make sure "
                f"`last_window` includes columns named 'series_1' and 'series_2'."
            )

        if forecaster_name == 'ForecasterDirectMultiVariate':
            if len(set(series_names_in_) - set(last_window_cols)) > 0:
                raise ValueError(
                    f"`last_window` columns must be the same as the `series` "
                    f"column names used to create the X_train matrix.\n"
                    f"    `last_window` columns    : {last_window_cols}\n"
                    f"    `series` columns X train : {series_names_in_}"
                )
    else:
        if not isinstance(last_window, (pd.Series, pd.DataFrame)):
            raise TypeError(
                f"`last_window` must be a pandas Series or DataFrame. "
                f"Got {type(last_window)}."
            )

    # Check last_window len, nulls and index (type and freq)
    if len(last_window) < window_size:
        raise ValueError(
            f"`last_window` must have as many values as needed to "
            f"generate the predictors. For this forecaster it is {window_size}."
        )
    if last_window.isna().to_numpy().any():
        warnings.warn(
            "`last_window` has missing values. Most of machine learning models do "
            "not allow missing values. Prediction method may fail.", 
            MissingValuesWarning
        )

    _, last_window_index = check_extract_values_and_index(
        data=last_window, data_label='`last_window`', ignore_freq=False, return_values=False
    )
    if not isinstance(last_window_index, index_type_):
        raise TypeError(
            f"Expected index of type {index_type_} for `last_window`. "
            f"Got {type(last_window_index)}."
        )
    if isinstance(last_window_index, pd.DatetimeIndex):
        if not last_window_index.freq == index_freq_:
            raise TypeError(
                f"Expected frequency of type {index_freq_} for `last_window`. "
                f"Got {last_window_index.freq}."
            )

    # Checks exog
    if exog is not None:

        # Check type, nulls and expected type
        if forecaster_name in ['ForecasterRecursiveMultiSeries']:
            if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
                raise TypeError(
                    f"`exog` must be a pandas Series, DataFrame or dict. Got {type(exog)}."
                )
        else:
            if not isinstance(exog, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}."
                )

        if isinstance(exog, dict):
            no_exog_levels = set(levels) - set(exog.keys())
            if no_exog_levels:
                warnings.warn(
                    f"`exog` does not contain keys for levels {no_exog_levels}. "
                    f"Missing levels are filled with NaN. Most of machine learning "
                    f"models do not allow missing values. Prediction method may fail.",
                    MissingExogWarning
                )
            exogs_to_check = [
                (f"`exog` for series '{k}'", v) 
                for k, v in exog.items() 
                if v is not None and k in levels
            ]
        else:
            exogs_to_check = [('`exog`', exog)]

        last_step = max(steps) if isinstance(steps, list) else steps
        expected_index = expand_index(last_window_index, 1)[0]
        for exog_name, exog_to_check in exogs_to_check:

            if not isinstance(exog_to_check, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    f"{exog_name} must be a pandas Series or DataFrame. Got {type(exog_to_check)}"
                )

            if exog_to_check.isna().to_numpy().any():
                warnings.warn(
                    f"{exog_name} has missing values. Most of machine learning models "
                    f"do not allow missing values. Prediction method may fail.", 
                    MissingValuesWarning
                )

            # Check exog has many values as distance to max step predicted
            if len(exog_to_check) < last_step:
                if forecaster_name in ['ForecasterRecursiveMultiSeries']:
                    warnings.warn(
                        f"{exog_name} doesn't have as many values as steps "
                        f"predicted, {last_step}. Missing values are filled "
                        f"with NaN. Most of machine learning models do not "
                        f"allow missing values. Prediction method may fail.",
                        MissingValuesWarning
                    )
                else: 
                    raise ValueError(
                        f"{exog_name} must have at least as many values as "
                        f"steps predicted, {last_step}."
                    )

            # Check name/columns are in exog_names_in_
            if isinstance(exog_to_check, pd.DataFrame):
                col_missing = set(exog_names_in_).difference(set(exog_to_check.columns))
                if col_missing:
                    if forecaster_name in ['ForecasterRecursiveMultiSeries']:
                        warnings.warn(
                            f"{col_missing} not present in {exog_name}. All "
                            f"values will be NaN.",
                            MissingExogWarning
                        ) 
                    else:
                        raise ValueError(
                            f"Missing columns in {exog_name}. Expected {exog_names_in_}. "
                            f"Got {exog_to_check.columns.to_list()}."
                        )
            else:
                if exog_to_check.name is None:
                    raise ValueError(
                        f"When {exog_name} is a pandas Series, it must have a name. Got None."
                    )

                if exog_to_check.name not in exog_names_in_:
                    if forecaster_name in ['ForecasterRecursiveMultiSeries']:
                        warnings.warn(
                            f"'{exog_to_check.name}' was not observed during training. "
                            f"{exog_name} is ignored. Exogenous variables must be one "
                            f"of: {exog_names_in_}.",
                            IgnoredArgumentWarning
                        )
                    else:
                        raise ValueError(
                            f"'{exog_to_check.name}' was not observed during training. "
                            f"Exogenous variables must be: {exog_names_in_}."
                        )

            # Check index dtype and freq
            _, exog_index = check_extract_values_and_index(
                data=exog_to_check, data_label=exog_name, ignore_freq=True, return_values=False
            )
            if not isinstance(exog_index, index_type_):
                raise TypeError(
                    f"Expected index of type {index_type_} for {exog_name}. "
                    f"Got {type(exog_index)}."
                )

            # Check exog starts one step ahead of last_window end.
            if expected_index != exog_index[0]:
                if forecaster_name in ['ForecasterRecursiveMultiSeries']:
                    warnings.warn(
                        f"To make predictions {exog_name} must start one step "
                        f"ahead of `last_window`. Missing values are filled "
                        f"with NaN.\n"
                        f"    `last_window` ends at : {last_window.index[-1]}.\n"
                        f"    {exog_name} starts at : {exog_index[0]}.\n"
                        f"    Expected index : {expected_index}.",
                        MissingValuesWarning
                    )  
                else:
                    raise ValueError(
                        f"To make predictions {exog_name} must start one step "
                        f"ahead of `last_window`.\n"
                        f"    `last_window` ends at : {last_window.index[-1]}.\n"
                        f"    {exog_name} starts at : {exog_index[0]}.\n"
                        f"    Expected index : {expected_index}."
                    )

    # Checks ForecasterStats
    if forecaster_name == 'ForecasterStats':
        # Check last_window_exog type, len, nulls and index (type and freq)
        if last_window_exog is not None:
            if not exog_in_:
                raise ValueError(
                    "Forecaster trained without exogenous variable/s. "
                    "`last_window_exog` must be `None` when predicting."
                )

            if not isinstance(last_window_exog, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    f"`last_window_exog` must be a pandas Series or a "
                    f"pandas DataFrame. Got {type(last_window_exog)}."
                )
            if len(last_window_exog) < window_size:
                raise ValueError(
                    f"`last_window_exog` must have as many values as needed to "
                    f"generate the predictors. For this forecaster it is {window_size}."
                )
            if last_window_exog.isna().to_numpy().any():
                warnings.warn(
                    "`last_window_exog` has missing values. Most of machine learning "
                    "models do not allow missing values. Prediction method may fail.",
                    MissingValuesWarning
            )
            _, last_window_exog_index = check_extract_values_and_index(
                data=last_window_exog, data_label='`last_window_exog`', return_values=False
            )
            if not isinstance(last_window_exog_index, index_type_):
                raise TypeError(
                    f"Expected index of type {index_type_} for `last_window_exog`. "
                    f"Got {type(last_window_exog_index)}."
                )
            if isinstance(last_window_exog_index, pd.DatetimeIndex):
                if not last_window_exog_index.freq == index_freq_:
                    raise TypeError(
                        f"Expected frequency of type {index_freq_} for "
                        f"`last_window_exog`. Got {last_window_exog_index.freq}."
                    )

            # Check all columns are in the pd.DataFrame, last_window_exog
            if isinstance(last_window_exog, pd.DataFrame):
                col_missing = set(exog_names_in_).difference(set(last_window_exog.columns))
                if col_missing:
                    raise ValueError(
                        f"Missing columns in `last_window_exog`. Expected {exog_names_in_}. "
                        f"Got {last_window_exog.columns.to_list()}."
                    )
            else:
                if last_window_exog.name is None:
                    raise ValueError(
                        "When `last_window_exog` is a pandas Series, it must have a "
                        "name. Got None."
                    )

                if last_window_exog.name not in exog_names_in_:
                    raise ValueError(
                        f"'{last_window_exog.name}' was not observed during training. "
                        f"Exogenous variables must be: {exog_names_in_}."
                    )

skforecast.utils.utils.check_residuals_input ¶


check_residuals_input(
    forecaster_name,
    use_in_sample_residuals,
    in_sample_residuals_,
    out_sample_residuals_,
    use_binned_residuals,
    in_sample_residuals_by_bin_,
    out_sample_residuals_by_bin_,
    levels=None,
    encoding=None,
)

Check residuals input arguments in Forecasters.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`use_in_sample_residuals`	`bool`	Indicates if in sample or out sample residuals are used.	required
`in_sample_residuals_`	`numpy ndarray, dict`	Residuals of the model when predicting training data.	required
`out_sample_residuals_`	`numpy ndarray, dict`	Residuals of the model when predicting non training data.	required
`use_binned_residuals`	`bool`	Indicates if residuals are binned.	required
`in_sample_residuals_by_bin_`	`dict`	In sample residuals binned according to the predicted value each residual is associated with.	required
`out_sample_residuals_by_bin_`	`dict`	Out of sample residuals binned according to the predicted value each residual is associated with.	required
`levels`	`list`	Names of the series (levels) to be predicted (Forecasters multiseries).	`None`
`encoding`	`str`	Encoding used to identify the different series (ForecasterRecursiveMultiSeries).	`None`

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_residuals_input(
    forecaster_name: str,
    use_in_sample_residuals: bool,
    in_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
    out_sample_residuals_: np.ndarray | dict[str, np.ndarray] | None,
    use_binned_residuals: bool,
    in_sample_residuals_by_bin_: dict[str | int, np.ndarray | dict[int, np.ndarray]] | None,
    out_sample_residuals_by_bin_: dict[str | int, np.ndarray | dict[int, np.ndarray]] | None,
    levels: list[str] | None = None,
    encoding: str | None = None
) -> None:
    """
    Check residuals input arguments in Forecasters.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    use_in_sample_residuals : bool
        Indicates if in sample or out sample residuals are used.
    in_sample_residuals_ : numpy ndarray, dict
        Residuals of the model when predicting training data.
    out_sample_residuals_ : numpy ndarray, dict
        Residuals of the model when predicting non training data.
    use_binned_residuals : bool
        Indicates if residuals are binned.
    in_sample_residuals_by_bin_ : dict
        In sample residuals binned according to the predicted value each residual
        is associated with.
    out_sample_residuals_by_bin_ : dict
        Out of sample residuals binned according to the predicted value each residual
        is associated with.
    levels : list, default None
        Names of the series (levels) to be predicted (Forecasters multiseries).
    encoding : str, default None
        Encoding used to identify the different series (ForecasterRecursiveMultiSeries).

    Returns
    -------
    None

    """

    forecasters_multiseries = [
        'ForecasterRecursiveMultiSeries',
        'ForecasterDirectMultiVariate',
        'ForecasterRnn'
    ]

    if use_in_sample_residuals:
        if use_binned_residuals:
            residuals = in_sample_residuals_by_bin_
            literal = "in_sample_residuals_by_bin_"
        else:
            residuals = in_sample_residuals_
            literal = "in_sample_residuals_"

        if (
            residuals is None
            or (isinstance(residuals, dict) and not residuals)
            or (isinstance(residuals, np.ndarray) and residuals.size == 0)
        ):
            raise ValueError(
                f"`forecaster.{literal}` is either None or empty. Use "
                f"`store_in_sample_residuals = True` when fitting the forecaster "
                f"or use the `set_in_sample_residuals()` method before predicting."
            )

        if forecaster_name in forecasters_multiseries:
            if encoding is not None:
                unknown_levels = set(levels) - set(residuals.keys())
                if unknown_levels:
                    warnings.warn(
                        f"`levels` {unknown_levels} are not present in `forecaster.{literal}`, "
                        f"most likely because they were not present in the training data. "
                        f"A random sample of the residuals from other levels will be used. "
                        f"This can lead to inaccurate intervals for the unknown levels.",
                        UnknownLevelWarning
                    )
    else:
        if use_binned_residuals:
            residuals = out_sample_residuals_by_bin_
            literal = "out_sample_residuals_by_bin_"
        else:
            residuals = out_sample_residuals_
            literal = "out_sample_residuals_"

        if (
            residuals is None
            or (isinstance(residuals, dict) and not residuals)
            or (isinstance(residuals, np.ndarray) and residuals.size == 0)
        ):
            raise ValueError(
                f"`forecaster.{literal}` is either None or empty. Use "
                f"`use_in_sample_residuals = True` or the "
                f"`set_out_sample_residuals()` method before predicting."
            )

        if forecaster_name in forecasters_multiseries:
            if encoding is not None:
                unknown_levels = set(levels) - set(residuals.keys())
                if unknown_levels:
                    warnings.warn(
                        f"`levels` {unknown_levels} are not present in `forecaster.{literal}`. "
                        f"A random sample of the residuals from other levels will be used. "
                        f"This can lead to inaccurate intervals for the unknown levels. "
                        f"Otherwise, Use the `set_out_sample_residuals()` method before "
                        f"predicting to set the residuals for these levels.",
                        UnknownLevelWarning
                    )

    if forecaster_name in forecasters_multiseries:
        for level in residuals.keys():
            if residuals[level] is None or len(residuals[level]) == 0:
                raise ValueError(
                    f"Residuals for level '{level}' are None. Check `forecaster.{literal}`."
                )

skforecast.utils.utils.check_extract_values_and_index ¶


check_extract_values_and_index(
    data,
    data_label="`y`",
    ignore_freq=False,
    return_values=True,
)

Return values and index of series separately. Check that index is a pandas DatetimeIndex or RangeIndex. Optionally, check that the index has a frequency.

Parameters:

Name	Type	Description	Default
`data`	`pandas Series, pandas DataFrame`	Time series.	required
`data_label`	`str`	Label of the data to be used in warnings and errors.	'`y`'
`ignore_freq`	`bool`	If `True`, ignore the frequency of the index. If `False`, check that the index is a pandas `DatetimeIndex` with a frequency.	`False`
`return_values`	`bool`	If `True` return the values of `data` as numpy ndarray. This option is intended to avoid copying data when it is not necessary.	`True`

Returns:

Name	Type	Description
`data_values`	`numpy ndarray, None`	Numpy array with values of `data`.
`data_index`	`pandas Index`	Index of `data`.

Source code in skforecast/utils/utils.py

def check_extract_values_and_index(
    data: pd.Series | pd.DataFrame,
    data_label: str = '`y`',
    ignore_freq: bool = False,
    return_values: bool = True
) -> tuple[np.ndarray | None, pd.Index]:
    """
    Return values and index of series separately. Check that index is a pandas
    `DatetimeIndex` or `RangeIndex`. Optionally, check that the index has a
    frequency.

    Parameters
    ----------
    data : pandas Series, pandas DataFrame
        Time series.
    data_label : str, default '`y`'
        Label of the data to be used in warnings and errors.
    ignore_freq : bool, default False
        If `True`, ignore the frequency of the index. If `False`, check that the
        index is a pandas `DatetimeIndex` with a frequency.
    return_values : bool, default True
        If `True` return the values of `data` as numpy ndarray. This option is
        intended to avoid copying data when it is not necessary.

    Returns
    -------
    data_values : numpy ndarray, None
        Numpy array with values of `data`.
    data_index : pandas Index
        Index of `data`.

    """

    if isinstance(data.index, pd.DatetimeIndex):            
        if not ignore_freq and data.index.freq is None:
            raise ValueError(
                f"{data_label} has a pandas DatetimeIndex without a frequency. "
                f"To avoid this error, set the frequency of the DatetimeIndex."
            )
        data_index = data.index
    elif isinstance(data.index, pd.RangeIndex):
        data_index = data.index
    else:
        raise TypeError(
            f"{data_label} has an unsupported index type. The index must be a "
            f"pandas DatetimeIndex or a RangeIndex. Got {type(data.index)}."
        )

    data_values = data.to_numpy(copy=True).ravel() if return_values else None

    return data_values, data_index

skforecast.utils.utils.cast_exog_dtypes ¶


cast_exog_dtypes(exog, exog_dtypes)

Cast exog to the specified types. This is done because, for a forecaster to accept a categorical exog, it must contain only integer values. Due to the internal modifications of numpy, the values may be cast to float, so they have to be re-converted to int.

If exog is a pandas Series, exog_dtypes must be a dict with a single value.
If exog_dtypes is category but the current type of exog is float, then the type is cast to int and then to category.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables.	required
`exog_dtypes`	`dict[str, type]`	Dictionary with name and type of the series or data frame columns.	required

Returns:

Name	Type	Description
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables casted to the indicated dtypes.

Source code in skforecast/utils/utils.py

def cast_exog_dtypes(
    exog: pd.Series | pd.DataFrame,
    exog_dtypes: dict[str, type],
) -> pd.Series | pd.DataFrame:  # pragma: no cover
    """
    Cast `exog` to a specified types. This is done because, for a forecaster to 
    accept a categorical exog, it must contain only integer values. Due to the 
    internal modifications of numpy, the values may be casted to `float`, so 
    they have to be re-converted to `int`.

    - If `exog` is a pandas Series, `exog_dtypes` must be a dict with a 
    single value.
    - If `exog_dtypes` is `category` but the current type of `exog` is `float`, 
    then the type is cast to `int` and then to `category`. 

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.
    exog_dtypes: dict
        Dictionary with name and type of the series or data frame columns.

    Returns
    -------
    exog : pandas Series, pandas DataFrame
        Exogenous variables casted to the indicated dtypes.

    """

    # Remove keys from exog_dtypes not in exog.columns
    exog_dtypes = {k: v for k, v in exog_dtypes.items() if k in exog.columns}

    if isinstance(exog, pd.Series) and exog.dtypes != list(exog_dtypes.values())[0]:
        exog = exog.astype(list(exog_dtypes.values())[0])
    elif isinstance(exog, pd.DataFrame):
        for col, initial_dtype in exog_dtypes.items():
            if exog[col].dtypes != initial_dtype:
                if initial_dtype == "category" and exog[col].dtypes == float:
                    exog[col] = exog[col].astype(int).astype("category")
                else:
                    exog[col] = exog[col].astype(initial_dtype)

    return exog

skforecast.utils.utils.exog_to_direct ¶


exog_to_direct(exog, steps)

Transforms exog to a pandas DataFrame with the shape needed for Direct forecasting.

Parameters:

Name	Type	Description	Default
`exog`	`pandas Series, pandas DataFrame`	Exogenous variables.	required
`steps`	`int`	Number of steps that will be predicted using exog.	required

Returns:

Name	Type	Description
`exog_direct`	`pandas DataFrame`	Exogenous variables transformed.
`exog_direct_names`	`list`	Names of the columns of the exogenous variables transformed. Only created if `exog` is a pandas Series or DataFrame.

Source code in skforecast/utils/utils.py

def exog_to_direct(
    exog: pd.Series | pd.DataFrame,
    steps: int
) -> tuple[pd.DataFrame, list[str]]:
    """
    Transforms `exog` to a pandas DataFrame with the shape needed for Direct
    forecasting.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.
    steps : int
        Number of steps that will be predicted using exog.

    Returns
    -------
    exog_direct : pandas DataFrame
        Exogenous variables transformed.
    exog_direct_names : list
        Names of the columns of the exogenous variables transformed. Only 
        created if `exog` is a pandas Series or DataFrame.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}.")

    if isinstance(exog, pd.Series):
        exog = exog.to_frame()

    n_rows = len(exog)
    exog_idx = exog.index
    exog_cols = exog.columns
    exog_direct = []
    for i in range(steps):
        exog_step = exog.iloc[i : n_rows - (steps - 1 - i), ]
        exog_step.index = pd.RangeIndex(len(exog_step))
        exog_step.columns = [f"{col}_step_{i + 1}" for col in exog_cols]
        exog_direct.append(exog_step)

    if len(exog_direct) > 1:
        exog_direct = pd.concat(exog_direct, axis=1, copy=False)
    else:
        exog_direct = exog_direct[0]

    exog_direct_names = exog_direct.columns.to_list()
    exog_direct.index = exog_idx[-len(exog_direct):]

    return exog_direct, exog_direct_names

skforecast.utils.utils.exog_to_direct_numpy ¶


exog_to_direct_numpy(exog, steps)

Transforms exog to numpy ndarray with the shape needed for Direct forecasting.

Parameters:

Name	Type	Description	Default
`exog`	`numpy ndarray, pandas Series, pandas DataFrame`	Exogenous variables, shape(samples,). If exog is a pandas format, the direct exog names are created.	required
`steps`	`int`	Number of steps that will be predicted using exog.	required

Returns:

Name	Type	Description
`exog_direct`	`numpy ndarray`	Exogenous variables transformed.
`exog_direct_names`	`(list, None)`	Names of the columns of the exogenous variables transformed. Only created if `exog` is a pandas Series or DataFrame.

Source code in skforecast/utils/utils.py

def exog_to_direct_numpy(
    exog: np.ndarray | pd.Series | pd.DataFrame,
    steps: int
) -> tuple[np.ndarray, list[str] | None]:
    """
    Transforms `exog` to numpy ndarray with the shape needed for Direct
    forecasting.

    Parameters
    ----------
    exog : numpy ndarray, pandas Series, pandas DataFrame
        Exogenous variables, shape(samples,). If exog is a pandas format, the 
        direct exog names are created.
    steps : int
        Number of steps that will be predicted using exog.

    Returns
    -------
    exog_direct : numpy ndarray
        Exogenous variables transformed.
    exog_direct_names : list, None
        Names of the columns of the exogenous variables transformed. Only 
        created if `exog` is a pandas Series or DataFrame.

    """

    if isinstance(exog, (pd.Series, pd.DataFrame)):
        exog_cols = exog.columns if isinstance(exog, pd.DataFrame) else [exog.name]
        exog_direct_names = [
            f"{col}_step_{i + 1}" for i in range(steps) for col in exog_cols
        ]
        exog = exog.to_numpy()
    else:
        exog_direct_names = None
        if not isinstance(exog, np.ndarray):
            raise TypeError(
                f"`exog` must be a numpy ndarray, pandas Series or DataFrame. "
                f"Got {type(exog)}."
            )

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    n_rows = len(exog)
    exog_direct = []
    for i in range(steps):
        exog_step = exog[i : n_rows - (steps - 1 - i)]
        exog_direct.append(exog_step)

    if len(exog_direct) > 1:
        exog_direct = np.concatenate(exog_direct, axis=1)
    else:
        exog_direct = exog_direct[0]

    return exog_direct, exog_direct_names

skforecast.utils.utils.expand_index ¶


expand_index(index, steps)

Create a new index of length steps starting at the end of the index.

Parameters:

Name	Type	Description	Default
`index`	`pandas Index, None`	Original index.	required
`steps`	`int`	Number of steps to expand.	required

Returns:

Name	Type	Description
`new_index`	`pandas Index`	New index.

Source code in skforecast/utils/utils.py

def expand_index(
    index: pd.Index | None, 
    steps: int
) -> pd.Index:
    """
    Create a new index of length `steps` starting at the end of the index.

    Parameters
    ----------
    index : pandas Index, None
        Original index.
    steps : int
        Number of steps to expand.

    Returns
    -------
    new_index : pandas Index
        New index.

    """

    if not isinstance(steps, (int, np.integer)):
        raise TypeError(f"`steps` must be an integer. Got {type(steps)}.")

    if isinstance(index, pd.Index):

        if isinstance(index, pd.DatetimeIndex):
            new_index = pd.date_range(
                            start   = index[-1] + index.freq,
                            periods = steps,
                            freq    = index.freq
                        )
        elif isinstance(index, pd.RangeIndex):
            new_index = pd.RangeIndex(
                            start = index[-1] + 1,
                            stop  = index[-1] + 1 + steps
                        )
        else:
            raise TypeError(
                "Argument `index` must be a pandas DatetimeIndex or RangeIndex."
            )
    else:
        new_index = pd.RangeIndex(
                        start = 0,
                        stop  = steps
                    )

    return new_index

skforecast.utils.utils.transform_numpy ¶


transform_numpy(
    array, transformer, fit=False, inverse_transform=False
)

Transform raw values of a numpy ndarray with a scikit-learn alike transformer, preprocessor or ColumnTransformer. The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name	Type	Description	Default
`array`	`numpy ndarray`	Array to be transformed.	required
`transformer`	`scikit-learn alike transformer, preprocessor, or ColumnTransformer.`	Scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.	`False`

Returns:

Name	Type	Description
`array_transformed`	`numpy ndarray`	Transformed array.

Source code in skforecast/utils/utils.py

def transform_numpy(
    array: np.ndarray,
    transformer: object | None,
    fit: bool = False,
    inverse_transform: bool = False
) -> np.ndarray:
    """
    Transform raw values of a numpy ndarray with a scikit-learn alike 
    transformer, preprocessor or ColumnTransformer. The transformer used must 
    have the following methods: fit, transform, fit_transform and 
    inverse_transform. ColumnTransformers are not allowed since they do not 
    have inverse_transform method.

    Parameters
    ----------
    array : numpy ndarray
        Array to be transformed.
    transformer : scikit-learn alike transformer, preprocessor, or ColumnTransformer.
        Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform.
    fit : bool, default False
        Train the transformer before applying it.
    inverse_transform : bool, default False
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    array_transformed : numpy ndarray
        Transformed array.

    """

    if not isinstance(array, np.ndarray):
        raise TypeError(
            f"`array` argument must be a numpy ndarray. Got {type(array)}"
        )

    if transformer is None:
        return array

    array_ndim = array.ndim
    if array_ndim == 1:
        array = array.reshape(-1, 1)

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise ValueError(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", 
            message="X does not have valid feature names", 
            category=UserWarning
        )
        if not inverse_transform:
            if fit:
                array_transformed = transformer.fit_transform(array)
            else:
                array_transformed = transformer.transform(array)
        else:
            array_transformed = transformer.inverse_transform(array)

    if hasattr(array_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        array_transformed = array_transformed.toarray()

    if isinstance(array_transformed, (pd.Series, pd.DataFrame)):
        array_transformed = array_transformed.to_numpy()

    if array_ndim == 1:
        array_transformed = array_transformed.ravel()

    return array_transformed

skforecast.utils.utils.transform_series ¶


transform_series(
    series, transformer, fit=False, inverse_transform=False
)

Transform raw values of pandas Series with a scikit-learn alike transformer, preprocessor or ColumnTransformer. The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name	Type	Description	Default
`series`	`pandas Series`	Series to be transformed.	required
`transformer`	`scikit-learn alike transformer, preprocessor, or ColumnTransformer.`	Scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.	`False`

Returns:

Name	Type	Description
`series_transformed`	`pandas Series, pandas DataFrame`	Transformed Series. Depending on the transformer used, the output may be a Series or a DataFrame.

Source code in skforecast/utils/utils.py

def transform_series(
    series: pd.Series,
    transformer: object | None,
    fit: bool = False,
    inverse_transform: bool = False
) -> pd.Series | pd.DataFrame:
    """
    Transform raw values of pandas Series with a scikit-learn alike 
    transformer, preprocessor or ColumnTransformer. The transformer used must 
    have the following methods: fit, transform, fit_transform and 
    inverse_transform. ColumnTransformers are not allowed since they do not 
    have inverse_transform method.

    Parameters
    ----------
    series : pandas Series
        Series to be transformed.
    transformer : scikit-learn alike transformer, preprocessor, or ColumnTransformer.
        Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform.
    fit : bool, default False
        Train the transformer before applying it.
    inverse_transform : bool, default False
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    series_transformed : pandas Series, pandas DataFrame
        Transformed Series. Depending on the transformer used, the output may 
        be a Series or a DataFrame.

    """

    if not isinstance(series, pd.Series):
        raise TypeError(
            f"`series` argument must be a pandas Series. Got {type(series)}."
        )

    if transformer is None:
        return series

    if series.name is None:
        series.name = 'no_name'

    data = series.to_frame()

    if fit and hasattr(transformer, 'fit'):
        transformer.fit(data)

    # If argument feature_names_in_ exits, is overwritten to allow using the 
    # transformer on other series than those that were passed during fit.
    if hasattr(transformer, 'feature_names_in_') and transformer.feature_names_in_[0] != data.columns[0]:
        transformer = deepcopy(transformer)
        transformer.feature_names_in_ = np.array([data.columns[0]], dtype=object)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        if inverse_transform:
            values_transformed = transformer.inverse_transform(data)
        else:
            values_transformed = transformer.transform(data)   

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense array.
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, np.ndarray) and values_transformed.shape[1] == 1:
        series_transformed = pd.Series(
                                 data  = values_transformed.ravel(),
                                 index = data.index,
                                 name  = data.columns[0]
                             )
    elif isinstance(values_transformed, pd.DataFrame) and values_transformed.shape[1] == 1:
        series_transformed = values_transformed.squeeze()
    else:
        series_transformed = pd.DataFrame(
                                 data    = values_transformed,
                                 index   = data.index,
                                 columns = transformer.get_feature_names_out()
                             )

    return series_transformed

skforecast.utils.utils.transform_dataframe ¶


transform_dataframe(
    df, transformer, fit=False, inverse_transform=False
)

Transform raw values of pandas DataFrame with a scikit-learn alike transformer, preprocessor or ColumnTransformer. The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name	Type	Description	Default
`df`	`pandas DataFrame`	DataFrame to be transformed.	required
`transformer`	`scikit-learn alike transformer, preprocessor, or ColumnTransformer.`	Scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.	`False`

Returns:

Name	Type	Description
`df_transformed`	`pandas DataFrame`	Transformed DataFrame.

Source code in skforecast/utils/utils.py

def transform_dataframe(
    df: pd.DataFrame,
    transformer: object | None,
    fit: bool = False,
    inverse_transform: bool = False
) -> pd.DataFrame:
    """
    Transform raw values of pandas DataFrame with a scikit-learn alike 
    transformer, preprocessor or ColumnTransformer. The transformer used must 
    have the following methods: fit, transform, fit_transform and 
    inverse_transform. ColumnTransformers are not allowed since they do not 
    have inverse_transform method.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame to be transformed.
    transformer : scikit-learn alike transformer, preprocessor, or ColumnTransformer.
        Scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform.
    fit : bool, default False
        Train the transformer before applying it.
    inverse_transform : bool, default False
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    df_transformed : pandas DataFrame
        Transformed DataFrame.

    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            f"`df` argument must be a pandas DataFrame. Got {type(df)}"
        )

    if transformer is None:
        return df

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise ValueError(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    if not inverse_transform:
        if fit:
            values_transformed = transformer.fit_transform(df)
        else:
            values_transformed = transformer.transform(df)
    else:
        values_transformed = transformer.inverse_transform(df)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if hasattr(transformer, 'get_feature_names_out'):
        feature_names_out = transformer.get_feature_names_out()
    elif hasattr(transformer, 'categories_'):   
        feature_names_out = transformer.categories_
    else:
        feature_names_out = df.columns

    df_transformed = pd.DataFrame(
                         data    = values_transformed,
                         index   = df.index,
                         columns = feature_names_out
                     )

    return df_transformed

skforecast.utils.utils.check_optional_dependency ¶


check_optional_dependency(package_name)

Check if an optional dependency is installed, if not raise an ImportError
with installation instructions.

Parameters:

Name	Type	Description	Default
`package_name`	`str`	Name of the package to check.	required

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def check_optional_dependency(
    package_name: str
) -> None:
    """
    Check if an optional dependency is installed, if not raise an ImportError  
    with installation instructions.

    The installation instructions are built from the information returned by 
    `_find_optional_dependency`. If that lookup fails for any reason, a generic 
    message asking to install the package is used instead.

    Parameters
    ----------
    package_name : str
        Name of the package to check.

    Returns
    -------
    None

    Raises
    ------
    ImportError
        If `package_name` cannot be found in the current environment.

    """

    if find_spec(package_name) is not None:
        return

    try:
        extra, package_version = _find_optional_dependency(package_name=package_name)
    except Exception:
        # Best effort: any failure while looking up the package falls back to 
        # the generic message. `Exception` (not a bare `except`) so that 
        # KeyboardInterrupt and SystemExit are not swallowed.
        msg = f"\n'{package_name}' is needed but not installed. Please install it."
    else:
        msg = (
            f"\n'{package_name}' is an optional dependency not included in the default "
            f"skforecast installation. Please run: `pip install \"{package_version}\"` to install it."
            f"\n\nAlternately, you can install it by running `pip install skforecast[{extra}]`"
        )

    raise ImportError(msg)

skforecast.utils.utils.multivariate_time_series_corr ¶


multivariate_time_series_corr(
    time_series, other, lags, method="pearson"
)

Compute correlation between a time_series and the lagged values of other time series.

Parameters:

Name	Type	Description	Default
`time_series`	`pandas Series`	Target time series.	required
`other`	`pandas DataFrame`	Time series whose lagged values are correlated to `time_series`.	required
`lags`	`int, list, numpy ndarray`	Lags to be included in the correlation analysis.	required
`method`	`str`	'pearson': standard correlation coefficient. 'kendall': Kendall Tau correlation coefficient. 'spearman': Spearman rank correlation.	`'pearson'`

Returns:

Name	Type	Description
`corr`	`pandas DataFrame`	Correlation values.

Source code in skforecast/utils/utils.py

def multivariate_time_series_corr(
    time_series: pd.Series,
    other: pd.DataFrame,
    lags: int | list[int] | np.ndarray[int],
    method: str = 'pearson'
) -> pd.DataFrame:
    """
    Compute correlation between a time_series and the lagged values of other 
    time series. 

    Parameters
    ----------
    time_series : pandas Series
        Target time series.
    other : pandas DataFrame
        Time series whose lagged values are correlated to `time_series`.
    lags : int, list, numpy ndarray
        Lags to be included in the correlation analysis.
    method : str, default 'pearson'
        - 'pearson': standard correlation coefficient.
        - 'kendall': Kendall Tau correlation coefficient.
        - 'spearman': Spearman rank correlation.

    Returns
    -------
    corr : pandas DataFrame
        Correlation values.

    """

    if not len(time_series) == len(other):
        raise ValueError("`time_series` and `other` must have the same length.")

    if not (time_series.index == other.index).all():
        raise ValueError("`time_series` and `other` must have the same index.")

    if isinstance(lags, int):
        lags = range(lags)

    corr = {}
    for col in other.columns:
        lag_values = {}
        for lag in lags:
            lag_values[lag] = other[col].shift(lag)

        lag_values = pd.DataFrame(lag_values)
        lag_values.insert(0, None, time_series)
        corr[col] = lag_values.corr(method=method).iloc[1:, 0]

    corr = pd.DataFrame(corr)
    corr.index = corr.index.astype('int64')
    corr.index.name = "lag"

    return corr

skforecast.utils.utils.select_n_jobs_fit_forecaster ¶


select_n_jobs_fit_forecaster(forecaster_name, estimator)

Select the optimal number of jobs to use in the fitting process. This selection is based on heuristics and is not guaranteed to be optimal.

The number of jobs is chosen as follows:

If forecaster_name is 'ForecasterDirect' or 'ForecasterDirectMultiVariate' and estimator_name is a linear estimator then n_jobs = 1, otherwise n_jobs = cpu_count() - 1.
If estimator is a LGBMRegressor(n_jobs=1), then n_jobs = cpu_count() - 1.
If estimator is a LGBMRegressor with internal n_jobs != 1, then n_jobs = 1. This is because lightgbm is highly optimized for gradient boosting and parallelizes operations at a very fine-grained level, making additional parallelization unnecessary and potentially harmful due to resource contention.

Parameters:

Name	Type	Description	Default
`forecaster_name`	`str`	Forecaster name.	required
`estimator`	`estimator or pipeline compatible with the scikit-learn API`	An instance of an estimator or pipeline compatible with the scikit-learn API.	required

Returns:

Name	Type	Description
`n_jobs`	`int`	The number of jobs to run in parallel.

Source code in skforecast/utils/utils.py

def select_n_jobs_fit_forecaster(
    forecaster_name: str,
    estimator: object
) -> int:
    """
    Select the optimal number of jobs to use in the fitting process. This
    selection is based on heuristics and is not guaranteed to be optimal. 

    The number of jobs is chosen as follows:

    - If forecaster_name is not 'ForecasterDirect' or 
    'ForecasterDirectMultiVariate', then `n_jobs = 1`.
    - If forecaster_name is 'ForecasterDirect' or 'ForecasterDirectMultiVariate':
        - If the estimator is a linear estimator (its class name is a public 
        name of `sklearn.linear_model`), then `n_jobs = 1`.
        - If estimator is a `LGBMRegressor(n_jobs=1)`, then 
        `n_jobs = cpu_count() - 1`.
        - If estimator is a `LGBMRegressor` with internal n_jobs != 1, then 
        `n_jobs = 1`. This is because `lightgbm` is highly optimized for 
        gradient boosting and parallelizes operations at a very fine-grained 
        level, making additional parallelization unnecessary and potentially 
        harmful due to resource contention.
        - Otherwise, `n_jobs = cpu_count() - 1`.

    `cpu_count() - 1` is never allowed to go below 1, so a valid number of 
    jobs is also returned on machines with a single CPU.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name.
    estimator : estimator or pipeline compatible with the scikit-learn API
        An instance of an estimator or pipeline compatible with the scikit-learn 
        API. If a pipeline is passed, its last step is used.

    Returns
    -------
    n_jobs : int
        The number of jobs to run in parallel.

    """

    if isinstance(estimator, Pipeline):
        estimator = estimator[-1]
    estimator_name = type(estimator).__name__

    if forecaster_name not in ['ForecasterDirect', 'ForecasterDirectMultiVariate']:
        return 1

    linear_estimators = {
        name
        for name in dir(sklearn.linear_model)
        if not name.startswith('_')
    }

    # With a single CPU `cpu_count() - 1` is 0, which joblib does not accept 
    # as a number of jobs.
    n_jobs_parallel = max(1, joblib.cpu_count() - 1)

    if estimator_name in linear_estimators:
        n_jobs = 1
    elif estimator_name == 'LGBMRegressor':
        n_jobs = n_jobs_parallel if estimator.n_jobs == 1 else 1
    else:
        n_jobs = n_jobs_parallel

    return n_jobs

skforecast.utils.utils.check_preprocess_series ¶


check_preprocess_series(series)

Check and preprocess series argument in ForecasterRecursiveMultiSeries class.

If series is a wide-format pandas DataFrame, each column represents a different time series, and the index must be either a DatetimeIndex or a RangeIndex with frequency or step size, as appropriate.
If series is a long-format pandas DataFrame with a MultiIndex, the first level of the index must contain the series IDs, and the second level must be a DatetimeIndex with the same frequency across all series.
If series is a dictionary, each key must be a series ID, and each value must be a named pandas Series. All series must have the same index, which must be either a DatetimeIndex or a RangeIndex, and they must share the same frequency or step size, as appropriate.

When series is a pandas DataFrame, it is converted to a dictionary of pandas Series, where the keys are the series IDs and the values are the Series with the same index as the original DataFrame.

Parameters:

Name	Type	Description	Default
`series`	`pandas DataFrame, dict`	Training time series.	required

Returns:

Name	Type	Description
`series_dict`	`dict`	Dictionary with the series used during training.
`series_indexes`	`dict`	Dictionary with the index of each series.

Source code in skforecast/utils/utils.py

def check_preprocess_series(
    series: pd.DataFrame | dict[str, pd.Series | pd.DataFrame],
) -> tuple[dict[str, pd.Series], dict[str, pd.Index]]:
    """
    Check and preprocess `series` argument in `ForecasterRecursiveMultiSeries` class.

    - If `series` is a wide-format pandas DataFrame, each column represents a
    different time series, and the index must be either a `DatetimeIndex` or 
    a `RangeIndex` with frequency or step size, as appropriate
    - If `series` is a long-format pandas DataFrame with a MultiIndex, the 
    first level of the index must contain the series IDs, and the second 
    level must be a `DatetimeIndex` with the same frequency across all series.
    - If series is a dictionary, each key must be a series ID, and each value 
    must be a named pandas Series. All series must have the same index, which 
    must be either a `DatetimeIndex` or a `RangeIndex`, and they must share the 
    same frequency or step size, as appropriate.

    When `series` is a pandas DataFrame, it is converted to a dictionary of pandas 
    Series, where the keys are the series IDs and the values are the Series with 
    the same index as the original DataFrame.

    Parameters
    ----------
    series : pandas DataFrame, dict
        Training time series.

    Returns
    -------
    series_dict : dict
        Dictionary with the series used during training.
    series_indexes : dict
        Dictionary with the index of each series.

    """

    if not isinstance(series, (pd.DataFrame, dict)):
        raise TypeError(
            f"`series` must be a pandas DataFrame or a dict of DataFrames or Series. "
            f"Got {type(series)}."
        )

    if isinstance(series, pd.DataFrame):

        if not isinstance(series.index, pd.MultiIndex):
            _, _ = check_extract_values_and_index(
                data=series, data_label='`series`', return_values=False
            )
            series = series.copy()
            series.index.name = None
            series_dict = series.to_dict(orient='series')
        else:
            if not isinstance(series.index.levels[1], pd.DatetimeIndex):
                raise TypeError(
                    f"The second level of the MultiIndex in `series` must be a "
                    f"pandas DatetimeIndex with the same frequency for each series. "
                    f"Found {type(series.index.levels[1])}."
                )

            first_col = series.columns[0]
            if len(series.columns) != 1:
                warnings.warn(
                    f"`series` DataFrame has multiple columns. Only the values of "
                    f"first column, '{first_col}', will be used as series values. "
                    f"All other columns will be ignored.",
                    IgnoredArgumentWarning
                )

            series = series.copy()
            series.index = series.index.set_names([series.index.names[0], None])
            series_dict = {
                series_id: series.loc[series_id][first_col].rename(series_id)
                for series_id in series.index.levels[0]
            }

        warnings.warn(
            "Passing a DataFrame (either wide or long format) as `series` requires "
            "additional internal transformations, which can increase computational "
            "time. It is recommended to use a dictionary of pandas Series instead. "
            "For more details, see: "
            "https://skforecast.org/latest/user_guides/independent-multi-time-series-forecasting.html#input-data",
            InputTypeWarning
        )

    else:

        not_valid_series = [
            k 
            for k, v in series.items()
            if not isinstance(v, (pd.Series, pd.DataFrame))
        ]
        if not_valid_series:
            raise TypeError(
                f"If `series` is a dictionary, all series must be a named "
                f"pandas Series or a pandas DataFrame with a single column. "
                f"Review series: {not_valid_series}"
            )

        series_dict = {
            k: v.copy()
            for k, v in series.items()
        }

    not_valid_index = []
    indexes_freq = set()
    series_indexes = {}
    for k, v in series_dict.items():
        if isinstance(v, pd.DataFrame):
            if v.shape[1] != 1:
                raise ValueError(
                    f"If `series` is a dictionary, all series must be a named "
                    f"pandas Series or a pandas DataFrame with a single column. "
                    f"Review series: '{k}'"
                )
            series_dict[k] = v.iloc[:, 0]

        series_dict[k].name = k
        idx = v.index
        if isinstance(idx, pd.DatetimeIndex):
            indexes_freq.add(idx.freq)
        elif isinstance(idx, pd.RangeIndex):
            indexes_freq.add(idx.step)
        else:
            not_valid_index.append(k)

        if v.isna().to_numpy().all():
            raise ValueError(f"All values of series '{k}' are NaN.")

        series_indexes[k] = idx

    if not_valid_index:
        raise TypeError(
            f"If `series` is a dictionary, all series must have a Pandas "
            f"RangeIndex or DatetimeIndex with the same step/frequency. "
            f"Review series: {not_valid_index}"
        )
    if None in indexes_freq:
        raise ValueError(
            "If `series` is a dictionary, all series must have a Pandas "
            "RangeIndex or DatetimeIndex with the same step/frequency. "
            "If it a MultiIndex DataFrame, the second level must be a DatetimeIndex "
            "with the same frequency for each series. Found series with no "
            "frequency or step."
        )
    if not len(indexes_freq) == 1:
        raise ValueError(
            f"If `series` is a dictionary, all series must have a Pandas "
            f"RangeIndex or DatetimeIndex with the same step/frequency. "
            f"If it a MultiIndex DataFrame, the second level must be a DatetimeIndex "
            f"with the same frequency for each series. "
            f"Found frequencies: {sorted(indexes_freq)}"
        )

    return series_dict, series_indexes

skforecast.utils.utils.check_preprocess_exog_multiseries ¶


check_preprocess_exog_multiseries(
    series_names_in_, series_index_type, exog, exog_dict
)

Check and preprocess exog argument in ForecasterRecursiveMultiSeries class.

If exog is a wide-format pandas DataFrame, it must share the same index type as series. Each column represents a different exogenous variable, and the same values are applied to all time series.
If exog is a long-format pandas Series or DataFrame with a MultiIndex, the first level contains the series IDs to which it belongs, and the second level contains a pandas DatetimeIndex. One column must be created for each exogenous variable.
If exog is a dictionary, each key must be the series ID to which it belongs, and each value must be a named pandas Series/DataFrame with the same index type as series or None. While it is not necessary for all values to include all the exogenous variables, the dtypes must be consistent for the same exogenous variable across all series.

When exog is a pandas DataFrame, it is converted to a dictionary of pandas DataFrames, where the keys are the series IDs and the values are the DataFrames with the same index as the original DataFrame.

Parameters:

Name	Type	Description	Default
`series_names_in_`	`list`	Names of the series (levels) used during training.	required
`series_index_type`	`type`	Index type of the series used during training.	required
`exog`	`pandas Series, pandas DataFrame, dict`	Exogenous variable/s used during training.	required
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.	required

Returns:

Name	Type	Description
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.
`exog_names_in_`	`list`	Names of the exogenous variables used during training.

Source code in skforecast/utils/utils.py

def check_preprocess_exog_multiseries(
    series_names_in_: list[str],
    series_index_type: type,
    exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame | None],
    exog_dict: dict[str, pd.Series | pd.DataFrame | None],
) -> tuple[dict[str, pd.DataFrame | None], list[str]]:
    """
    Check and preprocess `exog` argument in `ForecasterRecursiveMultiSeries` class.

    - If `exog` is a wide-format pandas DataFrame, it must share the same 
    index type as series. Each column represents a different exogenous variable, 
    and the same values are applied to all time series.
    - If `exog` is a long-format pandas Series or DataFrame with a MultiIndex, 
    the first level contains the series IDs to which it belongs, and the 
    second level contains a pandas DatetimeIndex. One column must be created
    for each exogenous variable.
    - If `exog` is a dictionary, each key must be the series ID to which it 
    belongs, and each value must be a named pandas Series/DataFrame with
    the same index type as `series` or None. While it is not necessary for 
    all values to include all the exogenous variables, the dtypes must be 
    consistent for the same exogenous variable across all series.

    When `exog` is a pandas Series or DataFrame, it is converted to a 
    dictionary of pandas DataFrames, where the keys are the series IDs and 
    the values are DataFrames. For long-format and dictionary inputs the 
    received `exog_dict` is updated in place (only for series IDs contained 
    in `series_names_in_`); for wide-format input a new dictionary is built 
    and returned instead.

    Parameters
    ----------
    series_names_in_ : list
        Names of the series (levels) used during training.
    series_index_type : type
        Index type of the series used during training.
    exog : pandas Series, pandas DataFrame, dict
        Exogenous variable/s used during training.
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training.

    Returns
    -------
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training. Values
        are pandas DataFrames or None.
    exog_names_in_ : list
        Names of the exogenous variables used during training. When `exog`
        is a dictionary the names are collected through a set, so their 
        order is not guaranteed.

    Raises
    ------
    TypeError
        If `exog` is not a pandas Series, DataFrame or dict; if the second
        level of a MultiIndex `exog` is not a pandas DatetimeIndex; if the
        index of any exog is not an instance of `series_index_type`; if a
        dictionary value is not a pandas Series, DataFrame or None; or if 
        the same exogenous variable has different dtypes in different series.
    ValueError
        If any exogenous variable is named the same as one of the series.

    Warns
    -----
    InputTypeWarning
        If `exog` is a long-format (MultiIndex) pandas Series or DataFrame.
    MissingExogWarning
        If any series in `series_names_in_` has no entry in `exog`.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame, dict)):
        raise TypeError(
            f"`exog` must be a pandas Series, DataFrame, dictionary of pandas "
            f"Series/DataFrames or None. Got {type(exog)}."
        )

    if isinstance(exog, (pd.Series, pd.DataFrame)): 

        # Work on a copy (always a DataFrame from here on) because the index
        # names are rewritten below for long-format input.
        exog = exog.copy().to_frame() if isinstance(exog, pd.Series) else exog.copy()
        if isinstance(exog.index, pd.MultiIndex):
            if not isinstance(exog.index.levels[1], pd.DatetimeIndex):
                raise TypeError(
                    f"When input data are pandas MultiIndex DataFrame, "
                    f"`series` and `exog` second level index must be a "
                    f"pandas DatetimeIndex. Found `exog` index type: "
                    f"{type(exog.index.levels[1])}."
                )
            # Keep the name of the series-ID level and drop the name of the
            # second level, so the per-series frames get an unnamed index.
            exog.index = exog.index.set_names([exog.index.names[0], None])
            # Split the long-format frame into one frame per series ID. IDs
            # that are not in `series_names_in_` are ignored.
            exog_dict.update(
                {
                    series_id: exog.loc[series_id] 
                    for series_id in exog.index.levels[0]
                    if series_id in series_names_in_
                }
            )
            series_ids_in_exog = exog.index.levels[0]
            warnings.warn(
                "Using a long-format DataFrame as `exog` requires additional transformations, "
                "which can increase computational time. It is recommended to use a dictionary of "
                "Series or DataFrames instead. For more information, see: "
                "https://skforecast.org/latest/user_guides/independent-multi-time-series-forecasting#input-data",
                InputTypeWarning
            )
        else:
            if not isinstance(exog.index, series_index_type):
                raise TypeError(
                    f"`exog` must have the same index type as `series`, pandas "
                    f"RangeIndex or pandas DatetimeIndex.\n"
                    f"    `series` index type : {series_index_type}.\n"
                    f"    `exog`   index type : {type(exog.index)}."
                )
            # Wide format: every series maps to the same DataFrame object.
            # NOTE: this rebinds `exog_dict` instead of updating the received
            # dictionary in place.
            exog_dict = {series_id: exog for series_id in series_names_in_}
            series_ids_in_exog = series_names_in_

    else:

        # Collect the keys whose value is neither a pandas object nor None.
        not_valid_exog = [
            k 
            for k, v in exog.items()
            if not isinstance(v, (pd.Series, pd.DataFrame, type(None)))
        ]
        if not_valid_exog:
            raise TypeError(
                f"If `exog` is a dictionary, all exog must be a named pandas "
                f"Series, a pandas DataFrame or None. Review exog: {not_valid_exog}"
            )

        # NOTE: Only elements already present in exog_dict are updated. Copy is
        # needed to avoid modifying the original exog.
        exog_dict.update(
            {
                k: v.copy()
                for k, v in exog.items()
                if k in series_names_in_ and v is not None
            }
        )
        # Keys whose value is None still count as present, so they do not
        # trigger the missing-exog warning below.
        series_ids_in_exog = exog.keys()

    series_not_in_exog = set(series_names_in_) - set(series_ids_in_exog)
    if series_not_in_exog:
        warnings.warn(
            f"No `exog` for series {series_not_in_exog}. All values "
            f"of the exogenous variables for these series will be NaN.",
            MissingExogWarning
        )

    # Validate each exog with `check_exog` (NaNs allowed) and store it as a
    # DataFrame. Only values of existing keys are reassigned, so the
    # dictionary size does not change while iterating.
    for k, v in exog_dict.items():
        if v is not None:
            check_exog(exog=v, allow_nan=True)
            if isinstance(v, pd.Series):
                v = v.to_frame()
            exog_dict[k] = v

    not_valid_index = [
        k
        for k, v in exog_dict.items()
        if v is not None and not isinstance(v.index, series_index_type)
    ]
    if not_valid_index:
        raise TypeError(
            f"All exog must have the same index type as `series`, which can be "
            f"either a pandas RangeIndex or a pandas DatetimeIndex. If either "
            f"`series` or `exog` is a pandas DataFrame with a MultiIndex, then "
            f"both must be pandas DatetimeIndex. Review exog for series: {not_valid_index}."
        )

    if isinstance(exog, dict):
        # NOTE: Check that all exog have the same dtypes for common columns
        # One column per series, one row per exogenous variable; a variable
        # missing from a series is NaN, which `nunique` ignores by default.
        exog_dtypes_buffer = pd.DataFrame(
            {k: df.dtypes for k, df in exog_dict.items() if df is not None}
        )
        exog_dtypes_nunique = exog_dtypes_buffer.nunique(axis=1)
        if not (exog_dtypes_nunique == 1).all():
            non_unique_dtypes_exogs = exog_dtypes_nunique[exog_dtypes_nunique != 1].index.to_list()
            raise TypeError(
                f"Exog/s: {non_unique_dtypes_exogs} have different dtypes in different "
                f"series. If any of these variables are categorical, note that this "
                f"error can also occur when their internal categories "
                f"(`series.cat.categories`) differ between series. Please ensure "
                f"that all series have the same categories (and category order) "
                f"for each categorical variable."
            )

        # Union of the column names of every non-None exog. Built through a
        # set, so the resulting order is not guaranteed.
        exog_names_in_ = list(
            set(
                column
                for df in exog_dict.values()
                if df is not None
                for column in df.columns.to_list()
            )
        )
    else:
        # `exog` was converted to a DataFrame at the top of the pandas branch,
        # so the Series fallback is not expected to run.
        exog_names_in_ = list(exog.columns) if isinstance(exog, pd.DataFrame) else [exog.name]

    # The set difference is smaller than the list of names as soon as one
    # exog name is also a series name.
    if len(set(exog_names_in_) - set(series_names_in_)) != len(exog_names_in_):
        raise ValueError(
            f"`exog` cannot contain a column named the same as one of the series.\n"
            f"    `series` columns : {series_names_in_}.\n"
            f"    `exog`   columns : {exog_names_in_}."
        )

    return exog_dict, exog_names_in_

skforecast.utils.utils.align_series_and_exog_multiseries ¶


align_series_and_exog_multiseries(
    series_dict, exog_dict=None
)

Align series and exog according to their index. If needed, reindexing is applied. Leading and trailing NaNs are removed from all series in series_dict.

Parameters:

Name	Type	Description	Default
`series_dict`	`dict`	Dictionary with the series used during training.	required
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.	`None`

Returns:

Name	Type	Description
`series_dict`	`dict`	Dictionary with the series used during training.
`exog_dict`	`dict`	Dictionary with the exogenous variable/s used during training.

Source code in skforecast/utils/utils.py

def align_series_and_exog_multiseries(
    series_dict: dict[str, pd.Series],
    exog_dict: dict[str, pd.DataFrame] | None = None
) -> tuple[dict[str, pd.Series], dict[str, pd.DataFrame | None]]:
    """
    Align series and exog according to their index. If needed, reindexing is
    applied. Heading and trailing NaNs are removed from all series in 
    `series_dict`.

    Parameters
    ----------
    series_dict : dict
        Dictionary with the series used during training.
    exog_dict : dict, default None
        Dictionary with the exogenous variable/s used during training.

    Returns
    -------
    series_dict : dict
        Dictionary with the series used during training.
    exog_dict : dict
        Dictionary with the exogenous variable/s used during training.

    """

    for k in series_dict.keys():
        if np.isnan(series_dict[k].iat[0]) or np.isnan(series_dict[k].iat[-1]):
            first_valid_index = series_dict[k].first_valid_index()
            last_valid_index = series_dict[k].last_valid_index()
            series_dict[k] = series_dict[k].loc[first_valid_index : last_valid_index]
        else:
            first_valid_index = series_dict[k].index[0]
            last_valid_index = series_dict[k].index[-1]

        if exog_dict[k] is not None:
            if not series_dict[k].index.equals(exog_dict[k].index):
                exog_dict[k] = exog_dict[k].loc[first_valid_index:last_valid_index]
                if exog_dict[k].empty:
                    warnings.warn(
                        f"`exog` for series '{k}' is empty after aligning "
                        f"with the series index. Exog values will be NaN.",
                        MissingValuesWarning
                    )
                    exog_dict[k] = None
                elif len(exog_dict[k]) != len(series_dict[k]):
                    warnings.warn(
                        f"`exog` for series '{k}' doesn't have values for "
                        f"all the dates in the series. Missing values will be "
                        f"filled with NaN.",
                        MissingValuesWarning
                    )
                    exog_dict[k] = exog_dict[k].reindex(
                        series_dict[k].index, fill_value = np.nan
                    )

    return series_dict, exog_dict

skforecast.utils.utils.prepare_levels_multiseries ¶


prepare_levels_multiseries(
    X_train_series_names_in_, levels=None
)

Prepare list of levels to be predicted in multiseries Forecasters.

Parameters:

Name	Type	Description	Default
`X_train_series_names_in_`	`list`	Names of the series (levels) included in the matrix `X_train`.	required
`levels`	`(str, list)`	Names of the series (levels) to be predicted.	`None`

Returns:

Name	Type	Description
`levels`	`list`	Names of the series (levels) to be predicted.
`input_levels_is_list`	`bool`	Indicates if input levels argument is a list.

Source code in skforecast/utils/utils.py

def prepare_levels_multiseries(
    X_train_series_names_in_: list[str],
    levels: str | list[str] | None = None
) -> tuple[list[str], bool]:
    """
    Prepare list of levels to be predicted in multiseries Forecasters.

    Parameters
    ----------
    X_train_series_names_in_ : list
        Names of the series (levels) included in the matrix `X_train`.
    levels : str, list, default None
        Names of the series (levels) to be predicted.

    Returns
    -------
    levels : list
        Names of the series (levels) to be predicted.
    input_levels_is_list : bool
        Indicates if input levels argument is a list.

    """

    input_levels_is_list = False
    if levels is None:
        levels = X_train_series_names_in_
    elif isinstance(levels, str):
        levels = [levels]
    else:
        input_levels_is_list = True

    return levels, input_levels_is_list

skforecast.utils.utils.preprocess_levels_self_last_window_multiseries ¶


preprocess_levels_self_last_window_multiseries(
    levels, input_levels_is_list, last_window_
)

Preprocess levels and last_window (when using self.last_window_) arguments in multiseries Forecasters when predicting. Only levels whose last window ends at the same datetime index will be predicted together.

Parameters:

Name	Type	Description	Default
`levels`	`list`	Names of the series (levels) to be predicted.	required
`input_levels_is_list`	`bool`	Indicates if input levels argument is a list.	required
`last_window_`	`dict`	Dictionary with the last window of each series (self.last_window_).	required

Returns:

Name	Type	Description
`levels`	`list`	Names of the series (levels) to be predicted.
`last_window`	`pandas DataFrame`	Series values used to create the predictors (lags) needed in the first iteration of the prediction (t + 1).

Source code in skforecast/utils/utils.py

def preprocess_levels_self_last_window_multiseries(
    levels: list[str],
    input_levels_is_list: bool,
    last_window_: dict[str, pd.Series],
) -> tuple[list[str], pd.DataFrame]:
    """
    Select the levels that can be predicted from the stored last windows
    (`self.last_window_`) and build the matching `last_window` DataFrame.
    Levels without a stored last window are dropped, and only levels whose 
    last window ends at the same (maximum) index are predicted together.

    Parameters
    ----------
    levels : list
        Names of the series (levels) to be predicted.
    input_levels_is_list : bool
        Indicates if input levels argument is a list.
    last_window_ : dict
        Dictionary with the last window of each series (self.last_window_).

    Returns
    -------
    levels : list
        Names of the series (levels) to be predicted.
    last_window : pandas DataFrame
        Series values used to create the predictors (lags) needed in the 
        first iteration of the prediction (t + 1). One column per selected
        level, in the order in which they appear in `last_window_`.

    Raises
    ------
    ValueError
        If none of the requested levels has a stored last window.

    """

    stored_levels = set(last_window_.keys()) if last_window_ is not None else set()
    missing_levels = set(levels) - stored_levels
    if missing_levels:
        # Keep only the requested levels that do have a stored window.
        levels = [level for level in levels if level in stored_levels]
        if not levels:
            raise ValueError(
                f"No series to predict. None of the series {missing_levels} "
                f"are present in `last_window_` attribute. Provide `last_window` "
                f"as argument in predict method."
            )
        warnings.warn(
            f"Levels {missing_levels} are excluded from "
            f"prediction since they were not stored in `last_window_` "
            f"attribute during training. If you don't want to retrain "
            f"the Forecaster, provide `last_window` as argument.",
            IgnoredArgumentWarning
        )

    # Windows of the requested levels, in `last_window_` order.
    candidate_windows = {
        name: window
        for name, window in last_window_.items()
        if name in levels
    }

    end_indexes = [window.index[-1] for window in candidate_windows.values()]
    if len(set(end_indexes)) > 1:
        # Windows end at different points: keep those reaching the latest one.
        latest_end = max(end_indexes)
        levels_at_latest_end = [
            name
            for name, window in candidate_windows.items()
            if window.index[-1] == latest_end
        ]

        dropped_levels = set(levels) - set(levels_at_latest_end)
        levels = levels_at_latest_end

        if input_levels_is_list and dropped_levels:
            warnings.warn(
                f"Only series whose last window ends at the same index "
                f"can be predicted together. Series that do not reach "
                f"the maximum index, '{latest_end}', are excluded "
                f"from prediction: {dropped_levels}.",
                IgnoredArgumentWarning
            )

    last_window = pd.DataFrame(
        {
            name: window
            for name, window in candidate_windows.items()
            if name in levels
        }
    )

    return levels, last_window

skforecast.utils.utils.prepare_steps_direct ¶


prepare_steps_direct(max_step, steps=None)

Prepare list of steps to be predicted in Direct Forecasters.

Parameters:

Name	Type	Description	Default
`max_step`	`int, list, numpy ndarray`	Maximum number of future steps the forecaster will predict when using predict methods.	required
`steps`	`(int, list, None)`	Predict n steps. The value of `steps` must be less than or equal to the value of steps defined when initializing the forecaster. Starts at 1. If `int`: Only steps within the range of 1 to int are predicted. If `list`: List of ints. Only the steps contained in the list are predicted. If `None`: As many steps are predicted as were defined at initialization.	`None`

Returns:

Name	Type	Description
`steps`	`list`	Steps to be predicted.

Source code in skforecast/utils/utils.py

def prepare_steps_direct(
    max_step: int | list[int] | np.ndarray[int],
    steps: int | list[int] | None = None
) -> list[int]:
    """
    Prepare list of steps to be predicted in Direct Forecasters.

    Parameters
    ----------
    max_step : int, list, numpy ndarray
        Maximum number of future steps the forecaster will predict 
        when using predict methods.
    steps : int, list, None, default None
        Predict n steps. The value of `steps` must be less than or equal to the 
        value of steps defined when initializing the forecaster. Starts at 1.

        - If `int`: Only steps within the range of 1 to int are predicted.
        - If `list`: List of ints. Only the steps contained in the list 
        are predicted.
        - If `None`: As many steps are predicted as were defined at 
        initialization.

    Returns
    -------
    steps : list
        Steps to be predicted.

    """

    if isinstance(steps, int):
        steps = list(np.arange(steps) + 1)
    elif steps is None:
        if isinstance(max_step, int):
            steps = list(np.arange(max_step) + 1)
        else:
            steps = list(np.array(max_step))
    elif isinstance(steps, list):
        steps = list(np.array(steps))

    for step in steps:
        if not isinstance(step, (int, np.int64, np.int32)):
            raise TypeError(
                f"`steps` argument must be an int, a list of ints or `None`. "
                f"Got {type(steps)}."
            )

    # Required since numpy 2.0
    steps = [int(step) for step in steps if step is not None]

    return steps

skforecast.utils.utils.set_skforecast_warnings ¶


set_skforecast_warnings(
    suppress_warnings, action="default"
)

Set skforecast warnings action.

Parameters:

Name	Type	Description	Default
`suppress_warnings`	`bool`	If `True`, skforecast warnings will be suppressed. If `False`, skforecast warnings will be shown as default. See skforecast.exceptions.warn_skforecast_categories for more information.	required
`action`	`str`	Action to be taken when a warning is raised. See the warnings module for more information.	`'default'`

Returns:

Type	Description
`None`

Source code in skforecast/utils/utils.py

def set_skforecast_warnings(
    suppress_warnings: bool,
    action: str = 'default'
) -> None:
    """
    Apply a warnings filter `action` to every skforecast warning category.

    Parameters
    ----------
    suppress_warnings : bool
        If `True`, a filter with the given `action` is registered for each
        category in `warn_skforecast_categories`. If `False`, the function
        does nothing and the current warning filters are left untouched.
        See skforecast.exceptions.warn_skforecast_categories for more 
        information.
    action : str, default `'default'`
        Action passed to `warnings.filterwarnings`. See the warnings module
        for more information.

    Returns
    -------
    None

    """

    # Nothing to configure unless suppression was requested.
    if not suppress_warnings:
        return

    for warning_category in warn_skforecast_categories:
        warnings.filterwarnings(action, category=warning_category)

utils¶

skforecast.utils.utils.save_forecaster ¶

skforecast.utils.utils.load_forecaster ¶

skforecast.utils.utils.initialize_lags ¶

skforecast.utils.utils.initialize_weights ¶

skforecast.utils.utils.initialize_transformer_series ¶

skforecast.utils.utils.check_select_fit_kwargs ¶

skforecast.utils.utils.check_y ¶

skforecast.utils.utils.check_exog ¶

skforecast.utils.utils.get_exog_dtypes ¶

skforecast.utils.utils.check_exog_dtypes ¶

skforecast.utils.utils.check_interval ¶

skforecast.utils.utils.check_predict_input ¶

skforecast.utils.utils.check_residuals_input ¶

skforecast.utils.utils.check_extract_values_and_index ¶

skforecast.utils.utils.cast_exog_dtypes ¶

skforecast.utils.utils.exog_to_direct ¶

skforecast.utils.utils.exog_to_direct_numpy ¶

skforecast.utils.utils.expand_index ¶

skforecast.utils.utils.transform_numpy ¶

skforecast.utils.utils.transform_series ¶

skforecast.utils.utils.transform_dataframe ¶

skforecast.utils.utils.check_optional_dependency ¶

skforecast.utils.utils.multivariate_time_series_corr ¶

skforecast.utils.utils.select_n_jobs_fit_forecaster ¶

skforecast.utils.utils.check_preprocess_series ¶

skforecast.utils.utils.check_preprocess_exog_multiseries ¶

skforecast.utils.utils.align_series_and_exog_multiseries ¶

skforecast.utils.utils.prepare_levels_multiseries ¶

skforecast.utils.utils.preprocess_levels_self_last_window_multiseries ¶

skforecast.utils.utils.prepare_steps_direct ¶

skforecast.utils.utils.set_skforecast_warnings ¶

`utils`¶