utils

save_forecaster(forecaster, file_name, verbose=True)

Save forecaster model using joblib.

Parameters:

forecaster : forecaster object from skforecast library
    Forecaster created with skforecast library. Required.

file_name : str
    File name given to the object. Required.

verbose : bool, default True
    Print summary about the forecaster saved.

Source code in skforecast/utils/utils.py
def save_forecaster(
    forecaster, 
    file_name: str, 
    verbose: bool=True
) -> None:
    """
    Save forecaster model using joblib.

    Parameters
    ----------
    forecaster: forecaster object from skforecast library.
        Forecaster created with skforecast library.

    file_name: str
        File name given to the object.

    verbose: bool, default `True`
        Print summary about the forecaster saved.

    Returns 
    -------
    None

    """

    joblib.dump(forecaster, filename=file_name)

    if verbose:
        forecaster.summary()

load_forecaster(file_name, verbose=True)

Load forecaster model using joblib.

Parameters:

file_name : str
    Object file name. Required.

verbose : bool, default True
    Print summary about the forecaster loaded.

Returns:

forecaster : object
    Forecaster created with skforecast library.

Source code in skforecast/utils/utils.py
def load_forecaster(
    file_name: str,
    verbose: bool=True
) -> object:
    """
    Load forecaster model using joblib.

    Parameters
    ----------
    file_name: str
        Object file name.

    verbose: bool, default `True`
        Print summary about the forecaster loaded.

    Returns 
    -------
    Forecaster
        Forecaster created with skforecast library.

    """

    forecaster = joblib.load(filename=file_name)

    if verbose:
        forecaster.summary()

    return forecaster
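
A minimal usage sketch: saving a fitted forecaster and loading it back. The import paths and the ForecasterAutoreg example are assumptions based on the module layout shown above; adapt them to the installed skforecast version.

# Sketch: persist a fitted forecaster and load it back.
# Import paths are assumed from the module layout above.
import pandas as pd
from sklearn.linear_model import Ridge
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.utils import save_forecaster, load_forecaster

y = pd.Series(range(50), name='y', dtype=float)
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=5)
forecaster.fit(y=y)

save_forecaster(forecaster, file_name='forecaster.joblib', verbose=False)
forecaster_loaded = load_forecaster('forecaster.joblib', verbose=True)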

initialize_lags(forecaster_name, lags)

Check lags argument input and generate the corresponding numpy ndarray.

Parameters:

forecaster_name : str
    Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom,
    ForecasterAutoregDirect, ForecasterAutoregMultiSeries,
    ForecasterAutoregMultiVariate. Required.

lags : Any
    Lags used as predictors. Required.

Returns:

lags : numpy ndarray
    Lags used as predictors.

Source code in skforecast/utils/utils.py
def initialize_lags(
    forecaster_name: str,
    lags: Any
) -> np.ndarray:
    """
    Check lags argument input and generate the corresponding numpy ndarray.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate.

    lags : Any
        Lags used as predictors.

    Returns
    ----------
    lags : numpy ndarray
        Lags used as predictors.

    """

    if isinstance(lags, int) and lags < 1:
        raise ValueError("Minimum value of lags allowed is 1.")

    if isinstance(lags, (list, np.ndarray)):
        for lag in lags:
            if not isinstance(lag, (int, np.int64, np.int32)):
                raise TypeError("All values in `lags` must be int.")

    if isinstance(lags, (list, range, np.ndarray)) and min(lags) < 1:
        raise ValueError("Minimum value of lags allowed is 1.")

    if isinstance(lags, int):
        lags = np.arange(lags) + 1
    elif isinstance(lags, (list, range)):
        lags = np.array(lags)
    elif isinstance(lags, np.ndarray):
        lags = lags
    else:
        if not forecaster_name == 'ForecasterAutoregMultiVariate':
            raise TypeError(
                ("`lags` argument must be an int, 1d numpy ndarray, range or list. "
                 f"Got {type(lags)}.")
            )
        else:
            raise TypeError(
                ("`lags` argument must be a dict, int, 1d numpy ndarray, range or list. "
                 f"Got {type(lags)}.")
            )

    return lags
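
For illustration, a short sketch of how different `lags` inputs are normalized (behaviour taken from the source above; the import path is an assumption).

# Sketch: how `lags` inputs are normalized.
from skforecast.utils import initialize_lags  # assumed import path

initialize_lags('ForecasterAutoreg', 3)            # array([1, 2, 3])
initialize_lags('ForecasterAutoreg', [1, 5])       # array([1, 5])
initialize_lags('ForecasterAutoreg', range(1, 4))  # array([1, 2, 3])
# An int or any lag value lower than 1 raises ValueError; unsupported types
# raise TypeError.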

initialize_weights(forecaster_name, regressor, weight_func, series_weights)

Check weights arguments, weight_func and series_weights for the different forecasters. Create source_code_weight_func, source code of the custom function(s) used to create weights.

Parameters:

forecaster_name : str
    Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom,
    ForecasterAutoregDirect, ForecasterAutoregMultiSeries,
    ForecasterAutoregMultiVariate, ForecasterAutoregMultiSeriesCustom. Required.

regressor : object
    Regressor of the forecaster. Required.

weight_func : Callable, dict
    Argument weight_func of the forecaster. Required.

series_weights : dict
    Argument series_weights of the forecaster. Required.

Returns:

weight_func : Callable, dict
    Argument weight_func of the forecaster.

source_code_weight_func : str, dict
    Argument source_code_weight_func of the forecaster.

series_weights : dict
    Argument series_weights of the forecaster.

Source code in skforecast/utils/utils.py
def initialize_weights(
    forecaster_name: str,
    regressor: object,
    weight_func: Union[Callable, dict],
    series_weights: dict
) -> Tuple[Union[Callable, dict], Union[Callable, dict], dict]:
    """
    Check weights arguments, `weight_func` and `series_weights` for the different 
    forecasters. Create `source_code_weight_func`, source code of the custom 
    function(s) used to create weights.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate, ForecasterAutoregMultiSeriesCustom.

    regressor : regressor or pipeline compatible with the scikit-learn API
        Regressor of the forecaster.

    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.

    series_weights : dict
        Argument `series_weights` of the forecaster.


    Returns
    ----------
    weight_func : Callable, dict
        Argument `weight_func` of the forecaster.

    source_code_weight_func : str, dict
        Argument `source_code_weight_func` of the forecaster.

    series_weights : dict
        Argument `series_weights` of the forecaster.

    """

    source_code_weight_func = None

    if weight_func is not None:

        if forecaster_name in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiSeriesCustom']:
            if not isinstance(weight_func, (Callable, dict)):
                raise TypeError(
                    (f"Argument `weight_func` must be a Callable or a dict of "
                     f"Callables. Got {type(weight_func)}.")
                )
        elif not isinstance(weight_func, Callable):
            raise TypeError(
                f"Argument `weight_func` must be a Callable. Got {type(weight_func)}."
            )

        if isinstance(weight_func, dict):
            source_code_weight_func = {}
            for key in weight_func:
                source_code_weight_func[key] = inspect.getsource(weight_func[key])
        else:
            source_code_weight_func = inspect.getsource(weight_func)

        if 'sample_weight' not in inspect.signature(regressor.fit).parameters:
            warnings.warn(
                (f"Argument `weight_func` is ignored since regressor {regressor} "
                 f"does not accept `sample_weight` in its `fit` method."),
                 IgnoredArgumentWarning
            )
            weight_func = None
            source_code_weight_func = None

    if series_weights is not None:
        if not isinstance(series_weights, dict):
            raise TypeError(
                (f"Argument `series_weights` must be a dict of floats or ints."
                 f"Got {type(series_weights)}.")
            )
        if 'sample_weight' not in inspect.signature(regressor.fit).parameters:
            warnings.warn(
                (f"Argument `series_weights` is ignored since regressor {regressor} "
                 f"does not accept `sample_weight` in its `fit` method."),
                 IgnoredArgumentWarning
            )
            series_weights = None

    return weight_func, source_code_weight_func, series_weights
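
A hedged sketch of how a custom `weight_func` is validated: with a regressor whose `fit` method accepts `sample_weight`, the callable is kept and its source code is extracted; with a regressor that does not, it would be ignored with a warning. The import path is an assumption.

import numpy as np
from sklearn.linear_model import LinearRegression
from skforecast.utils import initialize_weights  # assumed import path

def custom_weights(index):
    # Hypothetical weighting: double the weight of the most recent observations.
    return np.where(index > 30, 2., 1.)

weight_func, source_code_weight_func, series_weights = initialize_weights(
    forecaster_name = 'ForecasterAutoreg',
    regressor       = LinearRegression(),
    weight_func     = custom_weights,
    series_weights  = None
)
# LinearRegression.fit accepts `sample_weight`, so `weight_func` is kept and
# `source_code_weight_func` holds its source code.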

check_select_fit_kwargs(regressor, fit_kwargs=None)

Check if fit_kwargs is a dict and select only the keys that are used by the fit method of the regressor.

Parameters:

regressor : object
    Regressor object. Required.

fit_kwargs : dict, default None
    Dictionary with the arguments to pass to the `fit` method of the
    forecaster.

Returns:

fit_kwargs : dict
    Dictionary with the arguments to be passed to the `fit` method of the
    regressor after removing the unused keys.

Source code in skforecast/utils/utils.py
def check_select_fit_kwargs(
    regressor: object,
    fit_kwargs: Optional[dict]=None
) -> dict:
    """
    Check if `fit_kwargs` is a dict and select only the keys that are used by
    the `fit` method of the regressor.

    Parameters
    ----------
    regressor : object
        Regressor object.

    fit_kwargs : dict, default `None`
        Dictionary with the arguments to pass to the `fit` method of the 
        forecaster.

    Returns
    -------
    fit_kwargs : dict
        Dictionary with the arguments to be passed to the `fit` method of the 
        regressor after removing the unused keys.

    """

    if fit_kwargs is None:
        fit_kwargs = {}
    else:
        if not isinstance(fit_kwargs, dict):
            raise TypeError(
                f"Argument `fit_kwargs` must be a dict. Got {type(fit_kwargs)}."
            )

        # Non used keys
        non_used_keys = [k for k in fit_kwargs.keys()
                         if k not in inspect.signature(regressor.fit).parameters]
        if non_used_keys:
            warnings.warn(
                (f"Argument/s {non_used_keys} ignored since they are not used by the "
                 f"regressor's `fit` method."),
                 IgnoredArgumentWarning
            )

        if 'sample_weight' in fit_kwargs.keys():
            warnings.warn(
                ("The `sample_weight` argument is ignored. Use `weight_func` to pass "
                 "a function that defines the individual weights for each sample "
                 "based on its index."),
                 IgnoredArgumentWarning
            )
            del fit_kwargs['sample_weight']

        # Select only the keyword arguments allowed by the regressor's `fit` method.
        fit_kwargs = {k:v for k, v in fit_kwargs.items()
                      if k in inspect.signature(regressor.fit).parameters}

    return fit_kwargs
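
A small sketch of the filtering behaviour, using GradientBoostingRegressor, whose `fit` method accepts `sample_weight` and `monitor`. The import path is an assumption.

from sklearn.ensemble import GradientBoostingRegressor
from skforecast.utils import check_select_fit_kwargs  # assumed import path

fit_kwargs = {'monitor': None, 'sample_weight': None, 'not_a_fit_arg': 1}
check_select_fit_kwargs(GradientBoostingRegressor(), fit_kwargs=fit_kwargs)
# Returns {'monitor': None}: 'not_a_fit_arg' is dropped because it is not a
# `fit` parameter and 'sample_weight' is dropped in favour of `weight_func`,
# both with an IgnoredArgumentWarning.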

check_y(y)

Raise Exception if y is not pandas Series or if it has missing values.

Parameters:

y : Any
    Time series values. Required.

Returns:

None

Source code in skforecast/utils/utils.py
def check_y(
    y: Any
) -> None:
    """
    Raise Exception if `y` is not pandas Series or if it has missing values.

    Parameters
    ----------        
    y : Any
        Time series values.

    Returns
    ----------
    None

    """

    if not isinstance(y, pd.Series):
        raise TypeError("`y` must be a pandas Series.")

    if y.isnull().any():
        raise ValueError("`y` has missing values.")

    return

check_exog(exog, allow_nan=True)

Raise Exception if exog is not pandas Series or pandas DataFrame. If allow_nan = False, issue a warning if exog contains NaN values.

Parameters:

exog : Any
    Exogenous variable/s included as predictor/s. Required.

allow_nan : bool, default True
    If True, allows the presence of NaN values in exog. If False, issue a
    warning if exog contains NaN values.

Returns:

None

Source code in skforecast/utils/utils.py
def check_exog(
    exog: Any,
    allow_nan: bool=True
) -> None:
    """
    Raise Exception if `exog` is not pandas Series or pandas DataFrame.
    If `allow_nan = False`, issue a warning if `exog` contains NaN values.

    Parameters
    ----------        
    exog :  Any
        Exogenous variable/s included as predictor/s.
    allow_nan: bool, default True
        If True, allows the presence of NaN values in `exog`. If False,
        issue a warning if `exog` contains NaN values.

    Returns
    ----------
    None

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError("`exog` must be a pandas Series or DataFrame.")

    if not allow_nan:
        if exog.isnull().any().any():
            warnings.warn(
                ("`exog` has missing values. Most machine learning models do not allow "
                 "missing values. Fitting the forecaster may fail."), 
                 MissingValuesExogWarning
            )

    return
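
For reference, a short sketch of the two validation modes (import path assumed).

import pandas as pd
from skforecast.utils import check_exog  # assumed import path

exog = pd.DataFrame({'exog_1': [1., 2., None]})
check_exog(exog, allow_nan=True)   # passes silently, NaN values are allowed
check_exog(exog, allow_nan=False)  # issues a MissingValuesExogWarning
# check_exog([1., 2., 3.])         # would raise TypeError: not a Series/DataFrame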

get_exog_dtypes(exog)

Store dtypes of exog.

Parameters:

exog : pandas DataFrame, pandas Series
    Exogenous variable/s included as predictor/s. Required.

Returns:

exog_dtypes : dict
    Dictionary with the dtypes in exog.

Source code in skforecast/utils/utils.py
def get_exog_dtypes(
    exog: Union[pd.DataFrame, pd.Series]
) -> dict:
    """
    Store dtypes of `exog`.

    Parameters
    ----------
    exog :  pandas DataFrame, pandas Series
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    exog_dtypes : dict
        Dictionary with the dtypes in `exog`.

    """

    if isinstance(exog, pd.Series):
        exog_dtypes = {exog.name: exog.dtypes}
    else:
        exog_dtypes = exog.dtypes.to_dict()

    return exog_dtypes

check_exog_dtypes(exog)

Raise Exception if exog has categorical columns with non integer values. This is needed when using machine learning regressors that allow categorical features. Raise a Warning if values are not int, float, or category.

Parameters:

exog : pandas DataFrame, pandas Series
    Exogenous variable/s included as predictor/s. Required.

Returns:

None

Source code in skforecast/utils/utils.py
def check_exog_dtypes(
    exog: Union[pd.DataFrame, pd.Series]
) -> None:
    """
    Raise Exception if `exog` has categorical columns with non integer values.
    This is needed when using machine learning regressors that allow categorical
    features.
    Raise a Warning if values are not `int`, `float`, or `category`.

    Parameters
    ----------        
    exog :  pandas DataFrame, pandas Series
        Exogenous variable/s included as predictor/s.

    Returns
    ----------
    None

    """
    check_exog(exog=exog, allow_nan=False)

    if isinstance(exog, pd.DataFrame):
        if not all([dtype in ['float', 'int', 'category'] for dtype in exog.dtypes]):
            warnings.warn(
                ("`exog` may contain only `int`, `float` or `category` dtypes. Most "
                 "machine learning models do not allow other types of values. "
                 "Fitting the forecaster may fail."), DataTypeWarning
            )
        for col in exog.select_dtypes(include='category'):
            if exog[col].cat.categories.dtype not in [int, np.int32, np.int64]:
                raise TypeError(
                    ("Categorical columns in exog must contain only integer values. "
                     "See skforecast docs for more info about how to include "
                     "categorical features https://skforecast.org/"
                     "latest/user_guides/categorical-features.html")
                )
    else:
        if exog.dtypes not in ['float', 'int', 'category']:
            warnings.warn(
                ("`exog` may contain only `int`, `float` or `category` dtypes. Most "
                 "machine learning models do not allow other types of values. "
                 "Fitting the forecaster may fail."), DataTypeWarning
            )
        if exog.dtypes == 'category' and exog.cat.categories.dtype not in [int, np.int32, np.int64]:
            raise TypeError(
                ("If exog is of type category, it must contain only integer values. "
                 "See skforecast docs for more info about how to include "
                 "categorical features https://skforecast.org/"
                 "latest/user_guides/categorical-features.html")
            )

    return
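
A hedged sketch combining get_exog_dtypes and check_exog_dtypes: float columns and integer-valued categorical columns pass, while a categorical column with string categories would raise TypeError. The import path is an assumption.

import pandas as pd
from skforecast.utils import get_exog_dtypes, check_exog_dtypes  # assumed import path

exog = pd.DataFrame({
    'temp'    : [20.1, 21.3, 19.8],
    'holiday' : pd.Categorical([0, 1, 0])
})
get_exog_dtypes(exog)    # {'temp': dtype('float64'), 'holiday': CategoricalDtype(...)}
check_exog_dtypes(exog)  # passes: float and integer-valued category columns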

check_interval(interval=None, alpha=None)

Check provided confidence interval sequence is valid.

Parameters:

interval : list, default None
    Confidence of the prediction interval estimated. Sequence of percentiles
    to compute, which must be between 0 and 100 inclusive. For example,
    interval of 95% should be as interval = [2.5, 97.5].

alpha : float, default None
    The confidence intervals used in ForecasterSarimax are (1 - alpha) %.
Source code in skforecast/utils/utils.py
def check_interval(
    interval: list=None,
    alpha: float=None
) -> None:
    """
    Check provided confidence interval sequence is valid.

    Parameters
    ----------
    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.

    alpha : float, default `None`
        The confidence intervals used in ForecasterSarimax are (1 - alpha) %.

    """

    if interval is not None:
        if not isinstance(interval, list):
            raise TypeError(
                ("`interval` must be a `list`. For example, interval of 95% "
                 "should be as `interval = [2.5, 97.5]`.")
            )

        if len(interval) != 2:
            raise ValueError(
                ("`interval` must contain exactly 2 values, respectively the "
                 "lower and upper interval bounds. For example, interval of 95% "
                 "should be as `interval = [2.5, 97.5]`.")
            )

        if (interval[0] < 0.) or (interval[0] >= 100.):
            raise ValueError(
                f"Lower interval bound ({interval[0]}) must be >= 0 and < 100."
            )

        if (interval[1] <= 0.) or (interval[1] > 100.):
            raise ValueError(
                f"Upper interval bound ({interval[1]}) must be > 0 and <= 100."
            )

        if interval[0] >= interval[1]:
            raise ValueError(
                f"Lower interval bound ({interval[0]}) must be less than the "
                f"upper interval bound ({interval[1]})."
            )

    if alpha is not None:
        if not isinstance(alpha, float):
            raise TypeError(
                ("`alpha` must be a `float`. For example, interval of 95% "
                 "should be as `alpha = 0.05`.")
            )

        if (alpha <= 0.) or (alpha >= 1):
            raise ValueError(
                f"`alpha` must have a value between 0 and 1. Got {alpha}."
            )

    return
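
A short sketch of valid and invalid inputs (import path assumed).

from skforecast.utils import check_interval  # assumed import path

check_interval(interval=[2.5, 97.5])  # valid 95% interval, passes silently
check_interval(alpha=0.05)            # valid, passes silently
# check_interval(interval=[97.5, 2.5])  # would raise ValueError: lower >= upper
# check_interval(alpha=5)               # would raise TypeError: not a float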

check_predict_input(forecaster_name, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, last_window_exog=None, exog=None, exog_type=None, exog_col_names=None, interval=None, alpha=None, max_steps=None, levels=None, series_col_names=None)

Check all inputs of predict method. This is a helper function to validate that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

forecaster_name : str
    Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom,
    ForecasterAutoregDirect, ForecasterAutoregMultiSeries,
    ForecasterAutoregMultiVariate, ForecasterAutoregMultiSeriesCustom. Required.

steps : int, list
    Number of future steps predicted. Required.

fitted : bool
    Tag to identify if the regressor has been fitted (trained). Required.

included_exog : bool
    If the forecaster has been trained using exogenous variable/s. Required.

index_type : type
    Type of index of the input used in training. Required.

index_freq : str
    Frequency of Index of the input used in training. Required.

window_size : int
    Size of the window needed to create the predictors. It is equal to
    max_lag. Required.

last_window : pandas Series, pandas DataFrame, default None
    Values of the series used to create the predictors (lags) needed in the
    first iteration of prediction (t + 1).

last_window_exog : pandas Series, pandas DataFrame, default None
    Values of the exogenous variables aligned with last_window in
    ForecasterSarimax predictions.

exog : pandas Series, pandas DataFrame, default None
    Exogenous variable/s included as predictor/s.

exog_type : type, default None
    Type of exogenous variable/s used in training.

exog_col_names : list, default None
    Names of columns of exog if exog used in training was a pandas DataFrame.

interval : list, default None
    Confidence of the prediction interval estimated. Sequence of percentiles
    to compute, which must be between 0 and 100 inclusive. For example,
    interval of 95% should be as interval = [2.5, 97.5].

alpha : float, default None
    The confidence intervals used in ForecasterSarimax are (1 - alpha) %.

max_steps : int, default None
    Maximum number of steps allowed (ForecasterAutoregDirect and
    ForecasterAutoregMultiVariate).

levels : str, list, default None
    Time series to be predicted (ForecasterAutoregMultiSeries and
    ForecasterAutoregMultiSeriesCustom).

series_col_names : list, default None
    Names of the columns used during fit (ForecasterAutoregMultiSeries,
    ForecasterAutoregMultiVariate and ForecasterAutoregMultiSeriesCustom).
Source code in skforecast/utils/utils.py
def check_predict_input(
    forecaster_name: str,
    steps: Union[int, list],
    fitted: bool,
    included_exog: bool,
    index_type: type,
    index_freq: str,
    window_size: int,
    last_window: Optional[Union[pd.Series, pd.DataFrame]]=None,
    last_window_exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
    exog_type: Optional[Union[type, None]]=None,
    exog_col_names: Optional[Union[list, None]]=None,
    interval: Optional[list]=None,
    alpha: Optional[float]=None,
    max_steps: Optional[int]=None,
    levels: Optional[Union[str, list]]=None,
    series_col_names: Optional[list]=None
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Parameters
    ----------
    forecaster_name : str
        Forecaster name. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate, ForecasterAutoregMultiSeriesCustom.

    steps : int, list
        Number of future steps predicted.

    fitted: Bool
        Tag to identify if the regressor has been fitted (trained).

    included_exog : bool
        If the forecaster has been trained using exogenous variable/s.

    index_type : type
        Type of index of the input used in training.

    index_freq : str
        Frequency of Index of the input used in training.

    window_size: int
        Size of the window needed to create the predictors. It is equal to
        `max_lag`.

    last_window : pandas Series, pandas DataFrame, default `None`
        Values of the series used to create the predictors (lags) needed in the 
        first iteration of prediction (t + 1).

    last_window_exog : pandas Series, pandas DataFrame, default `None`
        Values of the exogenous variables aligned with `last_window` in 
        ForecasterSarimax predictions.

    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s.

    exog_type : type, default `None`
        Type of exogenous variable/s used in training.

    exog_col_names : list, default `None`
        Names of columns of `exog` if `exog` used in training was a pandas
        DataFrame.

    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.

    alpha : float, default `None`
        The confidence intervals used in ForecasterSarimax are (1 - alpha) %.

    max_steps: int, default `None`
        Maximum number of steps allowed (`ForecasterAutoregDirect` and 
        `ForecasterAutoregMultiVariate`).

    levels : str, list, default `None`
        Time series to be predicted (`ForecasterAutoregMultiSeries` and
        `ForecasterAutoregMultiSeriesCustom`).

    series_col_names : list, default `None`
        Names of the columns used during fit (`ForecasterAutoregMultiSeries`, 
        `ForecasterAutoregMultiVariate` and `ForecasterAutoregMultiSeriesCustom`).

    """

    if not fitted:
        raise sklearn.exceptions.NotFittedError(
            ("This Forecaster instance is not fitted yet. Call `fit` with "
             "appropriate arguments before using predict.")
        )

    if isinstance(steps, (int, np.integer)) and steps < 1:
        raise ValueError(
            f"`steps` must be an integer greater than or equal to 1. Got {steps}."
        )

    if isinstance(steps, list) and min(steps) < 1:
        raise ValueError(
           (f"The minimum value of `steps` must be equal to or greater than 1. "
            f"Got {min(steps)}.")
        )

    if max_steps is not None:
        if max(steps) > max_steps:
            raise ValueError(
                (f"The maximum value of `steps` must be less than or equal to "
                 f"the value of steps defined when initializing the forecaster. "
                 f"Got {max(steps)}, but the maximum is {max_steps}.")
            )

    if interval is not None or alpha is not None:
        check_interval(interval=interval, alpha=alpha)

    if forecaster_name in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiSeriesCustom']:
        if levels is not None and not isinstance(levels, (str, list)):
            raise TypeError(
                "`levels` must be a `list` of column names, a `str` of a column name or `None`."
            )
        if len(set(levels) - set(series_col_names)) != 0:
            raise ValueError(
                f"`levels` must be in `series_col_names` : {series_col_names}."
            )

    if exog is None and included_exog:
        raise ValueError(
            ("Forecaster trained with exogenous variable/s. "
             "Same variable/s must be provided when predicting.")
        )

    if exog is not None and not included_exog:
        raise ValueError(
            ("Forecaster trained without exogenous variable/s. "
             "`exog` must be `None` when predicting.")
        )

    # Checks last_window
    # Check last_window type (pd.Series or pd.DataFrame according to forecaster)
    if forecaster_name in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiVariate',
                           'ForecasterAutoregMultiSeriesCustom']:
        if not isinstance(last_window, pd.DataFrame):
            raise TypeError(
                f"`last_window` must be a pandas DataFrame. Got {type(last_window)}."
            )

        if forecaster_name in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiSeriesCustom'] and \
            len(set(levels) - set(last_window.columns)) != 0:
            raise ValueError(
                (f"`last_window` must contain a column(s) named as the level(s) to be predicted.\n"
                 f"    `levels` : {levels}.\n"
                 f"    `last_window` columns : {list(last_window.columns)}.")
            )

        if forecaster_name == 'ForecasterAutoregMultiVariate' and \
            (series_col_names != list(last_window.columns)):
            raise ValueError(
                (f"`last_window` columns must be the same as `series` column names.\n"
                 f"    `last_window` columns : {list(last_window.columns)}.\n"
                 f"    `series` columns      : {series_col_names}.")
            )    
    else:    
        if not isinstance(last_window, pd.Series):
            raise TypeError(
                f"`last_window` must be a pandas Series. Got {type(last_window)}."
            )

    # Check last_window len, nulls and index (type and freq)
    if len(last_window) < window_size:
        raise ValueError(
            (f"`last_window` must have as many values as needed to "
             f"generate the predictors. For this forecaster it is {window_size}.")
        )
    if last_window.isnull().any().all():
        raise ValueError(
            ("`last_window` has missing values.")
        )
    _, last_window_index = preprocess_last_window(
                               last_window  = last_window.iloc[:0],
                               return_values = False
                           ) 
    if not isinstance(last_window_index, index_type):
        raise TypeError(
            (f"Expected index of type {index_type} for `last_window`. "
             f"Got {type(last_window_index)}.")
        )
    if isinstance(last_window_index, pd.DatetimeIndex):
        if not last_window_index.freqstr == index_freq:
            raise TypeError(
                (f"Expected frequency of type {index_freq} for `last_window`. "
                 f"Got {last_window_index.freqstr}.")
            )

    # Checks exog
    if exog is not None:
        # Check type, nulls and expected type
        if not isinstance(exog, (pd.Series, pd.DataFrame)):
            raise TypeError("`exog` must be a pandas Series or DataFrame.")
        if exog.isnull().any().any():
            warnings.warn(
                ("`exog` has missing values. Most of machine learning models do "
                 "not allow missing values. `predict` method may fail."), 
                 MissingValuesExogWarning
            )
        if not isinstance(exog, exog_type):
            raise TypeError(
                f"Expected type for `exog`: {exog_type}. Got {type(exog)}."    
            )

        # Check exog has as many values as the distance to the max step predicted
        last_step = max(steps) if isinstance(steps, list) else steps
        if len(exog) < last_step:
            raise ValueError(
                (f"`exog` must have at least as many values as the distance to "
                 f"the maximum step predicted, {last_step}.")
            )

        # Check all columns are in the pandas DataFrame
        if isinstance(exog, pd.DataFrame):
            col_missing = set(exog_col_names).difference(set(exog.columns))
            if col_missing:
                raise ValueError(
                    (f"Missing columns in `exog`. Expected {exog_col_names}. "
                     f"Got {exog.columns.to_list()}.") 
                )

        # Check index dtype and freq
        _, exog_index = preprocess_exog(
                            exog          = exog.iloc[:0, ],
                            return_values = False
                        )
        if not isinstance(exog_index, index_type):
            raise TypeError(
                (f"Expected index of type {index_type} for `exog`. "
                 f"Got {type(exog_index)}.")
            )   
        if isinstance(exog_index, pd.DatetimeIndex):
            if not exog_index.freqstr == index_freq:
                raise TypeError(
                    (f"Expected frequency of type {index_freq} for `exog`. "
                     f"Got {exog_index.freqstr}.")
                )

        # Check exog starts one step ahead of last_window end.
        expected_index = expand_index(last_window.index, 1)[0]
        if expected_index != exog.index[0]:
            raise ValueError(
                (f"To make predictions `exog` must start one step ahead of `last_window`.\n"
                 f"    `last_window` ends at : {last_window.index[-1]}.\n"
                 f"    `exog` starts at      : {exog.index[0]}.\n"
                 f"     Expected index       : {expected_index}.")
            )

    # Checks ForecasterSarimax
    if forecaster_name == 'ForecasterSarimax':
        # Check last_window_exog type, len, nulls and index (type and freq)
        if last_window_exog is not None:
            if not included_exog:
                raise ValueError(
                    ("Forecaster trained without exogenous variable/s. "
                     "`last_window_exog` must be `None` when predicting.")
                )

            if not isinstance(last_window_exog, (pd.Series, pd.DataFrame)):
                raise TypeError(
                    (f"`last_window_exog` must be a pandas Series or a "
                     f"pandas DataFrame. Got {type(last_window_exog)}.")
                )
            if len(last_window_exog) < window_size:
                raise ValueError(
                    (f"`last_window_exog` must have as many values as needed to "
                     f"generate the predictors. For this forecaster it is {window_size}.")
                )
            if last_window_exog.isnull().any().all():
                warnings.warn(
                ("`last_window_exog` has missing values. Most of machine learning "
                 "models do not allow missing values. `predict` method may fail."),
                MissingValuesExogWarning
            )
            _, last_window_exog_index = preprocess_last_window(
                                            last_window   = last_window_exog.iloc[:0],
                                            return_values = False
                                        ) 
            if not isinstance(last_window_exog_index, index_type):
                raise TypeError(
                    (f"Expected index of type {index_type} for `last_window_exog`. "
                     f"Got {type(last_window_exog_index)}.")
                )
            if isinstance(last_window_exog_index, pd.DatetimeIndex):
                if not last_window_exog_index.freqstr == index_freq:
                    raise TypeError(
                        (f"Expected frequency of type {index_freq} for "
                         f"`last_window_exog`. Got {last_window_exog_index.freqstr}.")
                    )

            # Check all columns are in the pd.DataFrame, last_window_exog
            if isinstance(last_window_exog, pd.DataFrame):
                col_missing = set(exog_col_names).difference(set(last_window_exog.columns))
                if col_missing:
                    raise ValueError(
                        (f"Missing columns in `exog`. Expected {exog_col_names}. "
                         f"Got {last_window_exog.columns.to_list()}.") 
                    )

    return

preprocess_y(y, return_values=True)

Returns values and index of series separately. Index is overwritten according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

y : pandas Series
    Time series. Required.

return_values : bool, default True
    If True return the values of y as numpy ndarray. This option is intended
    to avoid copying data when it is not necessary.

Returns:

y_values : None, numpy ndarray
    Numpy array with values of y.

y_index : pandas Index
    Index of y modified according to the rules.

Source code in skforecast/utils/utils.py
def preprocess_y(
    y: pd.Series,
    return_values: bool=True
) -> Tuple[Union[None, np.ndarray], pd.Index]:
    """
    Returns values and index of series separately. Index is overwritten 
    according to the next rules:
        If index is of type DatetimeIndex and has frequency, nothing is 
        changed.
        If index is of type RangeIndex, nothing is changed.
        If index is of type DatetimeIndex but has no frequency, a 
        RangeIndex is created.
        If index is not of type DatetimeIndex, a RangeIndex is created.

    Parameters
    ----------        
    y : pandas Series
        Time series.

    return_values : bool, default `True`
        If `True` return the values of `y` as numpy ndarray. This option is 
        intended to avoid copying data when it is not necessary.

    Returns 
    -------
    y_values : None, numpy ndarray
        Numpy array with values of `y`.

    y_index : pandas Index
        Index of `y` modified according to the rules.

    """

    if isinstance(y.index, pd.DatetimeIndex) and y.index.freq is not None:
        y_index = y.index
    elif isinstance(y.index, pd.RangeIndex):
        y_index = y.index
    elif isinstance(y.index, pd.DatetimeIndex) and y.index.freq is None:
        warnings.warn(
            ("`y` has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        y_index = pd.RangeIndex(
                      start = 0,
                      stop  = len(y),
                      step  = 1
                  )
    else:
        warnings.warn(
            ("`y` has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        y_index = pd.RangeIndex(
                      start = 0,
                      stop  = len(y),
                      step  = 1
                  )

    y_values = y.to_numpy() if return_values else None

    return y_values, y_index
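
A brief sketch of the index rules described above (import path assumed).

import pandas as pd
from skforecast.utils import preprocess_y  # assumed import path

y = pd.Series([1., 2., 3.],
              index=pd.date_range('2020-01-01', periods=3, freq='D'))
values, index = preprocess_y(y)
# values -> array([1., 2., 3.]); index keeps the daily DatetimeIndex.

y_other_index = pd.Series([1., 2., 3.], index=[10, 20, 30])
_, index = preprocess_y(y_other_index, return_values=False)
# Warns and returns RangeIndex(start=0, stop=3, step=1).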

preprocess_last_window(last_window, return_values=True)

Returns values and index of series separately. Index is overwritten according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

last_window : pandas Series, pandas DataFrame
    Time series values. Required.

return_values : bool, default True
    If True return the values of last_window as numpy ndarray. This option is
    intended to avoid copying data when it is not necessary.

Returns:

last_window_values : numpy ndarray
    Numpy array with values of last_window.

last_window_index : pandas Index
    Index of last_window modified according to the rules.

Source code in skforecast/utils/utils.py
def preprocess_last_window(
    last_window: Union[pd.Series, pd.DataFrame],
    return_values: bool=True
 ) -> Tuple[np.ndarray, pd.Index]:
    """
    Returns values and index of series separately. Index is overwritten 
    according to the next rules:
        If index is of type DatetimeIndex and has frequency, nothing is 
        changed.
        If index is of type RangeIndex, nothing is changed.
        If index is of type DatetimeIndex but has no frequency, a 
        RangeIndex is created.
        If index is not of type DatetimeIndex, a RangeIndex is created.

    Parameters
    ----------        
    last_window : pandas Series, pandas DataFrame
        Time series values.

    return_values : bool, default `True`
        If `True` return the values of `last_window` as numpy ndarray. This option 
        is intended to avoid copying data when it is not necessary.

    Returns 
    -------
    last_window_values : numpy ndarray
        Numpy array with values of `last_window`.

    last_window_index : pandas Index
        Index of `last_window` modified according to the rules.

    """

    if isinstance(last_window.index, pd.DatetimeIndex) and last_window.index.freq is not None:
        last_window_index = last_window.index
    elif isinstance(last_window.index, pd.RangeIndex):
        last_window_index = last_window.index
    elif isinstance(last_window.index, pd.DatetimeIndex) and last_window.index.freq is None:
        warnings.warn(
            ("`last_window` has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        last_window_index = pd.RangeIndex(
                                start = 0,
                                stop  = len(last_window),
                                step  = 1
                            )
    else:
        warnings.warn(
            ("`last_window` has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        last_window_index = pd.RangeIndex(
                                start = 0,
                                stop  = len(last_window),
                                step  = 1
                            )

    last_window_values = last_window.to_numpy() if return_values else None

    return last_window_values, last_window_index

preprocess_exog(exog, return_values=True)

Returns values and index of series or data frame separately. Index is overwritten according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

exog : pandas Series, pandas DataFrame
    Exogenous variables. Required.

return_values : bool, default True
    If True return the values of exog as numpy ndarray. This option is
    intended to avoid copying data when it is not necessary.

Returns:

exog_values : None, numpy ndarray
    Numpy array with values of exog.

exog_index : pandas Index
    Index of exog modified according to the rules.

Source code in skforecast/utils/utils.py
def preprocess_exog(
    exog: Union[pd.Series, pd.DataFrame],
    return_values: bool=True
) -> Tuple[Union[None, np.ndarray], pd.Index]:
    """
    Returns values and index of series or data frame separately. Index is
    overwritten  according to the next rules:
        If index is of type DatetimeIndex and has frequency, nothing is 
        changed.
        If index is of type RangeIndex, nothing is changed.
        If index is of type DatetimeIndex but has no frequency, a 
        RangeIndex is created.
        If index is not of type DatetimeIndex, a RangeIndex is created.

    Parameters
    ----------        
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    return_values : bool, default `True`
        If `True` return the values of `exog` as numpy ndarray. This option is 
        intended to avoid copying data when it is not necessary.

    Returns 
    -------
    exog_values : None, numpy ndarray
        Numpy array with values of `exog`.

    exog_index : pandas Index
        Index of `exog` modified according to the rules.

    """

    if isinstance(exog.index, pd.DatetimeIndex) and exog.index.freq is not None:
        exog_index = exog.index
    elif isinstance(exog.index, pd.RangeIndex):
        exog_index = exog.index
    elif isinstance(exog.index, pd.DatetimeIndex) and exog.index.freq is None:
        warnings.warn(
            ("`exog` has DatetimeIndex index but no frequency. "
             "Index is overwritten with a RangeIndex of step 1.")
        )
        exog_index = pd.RangeIndex(
                         start = 0,
                         stop  = len(exog),
                         step  = 1
                     )

    else:
        warnings.warn(
            ("`exog` has no DatetimeIndex nor RangeIndex index. "
             "Index is overwritten with a RangeIndex.")
        )
        exog_index = pd.RangeIndex(
                         start = 0,
                         stop  = len(exog),
                         step  = 1
                     )

    exog_values = exog.to_numpy() if return_values else None

    return exog_values, exog_index

cast_exog_dtypes(exog, exog_dtypes)

Cast exog to specified dtypes.

If exog is a pandas Series, exog_dtypes must be a dict with a single value. If exog_dtypes is category but the current type of exog is float, then the type is cast to int and then to category. This is done because, for a forecaster to accept a categorical exog, it must contain only integer values. Due to the internal modifications of numpy, the values may be cast to float, so they have to be re-converted to int.

Parameters:

exog : pandas Series, pandas DataFrame
    Exogenous variables. Required.

exog_dtypes : dict
    Dictionary with name and type of the series or data frame columns. Required.

Returns:

exog : pandas Series, pandas DataFrame

Source code in skforecast/utils/utils.py
def cast_exog_dtypes(
    exog: Union[pd.Series, pd.DataFrame],
    exog_dtypes: dict,
) -> Union[pd.Series, pd.DataFrame]: # pragma: no cover
    """
    Cast `exog` to specified dtypes.
    If `exog` is a pandas Series, `exog_dtypes` must be a dict with a single value.
    If `exog_dtypes` is `category` but the current type of `exog` is `float`, then
    the type is cast to `int` and then to `category`. This is done because, for
    a forecaster to accept a categorical exog, it must contain only integer values.
    Due to the internal modifications of numpy, the values may be cast to `float`,
    so they have to be re-converted to `int`.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    exog_dtypes: dict
        Dictionary with name and type of the series or data frame columns.

    Returns 
    -------
    exog

    """

    # Remove keys from exog_dtypes not in exog.columns
    exog_dtypes = {k:v for k, v in exog_dtypes.items() if k in exog.columns}

    if isinstance(exog, pd.Series) and exog.dtypes != list(exog_dtypes.values())[0]:
        exog = exog.astype(list(exog_dtypes.values())[0])
    elif isinstance(exog, pd.DataFrame):
        for col, initial_dtype in exog_dtypes.items():
            if exog[col].dtypes != initial_dtype:
                if initial_dtype == "category" and exog[col].dtypes==float:
                    exog[col] = exog[col].astype(int).astype("category")
                else:
                    exog[col] = exog[col].astype(initial_dtype)

    return exog
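
A small sketch of the casting rules, in particular the float -> int -> category path for categorical columns (import path assumed).

import pandas as pd
from skforecast.utils import cast_exog_dtypes  # assumed import path

exog = pd.DataFrame({'temp': [20, 21], 'holiday': [0., 1.]})
exog_dtypes = {'temp': 'float64', 'holiday': 'category'}
cast_exog_dtypes(exog, exog_dtypes)
# 'temp' is cast to float64; 'holiday', being float and targeted as category,
# is cast to int first and then to category.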

exog_to_direct(exog, steps)

Transforms exog to a pandas DataFrame with the shape needed for Direct forecasting.

Parameters:

exog : pandas Series, pandas DataFrame
    Exogenous variables. Required.

steps : int
    Number of steps that will be predicted using exog. Required.

Returns:

exog_transformed : pandas DataFrame
    Exogenous variables transformed.

Source code in skforecast/utils/utils.py
def exog_to_direct(
    exog: Union[pd.Series, pd.DataFrame],
    steps: int
)-> pd.DataFrame:
    """
    Transforms `exog` to a pandas DataFrame with the shape needed for Direct
    forecasting.

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    steps : int.
        Number of steps that will be predicted using exog.

    Returns 
    -------
    exog_transformed : pandas DataFrame
        Exogenous variables transformed.

    """

    if not isinstance(exog, (pd.Series, pd.DataFrame)):
        raise TypeError(f"`exog` must be a pandas Series or DataFrame. Got {type(exog)}.")

    if isinstance(exog, pd.Series):
        exog = exog.to_frame()

    len_columns = len(exog)
    exog_idx = exog.index
    exog_transformed = []
    for column in exog.columns:

        exog_column_transformed = [
            (exog[column].iloc[i : len_columns - (steps - 1 - i)]).reset_index(drop=True)
            for i in range(steps)
        ]
        exog_column_transformed = pd.concat(exog_column_transformed, axis=1)
        exog_column_transformed.columns = [f"{column}_step_{i+1}" for i in range(steps)]

        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = pd.concat(exog_transformed, axis=1)
    else:
        exog_transformed = exog_column_transformed

    exog_transformed.index = exog_idx[-len(exog_transformed):]

    return exog_transformed
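
A quick sketch of the reshaping for direct forecasting: each column is expanded into one column per step (import path assumed).

import pandas as pd
from skforecast.utils import exog_to_direct  # assumed import path

exog = pd.Series([10, 20, 30, 40, 50], name='exog_1')
exog_to_direct(exog, steps=2)
#    exog_1_step_1  exog_1_step_2
# 1             10             20
# 2             20             30
# 3             30             40
# 4             40             50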

exog_to_direct_numpy(exog, steps)

Transforms exog to np.ndarray with the shape needed for direct forecasting.

Parameters:

exog : numpy ndarray, shape(samples,)
    Exogenous variables. Required.

steps : int
    Number of steps that will be predicted using exog. Required.

Returns:

exog_transformed : numpy ndarray
    Exogenous variables transformed.

Source code in skforecast/utils/utils.py
def exog_to_direct_numpy(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to `np.ndarray` with the shape needed for direct
    forecasting.

    Parameters
    ----------        
    exog : numpy ndarray, shape(samples,)
        Exogenous variables.

    steps : int.
        Number of steps that will be predicted using exog.

    Returns 
    -------
    exog_transformed : numpy ndarray
        Exogenous variables transformed.

    """

    exog_transformed = []

    if exog.ndim == 1:
        exog = np.expand_dims(exog, axis=1)

    for i in range(exog.shape[1]):
        exog_column = exog[:, i]
        exog_column_transformed = np.vstack(
            [np.roll(exog_column, j) for j in range(steps)]
        ).T[steps - 1:]
        exog_column_transformed = exog_column_transformed[:, ::-1]
        exog_transformed.append(exog_column_transformed)

    if len(exog_transformed) > 1:
        exog_transformed = np.concatenate(exog_transformed, axis=1)
    else:
        exog_transformed = exog_column_transformed

    return exog_transformed
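
The numpy counterpart applies the same reshaping to a ndarray; a minimal sketch (import path assumed):

import numpy as np
from skforecast.utils import exog_to_direct_numpy  # assumed import path

exog = np.array([10, 20, 30, 40, 50])
exog_to_direct_numpy(exog, steps=2)
# array([[10, 20],
#        [20, 30],
#        [30, 40],
#        [40, 50]])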

expand_index(index, steps)

Create a new index of length steps starting at the end of the index.

Parameters:

index : pandas Index, None
    Index of last window. Required.

steps : int
    Number of steps to expand. Required.

Returns:

new_index : pandas Index
    New index.

Source code in skforecast/utils/utils.py
def expand_index(
    index: Union[pd.Index, None], 
    steps: int
) -> pd.Index:
    """
    Create a new index of length `steps` starting at the end of the index.

    Parameters
    ----------        
    index : pd.Index, None
        Index of last window.

    steps : int
        Number of steps to expand.

    Returns 
    -------
    new_index : pd.Index
        New index.

    """

    if isinstance(index, pd.Index):

        if isinstance(index, pd.DatetimeIndex):
            new_index = pd.date_range(
                            index[-1] + index.freq,
                            periods = steps,
                            freq    = index.freq
                        )
        elif isinstance(index, pd.RangeIndex):
            new_index = pd.RangeIndex(
                            start = index[-1] + 1,
                            stop  = index[-1] + 1 + steps
                        )
    else: 
        new_index = pd.RangeIndex(
                        start = 0,
                        stop  = steps
                    )

    return new_index
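
A brief sketch (import path assumed): a DatetimeIndex is extended with its own frequency, while a RangeIndex continues from its last value.

import pandas as pd
from skforecast.utils import expand_index  # assumed import path

index = pd.date_range('2020-01-01', periods=3, freq='D')
expand_index(index, steps=2)
# DatetimeIndex(['2020-01-04', '2020-01-05'], dtype='datetime64[ns]', freq='D')

expand_index(pd.RangeIndex(start=0, stop=3), steps=2)
# RangeIndex(start=3, stop=5, step=1)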

transform_series(series, transformer, fit=False, inverse_transform=False)

Transform raw values of pandas Series with a scikit-learn alike transformer (preprocessor). The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

series : pandas Series
    Series to be transformed. Required.

transformer : scikit-learn alike transformer (preprocessor)
    scikit-learn alike transformer (preprocessor) with methods: fit,
    transform, fit_transform and inverse_transform. ColumnTransformers are
    not allowed since they do not have inverse_transform method. Required.

fit : bool, default False
    Train the transformer before applying it.

inverse_transform : bool, default False
    Transform back the data to the original representation.

Returns:

series_transformed : pandas Series, pandas DataFrame
    Transformed Series. Depending on the transformer used, the output may be
    a Series or a DataFrame.

Source code in skforecast/utils/utils.py
def transform_series(
    series: pd.Series,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> Union[pd.Series, pd.DataFrame]:
    """      
    Transform raw values of pandas Series with a scikit-learn alike transformer
    (preprocessor). The transformer used must have the following methods: fit, transform,
    fit_transform and inverse_transform. ColumnTransformers are not allowed since they
    do not have inverse_transform method.

    Parameters
    ----------
    series : pandas Series
        Series to be transformed.

    transformer : scikit-learn alike transformer (preprocessor).
        scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform. ColumnTransformers are not allowed 
        since they do not have inverse_transform method.

    fit : bool, default `False`
        Train the transformer before applying it.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation.

    Returns
    -------
    series_transformed : pandas Series, pandas DataFrame
        Transformed Series. Depending on the transformer used, the output may 
        be a Series or a DataFrame.

    """

    if not isinstance(series, pd.Series):
        raise TypeError(
            (f"`series` argument must be a pandas Series. Got {type(series)}.")
        )

    if transformer is None:
        return series

    if series.name is None:
        series.name = 'no_name'

    data = series.to_frame()

    if fit and hasattr(transformer, 'fit'):
        transformer.fit(data)

    # If the attribute feature_names_in_ exists, it is overwritten to allow using
    # the transformer on other series than those that were passed during fit.
    if hasattr(transformer, 'feature_names_in_') and transformer.feature_names_in_[0] != data.columns[0]:
        transformer = deepcopy(transformer)
        transformer.feature_names_in_ = np.array([data.columns[0]], dtype=object)

    if inverse_transform:
        values_transformed = transformer.inverse_transform(data)
    else:
        values_transformed = transformer.transform(data)   

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense array.
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, np.ndarray) and values_transformed.shape[1] == 1:
        series_transformed = pd.Series(
                                 data  = values_transformed.flatten(),
                                 index = data.index,
                                 name  = data.columns[0]
                             )
    elif isinstance(values_transformed, pd.DataFrame) and values_transformed.shape[1] == 1:
        series_transformed = values_transformed.squeeze()
    else:
        series_transformed = pd.DataFrame(
                                 data    = values_transformed,
                                 index   = data.index,
                                 columns = transformer.get_feature_names_out()
                             )

    return series_transformed
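
A minimal round-trip sketch with a StandardScaler (import path assumed).

import pandas as pd
from sklearn.preprocessing import StandardScaler
from skforecast.utils import transform_series  # assumed import path

series = pd.Series([1., 2., 3., 4.], name='y')
scaler = StandardScaler()

series_scaled = transform_series(series, scaler, fit=True)
series_back   = transform_series(series_scaled, scaler, inverse_transform=True)
# series_back recovers the original values of `series`.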

transform_dataframe(df, transformer, fit=False, inverse_transform=False)

Transform raw values of pandas DataFrame with a scikit-learn alike transformer, preprocessor or ColumnTransformer. inverse_transform is not available when using ColumnTransformers.

Parameters:

df : pandas DataFrame
    Pandas DataFrame to be transformed. Required.

transformer : scikit-learn alike transformer, preprocessor or ColumnTransformer
    scikit-learn alike transformer, preprocessor or ColumnTransformer. Required.

fit : bool, default False
    Train the transformer before applying it.

inverse_transform : bool, default False
    Transform back the data to the original representation. This is not
    available when using transformers of class scikit-learn ColumnTransformers.

Returns:

df_transformed : pandas DataFrame
    Transformed DataFrame.

Source code in skforecast/utils/utils.py
def transform_dataframe(
    df: pd.DataFrame,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> pd.DataFrame:
    """      
    Transform raw values of pandas DataFrame with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer. `inverse_transform` is not 
    available when using ColumnTransformers.

    Parameters
    ----------
    df : pandas DataFrame
        Pandas DataFrame to be transformed.

    transformer : scikit-learn alike transformer, preprocessor or ColumnTransformer.
        scikit-learn alike transformer, preprocessor or ColumnTransformer.

    fit : bool, default `False`
        Train the transformer before applying it.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    df_transformed : pandas DataFrame
        Transformed DataFrame.

    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            f"`df` argument must be a pandas DataFrame. Got {type(df)}"
        )

    if transformer is None:
        return df

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise Exception(
            "`inverse_transform` is not available when using ColumnTransformers."
        )

    if not inverse_transform:
        if fit:
            values_transformed = transformer.fit_transform(df)
        else:
            values_transformed = transformer.transform(df)
    else:
        values_transformed = transformer.inverse_transform(df)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if hasattr(transformer, 'get_feature_names_out'):
        feature_names_out = transformer.get_feature_names_out()
    elif hasattr(transformer, 'categories_'):   
        feature_names_out = transformer.categories_
    else:
        feature_names_out = df.columns

    df_transformed = pd.DataFrame(
                         data    = values_transformed,
                         index   = df.index,
                         columns = feature_names_out
                     )

    return df_transformed
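
A hedged sketch with a ColumnTransformer that scales a numeric column and one-hot encodes a categorical one; note that inverse_transform is not available for ColumnTransformers. The import path is an assumption.

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from skforecast.utils import transform_dataframe  # assumed import path

df = pd.DataFrame({'temp': [20., 21., 19.], 'weekday': ['mon', 'tue', 'mon']})
transformer = ColumnTransformer(
                  [('scale', StandardScaler(), ['temp']),
                   ('onehot', OneHotEncoder(), ['weekday'])]
              )
transform_dataframe(df, transformer, fit=True)
# Returns a DataFrame with columns such as 'scale__temp',
# 'onehot__weekday_mon' and 'onehot__weekday_tue'.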

check_optional_dependency(package_name)

Check if an optional dependency is installed; if not, raise an ImportError with installation instructions.

Parameters:

package_name : str
    Name of the package to check. Required.
Source code in skforecast/utils/utils.py
def check_optional_dependency(
    package_name: str
) -> None:
    """
    Check if an optional dependency is installed, if not raise an ImportError  
    with installation instructions.

    Parameters
    ----------
    package_name : str
        Name of the package to check.

    """

    if importlib.util.find_spec(package_name) is None:
        try:
            extra, package_version = _find_optional_dependency(package_name=package_name)
            msg = (
                f"\n'{package_name}' is an optional dependency not included in the default "
                f"skforecast installation. Please run: `pip install \"{package_version}\"` to install it."
                f"\n\nAlternately, you can install it by running `pip install skforecast[{extra}]`"
            )
        except:
            msg = f"\n'{package_name}' is needed but not installed. Please install it."

        raise ImportError(msg)

multivariate_time_series_corr(time_series, other, lags, method='pearson')

Compute correlation between a time_series and the lagged values of other time series.

Parameters:

time_series : pandas Series
    Target time series. Required.

other : pandas DataFrame
    Time series whose lagged values are correlated to time_series. Required.

lags : int, list, numpy ndarray
    Lags to be included in the correlation analysis. Required.

method : str, default 'pearson'
    - pearson : standard correlation coefficient.
    - kendall : Kendall Tau correlation coefficient.
    - spearman : Spearman rank correlation.

Returns:

corr : pandas DataFrame
    Correlation values.

Source code in skforecast/utils/utils.py
def multivariate_time_series_corr(
    time_series: pd.Series,
    other: pd.DataFrame,
    lags: Union[int, list, np.array],
    method: str='pearson'
)-> pd.DataFrame:
    """
    Compute correlation between a time_series and the lagged values of other 
    time series. 

    Parameters
    ----------
    time_series : pandas Series
        Target time series.

    other : pandas DataFrame
        Time series whose lagged values are correlated to `time_series`.

    lags : Union[int, list, numpy ndarray]
        Lags to be included in the correlation analysis.

    method : str, default 'pearson'
        - pearson : standard correlation coefficient.
        - kendall : Kendall Tau correlation coefficient.
        - spearman : Spearman rank correlation.

    Returns
    -------
    corr : pandas DataFrame
        Correlation values.

    """

    if not len(time_series) == len(other):
        raise ValueError("`time_series` and `other` must have the same length.")

    if not (time_series.index == other.index).all():
        raise ValueError("`time_series` and `other` must have the same index.")

    if isinstance(lags, int):
        lags = range(lags)

    corr = {}
    for col in other.columns:
        lag_values = {}
        for lag in lags:
            lag_values[lag] = other[col].shift(lag)

        lag_values = pd.DataFrame(lag_values)
        lag_values.insert(0, None, time_series)
        corr[col] = lag_values.corr(method=method).iloc[1:, 0]

    corr = pd.DataFrame(corr)
    corr.index = corr.index.astype('int64')
    corr.index.name = "lag"

    return corr
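
For illustration, a small sketch computing lagged correlations between a target series and two other series (import path assumed).

import numpy as np
import pandas as pd
from skforecast.utils import multivariate_time_series_corr  # assumed import path

rng = np.random.default_rng(123)
time_series = pd.Series(rng.normal(size=100), name='target')
other = pd.DataFrame({'series_1': rng.normal(size=100),
                      'series_2': rng.normal(size=100)})

multivariate_time_series_corr(time_series, other, lags=3)
# DataFrame indexed by lag (0, 1, 2) with one column of correlations per
# series in `other`.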