`utils`¶

`save_forecaster(forecaster, file_name, verbose=True)` ¶

Save forecaster model using joblib.

Parameters:

Name	Type	Description	Default
`forecaster`	`forecaster object from skforecast library.`	Model created with skforecast library.	required
`file_name`	`str`	File name given to the object.	required
`verbose`	`bool`	Print info about the forecaster saved	`True`

Source code in skforecast/utils/utils.py

def save_forecaster(
    forecaster, 
    file_name: str, 
    verbose: bool=True
) -> None:
    """
    Persist a forecaster object to disk with joblib.

    Parameters
    ----------
    forecaster: forecaster object from skforecast library.
        Model created with skforecast library. It must expose a `summary()`
        method when `verbose` is `True`.

    file_name: str
        File name (path) the object is written to.

    verbose: bool, default `True`
        If `True`, call `forecaster.summary()` after the object is saved.

    Returns 
    -------
    None

    """

    # Serialization is delegated entirely to joblib.
    joblib.dump(forecaster, filename=file_name)

    if not verbose:
        return

    forecaster.summary()

`load_forecaster(file_name, verbose=True)` ¶

Load forecaster model from disc using joblib.

Parameters:

Name	Type	Description	Default
`forecaster`	`forecaster object from skforecast library.`	Forecaster created with skforecast library.	required
`file_name`	`str`	File name given to the object.	required
`verbose`	`bool`	Print summary about the forecaster loaded.	`True`

Returns:

Type	Description
`object`	Forecaster created with skforecast library.

Source code in skforecast/utils/utils.py

def load_forecaster(
    file_name: str,
    verbose: bool=True
) -> object:
    """
    Read back from disk a forecaster previously stored with joblib.

    Parameters
    ----------
    file_name: str
        File name (path) of the stored object.

    verbose: bool, default `True`
        If `True`, call `summary()` on the loaded forecaster.

    Returns 
    -------
    Forecaster
        Forecaster created with skforecast library.

    Notes
    -----
    NOTE(review): joblib persistence is pickle-based, so this function should
    only be used with files coming from a trusted source.

    """

    loaded_forecaster = joblib.load(filename=file_name)

    if verbose:
        loaded_forecaster.summary()

    return loaded_forecaster

`initialize_lags(forecaster_type, lags)` ¶

Check lags argument input and generate the corresponding numpy ndarray.

Parameters:

Name	Type	Description	Default
`forecaster_type`	`str`	Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiVariate.	required
`lags`	`Any`	Lags used as predictors.	required

Returns:

Type	Description
`numpy.ndarray`	Lags used as predictors.

Source code in skforecast/utils/utils.py

def initialize_lags(
    forecaster_type: str,
    lags: Any
) -> np.ndarray:
    """
    Check lags argument input and generate the corresponding numpy ndarray.

    Parameters
    ----------
    forecaster_type : str
        Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate. It is only used to select the message
        of the `TypeError` raised for unsupported `lags` types.

    lags : Any
        Lags used as predictors. Accepted inputs:
            `int`: lags from 1 to `lags` (included) are generated.
            `list`, `range` or 1d numpy ndarray: used as given, all values
            must be integers greater than or equal to 1.

    Returns
    -------
    lags : numpy ndarray
        Lags used as predictors. A numpy ndarray input is returned as is
        (no copy is made).

    Raises
    ------
    ValueError
        If `lags` is an empty sequence or contains a value lower than 1.

    TypeError
        If any value in `lags` is not an integer or if `lags` is of an
        unsupported type.

    """

    if isinstance(lags, int) and lags < 1:
        raise ValueError('Minimum value of lags allowed is 1.')

    if isinstance(lags, (list, range, np.ndarray)):
        # Fail with an explicit message instead of the opaque error that
        # `min()` raises on an empty sequence.
        if len(lags) == 0:
            raise ValueError('`lags` must contain at least one value.')

        # Elements of a range are always int, only list and ndarray need
        # the check. `np.integer` covers every numpy integer dtype
        # (int8, int16, uint32, ...) and not only int32 / int64.
        if not isinstance(lags, range):
            for lag in lags:
                if not isinstance(lag, (int, np.integer)):
                    raise TypeError('All values in `lags` must be int.')

        if min(lags) < 1:
            raise ValueError('Minimum value of lags allowed is 1.')

    if isinstance(lags, int):
        return np.arange(lags) + 1

    if isinstance(lags, (list, range)):
        return np.array(lags)

    if isinstance(lags, np.ndarray):
        return lags

    # Unsupported type: the multivariate forecaster also advertises `dict`
    # in its message.
    if not forecaster_type == 'ForecasterAutoregMultiVariate':
        raise TypeError(
            '`lags` argument must be an int, 1d numpy ndarray, range or list. '
            f"Got {type(lags)}."
        )

    raise TypeError(
        '`lags` argument must be a dict, int, 1d numpy ndarray, range or list. '
        f"Got {type(lags)}."
    )

`check_y(y)` ¶

Raise Exception if y is not pandas Series or if it has missing values.

Parameters:

Name	Type	Description	Default
`y`	`Any`	Time series values.	required

Returns:

Type	Description
`None`	Nothing is returned; the function only raises if `y` is not valid.

Source code in skforecast/utils/utils.py

def check_y(
    y: Any
) -> None:
    """
    Validate `y`: it must be a pandas Series without missing values.

    Parameters
    ----------        
    y : Any
        Time series values.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `y` is not a pandas Series.

    ValueError
        If `y` contains missing values.

    """

    if isinstance(y, pd.Series):
        if y.isnull().any():
            raise ValueError('`y` has missing values.')
        return

    raise TypeError('`y` must be a pandas Series.')

`check_exog(exog)` ¶

Raise Exception if exog is not pandas Series or pandas DataFrame, or if it has missing values.

Parameters:

Name	Type	Description	Default
`exog`	`Any`	Exogenous variable/s included as predictor/s.	required

Returns:

Type	Description
`None`	Nothing is returned; the function only raises if `exog` is not valid.

Source code in skforecast/utils/utils.py

def check_exog(
    exog: Any
) -> None:
    """
    Validate `exog`: it must be a pandas Series or a pandas DataFrame and
    it must not contain missing values.

    Parameters
    ----------        
    exog :  Any
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `exog` is neither a pandas Series nor a pandas DataFrame.

    ValueError
        If `exog` contains missing values.

    """

    accepted_types = (pd.Series, pd.DataFrame)
    if not isinstance(exog, accepted_types):
        raise TypeError('`exog` must be `pd.Series` or `pd.DataFrame`.')

    # Flattening to a numpy array gives a single check that works for
    # both Series and DataFrame.
    has_missing_values = exog.isnull().to_numpy().any()
    if has_missing_values:
        raise ValueError('`exog` has missing values.')

    return None

`check_predict_input(forecaster_type, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, exog=None, exog_type=None, exog_col_names=None, interval=None, max_steps=None, levels=None, series_col_names=None)` ¶

Check all inputs of predict method. This is a helper function to validate that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

Name	Type	Description	Default
`forecaster_type`	`str`	Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiVariate.	required
`steps`	`int`	Number of future steps predicted.	required
`fitted`	`bool`	Tag to identify if the regressor has been fitted (trained).	required
`included_exog`	`bool`	If the forecaster has been trained using exogenous variable/s.	required
`index_type`	`type`	Type of index of the input used in training.	required
`index_freq`	`str`	Frequency of Index of the input used in training.	required
`window_size`	`int`	Size of the window needed to create the predictors. It is equal to `max_lag`.	required
`last_window`	`Union[pandas.core.series.Series, pandas.core.frame.DataFrame]`	Values of the series used to create the predictors (lags) needed in the first iteration of prediction (t + 1).	`None`
`exog`	`Union[pandas.core.series.Series, pandas.core.frame.DataFrame]`	Exogenous variable/s included as predictor/s.	`None`
`exog_type`	`Optional[type]`	Type of exogenous variable/s used in training.	`None`
`exog_col_names`	`Optional[list]`	Names of columns of `exog` if `exog` used in training was a pandas DataFrame.	`None`
`interval`	`list`	Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as `interval = [2.5, 97.5]`.	`None`
`max_steps`	`int`	Maximum number of steps allowed (`ForecasterAutoregDirect` and `ForecasterAutoregMultiVariate`).	`None`
`levels`	`Union[str, list]`	Time series to be predicted (`ForecasterAutoregMultiSeries`).	`None`
`series_col_names`	`list`	Names of the columns used during fit (`ForecasterAutoregMultiSeries` and `ForecasterAutoregMultiVariate`).	`None`

Source code in skforecast/utils/utils.py

def check_predict_input(
    forecaster_type: str,
    steps: int,
    fitted: bool,
    included_exog: bool,
    index_type: type,
    index_freq: str,
    window_size: int,
    last_window: Union[pd.Series, pd.DataFrame]=None,
    exog: Union[pd.Series, pd.DataFrame]=None,
    exog_type: Union[type, None]=None,
    exog_col_names: Union[list, None]=None,
    interval: list=None,
    max_steps: int=None,
    levels: Optional[Union[str, list]]=None,
    series_col_names: list=None
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Parameters
    ----------
    forecaster_type : str
        Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate.

    steps : int, list
        Number of future steps predicted. When a list is given, its values
        are treated as zero-based step positions (step `s` is reported to
        the user as `s + 1`).

    fitted: Bool
        Tag to identify if the regressor has been fitted (trained).

    included_exog : bool
        If the forecaster has been trained using exogenous variable/s.

    index_type : type
        Type of index of the input used in training.

    index_freq : str
        Frequency of Index of the input used in training.

    window_size: int
        Size of the window needed to create the predictors. It is equal to
        `max_lag`.

    last_window : pandas Series, pandas DataFrame, default `None`
        Values of the series used to create the predictors (lags) needed in 
        the first iteration of prediction (t + 1).

    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s.

    exog_type : type, default `None`
        Type of exogenous variable/s used in training.

    exog_col_names : list, default `None`
        Names of columns of `exog` if `exog` used in training was a pandas
        DataFrame.

    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.

    max_steps: int, default `None`
        Maximum number of steps allowed (`ForecasterAutoregDirect` and 
        `ForecasterAutoregMultiVariate`).

    levels : str, list, default `None`
        Time series to be predicted (`ForecasterAutoregMultiSeries`). When
        `None`, the checks that compare `levels` with column names are
        skipped.

    series_col_names : list, default `None`
        Names of the columns used during fit (`ForecasterAutoregMultiSeries` and 
        `ForecasterAutoregMultiVariate`).

    Returns
    -------
    None

    Raises
    ------
    sklearn.exceptions.NotFittedError
        If `fitted` is `False`.

    ValueError
        If `steps`, `levels`, `exog` or `last_window` have invalid values
        (out of range, unknown names, missing values, not enough rows).

    TypeError
        If `levels`, `exog` or `last_window` are of an unexpected type, or
        if their index type or frequency do not match the ones used in
        training.

    """

    if not fitted:
        raise sklearn.exceptions.NotFittedError(
            ('This Forecaster instance is not fitted yet. Call `fit` with '
             'appropriate arguments before using predict.')
        )

    # ----- steps -------------------------------------------------------------
    if isinstance(steps, int) and steps < 1:
        raise ValueError(
            f'`steps` must be an integer greater than or equal to 1. Got {steps}.'
        )

    if isinstance(steps, list) and min(steps) < 0:
        raise ValueError(
           (f"The minimum value of `steps` must be equal to or greater than 1. "
            f"Got {min(steps) + 1}.")
        )

    if max_steps is not None:
        # An int is already the number of steps; any other input is handled
        # as a sequence of zero-based positions.
        requested_steps = steps if isinstance(steps, int) else max(steps) + 1
        if requested_steps > max_steps:
            raise ValueError(
                (f"The maximum value of `steps` must be less than or equal to "
                 f"the value of steps defined when initializing the forecaster. "
                 f"Got {requested_steps}, but the maximum is {max_steps}.")
            )

    if interval is not None:
        _check_interval(interval = interval)

    # ----- levels ------------------------------------------------------------
    # A single level may be given as a str. It is wrapped in a list so that
    # `set()` compares the whole name instead of its individual characters.
    levels_list = [levels] if isinstance(levels, str) else levels

    if forecaster_type == 'ForecasterAutoregMultiSeries':
        if levels is not None and not isinstance(levels, (str, list)):
            raise TypeError(
                '`levels` must be a `list` of column names, a `str` of a column name or `None`.'
            )

        if levels_list is not None and \
           len(set(levels_list) - set(series_col_names)) != 0:
            raise ValueError(
                f'`levels` must be in `series_col_names` : {series_col_names}.'
            )

    # ----- exog --------------------------------------------------------------
    if exog is None and included_exog:
        raise ValueError(
            ('Forecaster trained with exogenous variable/s. '
             'Same variable/s must be provided in `predict()`.')
        )

    if exog is not None and not included_exog:
        raise ValueError(
            ('Forecaster trained without exogenous variable/s. '
             '`exog` must be `None` in `predict()`.')
        )

    if exog is not None:
        # The type is validated first so that the checks below can rely on
        # the pandas API.
        if not isinstance(exog, (pd.Series, pd.DataFrame)):
            raise TypeError('`exog` must be a pandas Series or DataFrame.')

        max_step = max(steps)+1 if isinstance(steps, list) else steps
        if len(exog) < max_step:
            raise ValueError(
                f'`exog` must have at least as many values as the distance to '
                f'the maximum step predicted, {max_step}.'
            )
        if exog.isnull().values.any():
            raise ValueError('`exog` has missing values.')
        if not isinstance(exog, exog_type):
            raise TypeError(
                f'Expected type for `exog`: {exog_type}. Got {type(exog)}.'     
            )
        if isinstance(exog, pd.DataFrame):
            col_missing = set(exog_col_names).difference(set(exog.columns))
            if col_missing:
                raise ValueError(
                    (f'Missing columns in `exog`. Expected {exog_col_names}. '
                     f'Got {exog.columns.to_list()}.') 
                )

        # Only the index is needed here, so an empty slice is preprocessed.
        _, exog_index = preprocess_exog(exog=exog.iloc[:0, ])

        if not isinstance(exog_index, index_type):
            raise TypeError(
                (f'Expected index of type {index_type} for `exog`. '
                 f'Got {type(exog_index)}.')
            )

        if isinstance(exog_index, pd.DatetimeIndex):
            if not exog_index.freqstr == index_freq:
                raise TypeError(
                    (f'Expected frequency of type {index_freq} for `exog`. '
                     f'Got {exog_index.freqstr}.')
                )

    # ----- last_window -------------------------------------------------------
    if last_window is not None:
        if len(last_window) < window_size:
            raise ValueError(
                (f'`last_window` must have as many values as as needed to '
                 f'calculate the predictors. For this forecaster it is {window_size}.')
            )

        if forecaster_type in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiVariate']:
            if not isinstance(last_window, pd.DataFrame):
                raise TypeError(
                    f'`last_window` must be a pandas DataFrame. Got {type(last_window)}.'
                )

            if forecaster_type == 'ForecasterAutoregMultiSeries' and \
               levels_list is not None and \
               len(set(levels_list) - set(last_window.columns)) != 0:
                raise ValueError(
                    (f'`last_window` must contain a column(s) named as the level(s) to be predicted.\n'
                     f'    `levels` : {levels}.\n'
                     f'    `last_window` columns : {list(last_window.columns)}.')
                )

            if forecaster_type == 'ForecasterAutoregMultiVariate' and \
               (series_col_names != list(last_window.columns)):
                raise ValueError(
                    (f'`last_window` columns must be the same as `series` column names.\n'
                     f'    `last_window` columns : {list(last_window.columns)}.\n'
                     f'    `series` columns      : {series_col_names}.')
                )

        else:    
            if not isinstance(last_window, pd.Series):
                raise TypeError('`last_window` must be a pandas Series.')

        # Any missing value is rejected, whatever the column it is in.
        if last_window.isnull().values.any():
            raise ValueError('`last_window` has missing values.')

        # Only the index is needed here, so an empty slice is preprocessed.
        _, last_window_index = preprocess_last_window(
                                    last_window = last_window.iloc[:0]
                                ) 
        if not isinstance(last_window_index, index_type):
            raise TypeError(
                f'Expected index of type {index_type} for `last_window`. '
                f'Got {type(last_window_index)}.'
            )
        if isinstance(last_window_index, pd.DatetimeIndex):
            if not last_window_index.freqstr == index_freq:
                raise TypeError(
                    f'Expected frequency of type {index_freq} for `last_window`. '
                    f'Got {last_window_index.freqstr}.'
                )

    return

`preprocess_y(y)` ¶

Returns values and index of series separately. Index is overwritten according to the next rules:

- If index is of type DatetimeIndex and has frequency, nothing is changed.
- If index is of type RangeIndex, nothing is changed.
- If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
- If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`y`	`Series`	Time series.	required

Returns:

Type	Description
`Tuple[numpy.ndarray, pandas.core.indexes.base.Index]`	Numpy array with values of `y`.

Source code in skforecast/utils/utils.py

def preprocess_y(
    y: pd.Series
) -> Tuple[np.ndarray, pd.Index]:
    """
    Split a series into its values and its index. The index is kept or
    replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------        
    y : pandas Series
        Time series.

    Returns 
    -------
    y_values : numpy ndarray
        Numpy array with values of `y`.

    y_index : pandas Index
        Index of `y` modified according to the rules. Replacement indexes
        go from 0 to `len(y)` with step 1.

    """

    current_index = y.index
    is_datetime = isinstance(current_index, pd.DatetimeIndex)
    keep_index = (
        isinstance(current_index, pd.RangeIndex)
        or (is_datetime and current_index.freq is not None)
    )

    if keep_index:
        y_index = current_index
    else:
        if is_datetime:
            warnings.warn(
                '`y` has DatetimeIndex index but no frequency. '
                'Index is overwritten with a RangeIndex of step 1.'
            )
        else:
            warnings.warn(
                '`y` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
            )
        y_index = pd.RangeIndex(start=0, stop=len(y), step=1)

    return y.to_numpy(), y_index

`preprocess_last_window(last_window)` ¶

Returns values and index of series separately. Index is overwritten according to the next rules:

- If index is of type DatetimeIndex and has frequency, nothing is changed.
- If index is of type RangeIndex, nothing is changed.
- If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
- If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`last_window`	`Union[pandas.core.series.Series, pandas.core.frame.DataFrame]`	Time series values.	required

Returns:

Type	Description
`Tuple[numpy.ndarray, pandas.core.indexes.base.Index]`	Numpy array with values of `last_window`.

Source code in skforecast/utils/utils.py

def preprocess_last_window(
    last_window:Union[pd.Series, pd.DataFrame]
 ) -> Tuple[np.ndarray, pd.Index]:
    """
    Split `last_window` into its values and its index. The index is kept
    or replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------        
    last_window : pandas Series, pandas DataFrame
        Time series values.

    Returns 
    -------
    last_window_values : numpy ndarray
        Numpy array with values of `last_window`.

    last_window_index : pandas Index
        Index of `last_window` modified according to the rules. Replacement
        indexes go from 0 to `len(last_window)` with step 1.

    """

    original_index = last_window.index
    has_datetime_index = isinstance(original_index, pd.DatetimeIndex)

    # Indexes that are already usable are returned untouched.
    if isinstance(original_index, pd.RangeIndex) or (
        has_datetime_index and original_index.freq is not None
    ):
        return last_window.to_numpy(), original_index

    if has_datetime_index:
        warnings.warn(
            '`last_window` has DatetimeIndex index but no frequency. '
            'Index is overwritten with a RangeIndex of step 1.'
        )
    else:
        warnings.warn(
            '`last_window` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
        )

    last_window_index = pd.RangeIndex(start=0, stop=len(last_window), step=1)
    last_window_values = last_window.to_numpy()

    return last_window_values, last_window_index

`preprocess_exog(exog)` ¶

Returns values and index of series separately. Index is overwritten according to the next rules:

- If index is of type DatetimeIndex and has frequency, nothing is changed.
- If index is of type RangeIndex, nothing is changed.
- If index is of type DatetimeIndex but has no frequency, a RangeIndex is created.
- If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name	Type	Description	Default
`exog`	`Union[pandas.core.series.Series, pandas.core.frame.DataFrame]`	Exogenous variables.	required

Returns:

Type	Description
`Tuple[numpy.ndarray, pandas.core.indexes.base.Index]`	Numpy array with values of `exog`.

Source code in skforecast/utils/utils.py

def preprocess_exog(
    exog: Union[pd.Series, pd.DataFrame]
) -> Tuple[np.ndarray, pd.Index]:
    """
    Split `exog` into its values and its index. The index is kept or
    replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------        
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    Returns 
    -------
    exog_values : numpy ndarray
        Numpy array with values of `exog`.

    exog_index : pandas Index
        Index of `exog` modified according to the rules. Replacement indexes
        go from 0 to `len(exog)` with step 1.

    """

    exog_index = exog.index

    if isinstance(exog_index, pd.DatetimeIndex):
        # A DatetimeIndex is only usable when it carries a frequency.
        if exog_index.freq is None:
            warnings.warn(
                '`exog` has DatetimeIndex index but no frequency. '
                'Index is overwritten with a RangeIndex of step 1.'
            )
            exog_index = pd.RangeIndex(start=0, stop=len(exog), step=1)
    elif not isinstance(exog_index, pd.RangeIndex):
        warnings.warn(
            '`exog` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
        )
        exog_index = pd.RangeIndex(start=0, stop=len(exog), step=1)

    exog_values = exog.to_numpy()

    return exog_values, exog_index

`exog_to_direct(exog, steps)` ¶

Transforms exog to np.ndarray with the shape needed for direct forecasting.

Parameters:

Name	Type	Description	Default
`exog`	`ndarray`	Time series values.	required
`steps`	`int`	Number of steps that will be predicted using this exog.	required

Returns:

Type	Description
`ndarray`

Source code in skforecast/utils/utils.py

def exog_to_direct(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to `np.ndarray` with the shape needed for direct
    forecasting.

    Every column of `exog` is expanded into rolling windows of length
    `steps` (window `i` holds rows `i` to `i + steps - 1`). The windows of
    each column are stacked row-wise and the blocks of the different
    columns are then placed side by side.

    Parameters
    ----------        
    exog : numpy ndarray, shape(samples,) or shape(samples, n_columns)
        Time series values. A 1d array is handled as a single column.

    steps : int.
        Number of steps that will be predicted using this exog.

    Returns 
    -------
    exog_transformed : numpy ndarray, shape(samples - steps + 1, steps * n_columns)
        Always a 2d numpy ndarray, also when only one window (or none) can
        be built. If `exog` has fewer rows than `steps`, the array has 0 rows.

    """

    if exog.ndim < 2:
        exog = exog.reshape(-1, 1)

    n_rows, n_columns = exog.shape
    n_windows = max(n_rows - (steps - 1), 0)

    # Nothing to stack: return an empty array of the expected width instead
    # of letting numpy fail on an empty list.
    if n_windows == 0 or n_columns == 0:
        return np.empty((n_windows, steps * n_columns), dtype=exog.dtype)

    columns_transformed = [
        np.vstack([exog[i:i + steps, column] for i in range(n_windows)])
        for column in range(n_columns)
    ]

    return np.hstack(columns_transformed)

`expand_index(index, steps)` ¶

Create a new index of length steps starting at the end of the index.

Parameters:

Name	Type	Description	Default
`index`	`Optional[pandas.core.indexes.base.Index]`	Index of last window.	required
`steps`	`int`	Number of steps to expand.	required

Returns:

Type	Description
`Index`

Source code in skforecast/utils/utils.py

def expand_index(
    index: Union[pd.Index, None], 
    steps: int
) -> pd.Index:
    """
    Create a new index of length `steps` starting at the end of the index.

    Parameters
    ----------        
    index : pd.Index, None
        Index of last window. Only pandas DatetimeIndex and RangeIndex can
        be expanded. Any object that is not a pandas Index (for example
        `None`) produces a RangeIndex starting at 0.

    steps : int
        Number of steps to expand.

    Returns 
    -------
    new_index : pd.Index
        DatetimeIndex: `steps` periods after the last timestamp, using the
        frequency of `index`.
        RangeIndex: positions from `index[-1] + 1` to `index[-1] + steps`.
        Not a pandas Index: RangeIndex from 0 to `steps - 1`.

    Raises
    ------
    TypeError
        If `index` is a pandas Index that is neither a DatetimeIndex nor a
        RangeIndex.

    """

    if not isinstance(index, pd.Index):
        return pd.RangeIndex(start=0, stop=steps)

    if isinstance(index, pd.DatetimeIndex):
        return pd.date_range(
                   index[-1] + index.freq,
                   periods = steps,
                   freq    = index.freq
               )

    if isinstance(index, pd.RangeIndex):
        next_position = index[-1] + 1
        return pd.RangeIndex(
                   start = next_position,
                   stop  = next_position + steps
               )

    # Other index types have no defined way of being extended. Raising here
    # avoids returning an unbound variable.
    raise TypeError(
        '`index` must be a pandas DatetimeIndex or RangeIndex. '
        f'Got {type(index)}.'
    )

`transform_series(series, transformer, fit=False, inverse_transform=False)` ¶

Transform raw values of pandas Series with a scikit-learn alike transformer (preprocessor). The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name	Type	Description	Default
`series`	`Series`		required
`transformer`	`scikit-learn alike transformer (preprocessor).`	scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation.	`False`

Returns:

Type	Description
`Union[pandas.core.series.Series, pandas.core.frame.DataFrame]`	Transformed Series. Depending on the transformer used, the output may be a Series or a DataFrame.

Source code in skforecast/utils/utils.py

def transform_series(
    series: pd.Series,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> Union[pd.Series, pd.DataFrame]:
    """      
    Transform raw values of pandas Series with a scikit-learn alike transformer
    (preprocessor). The transformer used must have the following methods: fit, transform,
    fit_transform and inverse_transform. ColumnTransformers are not allowed since they
    do not have inverse_transform method.

    Parameters
    ----------
    series : pandas Series
        Series to be transformed. It is converted to a single column
        DataFrame before being passed to the transformer.

    transformer : scikit-learn alike transformer (preprocessor).
        scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform. ColumnTransformers are not allowed since they
        do not have inverse_transform method. If `None`, `series` is returned
        unchanged.

    fit : bool, default `False`
        Train the transformer before applying it. Transformers of class
        FunctionTransformer are never fitted.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation.

    Returns
    -------
    series_transformed : pandas Series, pandas DataFrame
        Transformed Series. Depending on the transformer used, the output may be a Series
        or a DataFrame. A single column output is always returned as a
        Series, also when it has only one row.

    Raises
    ------
    TypeError
        If `series` is not a pandas Series.

    """

    if not isinstance(series, pd.Series):
        raise TypeError(
            "`series` argument must be a pandas Series."
        )

    if transformer is None:
        return series

    series = series.to_frame()

    if fit and not isinstance(transformer, FunctionTransformer):
        transformer.fit(series)

    if inverse_transform:
        values_transformed = transformer.inverse_transform(series)
    else:
        values_transformed = transformer.transform(series)   

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense array.
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, np.ndarray) and values_transformed.shape[1] == 1:
        series_transformed = pd.Series(
                                 data  = values_transformed.flatten(),
                                 index = series.index,
                                 name  = series.columns[0]
                             )
    elif isinstance(values_transformed, pd.DataFrame) and values_transformed.shape[1] == 1:
        # Only the column axis is squeezed: a plain `squeeze()` would turn a
        # one row DataFrame into a scalar instead of a Series.
        series_transformed = values_transformed.squeeze(axis='columns')
    else:
        series_transformed = pd.DataFrame(
                                 data = values_transformed,
                                 index = series.index,
                                 columns = transformer.get_feature_names_out()
                             )

    return series_transformed

`transform_dataframe(df, transformer, fit=False, inverse_transform=False)` ¶

Transform raw values of pandas DataFrame with a scikit-learn alike transformer, preprocessor or ColumnTransformer. inverse_transform is not available when using ColumnTransformers.

Parameters:

Name	Type	Description	Default
`df`	`pandas DataFrame`	DataFrame to be transformed.	required
`transformer`	`scikit-learn alike transformer, preprocessor or ColumnTransformer.`	scikit-learn alike transformer, preprocessor or ColumnTransformer.	required
`fit`	`bool`	Train the transformer before applying it.	`False`
`inverse_transform`	`bool`	Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.	`False`

Returns:

Type	Description
`DataFrame`	Transformed DataFrame.

Source code in skforecast/utils/utils.py

def transform_dataframe(
    df: pd.DataFrame,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> pd.DataFrame:
    """      
    Transform raw values of pandas DataFrame with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer. `inverse_transform` is not available
    when using ColumnTransformers.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame to be transformed.

    transformer : scikit-learn alike transformer, preprocessor or ColumnTransformer.
        Object exposing `transform`, `fit_transform` and/or `inverse_transform`.
        If `None`, `df` is returned unchanged.

    fit : bool, default `False`
        Train the transformer before applying it. Ignored when
        `inverse_transform` is `True`.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    df_transformed : pandas DataFrame
        Transformed DataFrame, with the same index as `df`.

    Raises
    ------
    TypeError
        If `df` is not a pandas DataFrame.

    ValueError
        If `inverse_transform` is `True` and `transformer` is a ColumnTransformer.

    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            "`df` argument must be a pandas DataFrame."
        )

    if transformer is None:
        return df

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        # ValueError is a subclass of Exception, so callers that caught the
        # former generic Exception keep working.
        raise ValueError(
            '`inverse_transform` is not available when using ColumnTransformers.'
        )

    if inverse_transform:
        values_transformed = transformer.inverse_transform(df)
    elif fit:
        values_transformed = transformer.fit_transform(df)
    else:
        values_transformed = transformer.transform(df)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if inverse_transform and hasattr(transformer, 'feature_names_in_'):
        # Inverse-transformed data lives in the input space of the transformer,
        # so it is labelled with the names seen during fit instead of the
        # output names (both may differ in number and value).
        feature_names_out = transformer.feature_names_in_
    elif hasattr(transformer, 'get_feature_names_out'):
        feature_names_out = transformer.get_feature_names_out()
    elif hasattr(transformer, 'categories_'):
        # NOTE(review): `categories_` is used verbatim as column labels;
        # confirm this matches the output shape for multi-column encoders.
        feature_names_out = transformer.categories_
    else:
        feature_names_out = df.columns

    df_transformed = pd.DataFrame(
                         data    = values_transformed,
                         index   = df.index,
                         columns = feature_names_out
                     )

    return df_transformed

`multivariate_time_series_corr(time_series, other, lags, method='pearson')` ¶

Compute the correlation between a time series and the lagged values of other time series.

Parameters:

Name	Type	Description	Default
`time_series`	`Series`	Target time series.	required
`other`	`DataFrame`	Time series whose lagged values are correlated to `time_series`.	required
`lags`	`Union[int, list, numpy ndarray]`	Lags to be included in the correlation analysis. An integer `n` is expanded to the lags `0` to `n - 1`.	required
`method`	`str`	pearson : standard correlation coefficient. kendall : Kendall Tau correlation coefficient. spearman : Spearman rank correlation.	`'pearson'`

Returns:

Type	Description
`DataFrame`	Correlation values.

Source code in skforecast/utils/utils.py

def multivariate_time_series_corr(
    time_series: pd.Series,
    other: pd.DataFrame,
    lags: Union[int, list, np.ndarray],
    method: str='pearson'
)-> pd.DataFrame:
    """
    Compute correlation between a time_series and the lagged values of other 
    time series. 

    Parameters
    ----------
    time_series : pandas Series
        Target time series.

    other : pandas DataFrame
        Time series whose lagged values are correlated to `time_series`.

    lags : Union[int, list, numpy ndarray]
        Lags to be included in the correlation analysis. If an integer `n`
        (Python or NumPy integer), lags `0` to `n - 1` are used.

    method : str, default 'pearson'
        - pearson : standard correlation coefficient.
        - kendall : Kendall Tau correlation coefficient.
        - spearman : Spearman rank correlation.

    Returns
    -------
    corr : pandas DataFrame
        Correlation values. One row per lag (index named `lag`) and one column
        per column of `other`.

    Raises
    ------
    ValueError
        If `time_series` and `other` differ in length or in index.

    """

    if len(time_series) != len(other):
        raise ValueError("`time_series` and `other` must have the same length.")

    if not (time_series.index == other.index).all():
        raise ValueError("`time_series` and `other` must have the same index.")

    # NumPy integer scalars are not instances of `int`; accept them as well so
    # they are expanded to a range instead of failing when iterated.
    if isinstance(lags, (int, np.integer)):
        lags = range(int(lags))

    corr = {}
    for col in other.columns:
        lag_values = pd.DataFrame(
                         {lag: other[col].shift(lag) for lag in lags}
                     )
        # The target is placed as first column so that the first column of the
        # correlation matrix holds target-vs-lag values; its own row is dropped.
        lag_values.insert(0, None, time_series)
        corr[col] = lag_values.corr(method=method).iloc[1:, 0]

    corr = pd.DataFrame(corr)
    # Lag labels may have been upcast when mixed with the `None` label of the
    # target column, so they are cast back to integers.
    corr.index = corr.index.astype(int)
    corr.index.name = "lag"

    return corr

utils¶

save_forecaster(forecaster, file_name, verbose=True) ¶

load_forecaster(file_name, verbose=True) ¶

initialize_lags(forecaster_type, lags) ¶

check_y(y) ¶

check_exog(exog) ¶

check_predict_input(forecaster_type, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, exog=None, exog_type=None, exog_col_names=None, interval=None, max_steps=None, levels=None, series_col_names=None) ¶

preprocess_y(y) ¶

preprocess_last_window(last_window) ¶

preprocess_exog(exog) ¶

exog_to_direct(exog, steps) ¶

expand_index(index, steps) ¶

transform_series(series, transformer, fit=False, inverse_transform=False) ¶

transform_dataframe(df, transformer, fit=False, inverse_transform=False) ¶

multivariate_time_series_corr(time_series, other, lags, method='pearson') ¶

`utils`¶

`save_forecaster(forecaster, file_name, verbose=True)` ¶

`load_forecaster(file_name, verbose=True)` ¶

`initialize_lags(forecaster_type, lags)` ¶

`check_y(y)` ¶

`check_exog(exog)` ¶

`check_predict_input(forecaster_type, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, exog=None, exog_type=None, exog_col_names=None, interval=None, max_steps=None, levels=None, series_col_names=None)` ¶

`preprocess_y(y)` ¶

`preprocess_last_window(last_window)` ¶

`preprocess_exog(exog)` ¶

`exog_to_direct(exog, steps)` ¶

`expand_index(index, steps)` ¶

`transform_series(series, transformer, fit=False, inverse_transform=False)` ¶

`transform_dataframe(df, transformer, fit=False, inverse_transform=False)` ¶

`multivariate_time_series_corr(time_series, other, lags, method='pearson')` ¶