Skip to content

utils

save_forecaster(forecaster, file_name, verbose=True)

Save forecaster model using joblib.

Parameters:

Name Type Description Default
forecaster forecaster object from skforecast library.

Model created with skforecast library.

required
file_name str

File name given to the object.

required
verbose bool

Print info about the forecaster saved

True
Source code in skforecast/utils/utils.py
def save_forecaster(
    forecaster,
    file_name: str,
    verbose: bool=True
) -> None:
    """
    Persist a forecaster object to disk with joblib.

    Parameters
    ----------
    forecaster : forecaster object from skforecast library.
        Model created with skforecast library.

    file_name : str
        File name given to the object. It is passed to `joblib.dump` as
        `filename`.

    verbose : bool, default `True`
        If `True`, `forecaster.summary()` is called once the object has been
        written.

    Returns
    -------
    None

    """

    joblib.dump(forecaster, filename=file_name)

    # Nothing else to do when the caller asked for a silent save.
    if not verbose:
        return

    forecaster.summary()

load_forecaster(file_name, verbose=True)

Load forecaster model from disc using joblib.

Parameters:

Name Type Description Default
forecaster forecaster object from skforecast library.

Forecaster created with skforecast library.

required
file_name str

File name given to the object.

required
verbose bool

Print summary about the forecaster loaded.

True

Returns:

Type Description
object

Forecaster created with skforecast library.

Source code in skforecast/utils/utils.py
def load_forecaster(
    file_name: str,
    verbose: bool=True
) -> object:
    """
    Read a forecaster object back from disk with joblib.

    Parameters
    ----------
    file_name : str
        Name of the file to read. It is passed to `joblib.load` as `filename`.

    verbose : bool, default `True`
        If `True`, `summary()` is called on the loaded object before it is
        returned.

    Returns
    -------
    Forecaster
        Object stored in `file_name`.

    Notes
    -----
    joblib persistence is pickle based, so only files coming from a trusted
    source should be loaded.

    """

    loaded = joblib.load(filename=file_name)

    if verbose:
        loaded.summary()

    return loaded

initialize_lags(forecaster_type, lags)

Check lags argument input and generate the corresponding numpy ndarray.

Parameters:

Name Type Description Default
forecaster_type str

Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiVariate.

required
lags Any

Lags used as predictors.

required

Returns:

Type Description
ndarray

Lags used as predictors.

Source code in skforecast/utils/utils.py
def initialize_lags(
    forecaster_type: str,
    lags: Any
) -> np.ndarray:
    """
    Check lags argument input and generate the corresponding numpy ndarray.

    Parameters
    ----------
    forecaster_type : str
        Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate. It is only used to choose the wording
        of the error raised for an unsupported `lags` type.

    lags : Any
        Lags used as predictors. Accepted inputs:
            `int`: expanded to the lags 1, 2, ..., `lags`.
            `list`, `range`: converted to a numpy ndarray.
            numpy ndarray: returned as is.

    Returns
    -------
    lags : numpy ndarray
        Lags used as predictors.

    Raises
    ------
    ValueError
        If any lag is lower than 1 or if `lags` is an empty list, range or
        numpy ndarray.

    TypeError
        If a list or numpy ndarray contains a value that is not an integer,
        or if `lags` is of an unsupported type.

    """

    if isinstance(lags, int):
        if lags < 1:
            raise ValueError('Minimum value of lags allowed is 1.')
        return np.arange(lags) + 1

    if isinstance(lags, (list, range, np.ndarray)):
        # A range can only hold ints, so only lists and arrays are scanned.
        if not isinstance(lags, range):
            for lag in lags:
                # `np.integer` covers every numpy integer width (int8, int16,
                # uint8, ...), not only int32 / int64.
                if not isinstance(lag, (int, np.integer)):
                    raise TypeError('All values in `lags` must be int.')

        # Fail with an explicit message instead of the one `min` gives for
        # an empty sequence.
        if len(lags) == 0:
            raise ValueError('`lags` must contain at least one value.')

        if min(lags) < 1:
            raise ValueError('Minimum value of lags allowed is 1.')

        return lags if isinstance(lags, np.ndarray) else np.array(lags)

    if not forecaster_type == 'ForecasterAutoregMultiVariate':
        raise TypeError(
            '`lags` argument must be an int, 1d numpy ndarray, range or list. '
            f"Got {type(lags)}."
        )

    raise TypeError(
        '`lags` argument must be a dict, int, 1d numpy ndarray, range or list. '
        f"Got {type(lags)}."
    )

check_y(y)

Raise Exception if y is not pandas Series or if it has missing values.

Parameters:

Name Type Description Default
y Any

Time series values.

required

Returns:

Type Description
None

Source code in skforecast/utils/utils.py
def check_y(
    y: Any
) -> None:
    """
    Validate `y`: it has to be a pandas Series without missing values.

    Parameters
    ----------
    y : Any
        Time series values.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `y` is not a pandas Series.

    ValueError
        If `y` contains missing values.

    """

    is_series = isinstance(y, pd.Series)
    if not is_series:
        raise TypeError('`y` must be a pandas Series.')

    has_missing = y.isnull().any()
    if has_missing:
        raise ValueError('`y` has missing values.')

    return None

check_exog(exog)

Raise Exception if exog is not pandas Series or pandas DataFrame, or

if it has missing values.

Parameters:

Name Type Description Default
exog Any

Exogenous variable/s included as predictor/s.

required

Returns:

Type Description
None

Source code in skforecast/utils/utils.py
def check_exog(
    exog: Any
) -> None:
    """
    Validate `exog`: it has to be a pandas Series or a pandas DataFrame
    without missing values.

    Parameters
    ----------
    exog : Any
        Exogenous variable/s included as predictor/s.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `exog` is neither a pandas Series nor a pandas DataFrame.

    ValueError
        If `exog` contains missing values.

    """

    if isinstance(exog, (pd.Series, pd.DataFrame)):
        # The double `any` reduces both a Series and a DataFrame to a
        # single boolean.
        if exog.isnull().any().any():
            raise ValueError('`exog` has missing values.')
        return

    raise TypeError('`exog` must be `pd.Series` or `pd.DataFrame`.')

check_predict_input(forecaster_type, steps, fitted, included_exog, index_type, index_freq, window_size, last_window=None, exog=None, exog_type=None, exog_col_names=None, interval=None, max_steps=None, levels=None, series_col_names=None)

Check all inputs of predict method. This is a helper function to validate

that inputs used in predict method match attributes of a forecaster already trained.

Parameters:

Name Type Description Default
forecaster_type str

Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, ForecasterAutoregDirect, ForecasterAutoregMultiSeries, ForecasterAutoregMultiVariate.

required
steps int

Number of future steps predicted.

required
fitted bool

Tag to identify if the regressor has been fitted (trained).

required
included_exog bool

If the forecaster has been trained using exogenous variable/s.

required
index_type type

Type of index of the input used in training.

required
index_freq str

Frequency of Index of the input used in training.

required
window_size int

Size of the window needed to create the predictors. It is equal to max_lag.

required
last_window Union[pandas.core.series.Series, pandas.core.frame.DataFrame]

Values of the series used to create the predictors (lags) needed in the first iteration of prediction (t + 1).

None
exog Union[pandas.core.series.Series, pandas.core.frame.DataFrame]

Exogenous variable/s included as predictor/s.

None
exog_type Optional[type]

Type of exogenous variable/s used in training.

None
exog_col_names Optional[list]

Names of columns of exog if exog used in training was a pandas DataFrame.

None
interval list

Confidence of the prediction interval estimated. Sequence of percentiles to compute, which must be between 0 and 100 inclusive. For example, interval of 95% should be as interval = [2.5, 97.5].

None
max_steps int

Maximum number of steps allowed (ForecasterAutoregDirect and ForecasterAutoregMultiVariate).

None
levels Union[str, list]

Time series to be predicted (ForecasterAutoregMultiSeries).

None
series_col_names list

Names of the columns used during fit (ForecasterAutoregMultiSeries and ForecasterAutoregMultiVariate).

None
Source code in skforecast/utils/utils.py
def check_predict_input(
    forecaster_type: str,
    steps: int,
    fitted: bool,
    included_exog: bool,
    index_type: type,
    index_freq: str,
    window_size: int,
    last_window: Union[pd.Series, pd.DataFrame]=None,
    exog: Union[pd.Series, pd.DataFrame]=None,
    exog_type: Union[type, None]=None,
    exog_col_names: Union[list, None]=None,
    interval: list=None,
    max_steps: int=None,
    levels: Optional[Union[str, list]]=None,
    series_col_names: list=None
) -> None:
    """
    Check all inputs of predict method. This is a helper function to validate
    that inputs used in predict method match attributes of a forecaster already
    trained.

    Parameters
    ----------
    forecaster_type : str
        Forecaster type. ForecasterAutoreg, ForecasterAutoregCustom, 
        ForecasterAutoregDirect, ForecasterAutoregMultiSeries, 
        ForecasterAutoregMultiVariate.

    steps : int, list
        Number of future steps predicted. When a list is given, its values
        are treated as zero-based positions (the error messages report
        `value + 1`).

    fitted : bool
        Tag to identify if the regressor has been fitted (trained).

    included_exog : bool
        If the forecaster has been trained using exogenous variable/s.

    index_type : type
        Type of index of the input used in training.

    index_freq : str
        Frequency of Index of the input used in training.

    window_size : int
        Size of the window needed to create the predictors. It is equal to
        `max_lag`.

    last_window : pandas Series, pandas DataFrame, default `None`
        Values of the series used to create the predictors (lags) needed in
        the first iteration of prediction (t + 1).

    exog : pandas Series, pandas DataFrame, default `None`
        Exogenous variable/s included as predictor/s.

    exog_type : type, default `None`
        Type of exogenous variable/s used in training.

    exog_col_names : list, default `None`
        Names of columns of `exog` if `exog` used in training was a pandas
        DataFrame.

    interval : list, default `None`
        Confidence of the prediction interval estimated. Sequence of percentiles
        to compute, which must be between 0 and 100 inclusive. For example, 
        interval of 95% should be as `interval = [2.5, 97.5]`.

    max_steps : int, default `None`
        Maximum number of steps allowed (`ForecasterAutoregDirect` and 
        `ForecasterAutoregMultiVariate`).

    levels : str, list, default `None`
        Time series to be predicted (`ForecasterAutoregMultiSeries`). A single
        `str` is handled as a one-element list; `None` skips the checks that
        compare level names against column names.

    series_col_names : list, default `None`
        Names of the columns used during fit (`ForecasterAutoregMultiSeries` and 
        `ForecasterAutoregMultiVariate`).

    Returns
    -------
    None

    Raises
    ------
    sklearn.exceptions.NotFittedError
        If `fitted` is `False`.

    ValueError
        If `steps`, `levels`, `exog` or `last_window` hold values that are
        not compatible with the attributes received.

    TypeError
        If `levels`, `exog`, `last_window` or their indexes are not of the
        expected type.

    """

    if not fitted:
        raise sklearn.exceptions.NotFittedError(
            ('This Forecaster instance is not fitted yet. Call `fit` with '
             'appropriate arguments before using predict.')
        )

    if isinstance(steps, int) and steps < 1:
        raise ValueError(
            f'`steps` must be an integer greater than or equal to 1. Got {steps}.'
        )

    if isinstance(steps, list) and min(steps) < 0:
        raise ValueError(
           (f"The minimum value of `steps` must be equal to or greater than 1. "
            f"Got {min(steps) + 1}.")
        )

    if max_steps is not None:
        # An int is already the furthest step; calling `max` on it would
        # fail. Anything else is reduced as a collection of zero-based steps.
        furthest_step = steps if isinstance(steps, int) else max(steps) + 1
        if furthest_step > max_steps:
            raise ValueError(
                (f"The maximum value of `steps` must be less than or equal to "
                 f"the value of steps defined when initializing the forecaster. "
                 f"Got {furthest_step}, but the maximum is {max_steps}.")
            )

    if interval is not None:
        _check_interval(interval = interval)

    # `set('abc')` would split a single level name into characters, so a str
    # is wrapped in a list before any set comparison. `None` stays `None`.
    level_names = [levels] if isinstance(levels, str) else levels

    if forecaster_type == 'ForecasterAutoregMultiSeries':
        if levels is not None and not isinstance(levels, (str, list)):
            raise TypeError(
                f'`levels` must be a `list` of column names, a `str` of a column name or `None`.'
            )

        if level_names is not None and \
           len(set(level_names) - set(series_col_names)) != 0:
            raise ValueError(
                f'`levels` must be in `series_col_names` : {series_col_names}.'
            )

    if exog is None and included_exog:
        raise ValueError(
            ('Forecaster trained with exogenous variable/s. '
             'Same variable/s must be provided in `predict()`.')
        )

    if exog is not None and not included_exog:
        raise ValueError(
            ('Forecaster trained without exogenous variable/s. '
             '`exog` must be `None` in `predict()`.')
        )

    if exog is not None:
        max_step = max(steps)+1 if isinstance(steps, list) else steps
        if len(exog) < max_step:
            raise ValueError(
                f'`exog` must have at least as many values as the distance to '
                f'the maximum step predicted, {max_step}.'
            )
        if not isinstance(exog, (pd.Series, pd.DataFrame)):
            raise TypeError('`exog` must be a pandas Series or DataFrame.')
        if exog.isnull().values.any():
            raise ValueError('`exog` has missing values.')
        if not isinstance(exog, exog_type):
            raise TypeError(
                f'Expected type for `exog`: {exog_type}. Got {type(exog)}.'
            )
        if isinstance(exog, pd.DataFrame):
            col_missing = set(exog_col_names).difference(set(exog.columns))
            if col_missing:
                raise ValueError(
                    (f'Missing columns in `exog`. Expected {exog_col_names}. '
                     f'Got {exog.columns.to_list()}.')
                )
        check_exog(exog = exog)
        # Only the index is needed, so an empty slice is preprocessed.
        _, exog_index = preprocess_exog(exog=exog.iloc[:0, ])

        if not isinstance(exog_index, index_type):
            raise TypeError(
                (f'Expected index of type {index_type} for `exog`. '
                 f'Got {type(exog_index)}.')
            )

        if isinstance(exog_index, pd.DatetimeIndex):
            if not exog_index.freqstr == index_freq:
                raise TypeError(
                    (f'Expected frequency of type {index_freq} for `exog`. '
                     f'Got {exog_index.freqstr}.')
                )

    if last_window is not None:
        if len(last_window) < window_size:
            raise ValueError(
                (f'`last_window` must have as many values as as needed to '
                 f'calculate the predictors. For this forecaster it is {window_size}.')
            )

        if forecaster_type in ['ForecasterAutoregMultiSeries', 'ForecasterAutoregMultiVariate']:
            if not isinstance(last_window, pd.DataFrame):
                raise TypeError(
                    f'`last_window` must be a pandas DataFrame. Got {type(last_window)}.'
                )

            if forecaster_type == 'ForecasterAutoregMultiSeries' and \
               level_names is not None and \
               len(set(level_names) - set(last_window.columns)) != 0:
                raise ValueError(
                    (f'`last_window` must contain a column(s) named as the level(s) to be predicted.\n'
                     f'    `levels` : {levels}.\n'
                     f'    `last_window` columns : {list(last_window.columns)}.')
                )

            if forecaster_type == 'ForecasterAutoregMultiVariate' and \
               (series_col_names != list(last_window.columns)):
                raise ValueError(
                    (f'`last_window` columns must be the same as `series` column names.\n'
                     f'    `last_window` columns : {list(last_window.columns)}.\n'
                     f'    `series` columns      : {series_col_names}.')
                )

        else:
            if not isinstance(last_window, pd.Series):
                raise TypeError('`last_window` must be a pandas Series.')

        # For a DataFrame this only fires when every column contains a
        # missing value.
        # NOTE(review): confirm whether `.any().any()` (as in `check_exog`)
        # was intended; left unchanged to avoid rejecting inputs that are
        # accepted today.
        if last_window.isnull().any().all():
            raise ValueError('`last_window` has missing values.')
        # Only the index is needed, so an empty slice is preprocessed.
        _, last_window_index = preprocess_last_window(
                                    last_window = last_window.iloc[:0]
                                )
        if not isinstance(last_window_index, index_type):
            raise TypeError(
                f'Expected index of type {index_type} for `last_window`. '
                f'Got {type(last_window_index)}.'
            )
        if isinstance(last_window_index, pd.DatetimeIndex):
            if not last_window_index.freqstr == index_freq:
                raise TypeError(
                    f'Expected frequency of type {index_freq} for `last_window`. '
                    f'Got {last_window_index.freqstr}.'
                )

    return

preprocess_y(y)

Returns values and index of series separately. Index is overwritten

according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name Type Description Default
y Series

Time series.

required

Returns:

Type Description
Tuple[numpy.ndarray, pandas.core.indexes.base.Index]

Numpy array with values of y.

Source code in skforecast/utils/utils.py
def preprocess_y(
    y: pd.Series
) -> Tuple[np.ndarray, pd.Index]:
    """
    Split a series into its values and its index. The index is kept or
    replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------
    y : pandas Series
        Time series.

    Returns
    -------
    y_values : numpy ndarray
        Numpy array with values of `y`.

    y_index : pandas Index
        Index of `y` modified according to the rules.

    """

    idx = y.index
    is_datetime = isinstance(idx, pd.DatetimeIndex)
    keep_index = (
        (is_datetime and idx.freq is not None)
        or isinstance(idx, pd.RangeIndex)
    )

    if keep_index:
        return y.to_numpy(), idx

    # From here on the index is always replaced; only the warning differs.
    if is_datetime:
        message = (
            '`y` has DatetimeIndex index but no frequency. '
            'Index is overwritten with a RangeIndex of step 1.'
        )
    else:
        message = (
            '`y` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
        )
    warnings.warn(message)

    replacement = pd.RangeIndex(start=0, stop=len(y), step=1)

    return y.to_numpy(), replacement

preprocess_last_window(last_window)

Returns values and index of series separately. Index is overwritten

according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name Type Description Default
last_window Union[pandas.core.series.Series, pandas.core.frame.DataFrame]

Time series values.

required

Returns:

Type Description
Tuple[numpy.ndarray, pandas.core.indexes.base.Index]

Numpy array with values of last_window.

Source code in skforecast/utils/utils.py
def preprocess_last_window(
    last_window: Union[pd.Series, pd.DataFrame]
) -> Tuple[np.ndarray, pd.Index]:
    """
    Split `last_window` into its values and its index. The index is kept or
    replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------
    last_window : pandas Series, pandas DataFrame
        Time series values.

    Returns
    -------
    last_window_values : numpy ndarray
        Numpy array with values of `last_window`.

    last_window_index : pandas Index
        Index of `last_window` modified according to the rules.

    """

    window_index = last_window.index

    if isinstance(window_index, pd.RangeIndex):
        keep = True
    elif isinstance(window_index, pd.DatetimeIndex):
        # A datetime index is only usable when it carries a frequency.
        keep = window_index.freq is not None
        if not keep:
            warnings.warn(
                '`last_window` has DatetimeIndex index but no frequency. '
                'Index is overwritten with a RangeIndex of step 1.'
            )
    else:
        keep = False
        warnings.warn(
            '`last_window` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
        )

    if not keep:
        window_index = pd.RangeIndex(start=0, stop=len(last_window), step=1)

    return last_window.to_numpy(), window_index

preprocess_exog(exog)

Returns values and index of series separately. Index is overwritten

according to the next rules: If index is of type DatetimeIndex and has frequency, nothing is changed. If index is of type RangeIndex, nothing is changed. If index is of type DatetimeIndex but has no frequency, a RangeIndex is created. If index is not of type DatetimeIndex, a RangeIndex is created.

Parameters:

Name Type Description Default
exog Union[pandas.core.series.Series, pandas.core.frame.DataFrame]

Exogenous variables.

required

Returns:

Type Description
Tuple[numpy.ndarray, pandas.core.indexes.base.Index]

Numpy array with values of exog.

Source code in skforecast/utils/utils.py
def preprocess_exog(
    exog: Union[pd.Series, pd.DataFrame]
) -> Tuple[np.ndarray, pd.Index]:
    """
    Split `exog` into its values and its index. The index is kept or
    replaced according to these rules:
        A DatetimeIndex with frequency is kept.
        A RangeIndex is kept.
        A DatetimeIndex without frequency is replaced by a RangeIndex
        (a warning is issued).
        Any other index is replaced by a RangeIndex (a warning is issued).

    Parameters
    ----------
    exog : pandas Series, pandas DataFrame
        Exogenous variables.

    Returns
    -------
    exog_values : numpy ndarray
        Numpy array with values of `exog`.

    exog_index : pandas Index
        Index of `exog` modified according to the rules.

    """

    current = exog.index
    has_datetime_index = isinstance(current, pd.DatetimeIndex)

    if has_datetime_index and current.freq is not None:
        return exog.to_numpy(), current

    if isinstance(current, pd.RangeIndex):
        return exog.to_numpy(), current

    # Neither of the accepted indexes: warn and fall back to positions.
    if has_datetime_index:
        warnings.warn(
            '`exog` has DatetimeIndex index but no frequency. '
            'Index is overwritten with a RangeIndex of step 1.'
        )
    else:
        warnings.warn(
            '`exog` has no DatetimeIndex nor RangeIndex index. Index is overwritten with a RangeIndex.'
        )

    positional = pd.RangeIndex(start=0, stop=len(exog), step=1)

    return exog.to_numpy(), positional

exog_to_direct(exog, steps)

Transforms exog to np.ndarray with the shape needed for direct

forecasting.

Parameters:

Name Type Description Default
exog ndarray

Time series values.

required
steps int

Number of steps that will be predicted using this exog.

required

Returns:

Type Description
ndarray
Source code in skforecast/utils/utils.py
def exog_to_direct(
    exog: np.ndarray,
    steps: int
)-> np.ndarray:
    """
    Transforms `exog` to `np.ndarray` with the shape needed for direct
    forecasting.

    Every column of `exog` is expanded into `steps` columns: row `i` of the
    expanded block holds the values `exog[i:i + steps]` of that column. The
    blocks of the different columns are placed side by side.

    Parameters
    ----------
    exog : numpy ndarray, shape(samples,) or shape(samples, n_columns)
        Time series values. A 1d array is handled as a single column.

    steps : int.
        Number of steps that will be predicted using this exog.

    Returns
    -------
    exog_transformed : numpy ndarray, shape(samples - steps + 1, steps * n_columns)
        Always a 2d numpy ndarray, also when only one window fits in `exog`.
        If `exog` has fewer than `steps` rows, the array has 0 rows.

    """

    if exog.ndim < 2:
        exog = exog.reshape(-1, 1)

    n_samples, n_columns = exog.shape
    n_windows = n_samples - (steps - 1)

    if n_windows < 1:
        # Not a single complete window fits: return an empty array with the
        # expected number of columns instead of a bare list.
        return np.empty((0, steps * n_columns), dtype=exog.dtype)

    # One (n_windows, steps) block per column. `vstack` is applied even when
    # there is a single window so the result is always a 2d array.
    blocks = [
        np.vstack([exog[i:i + steps, column] for i in range(n_windows)])
        for column in range(n_columns)
    ]

    exog_transformed = np.hstack(blocks)

    return exog_transformed

expand_index(index, steps)

Create a new index of length steps starting at the end of the index.

Parameters:

Name Type Description Default
index Optional[pandas.core.indexes.base.Index]

Index of last window.

required
steps int

Number of steps to expand.

required

Returns:

Type Description
Index
Source code in skforecast/utils/utils.py
def expand_index(
    index: Union[pd.Index, None], 
    steps: int
) -> pd.Index:
    """
    Create a new index of length `steps` starting at the end of the index.

    Parameters
    ----------
    index : pd.Index, None
        Index of last window. A pandas DatetimeIndex is extended with
        `index.freq`, so it is expected to carry a frequency. A pandas
        RangeIndex is extended with consecutive integers. Anything that is
        not a pandas Index (for example `None`) produces a RangeIndex
        starting at 0.

    steps : int
        Number of steps to expand.

    Returns
    -------
    new_index : pd.Index
        Index with `steps` positions that follow the last value of `index`.

    Raises
    ------
    TypeError
        If `index` is a pandas Index other than DatetimeIndex or RangeIndex.

    """

    if not isinstance(index, pd.Index):
        return pd.RangeIndex(start=0, stop=steps)

    if isinstance(index, pd.DatetimeIndex):
        new_index = pd.date_range(
                        index[-1] + index.freq,
                        periods = steps,
                        freq    = index.freq
                    )
    elif isinstance(index, pd.RangeIndex):
        new_index = pd.RangeIndex(
                        start = index[-1] + 1,
                        stop  = index[-1] + 1 + steps
                    )
    else:
        # Without this branch `new_index` was never assigned and the function
        # ended with an UnboundLocalError.
        raise TypeError(
            '`index` must be a pandas DatetimeIndex or RangeIndex. '
            f'Got {type(index)}.'
        )

    return new_index

transform_series(series, transformer, fit=False, inverse_transform=False)

Transform raw values of pandas Series with a scikit-learn alike transformer

(preprocessor). The transformer used must have the following methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

Parameters:

Name Type Description Default
series Series required
transformer scikit-learn alike transformer (preprocessor).

scikit-learn alike transformer (preprocessor) with methods: fit, transform, fit_transform and inverse_transform. ColumnTransformers are not allowed since they do not have inverse_transform method.

required
fit bool

Train the transformer before applying it.

False
inverse_transform bool

Transform back the data to the original representation.

False

Returns:

Type Description
Union[pandas.core.series.Series, pandas.core.frame.DataFrame]

Transformed Series. Depending on the transformer used, the output may be a Series or a DataFrame.

Source code in skforecast/utils/utils.py
def transform_series(
    series: pd.Series,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> Union[pd.Series, pd.DataFrame]:
    """      
    Transform raw values of pandas Series with a scikit-learn alike transformer
    (preprocessor). The transformer used must have the following methods: fit, transform,
    fit_transform and inverse_transform. ColumnTransformers are not allowed since they
    do not have inverse_transform method.

    Parameters
    ----------
    series : pandas Series
        Series to be transformed. It is converted to a single-column
        DataFrame before being passed to the transformer.

    transformer : scikit-learn alike transformer (preprocessor).
        scikit-learn alike transformer (preprocessor) with methods: fit, transform,
        fit_transform and inverse_transform. ColumnTransformers are not allowed since they
        do not have inverse_transform method. If `None`, `series` is returned
        unchanged.

    fit : bool, default `False`
        Train the transformer before applying it. Ignored for
        `FunctionTransformer` instances.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation.

    Returns
    -------
    series_transformed : pandas Series, pandas DataFrame
        Transformed Series. Depending on the transformer used, the output may be a Series
        or a DataFrame. A single-column result is always returned as a Series.

    Raises
    ------
    TypeError
        If `series` is not a pandas Series.

    """

    if not isinstance(series, pd.Series):
        raise TypeError(
            "`series` argument must be a pandas Series."
        )

    if transformer is None:
        return series

    data = series.to_frame()

    if fit and not isinstance(transformer, FunctionTransformer):
        transformer.fit(data)

    if inverse_transform:
        values_transformed = transformer.inverse_transform(data)
    else:
        values_transformed = transformer.transform(data)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense array.
        values_transformed = values_transformed.toarray()

    if isinstance(values_transformed, np.ndarray) and values_transformed.shape[1] == 1:
        series_transformed = pd.Series(
                                 data  = values_transformed.flatten(),
                                 index = data.index,
                                 name  = data.columns[0]
                             )
    elif isinstance(values_transformed, pd.DataFrame) and values_transformed.shape[1] == 1:
        # Squeeze only the column axis: a plain `squeeze()` would collapse a
        # single-row result into a scalar instead of a Series.
        series_transformed = values_transformed.squeeze(axis=1)
    else:
        series_transformed = pd.DataFrame(
                                 data = values_transformed,
                                 index = data.index,
                                 columns = transformer.get_feature_names_out()
                             )

    return series_transformed

transform_dataframe(df, transformer, fit=False, inverse_transform=False)

Transform raw values of pandas DataFrame with a scikit-learn alike

transformer, preprocessor or ColumnTransformer. inverse_transform is not available when using ColumnTransformers.

Parameters:

Name Type Description Default
series pandas DataFrame required
transformer scikit-learn alike transformer, preprocessor or ColumnTransformer.

scikit-learn alike transformer, preprocessor or ColumnTransformer.

required
fit bool

Train the transformer before applying it.

False
inverse_transform bool

Transform back the data to the original representation. This is not available when using transformers of class scikit-learn ColumnTransformers.

False

Returns:

Type Description
DataFrame

Transformed DataFrame.

Source code in skforecast/utils/utils.py
def transform_dataframe(
    df: pd.DataFrame,
    transformer,
    fit: bool=False,
    inverse_transform: bool=False
) -> pd.DataFrame:
    """      
    Transform raw values of pandas DataFrame with a scikit-learn alike
    transformer, preprocessor or ColumnTransformer. `inverse_transform` is not
    available when using ColumnTransformers.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame to be transformed.

    transformer : scikit-learn alike transformer, preprocessor or ColumnTransformer.
        Object exposing `transform` (and `fit_transform` / `inverse_transform`
        when `fit` / `inverse_transform` are requested). If `None`, `df` is
        returned unchanged.

    fit : bool, default `False`
        Train the transformer before applying it. Ignored when
        `inverse_transform` is `True`.

    inverse_transform : bool, default `False`
        Transform back the data to the original representation. This is not available
        when using transformers of class scikit-learn ColumnTransformers.

    Returns
    -------
    df_transformed : pandas DataFrame
        Transformed DataFrame, indexed like `df`.

    Raises
    ------
    TypeError
        If `df` is not a pandas DataFrame.
    Exception
        If `inverse_transform` is requested with a ColumnTransformer.

    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            "`df` argument must be a pandas DataFrame."
        )

    if transformer is None:
        return df

    if inverse_transform and isinstance(transformer, ColumnTransformer):
        raise Exception(
            '`inverse_transform` is not available when using ColumnTransformers.'
        )

    if inverse_transform:
        values_transformed = transformer.inverse_transform(df)
    elif fit:
        values_transformed = transformer.fit_transform(df)
    else:
        values_transformed = transformer.transform(df)

    if hasattr(values_transformed, 'toarray'):
        # If the returned values are in sparse matrix format, it is converted to dense
        values_transformed = values_transformed.toarray()

    if hasattr(transformer, 'get_feature_names_out'):
        feature_names_out = transformer.get_feature_names_out()
    elif hasattr(transformer, 'categories_'):
        # NOTE(review): `categories_` is used as-is for the column labels;
        # confirm this is the intended naming for such transformers.
        feature_names_out = transformer.categories_
    else:
        feature_names_out = df.columns

    if inverse_transform and hasattr(transformer, 'feature_names_in_'):
        # The names selected above describe the transformed space. When the
        # inverse output has a different number of columns, those names cannot
        # label it (the DataFrame constructor would raise), so the names seen
        # at fit time are used instead, provided they match the output width.
        output_shape = getattr(values_transformed, 'shape', ())
        names_in = transformer.feature_names_in_
        if (
            len(output_shape) == 2
            and len(feature_names_out) != output_shape[1]
            and len(names_in) == output_shape[1]
        ):
            feature_names_out = names_in

    df_transformed = pd.DataFrame(
                         data = values_transformed,
                         index = df.index,
                         columns = feature_names_out
                     )

    return df_transformed

multivariate_time_series_corr(time_series, other, lags, method='pearson')

Compute the correlation between time_series and the lagged values of other time series.

Parameters:

Name Type Description Default
time_series Series

Target time series.

required
other DataFrame

Time series whose lagged values are correlated to time_series.

required
lags Union[int, list, numpy ndarray]

Lags to be included in the correlation analysis.

required
method str
  • pearson : standard correlation coefficient.
  • kendall : Kendall Tau correlation coefficient.
  • spearman : Spearman rank correlation.
'pearson'

Returns:

Type Description
DataFrame

Correlation values.

Source code in skforecast/utils/utils.py
def multivariate_time_series_corr(
    time_series: pd.Series,
    other: pd.DataFrame,
    lags: Union[int, list, np.ndarray],
    method: str='pearson'
)-> pd.DataFrame:
    """
    Compute correlation between a time_series and the lagged values of other 
    time series. 

    Parameters
    ----------
    time_series : pandas Series
        Target time series.

    other : pandas DataFrame
        Time series whose lagged values are correlated to `time_series`.

    lags : Union[int, list, numpy ndarray]
        Lags to be included in the correlation analysis. An integer `n`
        (Python or NumPy integer) selects lags `0, 1, ..., n - 1`.

    method : str, default 'pearson'
        - pearson : standard correlation coefficient.
        - kendall : Kendall Tau correlation coefficient.
        - spearman : Spearman rank correlation.

    Returns
    -------
    corr : pandas DataFrame
        Correlation values. One row per lag (index named `lag`) and one
        column per column of `other`.

    Raises
    ------
    ValueError
        If `time_series` and `other` differ in length or in index.

    """

    if len(time_series) != len(other):
        raise ValueError("`time_series` and `other` must have the same length.")

    if not (time_series.index == other.index).all():
        raise ValueError("`time_series` and `other` must have the same index.")

    # NumPy integer scalars are not instances of `int`; accept them as well so
    # they are expanded to a range instead of failing in the loop below.
    if isinstance(lags, (int, np.integer)):
        lags = range(int(lags))

    corr = {}
    for col in other.columns:
        lag_values = pd.DataFrame(
            {lag: other[col].shift(lag) for lag in lags}
        )
        # The target series goes in the first column under the label `None`,
        # which cannot collide with any lag label.
        lag_values.insert(0, None, time_series)
        # First column of the correlation matrix, minus the target's
        # self-correlation in row 0.
        corr[col] = lag_values.corr(method=method).iloc[1:, 0]

    corr = pd.DataFrame(corr)
    corr.index = corr.index.astype(int)
    corr.index.name="lag"

    return corr