Equivalent Pipelines in sktime and skforecast¶
sktime, a well-known forecasting library, provides functionality to apply transformations to both the target variable and the exogenous variables using two distinct classes:

- `TransformedTargetForecaster`: applies the specified transformations to the target series.
- `ForecastingPipeline`: applies the specified transformations to the exogenous variables before passing them to the forecaster.
Similarly, skforecast supports transformations for both the target variable and the exogenous variables through the following arguments, available in all forecasters:

- `transformer_y`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the target variable (see the sketch after this list).
- `transformer_series`: equivalent to `transformer_y` in multi-series forecasters.
- `transformer_exog`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the exogenous variables.
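As a minimal sketch (the specific transformers chained here are arbitrary, illustrative choices, not part of the original example), a scikit-learn pipeline combining several transformations can be passed directly to transformer_y:

# Sketch: sklearn pipeline as transformer_y (illustrative only)
# ======================================================================================
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

# Chain a variance-stabilizing transform with a scaler into a single object
transformer_y = make_pipeline(
    PowerTransformer(method='yeo-johnson', standardize=False),
    StandardScaler()
)

forecaster = ForecasterRecursive(
    regressor     = Ridge(),
    lags          = 15,
    transformer_y = transformer_y
)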
The following document provides a side-by-side comparison of equivalent code in sktime and skforecast for applying transformations to the target variable and the exogenous variables.
Without exogenous variables

skforecast:

from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

forecaster = ForecasterRecursive(
    regressor     = Ridge(random_state=951),
    lags          = 15,
    transformer_y = StandardScaler(),
)
forecaster.fit(y=y)
predictions = forecaster.predict(steps=10)
predictions

sktime:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
)

regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
forecaster = TransformedTargetForecaster(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("regressor", regressor),
    ]
)
forecaster.fit(y=y)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = forecaster.predict(fh=fh)
predictions
With exogenous variables

skforecast:

from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer

forecaster = ForecasterRecursive(
    regressor        = Ridge(random_state=951),
    lags             = 15,
    transformer_y    = BoxCoxTransformer(),
    transformer_exog = StandardScaler()
)
forecaster.fit(y=y, exog=exog)
predictions = forecaster.predict(steps=10, exog=exog_test)
predictions

sktime:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
    ForecastingPipeline,
)

regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
    steps=[
        ("boxcox", BoxCoxTransformer()),
        ("regressor", regressor),
    ]
)
pipe_X = ForecastingPipeline(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("forecaster", pipe_y),
    ]
)
pipe_X.fit(y=y, X=exog)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = pipe_X.predict(fh=fh, X=exog_test)
predictions
⚠ Warning

When working with exogenous variables, both libraries apply the same transformations. However, the results differ because sktime incorporates the lagged values of the exogenous variables into the underlying training matrices, whereas skforecast does not. For example, if 3 lagged values are used and two exogenous variables are included, the underlying training matrices are as follows (see the sketch after this list):

- skforecast: lag_1, lag_2, lag_3, exog_1, exog_2
- sktime: lag_1, lag_2, lag_3, exog_1_lag_1, exog_1_lag_2, exog_1_lag_3, exog_2_lag_1, exog_2_lag_2, exog_2_lag_3
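As a minimal sketch (using a small synthetic series and assuming skforecast's create_train_X_y helper), the training matrix built by ForecasterRecursive can be inspected directly to see that exogenous columns are not lagged:

# Sketch: inspect skforecast's training matrix (synthetic data, illustrative only)
# ======================================================================================
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

y = pd.Series(np.arange(10, dtype=float), name='y')
exog = pd.DataFrame({
    'exog_1': np.arange(100, 110, dtype=float),
    'exog_2': np.arange(200, 210, dtype=float),
})

forecaster = ForecasterRecursive(regressor=Ridge(), lags=3)
X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
print(X_train.columns.tolist())
# Expected: ['lag_1', 'lag_2', 'lag_3', 'exog_1', 'exog_2'] (no lagged exog columns)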
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from skforecast.datasets import fetch_dataset
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
# skforecast
from skforecast.recursive import ForecasterRecursive
# sktime
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
    ForecastingPipeline,
)
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
# Data
# ======================================================================================
data = fetch_dataset(name='fuel_consumption')
data = data.rename(columns={'Gasolinas': 'litters'})
data = data.rename_axis('date')
data = data.loc[:'1990-01-01 00:00:00']
data = data[['litters']]
data['month'] = data.index.month
data['year'] = data.index.year
display(data.head(4))
fuel_consumption
----------------
Monthly fuel consumption in Spain from 1969-01-01 to 2022-08-01. Obtained from Corporación de Reservas Estratégicas de Productos Petrolíferos and Corporación de Derecho Público tutelada por el Ministerio para la Transición Ecológica y el Reto Demográfico. https://www.cores.es/es/estadisticas
Shape of the dataset: (644, 5)
| date       | litters     | month | year |
|------------|-------------|-------|------|
| 1969-01-01 | 166875.2129 | 1     | 1969 |
| 1969-02-01 | 155466.8105 | 2     | 1969 |
| 1969-03-01 | 184983.6699 | 3     | 1969 |
| 1969-04-01 | 202319.8164 | 4     | 1969 |
# Train-test dates
# ======================================================================================
end_train = '1980-01-01 23:59:59'
data_train = data.loc[:end_train]
data_test = data.loc[end_train:]
Sktime¶
# Sktime pipeline
# ======================================================================================
regressor = make_reduction(Ridge(), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
    steps=[
        ("boxcox", BoxCoxTransformer()),
        ("regressor", regressor),
    ]
)
pipe_X = ForecastingPipeline(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("forecaster", pipe_y),
    ]
)
pipe_X.fit(y=data_train['litters'], X=data_train[['month', 'year']])
fh = ForecastingHorizon(np.arange(1, len(data_test) + 1), is_relative=True)
predictions_sktime = pipe_X.predict(fh=fh, X=data_test[['month', 'year']])
predictions_sktime
1980-02-01    430096.815068
1980-03-01    472406.420587
1980-04-01    509203.559184
1980-05-01    495910.509282
1980-06-01    518548.672893
                  ...
1989-09-01    820033.569581
1989-10-01    801291.145367
1989-11-01    756075.962331
1989-12-01    795345.389792
1990-01-01    746317.734572
Freq: MS, Name: litters, Length: 120, dtype: float64
Skforecast¶
# Skforecast with transformations
# ======================================================================================
forecaster = ForecasterRecursive(
    regressor        = Ridge(),
    lags             = 15,
    transformer_y    = BoxCoxTransformer(),
    transformer_exog = StandardScaler()
)
forecaster.fit(y=data_train['litters'], exog=data_train[['month', 'year']])
predictions_skforecast = forecaster.predict(steps=len(data_test), exog=data_test[['month', 'year']])
predictions_skforecast
1980-02-01    427508.153706
1980-03-01    487904.492766
1980-04-01    524565.943847
1980-05-01    506245.770327
1980-06-01    531938.860717
                  ...
1989-09-01    770334.700792
1989-10-01    753315.656399
1989-11-01    787562.026285
1989-12-01    743408.935078
1990-01-01    682958.500996
Freq: MS, Name: pred, Length: 120, dtype: float64
# Transformation results
# ======================================================================================
results = pd.DataFrame({
    'sktime': predictions_sktime,
    'skforecast': predictions_skforecast,
})
results
|            | sktime        | skforecast    |
|------------|---------------|---------------|
| 1980-02-01 | 430096.815068 | 427508.153706 |
| 1980-03-01 | 472406.420587 | 487904.492766 |
| 1980-04-01 | 509203.559184 | 524565.943847 |
| 1980-05-01 | 495910.509282 | 506245.770327 |
| 1980-06-01 | 518548.672893 | 531938.860717 |
| ...        | ...           | ...           |
| 1989-09-01 | 820033.569581 | 770334.700792 |
| 1989-10-01 | 801291.145367 | 753315.656399 |
| 1989-11-01 | 756075.962331 | 787562.026285 |
| 1989-12-01 | 795345.389792 | 743408.935078 |
| 1990-01-01 | 746317.734572 | 682958.500996 |
120 rows × 2 columns
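Although both pipelines apply the same transformations, the different training matrices lead to diverging forecasts. As a minimal sketch (not part of the original notebook), one way to quantify the divergence between the two prediction series:

# Sketch: quantify the divergence between both prediction series (illustrative only)
# ======================================================================================
abs_pct_diff = (results['sktime'] - results['skforecast']).abs() / results['sktime'].abs()
print(f"Mean absolute percentage difference: {abs_pct_diff.mean():.2%}")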
Equivalent transformations¶
The following examples show the equivalent transformations in sktime and skforecast:
# Box-Cox transformation
# ======================================================================================
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sklearn.preprocessing import PowerTransformer
# sktime
transformer_sktime = BoxCoxTransformer()
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast (standardize=False so the Box-Cox output is not re-scaled, matching sktime)
transformer_skforecast = PowerTransformer(method='box-cox', standardize=False)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Differencing
# ======================================================================================
from sktime.transformations.series.difference import Differencer
from skforecast.preprocessing import TimeSeriesDifferentiator
# sktime ([1:] drops the first value, which has no previous observation to difference)
transformer_sktime = Differencer(lags=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])[1:]
# skforecast
transformer_skforecast = TimeSeriesDifferentiator(order=1)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train['litters'].to_numpy())[1:]
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Log transformation
# ======================================================================================
from sklearn.preprocessing import FunctionTransformer
from sktime.transformations.series.boxcox import LogTransformer
# sktime
transformer_sktime = LogTransformer(offset=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast (np.log1p computes log(x + 1), matching sktime's LogTransformer with offset=1)
transformer_skforecast = FunctionTransformer(func=np.log1p, inverse_func=np.expm1, validate=True)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
/home/ubuntu/anaconda3/envs/skforecast_15_py12/lib/python3.12/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but FunctionTransformer was fitted with feature names warnings.warn(