Equivalent Pipelines in sktime and skforecast¶
sktime, a well-known forecasting library, provides functionality to apply transformations to both the target variable and the exogenous variables using two distinct classes:

- `TransformedTargetForecaster`: applies the specified transformations to the target series.
- `ForecastingPipeline`: applies the specified transformations to the exogenous variables before passing them to the forecaster.
Similarly, skforecast supports transformations for both the target variable and the exogenous variables through the following arguments, available in all forecasters:

- `transformer_y`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the target variable (see the sketch after this list).
- `transformer_series`: equivalent to `transformer_y` in multi-series forecasters.
- `transformer_exog`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the exogenous variables.
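As a minimal sketch (the specific transformers chained here are arbitrary, illustrative choices, not part of the original example), a scikit-learn pipeline combining several transformations can be passed directly to transformer_y:

# Sketch: sklearn pipeline as transformer_y (illustrative only)
# ======================================================================================
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

# Chain a variance-stabilizing transform with a scaler into a single object
transformer_y = make_pipeline(
    PowerTransformer(method='yeo-johnson', standardize=False),
    StandardScaler()
)

forecaster = ForecasterRecursive(
    regressor     = Ridge(),
    lags          = 15,
    transformer_y = transformer_y
)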
The following document provides a side-by-side comparison of equivalent code in sktime and skforecast for applying transformations to the target variable and the exogenous variables.
Without exogenous variables

skforecast:

from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

forecaster = ForecasterRecursive(
    regressor     = Ridge(random_state=951),
    lags          = 15,
    transformer_y = StandardScaler(),
)
forecaster.fit(y=y)
predictions = forecaster.predict(steps=10)
predictions

sktime:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
)

regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
forecaster = TransformedTargetForecaster(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("regressor", regressor),
    ]
)
forecaster.fit(y=y)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = forecaster.predict(fh=fh)
predictions
With exogenous variables

skforecast:

from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer

forecaster = ForecasterRecursive(
    regressor        = Ridge(random_state=951),
    lags             = 15,
    transformer_y    = BoxCoxTransformer(),
    transformer_exog = StandardScaler()
)
forecaster.fit(y=y, exog=exog)
predictions = forecaster.predict(steps=10, exog=exog_test)
predictions

sktime:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
    ForecastingPipeline,
)

regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
    steps=[
        ("boxcox", BoxCoxTransformer()),
        ("regressor", regressor),
    ]
)
pipe_X = ForecastingPipeline(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("forecaster", pipe_y),
    ]
)
pipe_X.fit(y=y, X=exog)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = pipe_X.predict(fh=fh, X=exog_test)
predictions
⚠ Warning

When working with exogenous variables, both libraries apply the same transformations. However, the results differ because sktime incorporates the lagged values of the exogenous variables into the underlying training matrices, whereas skforecast does not. For example, if 3 lagged values are used and two exogenous variables are included, the underlying training matrices are as follows (see the sketch after this list):

- skforecast: lag_1, lag_2, lag_3, exog_1, exog_2
- sktime: lag_1, lag_2, lag_3, exog_1_lag_1, exog_1_lag_2, exog_1_lag_3, exog_2_lag_1, exog_2_lag_2, exog_2_lag_3
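As a minimal sketch (using a small synthetic series and assuming skforecast's create_train_X_y helper), the training matrix built by ForecasterRecursive can be inspected directly to see that exogenous columns are not lagged:

# Sketch: inspect skforecast's training matrix (synthetic data, illustrative only)
# ======================================================================================
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

y = pd.Series(np.arange(10, dtype=float), name='y')
exog = pd.DataFrame({
    'exog_1': np.arange(100, 110, dtype=float),
    'exog_2': np.arange(200, 210, dtype=float),
})

forecaster = ForecasterRecursive(regressor=Ridge(), lags=3)
X_train, y_train = forecaster.create_train_X_y(y=y, exog=exog)
print(X_train.columns.tolist())
# Expected: ['lag_1', 'lag_2', 'lag_3', 'exog_1', 'exog_2'] (no lagged exog columns)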
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from skforecast.datasets import fetch_dataset
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
# skforecast
from skforecast.recursive import ForecasterRecursive
# sktime
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
    make_reduction,
    TransformedTargetForecaster,
    ForecastingPipeline,
)
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
# Data
# ======================================================================================
data = fetch_dataset(name='fuel_consumption')
data = data.rename(columns={'Gasolinas': 'litters'})
data = data.rename_axis('date')
data = data.loc[:'1990-01-01 00:00:00']
data = data[['litters']]
data['month'] = data.index.month
data['year'] = data.index.year
display(data.head(4))
fuel_consumption
----------------
Monthly fuel consumption in Spain from 1969-01-01 to 2022-08-01. Obtained from Corporación de Reservas Estratégicas de Productos Petrolíferos and Corporación de Derecho Público tutelada por el Ministerio para la Transición Ecológica y el Reto Demográfico. https://www.cores.es/es/estadisticas
Shape of the dataset: (644, 5)
| date       | litters     | month | year |
|------------|-------------|-------|------|
| 1969-01-01 | 166875.2129 | 1     | 1969 |
| 1969-02-01 | 155466.8105 | 2     | 1969 |
| 1969-03-01 | 184983.6699 | 3     | 1969 |
| 1969-04-01 | 202319.8164 | 4     | 1969 |
# Train-test dates
# ======================================================================================
end_train = '1980-01-01 23:59:59'
data_train = data.loc[:end_train]
data_test = data.loc[end_train:]
Sktime¶
# Sktime pipeline
# ======================================================================================
regressor = make_reduction(Ridge(), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
    steps=[
        ("boxcox", BoxCoxTransformer()),
        ("regressor", regressor),
    ]
)
pipe_X = ForecastingPipeline(
    steps=[
        ("scaler", TabularToSeriesAdaptor(StandardScaler())),
        ("forecaster", pipe_y),
    ]
)
pipe_X.fit(y=data_train['litters'], X=data_train[['month', 'year']])
fh = ForecastingHorizon(np.arange(1, len(data_test) + 1), is_relative=True)
predictions_sktime = pipe_X.predict(fh=fh, X=data_test[['month', 'year']])
predictions_sktime
1980-02-01    430096.815068
1980-03-01    472406.420587
1980-04-01    509203.559184
1980-05-01    495910.509282
1980-06-01    518548.672893
                  ...
1989-09-01    820033.569581
1989-10-01    801291.145367
1989-11-01    756075.962331
1989-12-01    795345.389792
1990-01-01    746317.734572
Freq: MS, Name: litters, Length: 120, dtype: float64
Skforecast¶
# Skforecast with transformations
# ======================================================================================
forecaster = ForecasterRecursive(
    regressor        = Ridge(),
    lags             = 15,
    transformer_y    = BoxCoxTransformer(),
    transformer_exog = StandardScaler()
)
forecaster.fit(y=data_train['litters'], exog=data_train[['month', 'year']])
predictions_skforecast = forecaster.predict(steps=len(data_test), exog=data_test[['month', 'year']])
predictions_skforecast
1980-02-01    427508.153706
1980-03-01    487904.492766
1980-04-01    524565.943847
1980-05-01    506245.770327
1980-06-01    531938.860717
                  ...
1989-09-01    770334.700792
1989-10-01    753315.656399
1989-11-01    787562.026285
1989-12-01    743408.935078
1990-01-01    682958.500996
Freq: MS, Name: pred, Length: 120, dtype: float64
# Transformation results
# ======================================================================================
results = pd.DataFrame({
    'sktime': predictions_sktime,
    'skforecast': predictions_skforecast,
})
results
|            | sktime        | skforecast    |
|------------|---------------|---------------|
| 1980-02-01 | 430096.815068 | 427508.153706 |
| 1980-03-01 | 472406.420587 | 487904.492766 |
| 1980-04-01 | 509203.559184 | 524565.943847 |
| 1980-05-01 | 495910.509282 | 506245.770327 |
| 1980-06-01 | 518548.672893 | 531938.860717 |
| ...        | ...           | ...           |
| 1989-09-01 | 820033.569581 | 770334.700792 |
| 1989-10-01 | 801291.145367 | 753315.656399 |
| 1989-11-01 | 756075.962331 | 787562.026285 |
| 1989-12-01 | 795345.389792 | 743408.935078 |
| 1990-01-01 | 746317.734572 | 682958.500996 |
120 rows × 2 columns
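Although both pipelines apply the same transformations, the different training matrices lead to diverging forecasts. As a minimal sketch (not part of the original notebook), one way to quantify the divergence between the two prediction series:

# Sketch: quantify the divergence between both prediction series (illustrative only)
# ======================================================================================
abs_pct_diff = (results['sktime'] - results['skforecast']).abs() / results['sktime'].abs()
print(f"Mean absolute percentage difference: {abs_pct_diff.mean():.2%}")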
Equivalent transformations¶
The following examples show the equivalent transformations in sktime and skforecast:
# Box-Cox transformation
# ======================================================================================
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sklearn.preprocessing import PowerTransformer
# sktime
transformer_sktime = BoxCoxTransformer()
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast (standardize=False so the Box-Cox output is not re-scaled, matching sktime)
transformer_skforecast = PowerTransformer(method='box-cox', standardize=False)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Differencing
# ======================================================================================
from sktime.transformations.series.difference import Differencer
from skforecast.preprocessing import TimeSeriesDifferentiator
# sktime ([1:] drops the first value, which has no previous observation to difference)
transformer_sktime = Differencer(lags=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])[1:]
# skforecast
transformer_skforecast = TimeSeriesDifferentiator(order=1)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train['litters'].to_numpy())[1:]
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Log transformation
# ======================================================================================
from sklearn.preprocessing import FunctionTransformer
from sktime.transformations.series.boxcox import LogTransformer
# sktime
transformer_sktime = LogTransformer(offset=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast (np.log1p computes log(x + 1), matching sktime's LogTransformer with offset=1)
transformer_skforecast = FunctionTransformer(func=np.log1p, inverse_func=np.expm1, validate=True)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
/home/ubuntu/anaconda3/envs/skforecast_15_py12/lib/python3.12/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but FunctionTransformer was fitted with feature names warnings.warn(