Equivalent Pipelines in sktime and skforecast¶
Sktime, a well-known forecasting library, provides functionality to apply transformations to both the target variable and exogenous variables using two distinct classes:
- `TransformedTargetForecaster`: applies the specified transformations to the target series.
- `ForecastingPipeline`: applies the specified transformations to the exogenous variables before passing them to the forecaster.
Similarly, skforecast supports transformations for both the target variable and exogenous variables through the following arguments present in all forecasters:
- `transformer_y`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the target variable.
- `transformer_series`: equivalent to `transformer_y` in multi-series forecasters.
- `transformer_exog`: applies the specified transformations (a single transformer or a sklearn pipeline with multiple transformers) to the exogenous variables.
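For example, several transformers can be chained in a single sklearn pipeline and passed through these arguments. The snippet below is a minimal sketch (the regressor and transformers are illustrative choices, not part of the comparison that follows):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PowerTransformer, StandardScaler
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

# transformer_y accepts a single transformer or a full sklearn pipeline;
# transformer_exog is applied to the exogenous variables before fitting.
forecaster = ForecasterRecursive(
    regressor        = Ridge(),
    lags             = 15,
    transformer_y    = make_pipeline(PowerTransformer(), StandardScaler()),
    transformer_exog = StandardScaler()
)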
This document provides a side-by-side comparison of equivalent code in sktime and skforecast for applying transformations to the target and exogenous variables.
Without exogenous variables
skforecast:
from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
forecaster = ForecasterRecursive(
regressor = Ridge(random_state=951),
lags = 15,
transformer_y = StandardScaler(),
)
forecaster.fit(y=y)
predictions = forecaster.predict(steps=10)
predictions
sktime:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
make_reduction,
TransformedTargetForecaster,
)
regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
forecaster = TransformedTargetForecaster(
steps=[
("boxcox", TabularToSeriesAdaptor(StandardScaler())),
("regressor", regressor),
]
)
forecaster.fit(y=y)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = forecaster.predict(fh=fh)
predictions
With exogenous variables
skforecast:
from skforecast.recursive import ForecasterRecursive
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer
forecaster = ForecasterRecursive(
regressor = Ridge(random_state=951),
lags = 15,
transformer_y = BoxCoxTransformer(),
transformer_exog = StandardScaler()
)
forecaster.fit(y=y)
predictions = forecaster.predict(steps=10)
predictions
sktime:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
make_reduction,
TransformedTargetForecaster,
ForecastingPipeline,
)
regressor = make_reduction(Ridge(random_state=951), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
steps=[
("boxcox", BoxCoxTransformer()),
("regressor", regressor),
]
)
pipe_X = ForecastingPipeline(
steps=[
("scaler", TabularToSeriesAdaptor(StandardScaler())),
("forecaster", pipe_y),
]
)
pipe_X.fit(y=y, X=exog)
fh = ForecastingHorizon(np.arange(1, 11), is_relative=True)
predictions = pipe_X.predict(fh=fh, X=exog_test)
predictions
⚠ Warning
When working with exogenous variables, both libraries apply the same transformations. However, the results differ because sktime incorporates the lagged values of the exogenous variables into the underlying training matrices, whereas skforecast does not. For example, if 3 lagged values are used and two exogenous variables are included, the underlying training matrices are as follows (a sketch for inspecting the skforecast matrix is shown after this list):

- skforecast: `lag_1, lag_2, lag_3, exog_1, exog_2`
- sktime: `lag_1, lag_2, lag_3, exog_1_lag_1, exog_1_lag_2, exog_1_lag_3, exog_2_lag_1, exog_2_lag_2, exog_2_lag_3`
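The skforecast matrix can be inspected with the forecaster's create_train_X_y method. The following is a minimal sketch on synthetic data (the series and exogenous values are made up for illustration only):

# skforecast training matrix (sketch with synthetic data)
# ======================================================================================
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from skforecast.recursive import ForecasterRecursive

y_demo = pd.Series(
    np.arange(20, dtype=float),
    index = pd.date_range('2000-01-01', periods=20, freq='MS'),
    name  = 'y'
)
exog_demo = pd.DataFrame(
    {'exog_1': np.arange(20, dtype=float), 'exog_2': np.arange(20, dtype=float) * 2},
    index = y_demo.index
)

# One column per lag of the target plus the exogenous columns as-is, no exogenous lags
forecaster = ForecasterRecursive(regressor=Ridge(), lags=3)
X_train, y_train = forecaster.create_train_X_y(y=y_demo, exog=exog_demo)
print(X_train.columns.tolist())
# Expected: ['lag_1', 'lag_2', 'lag_3', 'exog_1', 'exog_2']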
# Libraries
# ======================================================================================
import numpy as np
import pandas as pd
from skforecast.datasets import fetch_dataset
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
# skforecast
from skforecast.recursive import ForecasterRecursive
# sktime
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import (
make_reduction,
TransformedTargetForecaster,
ForecastingPipeline,
)
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
# Data
# ======================================================================================
data = fetch_dataset(name='fuel_consumption')
data = data.rename(columns={'Gasolinas': 'litters'})
data = data.rename_axis('date')
data = data.loc[:'1990-01-01 00:00:00']
data = data[['litters']]
data['month'] = data.index.month
data['year'] = data.index.year
display(data.head(4))
fuel_consumption
----------------
Monthly fuel consumption in Spain from 1969-01-01 to 2022-08-01. Obtained from Corporación de Reservas Estratégicas de Productos Petrolíferos and Corporación de Derecho Público tutelada por el Ministerio para la Transición Ecológica y el Reto Demográfico. https://www.cores.es/es/estadisticas
Shape of the dataset: (644, 5)
| date | litters | month | year |
|---|---|---|---|
| 1969-01-01 | 166875.2129 | 1 | 1969 |
| 1969-02-01 | 155466.8105 | 2 | 1969 |
| 1969-03-01 | 184983.6699 | 3 | 1969 |
| 1969-04-01 | 202319.8164 | 4 | 1969 |
# Train-test dates
# ======================================================================================
end_train = '1980-01-01 23:59:59'
data_train = data.loc[:end_train]
data_test = data.loc[end_train:]
Sktime¶
# Sktime pipeline
# ======================================================================================
regressor = make_reduction(Ridge(), window_length=15, strategy="recursive")
pipe_y = TransformedTargetForecaster(
steps=[
("boxcox", BoxCoxTransformer()),
("regressor", regressor),
]
)
pipe_X = ForecastingPipeline(
steps=[
("scaler", TabularToSeriesAdaptor(StandardScaler())),
("forecaster", pipe_y),
]
)
pipe_X.fit(y=data_train['litters'], X=data_train[['month', 'year']])
fh = ForecastingHorizon(np.arange(1, len(data_test) + 1), is_relative=True)
predictions_sktime = pipe_X.predict(fh=fh, X=data_test[['month', 'year']])
predictions_sktime
1980-02-01 430096.815068
1980-03-01 472406.420587
1980-04-01 509203.559184
1980-05-01 495910.509282
1980-06-01 518548.672893
...
1989-09-01 820033.569581
1989-10-01 801291.145367
1989-11-01 756075.962331
1989-12-01 795345.389792
1990-01-01 746317.734572
Freq: MS, Name: litters, Length: 120, dtype: float64
Skforecast¶
# Skforecast with transformations
# ======================================================================================
forecaster = ForecasterRecursive(
regressor = Ridge(),
lags = 15,
transformer_y = BoxCoxTransformer(),
transformer_exog = StandardScaler()
)
forecaster.fit(y=data_train['litters'], exog=data_train[['month', 'year']])
predictions_skforecast = forecaster.predict(steps=len(data_test), exog=data_test[['month', 'year']])
predictions_skforecast
1980-02-01 427508.153706
1980-03-01 487904.492766
1980-04-01 524565.943847
1980-05-01 506245.770327
1980-06-01 531938.860717
...
1989-09-01 770334.700792
1989-10-01 753315.656399
1989-11-01 787562.026285
1989-12-01 743408.935078
1990-01-01 682958.500996
Freq: MS, Name: pred, Length: 120, dtype: float64
# Transformation results
# ======================================================================================
results = pd.DataFrame({
'sktime': predictions_sktime,
'skforecast': predictions_skforecast,
})
results
| date | sktime | skforecast |
|---|---|---|
| 1980-02-01 | 430096.815068 | 427508.153706 |
| 1980-03-01 | 472406.420587 | 487904.492766 |
| 1980-04-01 | 509203.559184 | 524565.943847 |
| 1980-05-01 | 495910.509282 | 506245.770327 |
| 1980-06-01 | 518548.672893 | 531938.860717 |
| ... | ... | ... |
| 1989-09-01 | 820033.569581 | 770334.700792 |
| 1989-10-01 | 801291.145367 | 753315.656399 |
| 1989-11-01 | 756075.962331 | 787562.026285 |
| 1989-12-01 | 795345.389792 | 743408.935078 |
| 1990-01-01 | 746317.734572 | 682958.500996 |
120 rows × 2 columns
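As anticipated in the warning above, both pipelines produce similar but not identical forecasts. A quick way to quantify the divergence, reusing the results DataFrame built above (a small sketch, not part of the original comparison):

# Difference between both sets of predictions
# ======================================================================================
abs_diff = (results['sktime'] - results['skforecast']).abs()
print(f"Mean absolute difference : {abs_diff.mean():.1f}")
print(f"Mean relative difference : {(abs_diff / results['skforecast']).mean():.2%}")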
Equivalent transformations¶
The following table shows the equivalent transformations in sktime and skforecast; the code cells below verify that each pair produces the same output:

| sktime | skforecast |
|---|---|
| `BoxCoxTransformer()` | `PowerTransformer(method='box-cox', standardize=False)` |
| `Differencer(lags=1)` | `TimeSeriesDifferentiator(order=1)` |
| `LogTransformer(offset=1)` | `FunctionTransformer(func=np.log1p, inverse_func=np.expm1)` |
# Box-Cox transformation
# ======================================================================================
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sklearn.preprocessing import PowerTransformer
# sktime
transformer_sktime = BoxCoxTransformer()
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast
transformer_skforecast = PowerTransformer(method='box-cox', standardize=False)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Differencing
# ======================================================================================
from sktime.transformations.series.difference import Differencer
from skforecast.preprocessing import TimeSeriesDifferentiator
# sktime
transformer_sktime = Differencer(lags=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])[1:]
# skforecast
transformer_skforecast = TimeSeriesDifferentiator(order=1)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train['litters'].to_numpy())[1:]
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
# Log transformation
# ======================================================================================
from sklearn.preprocessing import FunctionTransformer
from sktime.transformations.series.boxcox import LogTransformer
# sktime
transformer_sktime = LogTransformer(offset=1)
y_hat_sktime = transformer_sktime.fit_transform(data_train['litters'])
# skforecast
transformer_skforecast = FunctionTransformer(func=np.log1p, inverse_func=np.expm1, validate=True)
y_hat_skforecast = transformer_skforecast.fit_transform(data_train[['litters']]).flatten()
np.testing.assert_allclose(y_hat_sktime, y_hat_skforecast)
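As a final sanity check (a small sketch, not part of the original comparison), the FunctionTransformer defined above can also reverse the transformation, recovering the original series:

# Round trip: invert the log transformation and compare with the original data
# ======================================================================================
y_back = transformer_skforecast.inverse_transform(y_hat_skforecast.reshape(-1, 1)).flatten()
np.testing.assert_allclose(y_back, data_train['litters'].to_numpy())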