Save and load forecasters¶
Skforecast models can be easily saved and loaded from disk using the joblib library. Two handy functions, save_forecaster
and load_forecaster
are available to streamline this process. See below for a simple example.
A forecaster_id
has been included when initializing the Forecaster; this may help to identify the target of the model.
✎ Note
Learn how to use forecaster models in production.
Libraries and data¶
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from skforecast.datasets import fetch_dataset
from skforecast.recursive import ForecasterRecursive
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
# Download data
# ==============================================================================
data = fetch_dataset(
    name="h2o",
    raw=True,
    kwargs_read_csv={"names": ["y", "date"], "header": 0},
)
# Parse the dates, use them as the index and set a month-start ('MS') frequency
data = (
    data
    .assign(date=pd.to_datetime(data['date'], format='%Y-%m-%d'))
    .set_index('date')
    .asfreq('MS')
)
h2o --- Monthly expenditure ($AUD) on corticosteroid drugs that the Australian health system had between 1991 and 2008. Hyndman R (2023). fpp3: Data for Forecasting: Principles and Practice (3rd Edition). http://pkg.robjhyndman.com/fpp3package/, https://github.com/robjhyndman/fpp3package, http://OTexts.com/fpp3. Shape of the dataset: (204, 2)
Save and load forecaster model¶
# Create and train forecaster
# ==============================================================================
forecaster = ForecasterRecursive(
    regressor=RandomForestRegressor(random_state=123),
    lags=5,
    # Identifier stored with the forecaster; it is shown in the summary
    forecaster_id="forecaster_001",
)
forecaster.fit(y=data['y'])
forecaster.predict(steps=3)
2008-07-01 0.714526 2008-08-01 0.789144 2008-09-01 0.818433 Freq: MS, Name: pred, dtype: float64
# Save model
# ==============================================================================
save_forecaster(
    forecaster,
    file_name='forecaster_001.joblib',
    verbose=False,
)
# Load model (verbose=True prints the summary of the loaded forecaster)
# ==============================================================================
forecaster_loaded = load_forecaster(
    'forecaster_001.joblib',
    verbose=True,
)
=================== ForecasterRecursive =================== Regressor: RandomForestRegressor Lags: [1 2 3 4 5] Window features: None Window size: 5 Exogenous included: False Exogenous names: None Transformer for y: None Transformer for exog: None Weight function included: False Differentiation order: None Training range: [Timestamp('1991-07-01 00:00:00'), Timestamp('2008-06-01 00:00:00')] Training index type: DatetimeIndex Training index frequency: MS Regressor parameters: {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 123, 'verbose': 0, 'warm_start': False} fit_kwargs: {} Creation date: 2024-11-19 09:20:55 Last fit date: 2024-11-19 09:20:55 Skforecast version: 0.14.0 Python version: 3.11.10 Forecaster id: forecaster_001
# Predict with the loaded forecaster (the output matches the predictions of
# the original forecaster shown above)
# ==============================================================================
forecaster_loaded.predict(steps=3)
2008-07-01 0.714526 2008-08-01 0.789144 2008-09-01 0.818433 Freq: MS, Name: pred, dtype: float64
# Forecaster identifier (the `forecaster_id` passed when the forecaster was
# initialized)
# ==============================================================================
forecaster.forecaster_id
'forecaster_001'
Saving and Loading a Forecaster Model with Custom Features¶
Sometimes external objects are needed when creating a Forecaster. For example:
Custom class to create window and custom features.
A function to reduce the impact of some dates on the model (see Weighted Time Series Forecasting).
For your code to work properly, these classes and functions must be available in the environment where the Forecaster is loaded.
# Custom class to create rolling skewness features
# ==============================================================================
from scipy.stats import skew
class RollingSkewness():
    """
    Custom class to create rolling skewness features.

    Parameters
    ----------
    window_sizes : int, list
        Size of the rolling window. A scalar is wrapped in a list; only the
        first element is used by `transform_batch` and `transform`.
    features_names : str, list, default 'rolling_skewness'
        Name of the generated feature, either as a string or as a list
        holding exactly one name. It is stored unchanged.

    Raises
    ------
    ValueError
        If `features_names` is a list that does not contain exactly one name.
    """

    def __init__(
        self,
        window_sizes,
        features_names: "str | list" = 'rolling_skewness'
    ):
        if not isinstance(window_sizes, list):
            window_sizes = [window_sizes]
        # A single statistic is produced, so only one name can be used.
        if isinstance(features_names, list) and len(features_names) != 1:
            raise ValueError(
                "`features_names` must be a string or a list with exactly "
                "one name because only one feature is created."
            )
        self.window_sizes = window_sizes
        self.features_names = features_names

    def _feature_name(self) -> str:
        """Return the output column name whether it was given as str or list."""
        names = self.features_names
        return names if isinstance(names, str) else names[0]

    def transform_batch(self, X: pd.Series) -> pd.DataFrame:
        """
        Compute the rolling skewness of a whole series.

        Windows are left-closed (`closed='left'`), so each row is computed
        from the observations that precede it. Rows for which the skewness
        is NaN are dropped.

        Parameters
        ----------
        X : pandas Series
            Series to transform.

        Returns
        -------
        pandas DataFrame
            Single-column DataFrame named after `features_names`.
        """
        rolling_obj = X.rolling(
            window=self.window_sizes[0], center=False, closed='left'
        )
        # A list cannot be used as a dict key, hence the resolved string name.
        rolling_skewness = pd.DataFrame(
            {self._feature_name(): rolling_obj.skew()}
        ).dropna()

        return rolling_skewness

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Compute the skewness of the most recent window of a 1D array.

        Parameters
        ----------
        X : numpy ndarray
            One-dimensional array of past values.

        Returns
        -------
        numpy ndarray
            Array of length 1 with the bias-corrected skewness, or NaN when
            no non-NaN value is available.
        """
        # Use only the trailing window so the statistic matches the window
        # used in `transform_batch`, even if a longer history is supplied.
        X = X[-self.window_sizes[0]:]
        X = X[~np.isnan(X)]
        if len(X) > 0:
            rolling_skewness = np.array([skew(X, bias=False)])
        else:
            rolling_skewness = np.array([np.nan])

        return rolling_skewness
# Custom function to create weights
# ==============================================================================
def custom_weights(index):
    """
    Return 0 if index is between 2004-01-01 and 2005-01-01.

    Both bounds are inclusive; every other position gets weight 1.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Index of the observations to weight (it is compared against date
        strings).

    Returns
    -------
    numpy ndarray
        Integer array of 0/1 weights with the same length as `index`.
    """
    # Imported inside the function so it stays self-contained when its source
    # is exported on its own to `custom_weights.py` by `save_forecaster`.
    # NOTE(review): confirm the exported module carries no module-level imports.
    import numpy as np

    weights = np.where(
        (index >= '2004-01-01') & (index <= '2005-01-01'),
        0,
        1
    )

    return weights
# Create and train forecaster
# ==============================================================================
window_features = RollingSkewness(window_sizes=3)
forecaster = ForecasterRecursive(
    regressor=RandomForestRegressor(random_state=123),
    lags=3,
    window_features=window_features,  # custom class instance
    weight_func=custom_weights,       # custom function
    forecaster_id="forecaster_custom_features",
)
forecaster.fit(y=data['y'])
⚠ Warning
The save_forecaster
function will save the functions used to create the weights as a module (custom_weights.py
). However, the classes used to create the window features are not saved. Therefore, you must ensure that these classes are available in the environment where the Forecaster is loaded.
# Save model and custom function
# ==============================================================================
save_forecaster(
    forecaster,
    file_name='forecaster_custom_features.joblib',
    save_custom_functions=True,
    verbose=False,
)
c:\Users\jaesc2\Miniconda3\envs\skforecast_py11_2\Lib\site-packages\skforecast\utils\utils.py:1898: SaveLoadSkforecastWarning: The Forecaster includes custom user-defined classes in the `window_features` argument. These classes are not saved automatically when saving the Forecaster. Please ensure you save these classes manually and import them before loading the Forecaster. Custom classes: RollingSkewness Visit the documentation for more information: https://skforecast.org/latest/user_guides/save-load-forecaster.html#saving-and-loading-a-forecaster-model-with-custom-features You can suppress this warning using: warnings.simplefilter('ignore', category=SaveLoadSkforecastWarning) warnings.warn(
At this point, the RollingSkewness
class must be saved manually in a file called rolling_skewness.py
. This file must be available in the environment where the Forecaster is loaded.
# Load model and custom function
# ==============================================================================
# The custom class and function are imported before loading the forecaster.
from rolling_skewness import RollingSkewness  # This file has to be generated manually
from custom_weights import custom_weights

forecaster_loaded = load_forecaster(
    'forecaster_custom_features.joblib',
    verbose=True,
)
=================== ForecasterRecursive =================== Regressor: RandomForestRegressor Lags: [1 2 3] Window features: ['rolling_skewness'] Window size: 3 Exogenous included: False Exogenous names: None Transformer for y: None Transformer for exog: None Weight function included: True Differentiation order: None Training range: [Timestamp('1991-07-01 00:00:00'), Timestamp('2008-06-01 00:00:00')] Training index type: DatetimeIndex Training index frequency: MS Regressor parameters: {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 123, 'verbose': 0, 'warm_start': False} fit_kwargs: {} Creation date: 2024-11-19 09:20:55 Last fit date: 2024-11-19 09:20:55 Skforecast version: 0.14.0 Python version: 3.11.10 Forecaster id: forecaster_custom_features
# Predict using loaded forecaster (5 steps ahead)
# ==============================================================================
forecaster_loaded.predict(steps=5)
2008-07-01 0.808125 2008-08-01 0.859447 2008-09-01 0.933751 2008-10-01 0.950768 2008-11-01 0.914137 Freq: MS, Name: pred, dtype: float64
ForecasterRecursiveMultiSeries¶
When using a ForecasterRecursiveMultiSeries
, the save_forecaster
function will save a different module for each of the functions used to create the weights.
# Data download
# ==============================================================================
# NOTE: this rebinds `data`, replacing the h2o series used in the previous
# sections.
data = fetch_dataset(name="items_sales")
data.head()
items_sales ----------- Simulated time series for the sales of 3 different items. Simulated data. Shape of the dataset: (1097, 3)
item_1 | item_2 | item_3 | |
---|---|---|---|
date | |||
2012-01-01 | 8.253175 | 21.047727 | 19.429739 |
2012-01-02 | 22.777826 | 26.578125 | 28.009863 |
2012-01-03 | 27.549099 | 31.751042 | 32.078922 |
2012-01-04 | 25.895533 | 24.567708 | 27.252276 |
2012-01-05 | 21.379238 | 18.191667 | 20.357737 |
# Custom function to create weights for each item
# ==============================================================================
def custom_weights_item_1(index):
    """
    Return 0 if index is between 2012-01-01 and 2012-06-01.

    Both bounds are inclusive; every other position gets weight 1.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Index of the observations to weight (it is compared against date
        strings).

    Returns
    -------
    numpy ndarray
        Integer array of 0/1 weights with the same length as `index`.
    """
    # Imported inside the function so it stays self-contained when its source
    # is exported on its own to `custom_weights_item_1.py` by `save_forecaster`.
    # NOTE(review): confirm the exported module carries no module-level imports.
    import numpy as np

    weights = np.where(
        (index >= '2012-01-01') & (index <= '2012-06-01'), 0, 1
    )

    return weights
def custom_weights_item_2(index):
    """
    Return 0 if index is between 2012-04-01 and 2013-01-01.

    Both bounds are inclusive; every other position gets weight 1.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Index of the observations to weight (it is compared against date
        strings).

    Returns
    -------
    numpy ndarray
        Integer array of 0/1 weights with the same length as `index`.
    """
    # Imported inside the function so it stays self-contained when its source
    # is exported on its own to `custom_weights_item_2.py` by `save_forecaster`.
    # NOTE(review): confirm the exported module carries no module-level imports.
    import numpy as np

    weights = np.where(
        (index >= '2012-04-01') & (index <= '2013-01-01'), 0, 1
    )

    return weights
def custom_weights_item_3(index):
    """
    Return 0 if index is between 2012-06-01 and 2013-01-01.

    Both bounds are inclusive; every other position gets weight 1.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Index of the observations to weight (it is compared against date
        strings).

    Returns
    -------
    numpy ndarray
        Integer array of 0/1 weights with the same length as `index`.
    """
    # Imported inside the function so it stays self-contained when its source
    # is exported on its own to `custom_weights_item_3.py` by `save_forecaster`.
    # NOTE(review): confirm the exported module carries no module-level imports.
    import numpy as np

    weights = np.where(
        (index >= '2012-06-01') & (index <= '2013-01-01'), 0, 1
    )

    return weights
# Custom class to create rolling skewness features (multi-series)
# ==============================================================================
from scipy.stats import skew
class RollingSkewnessMultiSeries():
    """
    Custom class to create rolling skewness features for multiple series.

    Parameters
    ----------
    window_sizes : int, list
        Size of the rolling window. A scalar is wrapped in a list; only the
        first element is used by `transform_batch` and `transform`.
    features_names : str, list, default 'rolling_skewness'
        Name of the generated feature, either as a string or as a list
        holding exactly one name. It is stored unchanged.

    Raises
    ------
    ValueError
        If `features_names` is a list that does not contain exactly one name.
    """

    def __init__(
        self,
        window_sizes,
        features_names: "str | list" = 'rolling_skewness'
    ):
        if not isinstance(window_sizes, list):
            window_sizes = [window_sizes]
        # A single statistic is produced, so only one name can be used.
        if isinstance(features_names, list) and len(features_names) != 1:
            raise ValueError(
                "`features_names` must be a string or a list with exactly "
                "one name because only one feature is created."
            )
        self.window_sizes = window_sizes
        self.features_names = features_names

    def _feature_name(self) -> str:
        """Return the output column name whether it was given as str or list."""
        names = self.features_names
        return names if isinstance(names, str) else names[0]

    def transform_batch(self, X: pd.Series) -> pd.DataFrame:
        """
        Compute the rolling skewness of a whole series.

        Windows are left-closed (`closed='left'`), so each row is computed
        from the observations that precede it. Rows for which the skewness
        is NaN are dropped.

        Parameters
        ----------
        X : pandas Series
            Series to transform.

        Returns
        -------
        pandas DataFrame
            Single-column DataFrame named after `features_names`.
        """
        rolling_obj = X.rolling(
            window=self.window_sizes[0], center=False, closed='left'
        )
        # A list cannot be used as a dict key, hence the resolved string name.
        rolling_skewness = pd.DataFrame(
            {self._feature_name(): rolling_obj.skew()}
        ).dropna()

        return rolling_skewness

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Compute the skewness of the most recent window of each series.

        Parameters
        ----------
        X : numpy ndarray
            Either a 1D array (one series) or a 2D array with one column per
            series.

        Returns
        -------
        numpy ndarray
            Shape `(n_series, 1)` for 2D input, flattened to `(1,)` for 1D
            input. An entry is NaN when the series has no non-NaN value in
            the window.
        """
        X_dim = X.ndim
        if X_dim == 1:
            X = X.reshape(-1, 1)  # Only one series
        n_series = X.shape[1]  # Series (levels) present in X
        n_stats = 1  # Only skewness is calculated
        window = self.window_sizes[0]

        rolling_skewness = np.full(
            shape=(n_series, n_stats), fill_value=np.nan, dtype=float
        )
        for i in range(n_series):
            # Use only the trailing window so the statistic matches the
            # window used in `transform_batch`, and drop NaNs per series as
            # `RollingSkewness.transform` does.
            X_window = X[-window:, i]
            X_window = X_window[~np.isnan(X_window)]
            if len(X_window) > 0:
                rolling_skewness[i, :] = skew(X_window, bias=False)

        if X_dim == 1:
            rolling_skewness = rolling_skewness.flatten()

        return rolling_skewness
⚠ Warning
When weight_func
is a dict
and does not include every series, for instance:
# Weights are not included for item_2
weight_func_dict = {
    'item_1': custom_weights_item_1,
    'item_3': custom_weights_item_3,
}
You must create a function that returns a weight of 1 for every observation of the missing series.
def custom_weights_all_1(index):
    """
    Return 1 for all elements in the index.

    Parameters
    ----------
    index : sized collection
        Index of the observations to weight; only its length is used.

    Returns
    -------
    numpy ndarray
        Float array of ones with the same length as `index`.
    """
    # Imported inside the function so it stays self-contained if its source
    # is exported on its own to a standalone module.
    # NOTE(review): confirm the exported module carries no module-level imports.
    import numpy as np

    weights = np.ones(len(index))

    return weights
# item_2 dummy weights
weight_func_dict = {
    'item_1': custom_weights_item_1,
    'item_2': custom_weights_all_1,
    'item_3': custom_weights_item_3,
}
# Create and train ForecasterRecursiveMultiSeries
# ==============================================================================
window_features = RollingSkewnessMultiSeries(window_sizes=3)

# One weight function per series, keyed by series name
weight_func_dict = {
    'item_1': custom_weights_item_1,
    'item_2': custom_weights_item_2,
    'item_3': custom_weights_item_3,
}

forecaster = ForecasterRecursiveMultiSeries(
    regressor=RandomForestRegressor(random_state=123),
    lags=3,
    window_features=window_features,
    encoding='ordinal',
    weight_func=weight_func_dict,
)
forecaster.fit(series=data)
forecaster
ForecasterRecursiveMultiSeries
General Information
- Regressor: RandomForestRegressor
- Lags: [1 2 3]
- Window features: ['rolling_skewness']
- Window size: 3
- Series encoding: ordinal
- Exogenous included: False
- Weight function included: True
- Series weights: None
- Differentiation order: None
- Creation date: 2024-11-19 09:20:56
- Last fit date: 2024-11-19 09:20:57
- Skforecast version: 0.14.0
- Python version: 3.11.10
- Forecaster id: None
Exogenous Variables
-
None
Data Transformations
- Transformer for series: None
- Transformer for exog: None
Training Information
- Series names (levels): item_1, item_2, item_3
- Training range: 'item_1': ['2012-01-01', '2015-01-01'], 'item_2': ['2012-01-01', '2015-01-01'], 'item_3': ['2012-01-01', '2015-01-01']
- Training index type: DatetimeIndex
- Training index frequency: D
Regressor Parameters
-
{'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 123, 'verbose': 0, 'warm_start': False}
Fit Kwargs
-
{}
# Save model and custom function
# ==============================================================================
save_forecaster(
    forecaster,
    file_name='forecaster_multiseries_custom_features.joblib',
    save_custom_functions=True,
    verbose=False,
)
c:\Users\jaesc2\Miniconda3\envs\skforecast_py11_2\Lib\site-packages\skforecast\utils\utils.py:1898: SaveLoadSkforecastWarning: The Forecaster includes custom user-defined classes in the `window_features` argument. These classes are not saved automatically when saving the Forecaster. Please ensure you save these classes manually and import them before loading the Forecaster. Custom classes: RollingSkewnessMultiSeries Visit the documentation for more information: https://skforecast.org/latest/user_guides/save-load-forecaster.html#saving-and-loading-a-forecaster-model-with-custom-features You can suppress this warning using: warnings.simplefilter('ignore', category=SaveLoadSkforecastWarning) warnings.warn(
# Load model and custom function
# ==============================================================================
# The custom class and the weight functions are imported before loading the
# forecaster; each weight function comes from its own module.
from rolling_skewness import RollingSkewnessMultiSeries  # This file has to be generated manually
from custom_weights_item_1 import custom_weights_item_1
from custom_weights_item_2 import custom_weights_item_2
from custom_weights_item_3 import custom_weights_item_3

forecaster_loaded = load_forecaster(
    'forecaster_multiseries_custom_features.joblib',
    verbose=True,
)
============================== ForecasterRecursiveMultiSeries ============================== Regressor: RandomForestRegressor Lags: [1 2 3] Window features: ['rolling_skewness'] Window size: 3 Series encoding: ordinal Series names (levels): item_1, item_2, item_3 Exogenous included: False Exogenous names: None Transformer for series: None Transformer for exog: None Weight function included: True Series weights: None Differentiation order: None Training range: 'item_1': ['2012-01-01', '2015-01-01'], 'item_2': ['2012-01-01', '2015-01-01'], 'item_3': ['2012-01-01', '2015-01-01'] Training index type: DatetimeIndex Training index frequency: D Regressor parameters: {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 123, 'verbose': 0, 'warm_start': False} fit_kwargs: {} Creation date: 2024-11-19 09:20:56 Last fit date: 2024-11-19 09:20:57 Skforecast version: 0.14.0 Python version: 3.11.10 Forecaster id: None
# Predict using loaded forecaster (5 steps ahead)
# ==============================================================================
forecaster_loaded.predict(steps=5, levels=None) # levels=None: predict all levels (item_1, item_2, item_3)
item_1 | item_2 | item_3 | |
---|---|---|---|
2015-01-02 | 14.818313 | 17.954045 | 19.676498 |
2015-01-03 | 14.961743 | 17.530592 | 19.207165 |
2015-01-04 | 18.349711 | 17.792810 | 19.919855 |
2015-01-05 | 18.639790 | 18.447346 | 22.158983 |
2015-01-06 | 17.254107 | 19.599428 | 22.687187 |