Feature importance¶
The importance of the predictors included in a forecaster can be obtained using the method `get_feature_importance`. This method accesses the `coef_` and `feature_importances_` attributes of the internal regressor.
  Warning
This method only returns values if the regressor used inside the forecaster has the attribute `coef_` or `feature_importances_`.
Libraries¶
In [1]:
Copied!
# Libraries
# ==============================================================================
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
# Libraries
# ==============================================================================
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
Data¶
In [2]:
Copied!
# Download and prepare data
# ==============================================================================
# The CSV is read from the skforecast repository, its header row is replaced by
# the names given below, and the frame is re-indexed by date with a month-start
# ('MS') frequency.
# NOTE(review): assumes dates in the CSV are written as YYYY/MM/DD - confirm
# against the file.
url = ('https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o_exog.csv')
data = (
    pd.read_csv(url, sep=',', header=0, names=['date', 'y', 'exog_1', 'exog_2'])
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%Y/%m/%d'))
    .set_index('date')
    .asfreq('MS')
)
# Download and prepare data
# ==============================================================================
# The CSV is read from the skforecast repository, its header row is replaced by
# the names given below, and the frame is re-indexed by date with a month-start
# ('MS') frequency.
# NOTE(review): assumes dates in the CSV are written as YYYY/MM/DD - confirm
# against the file.
url = ('https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o_exog.csv')
data = (
    pd.read_csv(url, sep=',', header=0, names=['date', 'y', 'exog_1', 'exog_2'])
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%Y/%m/%d'))
    .set_index('date')
    .asfreq('MS')
)
Extract feature importance from trained forecaster¶
In [3]:
Copied!
# Create and fit forecaster
# ==============================================================================
# Random forests are stochastic: fixing `random_state` makes the fitted model,
# and therefore the reported importances, identical on every run.
forecaster = ForecasterAutoreg(
    regressor = RandomForestRegressor(random_state=123),
    lags      = 5
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance()
# Create and fit forecaster
# ==============================================================================
# Random forests are stochastic: fixing `random_state` makes the fitted model,
# and therefore the reported importances, identical on every run.
forecaster = ForecasterAutoreg(
    regressor = RandomForestRegressor(random_state=123),
    lags      = 5
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance()
Out[3]:
| | feature | importance |
---|---|---|
0 | lag_1 | 0.541886 |
1 | lag_2 | 0.090949 |
2 | lag_3 | 0.023549 |
3 | lag_4 | 0.073510 |
4 | lag_5 | 0.063987 |
5 | exog_1 | 0.045629 |
6 | exog_2 | 0.160490 |
In [4]:
Copied!
# Train a forecaster backed by a Ridge regressor
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=5)
forecaster.fit(
    y    = data['y'],
    exog = data[['exog_1', 'exog_2']]
)

# Predictors importance
# ==============================================================================
# Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance()
# Train a forecaster backed by a Ridge regressor
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=5)
forecaster.fit(
    y    = data['y'],
    exog = data[['exog_1', 'exog_2']]
)

# Predictors importance
# ==============================================================================
# Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance()
Out[4]:
| | feature | importance |
---|---|---|
0 | lag_1 | 0.327688 |
1 | lag_2 | -0.073593 |
2 | lag_3 | -0.152202 |
3 | lag_4 | -0.217106 |
4 | lag_5 | -0.145800 |
5 | exog_1 | 0.379798 |
6 | exog_2 | 0.668162 |
Since `ForecasterAutoregDirect` fits one model per step, it is necessary to specify the step (i.e. the model) from which the feature importance is retrieved.
In [5]:
Copied!
# Create and fit forecaster
# ==============================================================================
# Random forests are stochastic: fixing `random_state` makes the fitted models,
# and therefore the reported importances, identical on every run.
forecaster = ForecasterAutoregDirect(
    regressor = RandomForestRegressor(random_state=123),
    steps     = 10,
    lags      = 5
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# `step` selects which model's importances are returned; here the model for
# step 1. Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance(step=1)
# Create and fit forecaster
# ==============================================================================
# Random forests are stochastic: fixing `random_state` makes the fitted models,
# and therefore the reported importances, identical on every run.
forecaster = ForecasterAutoregDirect(
    regressor = RandomForestRegressor(random_state=123),
    steps     = 10,
    lags      = 5
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# `step` selects which model's importances are returned; here the model for
# step 1. Left as the last expression so the resulting table is displayed.
forecaster.get_feature_importance(step=1)
Out[5]:
| | feature | importance |
---|---|---|
0 | lag_1 | 0.521185 |
1 | lag_2 | 0.098278 |
2 | lag_3 | 0.026642 |
3 | lag_4 | 0.081040 |
4 | lag_5 | 0.046787 |
5 | exog_1 | 0.046945 |
6 | exog_2 | 0.179123 |
In [6]:
Copied!
%%html
<style>
/* Hide the input prompt element (.jp-InputPrompt) of code cells. */
.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {display: none;}
</style>
%%html