Feature importance¶
The importance of the predictors included in a forecaster can be obtained using the method `get_feature_importance`. This method accesses the `coef_` or `feature_importances_` attribute of the internal regressor, whichever it exposes.
  Warning
This method only returns values if the regressor used inside the forecaster has the attribute `coef_` or `feature_importances_`.
Libraries¶
In [7]:
Copied!
# Libraries
# ==============================================================================
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
# Libraries
# ==============================================================================
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
Data¶
In [8]:
Copied!
# Download and preprocess data
# ==============================================================================
# The CSV header row is discarded (header=0) and replaced by the names below.
url = 'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o_exog.csv'
column_names = ['date', 'y', 'exog_1', 'exog_2']

# Parse the dates, use them as the index and set a month-start ('MS') frequency.
# NOTE(review): the format string assumes dates are stored as YYYY/MM/DD in the
# CSV -- confirm against the file.
data = (
    pd.read_csv(url, sep=',', header=0, names=column_names)
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%Y/%m/%d'))
    .set_index('date')
    .asfreq('MS')
)
# Download and preprocess data
# ==============================================================================
# The CSV header row is discarded (header=0) and replaced by the names below.
url = 'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/data/h2o_exog.csv'
column_names = ['date', 'y', 'exog_1', 'exog_2']

# Parse the dates, use them as the index and set a month-start ('MS') frequency.
# NOTE(review): the format string assumes dates are stored as YYYY/MM/DD in the
# CSV -- confirm against the file.
data = (
    pd.read_csv(url, sep=',', header=0, names=column_names)
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%Y/%m/%d'))
    .set_index('date')
    .asfreq('MS')
)
Extract feature importance from trained forecaster¶
In [9]:
Copied!
# Create and fit forecaster
# ==============================================================================
# A fixed `random_state` makes the random forest, and therefore the reported
# importances, reproducible across kernel restarts.
forecaster = ForecasterAutoreg(
    regressor=RandomForestRegressor(random_state=123),
    lags=5,
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# One row per predictor: the 5 lags followed by the exogenous variables.
forecaster.get_feature_importance()
# Create and fit forecaster
# ==============================================================================
# A fixed `random_state` makes the random forest, and therefore the reported
# importances, reproducible across kernel restarts.
forecaster = ForecasterAutoreg(
    regressor=RandomForestRegressor(random_state=123),
    lags=5,
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# One row per predictor: the 5 lags followed by the exogenous variables.
forecaster.get_feature_importance()
Out[9]:
 | feature | importance |
---|---|---|
0 | lag_1 | 0.533117 |
1 | lag_2 | 0.097804 |
2 | lag_3 | 0.029198 |
3 | lag_4 | 0.073071 |
4 | lag_5 | 0.053683 |
5 | exog_1 | 0.042011 |
6 | exog_2 | 0.171117 |
In [10]:
Copied!
# Create and fit forecaster
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=5)
forecaster.fit(
    y=data['y'],
    exog=data[['exog_1', 'exog_2']],
)

# Predictors importance
# ==============================================================================
# With a linear regressor the reported values come from its `coef_` attribute,
# so they are signed coefficients rather than non-negative importances.
forecaster.get_feature_importance()
# Create and fit forecaster
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=Ridge(), lags=5)
forecaster.fit(
    y=data['y'],
    exog=data[['exog_1', 'exog_2']],
)

# Predictors importance
# ==============================================================================
# With a linear regressor the reported values come from its `coef_` attribute,
# so they are signed coefficients rather than non-negative importances.
forecaster.get_feature_importance()
Out[10]:
 | feature | importance |
---|---|---|
0 | lag_1 | 0.327688 |
1 | lag_2 | -0.073593 |
2 | lag_3 | -0.152202 |
3 | lag_4 | -0.217106 |
4 | lag_5 | -0.145800 |
5 | exog_1 | 0.379798 |
6 | exog_2 | 0.668162 |
Since `ForecasterAutoregMultiOutput` fits one model per step, it is necessary to specify the step whose model the feature importance is retrieved from.
In [11]:
Copied!
# Create and fit forecaster
# ==============================================================================
# A fixed `random_state` makes the random forests, and therefore the reported
# importances, reproducible across kernel restarts.
forecaster = ForecasterAutoregMultiOutput(
    regressor=RandomForestRegressor(random_state=123),
    steps=10,
    lags=5,
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# One model is fitted per step, so the step whose model is inspected must be
# given explicitly; here the model for step 1.
forecaster.get_feature_importance(step=1)
# Create and fit forecaster
# ==============================================================================
# A fixed `random_state` makes the random forests, and therefore the reported
# importances, reproducible across kernel restarts.
forecaster = ForecasterAutoregMultiOutput(
    regressor=RandomForestRegressor(random_state=123),
    steps=10,
    lags=5,
)
forecaster.fit(y=data['y'], exog=data[['exog_1', 'exog_2']])

# Predictors importance
# ==============================================================================
# One model is fitted per step, so the step whose model is inspected must be
# given explicitly; here the model for step 1.
forecaster.get_feature_importance(step=1)
Out[11]:
 | feature | importance |
---|---|---|
0 | lag_1 | 0.537580 |
1 | lag_2 | 0.102076 |
2 | lag_3 | 0.024955 |
3 | lag_4 | 0.074393 |
4 | lag_5 | 0.051036 |
5 | exog_1 | 0.055780 |
6 | exog_2 | 0.154180 |
In [12]:
Copied!
%%html
<style>
/* Presentation only: hide the input prompt element of code cells in the rendered page. */
.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {display: none;}
</style>
%%html