44
55import matplotlib .pyplot as plt
66
7+ from scipy .stats import t
8+
79from sklearn .preprocessing import StandardScaler
810
911from sklearn .feature_selection import RFE
@@ -57,6 +59,10 @@ def rfe_cv(df, vars_x, var_y, estimator, cv=5, std_scaling=False, figsize=(8,4))
5759
5860 ################### END CV ###
5961
62+ alpha = 0.3173 # two-sided significance level for a 1-stddev (68.27%) CI
63+ degrees_of_freedom = n_splits - 1
64+
65+
6066 # mean rank figure
6167 fig1 , ax1 = plt .subplots (figsize = figsize )
6268
@@ -67,8 +73,8 @@ def rfe_cv(df, vars_x, var_y, estimator, cv=5, std_scaling=False, figsize=(8,4))
6773
6874 ax1 .fill_between (
6975 np .arange (len (v_rank_mean )),
70- v_rank_mean + v_rank_std / np .sqrt (n_splits ), # stddev of mean as s/sqrt(n) is not right, the true value is higher
71- v_rank_mean - v_rank_std / np .sqrt (n_splits ),
76+ v_rank_mean + t . ppf ( 1 - alpha / 2 , degrees_of_freedom ) * v_rank_std / np .sqrt (n_splits ), # stddev correction using student-t
77+ v_rank_mean - t . ppf ( 1 - alpha / 2 , degrees_of_freedom ) * v_rank_std / np .sqrt (n_splits ),
7278 alpha = 0.1 ,
7379 color = 'b'
7480 )
@@ -148,8 +154,8 @@ def rfe_cv(df, vars_x, var_y, estimator, cv=5, std_scaling=False, figsize=(8,4))
148154
149155 ax2 .fill_between (
150156 np .arange (len (model ['scores_mean' ]))+ 1 ,
151- model ['scores_mean' ] + np .array (model ['scores_std' ])/ np .sqrt (n_splits ),
152- model ['scores_mean' ] - np .array (model ['scores_std' ])/ np .sqrt (n_splits ),
157+ model ['scores_mean' ] + t . ppf ( 1 - alpha / 2 , degrees_of_freedom ) * np .array (model ['scores_std' ])/ np .sqrt (n_splits ),
158+ model ['scores_mean' ] - t . ppf ( 1 - alpha / 2 , degrees_of_freedom ) * np .array (model ['scores_std' ])/ np .sqrt (n_splits ),
153159 alpha = 0.1 ,
154160 color = 'b'
155161 )
0 commit comments