66from statsmodels .sandbox .regression .predstd import wls_prediction_std
77
88
9- def check_tolerance (t , y , to_exclude : int = 1 , poly_features : list = [1 , 2 ], alpha : float = 0.05 ) -> pd .DataFrame :
9+ def check_tolerance (t , y , to_exclude : int = 1 , poly_features : list = [1 , 2 ], alpha : float = 0.05 , forecast : bool = False ) -> pd .DataFrame :
1010 """
1111 Check that some future values are within a weighted least squares confidence interval.
1212
@@ -24,6 +24,9 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
2424 a second degree polynomial to the data and return both sets of results.
2525 alpha : float, default = 0.05
2626 Alpha parameter for the weighted least squares confidence interval.
27+ forecast : bool, default = False
28+ When set to true, will return model projections for 20% of the data range beyond
29+ the current distribution.
2730
2831
2932 Returns
@@ -34,7 +37,7 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
3437 "yobs" : Observed value for y
3538 "yhat" : Predicted value for y
3639 "yhat_l" : Lower confidence interval for y
37- "polynomial": Degree of polynomial fit to data
40+ "polynomial": Max polynomial of model fit to the data
3841
3942
4043 Examples
@@ -43,16 +46,17 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
4346 ... t = np.array([1001,1002,1003,1004,1005,1006]),
4447 ... y = np.array([2,3,4,4.5,5,5.1]),
4548 ... to_exclude = 2,
46- ... poly_features = [2],
4749 ... )
4850 yhat_u yobs yhat yhat_l polynomial
49- 0 9.077182 5.0 4.875 0.672818 2
50- 1 13.252339 5.1 4.975 -3.302339 2
51+ 0 6.817413 5.0 5.500 4.182587 1
52+ 1 7.952702 5.1 6.350 4.747298 1
53+ 2 9.077182 5.0 4.875 0.672818 2
54+ 3 13.252339 5.1 4.975 -3.302339 2
5155 """
5256
5357 if not isinstance (poly_features , list ):
5458 raise ValueError ("Please input a list of integers from 0 to 4 for poly_features." )
55- assert all ([ 0 <= degree <= 4 for degree in poly_features ] ), (
59+ assert all (0 <= degree <= 4 for degree in poly_features ), (
5660 "Please ensure all numbers in poly_features are from 0 to 4."
5761 )
5862 if not isinstance (alpha , float ) or 0 >= alpha >= 1 :
@@ -64,29 +68,49 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
6468 model. Either reduce to_exclude or increase your sample size to continue."""
6569 )
6670 assert np .isfinite (y ).all (), (
67- "Your sample contains missing or infinite values for y. Please exclude these values to continue."
71+ "Your sample contains missing or infinite values for y. Exclude these values to continue."
6872 )
6973
7074
7175 # Sort data by X increasing
7276 idx = np .argsort (t )
7377 t = t [idx ]
7478 y = y [idx ]
79+ forecasts = 5
80+
81+ if forecast :
82+ # 5 forecast values based on 20% of data range
83+ t_range = t [- 1 ] - t [0 ]
84+ t_forecast = np .linspace (
85+ (t [- 1 ] + t_range * 0.001 ),
86+ (t [- 1 ] + t_range * 0.2 ),
87+ forecasts ,
88+ )
89+
7590 results = pd .DataFrame ()
76-
7791 for degree in poly_features :
7892 transforms = make_pipeline (
7993 StandardScaler (),
8094 PolynomialFeatures (degree = degree ),
8195 )
8296
83- # Fit transforms to training data, apply to all data.
97+ # Fit transforms to training data only , apply to all data.
8498 fitted_transforms = transforms .fit (t [:- to_exclude ].reshape (- 1 , 1 ))
8599 _t = fitted_transforms .transform (t .reshape (- 1 , 1 ))
86100
87101 t_train , y_train = _t [:- to_exclude , :], y [:- to_exclude ]
88102 t_predict , y_predict = _t [- to_exclude :, :], y [- to_exclude :]
89-
103+
104+ if forecast :
105+ # Add forecasts to prediction array
106+ t_predict = np .append (
107+ t_predict ,
108+ fitted_transforms .transform (t_forecast .reshape (- 1 , 1 )),
109+ axis = 0
110+ )
111+ # This will prevent the final dataframe complaining about array lengths.
112+ y_predict = np .append (y_predict , np .full (forecasts , np .nan ))
113+
90114 # Fit ordinary least squares model to the training data, then predict for the
91115 # prediction data.
92116 model = sm .OLS (y_train , t_train ).fit ()
@@ -103,7 +127,8 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
103127 "yhat" : yhat ,
104128 "yhat_l" : yhat_l ,
105129 "polynomial" : degree
106- })
130+ }),
131+ ignore_index = True ,
107132 )
108133
109134 return results
0 commit comments