Skip to content

Commit abb4e89

Browse files
committed
Forecasting added.
1 parent 3c9cb1b commit abb4e89

File tree

1 file changed

+36
-11
lines changed

1 file changed

+36
-11
lines changed

codonPython/tolerance.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from statsmodels.sandbox.regression.predstd import wls_prediction_std
77

88

9-
def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alpha: float = 0.05) -> pd.DataFrame:
9+
def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alpha: float = 0.05, forecast: bool = False) -> pd.DataFrame:
1010
"""
1111
Check that some future values are within a weighted least squares confidence interval.
1212
@@ -24,6 +24,9 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
2424
a second degree polynomial to the data and return both sets of results.
2525
alpha : float, default = 0.05
2626
Alpha parameter for the weighted least squares confidence interval.
27+
forecast : bool, default = False
28+
When set to True, will also return model projections for 20% of the data range beyond
29+
the current distribution.
2730
2831
2932
Returns
@@ -34,7 +37,7 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
3437
"yobs" : Observed value for y
3538
"yhat" : Predicted value for y
3639
"yhat_l" : Lower confidence interval for y
37-
"polynomial": Degree of polynomial fit to data
40+
"polynomial": Max polynomial of model fit to the data
3841
3942
4043
Examples
@@ -43,16 +46,17 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
4346
... t = np.array([1001,1002,1003,1004,1005,1006]),
4447
... y = np.array([2,3,4,4.5,5,5.1]),
4548
... to_exclude = 2,
46-
... poly_features = [2],
4749
... )
4850
yhat_u yobs yhat yhat_l polynomial
49-
0 9.077182 5.0 4.875 0.672818 2
50-
1 13.252339 5.1 4.975 -3.302339 2
51+
0 6.817413 5.0 5.500 4.182587 1
52+
1 7.952702 5.1 6.350 4.747298 1
53+
2 9.077182 5.0 4.875 0.672818 2
54+
3 13.252339 5.1 4.975 -3.302339 2
5155
"""
5256

5357
if not isinstance(poly_features, list):
5458
raise ValueError("Please input a list of integers from 0 to 4 for poly_features.")
55-
assert all([0 <= degree <= 4 for degree in poly_features]), (
59+
assert all(0 <= degree <= 4 for degree in poly_features), (
5660
"Please ensure all numbers in poly_features are from 0 to 4."
5761
)
5862
if not isinstance(alpha, float) or 0 >= alpha >= 1:
@@ -64,29 +68,49 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
6468
model. Either reduce to_exclude or increase your sample size to continue."""
6569
)
6670
assert np.isfinite(y).all(), (
67-
"Your sample contains missing or infinite values for y. Please exclude these values to continue."
71+
"Your sample contains missing or infinite values for y. Exclude these values to continue."
6872
)
6973

7074

7175
# Sort data by X increasing
7276
idx = np.argsort(t)
7377
t = t[idx]
7478
y = y[idx]
79+
forecasts = 5
80+
81+
if forecast:
82+
# 5 forecast values based on 20% of data range
83+
t_range = t[-1] - t[0]
84+
t_forecast = np.linspace(
85+
(t[-1] + t_range*0.001),
86+
(t[-1] + t_range*0.2),
87+
forecasts,
88+
)
89+
7590
results = pd.DataFrame()
76-
7791
for degree in poly_features:
7892
transforms = make_pipeline(
7993
StandardScaler(),
8094
PolynomialFeatures(degree=degree),
8195
)
8296

83-
# Fit transforms to training data, apply to all data.
97+
# Fit transforms to training data only, apply to all data.
8498
fitted_transforms = transforms.fit(t[:-to_exclude].reshape(-1, 1))
8599
_t = fitted_transforms.transform(t.reshape(-1, 1))
86100

87101
t_train, y_train = _t[:-to_exclude, :], y[:-to_exclude]
88102
t_predict, y_predict = _t[-to_exclude:, :], y[-to_exclude:]
89-
103+
104+
if forecast:
105+
# Add forecasts to prediction array
106+
t_predict = np.append(
107+
t_predict,
108+
fitted_transforms.transform(t_forecast.reshape(-1, 1)),
109+
axis=0
110+
)
111+
# This prevents the final dataframe from complaining about mismatched array lengths.
112+
y_predict = np.append(y_predict, np.full(forecasts, np.nan))
113+
90114
# Fit ordinary least squares model to the training data, then predict for the
91115
# prediction data.
92116
model = sm.OLS(y_train, t_train).fit()
@@ -103,7 +127,8 @@ def check_tolerance(t, y, to_exclude: int = 1, poly_features: list = [1, 2], alp
103127
"yhat" : yhat,
104128
"yhat_l" : yhat_l,
105129
"polynomial" : degree
106-
})
130+
}),
131+
ignore_index=True,
107132
)
108133

109134
return results

0 commit comments

Comments (0)