diff --git a/run_all_regressions.py b/run_all_regressions.py
index e1f9329..ceed036 100644
--- a/run_all_regressions.py
+++ b/run_all_regressions.py
@@ -3,6 +3,7 @@ import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score
 from matplotlib import pyplot as plt
 import numpy as np
+from sklearn.preprocessing import PolynomialFeatures
 
 
 def k_days_into_future_regression(X, y, k, n0):
@@ -46,6 +47,20 @@ def standard_lr(x, y):
     return regr.intercept_, regr.coef_, r2, mse
 
 
+def poly_regression(x, y, degree):
+    # Fit a polynomial regression of the given degree on 1-D x; return (rmse, r2).
+    polynomial_features = PolynomialFeatures(degree=degree)
+    x_poly = polynomial_features.fit_transform(np.asarray(x).reshape(-1, 1))
+
+    model = linear_model.LinearRegression()
+    model.fit(x_poly, y)
+    y_poly_pred = model.predict(x_poly)
+
+    rmse = np.sqrt(mean_squared_error(y, y_poly_pred))
+    r2 = r2_score(y, y_poly_pred)
+    return rmse, r2
+
+
 def run_all_linears():
     # Reads in the neccessary csv file
 
@@ -71,6 +86,17 @@ def run_all_polynomials():
     for i in range(4, 11):
         for j in range(1, 11 - i):
             mat = df[[df.columns[i], df.columns[i + j]]].values
+            for d in range(2, 5):
+                rmse, r2 = poly_regression(mat[:, 0], mat[:, 1], d)
+                plt.figure(figsize=(6, 6))
+                plt.xlabel(df.columns[i])
+                plt.ylabel(df.columns[i + j])
+                plt.title('r2: ' + str(r2) + ' degree: ' + str(d))
+                plt.scatter(mat[:, 0], mat[:, 1])
+                plt.savefig('wellness_poly_regressions/' + df.columns[i] + '_vs_' + df.columns[i + j] + '_' + str(d) + '_degree.png')
+                print(df.columns[i] + '_vs_' + df.columns[i + j] + '_degree_' + str(d) + '_r2=' + str(r2) + '_rmse=' + str(rmse))
+                plt.close()
 
 
 run_all_linears()
+run_all_polynomials()