|
@ -3,6 +3,7 @@ import pandas as pd |
|
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
from matplotlib import pyplot as plt |
|
|
from matplotlib import pyplot as plt |
|
|
import numpy as np |
|
|
import numpy as np |
|
|
|
|
|
from sklearn.preprocessing import PolynomialFeatures |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def k_days_into_future_regression(X, y, k, n0): |
|
|
def k_days_into_future_regression(X, y, k, n0): |
|
@ -46,6 +47,20 @@ def standard_lr(x, y): |
|
|
return regr.intercept_, regr.coef_, r2, mse |
|
|
return regr.intercept_, regr.coef_, r2, mse |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def poly_regression(x, y, degree):
    """Fit a polynomial regression of the given degree and score it in-sample.

    The inputs are expanded with ``PolynomialFeatures(degree=degree)``, a
    ``LinearRegression`` model is fitted on the expanded features, and the
    model is evaluated on the same data it was trained on.

    Args:
        x: Predictor values. Either a 1-D sequence of samples (treated as a
            single feature) or a 2-D array of shape (n_samples, n_features).
        y: Target values, one per sample.
        degree: Degree of the polynomial feature expansion.

    Returns:
        A ``(rmse, r2)`` tuple: the root mean squared error and the R^2
        score of the fitted model's predictions against ``y``.
    """
    # PolynomialFeatures only accepts 2-D input; a 1-D vector (e.g. a single
    # column slice such as mat[:, 0]) is interpreted as one feature column.
    x = np.asarray(x)
    if x.ndim == 1:
        x = x.reshape(-1, 1)

    polynomial_features = PolynomialFeatures(degree=degree)
    # Was `fist_transform`, which does not exist and raised AttributeError.
    x_poly = polynomial_features.fit_transform(x)

    model = linear_model.LinearRegression()
    model.fit(x_poly, y)
    y_poly_pred = model.predict(x_poly)

    # Scores are computed on the training data itself (no hold-out split).
    rmse = np.sqrt(mean_squared_error(y, y_poly_pred))
    r2 = r2_score(y, y_poly_pred)
    return rmse, r2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_all_linears(): |
|
|
def run_all_linears(): |
|
|
|
|
|
|
|
|
# Reads in the necessary csv file |
|
|
# Reads in the necessary csv file |
|
@ -71,6 +86,17 @@ def run_all_polynomials(): |
|
|
for i in range(4, 11): |
|
|
for i in range(4, 11): |
|
|
for j in range(1, 11 - i): |
|
|
for j in range(1, 11 - i): |
|
|
mat = df[[df.columns[i], df.columns[i + j]]].values |
|
|
mat = df[[df.columns[i], df.columns[i + j]]].values |
|
|
|
|
|
for d in range(2, 5): |
|
|
|
|
|
rmse, r2 = poly_regression(mat[:, 0], mat[:, 1], d) |
|
|
|
|
|
plt.figure(figsize=(6, 6)) |
|
|
|
|
|
plt.xlabel(df.columns[i]) |
|
|
|
|
|
plt.ylabel(df.columns[i + j]) |
|
|
|
|
|
plt.title('r2: ' + str(r2) + 'degree: ' + str(d)) |
|
|
|
|
|
plt.scatter(mat[:, 0], mat[:, 1]) |
|
|
|
|
|
plt.savefig('wellness_poly_regressions/' + df.columns[i] + '_vs_' + df.columns[i + j] + '_' + str(d) + '_degree.png') |
|
|
|
|
|
print(df.columns[i] + '_vs_' + df.columns[i + j] + '_degree_' + str(d) + '_r2=' + str(r2) + '_rmse=' + str(rmse)) |
|
|
|
|
|
plt.close() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
run_all_linears() |
|
|
run_all_linears() |
|
|
|
|
|
run_all_polynomials() |