datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.5 KiB

  1. from sklearn import linear_model
  2. from sklearn.preprocessing import PolynomialFeatures
  3. import numpy as np
  4. import pandas as pd
  5. from sklearn.metrics import mean_squared_error, r2_score
  6. def k_days_into_future_regression(X, y, k, n0):
  7. """
  8. linear regression that returns the fitted weights as well as metrics
  9. :param X: x timeseries dataframe (very clean, no unamed columns), multidimensional rows
  10. :param y: y timeseries dataframe (very clean, no unamed columns), scalar rows
  11. :param k: days predicting in advance
  12. :param n0: ignoring the first n0 days
  13. :return: intercept, slopes, correlation, mean squared error
  14. """
  15. col = "TimeSinceAugFirst"
  16. inp = []
  17. out = []
  18. for day in y[col][n0 - 1:]:
  19. prev = day - k
  20. xprev = X[X[col] == prev].drop(columns=[col]).to_numpy()
  21. if xprev.shape[0] != 1:
  22. continue
  23. else:
  24. xprev = xprev[0, :]
  25. yt = y[y[col] == day].drop(columns=[col]).to_numpy()[0, :]
  26. inp.append(xprev)
  27. out.append(yt)
  28. regr = linear_model.LinearRegression()
  29. regr.fit(inp, out)
  30. predictions = regr.predict(inp)
  31. mse = mean_squared_error(out, predictions)/(len(out) - 2)
  32. r2 = r2_score(out, predictions)
  33. return regr.intercept_, regr.coef_, r2, mse
  34. def standard_lr(x, y):
  35. # Standard linear regression formula, gives back params and r2
  36. regr = linear_model.LinearRegression()
  37. regr.fit(x, y)
  38. predictions = regr.predict(x)
  39. mse = mean_squared_error(y, predictions) / (len(y) - 2)
  40. r2 = r2_score(y, predictions)
  41. return regr.intercept_, regr.coef_, r2, mse
  42. def poly_regression(x, y, degree):
  43. # Polynomial regression with nth degree, gives back rmse and r2
  44. polynomial_features = PolynomialFeatures(degree=degree)
  45. x_poly = polynomial_features.fit_transform(x)
  46. model = linear_model.LinearRegression()
  47. model.fit(x_poly, y)
  48. y_poly_pred = model.predict(x_poly)
  49. rmse = np.sqrt(mean_squared_error(y, y_poly_pred))
  50. r2 = r2_score(y, y_poly_pred)
  51. return rmse, r2
  52. def main():
  53. file = open("ryan_regressions.txt", 'w')
  54. player = pd.read_csv("../data_preparation/cleaned/personal.csv", index_col=0)
  55. for name, value in player.iteritems():
  56. if name == "day":
  57. continue
  58. for j in range(1, 17):
  59. ply = player[player['playerID'] == j]
  60. x = ply[['fatigueNorm', 'day']]
  61. y = ply[name]
  62. lr = standard_lr(x, y)
  63. poly = poly_regression(x, y, 3)
  64. if .9 > lr[2] > .4 or .9 > poly[1] > .4:
  65. file.write("Player {} for {}\n".format(j, name))
  66. file.write("{}\n".format(lr))
  67. file.write("{}\n\n".format(poly))
  68. if __name__ == "__main__":
  69. main()