datafest competition 2019
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

32 lines
1003 B

  1. from sklearn import linear_model
  2. import pandas as pd
  3. from sklearn.metrics import mean_squared_error, r2_score
  4. def k_days_into_future_regression(X, y, k, n0):
  5. col = "TimeSinceAugFirst"
  6. inp = []
  7. out = []
  8. for day in y[col][n0 - 1:]:
  9. prev = day - k
  10. xprev = X[X[col] == prev].drop(columns=[col]).to_numpy()[0, :]
  11. yt = y[y[col] == day].drop(columns=[col]).to_numpy()[0, :]
  12. inp.append(xprev)
  13. out.append(yt)
  14. regr = linear_model.LinearRegression()
  15. regr.fit(inp, out)
  16. predictions = regr.predict(inp)
  17. mse = mean_squared_error(out, predictions)/(len(out) - 2)
  18. r2 = r2_score(out, predictions)
  19. return regr.intercept_, regr.coef_, r2, mse
  20. def main():
  21. fatigueSums = pd.read_csv("fatigue_total_sum.csv")
  22. workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
  23. performance = pd.read_csv("time_series_days_ranked.csv", index_col=0)
  24. print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))
  25. if __name__ == "__main__":
  26. main()