datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.9 KiB

  1. from sklearn import linear_model
  2. import pandas as pd
  3. from sklearn.metrics import mean_squared_error, r2_score
  4. def k_days_into_future_regression(X, y, k, n0):
  5. """
  6. linear regression that returns the fitted weights as well as metrics
  7. :param X: x timeseries dataframe (very clean, no unamed columns), multidimensional rows
  8. :param y: y timeseries dataframe (very clean, no unamed columns), scalar rows
  9. :param k: days predicting in advance
  10. :param n0: ignoring the first n0 days
  11. :return: intercept, slopes, correlation, mean squared error
  12. """
  13. col = "TimeSinceAugFirst"
  14. inp = []
  15. out = []
  16. for day in y[col][n0 - 1:]:
  17. prev = day - k
  18. xprev = X[X[col] == prev].drop(columns=[col]).to_numpy()
  19. if xprev.shape[0] != 1:
  20. continue
  21. else:
  22. xprev = xprev[0, :]
  23. yt = y[y[col] == day].drop(columns=[col]).to_numpy()[0, :]
  24. inp.append(xprev)
  25. out.append(yt)
  26. regr = linear_model.LinearRegression()
  27. regr.fit(inp, out)
  28. predictions = regr.predict(inp)
  29. mse = mean_squared_error(out, predictions)/(len(out) - 2)
  30. r2 = r2_score(out, predictions)
  31. return regr.intercept_, regr.coef_, r2, mse
  32. def standard_lr(x, y):
  33. regr = linear_model.LinearRegression()
  34. regr.fit(x, y)
  35. predictions = regr.predict(x)
  36. mse = mean_squared_error(y, predictions) / (len(y) - 2)
  37. r2 = r2_score(y, predictions)
  38. return regr.intercept_, regr.coef_, r2, mse
  39. def main():
  40. # fatigueSums = pd.read_csv("fatigue_total_sum.csv")
  41. # workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
  42. # print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))
  43. wellness = pd.read_csv("../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv")
  44. wellness = wellness.fillna(0)
  45. x = wellness[['normSoreness', 'TimeSinceAugFirst']]
  46. y = wellness['normFatigue']
  47. print(wellness.isnull().sum())
  48. print(standard_lr(x, y))
  49. if __name__ == "__main__":
  50. main()