Merge branch 'master' of https://github.com/PerryXDeng/wheatyeeters

6 years ago · 9a907b5460
--- a/data_preparation/cleaned/personal.csv
+++ b/data_preparation/cleaned/personal.csv
--- a/hypotheses_modeling/KerasRegressions.py
+++ b/hypotheses_modeling/KerasRegressions.py
@ -49,7 +49,7 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
  return model.get_weights()


 def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
 def time_series_dnn_classification(X, Y, k, n0, x_columns, y_columns):
  inp = X[x_columns]
  out = Y[y_columns]
  col = "day"
@ -86,7 +86,7 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
  y = np.array(y)
  model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=input_shape),
    tf.keras.layers.Dense(32),
    tf.keras.layers.Dense(32, activation=tf.nn.softmax),
    tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
  ])
  model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
@ -94,3 +94,95 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
  loss, accuracy = model.evaluate(x, y)
  print(loss, accuracy)
  return model.get_weights()

 def time_series_linear_regression(X, Y, k, n0, x_columns, y_columns):
  """Fit one linear Dense layer mapping features at day ``t - k`` to targets at day ``t``.

  For every player listed in ``Y["playerID"]``, each target day ``t`` taken
  from that player's rows of ``Y`` (the slice ``[n0 - 1:]``, i.e. skipping the
  first ``n0 - 1`` rows when ``n0 >= 1``) is paired with the row of ``X`` for
  the same player whose day equals ``t - k``.  Target days with zero or with
  several matching rows in ``X`` are skipped.  Only the ``day`` column is
  dropped before conversion to NumPy, so every other selected column
  (including ``playerID``) remains part of the feature and target vectors.

  The model is trained for 100 epochs and evaluated on the same samples it
  was trained on; the resulting loss and mean absolute error are printed.

  Args:
    X: DataFrame providing the predictor columns.
    Y: DataFrame providing the target columns.
    k: Lag, in days, between a feature row and the target row it predicts.
    n0: Position (1-based) of the first per-player row of ``Y`` used as a target.
    x_columns: Columns selected from ``X``; must contain "playerID" and "day".
    y_columns: Columns selected from ``Y``; must contain "playerID" and "day".

  Returns:
    The trained model's weights as returned by ``model.get_weights()``.

  Raises:
    SystemExit: With status 2, after printing a message, when feature or
      target vectors do not all have the same length.
    ValueError: When no (feature, target) pair could be formed.
  """
  inp = X[x_columns]
  out = Y[y_columns]
  col = "day"
  x = []
  y = []
  input_shape = 0
  output_shape = 0
  for player in Y["playerID"].unique():
    XPlayer = inp[inp["playerID"] == player]
    YPlayer = out[out["playerID"] == player]
    for day in YPlayer[col][n0 - 1:]:
      prev = day - k
      xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
      # A usable sample needs exactly one feature row for the lagged day.
      if xprev.shape[0] != 1:
        continue
      xprev = xprev[0, :]
      yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
      if input_shape == 0:
        input_shape = xprev.shape[0]
      elif input_shape != xprev.shape[0]:
        print("INCONSISTENT INPUT DIMENSION")
        raise SystemExit(2)
      # The output width must be tracked through output_shape itself; testing
      # input_shape here left output_shape at 0 and aborted on the first sample.
      if output_shape == 0:
        output_shape = yt.shape[0]
      elif output_shape != yt.shape[0]:
        print("INCONSISTENT OUTPUT DIMENSION")
        raise SystemExit(2)
      x.append(xprev)
      y.append(yt)
  if not x:
    raise ValueError("no (X[day - k], Y[day]) sample pairs could be formed")
  x = np.array(x)
  y = np.array(y)
  model = tf.keras.Sequential([
    # Keras expects a shape tuple here, not a bare integer.
    tf.keras.layers.Flatten(input_shape=(input_shape,)),
    tf.keras.layers.Dense(output_shape)
  ])
  # Real-valued targets: use a regression loss/metric instead of the
  # classification loss and accuracy metrics.
  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
  model.fit(x, y, epochs=100)
  # evaluate() returns the loss followed by one value per compiled metric.
  loss, mae = model.evaluate(x, y)
  print(loss, mae)
  return model.get_weights()


 def time_series_dnn_regressions(X, Y, k, n0, x_columns, y_columns):
  """Fit a one-hidden-layer network mapping features at day ``t - k`` to targets at day ``t``.

  For every player listed in ``Y["playerID"]``, each target day ``t`` taken
  from that player's rows of ``Y`` (the slice ``[n0 - 1:]``, i.e. skipping the
  first ``n0 - 1`` rows when ``n0 >= 1``) is paired with the row of ``X`` for
  the same player whose day equals ``t - k``.  Target days with zero or with
  several matching rows in ``X`` are skipped.  Only the ``day`` column is
  dropped before conversion to NumPy, so every other selected column
  (including ``playerID``) remains part of the feature and target vectors.

  The network has a 32-unit hidden layer followed by a linear output layer.
  It is trained for 100 epochs and evaluated on the same samples it was
  trained on; the resulting loss and mean absolute error are printed.

  Args:
    X: DataFrame providing the predictor columns.
    Y: DataFrame providing the target columns.
    k: Lag, in days, between a feature row and the target row it predicts.
    n0: Position (1-based) of the first per-player row of ``Y`` used as a target.
    x_columns: Columns selected from ``X``; must contain "playerID" and "day".
    y_columns: Columns selected from ``Y``; must contain "playerID" and "day".

  Returns:
    The trained model's weights as returned by ``model.get_weights()``.

  Raises:
    SystemExit: With status 2, after printing a message, when feature or
      target vectors do not all have the same length.
    ValueError: When no (feature, target) pair could be formed.
  """
  inp = X[x_columns]
  out = Y[y_columns]
  col = "day"
  x = []
  y = []
  input_shape = 0
  output_shape = 0
  for player in Y["playerID"].unique():
    XPlayer = inp[inp["playerID"] == player]
    YPlayer = out[out["playerID"] == player]
    for day in YPlayer[col][n0 - 1:]:
      prev = day - k
      xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
      # A usable sample needs exactly one feature row for the lagged day.
      if xprev.shape[0] != 1:
        continue
      xprev = xprev[0, :]
      yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
      if input_shape == 0:
        input_shape = xprev.shape[0]
      elif input_shape != xprev.shape[0]:
        print("INCONSISTENT INPUT DIMENSION")
        raise SystemExit(2)
      # The output width must be tracked through output_shape itself; testing
      # input_shape here left output_shape at 0 and aborted on the first sample.
      if output_shape == 0:
        output_shape = yt.shape[0]
      elif output_shape != yt.shape[0]:
        print("INCONSISTENT OUTPUT DIMENSION")
        raise SystemExit(2)
      x.append(xprev)
      y.append(yt)
  if not x:
    raise ValueError("no (X[day - k], Y[day]) sample pairs could be formed")
  x = np.array(x)
  y = np.array(y)
  model = tf.keras.Sequential([
    # Keras expects a shape tuple here, not a bare integer.
    tf.keras.layers.Flatten(input_shape=(input_shape,)),
    # NOTE(review): softmax as a hidden activation is unusual for regression;
    # kept as in the original - confirm it is intentional.
    tf.keras.layers.Dense(32, activation=tf.nn.softmax),
    tf.keras.layers.Dense(output_shape)
  ])
  # Real-valued targets: use a regression loss/metric instead of the
  # classification loss and accuracy metrics.
  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
  model.fit(x, y, epochs=100)
  # evaluate() returns the loss followed by one value per compiled metric.
  loss, mae = model.evaluate(x, y)
  print(loss, mae)
  return model.get_weights()
--- a/hypotheses_modeling/hypotheses.txt
+++ b/hypotheses_modeling/hypotheses.txt
@ -21,8 +21,15 @@ Team:

    5.


 Individual:
    Player 1 - fatigue + day / soreness
        lr - 0.24677741789096985
        pr - 0.32119826926167405

 Perry:
 7 day moving average team workload - normalized team fatigue: 0.0006
 21 day moving average team workload - normalized team fatigue: 0.0024
 normalized team fatigue - game day performance: 0.0696
 normalized team fatigue - paper smoothed workload fatigue: 0.0324

--- a/hypotheses_modeling/team_regressions.py
+++ b/hypotheses_modeling/team_regressions.py
@ -1,4 +1,6 @@
 from sklearn import linear_model
 from sklearn.preprocessing import PolynomialFeatures
 import numpy as np
 import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score

@ -34,6 +36,7 @@ def k_days_into_future_regression(X, y, k, n0):


 def standard_lr(x, y):
  # Standard linear regression formula, gives back params and r2
  regr = linear_model.LinearRegression()
  regr.fit(x, y)
  predictions = regr.predict(x)
@ -42,19 +45,27 @@ def standard_lr(x, y):
  return regr.intercept_, regr.coef_, r2, mse


 def main():
  # fatigueSums = pd.read_csv("fatigue_total_sum.csv")
  # workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
  # print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))
 def poly_regression(x, y, degree):
  """Fit a least-squares model on polynomial features of ``x`` up to ``degree``.

  The fit is scored on the same data it was trained on.

  Returns:
    A ``(rmse, r2)`` tuple for the in-sample predictions.
  """
  expanded = PolynomialFeatures(degree=degree).fit_transform(x)
  fitted = linear_model.LinearRegression().fit(expanded, y)
  predicted = fitted.predict(expanded)
  return np.sqrt(mean_squared_error(y, predicted)), r2_score(y, predicted)

  wellness = pd.read_csv("../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv")

  wellness = wellness.fillna(0)
  x = wellness[['normSoreness', 'TimeSinceAugFirst']]
  y = wellness['normFatigue']
  print(wellness.isnull().sum())
 def main():
  """Regress player 1's normalized soreness on normalized fatigue and day.

  Reads the cleaned per-player CSV, keeps the rows of player 1 and prints
  the result of the standard linear fit followed by a degree-5 polynomial fit.
  """
  records = pd.read_csv("../data_preparation/cleaned/personal.csv", index_col=0)
  player_one = records.loc[records['playerID'] == 1]
  features = player_one[['fatigueNorm', 'day']]
  target = player_one['sorenessNorm']
  linear_fit = standard_lr(features, target)
  print(linear_fit)
  poly_fit = poly_regression(features, target, 5)
  print(poly_fit)


 if __name__ == "__main__":