nglod33 5 years ago
parent
commit
9a907b5460
4 changed files with 6157 additions and 11 deletions
  1. +6036
    -0
      data_preparation/cleaned/personal.csv
  2. +94
    -2
      hypotheses_modeling/KerasRegressions.py
  3. +7
    -0
      hypotheses_modeling/hypotheses.txt
  4. +20
    -9
      hypotheses_modeling/team_regressions.py

+ 6036
- 0
data_preparation/cleaned/personal.csv
File diff suppressed because it is too large
View File


+ 94
- 2
hypotheses_modeling/KerasRegressions.py View File

@ -49,7 +49,7 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
return model.get_weights()
def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
def time_series_dnn_classification(X, Y, k, n0, x_columns, y_columns):
inp = X[x_columns]
out = Y[y_columns]
col = "day"
@ -86,7 +86,7 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
y = np.array(y)
model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=input_shape),
tf.keras.layers.Dense(32),
tf.keras.layers.Dense(32, activation=tf.nn.softmax),
tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
@ -94,3 +94,95 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
loss, accuracy = model.evaluate(x, y)
print(loss, accuracy)
return model.get_weights()
def time_series_linear_regression(X, Y, k, n0, x_columns, y_columns):
    """Fit a single-layer linear Keras model predicting Y at day t from X at day t - k.

    For every player found in ``Y["playerID"]`` and every day in that player's
    target rows (starting at position ``n0 - 1``), the feature row recorded
    exactly ``k`` days earlier is paired with the target row of that day.
    Days for which zero or several feature rows match are skipped.

    Args:
        X: DataFrame holding the feature columns.
        Y: DataFrame holding the target columns.
        k: Lag, in days, between the feature row and the target row.
        n0: 1-based position of the first target row used for each player.
        x_columns: Columns selected from X; must contain "playerID" and "day".
        y_columns: Columns selected from Y; must contain "playerID" and "day".

    Returns:
        The trained model's weights, as returned by ``model.get_weights()``.

    Raises:
        ValueError: If no (feature, target) pair could be built.
        SystemExit: With code 2 if sample dimensions are inconsistent.
    """
    inp = X[x_columns]
    out = Y[y_columns]
    col = "day"
    x = []
    y = []
    input_shape = 0
    output_shape = 0
    for player in Y["playerID"].unique():
        x_player = inp[inp["playerID"] == player]
        y_player = out[out["playerID"] == player]
        for day in y_player[col].iloc[n0 - 1:]:
            prev = day - k
            xprev = x_player[x_player[col] == prev].drop(columns=[col]).to_numpy()
            if xprev.shape[0] != 1:
                # No unique feature row k days earlier: nothing to pair with.
                continue
            # NOTE(review): only the day column is dropped, so "playerID" stays
            # in both the feature and the target vector - confirm this is intended.
            xprev = xprev[0, :]
            yt = y_player[y_player[col] == day].drop(columns=[col]).to_numpy()[0, :]
            if input_shape == 0:
                input_shape = xprev.shape[0]
            elif input_shape != xprev.shape[0]:
                print("INCONSISTENT INPUT DIMENSION")
                raise SystemExit(2)
            # The output width is tracked on its own; testing input_shape here
            # (as before) rejected the very first sample.
            if output_shape == 0:
                output_shape = yt.shape[0]
            elif output_shape != yt.shape[0]:
                print("INCONSISTENT OUTPUT DIMENSION")
                raise SystemExit(2)
            x.append(xprev)
            y.append(yt)
    if not x:
        raise ValueError("no (X[t - k], Y[t]) training pairs could be built")
    x = np.array(x)
    y = np.array(y)
    model = tf.keras.Sequential([
        # Keras expects input_shape as a tuple, not a bare int.
        tf.keras.layers.Flatten(input_shape=(input_shape,)),
        tf.keras.layers.Dense(output_shape),
    ])
    # Regression on continuous targets: squared-error loss, not a
    # classification cross-entropy with accuracy metrics.
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    model.fit(x, y, epochs=100)
    # evaluate() returns the loss followed by one value per compiled metric.
    loss, mae = model.evaluate(x, y)
    print(loss, mae)
    return model.get_weights()
def time_series_dnn_regressions(X, Y, k, n0, x_columns, y_columns):
    """Fit a one-hidden-layer Keras network predicting Y at day t from X at day t - k.

    For every player found in ``Y["playerID"]`` and every day in that player's
    target rows (starting at position ``n0 - 1``), the feature row recorded
    exactly ``k`` days earlier is paired with the target row of that day.
    Days for which zero or several feature rows match are skipped.

    Args:
        X: DataFrame holding the feature columns.
        Y: DataFrame holding the target columns.
        k: Lag, in days, between the feature row and the target row.
        n0: 1-based position of the first target row used for each player.
        x_columns: Columns selected from X; must contain "playerID" and "day".
        y_columns: Columns selected from Y; must contain "playerID" and "day".

    Returns:
        The trained model's weights, as returned by ``model.get_weights()``.

    Raises:
        ValueError: If no (feature, target) pair could be built.
        SystemExit: With code 2 if sample dimensions are inconsistent.
    """
    inp = X[x_columns]
    out = Y[y_columns]
    col = "day"
    x = []
    y = []
    input_shape = 0
    output_shape = 0
    for player in Y["playerID"].unique():
        x_player = inp[inp["playerID"] == player]
        y_player = out[out["playerID"] == player]
        for day in y_player[col].iloc[n0 - 1:]:
            prev = day - k
            xprev = x_player[x_player[col] == prev].drop(columns=[col]).to_numpy()
            if xprev.shape[0] != 1:
                # No unique feature row k days earlier: nothing to pair with.
                continue
            # NOTE(review): only the day column is dropped, so "playerID" stays
            # in both the feature and the target vector - confirm this is intended.
            xprev = xprev[0, :]
            yt = y_player[y_player[col] == day].drop(columns=[col]).to_numpy()[0, :]
            if input_shape == 0:
                input_shape = xprev.shape[0]
            elif input_shape != xprev.shape[0]:
                print("INCONSISTENT INPUT DIMENSION")
                raise SystemExit(2)
            # The output width is tracked on its own; testing input_shape here
            # (as before) rejected the very first sample.
            if output_shape == 0:
                output_shape = yt.shape[0]
            elif output_shape != yt.shape[0]:
                print("INCONSISTENT OUTPUT DIMENSION")
                raise SystemExit(2)
            x.append(xprev)
            y.append(yt)
    if not x:
        raise ValueError("no (X[t - k], Y[t]) training pairs could be built")
    x = np.array(x)
    y = np.array(y)
    model = tf.keras.Sequential([
        # Keras expects input_shape as a tuple, not a bare int.
        tf.keras.layers.Flatten(input_shape=(input_shape,)),
        # NOTE(review): softmax as a hidden activation is kept from the
        # original; confirm it is preferred over e.g. relu.
        tf.keras.layers.Dense(32, activation=tf.nn.softmax),
        tf.keras.layers.Dense(output_shape),
    ])
    # Regression on continuous targets: squared-error loss, not a
    # classification cross-entropy with accuracy metrics.
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    model.fit(x, y, epochs=100)
    # evaluate() returns the loss followed by one value per compiled metric.
    loss, mae = model.evaluate(x, y)
    print(loss, mae)
    return model.get_weights()

+ 7
- 0
hypotheses_modeling/hypotheses.txt View File

@ -21,8 +21,15 @@ Team:
5.
Individual:
Player 1 - predictors: fatigue + day; target: soreness
lr (linear regression) - 0.24677741789096985
pr (polynomial regression, degree 5) - 0.32119826926167405
Perry:
7 day moving average team workload - normalized team fatigue: 0.0006
21 day moving average team workload - normalized team fatigue: 0.0024
normalized team fatigue - game day performance: 0.0696
normalized team fatigue - paper smoothed workload fatigue: 0.0324

+ 20
- 9
hypotheses_modeling/team_regressions.py View File

@ -1,4 +1,6 @@
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
@ -34,6 +36,7 @@ def k_days_into_future_regression(X, y, k, n0):
def standard_lr(x, y):
# Standard linear regression formula, gives back params and r2
regr = linear_model.LinearRegression()
regr.fit(x, y)
predictions = regr.predict(x)
@ -42,19 +45,27 @@ def standard_lr(x, y):
return regr.intercept_, regr.coef_, r2, mse
def main():
# fatigueSums = pd.read_csv("fatigue_total_sum.csv")
# workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
# print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))
def poly_regression(x, y, degree):
    """Fit a polynomial regression of the given degree on (x, y).

    The inputs are expanded with ``PolynomialFeatures(degree=degree)`` and a
    linear model is fitted on the expanded features. Scores are computed on
    the same data the model was fitted on.

    Returns:
        A ``(rmse, r2)`` tuple.
    """
    expanded = PolynomialFeatures(degree=degree).fit_transform(x)
    regressor = linear_model.LinearRegression()
    regressor.fit(expanded, y)
    fitted = regressor.predict(expanded)
    root_mse = np.sqrt(mean_squared_error(y, fitted))
    return root_mse, r2_score(y, fitted)
wellness = pd.read_csv("../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv")
wellness = wellness.fillna(0)
x = wellness[['normSoreness', 'TimeSinceAugFirst']]
y = wellness['normFatigue']
print(wellness.isnull().sum())
def main():
    """Regress player 1's normalized soreness on normalized fatigue and day.

    Reads the cleaned personal data set, keeps the rows whose playerID is 1,
    and prints the result of a standard linear regression followed by the
    result of a degree-5 polynomial regression.
    """
    records = pd.read_csv("../data_preparation/cleaned/personal.csv", index_col=0)
    first_player = records[records['playerID'] == 1]
    features = first_player[['fatigueNorm', 'day']]
    target = first_player['sorenessNorm']
    print(standard_lr(features, target))
    print(poly_regression(features, target, 5))
if __name__ == "__main__":

Loading…
Cancel
Save