nglod33 5 years ago
parent
commit
ba734fca38
2 changed files with 6107 additions and 41 deletions
  1. +71
    -41
      hypotheses_modeling/KerasRegressions.py
  2. +6036
    -0
      hypotheses_modeling/personal.csv

+ 71
- 41
hypotheses_modeling/KerasRegressions.py View File

@ -1,34 +1,42 @@
import tensorflow as tf import tensorflow as tf
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from sklearn.metrics import r2_score
def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
inp = X[x_columns]
out = Y[y_columns]
def r2_(y, pred):
    """Return the explained-to-total sum-of-squares ratio of *pred* against *y*.

    The value is ``SS_reg / SS_tot`` where ``SS_reg = sum((pred - ybar)**2)``
    and ``SS_tot = sum((y - ybar)**2)``.

    Args:
        y: Observed targets, 1-D ``(samples,)`` or 2-D ``(samples, outputs)``.
            Any array-like is accepted.
        pred: Predictions with the same shape as ``y``.

    Returns:
        The ratio as a NumPy float. It is ``nan``/``inf`` (with a NumPy
        runtime warning) when ``y`` is constant, because ``SS_tot`` is zero.

    NOTE(review): this ratio equals the usual coefficient of determination
    ``1 - SS_res / SS_tot`` only for a least-squares linear fit with an
    intercept; for other models the two can differ -- confirm which one the
    callers actually want.
    """
    y = np.asarray(y)
    pred = np.asarray(pred)
    # Mean per output column. Dividing the grand total by len(y) (the row
    # count) is only a mean for 1-D input; for 2-D targets it inflates ybar
    # by the number of columns.
    ybar = y.mean(axis=0)
    ssreg = np.sum((pred - ybar) ** 2)
    sstot = np.sum((y - ybar) ** 2)
    return ssreg / sstot
def time_series_sigmoid_classification(dataset, k, n0, x_columns, y_columns):
inp = dataset[x_columns]
out = dataset[y_columns]
col = "day" col = "day"
x = [] x = []
y = [] y = []
input_shape = 0 input_shape = 0
output_shape = 0 output_shape = 0
for player in Y["playerID"].unique():
for player in out["playerID"].unique():
XPlayer = inp[inp["playerID"] == player] XPlayer = inp[inp["playerID"] == player]
YPlayer = out[out["playerID"] == player] YPlayer = out[out["playerID"] == player]
for day in YPlayer[col][n0 - 1:]: for day in YPlayer[col][n0 - 1:]:
prev = day - k prev = day - k
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
if xprev.shape[0] != 1: if xprev.shape[0] != 1:
continue continue
else: else:
xprev = xprev[0, :] xprev = xprev[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
if input_shape == 0: if input_shape == 0:
input_shape = xprev.shape[0] input_shape = xprev.shape[0]
else: else:
if input_shape != xprev.shape[0]: if input_shape != xprev.shape[0]:
print("INCONSISTENT INPUT DIMENSION") print("INCONSISTENT INPUT DIMENSION")
exit(2) exit(2)
if input_shape == 0:
if output_shape == 0:
output_shape = yt.shape[0] output_shape = yt.shape[0]
else: else:
if output_shape != yt.shape[0]: if output_shape != yt.shape[0]:
@ -39,42 +47,42 @@ def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
x = np.array(x) x = np.array(x)
y = np.array(y) y = np.array(y)
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=input_shape),
tf.keras.layers.Flatten(input_shape=[input_shape]),
tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax) tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
]) ])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy']) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
model.fit(x, y, epochs=100)
model.fit(x, y, epochs=50)
loss, accuracy = model.evaluate(x, y) loss, accuracy = model.evaluate(x, y)
print(loss, accuracy) print(loss, accuracy)
return model.get_weights() return model.get_weights()
def time_series_dnn_classification(X, Y, k, n0, x_columns, y_columns):
inp = X[x_columns]
out = Y[y_columns]
def time_series_dnn_classification(dataset, k, n0, x_columns, y_columns):
inp = dataset[x_columns]
out = dataset[y_columns]
col = "day" col = "day"
x = [] x = []
y = [] y = []
input_shape = 0 input_shape = 0
output_shape = 0 output_shape = 0
for player in Y["playerID"].unique():
for player in out["playerID"].unique():
XPlayer = inp[inp["playerID"] == player] XPlayer = inp[inp["playerID"] == player]
YPlayer = out[out["playerID"] == player] YPlayer = out[out["playerID"] == player]
for day in YPlayer[col][n0 - 1:]: for day in YPlayer[col][n0 - 1:]:
prev = day - k prev = day - k
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
if xprev.shape[0] != 1: if xprev.shape[0] != 1:
continue continue
else: else:
xprev = xprev[0, :] xprev = xprev[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
if input_shape == 0: if input_shape == 0:
input_shape = xprev.shape[0] input_shape = xprev.shape[0]
else: else:
if input_shape != xprev.shape[0]: if input_shape != xprev.shape[0]:
print("INCONSISTENT INPUT DIMENSION") print("INCONSISTENT INPUT DIMENSION")
exit(2) exit(2)
if input_shape == 0:
if output_shape == 0:
output_shape = yt.shape[0] output_shape = yt.shape[0]
else: else:
if output_shape != yt.shape[0]: if output_shape != yt.shape[0]:
@ -85,42 +93,44 @@ def time_series_dnn_classification(X, Y, k, n0, x_columns, y_columns):
x = np.array(x) x = np.array(x)
y = np.array(y) y = np.array(y)
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=input_shape),
tf.keras.layers.Dense(32, activation=tf.nn.softmax),
tf.keras.layers.Dense(32, input_dim=input_shape,activation=tf.nn.softmax),
tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax) tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
]) ])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy']) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
model.fit(x, y, epochs=100)
print(output_shape)
model.fit(x, y, epochs=50)
loss, accuracy = model.evaluate(x, y) loss, accuracy = model.evaluate(x, y)
print(x.shape)
print(y.shape)
print(loss, accuracy) print(loss, accuracy)
return model.get_weights() return model.get_weights()
def time_series_linear_regression(X, Y, k, n0, x_columns, y_columns):
inp = X[x_columns]
out = Y[y_columns]
def time_series_linear_regression(dataset, k, n0, x_columns, y_columns):
inp = dataset[x_columns]
out = dataset[y_columns]
col = "day" col = "day"
x = [] x = []
y = [] y = []
input_shape = 0 input_shape = 0
output_shape = 0 output_shape = 0
for player in Y["playerID"].unique():
for player in out["playerID"].unique():
XPlayer = inp[inp["playerID"] == player] XPlayer = inp[inp["playerID"] == player]
YPlayer = out[out["playerID"] == player] YPlayer = out[out["playerID"] == player]
for day in YPlayer[col][n0 - 1:]: for day in YPlayer[col][n0 - 1:]:
prev = day - k prev = day - k
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
if xprev.shape[0] != 1: if xprev.shape[0] != 1:
continue continue
else: else:
xprev = xprev[0, :] xprev = xprev[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
if input_shape == 0: if input_shape == 0:
input_shape = xprev.shape[0] input_shape = xprev.shape[0]
else: else:
if input_shape != xprev.shape[0]: if input_shape != xprev.shape[0]:
print("INCONSISTENT INPUT DIMENSION") print("INCONSISTENT INPUT DIMENSION")
exit(2) exit(2)
if input_shape == 0:
if output_shape == 0:
output_shape = yt.shape[0] output_shape = yt.shape[0]
else: else:
if output_shape != yt.shape[0]: if output_shape != yt.shape[0]:
@ -131,42 +141,45 @@ def time_series_linear_regression(X, Y, k, n0, x_columns, y_columns):
x = np.array(x) x = np.array(x)
y = np.array(y) y = np.array(y)
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=input_shape),
tf.keras.layers.Flatten(input_shape=[input_shape]),
tf.keras.layers.Dense(output_shape) tf.keras.layers.Dense(output_shape)
]) ])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
model.fit(x, y, epochs=100)
loss, accuracy = model.evaluate(x, y)
print(loss, accuracy)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(x, y, epochs=50)
loss, _ = model.evaluate(x, y)
print(loss)
pred = model.predict(x)
r2 = r2_(y, pred)
print(r2)
return model.get_weights() return model.get_weights()
def time_series_dnn_regressions(X, Y, k, n0, x_columns, y_columns):
inp = X[x_columns]
out = Y[y_columns]
def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
inp = dataset[x_columns]
out = dataset[y_columns]
col = "day" col = "day"
x = [] x = []
y = [] y = []
input_shape = 0 input_shape = 0
output_shape = 0 output_shape = 0
for player in Y["playerID"].unique():
for player in out["playerID"].unique():
XPlayer = inp[inp["playerID"] == player] XPlayer = inp[inp["playerID"] == player]
YPlayer = out[out["playerID"] == player] YPlayer = out[out["playerID"] == player]
for day in YPlayer[col][n0 - 1:]: for day in YPlayer[col][n0 - 1:]:
prev = day - k prev = day - k
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
if xprev.shape[0] != 1: if xprev.shape[0] != 1:
continue continue
else: else:
xprev = xprev[0, :] xprev = xprev[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
if input_shape == 0: if input_shape == 0:
input_shape = xprev.shape[0] input_shape = xprev.shape[0]
else: else:
if input_shape != xprev.shape[0]: if input_shape != xprev.shape[0]:
print("INCONSISTENT INPUT DIMENSION") print("INCONSISTENT INPUT DIMENSION")
exit(2) exit(2)
if input_shape == 0:
if output_shape == 0:
output_shape = yt.shape[0] output_shape = yt.shape[0]
else: else:
if output_shape != yt.shape[0]: if output_shape != yt.shape[0]:
@ -177,12 +190,29 @@ def time_series_dnn_regressions(X, Y, k, n0, x_columns, y_columns):
x = np.array(x) x = np.array(x)
y = np.array(y) y = np.array(y)
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=input_shape),
tf.keras.layers.Flatten(input_shape=[input_shape]),
tf.keras.layers.Dense(32, activation=tf.nn.softmax), tf.keras.layers.Dense(32, activation=tf.nn.softmax),
tf.keras.layers.Dense(output_shape) tf.keras.layers.Dense(output_shape)
]) ])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
model.fit(x, y, epochs=100)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(x, y, epochs=50)
loss, accuracy = model.evaluate(x, y) loss, accuracy = model.evaluate(x, y)
print(loss, accuracy) print(loss, accuracy)
pred = model.predict(x)
r2 = r2_(y, pred)
print(r2)
return model.get_weights() return model.get_weights()
def main():
    """Train the DNN classifier on ``personal.csv`` from the working directory.

    The CSV is read with pandas and handed to
    ``time_series_dnn_classification`` together with the feature and target
    column lists. Both lists include ``"day"`` and ``"playerID"``, which the
    classifier uses to align rows per player and per day.
    """
    frame = pd.read_csv("personal.csv")
    feature_columns = ["day", "playerID", "fatigueSliding"]
    target_columns = [
        "day",
        "playerID",
        "BestOutOfMyselfAbsolutely",
        "BestOutOfMyselfSomewhat",
        "BestOutOfMyselfNotAtAll",
        "BestOutOfMyselfUnknown",
    ]
    day_offset = 0  # passed as k: features are taken from day - k
    first_day_index = 30  # passed as n0: each player's first n0 - 1 days are skipped
    # The returned weights are not used further here.
    time_series_dnn_classification(
        frame, day_offset, first_day_index, feature_columns, target_columns
    )


if __name__ == "__main__":
    main()

+ 6036
- 0
hypotheses_modeling/personal.csv
File diff suppressed because it is too large
View File


Loading…
Cancel
Save