"""Keras softmax classifiers trained on lagged per-player time-series rows."""

import numpy as np
import pandas as pd

# Key columns used to align rows; they are never fed to the network.
PLAYER_COLUMN = "playerID"
DAY_COLUMN = "day"


def _without_keys(columns):
    """Return *columns* as a list with the player/day key columns removed."""
    return [c for c in columns if c not in (PLAYER_COLUMN, DAY_COLUMN)]


def _build_lagged_samples(X, Y, k, n0, x_columns, y_columns):
    """Pair each target row with the same player's feature row k days earlier.

    Returns ``(x, y)`` as 2-D float32 arrays with one row per sample.
    Raises ``ValueError`` when no usable column or no sample is available.
    """
    feature_cols = _without_keys(x_columns)
    target_cols = _without_keys(y_columns)
    if not feature_cols or not target_cols:
        raise ValueError(
            "x_columns and y_columns must each name at least one column "
            "besides the 'playerID' and 'day' keys"
        )

    # n0 is 1-based; clamp so n0 <= 0 does not turn into a negative slice
    # start (which would silently keep only the last row).
    first_row = max(n0 - 1, 0)

    features = []
    targets = []
    for player in Y[PLAYER_COLUMN].unique():
        x_player = X[X[PLAYER_COLUMN] == player]
        y_player = Y[Y[PLAYER_COLUMN] == player]
        for day in y_player[DAY_COLUMN].iloc[first_row:]:
            lagged = x_player.loc[x_player[DAY_COLUMN] == day - k, feature_cols]
            # Skip days whose lagged input is missing or ambiguous.
            if len(lagged) != 1:
                continue
            target = y_player.loc[y_player[DAY_COLUMN] == day, target_cols]
            features.append(lagged.to_numpy()[0])
            # If several target rows share the day, the first one is used.
            targets.append(target.to_numpy()[0])

    if not features:
        raise ValueError(
            "no (lagged input, target) pairs found for k=%r, n0=%r" % (k, n0)
        )

    # Converting here makes a non-numeric column fail immediately instead of
    # deep inside Keras.
    x = np.asarray(features, dtype=np.float32)
    y = np.asarray(targets, dtype=np.float32)
    return x, y


def time_series_sigmoid_classification(X: pd.DataFrame, Y: pd.DataFrame,
                                       k: int, n0: int, x_columns, y_columns,
                                       hidden_units=32, epochs=100):
    """Fit a softmax classifier predicting day-t targets from day-(t-k) inputs.

    For every player in ``Y`` and every target row from the player's
    ``n0``-th row onwards, the player's row in ``X`` whose ``day`` equals
    ``day - k`` becomes the input; days without exactly one such row are
    skipped.  The model is trained and evaluated on the same samples.

    Args:
        X: Feature frame; must contain ``playerID`` and ``day`` columns.
        Y: Target frame; must contain ``playerID`` and ``day`` columns.
        k: Lag between the input day and the target day (``day - k`` must be
            a valid operation on the ``day`` values).
        n0: 1-based index of the first row per player used as a target.
        x_columns: Feature column names.  ``playerID``/``day`` may be listed
            but are used only for alignment, never as features.
        y_columns: Target column names, same key handling as ``x_columns``.
            NOTE(review): assumed to be a one-hot encoding of the class,
            since the softmax width equals the number of target columns.
        hidden_units: Width of the (activation-free) hidden dense layer.
            ``None`` or ``0`` builds the single-layer model.
        epochs: Number of training epochs.

    Returns:
        The trained model's weights as returned by ``model.get_weights()``.

    Raises:
        ValueError: If no feature/target column remains or no sample is found.
    """
    x, y = _build_lagged_samples(X, Y, k, n0, x_columns, y_columns)

    # Imported lazily: TensorFlow is slow to import and is not needed for
    # sample construction or input validation.
    import tensorflow as tf

    # Keras expects a shape tuple, not a bare int.
    layers = [tf.keras.layers.Flatten(input_shape=(x.shape[1],))]
    if hidden_units:
        layers.append(tf.keras.layers.Dense(hidden_units))
    layers.append(tf.keras.layers.Dense(y.shape[1], activation=tf.nn.softmax))
    model = tf.keras.Sequential(layers)

    # Targets are one column per class, so the non-sparse loss applies;
    # the sparse variant expects a single integer class id per sample.
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', 'categorical_accuracy'],
    )
    model.fit(x, y, epochs=epochs)

    # evaluate() returns the loss followed by one value per compiled metric.
    loss, accuracy, *_ = model.evaluate(x, y)
    print(loss, accuracy)
    return model.get_weights()