diff --git a/.~lock.DataFest 2019 - Codebook.xlsx# b/.~lock.DataFest 2019 - Codebook.xlsx# new file mode 100644 index 0000000..8af3a3a --- /dev/null +++ b/.~lock.DataFest 2019 - Codebook.xlsx# @@ -0,0 +1 @@ +,pxd256,null,31.03.2019 08:16,file:///home/pxd256/.config/libreoffice/4; \ No newline at end of file diff --git a/hypotheses_modeling/KerasRegressions.py b/hypotheses_modeling/KerasRegressions.py index 4ab364d..4fe6974 100644 --- a/hypotheses_modeling/KerasRegressions.py +++ b/hypotheses_modeling/KerasRegressions.py @@ -144,14 +144,14 @@ def time_series_linear_regression(dataset, k, n0, x_columns, y_columns): tf.keras.layers.Flatten(input_shape=[input_shape]), tf.keras.layers.Dense(output_shape) ]) - model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) - model.fit(x, y, epochs=50) - loss, _ = model.evaluate(x, y) - print(loss) + model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error]) + model.fit(x, y, epochs=50, verbose=2) pred = model.predict(x) r2 = r2_(y, pred) - print(r2) - return model.get_weights() + hard_pred = model.predict(x[0]) + hard_out = np.matmul(x[0], [0.03589787, -0.03472298, 0.24109702, -0.10143519]) - 0.890594 + print(hard_pred, hard_out, y[0]) + return r2, model.get_weights() def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns): @@ -194,24 +194,26 @@ def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns): tf.keras.layers.Dense(32, activation=tf.nn.softmax), tf.keras.layers.Dense(output_shape) ]) - model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) - model.fit(x, y, epochs=100, verbose=0) + model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error]) + model.fit(x, y, epochs=100, verbose=2) loss, accuracy = model.evaluate(x, y) - print(loss, accuracy) pred = model.predict(x) r2 = r2_(y, pred) - print(r2) - return model.get_weights() + return r2, model.get_weights() def main(): filename = "personal.csv" df = pd.read_csv(filename) - x = ["day", "playerID", "DailyLoadSliding", "sleepQuality"] + x = ["day", "playerID", "sleepHoursSliding", "sleepHours", "sleepQuality", "acuteChronicRatio"] y = ["day", "playerID", "fatigueNorm"] k = 0 n0 = 30 - weights = time_series_linear_regression(df, k, n0, x, y) + r2, weights = time_series_linear_regression(df, k, n0, x, y) + print("r2") + print(r2) + print("weights") + print(weights) if __name__ == "__main__": diff --git a/hypotheses_modeling/model.ckpt b/hypotheses_modeling/model.ckpt index 7ab3040..0668d28 100644 Binary files a/hypotheses_modeling/model.ckpt and b/hypotheses_modeling/model.ckpt differ diff --git a/hypotheses_modeling/out.txt b/hypotheses_modeling/out.txt new file mode 100644 index 0000000..37d9d08 --- /dev/null +++ b/hypotheses_modeling/out.txt @@ -0,0 +1,8 @@ +Epoch 1/50, mean_squared_error: 90.4998 +Epoch 11/50, mean_squared_error: 1.0265 +Epoch 21/50, mean_squared_error: 0.9604 +Epoch 31/50, mean_squared_error: 0.8671 +Epoch 41/50, mean_squared_error: 0.7838 +r2, 0.07949744624446509 +slopes, 0.03589787,-0.03472298, 0.24109702, -0.10143519 +intercept, -0.8960594 diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py index 483c9bc..08e604e 100644 --- a/hypotheses_modeling/pytorch_shit.py +++ b/hypotheses_modeling/pytorch_shit.py @@ -3,6 +3,9 @@ import torch.nn as nn import torch.optim as optim import numpy as np import pandas as pd +from sklearn.utils.multiclass import unique_labels +from matplotlib import pyplot as plt +from sklearn.metrics import * class Net(nn.Module): @@ -24,10 +27,53 @@ def get_argmax(array): max = array[i] index = i - return [index] - - -def get_trainset(dataset, k, n0, x_columns, y_columns): + one_hot = [0, 0, 0, 0] + one_hot[index] = 1 + return one_hot + + +def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + title = "Confusion Matrix" + + # Compute confusion matrix + cm = confusion_matrix(y_true, y_pred) + # Only use the labels that appear in the data + classes = classes[unique_labels(y_true, y_pred)] + print(cm) + + fig, ax = plt.subplots() + im = ax.imshow(cm, interpolation='nearest', cmap=cmap) + ax.figure.colorbar(im, ax=ax) + # We want to show all ticks... + ax.set(xticks=np.arange(cm.shape[1]), + yticks=np.arange(cm.shape[0]), + # ... and label them with the respective list entries + xticklabels=classes, yticklabels=classes, + title=title, + ylabel='True label', + xlabel='Predicted label') + + # Rotate the tick labels and set their alignment. + plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + + # Loop over data dimensions and create text annotations. + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text(j, i, format(cm[i, j], fmt), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + return ax + + +def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns): inp = dataset[x_columns] out = dataset[y_columns] col = "day" @@ -61,34 +107,43 @@ def get_trainset(dataset, k, n0, x_columns, y_columns): x.append(xprev) y.append(yt) + randn_1 = np.random.randint(1, 5200) x = torch.FloatTensor(x) y = torch.LongTensor(y) + if batch_size: + x = x.narrow(0, randn_1, 125) + y = y.narrow(0, randn_1, 125) return x, y -def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns): - net = Net(1) - optimizer = optim.Adam(net.parameters(), lr=.001) - loss = nn.CrossEntropyLoss() +def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels): + net = Net(4) + optimizer = optim.Adam(net.parameters(), lr=.03) + loss = nn.CrossEntropyLoss() - for step in range(steps): - optimizer.zero_grad() + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - pred = net(x) - net_loss = loss(pred, torch.max(y, 1)[1]) - net_loss.backward() - optimizer.step() + for step in range(steps): + optimizer.zero_grad() - print("Loss at Step {}: {}".format(step, net_loss)) + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = net(x) + net_loss = loss(pred, torch.max(y, 1)[1]) + net_loss.backward() + optimizer.step() - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - accuracy(net, x, y) + print("Loss at Step {}: {}".format(step, net_loss)) + + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) def accuracy(net, x, y): pred = net(x) pred = pred.detach().numpy() + for row in range(len(pred)): + pred[row] = get_argmax(pred[row]) total = len(pred) correct = 0 @@ -102,15 +157,32 @@ def accuracy(net, x, y): accuracy = (correct / total) * 100 print("Accuracy for set: {}%".format(accuracy)) - torch.save(net, "model.ckpt") + torch.save(net, "model_higher_lr.ckpt") + return pred, y + + +def cm_plot(classes, dataset, k, n0, x_columns, y_columns): + model = torch.load('model.ckpt') + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = model(x) + pred = pred.detach().numpy() + for row in range(len(pred)): + pred[row] = get_argmax(pred[row]) + + print('F1: {}'.format(f1_score(y, pred > .5, average='micro'))) + plot_confusion_matrix(y, pred, classes) def main(): - filename = "personal.csv" - df = pd.read_csv(filename) - x = ["day", "playerID", "fatigueSliding"] - y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"] - time_series_sigmoid_classification(100, df, 0, 30, x, y) + filename = "personal.csv" + df = pd.read_csv(filename) + x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"] + y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", + "BestOutOfMyselfUnknown"] + # time_series_sigmoid_classification(50, df, 0, 30, x, y, y) + cm_plot( + ["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"], + df, 0, 30, x, y) if __name__ == '__main__':