From 9e76b6c16018bb8f53995226e8fc0985b6455929 Mon Sep 17 00:00:00 2001 From: Ryan Missel Date: Sun, 31 Mar 2019 09:38:09 -0400 Subject: [PATCH 1/3] Auto stash before merge of "master" and "origin/master" --- hypotheses_modeling/model.ckpt | Bin 4443 -> 4443 bytes hypotheses_modeling/pytorch_shit.py | 11 +++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hypotheses_modeling/model.ckpt b/hypotheses_modeling/model.ckpt index 7ab304026c704dccd1c109030959810b233c7987..0668d2848c24e82ddd181f6d85d0cb2347562871 100644 GIT binary patch delta 398 zcmcbubX#e|TV7rxLqk&|b4v>oQ$quj$-j8R5h5n0Mi!If_^u;FEDS6RCOh%F@xXN% znHWxko}@< z8Fq6jbM~91aoT@%H@82rd!k*5TCn|0OHDfksDXEdv-i!J)^2yyf2Cbgc%0oTrJs9E zo!M*`xF*|0-LKf=`$J-1(E;UsEv=mUK8xnr`Mi~~yS9kkE@^tl-nXl!?0aDUa1v;y}y38Aq1xE19`^&*N}?J4V$-w$C3P+xux(`@S0rS+*ya PEw*(`-@JG4A+CJ@TuOzh delta 398 zcmcbubX#e|TV7scLlYANBQs+YV-o|5$-j8R5h5lgW+s#4_^u;FOwA3ACOh%F@xXPN zn3+zV!rxU77XTSk$QZ%Pz`y_(0~u7vgeGQYYF5aMDh4vXkR?@*1E?7U95T}P^SU|g z_qkJQUvQmgf70Vywz}^Z?rWW%y3eC=s~t?;?bY>a`<1y__emu4*q#1hvcGQ0ntf9z zHQK+{f3`20lfjM!VjhEC>b`vy59Rju^>5wRw2jYB0V;k!g<;>~{SNy)Ok(XOZgAUY z>|V03RcVcNtxAR6OCza$4W~|8-@co@&x1?f`kTSeJ&hI@ZCBpv*jJ(@wYT&C>U|aV z8$0&NGoRgO^XSaJyYkI;IvMx(-2Z8?Z-1bbUGtQq``GI5?>*%{(RR`=W4ov+q5FIr TnD_azxY&NKIA!~I_bNL8p$?1+ diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py index 483c9bc..413aeed 100644 --- a/hypotheses_modeling/pytorch_shit.py +++ b/hypotheses_modeling/pytorch_shit.py @@ -24,7 +24,9 @@ def get_argmax(array): max = array[i] index = i - return [index] + one_hot = [0,0,0,0] + one_hot[index] = 1 + return one_hot def get_trainset(dataset, k, n0, x_columns, y_columns): @@ -71,6 +73,9 @@ def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_colum optimizer = optim.Adam(net.parameters(), lr=.001) loss = nn.CrossEntropyLoss() + x, y = get_trainset(dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) + for step in range(steps): optimizer.zero_grad() @@ -89,6 +94,8 @@ def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_colum def accuracy(net, x, y): pred = net(x) pred = pred.detach().numpy() + for row in range(len(pred)): + pred[row] = get_argmax(pred[row]) total = len(pred) correct = 0 @@ -110,7 +117,7 @@ def main(): df = pd.read_csv(filename) x = ["day", "playerID", "fatigueSliding"] y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"] - time_series_sigmoid_classification(100, df, 0, 30, x, y) + time_series_sigmoid_classification(2, df, 0, 30, x, y) if __name__ == '__main__': From c819c339072f30cec8ce836b81f3d190e3473dfb Mon Sep 17 00:00:00 2001 From: PerryXDeng Date: Sun, 31 Mar 2019 11:33:08 -0400 Subject: [PATCH 2/3] output from multivar lin reg --- .~lock.DataFest 2019 - Codebook.xlsx# | 1 + hypotheses_modeling/KerasRegressions.py | 28 +++++++++++++------------ hypotheses_modeling/out.txt | 8 +++++++ 3 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 .~lock.DataFest 2019 - Codebook.xlsx# create mode 100644 hypotheses_modeling/out.txt diff --git a/.~lock.DataFest 2019 - Codebook.xlsx# b/.~lock.DataFest 2019 - Codebook.xlsx# new file mode 100644 index 0000000..8af3a3a --- /dev/null +++ b/.~lock.DataFest 2019 - Codebook.xlsx# @@ -0,0 +1 @@ +,pxd256,null,31.03.2019 08:16,file:///home/pxd256/.config/libreoffice/4; \ No newline at end of file diff --git a/hypotheses_modeling/KerasRegressions.py b/hypotheses_modeling/KerasRegressions.py index 4ab364d..4fe6974 100644 --- a/hypotheses_modeling/KerasRegressions.py +++ b/hypotheses_modeling/KerasRegressions.py @@ -144,14 +144,14 @@ def time_series_linear_regression(dataset, k, n0, x_columns, y_columns): tf.keras.layers.Flatten(input_shape=[input_shape]), tf.keras.layers.Dense(output_shape) ]) - model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) - model.fit(x, y, epochs=50) - loss, _ = model.evaluate(x, y) - print(loss) + model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error]) + model.fit(x, y, epochs=50, verbose=2) pred = model.predict(x) r2 = r2_(y, pred) - print(r2) - return model.get_weights() + hard_pred = model.predict(x[0]) + hard_out = np.matmul(x[0], [0.03589787, -0.03472298, 0.24109702, -0.10143519]) - 0.890594 + print(hard_pred, hard_out, y[0]) + return r2, model.get_weights() def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns): @@ -194,24 +194,26 @@ def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns): tf.keras.layers.Dense(32, activation=tf.nn.softmax), tf.keras.layers.Dense(output_shape) ]) - model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) - model.fit(x, y, epochs=100, verbose=0) + model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error]) + model.fit(x, y, epochs=100, verbose=2) loss, accuracy = model.evaluate(x, y) - print(loss, accuracy) pred = model.predict(x) r2 = r2_(y, pred) - print(r2) - return model.get_weights() + return r2, model.get_weights() def main(): filename = "personal.csv" df = pd.read_csv(filename) - x = ["day", "playerID", "DailyLoadSliding", "sleepQuality"] + x = ["day", "playerID", "sleepHoursSliding", "sleepHours", "sleepQuality", "acuteChronicRatio"] y = ["day", "playerID", "fatigueNorm"] k = 0 n0 = 30 - weights = time_series_linear_regression(df, k, n0, x, y) + r2, weights = time_series_linear_regression(df, k, n0, x, y) + print("r2") + print(r2) + print("weights") + print(weights) if __name__ == "__main__": diff --git a/hypotheses_modeling/out.txt b/hypotheses_modeling/out.txt new file mode 100644 index 0000000..37d9d08 --- /dev/null +++ b/hypotheses_modeling/out.txt @@ -0,0 +1,8 @@ +Epoch 1/50, mean_squared_error: 90.4998 +Epoch 11/50, mean_squared_error: 1.0265 +Epoch 21/50, mean_squared_error: 0.9604 +Epoch 31/50, mean_squared_error: 0.8671 +Epoch 41/50, mean_squared_error: 0.7838 +r2, 0.07949744624446509 +slopes, 0.03589787,-0.03472298, 0.24109702, -0.10143519 +intercept, -0.8960594 From 8964ee4f41cd5650a197855425ec8a8b582d7a5e Mon Sep 17 00:00:00 2001 From: Ryan Missel Date: Sun, 31 Mar 2019 12:01:10 -0400 Subject: [PATCH 3/3] working pytorch model :partyparrot: --- hypotheses_modeling/pytorch_shit.py | 113 ++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py index 413aeed..08e604e 100644 --- a/hypotheses_modeling/pytorch_shit.py +++ b/hypotheses_modeling/pytorch_shit.py @@ -3,6 +3,9 @@ import torch.nn as nn import torch.optim as optim import numpy as np import pandas as pd +from sklearn.utils.multiclass import unique_labels +from matplotlib import pyplot as plt +from sklearn.metrics import * class Net(nn.Module): @@ -24,12 +27,53 @@ def get_argmax(array): max = array[i] index = i - one_hot = [0,0,0,0] + one_hot = [0, 0, 0, 0] one_hot[index] = 1 return one_hot -def get_trainset(dataset, k, n0, x_columns, y_columns): +def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + title = "Confusion Matrix" + + # Compute confusion matrix + cm = confusion_matrix(y_true, y_pred) + # Only use the labels that appear in the data + classes = classes[unique_labels(y_true, y_pred)] + print(cm) + + fig, ax = plt.subplots() + im = ax.imshow(cm, interpolation='nearest', cmap=cmap) + ax.figure.colorbar(im, ax=ax) + # We want to show all ticks... + ax.set(xticks=np.arange(cm.shape[1]), + yticks=np.arange(cm.shape[0]), + # ... and label them with the respective list entries + xticklabels=classes, yticklabels=classes, + title=title, + ylabel='True label', + xlabel='Predicted label') + + # Rotate the tick labels and set their alignment. + plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + + # Loop over data dimensions and create text annotations. + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text(j, i, format(cm[i, j], fmt), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + return ax + + +def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns): inp = dataset[x_columns] out = dataset[y_columns] col = "day" @@ -63,32 +107,36 @@ def get_trainset(dataset, k, n0, x_columns, y_columns): x.append(xprev) y.append(yt) + randn_1 = np.random.randint(1, 5200) x = torch.FloatTensor(x) y = torch.LongTensor(y) + if batch_size: + x = x.narrow(0, randn_1, 125) + y = y.narrow(0, randn_1, 125) return x, y -def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns): - net = Net(1) - optimizer = optim.Adam(net.parameters(), lr=.001) - loss = nn.CrossEntropyLoss() +def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels): + net = Net(4) + optimizer = optim.Adam(net.parameters(), lr=.03) + loss = nn.CrossEntropyLoss() - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - accuracy(net, x, y) + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) - for step in range(steps): - optimizer.zero_grad() + for step in range(steps): + optimizer.zero_grad() - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - pred = net(x) - net_loss = loss(pred, torch.max(y, 1)[1]) - net_loss.backward() - optimizer.step() + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = net(x) + net_loss = loss(pred, torch.max(y, 1)[1]) + net_loss.backward() + optimizer.step() - print("Loss at Step {}: {}".format(step, net_loss)) + print("Loss at Step {}: {}".format(step, net_loss)) - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - accuracy(net, x, y) + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) def accuracy(net, x, y): @@ -109,15 +157,32 @@ def accuracy(net, x, y): accuracy = (correct / total) * 100 print("Accuracy for set: {}%".format(accuracy)) - torch.save(net, "model.ckpt") + torch.save(net, "model_higher_lr.ckpt") + return pred, y + + +def cm_plot(classes, dataset, k, n0, x_columns, y_columns): + model = torch.load('model.ckpt') + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = model(x) + pred = pred.detach().numpy() + for row in range(len(pred)): + pred[row] = get_argmax(pred[row]) + + print('F1: {}'.format(f1_score(y, pred > .5, average='micro'))) + plot_confusion_matrix(y, pred, classes) def main(): - filename = "personal.csv" - df = pd.read_csv(filename) - x = ["day", "playerID", "fatigueSliding"] - y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"] - time_series_sigmoid_classification(2, df, 0, 30, x, y) + filename = "personal.csv" + df = pd.read_csv(filename) + x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"] + y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", + "BestOutOfMyselfUnknown"] + # time_series_sigmoid_classification(50, df, 0, 30, x, y, y) + cm_plot( + ["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"], + df, 0, 30, x, y) if __name__ == '__main__':