From 8964ee4f41cd5650a197855425ec8a8b582d7a5e Mon Sep 17 00:00:00 2001 From: Ryan Missel Date: Sun, 31 Mar 2019 12:01:10 -0400 Subject: [PATCH] working pytorch model :partyparrot: --- hypotheses_modeling/pytorch_shit.py | 113 ++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py index 413aeed..08e604e 100644 --- a/hypotheses_modeling/pytorch_shit.py +++ b/hypotheses_modeling/pytorch_shit.py @@ -3,6 +3,9 @@ import torch.nn as nn import torch.optim as optim import numpy as np import pandas as pd +from sklearn.utils.multiclass import unique_labels +from matplotlib import pyplot as plt +from sklearn.metrics import * class Net(nn.Module): @@ -24,12 +27,53 @@ def get_argmax(array): max = array[i] index = i - one_hot = [0,0,0,0] + one_hot = [0, 0, 0, 0] one_hot[index] = 1 return one_hot -def get_trainset(dataset, k, n0, x_columns, y_columns): +def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + title = "Confusion Matrix" + + # Compute confusion matrix + cm = confusion_matrix(y_true, y_pred) + # Only use the labels that appear in the data + classes = classes[unique_labels(y_true, y_pred)] + print(cm) + + fig, ax = plt.subplots() + im = ax.imshow(cm, interpolation='nearest', cmap=cmap) + ax.figure.colorbar(im, ax=ax) + # We want to show all ticks... + ax.set(xticks=np.arange(cm.shape[1]), + yticks=np.arange(cm.shape[0]), + # ... and label them with the respective list entries + xticklabels=classes, yticklabels=classes, + title=title, + ylabel='True label', + xlabel='Predicted label') + + # Rotate the tick labels and set their alignment. + plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + + # Loop over data dimensions and create text annotations. + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text(j, i, format(cm[i, j], fmt), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + return ax + + +def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns): inp = dataset[x_columns] out = dataset[y_columns] col = "day" @@ -63,32 +107,36 @@ def get_trainset(dataset, k, n0, x_columns, y_columns): x.append(xprev) y.append(yt) + randn_1 = np.random.randint(1, 5200) x = torch.FloatTensor(x) y = torch.LongTensor(y) + if batch_size: + x = x.narrow(0, randn_1, 125) + y = y.narrow(0, randn_1, 125) return x, y -def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns): - net = Net(1) - optimizer = optim.Adam(net.parameters(), lr=.001) - loss = nn.CrossEntropyLoss() +def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels): + net = Net(4) + optimizer = optim.Adam(net.parameters(), lr=.03) + loss = nn.CrossEntropyLoss() - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - accuracy(net, x, y) + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) - for step in range(steps): - optimizer.zero_grad() + for step in range(steps): + optimizer.zero_grad() - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - pred = net(x) - net_loss = loss(pred, torch.max(y, 1)[1]) - net_loss.backward() - optimizer.step() + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = net(x) + net_loss = loss(pred, torch.max(y, 1)[1]) + net_loss.backward() + optimizer.step() - print("Loss at Step {}: {}".format(step, net_loss)) + print("Loss at Step {}: {}".format(step, net_loss)) - x, y = get_trainset(dataset, k, n0, x_columns, y_columns) - accuracy(net, x, y) + x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns) + accuracy(net, x, y) def accuracy(net, x, y): @@ -109,15 +157,32 @@ def accuracy(net, x, y): accuracy = (correct / total) * 100 print("Accuracy for set: {}%".format(accuracy)) - torch.save(net, "model.ckpt") + torch.save(net, "model_higher_lr.ckpt") + return pred, y + + +def cm_plot(classes, dataset, k, n0, x_columns, y_columns): + model = torch.load('model.ckpt') + x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns) + pred = model(x) + pred = pred.detach().numpy() + for row in range(len(pred)): + pred[row] = get_argmax(pred[row]) + + print('F1: {}'.format(f1_score(y, pred > .5, average='micro'))) + plot_confusion_matrix(y, pred, classes) def main(): - filename = "personal.csv" - df = pd.read_csv(filename) - x = ["day", "playerID", "fatigueSliding"] - y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"] - time_series_sigmoid_classification(2, df, 0, 30, x, y) + filename = "personal.csv" + df = pd.read_csv(filename) + x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"] + y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", + "BestOutOfMyselfUnknown"] + # time_series_sigmoid_classification(50, df, 0, 30, x, y, y) + cm_plot( + ["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"], + df, 0, 30, x, y) if __name__ == '__main__':