From 9e76b6c16018bb8f53995226e8fc0985b6455929 Mon Sep 17 00:00:00 2001
From: Ryan Missel <rxm7244@rit.edu>
Date: Sun, 31 Mar 2019 09:38:09 -0400
Subject: [PATCH 1/3] Auto stash before merge of "master" and "origin/master"

---
 hypotheses_modeling/model.ckpt      | Bin 4443 -> 4443 bytes
 hypotheses_modeling/pytorch_shit.py |  11 +++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/hypotheses_modeling/model.ckpt b/hypotheses_modeling/model.ckpt
index 7ab304026c704dccd1c109030959810b233c7987..0668d2848c24e82ddd181f6d85d0cb2347562871 100644
GIT binary patch
delta 398
zcmcbubX#e|TV7rxLqk&|b4v>oQ$quj$-j8R5h5n0Mi!If_^u;FEDS6RCOh%F@xXN%
znHWx<!rxVoEMQj17{SZHzyKEm8C1xGDh9HykQr4BWPBk@svZkaGYI%O+}*4BK-+F2
zSBqV(6qg+bM3^CMRjqwQrLO$}tJk(Aehc<-1=REHe|%5Gevw;;ZPk(xxWYD>ko}@<
z8Fq6jbM~91aoT@%H@82rd!k*5TCn|0OHDfksDXEdv-i!J)^2yyf2Cbgc%0oTrJs9E
zo!M*`xF*|0-LKf=`$J-1(E;UsEv=mUK8xnr`Mi~~yS9kkE@^tl-nXl!?0aDUa<bhQ
z$r!tRTiEPGoR-=xGM>1v;y}y38Aq1xE19`^&*N}?J4V$-w$C3P+xux(`@S0rS+*ya
PEw*(`-@JG4A+CJ@TuOzh

delta 398
zcmcbubX#e|TV7scLlYANBQs+YV-o|5$-j8R5h5lgW+s#4_^u;FOwA3ACOh%F@xXPN
zn3+zV!rxU77XTSk$QZ%Pz`y_(0~u7vgeGQYYF5aMDh4vXkR?@*1E?7U95T}P^SU|g
z_qkJQUvQmgf70Vywz}^Z?rWW%y3eC=s~t?;?bY>a`<1y__emu4*q#1hvcGQ0ntf9z
zHQK+{f3`20lfjM!VjhEC>b`vy59Rju^>5wRw2jYB0V;k!g<;>~{SNy)Ok(XOZgAUY
z>|V03RcVcNtxAR6OCza$4W~|8-@co@&x1?f`kTSeJ&hI@ZCBpv*jJ(@wYT&C>U|aV
z8$0&NGoRgO^XSaJyYkI;IvMx(-2Z8?Z-1bbUGtQq``GI5?>*%{(RR`=W4ov+q5FIr
TnD_azxY&NKIA!~I_bNL8p$?1+

diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py
index 483c9bc..413aeed 100644
--- a/hypotheses_modeling/pytorch_shit.py
+++ b/hypotheses_modeling/pytorch_shit.py
@@ -24,7 +24,9 @@ def get_argmax(array):
             max = array[i]
             index = i
 
-    return [index]
+    one_hot = [0,0,0,0]
+    one_hot[index] = 1
+    return one_hot
 
 
 def get_trainset(dataset, k, n0, x_columns, y_columns):
@@ -71,6 +73,9 @@ def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_colum
   optimizer = optim.Adam(net.parameters(), lr=.001)
   loss = nn.CrossEntropyLoss()
 
+  x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
+  accuracy(net, x, y)
+
   for step in range(steps):
       optimizer.zero_grad()
 
@@ -89,6 +94,8 @@ def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_colum
 def accuracy(net, x, y):
     pred = net(x)
     pred = pred.detach().numpy()
+    for row in range(len(pred)):
+        pred[row] = get_argmax(pred[row])
 
     total = len(pred)
     correct = 0
@@ -110,7 +117,7 @@ def main():
   df = pd.read_csv(filename)
   x = ["day", "playerID", "fatigueSliding"]
   y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"]
-  time_series_sigmoid_classification(100, df, 0, 30, x, y)
+  time_series_sigmoid_classification(2, df, 0, 30, x, y)
 
 
 if __name__ == '__main__':

From c819c339072f30cec8ce836b81f3d190e3473dfb Mon Sep 17 00:00:00 2001
From: PerryXDeng <PerryXDeng@users.noreply.github.com>
Date: Sun, 31 Mar 2019 11:33:08 -0400
Subject: [PATCH 2/3] output from multivar lin reg

---
 .~lock.DataFest 2019 - Codebook.xlsx#   |  1 +
 hypotheses_modeling/KerasRegressions.py | 28 +++++++++++++------------
 hypotheses_modeling/out.txt             |  8 +++++++
 3 files changed, 24 insertions(+), 13 deletions(-)
 create mode 100644 .~lock.DataFest 2019 - Codebook.xlsx#
 create mode 100644 hypotheses_modeling/out.txt

diff --git a/.~lock.DataFest 2019 - Codebook.xlsx# b/.~lock.DataFest 2019 - Codebook.xlsx#
new file mode 100644
index 0000000..8af3a3a
--- /dev/null
+++ b/.~lock.DataFest 2019 - Codebook.xlsx#	
@@ -0,0 +1 @@
+,pxd256,null,31.03.2019 08:16,file:///home/pxd256/.config/libreoffice/4;
\ No newline at end of file
diff --git a/hypotheses_modeling/KerasRegressions.py b/hypotheses_modeling/KerasRegressions.py
index 4ab364d..4fe6974 100644
--- a/hypotheses_modeling/KerasRegressions.py
+++ b/hypotheses_modeling/KerasRegressions.py
@@ -144,14 +144,14 @@ def time_series_linear_regression(dataset, k, n0, x_columns, y_columns):
     tf.keras.layers.Flatten(input_shape=[input_shape]),
     tf.keras.layers.Dense(output_shape)
   ])
-  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
-  model.fit(x, y, epochs=50)
-  loss, _ = model.evaluate(x, y)
-  print(loss)
+  model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
+  model.fit(x, y, epochs=50, verbose=2)
   pred = model.predict(x)
   r2 = r2_(y, pred)
-  print(r2)
-  return model.get_weights()
+  hard_pred = model.predict(x[0])
+  hard_out = np.matmul(x[0], [0.03589787, -0.03472298, 0.24109702, -0.10143519]) - 0.890594
+  print(hard_pred, hard_out, y[0])
+  return r2, model.get_weights()
 
 
 def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
@@ -194,24 +194,26 @@ def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
     tf.keras.layers.Dense(32, activation=tf.nn.softmax),
     tf.keras.layers.Dense(output_shape)
   ])
-  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
-  model.fit(x, y, epochs=100, verbose=0)
+  model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
+  model.fit(x, y, epochs=100, verbose=2)
   loss, accuracy = model.evaluate(x, y)
-  print(loss, accuracy)
   pred = model.predict(x)
   r2 = r2_(y, pred)
-  print(r2)
-  return model.get_weights()
+  return r2, model.get_weights()
 
 
 def main():
   filename = "personal.csv"
   df = pd.read_csv(filename)
-  x = ["day", "playerID", "DailyLoadSliding", "sleepQuality"]
+  x = ["day", "playerID", "sleepHoursSliding", "sleepHours", "sleepQuality", "acuteChronicRatio"]
   y = ["day", "playerID", "fatigueNorm"]
   k = 0
   n0 = 30
-  weights = time_series_linear_regression(df, k, n0, x, y)
+  r2, weights =  time_series_linear_regression(df, k, n0, x, y)
+  print("r2")
+  print(r2)
+  print("weights")
+  print(weights)
 
 
 if __name__ == "__main__":
diff --git a/hypotheses_modeling/out.txt b/hypotheses_modeling/out.txt
new file mode 100644
index 0000000..37d9d08
--- /dev/null
+++ b/hypotheses_modeling/out.txt
@@ -0,0 +1,8 @@
+Epoch 1/50, mean_squared_error: 90.4998
+Epoch 11/50, mean_squared_error: 1.0265
+Epoch 21/50, mean_squared_error: 0.9604
+Epoch 31/50, mean_squared_error: 0.8671
+Epoch 41/50, mean_squared_error: 0.7838
+r2, 0.07949744624446509
+slopes, 0.03589787,-0.03472298, 0.24109702, -0.10143519
+intercept, -0.8960594

From 8964ee4f41cd5650a197855425ec8a8b582d7a5e Mon Sep 17 00:00:00 2001
From: Ryan Missel <rxm7244@rit.edu>
Date: Sun, 31 Mar 2019 12:01:10 -0400
Subject: [PATCH 3/3] working pytorch model :partyparrot:

---
 hypotheses_modeling/pytorch_shit.py | 113 ++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 24 deletions(-)

diff --git a/hypotheses_modeling/pytorch_shit.py b/hypotheses_modeling/pytorch_shit.py
index 413aeed..08e604e 100644
--- a/hypotheses_modeling/pytorch_shit.py
+++ b/hypotheses_modeling/pytorch_shit.py
@@ -3,6 +3,9 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 import pandas as pd
+from sklearn.utils.multiclass import unique_labels
+from matplotlib import pyplot as plt
+from sklearn.metrics import *
 
 
 class Net(nn.Module):
@@ -24,12 +27,53 @@ def get_argmax(array):
             max = array[i]
             index = i
 
-    one_hot = [0,0,0,0]
+    one_hot = [0, 0, 0, 0]
     one_hot[index] = 1
     return one_hot
 
 
-def get_trainset(dataset, k, n0, x_columns, y_columns):
+def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues, normalize=False):
+    """Print and plot the confusion matrix of `y_pred` against `y_true`.
+
+    `y_true`/`y_pred` may be class ids or one-hot rows (reduced with argmax, since
+    `confusion_matrix` rejects multilabel input). `classes` holds the class names
+    indexed by class id. With `normalize=True` each row is scaled to sum to 1.
+    Returns the matplotlib Axes holding the plot.
+    """
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    if y_true.ndim > 1:
+        y_true = y_true.argmax(axis=1)
+    if y_pred.ndim > 1:
+        y_pred = y_pred.argmax(axis=1)
+
+    cm = confusion_matrix(y_true, y_pred)
+    # Only use the labels that appear in the data; asarray lets a plain list be fancy-indexed.
+    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
+    if normalize:
+        # Clamp the row totals so a class with no true samples does not divide by zero.
+        cm = cm.astype(float) / np.maximum(cm.sum(axis=1, keepdims=True), 1)
+    print(cm)
+
+    fig, ax = plt.subplots()
+    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
+    ax.figure.colorbar(im, ax=ax)
+    # Show all ticks and label them with the respective class names.
+    ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]),
+           xticklabels=classes, yticklabels=classes,
+           title="Confusion Matrix", ylabel='True label', xlabel='Predicted label')
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+
+    # Annotate every cell; white text on dark cells keeps the numbers readable.
+    fmt = '.2f' if normalize else 'd'
+    thresh = cm.max() / 2.
+    for i, j in np.ndindex(cm.shape):
+        ax.text(j, i, format(cm[i, j], fmt), ha="center", va="center",
+                color="white" if cm[i, j] > thresh else "black")
+    fig.tight_layout()
+    return ax
+
+
+def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns):
     inp = dataset[x_columns]
     out = dataset[y_columns]
     col = "day"
@@ -63,32 +107,36 @@ def get_trainset(dataset, k, n0, x_columns, y_columns):
             x.append(xprev)
             y.append(yt)
 
+    randn_1 = np.random.randint(1, 5200)
     x = torch.FloatTensor(x)
     y = torch.LongTensor(y)
+    if batch_size:
+        x = x.narrow(0, randn_1, 125)
+        y = y.narrow(0, randn_1, 125)
     return x, y
 
 
-def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns):
-  net = Net(1)
-  optimizer = optim.Adam(net.parameters(), lr=.001)
-  loss = nn.CrossEntropyLoss()
+def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels):
+    net = Net(4)
+    optimizer = optim.Adam(net.parameters(), lr=.03)
+    loss = nn.CrossEntropyLoss()
 
-  x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
-  accuracy(net, x, y)
+    x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
+    accuracy(net, x, y)
 
-  for step in range(steps):
-      optimizer.zero_grad()
+    for step in range(steps):
+        optimizer.zero_grad()
 
-      x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
-      pred = net(x)
-      net_loss = loss(pred, torch.max(y, 1)[1])
-      net_loss.backward()
-      optimizer.step()
+        x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
+        pred = net(x)
+        net_loss = loss(pred, torch.max(y, 1)[1])
+        net_loss.backward()
+        optimizer.step()
 
-      print("Loss at Step {}: {}".format(step, net_loss))
+        print("Loss at Step {}: {}".format(step, net_loss))
 
-  x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
-  accuracy(net, x, y)
+    x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
+    accuracy(net, x, y)
 
 
 def accuracy(net, x, y):
@@ -109,15 +157,32 @@ def accuracy(net, x, y):
 
     accuracy = (correct / total) * 100
     print("Accuracy for set: {}%".format(accuracy))
-    torch.save(net, "model.ckpt")
+    torch.save(net, "model_higher_lr.ckpt")
+    return pred, y
+
+
+def cm_plot(classes, dataset, k, n0, x_columns, y_columns):
+    """Load the saved model, score one sampled batch and plot its confusion matrix."""
+    model = torch.load('model.ckpt')
+    x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
+    # Collapse one-hot targets / raw scores to class ids: confusion_matrix rejects multilabel input.
+    true_idx = y.numpy().argmax(axis=1)  # assumes y is (batch, n_classes) one-hot -- TODO confirm
+    pred_idx = model(x).detach().numpy().argmax(axis=1)
+
+    print('F1: {}'.format(f1_score(true_idx, pred_idx, average='micro')))
+    plot_confusion_matrix(true_idx, pred_idx, np.asarray(classes))
 
 
 def main():
-  filename = "personal.csv"
-  df = pd.read_csv(filename)
-  x = ["day", "playerID", "fatigueSliding"]
-  y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"]
-  time_series_sigmoid_classification(2, df, 0, 30, x, y)
+    filename = "personal.csv"
+    df = pd.read_csv(filename)
+    x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"]
+    y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll",
+         "BestOutOfMyselfUnknown"]
+    # time_series_sigmoid_classification(50, df, 0, 30, x, y, y)
+    cm_plot(
+        ["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"],
+        df, 0, 30, x, y)
 
 
 if __name__ == '__main__':