Jeffery Russell · 5 years ago
commit 3a0660e5c3
5 changed files with 120 additions and 37 deletions
  1. +1  -0   .~lock.DataFest 2019 - Codebook.xlsx#
  2. +15 -13  hypotheses_modeling/KerasRegressions.py
  3. BIN      hypotheses_modeling/model.ckpt
  4. +8  -0   hypotheses_modeling/out.txt
  5. +96 -24  hypotheses_modeling/pytorch_shit.py

+1 -0   .~lock.DataFest 2019 - Codebook.xlsx#   View File

@@ -0,0 +1 @@
,pxd256,null,31.03.2019 08:16,file:///home/pxd256/.config/libreoffice/4;

+15 -13   hypotheses_modeling/KerasRegressions.py   View File

@@ -144,14 +144,14 @@ def time_series_linear_regression(dataset, k, n0, x_columns, y_columns):
tf.keras.layers.Flatten(input_shape=[input_shape]),
tf.keras.layers.Dense(output_shape)
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(x, y, epochs=50)
loss, _ = model.evaluate(x, y)
print(loss)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
model.fit(x, y, epochs=50, verbose=2)
pred = model.predict(x)
r2 = r2_(y, pred)
print(r2)
return model.get_weights()
hard_pred = model.predict(x[0])
hard_out = np.matmul(x[0], [0.03589787, -0.03472298, 0.24109702, -0.10143519]) - 0.890594
print(hard_pred, hard_out, y[0])
return r2, model.get_weights()
def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
@@ -194,24 +194,26 @@ def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
tf.keras.layers.Dense(32, activation=tf.nn.softmax),
tf.keras.layers.Dense(output_shape)
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(x, y, epochs=100, verbose=0)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
model.fit(x, y, epochs=100, verbose=2)
loss, accuracy = model.evaluate(x, y)
print(loss, accuracy)
pred = model.predict(x)
r2 = r2_(y, pred)
print(r2)
return model.get_weights()
return r2, model.get_weights()
def main():
filename = "personal.csv"
df = pd.read_csv(filename)
x = ["day", "playerID", "DailyLoadSliding", "sleepQuality"]
x = ["day", "playerID", "sleepHoursSliding", "sleepHours", "sleepQuality", "acuteChronicRatio"]
y = ["day", "playerID", "fatigueNorm"]
k = 0
n0 = 30
weights = time_series_linear_regression(df, k, n0, x, y)
r2, weights = time_series_linear_regression(df, k, n0, x, y)
print("r2")
print(r2)
print("weights")
print(weights)
if __name__ == "__main__":
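Side note on the hard_pred / hard_out check added in this file: with only a Flatten and a single Dense layer, the model is ordinary linear regression, so a one-row prediction can be reproduced from model.get_weights() instead of pasting the slopes in by hand. A minimal sketch, assuming x and y are the NumPy arrays built inside time_series_linear_regression and model is the trained Sequential model:

import numpy as np

# model.get_weights() for Flatten + Dense returns [kernel, bias]
kernel, bias = model.get_weights()      # kernel: (n_features, n_outputs), bias: (n_outputs,)
sample = x[0].reshape(1, -1)            # keep the batch dimension for predict()

keras_pred = model.predict(sample)      # prediction from the trained model
manual_pred = sample @ kernel + bias    # the same linear map applied by hand

print(keras_pred, manual_pred, y[0])    # the two predictions should agree up to float error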

BIN   hypotheses_modeling/model.ckpt   View File


+8 -0   hypotheses_modeling/out.txt   View File

@@ -0,0 +1,8 @@
Epoch 1/50, mean_squared_error: 90.4998
Epoch 11/50, mean_squared_error: 1.0265
Epoch 21/50, mean_squared_error: 0.9604
Epoch 31/50, mean_squared_error: 0.8671
Epoch 41/50, mean_squared_error: 0.7838
r2, 0.07949744624446509
slopes, 0.03589787,-0.03472298, 0.24109702, -0.10143519
intercept, -0.8960594
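For context on the r2 value logged above: the r2_ helper used by KerasRegressions.py is not part of this diff, but if it follows the usual definition it reports the coefficient of determination, R² = 1 - SS_res / SS_tot. A minimal sketch of an equivalent computation (the name r2_ and 1-D shapes are assumptions):

import numpy as np

def r2_(y_true, y_pred):
    """Coefficient of determination: 1 - SS_res / SS_tot."""
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    ss_res = np.sum((y_true - y_pred) ** 2)          # residual sum of squares
    ss_tot = np.sum((y_true - y_true.mean()) ** 2)   # total sum of squares
    return 1.0 - ss_res / ss_tot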

+96 -24   hypotheses_modeling/pytorch_shit.py   View File

@@ -3,6 +3,9 @@ import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.utils.multiclass import unique_labels
from matplotlib import pyplot as plt
from sklearn.metrics import *
class Net(nn.Module):
@@ -24,10 +27,53 @@ def get_argmax(array):
max = array[i]
index = i
return [index]
def get_trainset(dataset, k, n0, x_columns, y_columns):
one_hot = [0, 0, 0, 0]
one_hot[index] = 1
return one_hot
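The rewritten get_argmax now returns a one-hot vector rather than a bare index; for reference, the same encoding can be obtained directly from NumPy. A sketch, assuming the fixed width of 4 classes used above:

import numpy as np

def get_argmax(array):
    # one-hot encode the index of the largest entry, e.g. [0.1, 0.7, 0.2, 0.0] -> [0, 1, 0, 0]
    return np.eye(4, dtype=int)[int(np.argmax(array))].tolist()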
def plot_confusion_matrix(y_true, y_pred, classes, cmap=plt.cm.Blues):
"""
Prints and plots the confusion matrix for the given true and predicted labels (raw counts; no normalization is applied).
"""
title = "Confusion Matrix"
# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
classes = np.array(classes)[unique_labels(y_true, y_pred)]
print(cm)
fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
fmt = 'd'  # annotate cells with integer counts (no normalization applied)
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
for j in range(cm.shape[1]):
ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
return ax
def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns):
inp = dataset[x_columns]
out = dataset[y_columns]
col = "day"
@@ -61,34 +107,43 @@ def get_trainset(dataset, k, n0, x_columns, y_columns):
x.append(xprev)
y.append(yt)
randn_1 = np.random.randint(1, 5200)
x = torch.FloatTensor(x)
y = torch.LongTensor(y)
if batch_size:
x = x.narrow(0, randn_1, 125)
y = y.narrow(0, randn_1, 125)
return x, y
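On the batch_size branch added above: narrow(0, randn_1, 125) slices 125 consecutive rows starting at a random offset along dimension 0, so each training step sees a random contiguous chunk of the data rather than the full set. A rough equivalent with plain indexing (tensor names and the window length of 125 come from the diff; bounding the offset by the tensor length is an assumption):

import torch

def random_window(x, y, window=125):
    # pick a random start so the window stays inside the tensor
    start = torch.randint(0, x.size(0) - window, (1,)).item()
    return x[start:start + window], y[start:start + window]

Unlike a hard-coded upper bound on the random offset, this keeps the slice in range for datasets shorter than the assumed maximum.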
def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns):
net = Net(1)
optimizer = optim.Adam(net.parameters(), lr=.001)
loss = nn.CrossEntropyLoss()
def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels):
net = Net(4)
optimizer = optim.Adam(net.parameters(), lr=.03)
loss = nn.CrossEntropyLoss()
for step in range(steps):
optimizer.zero_grad()
x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
accuracy(net, x, y)
x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
pred = net(x)
net_loss = loss(pred, torch.max(y, 1)[1])
net_loss.backward()
optimizer.step()
for step in range(steps):
optimizer.zero_grad()
print("Loss at Step {}: {}".format(step, net_loss))
x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
pred = net(x)
net_loss = loss(pred, torch.max(y, 1)[1])
net_loss.backward()
optimizer.step()
x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
accuracy(net, x, y)
print("Loss at Step {}: {}".format(step, net_loss))
x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
accuracy(net, x, y)
def accuracy(net, x, y):
pred = net(x)
pred = pred.detach().numpy()
for row in range(len(pred)):
pred[row] = get_argmax(pred[row])
total = len(pred)
correct = 0
@@ -102,15 +157,32 @@ def accuracy(net, x, y):
accuracy = (correct / total) * 100
print("Accuracy for set: {}%".format(accuracy))
torch.save(net, "model.ckpt")
torch.save(net, "model_higher_lr.ckpt")
return pred, y
def cm_plot(classes, dataset, k, n0, x_columns, y_columns):
model = torch.load('model.ckpt')
x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
pred = model(x)
pred = pred.detach().numpy()
for row in range(len(pred)):
pred[row] = get_argmax(pred[row])
print('F1: {}'.format(f1_score(y, pred > .5, average='micro')))
plot_confusion_matrix(y, pred, classes)
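A note on the checkpointing used by accuracy() and cm_plot(): torch.save(net, ...) pickles the whole module, so torch.load() only works where the Net class definition is importable. The state_dict pattern is the more portable alternative; a minimal sketch (Net(4) mirrors the constructor call in this diff, the filename is an assumption):

import torch

# saving: persist only the learned parameters
torch.save(net.state_dict(), "model_state.ckpt")

# loading: rebuild the architecture, then restore the parameters
model = Net(4)
model.load_state_dict(torch.load("model_state.ckpt"))
model.eval()  # switch off training-only behaviour before inference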
def main():
filename = "personal.csv"
df = pd.read_csv(filename)
x = ["day", "playerID", "fatigueSliding"]
y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"]
time_series_sigmoid_classification(100, df, 0, 30, x, y)
filename = "personal.csv"
df = pd.read_csv(filename)
x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"]
y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll",
"BestOutOfMyselfUnknown"]
# time_series_sigmoid_classification(50, df, 0, 30, x, y, y)
cm_plot(
["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"],
df, 0, 30, x, y)
if __name__ == '__main__':
