datafest competition 2019

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.utils.multiclass import unique_labels
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score


class Net(nn.Module):
    """Small feed-forward classifier: input features -> 8 hidden units -> 4 classes."""

    def __init__(self, input_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape, 8)
        self.fc2 = nn.Linear(8, 4)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        return self.fc2(x)


def get_argmax(array):
    # Turn a vector of raw scores into a one-hot vector at the largest entry.
    # np.argmax also handles all-negative scores, which a "start from 0" scan would miss.
    one_hot = [0, 0, 0, 0]
    one_hot[int(np.argmax(array))] = 1
    return one_hot


def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    title = "Confusion Matrix"
    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    # Only use the labels that appear in the data
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    print(cm)
    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    # Annotate each cell with its count (or rate when normalized).
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax


def get_trainset(batch_size, dataset, k, n0, x_columns, y_columns):
    """Pair each player's day-t labels with the features from day t - k.
    `batch_size` acts as a flag: when truthy, a random contiguous block of
    125 samples is returned instead of the full set."""
    inp = dataset[x_columns]
    out = dataset[y_columns]
    col = "day"
    x = []
    y = []
    input_shape = 0
    output_shape = 0
    for player in out["playerID"].unique():
        XPlayer = inp[inp["playerID"] == player]
        YPlayer = out[out["playerID"] == player]
        for day in YPlayer[col][n0 - 1:]:
            prev = day - k
            xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
            # Skip days whose lagged features are missing or duplicated.
            if xprev.shape[0] != 1:
                continue
            xprev = xprev[0, :]
            yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
            # Guard against ragged rows slipping through.
            if input_shape == 0:
                input_shape = xprev.shape[0]
            elif input_shape != xprev.shape[0]:
                print("INCONSISTENT INPUT DIMENSION")
                exit(2)
            if output_shape == 0:
                output_shape = yt.shape[0]
            elif output_shape != yt.shape[0]:
                print("INCONSISTENT OUTPUT DIMENSION")
                exit(2)
            x.append(xprev)
            y.append(yt)
    x = torch.FloatTensor(np.array(x))
    y = torch.LongTensor(np.array(y))
    if batch_size:
        # Sample a random 125-row block, keeping the start index in range.
        start = np.random.randint(0, max(1, x.size(0) - 125))
        x = x.narrow(0, start, 125)
        y = y.narrow(0, start, 125)
    return x, y


def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns, labels):
    net = Net(4)
    optimizer = optim.Adam(net.parameters(), lr=.03)
    loss = nn.CrossEntropyLoss()
    # Baseline accuracy on the full set before training.
    x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
    accuracy(net, x, y)
    for step in range(steps):
        optimizer.zero_grad()
        # Train on a fresh random mini-batch each step.
        x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
        pred = net(x)
        # CrossEntropyLoss expects class indices, so convert the one-hot targets.
        net_loss = loss(pred, torch.max(y, 1)[1])
        net_loss.backward()
        optimizer.step()
        print("Loss at Step {}: {}".format(step, net_loss.item()))
    # Final accuracy on the full set after training.
    x, y = get_trainset(False, dataset, k, n0, x_columns, y_columns)
    accuracy(net, x, y)


def accuracy(net, x, y):
    pred = net(x)
    pred = pred.detach().numpy()
    # Convert raw scores to one-hot predictions.
    for row in range(len(pred)):
        pred[row] = get_argmax(pred[row])
    total = len(pred)
    correct = 0
    for i in range(len(pred)):
        # A prediction counts as correct only if every position matches the target.
        equal = True
        for j in range(len(pred[i])):
            if pred[i][j] != y[i][j]:
                equal = False
        if equal:
            correct += 1
    acc = (correct / total) * 100
    print("Accuracy for set: {}%".format(acc))
    # Save the current model alongside the accuracy check.
    torch.save(net, "model_higher_lr.ckpt")
    return pred, y


def cm_plot(classes, dataset, k, n0, x_columns, y_columns):
    # Evaluate a previously saved model on a random batch and plot its confusion matrix.
    model = torch.load('model.ckpt')
    x, y = get_trainset(True, dataset, k, n0, x_columns, y_columns)
    pred = model(x)
    pred = pred.detach().numpy()
    for row in range(len(pred)):
        pred[row] = get_argmax(pred[row])
    # confusion_matrix and f1_score need class indices, not one-hot rows.
    y_idx = y.numpy().argmax(axis=1)
    pred_idx = pred.argmax(axis=1)
    print('F1: {}'.format(f1_score(y_idx, pred_idx, average='micro')))
    plot_confusion_matrix(y_idx, pred_idx, classes)
    plt.show()


def main():
    filename = "personal.csv"
    df = pd.read_csv(filename)
    x = ["day", "playerID", "fatigueSliding", "fatigueNorm", "sleepHoursSliding", "sleepQuality"]
    y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll",
         "BestOutOfMyselfUnknown"]
    # time_series_sigmoid_classification(50, df, 0, 30, x, y, y)
    cm_plot(
        ["BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"],
        df, 0, 30, x, y)


if __name__ == '__main__':
    main()