datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 lines
3.3 KiB

  1. import torch
  2. import torch.nn as nn
  3. import torch.optim as optim
  4. import numpy as np
  5. import pandas as pd
  6. class Net(nn.Module):
  7. def __init__(self, input_shape):
  8. super().__init__()
  9. self.fc1 = nn.Linear(input_shape, 8)
  10. self.fc2 = nn.Linear(8, 4)
  11. def forward(self, x):
  12. x = torch.sigmoid(self.fc1(x))
  13. return self.fc2(x)
  14. def get_argmax(array):
  15. max = 0
  16. index = 0
  17. for i in range(len(array)):
  18. if array[i] > max:
  19. max = array[i]
  20. index = i
  21. one_hot = [0,0,0,0]
  22. one_hot[index] = 1
  23. return one_hot
  24. def get_trainset(dataset, k, n0, x_columns, y_columns):
  25. inp = dataset[x_columns]
  26. out = dataset[y_columns]
  27. col = "day"
  28. x = []
  29. y = []
  30. input_shape = 0
  31. output_shape = 0
  32. for player in out["playerID"].unique():
  33. XPlayer = inp[inp["playerID"] == player]
  34. YPlayer = out[out["playerID"] == player]
  35. for day in YPlayer[col][n0 - 1:]:
  36. prev = day - k
  37. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  38. if xprev.shape[0] != 1:
  39. continue
  40. else:
  41. xprev = xprev[0, :]
  42. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  43. if input_shape == 0:
  44. input_shape = xprev.shape[0]
  45. else:
  46. if input_shape != xprev.shape[0]:
  47. print("INCONSISTENT INPUT DIMENSION")
  48. exit(2)
  49. if output_shape == 0:
  50. output_shape = yt.shape[0]
  51. else:
  52. if output_shape != yt.shape[0]:
  53. print("INCONSISTENT OUTPUT DIMENSION")
  54. exit(2)
  55. x.append(xprev)
  56. y.append(yt)
  57. x = torch.FloatTensor(x)
  58. y = torch.LongTensor(y)
  59. return x, y
  60. def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns):
  61. net = Net(1)
  62. optimizer = optim.Adam(net.parameters(), lr=.001)
  63. loss = nn.CrossEntropyLoss()
  64. x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
  65. accuracy(net, x, y)
  66. for step in range(steps):
  67. optimizer.zero_grad()
  68. x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
  69. pred = net(x)
  70. net_loss = loss(pred, torch.max(y, 1)[1])
  71. net_loss.backward()
  72. optimizer.step()
  73. print("Loss at Step {}: {}".format(step, net_loss))
  74. x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
  75. accuracy(net, x, y)
  76. def accuracy(net, x, y):
  77. pred = net(x)
  78. pred = pred.detach().numpy()
  79. for row in range(len(pred)):
  80. pred[row] = get_argmax(pred[row])
  81. total = len(pred)
  82. correct = 0
  83. for i in range(len(pred)):
  84. equal = True
  85. for j in range(len(pred[i])):
  86. if pred[i][j] != y[i][j]:
  87. equal = False
  88. if equal:
  89. correct += 1
  90. accuracy = (correct / total) * 100
  91. print("Accuracy for set: {}%".format(accuracy))
  92. torch.save(net, "model.ckpt")
  93. def main():
  94. filename = "personal.csv"
  95. df = pd.read_csv(filename)
  96. x = ["day", "playerID", "fatigueSliding"]
  97. y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"]
  98. time_series_sigmoid_classification(2, df, 0, 30, x, y)
  99. if __name__ == '__main__':
  100. main()