datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

117 lines
3.1 KiB

  1. import torch
  2. import torch.nn as nn
  3. import torch.optim as optim
  4. import numpy as np
  5. import pandas as pd
  6. class Net(nn.Module):
  7. def __init__(self, input_shape):
  8. super().__init__()
  9. self.fc1 = nn.Linear(input_shape, 8)
  10. self.fc2 = nn.Linear(8, 4)
  11. def forward(self, x):
  12. x = torch.sigmoid(self.fc1(x))
  13. return self.fc2(x)
  14. def get_argmax(array):
  15. max = 0
  16. index = 0
  17. for i in range(len(array)):
  18. if array[i] > max:
  19. max = array[i]
  20. index = i
  21. return [index]
  22. def get_trainset(dataset, k, n0, x_columns, y_columns):
  23. inp = dataset[x_columns]
  24. out = dataset[y_columns]
  25. col = "day"
  26. x = []
  27. y = []
  28. input_shape = 0
  29. output_shape = 0
  30. for player in out["playerID"].unique():
  31. XPlayer = inp[inp["playerID"] == player]
  32. YPlayer = out[out["playerID"] == player]
  33. for day in YPlayer[col][n0 - 1:]:
  34. prev = day - k
  35. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  36. if xprev.shape[0] != 1:
  37. continue
  38. else:
  39. xprev = xprev[0, :]
  40. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  41. if input_shape == 0:
  42. input_shape = xprev.shape[0]
  43. else:
  44. if input_shape != xprev.shape[0]:
  45. print("INCONSISTENT INPUT DIMENSION")
  46. exit(2)
  47. if output_shape == 0:
  48. output_shape = yt.shape[0]
  49. else:
  50. if output_shape != yt.shape[0]:
  51. print("INCONSISTENT OUTPUT DIMENSION")
  52. exit(2)
  53. x.append(xprev)
  54. y.append(yt)
  55. x = torch.FloatTensor(x)
  56. y = torch.LongTensor(y)
  57. return x, y
  58. def time_series_sigmoid_classification(steps, dataset, k, n0, x_columns, y_columns):
  59. net = Net(1)
  60. optimizer = optim.Adam(net.parameters(), lr=.001)
  61. loss = nn.CrossEntropyLoss()
  62. for step in range(steps):
  63. optimizer.zero_grad()
  64. x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
  65. pred = net(x)
  66. net_loss = loss(pred, torch.max(y, 1)[1])
  67. net_loss.backward()
  68. optimizer.step()
  69. print("Loss at Step {}: {}".format(step, net_loss))
  70. x, y = get_trainset(dataset, k, n0, x_columns, y_columns)
  71. accuracy(net, x, y)
  72. def accuracy(net, x, y):
  73. pred = net(x)
  74. pred = pred.detach().numpy()
  75. total = len(pred)
  76. correct = 0
  77. for i in range(len(pred)):
  78. equal = True
  79. for j in range(len(pred[i])):
  80. if pred[i][j] != y[i][j]:
  81. equal = False
  82. if equal:
  83. correct += 1
  84. accuracy = (correct / total) * 100
  85. print("Accuracy for set: {}%".format(accuracy))
  86. torch.save(net, "model.ckpt")
  87. def main():
  88. filename = "personal.csv"
  89. df = pd.read_csv(filename)
  90. x = ["day", "playerID", "fatigueSliding"]
  91. y = ["day", "playerID", "BestOutOfMyselfAbsolutely", "BestOutOfMyselfSomewhat", "BestOutOfMyselfNotAtAll", "BestOutOfMyselfUnknown"]
  92. time_series_sigmoid_classification(100, df, 0, 30, x, y)
  93. if __name__ == '__main__':
  94. main()