datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

188 lines
5.8 KiB

5 years ago
  1. import tensorflow as tf
  2. import pandas as pd
  3. import numpy as np
  4. def time_series_sigmoid_classification(X, Y, k, n0, x_columns, y_columns):
  5. inp = X[x_columns]
  6. out = Y[y_columns]
  7. col = "day"
  8. x = []
  9. y = []
  10. input_shape = 0
  11. output_shape = 0
  12. for player in Y["playerID"].unique():
  13. XPlayer = inp[inp["playerID"] == player]
  14. YPlayer = out[out["playerID"] == player]
  15. for day in YPlayer[col][n0 - 1:]:
  16. prev = day - k
  17. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
  18. if xprev.shape[0] != 1:
  19. continue
  20. else:
  21. xprev = xprev[0, :]
  22. yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
  23. if input_shape == 0:
  24. input_shape = xprev.shape[0]
  25. else:
  26. if input_shape != xprev.shape[0]:
  27. print("INCONSISTENT INPUT DIMENSION")
  28. exit(2)
  29. if input_shape == 0:
  30. output_shape = yt.shape[0]
  31. else:
  32. if output_shape != yt.shape[0]:
  33. print("INCONSISTENT OUTPUT DIMENSION")
  34. exit(2)
  35. x.append(xprev)
  36. y.append(yt)
  37. x = np.array(x)
  38. y = np.array(y)
  39. model = tf.keras.Sequential([
  40. tf.keras.layers.Flatten(input_shape=input_shape),
  41. tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
  42. ])
  43. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  44. model.fit(x, y, epochs=100)
  45. loss, accuracy = model.evaluate(x, y)
  46. print(loss, accuracy)
  47. return model.get_weights()
  48. def time_series_dnn_classification(X, Y, k, n0, x_columns, y_columns):
  49. inp = X[x_columns]
  50. out = Y[y_columns]
  51. col = "day"
  52. x = []
  53. y = []
  54. input_shape = 0
  55. output_shape = 0
  56. for player in Y["playerID"].unique():
  57. XPlayer = inp[inp["playerID"] == player]
  58. YPlayer = out[out["playerID"] == player]
  59. for day in YPlayer[col][n0 - 1:]:
  60. prev = day - k
  61. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
  62. if xprev.shape[0] != 1:
  63. continue
  64. else:
  65. xprev = xprev[0, :]
  66. yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
  67. if input_shape == 0:
  68. input_shape = xprev.shape[0]
  69. else:
  70. if input_shape != xprev.shape[0]:
  71. print("INCONSISTENT INPUT DIMENSION")
  72. exit(2)
  73. if input_shape == 0:
  74. output_shape = yt.shape[0]
  75. else:
  76. if output_shape != yt.shape[0]:
  77. print("INCONSISTENT OUTPUT DIMENSION")
  78. exit(2)
  79. x.append(xprev)
  80. y.append(yt)
  81. x = np.array(x)
  82. y = np.array(y)
  83. model = tf.keras.Sequential([
  84. tf.keras.layers.Flatten(input_shape=input_shape),
  85. tf.keras.layers.Dense(32, activation=tf.nn.softmax),
  86. tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
  87. ])
  88. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  89. model.fit(x, y, epochs=100)
  90. loss, accuracy = model.evaluate(x, y)
  91. print(loss, accuracy)
  92. return model.get_weights()
  93. def time_series_linear_regression(X, Y, k, n0, x_columns, y_columns):
  94. inp = X[x_columns]
  95. out = Y[y_columns]
  96. col = "day"
  97. x = []
  98. y = []
  99. input_shape = 0
  100. output_shape = 0
  101. for player in Y["playerID"].unique():
  102. XPlayer = inp[inp["playerID"] == player]
  103. YPlayer = out[out["playerID"] == player]
  104. for day in YPlayer[col][n0 - 1:]:
  105. prev = day - k
  106. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
  107. if xprev.shape[0] != 1:
  108. continue
  109. else:
  110. xprev = xprev[0, :]
  111. yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
  112. if input_shape == 0:
  113. input_shape = xprev.shape[0]
  114. else:
  115. if input_shape != xprev.shape[0]:
  116. print("INCONSISTENT INPUT DIMENSION")
  117. exit(2)
  118. if input_shape == 0:
  119. output_shape = yt.shape[0]
  120. else:
  121. if output_shape != yt.shape[0]:
  122. print("INCONSISTENT OUTPUT DIMENSION")
  123. exit(2)
  124. x.append(xprev)
  125. y.append(yt)
  126. x = np.array(x)
  127. y = np.array(y)
  128. model = tf.keras.Sequential([
  129. tf.keras.layers.Flatten(input_shape=input_shape),
  130. tf.keras.layers.Dense(output_shape)
  131. ])
  132. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  133. model.fit(x, y, epochs=100)
  134. loss, accuracy = model.evaluate(x, y)
  135. print(loss, accuracy)
  136. return model.get_weights()
  137. def time_series_dnn_regressions(X, Y, k, n0, x_columns, y_columns):
  138. inp = X[x_columns]
  139. out = Y[y_columns]
  140. col = "day"
  141. x = []
  142. y = []
  143. input_shape = 0
  144. output_shape = 0
  145. for player in Y["playerID"].unique():
  146. XPlayer = inp[inp["playerID"] == player]
  147. YPlayer = out[out["playerID"] == player]
  148. for day in YPlayer[col][n0 - 1:]:
  149. prev = day - k
  150. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col]).to_numpy()
  151. if xprev.shape[0] != 1:
  152. continue
  153. else:
  154. xprev = xprev[0, :]
  155. yt = YPlayer[YPlayer[col] == day].drop(columns=[col]).to_numpy()[0, :]
  156. if input_shape == 0:
  157. input_shape = xprev.shape[0]
  158. else:
  159. if input_shape != xprev.shape[0]:
  160. print("INCONSISTENT INPUT DIMENSION")
  161. exit(2)
  162. if input_shape == 0:
  163. output_shape = yt.shape[0]
  164. else:
  165. if output_shape != yt.shape[0]:
  166. print("INCONSISTENT OUTPUT DIMENSION")
  167. exit(2)
  168. x.append(xprev)
  169. y.append(yt)
  170. x = np.array(x)
  171. y = np.array(y)
  172. model = tf.keras.Sequential([
  173. tf.keras.layers.Flatten(input_shape=input_shape),
  174. tf.keras.layers.Dense(32, activation=tf.nn.softmax),
  175. tf.keras.layers.Dense(output_shape)
  176. ])
  177. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  178. model.fit(x, y, epochs=100)
  179. loss, accuracy = model.evaluate(x, y)
  180. print(loss, accuracy)
  181. return model.get_weights()