datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

220 lines
6.7 KiB

  1. import tensorflow as tf
  2. import pandas as pd
  3. import numpy as np
  4. from sklearn.metrics import r2_score
  5. def r2_(y, pred):
  6. ybar = np.sum(y) / len(y)
  7. ssreg = np.sum((pred - ybar)**2)
  8. sstot = np.sum((y - ybar)**2)
  9. return ssreg/sstot
  10. def time_series_sigmoid_classification(dataset, k, n0, x_columns, y_columns):
  11. inp = dataset[x_columns]
  12. out = dataset[y_columns]
  13. col = "day"
  14. x = []
  15. y = []
  16. input_shape = 0
  17. output_shape = 0
  18. for player in out["playerID"].unique():
  19. XPlayer = inp[inp["playerID"] == player]
  20. YPlayer = out[out["playerID"] == player]
  21. for day in YPlayer[col][n0 - 1:]:
  22. prev = day - k
  23. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  24. if xprev.shape[0] != 1:
  25. continue
  26. else:
  27. xprev = xprev[0, :]
  28. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  29. if input_shape == 0:
  30. input_shape = xprev.shape[0]
  31. else:
  32. if input_shape != xprev.shape[0]:
  33. print("INCONSISTENT INPUT DIMENSION")
  34. exit(2)
  35. if output_shape == 0:
  36. output_shape = yt.shape[0]
  37. else:
  38. if output_shape != yt.shape[0]:
  39. print("INCONSISTENT OUTPUT DIMENSION")
  40. exit(2)
  41. x.append(xprev)
  42. y.append(yt)
  43. x = np.array(x)
  44. y = np.array(y)
  45. model = tf.keras.Sequential([
  46. tf.keras.layers.Flatten(input_shape=[input_shape]),
  47. tf.keras.layers.Dense(output_shape, activation=tf.nn.softmax)
  48. ])
  49. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  50. model.fit(x, y, epochs=50)
  51. loss, accuracy = model.evaluate(x, y)
  52. print(loss, accuracy)
  53. return model.get_weights()
  54. def time_series_dnn_classification(dataset, k, n0, x_columns, y_columns):
  55. inp = dataset[x_columns]
  56. out = dataset[y_columns]
  57. col = "day"
  58. x = []
  59. y = []
  60. input_shape = 0
  61. output_shape = 0
  62. for player in out["playerID"].unique():
  63. XPlayer = inp[inp["playerID"] == player]
  64. YPlayer = out[out["playerID"] == player]
  65. for day in YPlayer[col][n0 - 1:]:
  66. prev = day - k
  67. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  68. if xprev.shape[0] != 1:
  69. continue
  70. else:
  71. xprev = xprev[0, :]
  72. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  73. if input_shape == 0:
  74. input_shape = xprev.shape[0]
  75. else:
  76. if input_shape != xprev.shape[0]:
  77. print("INCONSISTENT INPUT DIMENSION")
  78. exit(2)
  79. if output_shape == 0:
  80. output_shape = yt.shape[0]
  81. else:
  82. if output_shape != yt.shape[0]:
  83. print("INCONSISTENT OUTPUT DIMENSION")
  84. exit(2)
  85. x.append(xprev)
  86. y.append(yt)
  87. x = np.array(x)
  88. y = np.array(y)
  89. model = tf.keras.Sequential([
  90. tf.keras.layers.Dense(32, input_dim=input_shape,activation=tf.nn.softmax),
  91. tf.keras.layers.Dense(output_shape, input_dim=32, activation=tf.nn.softmax)
  92. ])
  93. model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
  94. print(output_shape)
  95. model.fit(x, y, epochs=50)
  96. loss, accuracy = model.evaluate(x, y)
  97. print(x.shape)
  98. print(y.shape)
  99. print(loss, accuracy)
  100. return model.get_weights()
  101. def time_series_linear_regression(dataset, k, n0, x_columns, y_columns):
  102. inp = dataset[x_columns]
  103. out = dataset[y_columns]
  104. col = "day"
  105. x = []
  106. y = []
  107. input_shape = 0
  108. output_shape = 0
  109. for player in out["playerID"].unique():
  110. XPlayer = inp[inp["playerID"] == player]
  111. YPlayer = out[out["playerID"] == player]
  112. for day in YPlayer[col][n0 - 1:]:
  113. prev = day - k
  114. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  115. if xprev.shape[0] != 1:
  116. continue
  117. else:
  118. xprev = xprev[0, :]
  119. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  120. if input_shape == 0:
  121. input_shape = xprev.shape[0]
  122. else:
  123. if input_shape != xprev.shape[0]:
  124. print("INCONSISTENT INPUT DIMENSION")
  125. exit(2)
  126. if output_shape == 0:
  127. output_shape = yt.shape[0]
  128. else:
  129. if output_shape != yt.shape[0]:
  130. print("INCONSISTENT OUTPUT DIMENSION")
  131. exit(2)
  132. x.append(xprev)
  133. y.append(yt)
  134. x = np.array(x)
  135. y = np.array(y)
  136. model = tf.keras.Sequential([
  137. tf.keras.layers.Flatten(input_shape=[input_shape]),
  138. tf.keras.layers.Dense(output_shape)
  139. ])
  140. model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
  141. model.fit(x, y, epochs=50, verbose=2)
  142. pred = model.predict(x)
  143. r2 = r2_(y, pred)
  144. hard_pred = model.predict(x[0])
  145. hard_out = np.matmul(x[0], [0.03589787, -0.03472298, 0.24109702, -0.10143519]) - 0.890594
  146. print(hard_pred, hard_out, y[0])
  147. return r2, model.get_weights()
  148. def time_series_dnn_regressions(dataset, k, n0, x_columns, y_columns):
  149. inp = dataset[x_columns]
  150. out = dataset[y_columns]
  151. col = "day"
  152. x = []
  153. y = []
  154. input_shape = 0
  155. output_shape = 0
  156. for player in out["playerID"].unique():
  157. XPlayer = inp[inp["playerID"] == player]
  158. YPlayer = out[out["playerID"] == player]
  159. for day in YPlayer[col][n0 - 1:]:
  160. prev = day - k
  161. xprev = XPlayer[XPlayer[col] == prev].drop(columns=[col, "playerID"]).to_numpy()
  162. if xprev.shape[0] != 1:
  163. continue
  164. else:
  165. xprev = xprev[0, :]
  166. yt = YPlayer[YPlayer[col] == day].drop(columns=[col, "playerID"]).to_numpy()[0, :]
  167. if input_shape == 0:
  168. input_shape = xprev.shape[0]
  169. else:
  170. if input_shape != xprev.shape[0]:
  171. print("INCONSISTENT INPUT DIMENSION")
  172. exit(2)
  173. if output_shape == 0:
  174. output_shape = yt.shape[0]
  175. else:
  176. if output_shape != yt.shape[0]:
  177. print("INCONSISTENT OUTPUT DIMENSION")
  178. exit(2)
  179. x.append(xprev)
  180. y.append(yt)
  181. x = np.array(x)
  182. y = np.array(y)
  183. model = tf.keras.Sequential([
  184. tf.keras.layers.Flatten(input_shape=[input_shape]),
  185. tf.keras.layers.Dense(32, activation=tf.nn.softmax),
  186. tf.keras.layers.Dense(output_shape)
  187. ])
  188. model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.mean_squared_error])
  189. model.fit(x, y, epochs=100, verbose=2)
  190. loss, accuracy = model.evaluate(x, y)
  191. pred = model.predict(x)
  192. r2 = r2_(y, pred)
  193. return r2, model.get_weights()
  194. def main():
  195. filename = "personal.csv"
  196. df = pd.read_csv(filename)
  197. x = ["day", "playerID", "sleepHoursSliding", "sleepHours", "sleepQuality", "acuteChronicRatio"]
  198. y = ["day", "playerID", "fatigueNorm"]
  199. k = 0
  200. n0 = 30
  201. r2, weights = time_series_linear_regression(df, k, n0, x, y)
  202. print("r2")
  203. print(r2)
  204. print("weights")
  205. print(weights)
  206. if __name__ == "__main__":
  207. main()