datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

218 lines
5.1 KiB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
  1. # Look at data
  2. library(tidyverse)
  3. library(DBI)
  4. library(RSQLite)
  # ---- Load GPS tracking data ----
  # Read the raw GPS export and keep both the plain data frame (gpsData) and
  # a tibble view of it (gpsDataTibble).
  gpsData <- read.csv("data/gps.csv")
  gpsDataTibble <- as_tibble(gpsData)

  # workingTibble is the table used for the ID lookups below. Swap in the
  # commented-out line to work on only the first 500,000 rows.
  # workingTibble <- head(gpsDataTibble, 500000)
  workingTibble <- gpsDataTibble

  # Distinct player and game identifiers present in the working data.
  playerIds <- unique(workingTibble$PlayerID)
  gameIds <- unique(workingTibble$GameID)

  # End each report with "\n" so the two counts do not run together on a
  # single console line.
  cat("Number of Players: ", length(playerIds), "\n", sep = "")
  cat("Number of Games: ", length(gameIds), "\n", sep = "")
  # ---- Per-player, per-game GPS summary ----
  # For every (player, game) pair, compute
  #   * averageSpeed:  mean of Speed
  #   * accelDistance: mean magnitude of the (AccelX, AccelY, AccelZ) vector
  # Results are ordered player-major: all games for the first player, then
  # all games for the second, and so on. A pair with no GPS rows yields NaN,
  # because mean() of an empty vector is NaN.
  numPlayers <- length(playerIds)
  numGames <- length(gameIds)
  numPairs <- numPlayers * numGames

  # Preallocate the result vectors; growing them with c() on every iteration
  # copies the whole vector each time.
  averageSpeed <- numeric(numPairs)
  accelDistance <- numeric(numPairs)

  # The ID columns follow directly from the loop order, so build them up
  # front: each player repeated once per game, the game list repeated once
  # per player.
  playerIDMetrics <- rep(as.vector(playerIds), each = numGames)
  gameIDMetrics <- rep(as.vector(gameIds), times = numPlayers)

  pairIndex <- 0L
  for (playerID in playerIds) {
    # Filter by player once per outer iteration so the inner loop scans only
    # this player's rows instead of the full GPS table.
    playerRows <- subset(workingTibble, PlayerID == playerID)

    for (gameID in gameIds) {
      # Progress trace: one line per (player, game) pair.
      cat(playerID, gameID, "\n", sep = " ")
      pairIndex <- pairIndex + 1L

      speedTibble <- subset(playerRows, GameID == gameID)

      # NOTE(review): the original passed `na.rm = 0`, which R coerces to
      # FALSE, so a single missing reading makes the pair's mean NA. That
      # behaviour is kept here; switch to TRUE if missing readings should
      # be ignored instead.
      averageSpeed[pairIndex] <- mean(speedTibble$Speed, na.rm = FALSE)

      # Euclidean norm of the three acceleration components, averaged over
      # the pair's rows.
      accelMagnitude <- sqrt(
        speedTibble$AccelX^2 + speedTibble$AccelY^2 + speedTibble$AccelZ^2
      )
      accelDistance[pairIndex] <- mean(accelMagnitude, na.rm = FALSE)
    }
  }

  plot(accelDistance, averageSpeed)

  # One row per (player, game) pair.
  compressedMetrics <- tibble(
    gameID = gameIDMetrics,
    playerID = playerIDMetrics,
    averageSpeed = averageSpeed,
    accelerationVector = accelDistance
  )

  # Sanity check: both metric columns should have one entry per pair.
  length(compressedMetrics$averageSpeed)
  length(compressedMetrics$accelerationVector)

  write.csv(compressedMetrics, "data/speedData.csv")
  37. #putSQLiteHere <- "gpsData.sqlite" # could also be ":memory:"
  38. #mySQLiteDB <- dbConnect(RSQLite::SQLite(),putSQLiteHere)
  39. #dbWriteTable(mySQLiteDB, "gpsData", compressedMetrics, overwrite=TRUE)
  40. #dbDisconnect(mySQLiteDB)
  # ---- Load wellness data ----
  # Raw wellness export, kept as a data frame and as a tibble.
  wellnessData <- read.csv("./data/wellness_na.csv")
  wellnessDataTibble <- as_tibble(wellnessData)

  #plot(wellnesPlayer1$Fatigue * wellnesPlayer1$Soreness * wellnesPlayer1$Irritability, wellnesPlayer1$SleepHours * wellnesPlayer1$SleepQuality)

  # Second wellness file, read straight into a tibble; this is the table the
  # plots below are drawn from.
  wellnessCleaned <- read.csv("./cleaned/dirty_wellness.csv") %>%
    as_tibble()

  # Rows belonging to player 1 only. which() drops rows whose PlayerID is NA.
  wellnesPlayer1 <- wellnessCleaned[which(wellnessCleaned$PlayerID == 1), ]
  # ---- Per-player box plots of the wellness measures ----

  # Build one horizontal box plot per player for a single wellness measure.
  #
  # data:    data frame with a PlayerID column and the column named by `column`
  # column:  name (string) of the column to plot
  # title:   plot title
  # yLabel:  axis label for the plotted measure
  #
  # Returns the ggplot object; calling this at top level prints the plot.
  wellnessBoxPlot <- function(data, column, title, yLabel) {
    ggplot(data = data) +
      geom_boxplot(
        mapping = aes(y = .data[[column]], group = PlayerID),
        na.rm = TRUE,
        outlier.colour = "red",
        outlier.shape = 1
      ) +
      ggtitle(title) +
      labs(group = "Player ID", y = yLabel) +
      coord_flip() +
      # theme_bw() is a complete theme and replaces any theme() settings added
      # before it, so the title centring must come afterwards to take effect.
      theme_bw() +
      theme(plot.title = element_text(hjust = 0.5))
  }

  wellnessBoxPlot(
    wellnessCleaned, "SleepHours",
    title = "Hours of Sleep Box Plot", yLabel = "Hours of Sleep"
  )
  wellnessBoxPlot(
    wellnessCleaned, "Fatigue",
    title = "Fatigue Box Plot", yLabel = "Fatigue Score"
  )
  wellnessBoxPlot(
    wellnessCleaned, "SleepQuality",
    title = "Sleep Quality Box Plot", yLabel = "Sleep Quality"
  )
  wellnessBoxPlot(
    wellnessCleaned, "TrainingReadinessNum",
    title = "Training Readiness Box Plot", yLabel = "Training Readiness"
  )
  # Density of player 1's SleepHours using the default bandwidth. na.rm = TRUE
  # keeps density() from stopping on missing entries, which the max()/min()
  # calls below already tolerate.
  plot(density(wellnesPlayer1$SleepHours, na.rm = TRUE))

  # Extremes of SleepHours across all players, ignoring missing entries.
  max(wellnessCleaned$SleepHours, na.rm = TRUE)
  min(wellnessCleaned$SleepHours, na.rm = TRUE)

  # Distinct player identifiers present in the wellness table.
  playerIdsWellness <- unique(wellnessCleaned$PlayerID)

  # Trailing "\n" so the report does not share a console line with whatever
  # is printed next.
  cat("Number of Players: ", length(playerIdsWellness), "\n", sep = "")
  # Load the RPE and games tables, keeping each one both as the raw data
  # frame and as a tibble.
  rpeData <- read.csv("./data/rpe.csv")
  gameData <- read.csv("data/games.csv")

  rpeDataTibble <- as_tibble(rpeData)
  gameDataTibble <- as_tibble(gameData)
  # ---- Per-player sleep density panels ----
  # Draw one SleepHours density per player on a 4 x 5 grid of panels.
  # The previous graphics settings are saved and restored afterwards so that
  # later plots are not squeezed into a single grid cell.
  oldPar <- par(mfrow = c(4, 5))

  # sort() also drops any NA identifiers.
  playerIdsWellness <- sort(playerIdsWellness)

  for (playerID in playerIdsWellness) {
    # NOTE(review): the cutoff of 88 is unexplained in this file; presumably
    # it excludes placeholder or invalid IDs. Confirm against the data.
    if (is.na(playerID) || playerID >= 88) {
      next
    }

    welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)

    # Fixed bandwidth keeps the panels comparable between players.
    # na.rm = TRUE stops density() from erroring on missing entries.
    plot(
      density(
        welnessTibble$SleepHours,
        kernel = "gaussian", bw = 0.5, na.rm = TRUE
      ),
      main = paste0("Player ", playerID),
      xlab = "Hours of Sleep"
    )
  }

  par(oldPar)
  # ---- Overlay of per-player sleep densities ----
  # Player 1's curve sets up the axes (y-range fixed to 0-0.7); one curve per
  # eligible player is then drawn on top, all with the same bandwidth.
  # na.rm = TRUE stops density() from erroring on missing entries.
  plot(
    density(
      wellnesPlayer1$SleepHours,
      kernel = "gaussian", bw = 0.4, na.rm = TRUE
    ),
    ylim = c(0, 0.7),
    xlab = "Hours of Sleep",
    main = "Team's Sleep Distribution"
  )

  for (playerID in playerIdsWellness) {
    # NOTE(review): the cutoff of 88 is unexplained in this file; presumably
    # it excludes placeholder or invalid IDs. Confirm against the data.
    if (is.na(playerID) || playerID >= 88) {
      next
    }

    welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)
    lines(
      density(
        welnessTibble$SleepHours,
        kernel = "gaussian", bw = 0.4, na.rm = TRUE
      )
    )
  }
  # ---- Overlay of per-player fatigue densities ----
  # Player 1's curve sets up the axes (y-range fixed to 0-0.7); one curve per
  # eligible player is then drawn on top, all with the same bandwidth.
  # na.rm = TRUE stops density() from erroring on missing entries.
  plot(
    density(
      wellnesPlayer1$Fatigue,
      kernel = "gaussian", bw = 0.4, na.rm = TRUE
    ),
    ylim = c(0, 0.7),
    xlab = "Self Reported Fatigue",
    main = "Team's Fatigue Distribution"
  )

  for (playerID in playerIdsWellness) {
    # NOTE(review): the cutoff of 88 is unexplained in this file; presumably
    # it excludes placeholder or invalid IDs. Confirm against the data.
    if (is.na(playerID) || playerID >= 88) {
      next
    }

    welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)
    lines(
      density(
        welnessTibble$Fatigue,
        kernel = "gaussian", bw = 0.4, na.rm = TRUE
      )
    )
  }
  # Preview the first six rows of the raw GPS data frame.
  head(gpsData, n = 6L)
  120. # Normalize Wellness data