Browse Source

Normalized each user's data relative to their own answers to remove some bias.

master
Jeffery Russell 5 years ago
parent
commit
7ace789b9f
4 changed files with 5213 additions and 15 deletions
  1. +5012
    -0
      data_preparation/cleaned/normalizedWellness.csv
  2. +87
    -15
      data_preparation/dataPrep.R
  3. +97
    -0
      data_preparation/normalizeData.R
  4. +17
    -0
      data_preparation/readData.R

+ 5012
- 0
data_preparation/cleaned/normalizedWellness.csv
File diff suppressed because it is too large
View File


+ 87
- 15
data_preparation/dataPrep.R View File

@ -77,27 +77,17 @@ write.csv(compressedMetrics, "data/speedData.csv")
rpeData <- read.csv("./data/rpe.csv")
rpeDataTibble <- as_tibble(rpeData)
gameData <- read.csv("data/games.csv")
gameDataTibble <- as_tibble(gameData)
wellnessData <- read.csv("./data/wellness_na.csv") wellnessData <- read.csv("./data/wellness_na.csv")
wellnessDataTibble <- as_tibble(wellnessData) wellnessDataTibble <- as_tibble(wellnessData)
wellnesPlayer1 <- subset(wellnessDataTibble, PlayerID == 1)
plot(wellnesPlayer1$Fatigue * wellnesPlayer1$Soreness * wellnesPlayer1$Irritability, wellnesPlayer1$SleepHours * wellnesPlayer1$SleepQuality)
#plot(wellnesPlayer1$Fatigue * wellnesPlayer1$Soreness * wellnesPlayer1$Irritability, wellnesPlayer1$SleepHours * wellnesPlayer1$SleepQuality)
wellnessCleaned <- as_tibble(read.csv("./cleaned/dirty_wellness.csv")) wellnessCleaned <- as_tibble(read.csv("./cleaned/dirty_wellness.csv"))
wellnesPlayer1 <- subset(wellnessCleaned, PlayerID == 1)
ggplot(data = wellnessCleaned) + ggplot(data = wellnessCleaned) +
@ -111,7 +101,7 @@ ggplot(data = wellnessCleaned) +
ggplot(data = wellnessCleaned) + ggplot(data = wellnessCleaned) +
theme(plot.title = element_text(hjust = 0.5)) + theme(plot.title = element_text(hjust = 0.5)) +
ggtitle("Hours of Sleep Box Plot") +
ggtitle("Fatigue Box Plot") +
geom_boxplot(na.rm = T, mapping = aes(y=Fatigue, group = PlayerID), outlier.colour = "red", outlier.shape = 1) + geom_boxplot(na.rm = T, mapping = aes(y=Fatigue, group = PlayerID), outlier.colour = "red", outlier.shape = 1) +
labs(group = "Player ID", y = "Fatigue Score") + labs(group = "Player ID", y = "Fatigue Score") +
coord_flip() + coord_flip() +
@ -136,6 +126,8 @@ ggplot(data = wellnessCleaned) +
theme_bw() theme_bw()
plot(density(wellnesPlayer1$SleepHours))
max(wellnessCleaned$SleepHours, na.rm = T) max(wellnessCleaned$SleepHours, na.rm = T)
min(wellnessCleaned$SleepHours, na.rm = T) min(wellnessCleaned$SleepHours, na.rm = T)
@ -143,4 +135,84 @@ min(wellnessCleaned$SleepHours, na.rm = T)
playerIdsWellness <-unique(wellnessCleaned$PlayerID) playerIdsWellness <-unique(wellnessCleaned$PlayerID)
cat("Number of Players: ", length(playerIdsWellness), sep="") cat("Number of Players: ", length(playerIdsWellness), sep="")
head(gpsData)
rpeData <- read.csv("./data/rpe.csv")
rpeDataTibble <- as_tibble(rpeData)
gameData <- read.csv("data/games.csv")
gameDataTibble <- as_tibble(gameData)
# Draw one sleep-hours density plot per player on a 4 x 5 grid of panels.
par(mfrow = c(4, 5))
playerIdsWellness <- sort(playerIdsWellness)
for (playerID in playerIdsWellness) {
  # Skip missing IDs and IDs of 88 or above.
  # NOTE(review): the reason for the 88 cut-off is not visible here - confirm.
  if (is.na(playerID) || playerID >= 88) {
    next
  }
  welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)
  plot(
    density(welnessTibble$SleepHours, kernel = "gaussian", bw=0.5),
    main = paste("Player ", playerID, sep=""),
    xlab = "Hours of Sleep"
  )
}
# Overlay the sleep-hours density curve of every player on a single plot.
# The axes, title and y-range come from the initial curve for wellnesPlayer1;
# the loop then adds one curve per player with lines().
plot(
  density(wellnesPlayer1$SleepHours, kernel = "gaussian", bw=0.4),
  ylim = c(0,.7),
  xlab = "Hours of Sleep",
  main = "Team's Sleep Distribution"
)
for (playerID in playerIdsWellness) {
  # Skip missing IDs and IDs of 88 or above.
  # NOTE(review): the reason for the 88 cut-off is not visible here - confirm.
  if (is.na(playerID) || playerID >= 88) {
    next
  }
  welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)
  lines(density(welnessTibble$SleepHours, kernel = "gaussian", bw=0.4))
}
# Overlay the self-reported fatigue density curve of every player on a single
# plot. The axes, title and y-range come from the initial curve for
# wellnesPlayer1; the loop then adds one curve per player with lines().
plot(
  density(wellnesPlayer1$Fatigue, kernel = "gaussian", bw=0.4),
  ylim = c(0,.7),
  xlab = "Self Reported Fatigue",
  main = "Team's Fatigue Distribution"
)
for (playerID in playerIdsWellness) {
  # Skip missing IDs and IDs of 88 or above.
  # NOTE(review): the reason for the 88 cut-off is not visible here - confirm.
  if (is.na(playerID) || playerID >= 88) {
    next
  }
  welnessTibble <- subset(wellnessCleaned, PlayerID == playerID)
  lines(density(welnessTibble$Fatigue, kernel = "gaussian", bw=0.4))
}
head(gpsData)
# Normalize Wellness data

+ 97
- 0
data_preparation/normalizeData.R View File

@ -0,0 +1,97 @@
source("readData.R")
library(tidyverse)
library(bestNormalize)
# Normalizes the self-reported wellness metrics separately for every player.
#
# Each metric (fatigue, soreness, desire, irritability, sleep hours and sleep
# quality) is transformed with bestNormalize() using only that player's own
# answers. The combined result is written to cleaned/normalizedWellness.csv.

# Fit bestNormalize() to one metric and return the standardized, transformed
# values for the same observations.
normalizeMetric <- function(values) {
  predict(bestNormalize(values, standardize = TRUE))
}

# Normalize all wellness metrics for a single player.
#
# data: the full wellness tibble; id: the PlayerID to process.
# Returns a tibble with one row per observation of that player, or NULL when
# the player has no rows (for example when `id` is NA).
normalizePlayer <- function(id, data) {
  userTibble <- subset(data, PlayerID == id)
  if (nrow(userTibble) == 0) {
    return(NULL)
  }

  # Progress / diagnostic output: player ID, number of rows, chosen fatigue fit.
  print(id)
  print(nrow(userTibble))
  fatigueNormalized <- bestNormalize(userTibble$Fatigue, standardize = TRUE)
  print(fatigueNormalized)

  tibble(
    date = userTibble$TimeSinceAugFirst,
    playerID = userTibble$PlayerID,
    normSoreness = normalizeMetric(userTibble$Soreness),
    normFatigue = predict(fatigueNormalized),
    normDesire = normalizeMetric(userTibble$Desire),
    normIrritability = normalizeMetric(userTibble$Irritability),
    normSleepHours = normalizeMetric(userTibble$SleepHours),
    normSleepQuality = normalizeMetric(userTibble$SleepQuality)
  )
}

wellnessData <- readWellnessData()
playerIds <- unique(wellnessData$PlayerID)
cat("Number of Players: ", length(playerIds), "\n", sep = "")

# Build one tibble per player and stack them once; bind_rows() drops the NULL
# entries returned for players without data.
normalWellnessData <- bind_rows(
  lapply(playerIds, normalizePlayer, data = wellnessData)
)

write.csv(normalWellnessData, "cleaned/normalizedWellness.csv")

# Quick visual check of two of the normalized metrics against each other.
plot(
  normalWellnessData$normDesire,
  normalWellnessData$normSoreness,
  xlab = "normDesire",
  ylab = "normSoreness"
)

+ 17
- 0
data_preparation/readData.R View File

@ -0,0 +1,17 @@
library(tidyverse)
# Read ./data/speedData.csv (path relative to the working directory) and
# return its contents as a tibble.
readGPSMetrics <- function() {
  gpsMetrics <- read.csv("./data/speedData.csv")
  as_tibble(gpsMetrics)
}
# Read ./cleaned/time_series_notnormalized_with_0NaN_wellness.csv (path
# relative to the working directory) and return its contents as a tibble.
readWellnessData <- function() {
  wellness <- read.csv("./cleaned/time_series_notnormalized_with_0NaN_wellness.csv")
  as_tibble(wellness)
}
# Read ./cleaned/notnormalized_with_0Nan_rpe.csv (path relative to the
# working directory) and return its contents as a tibble.
readRPEData <- function() {
  rpe <- read.csv("./cleaned/notnormalized_with_0Nan_rpe.csv")
  as_tibble(rpe)
}

Loading…
Cancel
Save