PerryXDeng 5 years ago
parent
commit
2a0d09adf8
2 changed files with 96 additions and 9 deletions
  1. +77
    -0
      data_preparation/dataPrep.R
  2. +19
    -9
      data_preparation/vectorization_ex.py

+ 77
- 0
data_preparation/dataPrep.R View File

@ -0,0 +1,77 @@
# Look at data
library(tidyverse)

# Load the raw GPS readings and keep the first 100,000 rows as a working
# sample so the exploration below stays fast.
gpsData <- read.csv("data/gps.csv")
gpsDataTibble <- as_tibble(gpsData)
workingTibble <- head(gpsDataTibble, 100000)

# Distinct players and games present in the working sample.
playerIds <- unique(workingTibble$PlayerID)
gameIds <- unique(workingTibble$GameID)

# One row per (player, game) pair that actually occurs in the sample:
#   averageSpeed  - mean of Speed over that pair's rows
#   accelDistance - mean magnitude of the (AccelX, AccelY, AccelZ) vector
# A single grouped pass replaces scanning the whole table once per
# player x game combination, and pairs without any rows are not emitted
# (their means would only be NaN). Rows come back ordered by PlayerID, GameID.
sessionMetrics <- workingTibble %>%
  group_by(PlayerID, GameID) %>%
  summarise(
    averageSpeed = mean(Speed),
    accelDistance = mean(sqrt(AccelX^2 + AccelY^2 + AccelZ^2))
  ) %>%
  ungroup()

# Expose the summary as parallel vectors (element i of each vector describes
# the same player/game pair).
playerIDMetrics <- sessionMetrics$PlayerID
gameIDMetrics <- sessionMetrics$GameID
averageSpeed <- sessionMetrics$averageSpeed
accelDistance <- sessionMetrics$accelDistance

# Scatter plot of mean acceleration magnitude against mean speed,
# one point per player/game pair.
plot(accelDistance, averageSpeed)

# Load the remaining tables, each as a plain data frame and as a tibble.
rpeData <- read.csv("./data/rpe.csv")
rpeDataTibble <- as_tibble(rpeData)
gameData <- read.csv("./data/game.csv")
gameDataTibble <- as_tibble(gameData)
wellnessData <- read.csv("./data/wellness.csv")
wellnessDataTibble <- as_tibble(wellnessData)

# Preview the first rows of the raw GPS data.
head(gpsData)

+ 19
- 9
data_preparation/vectorization_ex.py View File

@ -3,16 +3,26 @@ import pandas as pd
# Load the cleaned wellness table into the module-level frame `df`.
df = pd.read_csv('cleaned/wellness.csv')
# Print the distinct values found in the "Illness" column so they can be
# compared with the keys of the mapping below.
print(df["Illness"].unique())
# Numeric code assigned to each "Illness" value.
illness = {'No': 0, 'Slightly Off': 0.5, 'Yes': 1}
def vectorize_mult(column, dictionary, file=None):
    """Add a numeric companion column to the module-level ``df``.

    The new column is named ``column + "Num"`` and holds the value that
    ``dictionary`` maps each entry of ``df[column]`` to. Entries with no
    matching key become NaN (``Series.map`` semantics).

    Args:
        column: Name of the existing column in ``df`` to translate.
        dictionary: Mapping from raw column values to numeric codes.
        file: Optional base name. When given, ``df`` is written to
            ``cleaned/<file>.csv``; when None, nothing is written.
    """
    newCol = column + "Num"
    df[newCol] = df[column].map(dictionary)
    if file is not None:
        # index=False: the frame is loaded with a plain read_csv, so writing
        # the row index would add an extra "Unnamed: 0" column on every
        # write/read round trip through the same file.
        df.to_csv('cleaned/{}.csv'.format(file), index=False)
# Encode "Illness" numerically by looking each value up in `illness`
# (a value that is not a key of the dict raises KeyError here).
df["IllnessNum"] = [illness[item] for item in df["Illness"]]
# NOTE(review): to_csv writes the row index by default and this is the same
# path `df` was read from — check for an extra "Unnamed: 0" column on reload.
df.to_csv('cleaned/wellness.csv')
# Encode "USGMeasurement" as 0/1 and write the frame to cleaned/wellness.csv.
vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")
# Show the raw and the encoded illness columns for a visual check.
print(df["Illness"])
print(df["IllnessNum"])
# Disabled draft, kept as a bare string literal so it never runs: for each
# non-null "TrainingReadiness" value it takes the text before "%", scales it
# by 1/100, rounds to 2 decimals and stores it in "TrainingReadinessNum".
# NOTE(review): it uses Series.iteritems and DataFrame.set_value, which newer
# pandas releases no longer provide — port before re-enabling.
"""
for i, value in df["TrainingReadiness"].iteritems():
if pd.notna(value):
value = value.split("%")[0]
value = float(value) * (1/100)
value = round(value, 2)
df.set_value(i, "TrainingReadinessNum", value)
print(value)
df.to_csv('cleaned/{}.csv'.format("wellness"))
"""

Loading…
Cancel
Save