From 6d25e735c5d520767aa31d35bf5ee4c4f08b6359 Mon Sep 17 00:00:00 2001
From: jrtechs
Date: Fri, 29 Mar 2019 20:12:43 -0400
Subject: [PATCH 1/2] Added basic data input for gps data to git.

---
 data_preparation/dataPrep.R | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 data_preparation/dataPrep.R

diff --git a/data_preparation/dataPrep.R b/data_preparation/dataPrep.R
new file mode 100644
index 0000000..a27a239
--- /dev/null
+++ b/data_preparation/dataPrep.R
@@ -0,0 +1,52 @@
+# Look at data: load the raw tables and derive per-player, per-game GPS
+# summary metrics.
+
+library(tidyverse)
+
+# Every raw CSV lives in this directory; building the paths in one place
+# keeps the reads consistent with each other.
+dataDir <- "data"
+
+# Only the first workingRowLimit GPS rows feed the summaries below.
+workingRowLimit <- 100000
+
+gpsData <- read.csv(file.path(dataDir, "gps.csv"))
+gpsDataTibble <- as_tibble(gpsData)
+
+workingTibble <- head(gpsDataTibble, workingRowLimit)
+
+playerIds <- unique(workingTibble$PlayerID)
+gameIds <- unique(workingTibble$GameID)
+
+# One row per (player, game) pair that actually occurs in the data. A grouped
+# summary replaces re-filtering the table for every player/game combination
+# and growing result vectors with c(); it also drops the NaN entries that
+# combinations with no rows used to produce. Samples that are NA are skipped
+# (na.rm = TRUE) instead of turning a whole group's metric into NA.
+playerGameMetrics <- workingTibble %>%
+  group_by(PlayerID, GameID) %>%
+  summarise(
+    averageSpeed = mean(Speed, na.rm = TRUE),
+    # mean length of the (AccelX, AccelY, AccelZ) vector
+    accelDistance = mean(sqrt(AccelX^2 + AccelY^2 + AccelZ^2), na.rm = TRUE)
+  ) %>%
+  ungroup()
+
+# Parallel vectors, one element per row of playerGameMetrics.
+playerIDMetrics <- playerGameMetrics$PlayerID
+gameIDMetrics <- playerGameMetrics$GameID
+averageSpeed <- playerGameMetrics$averageSpeed
+accelDistance <- playerGameMetrics$accelDistance
+
+plot(accelDistance, averageSpeed)
+
+rpeData <- read.csv(file.path(dataDir, "rpe.csv"))
+rpeDataTibble <- as_tibble(rpeData)
+
+gameData <- read.csv(file.path(dataDir, "game.csv"))
+gameDataTibble <- as_tibble(gameData)
+
+wellnessData <- read.csv(file.path(dataDir, "wellness.csv"))
+wellnessDataTibble <- as_tibble(wellnessData)
+
+head(gpsData)
From d0d2c359d334436b45da6bbb562c3a3aa7c12166
Mon Sep 17 00:00:00 2001
From: Ryan Missel
Date: Fri, 29 Mar 2019 21:44:20 -0400
Subject: [PATCH 2/2] add thing for perry and friends

---
 data_preparation/vectorization_ex.py | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/data_preparation/vectorization_ex.py b/data_preparation/vectorization_ex.py
index 331a70b..36583f8 100644
--- a/data_preparation/vectorization_ex.py
+++ b/data_preparation/vectorization_ex.py
@@ -3,16 +3,42 @@ import pandas as pd
 # read in CSV
 df = pd.read_csv('cleaned/wellness.csv')
 
-# print out column uniques
-print(df["Illness"].unique())
 
-# make dictionary of unique values and their associated values
-illness = {'No': 0, 'Slightly Off': 0.5, 'Yes': 1}
+def vectorize_mult(column, dictionary, file=None):
+    """Add a numeric ``<column>Num`` column to the module-level ``df``.
+
+    Each value of ``df[column]`` is looked up in ``dictionary``; values with
+    no entry become NaN (``Series.map`` semantics).
+
+    column     -- name of the existing column to encode
+    dictionary -- mapping from raw value to numeric code
+    file       -- optional base name; when given, ``df`` is saved to
+                  ``cleaned/<file>.csv``
+    """
+    newCol = column + "Num"
+    df[newCol] = df[column].map(dictionary)
+    if file is not None:
+        # index=False: the row index is not data, and writing it would add
+        # an extra unnamed column each time the CSV is re-read and re-saved.
+        df.to_csv('cleaned/{}.csv'.format(file), index=False)
 
-# iterate through new column vectorize
-df["IllnessNum"] = [illness[item] for item in df["Illness"]]
 
-df.to_csv('cleaned/wellness.csv')
+vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")
 
-print(df["Illness"])
-print(df["IllnessNum"])
+# Parked draft, kept as an inert string literal (never executed): turns
+# "NN%" TrainingReadiness strings into fractions. It relies on
+# Series.iteritems() and DataFrame.set_value(), which current pandas no
+# longer provides, so it needs porting before being re-enabled.
+"""
+for i, value in df["TrainingReadiness"].iteritems():
+    if pd.notna(value):
+        value = value.split("%")[0]
+        value = float(value) * (1/100)
+        value = round(value, 2)
+        df.set_value(i, "TrainingReadinessNum", value)
+
+    print(value)
+
+
+df.to_csv('cleaned/{}.csv'.format("wellness"))
+"""