From 6d25e735c5d520767aa31d35bf5ee4c4f08b6359 Mon Sep 17 00:00:00 2001
From: jrtechs <jxr8142@rit.edu>
Date: Fri, 29 Mar 2019 20:12:43 -0400
Subject: [PATCH 1/2] Added basic data input for gps data to git.

---
 data_preparation/dataPrep.R | 77 +++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 data_preparation/dataPrep.R

diff --git a/data_preparation/dataPrep.R b/data_preparation/dataPrep.R
new file mode 100644
index 0000000..a27a239
--- /dev/null
+++ b/data_preparation/dataPrep.R
@@ -0,0 +1,77 @@
+# Exploratory look at the GPS, RPE, game and wellness CSV data
+
+
+
+library(tidyverse)
+
+# Read the GPS export from disk as a base data frame.
+gpsData <- read.csv(file = "data/gps.csv")
+
+# Tibble view of the same rows.
+gpsDataTibble <- as_tibble(x = gpsData)
+
+# Working subset: only the first 100000 rows.
+workingTibble <- gpsDataTibble %>%
+  head(n = 100000)
+
+
+# Per-player, per-game summary metrics ----
+#
+# Produces four parallel vectors with one entry per (PlayerID, GameID) pair
+# that actually occurs in workingTibble:
+#   playerIDMetrics - player id of the pair
+#   gameIDMetrics   - game id of the pair
+#   averageSpeed    - mean of Speed over the pair's rows
+#   accelDistance   - mean of sqrt(AccelX^2 + AccelY^2 + AccelZ^2)
+#
+# One grouped summarise replaces a nested loop over every player x game
+# combination. That loop re-filtered the whole table on each iteration,
+# grew its result vectors with c(), and appended NaN (the mean of zero
+# rows) for every combination that has no rows. Its per-iteration cat()
+# progress output is dropped together with the loop.
+#
+# Missing values are not removed: a pair whose columns contain NA still
+# yields NA, exactly as mean() did inside the loop.
+
+
+playerIds <- unique(workingTibble$PlayerID)
+gameIds <- unique(workingTibble$GameID)
+
+
+pairMetrics <- workingTibble %>%
+  group_by(PlayerID, GameID) %>%
+  summarise(
+    averageSpeed = mean(Speed),
+    accelDistance = mean(sqrt(AccelX^2 + AccelY^2 + AccelZ^2))
+  ) %>%
+  ungroup() %>%
+  # Same ordering as the loop: players, then games, by first appearance.
+  arrange(match(PlayerID, playerIds), match(GameID, gameIds))
+
+
+playerIDMetrics <- pairMetrics$PlayerID
+gameIDMetrics <- pairMetrics$GameID
+averageSpeed <- pairMetrics$averageSpeed
+accelDistance <- pairMetrics$accelDistance
+
+
+# One point per player-game pair.
+plot(accelDistance, averageSpeed)
+
+
+
+
+
+
+# Remaining tables: read each CSV first, then derive the tibble views.
+rpeData <- read.csv(file = "./data/rpe.csv")
+gameData <- read.csv(file = "./data/game.csv")
+wellnessData <- read.csv(file = "./data/wellness.csv")
+
+rpeDataTibble <- as_tibble(x = rpeData)
+gameDataTibble <- as_tibble(x = gameData)
+wellnessDataTibble <- as_tibble(x = wellnessData)
+
+# Preview the first rows of the GPS data frame (gpsData is loaded near
+# the top of this script).
+head(x = gpsData)
\ No newline at end of file

From d0d2c359d334436b45da6bbb562c3a3aa7c12166 Mon Sep 17 00:00:00 2001
From: Ryan Missel <rxm7244@rit.edu>
Date: Fri, 29 Mar 2019 21:44:20 -0400
Subject: [PATCH 2/2] add thing for perry and friends

---
 data_preparation/vectorization_ex.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/data_preparation/vectorization_ex.py b/data_preparation/vectorization_ex.py
index 331a70b..36583f8 100644
--- a/data_preparation/vectorization_ex.py
+++ b/data_preparation/vectorization_ex.py
@@ -3,16 +3,26 @@ import pandas as pd
 # read in CSV
 df = pd.read_csv('cleaned/wellness.csv')
 
-# print out column uniques
-print(df["Illness"].unique())
 
-# make dictionary of unique values and their associated values
-illness = {'No': 0, 'Slightly Off': 0.5, 'Yes': 1}
+def vectorize_mult(column, dictionary, file=None):
+    """Map df[column] via dictionary into df[column + "Num"]; save if file is given."""
+    df[column + "Num"] = df[column].map(dictionary)  # unmapped values become NaN
+    if file is not None:  # index=False: no extra "Unnamed: 0" column on re-read
+        df.to_csv('cleaned/{}.csv'.format(file), index=False)
 
-# iterate through new column vectorize
-df["IllnessNum"] = [illness[item] for item in df["Illness"]]
 
-df.to_csv('cleaned/wellness.csv')
+vectorize_mult(column="USGMeasurement", dictionary={"No": 0, "Yes": 1}, file="wellness")
 
-print(df["Illness"])
-print(df["IllnessNum"])
+"""
+for i, value in df["TrainingReadiness"].iteritems():
+    if pd.notna(value):
+        value = value.split("%")[0]
+        value = float(value) * (1/100)
+        value = round(value, 2)
+        df.set_value(i, "TrainingReadinessNum", value)
+
+        print(value)
+
+
+df.to_csv('cleaned/{}.csv'.format("wellness"))
+"""
\ No newline at end of file