From d0d2c359d334436b45da6bbb562c3a3aa7c12166 Mon Sep 17 00:00:00 2001
From: Ryan Missel <rxm7244@rit.edu>
Date: Fri, 29 Mar 2019 21:44:20 -0400
Subject: [PATCH] Add vectorize_mult helper for mapping column values to numbers, for Perry and friends

---
 data_preparation/vectorization_ex.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/data_preparation/vectorization_ex.py b/data_preparation/vectorization_ex.py
index 331a70b..36583f8 100644
--- a/data_preparation/vectorization_ex.py
+++ b/data_preparation/vectorization_ex.py
@@ -3,16 +3,26 @@ import pandas as pd
 # read in CSV
 df = pd.read_csv('cleaned/wellness.csv')
 
-# print out column uniques
-print(df["Illness"].unique())
 
-# make dictionary of unique values and their associated values
-illness = {'No': 0, 'Slightly Off': 0.5, 'Yes': 1}
+def vectorize_mult(column, dictionary, file=None):
+    """Map df[column] through dictionary into df[column + "Num"]; if file is given, save df to cleaned/<file>.csv."""
+    df[column + "Num"] = df[column].map(dictionary)  # Series.map yields NaN for values that have no key in dictionary
+    if file is not None:
+        df.to_csv('cleaned/{}.csv'.format(file), index=False)  # index=False: the default also writes the row index, which comes back as an "Unnamed: 0" column when the CSV is read again without index_col
 
-# iterate through new column vectorize
-df["IllnessNum"] = [illness[item] for item in df["Illness"]]
 
-df.to_csv('cleaned/wellness.csv')
+vectorize_mult(column="USGMeasurement", dictionary={"No": 0, "Yes": 1}, file="wellness")  # adds USGMeasurementNum (No -> 0, Yes -> 1), then writes cleaned/wellness.csv
 
-print(df["Illness"])
-print(df["IllnessNum"])
+"""
+for i, value in df["TrainingReadiness"].iteritems():
+    if pd.notna(value):
+        value = value.split("%")[0]
+        value = float(value) * (1/100)
+        value = round(value, 2)
+        df.set_value(i, "TrainingReadinessNum", value)
+
+        print(value)
+
+
+df.to_csv('cleaned/{}.csv'.format("wellness"))
+"""  # Disabled draft kept as a bare string (never executed): turns "NN%" strings in TrainingReadiness into rounded fractions (e.g. "85%" -> 0.85) in TrainingReadinessNum. NOTE(review): Series.iteritems and DataFrame.set_value have been removed from current pandas; use .items() and df.at[...] if this is revived.
\ No newline at end of file