From d0d2c359d334436b45da6bbb562c3a3aa7c12166 Mon Sep 17 00:00:00 2001
From: Ryan Missel
Date: Fri, 29 Mar 2019 21:44:20 -0400
Subject: [PATCH] add thing for perry and friends

---
Review notes (free-form text in this position is ignored by `git am`):

What the patch does
  * Removes the inline "Illness" handling: the debug prints, the `illness`
    lookup dict, the list comprehension that built "IllnessNum", and the
    unconditional write to cleaned/wellness.csv.
  * Adds vectorize_mult(column, dictionary, file=None). It creates a new
    column named column + "Num" on the module-level `df` by passing
    df[column] through `dictionary` with Series.map. When `file` is not
    None it writes `df` to 'cleaned/<file>.csv'. It returns nothing.
  * Calls it once: "USGMeasurement" with {"No": 0, "Yes": 1}, written to
    cleaned/wellness.csv.
  * Adds a bare triple-quoted string holding a disabled loop over
    df["TrainingReadiness"] (take the text before "%", multiply by 1/100,
    round to 2 places, store in "TrainingReadinessNum"). Being a string
    expression, it is not executed.

Things to confirm
  * NOTE(review): the removed list comprehension indexed the dict directly
    and would raise KeyError on an unlisted value; Series.map is documented
    to yield NaN instead. Confirm that silent NaN is wanted.
  * NOTE(review): the file is read with read_csv(...) without index_col and
    written with to_csv(...) without index=False, so each run presumably
    adds another unnamed index column to wellness.csv. Verify.
  * NOTE(review): the disabled loop uses Series.iteritems and
    DataFrame.set_value, which presumably no longer exist in current pandas.
    Check before re-enabling it.
  * NOTE(review): this patch had been flattened onto a single line. Line
    breaks, the 4-space Python indentation and the blank context lines are
    reconstructed to satisfy the hunk header (-3,16 +3,26; 7 context,
    9 removed, 19 added). The exact position of the blank context lines is
    a guess; check against blob 331a70b before applying.

 data_preparation/vectorization_ex.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/data_preparation/vectorization_ex.py b/data_preparation/vectorization_ex.py
index 331a70b..36583f8 100644
--- a/data_preparation/vectorization_ex.py
+++ b/data_preparation/vectorization_ex.py
@@ -3,16 +3,26 @@ import pandas as pd
 # read in CSV
 df = pd.read_csv('cleaned/wellness.csv')
 
-# print out column uniques
-print(df["Illness"].unique())
 
-# make dictionary of unique values and their associated values
-illness = {'No': 0, 'Slightly Off': 0.5, 'Yes': 1}
+def vectorize_mult(column, dictionary, file=None):
+    newCol = column + "Num"
+    df[newCol] = df[column].map(dictionary)
+    if file is not None:
+        df.to_csv('cleaned/{}.csv'.format(file))
 
 
-# iterate through new column vectorize
-df["IllnessNum"] = [illness[item] for item in df["Illness"]]
-df.to_csv('cleaned/wellness.csv')
+vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")
 
-print(df["Illness"])
-print(df["IllnessNum"])
+"""
+for i, value in df["TrainingReadiness"].iteritems():
+    if pd.notna(value):
+        value = value.split("%")[0]
+        value = float(value) * (1/100)
+        value = round(value, 2)
+        df.set_value(i, "TrainingReadinessNum", value)
+
+        print(value)
+
+
+df.to_csv('cleaned/{}.csv'.format("wellness"))
+"""
\ No newline at end of file