|
@ -3,16 +3,26 @@ import pandas as pd |
|
|
# Load the wellness table from the cleaned-data folder.
df = pd.read_csv("cleaned/wellness.csv")

# Print the distinct values present in the "Illness" column.
print(df["Illness"].unique())

# Numeric value assigned to each "Illness" label.
illness = {
    "No": 0,
    "Slightly Off": 0.5,
    "Yes": 1,
}
|
|
|
|
|
|
|
|
def vectorize_mult(column, dictionary, file=None, frame=None):
    """Add a ``<column>Num`` column by mapping labels through a dictionary.

    Args:
        column: Name of the existing column whose values are looked up.
        dictionary: Mapping from each label in ``column`` to its numeric
            value. Labels that are absent from the mapping become NaN
            (``Series.map`` behaviour).
        file: Optional base name; when given, the frame is written to
            ``cleaned/<file>.csv`` after the new column is added.
        frame: DataFrame to modify in place. Defaults to the module-level
            ``df`` so existing three-argument calls behave as before.

    Returns:
        None. The target frame is mutated in place.
    """
    target = df if frame is None else frame
    target[column + "Num"] = target[column].map(dictionary)

    if file is not None:
        # index=False: this script reloads the file with a plain read_csv, so
        # writing the row index would add another "Unnamed: 0" column per run.
        target.to_csv("cleaned/{}.csv".format(file), index=False)
|
|
|
|
|
|
|
|
# Encode the "Illness" labels through the shared helper instead of a
# hand-written dictionary lookup: Series.map turns missing or unmapped
# entries into NaN, where the direct lookup raised KeyError on them.
# Passing "wellness" makes the helper rewrite cleaned/wellness.csv.
vectorize_mult("Illness", illness, "wellness")

# Encode the Yes/No "USGMeasurement" column the same way and save again.
vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")

# Print the original labels, then their numeric encoding.
print(df["Illness"])
print(df["IllnessNum"])

# The string literal below is disabled code; it is never executed.
# It converts "TrainingReadiness" percent strings (e.g. "85%") to fractions.
# NOTE(review): it calls Series.iteritems and DataFrame.set_value, which
# current pandas releases no longer provide (use Series.items / DataFrame.at);
# port those calls before re-enabling.
# NOTE(review): indentation was reconstructed from a flattened listing - the
# nesting level of print(value) in particular should be confirmed.
"""
for i, value in df["TrainingReadiness"].iteritems():
    if pd.notna(value):
        value = value.split("%")[0]
        value = float(value) * (1/100)
        value = round(value, 2)
        df.set_value(i, "TrainingReadinessNum", value)

        print(value)



df.to_csv('cleaned/{}.csv'.format("wellness"))
"""