| @ -1,12 +1,51 @@ | |||||
| import pandas as pd | import pandas as pd | ||||
# Read the wellness CSV from the 'cleaned' directory into a module-level
# DataFrame named `df`. This runs at import time.
df = pd.read_csv('cleaned/dirty_wellness_na.csv')
def vectorize_mult(df, column, dictionary, file=None):
    """
    Changes all the categorical values of a column into their respective
    numbers from the dictionary and saves the result in the DF.

    The result is stored in a new column named ``column + "Num"``; the
    dataframe is modified in place and the source column is left untouched.
    Values that have no entry in ``dictionary`` (including missing values)
    come out as NaN.

    :param df: dataframe to modify in place
    :param column: name of the categorical column to convert
    :param dictionary: alterations to make (category label -> number)
    :param file: optional base file name; when given, the whole dataframe is
        also written to ``cleaned/<file>.csv``
    :return: None
    """
    newCol = column + "Num"
    df[newCol] = df[column].map(dictionary)
    # Saving is opt-in so callers can batch several conversions before
    # writing the dataframe out once.
    if file is not None:
        df.to_csv('cleaned/{}.csv'.format(file))
class WellnessCSV:
    """
    Converts the raw wellness CSV into numeric columns and writes the
    result to a new CSV.

    :param file: path of the raw CSV to read
    :param end: path the processed CSV is written to
    """

    # Category label -> number encodings. They are applied in this order so
    # the generated "<column>Num" columns keep a stable position in the output.
    CATEGORY_CODES = {
        "Pain": {"No": 0, "Yes": 1},
        "Illness": {"No": 0, "Slightly Off": 0.5, "Yes": 1},
        "Menstruation": {"No": 0, "Yes": 1},
        "Nutrition": {"Poor": 0, "Okay": 0.5, "Excellent": 1},
        "NutritionAdjustment": {"No": 0, "Yes": 1},
        "USGMeasurement": {"No": 0, "Yes": 1},
    }

    # Encoded columns whose missing values are replaced with 0.
    ZERO_FILL_COLUMNS = (
        "MenstruationNum",
        "USGMeasurementNum",
        "NutritionNum",
        "NutritionAdjustmentNum",
    )

    def __init__(self, file="data/wellness.csv",
                 end="cleaned/notnormalized_with_0NaN_wellness.csv"):
        self.file = file
        self.end = end

    def vectorize(self):
        """
        Reads ``self.file``, adds a numeric "<column>Num" column for every
        categorical column plus "TrainingReadinessNum", fills NaNs in the
        selected columns with 0 and writes the dataframe to ``self.end``.

        :return: None
        """
        df = pd.read_csv(self.file)

        # Vectorizing appropriate data
        for column, codes in self.CATEGORY_CODES.items():
            vectorize_mult(df, column, codes)

        # "85%" -> 0.85; Series.map replaces the iteritems() loop, which no
        # longer exists in pandas 2.x.
        df["TrainingReadinessNum"] = df["TrainingReadiness"].map(
            self._percent_to_fraction
        )

        # Filling in NaNs for appropriate layers where they won't make a
        # statistical difference
        self._fill_missing(df)

        # Saving the df to the "cleaned" CSV
        df.to_csv(self.end)

    @staticmethod
    def _percent_to_fraction(value):
        """
        Turns a percentage string such as "85%" into the fraction 0.85.

        :param value: percentage text, or a missing value
        :return: the fraction as a float; NaN when ``value`` is missing
        """
        # A missing cell is a float NaN and has no .split(); pass it through
        # instead of crashing the whole run.
        if pd.isna(value):
            return float("nan")
        return int(str(value).split("%")[0]) * (1 / 100)

    @staticmethod
    def _fill_missing(df):
        """
        Replaces NaN with 0 in every column listed in ZERO_FILL_COLUMNS.

        Each column is filled from itself, so existing values are kept.

        :param df: dataframe to modify in place
        :return: None
        """
        for column in WellnessCSV.ZERO_FILL_COLUMNS:
            df[column] = df[column].fillna(0)
# Script entry: build the converter with its default paths and run the
# conversion. This executes at module level, so importing the module also
# reads the input CSV and writes the output CSV.
cls = WellnessCSV()
cls.vectorize()