import pandas as pd
import numpy as np


def vectorize_mult(column, dictionary, postfix, df, file=None):
    """Add a column to *df* holding ``df[column]`` mapped through *dictionary*.

    The new column is named ``column + postfix``; with an empty *postfix* the
    source column itself is overwritten. *df* is modified in place and nothing
    is returned.

    Args:
        column: Name of the existing column to map.
        dictionary: Mapping passed to ``Series.map`` to translate each value.
        postfix: Suffix appended to *column* to form the new column name.
        df: DataFrame to modify in place.
        file: Optional base name; when not None the whole frame is also
            written to ``cleaned/<file>.csv``.
    """
    new_col = column + postfix
    df[new_col] = df[column].map(dictionary)
    if file is not None:
        df.to_csv('cleaned/{}.csv'.format(file))


csv = pd.read_csv("cleaned/notnormalized_with_continuousNan_rpe.csv")

# NOTE: the commented-out pass below is the earlier one-off encoding step
# (binary "Training" flag plus one indicator column per "SessionType" /
# "BestOutOfMyself" value). It is kept for reference only and is not executed.
#
# vectorize_mult("Training", {"No": 0, "Yes": 1}, "", csv)
#
# mapping = {"Mobility/Recovery": 1, "Game": 0, "Skills": 0, "Conditioning": 0,
#            "Strength": 0, "Combat": 0, "Speed": 0, np.nan: 0}
# vectorize_mult("SessionType", mapping, "Mobility/Recovery", csv)
# mapping["Mobility/Recovery"] = 0
# mapping["Game"] = 1
# vectorize_mult("SessionType", mapping, "Game", csv)
# mapping["Game"] = 0
# mapping["Skills"] = 1
# vectorize_mult("SessionType", mapping, "Skills", csv)
# mapping["Skills"] = 0
# mapping["Conditioning"] = 1
# vectorize_mult("SessionType", mapping, "Conditioning", csv)
# mapping["Conditioning"] = 0
# mapping["Strength"] = 1
# vectorize_mult("SessionType", mapping, "Strength", csv)
# mapping["Strength"] = 0
# mapping["Combat"] = 1
# vectorize_mult("SessionType", mapping, "Combat", csv)
# mapping["Combat"] = 0
# mapping["Speed"] = 1
# vectorize_mult("SessionType", mapping, "Speed", csv)
# mapping["Speed"] = 0
# mapping[np.nan] = 1
# vectorize_mult("SessionType", mapping, "Unknown", csv)
# mapping[np.nan] = 0
#
# mapping = {"Not at all": 1, "Absolutely": 0, "Somewhat": 0, np.nan: 0}
# vectorize_mult("BestOutOfMyself", mapping, "NotAtAll", csv)
# mapping["Not at all"] = 0
# mapping["Absolutely"] = 1
# vectorize_mult("BestOutOfMyself", mapping, "Absolutely", csv)
# mapping["Absolutely"] = 0
# mapping["Somewhat"] = 1
# vectorize_mult('BestOutOfMyself', mapping, "Somewhat", csv)
# mapping["Somewhat"] = 0
# mapping[np.nan] = 1
# vectorize_mult('BestOutOfMyself', mapping, "Unknown", csv)
# mapping[np.nan] = 0
#
# csv.to_csv("cleaned/notnormalized_with_continuousNan_rpe.csv")

# Replace missing values with 0 in every non-object column, printing the name
# of each column that actually contained NaNs. Iterating the (name, dtype)
# pairs avoids integer-position lookups on the label-indexed ``dtypes`` Series,
# which pandas has deprecated.
for colname, dtype in csv.dtypes.items():
    if dtype != "object" and csv[colname].hasnans:
        print(colname)
        csv[colname] = csv[colname].fillna(0)

# print(csv.isnull().sum())

# NOTE(review): to_csv is called with its default arguments, so the row index
# is written as well; kept as-is to leave the output file format unchanged.
csv.to_csv("cleaned/notnormalized_with_0Nan_rpe.csv")