| 
						
						
							
								
							
						
						
					 | 
				
				 | 
				
					@ -3,16 +3,26 @@ import pandas as pd | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					# Load the cleaned wellness data from disk.
					df = pd.read_csv('cleaned/wellness.csv')

					# Show the distinct labels present in the "Illness" column.
					print(df.loc[:, "Illness"].unique())

					# Numeric code assigned to each "Illness" label.
					illness = {
					    'No': 0,
					    'Slightly Off': 0.5,
					    'Yes': 1,
					}
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					def vectorize_mult(column, dictionary, file=None, frame=None):
					    """Add a numeric ``<column>Num`` column by mapping labels through a dict.

					    Args:
					        column: Name of the existing column whose values are looked up.
					        dictionary: Mapping from each label in ``column`` to its numeric
					            value. Labels missing from the mapping become NaN.
					        file: Optional base name; when given, the updated frame is written
					            to ``cleaned/<file>.csv``.
					        frame: DataFrame to update in place. Defaults to the module-level
					            ``df`` so existing three-argument callers keep working.

					    Raises:
					        KeyError: If ``column`` is not a column of the frame.
					    """
					    # Only fall back to the global when no frame was supplied, so the
					    # helper can also be used on any other DataFrame.
					    target = df if frame is None else frame
					    target[column + "Num"] = target[column].map(dictionary)
					    if file is not None:
					        # index=False keeps the row index out of the file; this script
					        # re-reads the same CSV without index_col, so writing the index
					        # would add an extra "Unnamed: 0" column on every run.
					        target.to_csv('cleaned/{}.csv'.format(file), index=False)
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					
 | 
				
			
			
		
	
		
			
				
					 | 
					 | 
				
				 | 
				
					# Encode the "Illness" labels through the shared helper so this column is
					# built the same way as "USGMeasurementNum"; labels missing from `illness`
					# (including NaN) become NaN instead of raising KeyError.
					vectorize_mult("Illness", illness)

					# Encode "USGMeasurement" and let the helper write the frame, which now
					# holds both new columns, to cleaned/wellness.csv. A separate to_csv call
					# before this one would only be overwritten immediately.
					vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")

					print(df["Illness"])
					print(df["IllnessNum"])

					# NOTE: the string below is a disabled draft and is never executed. It turns
					# "TrainingReadiness" percentage strings into rounded fractions stored in
					# "TrainingReadinessNum". The draft now uses Series.items() and DataFrame.at
					# because Series.iteritems() and DataFrame.set_value() are no longer
					# available in current pandas releases.
					"""
					for i, value in df["TrainingReadiness"].items():
					    if pd.notna(value):
					        value = value.split("%")[0]
					        value = float(value) * (1/100)
					        value = round(value, 2)
					        df.at[i, "TrainingReadinessNum"] = value

					        print(value)


					df.to_csv('cleaned/{}.csv'.format("wellness"))
					"""