Browse Source

Add dirty_wellness_na to fill in NaN values

master
Ryan Missel 5 years ago
parent
commit
a0d704e1c9
3 changed files with 10037 additions and 11 deletions
  1. +5018
    -0
      data_preparation/cleaned/dirty_wellness_na.csv
  2. +5018
    -0
      data_preparation/dirty_wellness_na.csv
  3. +1
    -11
      data_preparation/vectorization_ex.py

+ 5018
- 0
data_preparation/cleaned/dirty_wellness_na.csv
File diff suppressed because it is too large
View File


+ 5018
- 0
data_preparation/dirty_wellness_na.csv
File diff suppressed because it is too large
View File


+ 1
- 11
data_preparation/vectorization_ex.py View File

@ -1,22 +1,12 @@
import pandas as pd import pandas as pd
# read in CSV # read in CSV
df = pd.read_csv('cleaned/dirty_wellness.csv')
df = pd.read_csv('cleaned/dirty_wellness_na.csv')
def vectorize_mult(column, dictionary, file=None): def vectorize_mult(column, dictionary, file=None):
"""
Handles vectorizing
:param column:
:param dictionary:
:param file:
:return:
"""
newCol = column + "Num" newCol = column + "Num"
df[newCol] = df[column].map(dictionary) df[newCol] = df[column].map(dictionary)
if file is not None: if file is not None:
df.to_csv('cleaned/{}.csv'.format(file)) df.to_csv('cleaned/{}.csv'.format(file))
vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")

Loading…
Cancel
Save