Browse Source

Update vectorization script, added dirty_wellness csv

master
Ryan Missel 5 years ago
parent
commit
fd58bd0ab7
3 changed files with 5030 additions and 15 deletions
  1. +3
    -0
      .gitignore
  2. +5018
    -0
      data_preparation/cleaned/dirty_wellness.csv
  3. +9
    -15
      data_preparation/vectorization_ex.py

+ 3
- 0
.gitignore View File

@ -133,3 +133,6 @@ rsconnect/
# datafest
data_preparation/data/*.csv
data_preparation/cleaned/games.csv
data_preparation/cleaned/gps.csv
data_preparation/cleaned/rpe.csv

+ 5018
- 0
data_preparation/cleaned/dirty_wellness.csv
File diff suppressed because it is too large
View File


+ 9
- 15
data_preparation/vectorization_ex.py View File

@ -1,28 +1,22 @@
import pandas as pd
# read in CSV
df = pd.read_csv('cleaned/wellness.csv')
df = pd.read_csv('cleaned/dirty_wellness.csv')
def vectorize_mult(column, dictionary, file=None):
    """
    Add a mapped companion column to the module-level ``df``.

    The values of ``df[column]`` are passed through ``Series.map`` with
    ``dictionary`` and stored in a new column named ``column + "Num"``.
    The frame is modified in place; nothing is returned.

    :param column: name of the existing column in ``df`` to translate
    :param dictionary: mapping handed to ``Series.map``
    :param file: optional base name; when it is not None the whole frame is
        written to ``cleaned/<file>.csv``
    :return: None
    """
    target_name = column + "Num"
    source_values = df[column]
    df[target_name] = source_values.map(dictionary)

    # No output name supplied: keep the change in memory only.
    if file is None:
        return

    output_path = 'cleaned/{}.csv'.format(file)
    df.to_csv(output_path)
vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")
"""
for i, value in df["TrainingReadiness"].iteritems():
if pd.notna(value):
value = value.split("%")[0]
value = float(value) * (1/100)
value = round(value, 2)
df.set_value(i, "TrainingReadinessNum", value)
print(value)
df.to_csv('cleaned/{}.csv'.format("wellness"))
"""
vectorize_mult("USGMeasurement", {"No": 0, "Yes": 1}, "wellness")

Loading…
Cancel
Save