datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

55 lines
1.8 KiB

5 years ago
5 years ago
5 years ago
5 years ago
import os

import numpy as np
import pandas as pd
  3. def vectorize_mult(column, dictionary, postfix, df, file=None):
  4. newCol = column + postfix
  5. df[newCol] = df[column].map(dictionary)
  6. if file is not None:
  7. df.to_csv('cleaned/{}.csv'.format(file))
  8. csv = pd.read_csv("data/rpe.csv")
  9. vectorize_mult("Training", {"No": 0, "Yes": 1}, "", csv)
  10. mapping = {"Mobility/Recovery": 1, "Game": 0, "Skills": 0, "Conditioning": 0,
  11. "Strength": 0, "Combat": 0, "Speed": 0, np.nan: 0}
  12. vectorize_mult("SessionType", mapping, "Mobility/Recovery", csv)
  13. mapping["Mobility/Recovery"] = 0
  14. mapping["Game"] = 1
  15. vectorize_mult("SessionType", mapping, "Game", csv)
  16. mapping["Game"] = 0
  17. mapping["Skills"] = 1
  18. vectorize_mult("SessionType", mapping, "Skills", csv)
  19. mapping["Skills"] = 0
  20. mapping["Conditioning"] = 1
  21. vectorize_mult("SessionType", mapping, "Conditioning", csv)
  22. mapping["Conditioning"] = 0
  23. mapping["Strength"] = 1
  24. vectorize_mult("SessionType", mapping, "Strength", csv)
  25. mapping["Strength"] = 0
  26. mapping["Combat"] = 1
  27. vectorize_mult("SessionType", mapping, "Combat", csv)
  28. mapping["Combat"] = 0
  29. mapping["Speed"] = 1
  30. vectorize_mult("SessionType", mapping, "Speed", csv)
  31. mapping["Speed"] = 0
  32. mapping[np.nan] = 1
  33. vectorize_mult("SessionType", mapping, "Unknown", csv)
  34. mapping[np.nan] = 0
  35. mapping = {"Not at all": 1, "Absolutely": 0, "Somewhat": 0, np.nan: 0}
  36. vectorize_mult("BestOutOfMyself", mapping, "NotAtAll", csv)
  37. mapping["Not at all"] = 0
  38. mapping["Absolutely"] = 1
  39. vectorize_mult("BestOutOfMyself", mapping, "Absolutely", csv)
  40. mapping["Absolutely"] = 0
  41. mapping["Somewhat"] = 1
  42. vectorize_mult('BestOutOfMyself', mapping, "Somewhat", csv)
  43. mapping["Somewhat"] = 0
  44. mapping[np.nan] = 1
  45. vectorize_mult('BestOutOfMyself', mapping, "Unknown", csv)
  46. mapping[np.nan] = 0
  47. print(csv.isnull().sum())
  48. csv.to_csv("cleaned/notnormalized_with_continuousNan_rpe.csv")