datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.2 KiB

5 years ago
  1. import pandas as pd
  2. import numpy as np
  3. def vectorize_mult(column, dictionary, postfix, df, file=None):
  4. newCol = column + postfix
  5. df[newCol] = df[column].map(dictionary)
  6. if file is not None:
  7. df.to_csv('cleaned/{}.csv'.format(file))
  8. csv = pd.read_csv("cleaned/notnormalized_with_continuousNan_rpe.csv")
  9. # vectorize_mult("Training", {"No": 0, "Yes": 1}, "", csv)
  10. #
  11. # mapping = {"Mobility/Recovery": 1, "Game": 0, "Skills": 0, "Conditioning": 0,
  12. # "Strength": 0, "Combat": 0, "Speed": 0, np.nan: 0}
  13. # vectorize_mult("SessionType", mapping, "Mobility/Recovery", csv)
  14. # mapping["Mobility/Recovery"] = 0
  15. # mapping["Game"] = 1
  16. # vectorize_mult("SessionType", mapping, "Game", csv)
  17. # mapping["Game"] = 0
  18. # mapping["Skills"] = 1
  19. # vectorize_mult("SessionType", mapping, "Skills", csv)
  20. # mapping["Skills"] = 0
  21. # mapping["Conditioning"] = 1
  22. # vectorize_mult("SessionType", mapping, "Conditioning", csv)
  23. # mapping["Conditioning"] = 0
  24. # mapping["Strength"] = 1
  25. # vectorize_mult("SessionType", mapping, "Strength", csv)
  26. # mapping["Strength"] = 0
  27. # mapping["Combat"] = 1
  28. # vectorize_mult("SessionType", mapping, "Combat", csv)
  29. # mapping["Combat"] = 0
  30. # mapping["Speed"] = 1
  31. # vectorize_mult("SessionType", mapping, "Speed", csv)
  32. # mapping["Speed"] = 0
  33. # mapping[np.nan] = 1
  34. # vectorize_mult("SessionType", mapping, "Unknown", csv)
  35. # mapping[np.nan] = 0
  36. #
  37. # mapping = {"Not at all": 1, "Absolutely": 0, "Somewhat": 0, np.nan: 0}
  38. # vectorize_mult("BestOutOfMyself", mapping, "NotAtAll", csv)
  39. # mapping["Not at all"] = 0
  40. # mapping["Absolutely"] = 1
  41. # vectorize_mult("BestOutOfMyself", mapping, "Absolutely", csv)
  42. # mapping["Absolutely"] = 0
  43. # mapping["Somewhat"] = 1
  44. # vectorize_mult('BestOutOfMyself', mapping, "Somewhat", csv)
  45. # mapping["Somewhat"] = 0
  46. # mapping[np.nan] = 1
  47. # vectorize_mult('BestOutOfMyself', mapping, "Unknown", csv)
  48. # mapping[np.nan] = 0
  49. #
  50. # csv.to_csv("cleaned/notnormalized_with_continuousNan_rpe.csv")
  51. for i in range(len(csv.dtypes)):
  52. type = csv.dtypes[i]
  53. if type != "object":
  54. colname = csv.columns[i]
  55. if csv[colname].hasnans:
  56. print(colname)
  57. csv[colname] = csv[colname].fillna(0)
  58. #print(csv.isnull().sum())
  59. csv.to_csv("cleaned/notnormalized_with_0Nan_rpe.csv")