datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

35 lines
1.0 KiB

5 years ago
5 years ago
  1. import pandas as pd
  2. import numpy as np
  def start_end_times(filename, columnname="Date"):
      """Print the earliest and latest date found in a CSV file.

      The file is read with pandas and ``columnname`` is parsed as
      ``YYYY-MM-DD`` dates. A blank line, the file name, and the labelled
      minimum and maximum dates are printed to stdout.

      Args:
          filename: Path of the CSV file to inspect.
          columnname: Name of the column holding the dates. Defaults to
              ``"Date"``.

      Returns:
          The earliest date in the column. Despite the function's name,
          only the start of the range is returned; the end is printed only.

      Raises:
          KeyError: If ``columnname`` is not a column of the file.
          ValueError: If a value does not match the ``%Y-%m-%d`` format.
      """
      df = pd.read_csv(filename)
      dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")

      # Compute each bound once; they are used for both printing and return.
      earliest = dt.min()
      latest = dt.max()

      print()
      print(filename)
      print("min")
      print(earliest)
      print("max")
      print(latest)
      return earliest
  def timeframes():
      """Print the date range of each raw data file.

      Calls ``start_end_times`` on the RPE, games and wellness CSV files
      under ``data/``; the results are printed, not returned.
      """
      for csv_path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
          start_end_times(csv_path)
  def normalize_time_series(path, filename, start, columnname="Date",
                            out_dir="cleaned"):
      """Add a day-offset column to a CSV file and save the result.

      Reads the CSV at ``path``, parses ``columnname`` as ``YYYY-MM-DD``
      dates, and stores the whole-day difference between each date and
      ``start`` in a new ``TimeSinceAugFirst`` column. The frame is then
      written to ``<out_dir>/time_series_<filename>``.

      Args:
          path: Path of the CSV file to read.
          filename: Base name used to build the output file name.
          start: Reference timestamp subtracted from every date.
              NOTE(review): the column name suggests this is expected to be
              August 1st -- confirm against the caller's data.
          columnname: Name of the column holding the dates. Defaults to
              ``"Date"``.
          out_dir: Directory the output file is written to; it must already
              exist. Defaults to ``"cleaned"``.

      Returns:
          None. The result is written to disk.
      """
      df = pd.read_csv(path)
      dates = pd.to_datetime(df[columnname], format="%Y-%m-%d")
      df["TimeSinceAugFirst"] = (dates - start).dt.days

      # The index is written too (pandas default), so existing output files
      # keep the same layout.
      df.to_csv(f"{out_dir}/time_series_{filename}")
  # Reference date for all normalized series: the earliest date in the RPE data.
  start = start_end_times("data/rpe.csv")

  # Each cleaned file is read from cleaned/<name> and rewritten as
  # cleaned/time_series_<name>. The names are kept exactly as given,
  # including the differing "0NaN" / "0Nan" capitalisation.
  for _cleaned_name in (
      "notnormalized_with_0NaN_wellness.csv",
      "notnormalized_with_0Nan_rpe.csv",
      "notnormalized_with_continuousNan_rpe.csv",
  ):
      normalize_time_series("cleaned/" + _cleaned_name, _cleaned_name, start)