diff --git a/data_preparation/time_series.py b/data_preparation/time_series.py new file mode 100644 index 0000000..1666cfd --- /dev/null +++ b/data_preparation/time_series.py @@ -0,0 +1,33 @@ +import pandas as pd +import numpy as np + + +def start_end_times(filename): + df = pd.read_csv(filename) + columnname = "Date" + dt = pd.to_datetime(df[columnname], format="%Y-%m-%d") + print() + print(filename) + print("min") + print(dt.min()) + print("max") + print(dt.max()) + return dt.min() + + +def timeframes(): + start_end_times("data/rpe.csv") + start_end_times("data/games.csv") + start_end_times("data/wellness.csv") + + +def normalize_time_series(filename, start): + df = pd.read_csv(filename) + columnname = "Date" + dt = pd.to_datetime(df[columnname], format="%Y-%m-%d") + df["TimeSinceAugFirst"] = (dt - start).dt.days + df.to_csv("cleaned/time_series_" + filename) + + +start = start_end_times("data/rpe.csv") +normalize_time_series("cleaned/rpe.csv", start)