import pandas as pd
import numpy as np

# Name and format of the date column shared by every CSV handled here.
DATE_COLUMN = "Date"
DATE_FORMAT = "%Y-%m-%d"


def _parse_dates(df, column=DATE_COLUMN):
    """Return *column* of *df* parsed as datetimes using DATE_FORMAT."""
    return pd.to_datetime(df[column], format=DATE_FORMAT)


def start_end_times(filename, column=DATE_COLUMN):
    """Print the earliest and latest date found in a CSV file.

    Args:
        filename: Path of the CSV file to read.
        column: Name of the column holding ``YYYY-MM-DD`` dates.

    Returns:
        The earliest date in *column* (the result of ``Series.min()``).
    """
    dates = _parse_dates(pd.read_csv(filename), column)
    print()
    print(filename)
    print("min")
    print(dates.min())
    print("max")
    print(dates.max())
    return dates.min()


def timeframes():
    """Print the date range covered by each of the raw data files."""
    for path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
        start_end_times(path)


def normalize_time_series(path, filename, start, column=DATE_COLUMN):
    """Add a day-offset column to a CSV and save the result under ``cleaned/``.

    Reads the CSV at *path*, adds a ``TimeSinceAugFirst`` column holding the
    ``.dt.days`` component of ``date - start`` for every row, and writes the
    frame to ``cleaned/time_series_<filename>``.

    Args:
        path: Path of the CSV file to read.
        filename: Base name used to build the output file name.
        start: Reference datetime subtracted from every date.
            NOTE(review): the output column name suggests this is expected to
            be August 1st; the script passes the minimum date of
            ``data/rpe.csv`` -- confirm they coincide.
        column: Name of the column holding ``YYYY-MM-DD`` dates.
    """
    df = pd.read_csv(path)
    df["TimeSinceAugFirst"] = (_parse_dates(df, column) - start).dt.days
    # to_csv is called with its defaults, so the DataFrame index is written
    # as an extra leading column; kept as-is for downstream compatibility.
    df.to_csv(f"cleaned/time_series_{filename}")


# Script section: runs on import/execution, exactly as in the original file.
# The earliest RPE date is the common origin for all normalized series.
start = start_end_times("data/rpe.csv")

for _cleaned_name in (
    "notnormalized_with_0NaN_wellness.csv",
    "notnormalized_with_0Nan_rpe.csv",
    "notnormalized_with_continuousNan_rpe.csv",
):
    normalize_time_series(f"cleaned/{_cleaned_name}", _cleaned_name, start)