|
|
- import pandas as pd
- import numpy as np
-
-
def start_end_times(filename):
    """Print and return the date range of a CSV file's ``Date`` column.

    The file is read with pandas and its ``Date`` column is parsed with the
    ``%Y-%m-%d`` format. A blank line, the file name, the label ``min`` with
    the earliest date, and the label ``max`` with the latest date are printed
    to standard output.

    Args:
        filename: Path of the CSV file to read; it must contain a ``Date``
            column.

    Returns:
        The earliest parsed date in the ``Date`` column.
    """
    frame = pd.read_csv(filename)
    dates = pd.to_datetime(frame["Date"], format="%Y-%m-%d")

    # Compute each extreme once rather than rescanning the column for the
    # print and again for the return value.
    earliest = dates.min()
    latest = dates.max()

    print()
    print(filename)
    print("min")
    print(earliest)
    print("max")
    print(latest)
    return earliest
-
-
def timeframes():
    """Print the date range of the RPE, games and wellness CSV files, in that order."""
    for csv_path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
        start_end_times(csv_path)
-
-
def normalize_time_series(path, filename, start):
    """Add a days-since-``start`` column to a CSV and save the result.

    The CSV at ``path`` is read, its ``Date`` column is parsed with the
    ``%Y-%m-%d`` format, and a ``TimeSinceAugFirst`` column holding the whole
    number of days between each date and ``start`` is added. The frame is
    then written to ``"cleaned/time_series_" + filename`` (the ``cleaned``
    directory must already exist).

    Args:
        path: Path of the CSV file to read; it must contain a ``Date`` column.
        filename: Name appended to ``cleaned/time_series_`` to form the
            output path.
        start: Reference date that maps to day 0. May be a ``pd.Timestamp``
            or anything ``pd.to_datetime`` can convert, such as an ISO date
            string.

    Returns:
        None. The result is written to disk.
    """
    frame = pd.read_csv(path)
    dates = pd.to_datetime(frame["Date"], format="%Y-%m-%d")

    # Normalise the reference date so that plain strings work too; values
    # that are already Timestamps pass through unchanged.
    origin = pd.to_datetime(start)

    frame["TimeSinceAugFirst"] = (dates - origin).dt.days
    # The index is still written (pandas default) so the output layout stays
    # the same for any downstream reader.
    frame.to_csv("cleaned/time_series_" + filename)
-
-
# Day zero for every series is the earliest date found in the RPE data.
start = start_end_times("data/rpe.csv")

# Each file is read from cleaned/<name> and written back by
# normalize_time_series as cleaned/time_series_<name>.
for _cleaned_name in (
    "notnormalized_with_0NaN_wellness.csv",
    "notnormalized_with_0Nan_rpe.csv",
    "notnormalized_with_continuousNan_rpe.csv",
):
    normalize_time_series("cleaned/" + _cleaned_name, _cleaned_name, start)
|