import pandas as pd
|
|
import numpy as np
|
|
|
|
|
|
def start_end_times(filename):
|
|
df = pd.read_csv(filename)
|
|
columnname = "Date"
|
|
dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")
|
|
print()
|
|
print(filename)
|
|
print("min")
|
|
print(dt.min())
|
|
print("max")
|
|
print(dt.max())
|
|
return dt.min()
|
|
|
|
|
|
def timeframes():
|
|
start_end_times("data/rpe.csv")
|
|
start_end_times("data/games.csv")
|
|
start_end_times("data/wellness.csv")
|
|
|
|
|
|
def normalize_time_series(path, filename, start):
|
|
df = pd.read_csv(path)
|
|
columnname = "Date"
|
|
dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")
|
|
df["TimeSinceAugFirst"] = (dt - start).dt.days
|
|
df.to_csv("cleaned/time_series_" + filename)
|
|
|
|
|
|
start = start_end_times("data/rpe.csv")
|
|
normalize_time_series("cleaned/normalizedWellness.csv", "normalizedWellness.csv", start)
|