|
@ -0,0 +1,33 @@ |
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def start_end_times(filename):
    """Print the earliest and latest "Date" found in a CSV file.

    The file is loaded with pandas and its "Date" column is parsed using the
    strict ``%Y-%m-%d`` format. A blank line, the filename, and the labelled
    minimum and maximum dates are written to stdout, one item per line.

    Args:
        filename: Path of the CSV file to inspect; it must contain a "Date"
            column.

    Returns:
        The earliest parsed date in the column. Note that only the minimum is
        returned, even though both ends of the range are printed.
    """
    dates = pd.to_datetime(pd.read_csv(filename)["Date"], format="%Y-%m-%d")
    earliest = dates.min()
    latest = dates.max()

    # The leading empty string yields the blank separator line before each report.
    print("", filename, "min", earliest, "max", latest, sep="\n")
    return earliest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def timeframes():
    """Print the date range of the RPE, games and wellness CSV files.

    Each file under ``data/`` is passed to ``start_end_times`` in turn; the
    returned start dates are discarded, so this is purely a reporting helper.
    """
    for csv_path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
        start_end_times(csv_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_time_series(filename, start):
    """Add a day-offset column to a CSV file and save it under ``cleaned/``.

    The "Date" column (strict ``%Y-%m-%d`` format) is converted to the number
    of whole days elapsed since ``start`` and stored in a new
    "TimeSinceAugFirst" column. The augmented table is written to
    ``cleaned/time_series_<basename of filename>``.

    Args:
        filename: Path of the CSV file to read; it must contain a "Date"
            column. Any directory part is ignored when naming the output.
        start: Reference timestamp subtracted from every date.
            NOTE(review): the column name suggests this is expected to be
            August 1st -- confirm against the caller's data.

    Returns:
        None. The result is written to disk.
    """
    import os

    df = pd.read_csv(filename)
    dates = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df["TimeSinceAugFirst"] = (dates - start).dt.days

    # Prefix only the file's base name. Concatenating the full input path
    # turned "cleaned/rpe.csv" into "cleaned/time_series_cleaned/rpe.csv",
    # which points into a directory that does not exist.
    output_dir = "cleaned"
    os.makedirs(output_dir, exist_ok=True)
    output_name = "time_series_" + os.path.basename(filename)
    df.to_csv(os.path.join(output_dir, output_name))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The earliest date in the raw RPE file serves as day zero for the cleaned
# RPE time series written by normalize_time_series.
start = start_end_times("data/rpe.csv")
normalize_time_series(filename="cleaned/rpe.csv", start=start)