# Reconstructed from git patch f115c7a155fdc6b06283b5875d3e160582016dac
# (PerryXDeng, Sat, 30 Mar 2019, "time serialization ready for production"),
# which created data_preparation/time_series.py.
"""Inspect the date range of CSV data sets and add a day-offset column.

Every CSV handled here is expected to have a ``Date`` column formatted as
``YYYY-MM-DD``.
"""

from pathlib import Path

import numpy as np
import pandas as pd

DATE_COLUMN = "Date"
DATE_FORMAT = "%Y-%m-%d"
OUTPUT_DIR = "cleaned"
OUTPUT_PREFIX = "time_series_"


def _read_with_dates(filename):
    """Load *filename* and return ``(frame, parsed_dates)``.

    ``parsed_dates`` is the ``Date`` column converted to datetimes; the
    returned frame itself is left exactly as read.
    """
    df = pd.read_csv(filename)
    dates = pd.to_datetime(df[DATE_COLUMN], format=DATE_FORMAT)
    return df, dates


def start_end_times(filename):
    """Print the earliest and latest ``Date`` in *filename*.

    Args:
        filename: Path of a CSV file with a ``Date`` column.

    Returns:
        The earliest date found in the file.
    """
    _, dates = _read_with_dates(filename)
    earliest = dates.min()
    latest = dates.max()
    print()
    print(filename)
    print("min")
    print(earliest)
    print("max")
    print(latest)
    return earliest


def timeframes():
    """Print the date range of the rpe, games and wellness data sets."""
    for path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
        start_end_times(path)


def normalize_time_series(filename, start):
    """Add a whole-day offset column and write the result under ``cleaned/``.

    The new ``TimeSinceAugFirst`` column holds the number of days between
    each row's ``Date`` and *start*. The column name suggests *start* is
    meant to be August 1st, but nothing here enforces that.

    Args:
        filename: Path of the CSV file to read.
        start: Reference date; anything ``pd.Timestamp`` accepts.

    The output is written to ``cleaned/time_series_<base name of filename>``.
    """
    df, dates = _read_with_dates(filename)
    df["TimeSinceAugFirst"] = (dates - pd.Timestamp(start)).dt.days

    # Use only the base name: prefixing the full input path would nest the
    # input's directory under "cleaned/time_series_..." and point at a
    # directory that does not exist.
    out_dir = Path(OUTPUT_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / (OUTPUT_PREFIX + Path(filename).name))


# Guarded so that importing this module does not touch the data files.
if __name__ == "__main__":
    start = start_end_times("data/rpe.csv")
    normalize_time_series("cleaned/rpe.csv", start)