Browse Source

time serialization ready for production

master
PerryXDeng 5 years ago
parent
commit
f115c7a155
1 changed files with 33 additions and 0 deletions
  1. +33
    -0
      data_preparation/time_series.py

+ 33
- 0
data_preparation/time_series.py View File

@ -0,0 +1,33 @@
import pandas as pd
import numpy as np
def start_end_times(filename):
df = pd.read_csv(filename)
columnname = "Date"
dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")
print()
print(filename)
print("min")
print(dt.min())
print("max")
print(dt.max())
return dt.min()
def timeframes():
start_end_times("data/rpe.csv")
start_end_times("data/games.csv")
start_end_times("data/wellness.csv")
def normalize_time_series(filename, start):
df = pd.read_csv(filename)
columnname = "Date"
dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")
df["TimeSinceAugFirst"] = (dt - start).dt.days
df.to_csv("cleaned/time_series_" + filename)
start = start_end_times("data/rpe.csv")
normalize_time_series("cleaned/rpe.csv", start)

Loading…
Cancel
Save