datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

35 lines
1.0 KiB

import pandas as pd
import numpy as np
def start_end_times(filename, columnname="Date"):
    """Print the date range of a CSV file's date column and return its start.

    The file is loaded with pandas, ``columnname`` is parsed as
    ``YYYY-MM-DD`` dates, and a short report is printed: a blank line, the
    file name, the label ``min`` with the earliest date, and the label
    ``max`` with the latest date.

    Args:
        filename: Path of the CSV file to read.
        columnname: Name of the column holding the dates. Defaults to
            ``"Date"``, the column this script has always used.

    Returns:
        The minimum of the parsed date column.

    Any exception raised by pandas while reading or parsing propagates
    unchanged; nothing is caught here.
    """
    df = pd.read_csv(filename)
    dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")

    # Compute each extreme once so the printed value and the returned value
    # are guaranteed to be the same object.
    earliest = dt.min()
    latest = dt.max()

    print()
    print(filename)
    print("min")
    print(earliest)
    print("max")
    print(latest)
    return earliest
def timeframes():
    """Print the date range of the rpe, games and wellness CSV files.

    Calls ``start_end_times`` on ``data/rpe.csv``, ``data/games.csv`` and
    ``data/wellness.csv`` in that order. The values it returns are
    discarded; this function exists only for the printed report and
    returns ``None``.
    """
    for csv_path in ("data/rpe.csv", "data/games.csv", "data/wellness.csv"):
        start_end_times(csv_path)
def normalize_time_series(path, filename, start, columnname="Date"):
    """Add a days-since-``start`` column to a CSV and save the result.

    The CSV at ``path`` is loaded, ``columnname`` is parsed as
    ``YYYY-MM-DD`` dates, and a new ``TimeSinceAugFirst`` column is added
    holding the whole number of days between ``start`` and each row's date
    (negative for dates earlier than ``start``). The frame is then written
    to ``cleaned/time_series_<filename>`` relative to the current working
    directory, using pandas' default CSV settings.

    Args:
        path: Path of the CSV file to read.
        filename: File name appended to ``cleaned/time_series_`` to form
            the output path.
        start: Reference date subtracted from every parsed date.
            NOTE(review): the column name suggests this is expected to be
            August 1st, but nothing here enforces that; confirm with the
            caller.
        columnname: Name of the column holding the dates. Defaults to
            ``"Date"``.

    Returns:
        None. The result is only written to disk.
    """
    df = pd.read_csv(path)
    dt = pd.to_datetime(df[columnname], format="%Y-%m-%d")

    # Subtracting a single timestamp from a datetime column yields
    # timedeltas; ``.dt.days`` keeps only their whole-day component.
    df["TimeSinceAugFirst"] = (dt - start).dt.days

    df.to_csv("cleaned/time_series_" + filename)
# Reference date for every series below: the earliest date in the RPE data.
start = start_end_times("data/rpe.csv")

# Each pair is (input CSV path, file name used for the time_series_ output).
for source_path, output_name in (
    (
        "cleaned/notnormalized_with_0NaN_wellness.csv",
        "notnormalized_with_0NaN_wellness.csv",
    ),
    (
        "cleaned/notnormalized_with_0Nan_rpe.csv",
        "notnormalized_with_0Nan_rpe.csv",
    ),
    (
        "cleaned/notnormalized_with_continuousNan_rpe.csv",
        "notnormalized_with_continuousNan_rpe.csv",
    ),
):
    normalize_time_series(source_path, output_name, start)