import numpy as np
import pandas as pd


def join_cols(
    input_path='data_preparation/data/games_ranked.csv',
    output_path=None,
):
    """Sum the adjusted Elo change of all games that share a date.

    Reads a games CSV that contains a ``Date`` column and an
    ``eloChangeAdjusted`` column, adds up ``eloChangeAdjusted`` for every
    distinct ``Date`` and returns one row per date.

    Args:
        input_path: Path of the games CSV to read. Defaults to the path the
            function originally had hard-coded.
        output_path: Optional path. When given, the result is also written
            there with ``DataFrame.to_csv`` (default options, so the row
            index is written as the first, unnamed column).

    Returns:
        A DataFrame with the columns ``Date`` and ``DailyElo`` (float), one
        row per distinct date, ordered by the first appearance of each date
        in the input.

    Raises:
        KeyError: If the input has no ``Date`` or ``eloChangeAdjusted``
            column.
    """
    # Reads in the csv file to be aggregated
    dfg = pd.read_csv(input_path)

    # Group instead of walking the rows with a hand-maintained cursor: this
    # stays correct when the rows of one date are not contiguous.
    # sort=False keeps the dates in order of first appearance, like
    # Series.unique() does.  Rows whose Date is missing are left out of the
    # grouping (pandas' default for groupby).
    per_day = dfg.groupby("Date", sort=False)["eloChangeAdjusted"].sum()

    unique_rows = per_day.shape[0]
    print(unique_rows)

    # Creates a new dataframe from the unique dates and the summed elo change
    df_dec = pd.DataFrame(
        {
            "Date": per_day.index.to_numpy(),
            "DailyElo": per_day.to_numpy(dtype=float),
        }
    )
    print(df_dec)

    if output_path is not None:
        df_dec.to_csv(output_path)

    return df_dec