diff --git a/findings/data_plot.py b/findings/data_plot.py
new file mode 100644
index 0000000..af9fb25
--- /dev/null
+++ b/findings/data_plot.py
@@ -0,0 +1,19 @@
+from matplotlib import pyplot as plt
+import pandas as pd
+
+
+def plot_xy(x, y):
+    """Draw a scatter plot of ``y`` against ``x`` and display it.
+
+    The axes are labelled with ``x.name`` and ``y.name``, so both
+    arguments need a ``name`` attribute (e.g. pandas Series).
+    """
+    plt.scatter(x, y)
+    plt.xlabel(x.name)
+    plt.ylabel(y.name)
+    plt.show()
+
+
+# Script body: scatter SleepHours (y axis) against Fatigue (x axis).
+wellness = pd.read_csv('../data_preparation/data/wellness.csv')
+plot_xy(wellness['Fatigue'], wellness['SleepHours'])
diff --git a/hypotheses_modeling/hypotheses.txt b/hypotheses_modeling/hypotheses.txt
new file mode 100644
index 0000000..ed54461
--- /dev/null
+++ b/hypotheses_modeling/hypotheses.txt
@@ -0,0 +1,22 @@
+Team:
+    1.
+        x - normFatigue, TimeSince
+        y - normSoreness
+        r2 - 0.16830803694995466
+
+    2.
+        x - normFatigue, TimeSince
+        y - normDesire
+        r2 - 0.1722466278587138
+
+    3.
+        x - normFatigue, TimeSince
+        y - normIrritability
+        r2 - 0.09563459728506452
+
+    4.
+        x - normSleepHours, TimeSince
+        y - normSleepQuality
+        r2 - 0.09717095676693188
+
+    5.
\ No newline at end of file
diff --git a/hypotheses_modeling/team_regressions.py b/hypotheses_modeling/team_regressions.py
index 5753708..c7fced1 100644
--- a/hypotheses_modeling/team_regressions.py
+++ b/hypotheses_modeling/team_regressions.py
@@ -33,10 +33,41 @@ def k_days_into_future_regression(X, y, k, n0):
     return regr.intercept_, regr.coef_, r2, mse
 
 
+def standard_lr(x, y):
+    """Fit a linear regression of ``y`` on ``x`` and score it on the same data.
+
+    The model is evaluated in-sample: predictions are made for the very
+    rows it was fitted on.
+
+    Returns:
+        Tuple ``(intercept, coefficients, r2, mse)``.
+    """
+    regr = linear_model.LinearRegression()
+    regr.fit(x, y)
+    predictions = regr.predict(x)
+    # mean_squared_error() already averages over the samples; dividing it
+    # again by (len(y) - 2) would scale the result by a second sample count.
+    mse = mean_squared_error(y, predictions)
+    r2 = r2_score(y, predictions)
+    return regr.intercept_, regr.coef_, r2, mse
+
+
 def main():
-    fatigueSums = pd.read_csv("fatigue_total_sum.csv")
-    performance = pd.read_csv("../data_preparation/cleaned/expSmoothWorkAndFatigueData.csv", index_col=0).drop(columns=["totalWork", "averageWorkLoad", "smoothedFatigueData"])
-    print(k_days_into_future_regression(fatigueSums, performance, 0, 1))
+    """Regress normFatigue on normSoreness and TimeSinceAugFirst; print the fit."""
+    # Previous experiment, kept for reference:
+    # fatigueSums = pd.read_csv("fatigue_total_sum.csv")
+    # workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
+    # print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))
+
+    wellness = pd.read_csv("../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv")
+
+    # Missing values are replaced with 0 before fitting.
+    wellness = wellness.fillna(0)
+    x = wellness[['normSoreness', 'TimeSinceAugFirst']]
+    y = wellness['normFatigue']
+    # Sanity check: per-column null counts, taken after fillna(0).
+    print(wellness.isnull().sum())
+    print(standard_lr(x, y))
 
 
 if __name__ == "__main__":