|
|
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score


def k_days_into_future_regression(X, y, k, n0):
    """
    Fit a linear regression that predicts ``y`` on a day from ``X`` observed ``k`` days earlier.

    Both frames must contain a ``TimeSinceAugFirst`` column; it is used only to
    align rows and is dropped before fitting. A target day contributes a sample
    only when ``X`` has exactly one row for ``day - k``; other days are skipped.
    If ``y`` has several rows for the same day, the first one is used.

    :param X: x timeseries dataframe (very clean, no unnamed columns), multidimensional rows
    :param y: y timeseries dataframe (very clean, no unnamed columns), scalar rows
    :param k: days predicting in advance
    :param n0: warm-up length; the first ``n0 - 1`` rows of ``y`` are ignored
        (values of 1 or less ignore nothing)
    :return: intercept, slopes, R^2 score, mean squared error -- the last two
        are measured on the same samples the model was fitted on
    :raises ValueError: if no day of ``y`` can be paired with a row of ``X``
    """
    col = "TimeSinceAugFirst"

    # Clamp the start: a plain ``n0 - 1`` turns n0 == 0 into the slice [-1:],
    # which would silently keep only the last row instead of every row.
    first_row = max(n0 - 1, 0)

    inp = []
    out = []
    for day in y[col].iloc[first_row:]:
        candidates = X[X[col] == day - k].drop(columns=[col]).to_numpy()
        # Skip days whose predictor row is missing or ambiguous.
        if candidates.shape[0] != 1:
            continue
        target = y[y[col] == day].drop(columns=[col]).to_numpy()[0, :]
        inp.append(candidates[0, :])
        out.append(target)

    if not inp:
        raise ValueError(
            "no day in y could be matched to exactly one row of X "
            "(k={}, n0={})".format(k, n0)
        )

    regr = linear_model.LinearRegression()
    regr.fit(inp, out)
    predictions = regr.predict(inp)
    # mean_squared_error already averages over the samples; dividing it again
    # by (n - 2) would no longer be a mean squared error.
    mse = mean_squared_error(out, predictions)
    r2 = r2_score(out, predictions)
    return regr.intercept_, regr.coef_, r2, mse


def standard_lr(x, y):
    """
    Fit an ordinary least-squares regression of ``y`` on ``x`` and score it in-sample.

    :param x: feature matrix, one row per sample
    :param y: target values, one entry per row of ``x``
    :return: intercept, slopes, R^2 score, mean squared error -- the last two
        are measured on the same data the model was fitted on
    :raises ValueError: if the inputs are empty or differ in length
    """
    if len(x) == 0 or len(y) == 0:
        raise ValueError("standard_lr needs at least one sample")
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same number of samples "
            "(got {} and {})".format(len(x), len(y))
        )

    regr = linear_model.LinearRegression()
    regr.fit(x, y)
    predictions = regr.predict(x)
    # mean_squared_error already averages over the samples; dividing it again
    # by (n - 2) would no longer be a mean squared error.
    mse = mean_squared_error(y, predictions)
    r2 = r2_score(y, predictions)
    return regr.intercept_, regr.coef_, r2, mse


def main():
    """
    Regress normalized fatigue on normalized soreness and elapsed time, and print the fit.

    Reads the cleaned wellness time series from a path relative to the current
    working directory, replaces missing values with 0, prints the per-column
    null counts and then the tuple returned by ``standard_lr``.
    """
    # Earlier experiment, kept for reference:
    # fatigueSums = pd.read_csv("fatigue_total_sum.csv")
    # workMovingAverage21 = pd.read_csv("21DaySlidingWorkAverage.csv", index_col=0)
    # print(k_days_into_future_regression(workMovingAverage21, fatigueSums, 0, 21))

    wellness = pd.read_csv("../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv")

    # Missing readings are imputed as 0 so the regression receives no NaN values.
    wellness = wellness.fillna(0)
    x = wellness[['normSoreness', 'TimeSinceAugFirst']]
    y = wellness['normFatigue']
    # Sanity check: every count printed here is 0 after the fill above.
    print(wellness.isnull().sum())
    print(standard_lr(x, y))


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|