# Reconstructed from the patch that adds
# data_exploration/wellness_cluster_test.py (new file, blob f25c629).
"""Exploratory k-means clustering of two normalized wellness metrics.

Run as a script: reads the cleaned wellness CSV, keeps the two metric
columns listed in ``METRICS``, clusters the rows with k-means and shows a
scatter plot coloured by cluster label.
"""
import sklearn.cluster as cluster
import pandas as pd
from matplotlib import pyplot as plt

# Path is relative to the current working directory, exactly as in the
# original script (i.e. it resolves when run from data_exploration/).
DATA_PATH = '../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv'

# Columns to cluster on. The same names are reused as axis labels so the
# plotted axes can never drift out of sync with the selected columns.
METRICS = ["normFatigue", "normSleepQuality"]

NUM_CLUSTERS = 2


def find_kmeans(mat, k, first, second, *, random_state=None):
    """Fit k-means on ``mat`` and show a scatter plot of the clusters.

    The model is fitted on every column of ``mat``, but only columns 0 and
    1 are drawn, so the plot is a complete picture only for two-column
    input.

    Args:
        mat: 2-D array supporting ``mat[:, i]`` indexing (e.g. the
            ``.values`` of a DataFrame); rows are samples.
        k: Number of clusters passed to ``KMeans(n_clusters=k)``.
        first: Name of the metric in column 0; used for the x-axis label.
        second: Name of the metric in column 1; used for the y-axis label.
        random_state: Optional seed forwarded to ``KMeans``. ``None``
            (the default) keeps the original unseeded behaviour, where
            cluster assignments may differ between runs.

    Returns:
        None. The function's only effect is to display the figure via
        ``plt.show()``.
    """
    km = cluster.KMeans(n_clusters=k, random_state=random_state)
    km.fit(mat)

    # Scatter the first two columns against each other, colouring each
    # point by the cluster label k-means assigned to it.
    plt.figure(figsize=(6, 6))
    plt.xlabel('Metric: ' + first)
    plt.ylabel('Metric: ' + second)
    plt.scatter(mat[:, 0], mat[:, 1], c=km.labels_, cmap='rainbow')
    plt.show()


def main():
    """Load the wellness CSV and plot a k-means clustering of ``METRICS``."""
    # Read csv in
    df = pd.read_csv(DATA_PATH)

    # Keep only the metric columns we want to cluster on
    df = df[METRICS]

    # values, num clusters, axis labels
    find_kmeans(df.values, NUM_CLUSTERS, *METRICS)


# Guarded so that importing this module (e.g. to reuse find_kmeans) does
# not read the CSV or open a blocking plot window.
if __name__ == "__main__":
    main()