"""Exploratory k-means clustering of two normalized wellness metrics.

Reconstructed from commit 89c7f30 ("add clustering"), which added this file
as data_exploration/wellness_cluster_test.py.

Running the module reads the prepared wellness CSV, keeps the
``normFatigue`` and ``normSleepQuality`` columns, clusters the rows into two
groups and shows a scatter plot coloured by cluster label.
"""
import sklearn.cluster as cluster
import pandas as pd
from matplotlib import pyplot as plt


def find_kmeans(mat, k, first, second, *, random_state=None):
    """Fit k-means on ``mat`` and scatter-plot its first two columns.

    Args:
        mat: Array supporting ``mat[:, i]`` column indexing (the script
            below passes ``df.values``). The whole array is used for
            fitting; column 0 is drawn on the x axis and column 1 on the
            y axis.
        k: Number of clusters, forwarded as ``n_clusters``.
        first: Name of the metric in column 0, used in the x-axis label.
        second: Name of the metric in column 1, used in the y-axis label.
        random_state: Forwarded to ``KMeans``. The default ``None`` leaves
            the seeding to the library, exactly as before this parameter
            existed; pass an int for a reproducible plot.

    Returns:
        The fitted ``KMeans`` estimator, so callers can inspect it after
        the plot window is closed.
    """
    km = cluster.KMeans(n_clusters=k, random_state=random_state)
    km.fit(mat)

    # Scatter the two metrics against each other, one colour per cluster.
    plt.figure(figsize=(6, 6))
    plt.xlabel(f'Metric: {first}')
    plt.ylabel(f'Metric: {second}')
    plt.scatter(mat[:, 0], mat[:, 1], c=km.labels_, cmap='rainbow')
    plt.show()

    return km


# NOTE(review): everything below runs at import time (CSV read + plot
# window). The file name ends in ``_test.py``, so a test collector that
# imports such files would trigger it -- consider a ``__main__`` guard.
# NOTE(review): the path is relative to the current working directory, not
# to this file; presumably the script is run from data_exploration/.
df = pd.read_csv('../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv')

# Columns to cluster on; the same names double as the axis labels.
metrics = ["normFatigue", "normSleepQuality"]
df = df[metrics]

# values, num clusters, axis labels
find_kmeans(df.values, 2, *metrics)