| @ -0,0 +1,25 @@ | |||||
| import sklearn.cluster as cluster | |||||
| import pandas as pd | |||||
| from matplotlib import pyplot as plt | |||||
def find_kmeans(mat, k, first, second, random_state=None):
    """Cluster ``mat`` with k-means and scatter-plot its first two columns.

    Args:
        mat: 2-D array that supports ``mat[:, i]`` indexing (for example
            ``DataFrame.values``). The model is fitted on every column,
            but only columns 0 and 1 are drawn.
        k: Number of clusters to fit.
        first: Metric name shown on the x axis (column 0).
        second: Metric name shown on the y axis (column 1).
        random_state: Optional seed forwarded to ``KMeans`` so a run can be
            reproduced. The default ``None`` leaves initialisation unseeded.

    Returns:
        The fitted ``KMeans`` estimator, so callers can inspect the
        resulting labels, centroids, or inertia.
    """
    km = cluster.KMeans(n_clusters=k, random_state=random_state)
    km.fit(mat)

    # Scatter the two plotted metrics, colouring each sample by the cluster
    # it was assigned to.
    plt.figure(figsize=(6, 6))
    plt.xlabel('Metric: ' + first)
    plt.ylabel('Metric: ' + second)
    plt.scatter(mat[:, 0], mat[:, 1], c=km.labels_, cmap='rainbow')
    plt.show()

    return km
# Load the normalized wellness/menstruation time series and keep only the
# two metrics that are clustered against each other.
df = pd.read_csv(
    '../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv'
)[["normFatigue", "normSleepQuality"]]

# Arguments: sample matrix, number of clusters, x-axis label, y-axis label.
find_kmeans(
    df.values,
    2,
    "normFatigue",
    "normSleepQuality",
)