|
|
- import sklearn.cluster as cluster
- import pandas as pd
- from matplotlib import pyplot as plt
-
-
def find_kmeans(mat, k, first, second):
    """Fit k-means on ``mat`` and display a scatter plot of the clusters.

    Column 0 of ``mat`` is drawn on the x-axis and column 1 on the y-axis,
    with each point coloured by the cluster it was assigned to.

    Args:
        mat: Sample matrix passed to ``KMeans.fit``; it must support
            ``mat[:, 0]`` / ``mat[:, 1]`` indexing.
        k: Number of clusters to fit.
        first: Text appended to ``'Metric: '`` for the x-axis label.
        second: Text appended to ``'Metric: '`` for the y-axis label.
    """
    model = cluster.KMeans(n_clusters=k).fit(mat)
    assignments = model.labels_

    # Scatter of the first two columns, one colour per cluster label.
    plt.figure(figsize=(6, 6))
    plt.xlabel('Metric: ' + first)
    plt.ylabel('Metric: ' + second)
    plt.scatter(mat[:, 0], mat[:, 1], c=assignments, cmap='rainbow')
    plt.show()
-
-
# Load the CSV (path is resolved relative to the current working directory)
# and keep only the two columns that are clustered against each other.
df = pd.read_csv(
    '../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv'
)[["normFatigue", "normSleepQuality"]]

# Arguments: sample values, number of clusters, x-axis label, y-axis label.
find_kmeans(df.values, 2, "normFatigue", "normSleepQuality")
|