import sklearn.cluster as cluster
|
|
import pandas as pd
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
def find_kmeans(mat, k, first, second):
    """Fit k-means on ``mat`` and show a scatter plot coloured by cluster.

    Args:
        mat: Data passed to ``KMeans.fit``. It is also indexed as
            ``mat[:, 0]`` and ``mat[:, 1]`` for plotting, so it must support
            2-D slice indexing; only the first two columns are drawn.
        k: Number of clusters, forwarded as ``n_clusters``.
        first: Name of the metric on the x-axis. It is concatenated onto
            ``'Metric: '``, so it must be a ``str``.
        second: Name of the metric on the y-axis; same constraint as
            ``first``.

    Returns:
        None. The function's only effect is displaying the figure.
    """
    # ``fit`` returns the fitted estimator, so the model can be built and
    # trained in a single expression.
    model = cluster.KMeans(n_clusters=k).fit(mat)

    # Scatter column 0 against column 1, one colour per assigned cluster.
    plt.figure(figsize=(6, 6))
    plt.xlabel('Metric: ' + first)
    plt.ylabel('Metric: ' + second)
    x_values = mat[:, 0]
    y_values = mat[:, 1]
    plt.scatter(x_values, y_values, c=model.labels_, cmap='rainbow')
    plt.show()
|
|
|
|
|
|
# Location of the input CSV, and the two columns to cluster on.
csv_path = '../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv'
x_metric, y_metric = "normFatigue", "normSleepQuality"

# Load the full table, then keep only the two selected columns.
df = pd.read_csv(csv_path)
df = df[[x_metric, y_metric]]

# Arguments: data values, number of clusters, x-axis label, y-axis label.
find_kmeans(df.values, 2, x_metric, y_metric)
|