|
@ -0,0 +1,25 @@ |
|
|
|
|
|
import sklearn.cluster as cluster |
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
from matplotlib import pyplot as plt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_kmeans(mat, k, first, second, random_state=None):
    """Cluster ``mat`` with k-means and scatter-plot its first two columns.

    Fits ``cluster.KMeans`` with ``k`` clusters on ``mat``, then draws
    column 0 against column 1 with each point colored by its cluster label.

    Args:
        mat: 2-D array that supports ``mat[:, 0]`` / ``mat[:, 1]`` indexing
            (for example ``DataFrame.values``). All columns are used for
            fitting; only the first two are plotted.
        k: Number of clusters to fit.
        first: Name of the metric in column 0; used for the x-axis label.
        second: Name of the metric in column 1; used for the y-axis label.
        random_state: Optional seed forwarded to ``KMeans`` so the cluster
            assignment can be reproduced. The default ``None`` keeps the
            original unseeded behavior.

    Returns:
        The fitted ``KMeans`` estimator, so callers can inspect attributes
        such as ``labels_`` after the plot has been shown.
    """
    km = cluster.KMeans(n_clusters=k, random_state=random_state)
    km.fit(mat)

    # Scatter the two metrics against each other, one color per cluster.
    plt.figure(figsize=(6, 6))
    # str.format (rather than '+') so non-string labels do not raise.
    plt.xlabel('Metric: {}'.format(first))
    plt.ylabel('Metric: {}'.format(second))
    plt.scatter(mat[:, 0], mat[:, 1], c=km.labels_, cmap='rainbow')
    plt.show()

    return km
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the prepared CSV into a DataFrame.
df = pd.read_csv('../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv')

# Choose the two metrics to cluster on; the same names label the plot axes.
metric_x, metric_y = "normFatigue", "normSleepQuality"
df = df[[metric_x, metric_y]]

# Arguments: feature values, number of clusters, x-axis label, y-axis label.
find_kmeans(df.values, 2, metric_x, metric_y)