datafest competition 2019
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

24 lines
686 B

5 years ago
  1. import sklearn.cluster as cluster
  2. import pandas as pd
  3. from matplotlib import pyplot as plt
  def find_kmeans(mat, k, first, second):
      """Cluster ``mat`` with k-means and show a scatter plot coloured by cluster.

      Args:
          mat: Data handed to ``KMeans.fit``. It is also sliced as
              ``mat[:, 0]`` and ``mat[:, 1]`` for plotting, so it must support
              that 2-D indexing (presumably a NumPy array -- confirm with
              callers).
          k: Number of clusters requested from ``KMeans``.
          first: Text appended to ``'Metric: '`` to form the x-axis label.
          second: Text appended to ``'Metric: '`` to form the y-axis label.

      Returns:
          None. The figure is displayed through ``plt.show()``.
      """
      # Fit before touching matplotlib so a clustering failure leaves no
      # half-built figure behind.
      model = cluster.KMeans(n_clusters=k).fit(mat)

      plt.figure(figsize=(6, 6))
      for set_label, metric in ((plt.xlabel, first), (plt.ylabel, second)):
          set_label('Metric: ' + metric)

      # The first two columns are the plotted axes; colour encodes the cluster
      # label assigned to each row.
      xs, ys = mat[:, 0], mat[:, 1]
      plt.scatter(xs, ys, c=model.labels_, cmap='rainbow')
      plt.show()
  # Location of the input CSV, relative to the working directory.
  wellness_csv = '../data_preparation/cleaned/time_series_normalized_wellness_menstruation.csv'
  # The two columns to cluster on; the same names are reused as axis labels.
  metric_x, metric_y = "normFatigue", "normSleepQuality"
  # Load the file and keep only the selected columns.
  df = pd.read_csv(wellness_csv)[[metric_x, metric_y]]
  # Arguments: raw values, number of clusters, x-axis label, y-axis label.
  find_kmeans(df.values, 2, metric_x, metric_y)