AlgoPlus v0.1.0
Loading...
Searching...
No Matches
kmeans.h
1#ifndef KMEANS_H
2#define KMEANS_H
3
4#ifdef __cplusplus
5#include "../../../../third_party/json.hpp"
6#include <fstream>
7#include <iostream>
8#include <map>
9#include <random>
10#include <vector>
11#endif
12
13using json = nlohmann::json;
14
/**
 * @brief k-means clustering over 2-D points.
 *
 * Each point is a std::vector<double>; only the first two coordinates
 * (x = [0], y = [1]) take part in distance and centroid computations, so
 * every point must hold at least two values (precondition, not checked).
 *
 * The whole algorithm runs inside the constructor; fit() only returns the
 * stored result. The class declares no destructor or copy/move operations:
 * all members are standard containers that release themselves.
 */
class kmeans {
  private:
    std::vector<std::vector<double>> data;               // input points
    int K;                                               // requested number of clusters
    std::vector<std::vector<double>> cluster_centers;    // current centre of each cluster
    std::map<std::vector<double>, int64_t> assignments;  // point -> cluster index

    /**
     * @brief Euclidean distance between two points, using x and y only.
     * @param a first point (at least two coordinates)
     * @param b second point (at least two coordinates)
     * @return the distance as a double
     */
    double distance(const std::vector<double>& a, const std::vector<double>& b) const {
        const double dx = a[0] - b[0];
        const double dy = a[1] - b[1];
        return std::sqrt(dx * dx + dy * dy);
    }

    /**
     * @brief Index of the cluster centre nearest to x.
     *
     * Requires cluster_centers to be non-empty. Ties go to the lowest index.
     * Distances stay in double: truncating them to an integer would make
     * every centre closer than 1.0 look equally near.
     */
    int64_t closest_center(const std::vector<double>& x) const {
        int64_t best = 0;
        double best_dist = distance(x, cluster_centers[0]);
        for (std::size_t j = 1; j < cluster_centers.size(); ++j) {
            const double current = distance(cluster_centers[j], x);
            if (current < best_dist) {
                best_dist = current;
                best = static_cast<int64_t>(j);
            }
        }
        return best;
    }

  public:
    /**
     * @brief Constructor for the kmeans class; runs the clustering.
     *
     * @param data     points to cluster (each with at least two coordinates)
     * @param K        number of clusters
     * @param MAX_ITER upper bound on the number of refinement iterations
     *
     * If data is empty or K <= 0 nothing is computed and fit() returns
     * empty containers. Initial centres are distinct randomly chosen input
     * points; when K exceeds the number of points the surplus centres are
     * drawn with repetition.
     */
    kmeans(std::vector<std::vector<double>> data, int K, int64_t MAX_ITER = 1500)
        : data(data), K(K) {
        const std::size_t n = this->data.size();
        if (n == 0 || K <= 0) {
            return;
        }
        const std::size_t k = static_cast<std::size_t>(K);

        std::random_device rd;
        std::mt19937_64 gen(rd());

        // Partial Fisher-Yates shuffle over the point indices: the first
        // min(k, n) centres are guaranteed to be different input points.
        std::vector<std::size_t> order(n);
        for (std::size_t i = 0; i < n; ++i) {
            order[i] = i;
        }
        this->cluster_centers.reserve(k);
        for (std::size_t i = 0; i < k; ++i) {
            if (i < n) {
                std::uniform_int_distribution<std::size_t> pick(i, n - 1);
                const std::size_t j = pick(gen);
                const std::size_t chosen = order[j];
                order[j] = order[i];
                order[i] = chosen;
                this->cluster_centers.push_back(this->data[chosen]);
            } else {
                // More clusters than points: repetition is unavoidable.
                std::uniform_int_distribution<std::size_t> pick(0, n - 1);
                this->cluster_centers.push_back(this->data[pick(gen)]);
            }
        }

        for (int64_t iter = 0; iter < MAX_ITER; ++iter) {
            // Group the points themselves (not the map keys) so that
            // duplicated points keep their full weight in the mean.
            std::vector<std::vector<std::vector<double>>> clusters(k);
            for (const auto& point : this->data) {
                const int64_t idx = closest_center(point);
                this->assignments[point] = idx;
                clusters[static_cast<std::size_t>(idx)].push_back(point);
            }

            std::vector<std::vector<double>> new_centroids;
            new_centroids.reserve(k);
            for (std::size_t i = 0; i < k; ++i) {
                if (clusters[i].empty()) {
                    // Keep the old centre: a 0/0 mean would be NaN, which
                    // never compares equal and would block convergence.
                    new_centroids.push_back(this->cluster_centers[i]);
                } else {
                    new_centroids.push_back(get_centroid(clusters[i]));
                }
            }

            if (new_centroids == this->cluster_centers) {
                break;  // centres did not move: converged
            }
            this->cluster_centers = new_centroids;
        }
    }

    /**
     * @brief Records x in the assignment map under its nearest cluster.
     * @param x point to assign (at least two coordinates)
     *
     * Does nothing when there are no cluster centres.
     */
    void assign_to_closest(const std::vector<double>& x) {
        if (this->cluster_centers.empty()) {
            return;
        }
        this->assignments[x] = closest_center(x);
    }

    /**
     * @brief Mean of the x and y coordinates of a group of points.
     * @param cluster points to average (each with at least two coordinates)
     * @return {mean_x, mean_y}; for an empty cluster both values are NaN
     *         (0/0), so callers should check for emptiness first
     */
    std::vector<double> get_centroid(const std::vector<std::vector<double>>& cluster) const {
        double sum_x = 0.0;
        double sum_y = 0.0;
        for (const auto& point : cluster) {
            sum_x += point[0];
            sum_y += point[1];
        }
        const double n = static_cast<double>(cluster.size());
        return {sum_x / n, sum_y / n};
    }

    /**
     * @brief Returns the clustering computed by the constructor.
     * @return pair of (cluster centres, map from point to cluster index)
     */
    std::pair<std::vector<std::vector<double>>, std::map<std::vector<double>, int64_t>>
    fit() const {
        return std::make_pair(cluster_centers, assignments);
    }
};
131
132#endif
std::vector< double > get_centroid(std::vector< std::vector< double > > cluster)
Definition kmeans.h:109
~kmeans()
Destroy the kmeans object.
Definition kmeans.h:79
std::pair< std::vector< std::vector< double > >, std::map< std::vector< double >, int64_t > > fit()
Definition kmeans.h:127
kmeans(std::vector< std::vector< double > > data, int K, int64_t MAX_ITER=1500)
Constructor for the kmeans class.
Definition kmeans.h:42
void assign_to_closest(std::vector< double > &x)
Definition kmeans.h:89