// k-means entry point: seeds K cluster centres from `data`, then iterates up
// to MAX_ITER times, regrouping points by their assigned cluster and replacing
// this->cluster_centers with freshly built centroids.
// No return type is visible on the declaration, so this appears to be a
// constructor -- confirm against the enclosing class.
//
// data      points to cluster, one std::vector<double> per point.
//           NOTE(review): taken by value, so the whole data set is copied on
//           every call; the visible lines only read it.
// K         number of centres pushed into this->cluster_centers and number of
//           buckets in _clusters.
// MAX_ITER  upper bound on the number of outer iterations (default 1500).
//
// NOTE(review): several statements of this routine (the per-point step, the
// centroid computation, the branch taken on convergence, closing braces) are
// not visible in this excerpt; the comments below describe only what is shown.
42 kmeans(std::vector<std::vector<double>> data,
int K, int64_t MAX_ITER = 1500)
// Non-deterministic seed source feeding a 64-bit Mersenne Twister.
45 std::random_device rd;
46 std::mt19937_64 gen(rd());
// Real-valued distribution over [numeric_limits<double>::min(), max()).
// Note that numeric_limits<double>::min() is the smallest POSITIVE normalised
// double, not the most negative value.
47 std::uniform_real_distribution<double> distrib(std::numeric_limits<double>::min(),
48 std::numeric_limits<double>::max());
// Initial centres: K draws, each used to pick one element of `data`.
// NOTE(review): the draw is a double that is not bounded by data.size(), yet
// it is used directly as the subscript of `data`. Almost every value drawn is
// far outside the valid index range (and the double -> index conversion is
// itself out of range), so this looks like undefined behaviour. A
// std::uniform_int_distribution<std::size_t>(0, data.size() - 1) is the
// likely intent -- confirm and fix. Empty `data` is not guarded against here.
49 for (
int i = 0; i < K; i++) {
50 double rand_num = distrib(gen);
51 this->cluster_centers.push_back(data[rand_num]);
// Main refinement loop, at most MAX_ITER passes.
// NOTE(review): the counter is `int` while MAX_ITER is int64_t; a bound above
// INT_MAX could never be reached without overflowing the counter.
54 for (
int ww = 0; ww < MAX_ITER; ww++) {
// Walks every index of `data`; the loop body is not visible in this excerpt
// (presumably it assigns each point to a centre -- verify).
// NOTE(review): signed int64_t index compared against unsigned size().
55 for (int64_t i = 0; i < data.size(); i++) {
// Regroup points by cluster id: one bucket of points per cluster.
59 std::vector<std::vector<std::vector<double>>> _clusters(K);
// `assignments` is declared outside this excerpt; each entry is used as
// (point, cluster index). x.second is used unchecked as an index into
// _clusters, so it is assumed to lie in [0, K) -- TODO confirm.
60 for (
auto& x : assignments) {
61 _clusters[x.second].push_back(x.first);
// Container for this iteration's centroids; the code that fills it inside the
// loop over K is not visible here.
63 std::vector<std::vector<double>> new_centroids;
64 for (
int i = 0; i < K; i++) {
// Convergence test: exact element-wise equality between the new centroids and
// the current centres. The branch body is not visible (presumably it stops
// iterating -- verify).
// NOTE(review): exact floating-point equality only triggers once the
// centroids stop changing bit-for-bit.
68 if (new_centroids == this->cluster_centers) {
// Otherwise adopt the new centroids for the next pass.
71 this->cluster_centers = new_centroids;
90 std::vector<double>
id = this->cluster_centers[0];
92 int min_dist = distance(x,
id);
93 for (
int j = 0; j < this->cluster_centers.size(); j++) {
94 int current_dist = distance(this->cluster_centers[j], x);
95 if (current_dist < min_dist) {
96 min_dist = current_dist;
98 id = this->cluster_centers[index];
101 assignments[x] = index;