AlgoPlus v0.1.0
Loading...
Searching...
No Matches
dbscan.h
1#ifndef DBSCAN_H
2#define DBSCAN_H
3
4#ifdef __cplusplus
5#include <cstdint>
6#include <iostream>
7#include <map>
8#include <math.h>
9#include <stdexcept>
10#include <utility>
11#include <vector>
12#endif
13
17class DBSCAN {
18 private:
19 std::vector<std::pair<double, double>> setOfPoints;
20 double Eps;
21 int64_t MinPts;
22 int64_t cluster_id{0};
23 std::map<std::pair<double, double>, int64_t> points;
24
25 public:
32 explicit DBSCAN(std::vector<std::pair<double, double>> setOfPoints, double Eps,
33 int64_t MinPts) noexcept
34 : setOfPoints(setOfPoints), Eps(Eps), MinPts(MinPts) {
35 // cluster_id is by default noise
36
37 for (size_t i = 0; i < setOfPoints.size(); ++i) {
38 if (points.find(setOfPoints[i]) == points.end()) {
39 if (ExpandCluster(setOfPoints, setOfPoints[i], cluster_id, Eps, MinPts)) {
40 cluster_id = nextId(cluster_id);
41 }
42 }
43 }
44 }
45
50 int64_t nextId(int64_t cluster_id);
51
61 bool ExpandCluster(std::vector<std::pair<double, double>> setOfPoints,
62 std::pair<double, double> point, int64_t cluster_id, double Eps,
63 int64_t MinPts);
64
72 std::vector<std::pair<double, double>>
73 get_query(std::vector<std::pair<double, double>> setOfPoints, std::pair<double, double> point,
74 double Eps);
75
82 double dist(std::pair<double, double> a, std::pair<double, double> b);
83
89 std::map<std::pair<double, double>, int64_t> get_clusters();
90
96 std::vector<std::pair<double, double>> get_noise();
97};
98
99inline int64_t DBSCAN::nextId(int64_t cluster_id) {
100 try {
101 if (cluster_id < -1) {
102 throw std::logic_error("cluster_id can't be less than -1");
103 }
104 } catch (std::logic_error& e) {
105 std::cerr << e.what() << '\n';
106 return -2;
107 }
108 cluster_id++;
109 return cluster_id;
110}
111
112bool DBSCAN::ExpandCluster(std::vector<std::pair<double, double>> setOfPoints,
113 std::pair<double, double> point, int64_t cluster_id, double Eps,
114 int64_t MinPts) {
115 std::vector<std::pair<double, double>> seeds = get_query(setOfPoints, point, Eps);
116 if (seeds.size() < MinPts) {
117 // no core point
118 points[point] = -1;
119 return false;
120 } else {
121 // we have a core point
122 // all the points in seeds are density-reachable from point
123 for (auto& x : seeds) {
124 points[x] = cluster_id;
125 }
126 for (auto it = seeds.begin(); it != seeds.end(); it++) {
127 if ((*it).first == point.first && (*it).second == point.second) {
128 seeds.erase(it);
129 break;
130 }
131 }
132
133 while (!seeds.empty()) {
134 auto current = seeds[0];
135 std::vector<std::pair<double, double>> result = get_query(setOfPoints, current, Eps);
136
137 if (result.size() >= MinPts) {
138 for (size_t i = 0; i < result.size(); i++) {
139 std::pair<double, double> result_p = result[i];
140 if (points.find(result_p) == points.end() || points[result_p] == -1) {
141 if (points.find(result_p) == points.end()) {
142 seeds.push_back(result_p);
143 }
144 points[result_p] = cluster_id;
145 } // unclassified or noise
146 }
147 }
148 for (auto it = seeds.begin(); it != seeds.end(); it++) {
149 if ((*it).first == current.first && (*it).second == current.second) {
150 seeds.erase(it);
151 break;
152 }
153 }
154 }
155 return true;
156 }
157 return false;
158}
159
160inline std::vector<std::pair<double, double>>
161DBSCAN::get_query(std::vector<std::pair<double, double>> setOfPoints,
162 std::pair<double, double> point, double Eps) {
163 std::vector<std::pair<double, double>> ans;
164 for (size_t i = 0; i < setOfPoints.size(); i++) {
165 std::pair<double, double> curr = setOfPoints[i];
166 if (dist(point, curr) <= Eps) {
167 ans.push_back(curr);
168 }
169 }
170 return ans;
171}
172
173inline double DBSCAN::dist(std::pair<double, double> a, std::pair<double, double> b) {
174 return sqrt(pow(b.second - a.second, 2) + pow(b.first - a.first, 2));
175}
176
177inline std::map<std::pair<double, double>, int64_t> DBSCAN::get_clusters() {
178 std::map<std::pair<double, double>, int64_t> ans;
179 for (size_t i = 0; i < setOfPoints.size(); i++) {
180 if (points[setOfPoints[i]] != -1) {
181 ans[setOfPoints[i]] = points[setOfPoints[i]];
182 }
183 }
184 return ans;
185}
186
187inline std::vector<std::pair<double, double>> DBSCAN::get_noise() {
188 std::vector<std::pair<double, double>> ans;
189 for (size_t i = 0; i < setOfPoints.size(); i++) {
190 if (points[setOfPoints[i]] == -1) {
191 ans.push_back(setOfPoints[i]);
192 }
193 }
194 return ans;
195}
196
197#endif
DBSCAN(std::vector< std::pair< double, double > > setOfPoints, double Eps, int64_t MinPts) noexcept
constructor for the DBSCAN class
Definition dbscan.h:32
bool ExpandCluster(std::vector< std::pair< double, double > > setOfPoints, std::pair< double, double > point, int64_t cluster_id, double Eps, int64_t MinPts)
ExpandCluster function.
Definition dbscan.h:112
double dist(std::pair< double, double > a, std::pair< double, double > b)
dist function
Definition dbscan.h:173
int64_t nextId(int64_t cluster_id)
Definition dbscan.h:99
std::vector< std::pair< double, double > > get_query(std::vector< std::pair< double, double > > setOfPoints, std::pair< double, double > point, double Eps)
get_query function
Definition dbscan.h:161
std::map< std::pair< double, double >, int64_t > get_clusters()
get_clusters function
Definition dbscan.h:177
std::vector< std::pair< double, double > > get_noise()
get_noise function
Definition dbscan.h:187