izzi
SVG SUBSET C++ API
Loading...
Searching...
No Matches
izzi-points-cluster.h
Go to the documentation of this file.
1// izzi 2d point clustering -*- mode: C++ -*-
2
3// Copyright (c) 2025, Benjamin De Kosnik <b.dekosnik@gmail.com>
4
5// This file is part of the alpha60 library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 3, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
16#ifndef izzi_POINTS_CLUSTER_H
17#define izzi_POINTS_CLUSTER_H 1
18
19#include <vector>
20#include <cmath>
21#include <unordered_map>
22#include <queue>
23#include <algorithm>
24#include <random>
25#include <ranges>
26#include <concepts>
27#include <limits>
28#include <memory>
29#include <iostream>
30#include <numeric>
31
32
33/// Voronoi diagram related structures
42
43
44/// Cluster of points.
45/// Reduce a set of points via a given cluster algorithm.
47{
48 double radius;
50
51public:
52 point_cluster(const vpoints& input_points, const double r)
53 : radius(r), points(input_points) { }
54
55 /// Algorithm 1: Grid-based Clustering (Simple and Fast)
58 {
59 if (points.empty()) return {};
60
61 // Create grid cells
62 std::unordered_map<std::pair<int, int>, vpoints,
63 decltype([](const auto& p) {
64 return std::hash<int>{}(p.first) ^ (std::hash<int>{}(p.second) << 1);
65 })> grid;
66
67 for (const auto& point : points)
68 {
69 int grid_x = static_cast<int>(point.x / radius);
70 int grid_y = static_cast<int>(point.y / radius);
71 grid[{grid_x, grid_y}].push_back(point);
72 }
73
74 vwpoints result;
75 for (const auto& [cell, cell_points] : grid)
76 {
77 if (!cell_points.empty())
78 {
79 auto centroid = calculate_centroid(cell_points);
80 result.emplace_back(centroid, cell_points.size());
81 }
82 }
83
84 return result;
85 }
86
87 /// Algorithm 2: Hierarchical Agglomerative Clustering (More Accurate)
90 {
91 if (points.empty()) return {};
92
93 std::vector<vpoints> clusters;
94 std::vector<bool> assigned(points.size(), false);
95
96 for (size_t i = 0; i < points.size(); ++i) {
97 if (assigned[i]) continue;
98
99 vpoints current_cluster;
100 std::queue<size_t> to_process;
101 to_process.push(i);
102 assigned[i] = true;
103
104 while (!to_process.empty()) {
105 size_t current_idx = to_process.front();
106 to_process.pop();
107 current_cluster.push_back(points[current_idx]);
108
109 // Find all unassigned points within cluster radius
110 for (size_t j = 0; j < points.size(); ++j) {
111 if (!assigned[j] && points[current_idx].distance(points[j]) <= radius) {
112 assigned[j] = true;
113 to_process.push(j);
114 }
115 }
116 }
117
118 if (!current_cluster.empty()) {
119 clusters.push_back(std::move(current_cluster));
120 }
121 }
122
123 return weigh_cluster(clusters);
124 }
125
126 /// Algorithm 3: K-Means Inspired with Distance Constraint
128 reduce_constrained_k_means(size_t max_iterations = 100)
129 {
130 if (points.empty()) return {};
131
132 auto centroids = initialize_centroids();
133 std::vector<vpoints> clusters(centroids.size());
134
135 for (size_t iter = 0; iter < max_iterations; ++iter)
136 {
137 // Assign points to nearest centroid within cluster radius
138 for (const auto& point : points)
139 {
140 double min_dist = std::numeric_limits<double>::max();
141 int best_cluster = -1;
142
143 for (size_t i = 0; i < centroids.size(); ++i)
144 {
145 double dist = point.distance(centroids[i]);
146 if (dist <= radius && dist < min_dist)
147 {
148 min_dist = dist;
149 best_cluster = static_cast<int>(i);
150 }
151 }
152
153 if (best_cluster != -1)
154 clusters[best_cluster].push_back(point);
155 }
156
157 // Update centroids
158 vpoints new_centroids;
159 for (size_t i = 0; i < clusters.size(); ++i)
160 {
161 if (!clusters[i].empty())
162 new_centroids.push_back(calculate_centroid(clusters[i]));
163 }
164
165 // Check for convergence
166 if (new_centroids.size() == centroids.size())
167 {
168 bool converged = true;
169 for (size_t i = 0; i < centroids.size(); ++i)
170 {
171 if (centroids[i].distance(new_centroids[i]) > 1e-6)
172 {
173 converged = false;
174 break;
175 }
176 }
177 if (converged)
178 break;
179 }
180
181 centroids = std::move(new_centroids);
182 clusters.resize(centroids.size());
183 }
184
185 return weigh_cluster(clusters);
186 }
187
188 /// Algorithm 4: Voronoi-Based Clustering with Radius Constraint
190 reduce_voronoi(size_t max_iterations = 50)
191 {
192 if (points.empty()) return {};
193
194 // Step 1: Initialize sites using farthest point sampling
195 auto sites = initialize_centroids();
196 std::vector<voronoi_cell> cells;
197
198 for (const auto& site : sites) {
199 cells.emplace_back(site);
200 }
201
202 for (size_t iter = 0; iter < max_iterations; ++iter) {
203 // Clear current cell points
204 for (auto& cell : cells) {
205 cell.points.clear();
206 }
207
208 // Step 2: Assign each point to nearest site (Voronoi partitioning)
209 for (const auto& point : points) {
210 double min_dist = std::numeric_limits<double>::max();
211 size_t best_cell = 0;
212
213 for (size_t i = 0; i < cells.size(); ++i) {
214 double dist = point.distance(cells[i].site);
215 if (dist < min_dist) {
216 min_dist = dist;
217 best_cell = i;
218 }
219 }
220
221 // Only assign if within cluster radius of the site
222 if (min_dist <= radius) {
223 cells[best_cell].points.push_back(point);
224 }
225 }
226
227 // Step 3: Update sites to centroids of their Voronoi cells
228 bool converged = true;
229 for (size_t i = 0; i < cells.size(); ++i) {
230 if (!cells[i].points.empty()) {
231 Point new_site = calculate_centroid(cells[i].points);
232 if (cells[i].site.distance(new_site) > 1e-6) {
233 converged = false;
234 }
235 cells[i].site = new_site;
236 }
237 }
238
239 if (converged) break;
240
241 // Step 4: Remove empty cells and merge nearby sites
242 cells.erase(std::remove_if(cells.begin(), cells.end(),
243 [](const voronoi_cell& cell) { return cell.points.empty(); }),
244 cells.end());
245
246 // Merge sites that are too close
247 merge_close_sites(cells);
248 }
249
250 // Create final clusters by ensuring radius constraint
251 return constrain_voronoi_clusters(cells);
252 }
253
254private:
255 vpoints
257 {
258 vpoints centroids;
259 if (!points.empty())
260 {
261 std::vector<bool> selected(points.size(), false);
262
263 // Start with a random point
264 std::random_device rd;
265 std::mt19937 gen(rd());
266 std::uniform_int_distribution<size_t> dist(0, points.size() - 1);
267
268 size_t first_idx = dist(gen);
269 centroids.push_back(points[first_idx]);
270 selected[first_idx] = true;
271
272 // Add centroids that are at least radius away from existing ones
273 for (size_t i = 0; i < points.size(); ++i)
274 {
275 if (selected[i])
276 continue;
277
278 bool far_enough = true;
279 for (const auto& centroid : centroids)
280 {
281 if (points[i].distance(centroid) < radius)
282 {
283 far_enough = false;
284 break;
285 }
286 }
287
288 if (far_enough)
289 {
290 centroids.push_back(points[i]);
291 selected[i] = true;
292 }
293 }
294 }
295 return centroids;
296 }
297
298 Point
300 {
301 Point cp(0,0);
302 if (!cluster.empty())
303 {
304 double sum_x = 0;
305 double sum_y = 0;
306 std::string name;
307 for (const auto& point : cluster)
308 {
309 sum_x += point.x;
310 sum_y += point.y;
311 if (!point.name.empty())
312 name = point.name;
313 }
314 cp = Point(sum_x / cluster.size(), sum_y / cluster.size(), name);
315 }
316 return cp;
317 }
318
320 weigh_cluster(const std::vector<vpoints>& clusters)
321 {
322 vwpoints result;
323 for (const auto& cluster : clusters) {
324 if (!cluster.empty()) {
325 auto centroid = calculate_centroid(cluster);
326 result.emplace_back(centroid, cluster.size());
327 }
328 }
329 return result;
330 }
331
332 void
333 merge_close_sites(std::vector<voronoi_cell>& cells)
334 {
335 for (size_t i = 0; i < cells.size(); ++i)
336 {
337 for (size_t j = i + 1; j < cells.size(); )
338 {
339 if (cells[i].site.distance(cells[j].site) < radius * 0.5)
340 {
341 // Merge cell j into cell i
342 auto& cpoints = cells[i].points;
343 cpoints.insert(cpoints.end(),
344 cells[j].points.begin(), cells[j].points.end());
345 cells[i].site = calculate_centroid(cpoints);
346
347 // Remove cell j
348 cells.erase(cells.begin() + j);
349 }
350 else
351 ++j;
352 }
353 }
354 }
355
357 constrain_voronoi_clusters(const std::vector<voronoi_cell>& cells)
358 {
359 vwpoints result;
360 for (const auto& cell : cells)
361 {
362 if (!cell.points.empty())
363 {
364 // Further split cells that violate radius constraint
365 auto subclusters = split_large_cell(cell);
366 for (const auto& subcluster : subclusters)
367 {
368 if (!subcluster.empty())
369 {
370 auto centroid = calculate_centroid(subcluster);
371 result.emplace_back(centroid, subcluster.size());
372 }
373 }
374 }
375 }
376
377 return result;
378 }
379
380 std::vector<vpoints>
382 {
383 std::vector<vpoints> subclusters;
384
385 if (cell.points.empty()) return subclusters;
386
387 // Check if cell needs splitting (points too far from centroid)
388 std::vector<bool> assigned(cell.points.size(), false);
389
390 for (size_t i = 0; i < cell.points.size(); ++i) {
391 if (assigned[i]) continue;
392
393 vpoints current_subcluster;
394 std::queue<size_t> to_process;
395 to_process.push(i);
396 assigned[i] = true;
397
398 while (!to_process.empty()) {
399 size_t current_idx = to_process.front();
400 to_process.pop();
401 current_subcluster.push_back(cell.points[current_idx]);
402
403 // Find all unassigned points within cluster radius
404 for (size_t j = 0; j < cell.points.size(); ++j) {
405 if (!assigned[j] &&
406 cell.points[current_idx].distance(cell.points[j]) <= radius) {
407 assigned[j] = true;
408 to_process.push(j);
409 }
410 }
411 }
412
413 if (!current_subcluster.empty()) {
414 subclusters.push_back(std::move(current_subcluster));
415 }
416 }
417
418 return subclusters;
419 }
420};
421
422// Main function interface
424clusterPoints(const vpoints& points, double radius,
425 const std::string& method)
426{
427 point_cluster clusterer(points, radius);
428
429 if (method == "grid") {
430 return clusterer.reduce_grid();
431 } else if (method == "hierarchical") {
432 return clusterer.reduce_hierarchical_agglomerative();
433 } else if (method == "kmeans") {
434 return clusterer.reduce_constrained_k_means();
435 } else if (method == "voronoi") {
436 return clusterer.reduce_voronoi();
437 } else {
438 throw std::invalid_argument("Unknown clustering method");
439 }
440}
441
442// Example usage and test function
443void
444printClusters(const vwpoints& clusters)
445{
446 std::cout << "Clusters found: " << clusters.size() << "\n";
447 for (size_t i = 0; i < clusters.size(); ++i)
448 {
449 const auto& wp = clusters[i];
450 std::cout << "Cluster " << i + 1 << ": " << wp.pt.x << "," << wp.pt.y
451 << " weight: " << wp.weight << " name: " << wp.pt.name
452 << "\n";
453 }
454
455 auto lklstr = [](size_t sum, const WeightedPoint& wp)
456 { return sum + wp.weight; };
457
458 std::cout << "Total points represented: "
459 << std::accumulate(clusters.begin(), clusters.end(), 0ULL, lklstr)
460 << "\n";
461}
462
463#endif
Cluster of points. Reduce a set of points via a given cluster algorithm.
vwpoints reduce_voronoi(size_t max_iterations=50)
Algorithm 4: Voronoi-Based Clustering with Radius Constraint.
vwpoints reduce_grid()
Algorithm 1: Grid-based Clustering (Simple and Fast)
vpoints initialize_centroids()
vwpoints weigh_cluster(const std::vector< vpoints > &clusters)
std::vector< vpoints > split_large_cell(const voronoi_cell &cell)
vwpoints constrain_voronoi_clusters(const std::vector< voronoi_cell > &cells)
vwpoints reduce_hierarchical_agglomerative()
Algorithm 2: Hierarchical Agglomerative Clustering (More Accurate)
Point calculate_centroid(const vpoints &cluster)
vwpoints reduce_constrained_k_means(size_t max_iterations=100)
Algorithm 3: K-Means Inspired with Distance Constraint.
void merge_close_sites(std::vector< voronoi_cell > &cells)
point_cluster(const vpoints &input_points, const double r)
vwpoints clusterPoints(const vpoints &points, double radius, const std::string &method)
void printClusters(const vwpoints &clusters)
std::vector< Point > vpoints
std::vector< WeightedPoint > vwpoints
Voronoi diagram related structures.
voronoi_cell(const Point &p)