/src/fluent-bit/lib/ripser-1.2.1/ripser.cpp
Line | Count | Source |
1 | | /* |
2 | | |
3 | | Ripser: a lean C++ code for computation of Vietoris-Rips persistence barcodes |
4 | | |
5 | | MIT License |
6 | | |
7 | | Copyright (c) 2015–2021 Ulrich Bauer |
8 | | Modifications Copyright (c) 2025 The Fluent Bit Authors |
9 | | |
10 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
11 | | of this software and associated documentation files (the "Software"), to deal |
12 | | in the Software without restriction, including without limitation the rights |
13 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
14 | | copies of the Software, and to permit persons to whom the Software is |
15 | | furnished to do so, subject to the following conditions: |
16 | | |
17 | | The above copyright notice and this permission notice shall be included in all |
18 | | copies or substantial portions of the Software. |
19 | | |
20 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
21 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
22 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
23 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
24 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
25 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
26 | | SOFTWARE. |
27 | | |
28 | | You are under no obligation whatsoever to provide any bug fixes, patches, or |
29 | | upgrades to the features, functionality or performance of the source code |
30 | | ("Enhancements") to anyone; however, if you choose to make your Enhancements |
31 | | available either publicly, or directly to the author of this software, without |
32 | | imposing a separate written license agreement for such Enhancements, then you |
33 | | hereby grant the following license: a non-exclusive, royalty-free perpetual |
34 | | license to install, use, modify, prepare derivative works, incorporate into |
35 | | other computer software, distribute, and sublicense such enhancements or |
36 | | derivative works thereof, in binary and source code form. |
37 | | |
38 | | */ |
39 | | |
40 | | //#define USE_COEFFICIENTS |
41 | | |
42 | | //#define INDICATE_PROGRESS |
43 | | // #define PRINT_PERSISTENCE_PAIRS |
44 | | |
45 | | //#define USE_ROBINHOOD_HASHMAP |
46 | | |
47 | | #include <algorithm> |
48 | | #include <atomic> |
49 | | #include <cassert> |
50 | | #include <chrono> |
51 | | #include <cmath> |
52 | | #include <cstdlib> |
53 | | #include <fstream> |
54 | | #include <iostream> |
55 | | #include <numeric> |
56 | | #include <queue> |
57 | | #include <sstream> |
58 | | #include <unordered_map> |
59 | | |
60 | | // Include ripser core implementation. |
61 | | // Either include a header-only version or include ripser.cpp directly. |
62 | | // Only one compilation unit should compile ripser.cpp. |
63 | | #include "ripser_internal.hpp" |
64 | | |
65 | | #ifdef USE_ROBINHOOD_HASHMAP |
66 | | |
67 | | #include "robin-hood-hashing/src/include/robin_hood.h" |
68 | | |
69 | | template <class Key, class T, class H, class E> |
70 | | using hash_map = robin_hood::unordered_map<Key, T, H, E>; |
71 | | template <class Key> using hash = robin_hood::hash<Key>; |
72 | | |
73 | | #else |
74 | | |
75 | | template <class Key, class T, class H, class E> using hash_map = std::unordered_map<Key, T, H, E>; |
76 | | template <class Key> using hash = std::hash<Key>; |
77 | | |
78 | | #endif |
79 | | |
80 | | #ifdef INDICATE_PROGRESS |
81 | | static const std::chrono::milliseconds time_step(40); |
82 | | #endif |
83 | | |
84 | | static const std::string clear_line("\r\033[K"); |
85 | | |
86 | | static const size_t num_coefficient_bits = 8; |
87 | | |
88 | | static const index_t max_simplex_index = |
89 | | (1l << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1; |
90 | | |
91 | 0 | void check_overflow(index_t i) { |
92 | 0 | if |
93 | | #ifdef USE_COEFFICIENTS |
94 | | (i > max_simplex_index) |
95 | | #else |
96 | 0 | (i < 0) |
97 | 0 | #endif |
98 | 0 | throw std::overflow_error("simplex index " + std::to_string((uint64_t)i) + |
99 | 0 | " in filtration is larger than maximum index " + |
100 | 0 | std::to_string(max_simplex_index)); |
101 | 0 | } |
102 | | |
103 | | class binomial_coeff_table { |
104 | | std::vector<std::vector<index_t>> B; |
105 | | |
106 | | public: |
107 | 0 | binomial_coeff_table(index_t n, index_t k) : B(n + 1) { |
108 | 0 | for (index_t i = 0; i <= n; ++i) { |
109 | 0 | B[i].resize(k + 1, 0); |
110 | 0 | B[i][0] = 1; |
111 | 0 | for (index_t j = 1; j < std::min(i, k + 1); ++j) |
112 | 0 | B[i][j] = B[i - 1][j - 1] + B[i - 1][j]; |
113 | 0 | if (i <= k) B[i][i] = 1; |
114 | 0 | check_overflow(B[i][std::min(i >> 1, k)]); |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | 0 | index_t operator()(index_t n, index_t k) const { |
119 | 0 | assert(n < B.size() && k < B[n].size() && n >= k - 1); |
120 | 0 | return B[n][k]; |
121 | 0 | } |
122 | | }; |
123 | | |
124 | 0 | bool is_prime(const coefficient_t n) { |
125 | 0 | if (!(n & 1) || n < 2) return n == 2; |
126 | 0 | for (coefficient_t p = 3; p <= n / p; p += 2) |
127 | 0 | if (!(n % p)) return false; |
128 | 0 | return true; |
129 | 0 | } |
130 | | |
131 | 0 | std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m) { |
132 | 0 | std::vector<coefficient_t> inverse(m); |
133 | 0 | inverse[1] = 1; |
134 | | // m = a * (m / a) + m % a |
135 | | // Multipying with inverse(a) * inverse(m % a): |
136 | | // 0 = inverse(m % a) * (m / a) + inverse(a) (mod m) |
137 | 0 | for (coefficient_t a = 2; a < m; ++a) inverse[a] = m - (inverse[m % a] * (m / a)) % m; |
138 | 0 | return inverse; |
139 | 0 | } |
140 | | |
141 | | #ifdef USE_COEFFICIENTS |
142 | | |
143 | | struct __attribute__((packed)) entry_t { |
144 | | index_t index : 8 * sizeof(index_t) - num_coefficient_bits; |
145 | | coefficient_t coefficient : num_coefficient_bits; |
146 | | entry_t(index_t _index, coefficient_t _coefficient) |
147 | | : index(_index), coefficient(_coefficient) {} |
148 | | entry_t(index_t _index) : index(_index), coefficient(0) {} |
149 | | entry_t() : index(0), coefficient(0) {} |
150 | | }; |
151 | | |
152 | | static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t"); |
153 | | |
154 | | entry_t make_entry(index_t i, coefficient_t c) { return entry_t(i, c); } |
155 | | index_t get_index(const entry_t& e) { return e.index; } |
156 | | index_t get_coefficient(const entry_t& e) { return e.coefficient; } |
157 | | void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; } |
158 | | |
159 | | std::ostream& operator<<(std::ostream& stream, const entry_t& e) { |
160 | | stream << get_index(e) << ":" << get_coefficient(e); |
161 | | return stream; |
162 | | } |
163 | | |
164 | | #else |
165 | | |
166 | | typedef index_t entry_t; |
167 | 0 | const index_t get_index(const entry_t& i) { return i; } |
168 | 0 | index_t get_coefficient(const entry_t& i) { return 1; } |
169 | 0 | entry_t make_entry(index_t _index, coefficient_t _value) { return entry_t(_index); } |
170 | 0 | void set_coefficient(entry_t& e, const coefficient_t c) {} |
171 | | |
172 | | #endif |
173 | | |
174 | 0 | const entry_t& get_entry(const entry_t& e) { return e; } |
175 | | |
176 | | typedef std::pair<value_t, index_t> diameter_index_t; |
177 | 0 | value_t get_diameter(const diameter_index_t& i) { return i.first; } |
178 | 0 | index_t get_index(const diameter_index_t& i) { return i.second; } |
179 | | |
180 | | typedef std::pair<index_t, value_t> index_diameter_t; |
181 | 0 | index_t get_index(const index_diameter_t& i) { return i.first; } |
182 | 0 | value_t get_diameter(const index_diameter_t& i) { return i.second; } |
183 | | |
184 | | struct diameter_entry_t : std::pair<value_t, entry_t> { |
185 | | using std::pair<value_t, entry_t>::pair; |
186 | | diameter_entry_t(value_t _diameter, index_t _index, coefficient_t _coefficient) |
187 | 0 | : diameter_entry_t(_diameter, make_entry(_index, _coefficient)) {} |
188 | | diameter_entry_t(const diameter_index_t& _diameter_index, coefficient_t _coefficient) |
189 | 0 | : diameter_entry_t(get_diameter(_diameter_index), |
190 | 0 | make_entry(get_index(_diameter_index), _coefficient)) {} |
191 | | diameter_entry_t(const diameter_index_t& _diameter_index) |
192 | 0 | : diameter_entry_t(get_diameter(_diameter_index), |
193 | 0 | make_entry(get_index(_diameter_index), 0)) {} |
194 | 0 | diameter_entry_t(const index_t& _index) : diameter_entry_t(0, _index, 0) {} |
195 | | }; |
196 | | |
197 | 0 | const entry_t& get_entry(const diameter_entry_t& p) { return p.second; } |
198 | 0 | entry_t& get_entry(diameter_entry_t& p) { return p.second; } |
199 | 0 | const index_t get_index(const diameter_entry_t& p) { return get_index(get_entry(p)); } |
200 | 0 | const coefficient_t get_coefficient(const diameter_entry_t& p) { |
201 | 0 | return get_coefficient(get_entry(p)); |
202 | 0 | } |
203 | 0 | const value_t& get_diameter(const diameter_entry_t& p) { return p.first; } |
204 | 0 | void set_coefficient(diameter_entry_t& p, const coefficient_t c) { |
205 | 0 | set_coefficient(get_entry(p), c); |
206 | 0 | } |
207 | | |
208 | | template <typename Entry> struct greater_diameter_or_smaller_index_comp { |
209 | 0 | bool operator()(const Entry& a, const Entry& b) { |
210 | 0 | return greater_diameter_or_smaller_index(a, b); |
211 | 0 | } |
212 | | }; |
213 | | |
214 | 0 | template <typename Entry> bool greater_diameter_or_smaller_index(const Entry& a, const Entry& b) { |
215 | 0 | return (get_diameter(a) > get_diameter(b)) || |
216 | 0 | ((get_diameter(a) == get_diameter(b)) && (get_index(a) < get_index(b))); |
217 | 0 | } Unexecuted instantiation: bool greater_diameter_or_smaller_index<std::__1::pair<float, long> >(std::__1::pair<float, long> const&, std::__1::pair<float, long> const&) Unexecuted instantiation: bool greater_diameter_or_smaller_index<diameter_entry_t>(diameter_entry_t const&, diameter_entry_t const&) |
218 | | |
219 | 0 | template <> void compressed_lower_distance_matrix::init_rows() { |
220 | 0 | value_t* pointer = &distances[0]; |
221 | 0 | for (size_t i = 1; i < size(); ++i) { |
222 | 0 | rows[i] = pointer; |
223 | 0 | pointer += i; |
224 | 0 | } |
225 | 0 | } |
226 | | |
227 | 0 | template <> void compressed_upper_distance_matrix::init_rows() { |
228 | 0 | value_t* pointer = &distances[0] - 1; |
229 | 0 | for (size_t i = 0; i < size() - 1; ++i) { |
230 | 0 | rows[i] = pointer; |
231 | 0 | pointer += size() - i - 2; |
232 | 0 | } |
233 | 0 | } |
234 | | |
235 | | template <> |
236 | 0 | value_t compressed_lower_distance_matrix::operator()(const index_t i, const index_t j) const { |
237 | 0 | return i == j ? 0 : i < j ? rows[j][i] : rows[i][j]; |
238 | 0 | } |
239 | | |
240 | | template <> |
241 | 0 | value_t compressed_upper_distance_matrix::operator()(const index_t i, const index_t j) const { |
242 | 0 | return i == j ? 0 : i > j ? rows[j][i] : rows[i][j]; |
243 | 0 | } |
244 | | |
245 | | struct sparse_distance_matrix { |
246 | | std::vector<std::vector<index_diameter_t>> neighbors; |
247 | | |
248 | | index_t num_edges; |
249 | | |
250 | | sparse_distance_matrix(std::vector<std::vector<index_diameter_t>>&& _neighbors, |
251 | | index_t _num_edges) |
252 | 0 | : neighbors(std::move(_neighbors)), num_edges(_num_edges) {} |
253 | | |
254 | | template <typename DistanceMatrix> |
255 | | sparse_distance_matrix(const DistanceMatrix& mat, const value_t threshold) |
256 | | : neighbors(mat.size()), num_edges(0) { |
257 | | |
258 | | for (size_t i = 0; i < size(); ++i) |
259 | | for (size_t j = 0; j < size(); ++j) |
260 | | if (i != j) { |
261 | | auto d = mat(i, j); |
262 | | if (d <= threshold) { |
263 | | ++num_edges; |
264 | | neighbors[i].push_back({j, d}); |
265 | | } |
266 | | } |
267 | | } |
268 | | |
269 | 0 | value_t operator()(const index_t i, const index_t j) const { |
270 | 0 | auto neighbor = |
271 | 0 | std::lower_bound(neighbors[i].begin(), neighbors[i].end(), index_diameter_t{j, 0}); |
272 | 0 | return (neighbor != neighbors[i].end() && get_index(*neighbor) == j) |
273 | 0 | ? get_diameter(*neighbor) |
274 | 0 | : std::numeric_limits<value_t>::infinity(); |
275 | 0 | } |
276 | | |
277 | 0 | size_t size() const { return neighbors.size(); } |
278 | | }; |
279 | | |
280 | | struct euclidean_distance_matrix { |
281 | | std::vector<std::vector<value_t>> points; |
282 | | |
283 | | euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points) |
284 | 0 | : points(std::move(_points)) { |
285 | 0 | for (auto p : points) { assert(p.size() == points.front().size()); } |
286 | 0 | } |
287 | | |
288 | 0 | value_t operator()(const index_t i, const index_t j) const { |
289 | 0 | assert(i < points.size()); |
290 | 0 | assert(j < points.size()); |
291 | 0 | return std::sqrt(std::inner_product( |
292 | 0 | points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus<value_t>(), |
293 | 0 | [](value_t u, value_t v) { return (u - v) * (u - v); })); |
294 | 0 | } |
295 | | |
296 | 0 | size_t size() const { return points.size(); } |
297 | | }; |
298 | | |
299 | | class union_find { |
300 | | std::vector<index_t> parent; |
301 | | std::vector<uint8_t> rank; |
302 | | |
303 | | public: |
304 | 0 | union_find(const index_t n) : parent(n), rank(n, 0) { |
305 | 0 | for (index_t i = 0; i < n; ++i) parent[i] = i; |
306 | 0 | } |
307 | | |
308 | 0 | index_t find(index_t x) { |
309 | 0 | index_t y = x, z; |
310 | 0 | while ((z = parent[y]) != y) y = z; |
311 | 0 | while ((z = parent[x]) != y) { |
312 | 0 | parent[x] = y; |
313 | 0 | x = z; |
314 | 0 | } |
315 | 0 | return z; |
316 | 0 | } |
317 | | |
318 | 0 | void link(index_t x, index_t y) { |
319 | 0 | if ((x = find(x)) == (y = find(y))) return; |
320 | 0 | if (rank[x] > rank[y]) |
321 | 0 | parent[y] = x; |
322 | 0 | else { |
323 | 0 | parent[x] = y; |
324 | 0 | if (rank[x] == rank[y]) ++rank[y]; |
325 | 0 | } |
326 | 0 | } |
327 | | }; |
328 | | |
329 | 0 | template <typename T> T begin(std::pair<T, T>& p) { return p.first; } |
330 | 0 | template <typename T> T end(std::pair<T, T>& p) { return p.second; } |
331 | | |
332 | | template <typename ValueType> class compressed_sparse_matrix { |
333 | | std::vector<size_t> bounds; |
334 | | std::vector<ValueType> entries; |
335 | | |
336 | | typedef typename std::vector<ValueType>::iterator iterator; |
337 | | typedef std::pair<iterator, iterator> iterator_pair; |
338 | | |
339 | | public: |
340 | 0 | size_t size() const { return bounds.size(); } |
341 | | |
342 | 0 | iterator_pair subrange(const index_t index) { |
343 | 0 | return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]), |
344 | 0 | entries.begin() + bounds[index]}; |
345 | 0 | } |
346 | | |
347 | 0 | void append_column() { bounds.push_back(entries.size()); } |
348 | | |
349 | 0 | void push_back(const ValueType e) { |
350 | 0 | assert(0 < size()); |
351 | 0 | entries.push_back(e); |
352 | 0 | ++bounds.back(); |
353 | 0 | } |
354 | | }; |
355 | | |
356 | | template <class Predicate> |
357 | 0 | index_t get_max(index_t top, const index_t bottom, const Predicate pred) { |
358 | 0 | if (!pred(top)) { |
359 | 0 | index_t count = top - bottom; |
360 | 0 | while (count > 0) { |
361 | 0 | index_t step = count >> 1, mid = top - step; |
362 | 0 | if (!pred(mid)) { |
363 | 0 | top = mid - 1; |
364 | 0 | count -= step + 1; |
365 | 0 | } else |
366 | 0 | count = step; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | return top; |
370 | 0 | } Unexecuted instantiation: long get_max<ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const::{lambda(long)#1}>(long, long, ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const::{lambda(long)#1})Unexecuted instantiation: long get_max<ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const::{lambda(long)#1}>(long, long, ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const::{lambda(long)#1}) |
371 | | |
372 | | template <typename DistanceMatrix> class ripser { |
373 | | const DistanceMatrix dist; |
374 | | const index_t n, dim_max; |
375 | | const value_t threshold; |
376 | | const float ratio; |
377 | | const coefficient_t modulus; |
378 | | const binomial_coeff_table binomial_coeff; |
379 | | const std::vector<coefficient_t> multiplicative_inverse; |
380 | | mutable std::vector<diameter_entry_t> cofacet_entries; |
381 | | mutable std::vector<index_t> vertices; |
382 | | interval_recorder recorder_; |
383 | | |
384 | | struct entry_hash { |
385 | 0 | std::size_t operator()(const entry_t& e) const { return hash<index_t>()(::get_index(e)); } |
386 | | }; |
387 | | |
388 | | struct equal_index { |
389 | 0 | bool operator()(const entry_t& e, const entry_t& f) const { |
390 | 0 | return ::get_index(e) == ::get_index(f); |
391 | 0 | } |
392 | | }; |
393 | | |
394 | | typedef hash_map<entry_t, size_t, entry_hash, equal_index> entry_hash_map; |
395 | | |
396 | | public: |
397 | | ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio, |
398 | | coefficient_t _modulus, interval_recorder recorder = {}) |
399 | 0 | : dist(std::move(_dist)), n(dist.size()), |
400 | 0 | dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold), |
401 | 0 | ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2), |
402 | 0 | multiplicative_inverse(multiplicative_inverse_vector(_modulus)), |
403 | 0 | recorder_(recorder) {} |
404 | | |
405 | 0 | index_t get_max_vertex(const index_t idx, const index_t k, const index_t n) const { |
406 | 0 | return get_max(n, k - 1, [&](index_t w) -> bool { return (binomial_coeff(w, k) <= idx); }); |
407 | 0 | } Unexecuted instantiation: ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const Unexecuted instantiation: ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const |
408 | | |
409 | 0 | index_t get_edge_index(const index_t i, const index_t j) const { |
410 | 0 | return binomial_coeff(i, 2) + j; |
411 | 0 | } |
412 | | |
413 | | template <typename OutputIterator> |
414 | | OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t n, |
415 | 0 | OutputIterator out) const { |
416 | 0 | --n; |
417 | 0 | for (index_t k = dim + 1; k > 0; --k) { |
418 | 0 | n = get_max_vertex(idx, k, n); |
419 | 0 | *out++ = n; |
420 | 0 | idx -= binomial_coeff(n, k); |
421 | 0 | } |
422 | 0 | return out; |
423 | 0 | } Unexecuted instantiation: std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_simplex_vertices<std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > >(long, long, long, std::__1::reverse_iterator<std::__1::__wrap_iter<long*> >) const Unexecuted instantiation: std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > ripser<sparse_distance_matrix>::get_simplex_vertices<std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > >(long, long, long, std::__1::reverse_iterator<std::__1::__wrap_iter<long*> >) const |
424 | | |
425 | 0 | value_t compute_diameter(const index_t index, const index_t dim) const { |
426 | 0 | value_t diam = -std::numeric_limits<value_t>::infinity(); |
427 | |
|
428 | 0 | vertices.resize(dim + 1); |
429 | 0 | get_simplex_vertices(index, dim, dist.size(), vertices.rbegin()); |
430 | |
|
431 | 0 | for (index_t i = 0; i <= dim; ++i) |
432 | 0 | for (index_t j = 0; j < i; ++j) { |
433 | 0 | diam = std::max(diam, dist(vertices[i], vertices[j])); |
434 | 0 | } |
435 | 0 | return diam; |
436 | 0 | } |
437 | | |
438 | | class simplex_coboundary_enumerator; |
439 | | |
440 | | class simplex_boundary_enumerator { |
441 | | private: |
442 | | index_t idx_below, idx_above, j, k; |
443 | | diameter_entry_t simplex; |
444 | | index_t dim; |
445 | | const coefficient_t modulus; |
446 | | const binomial_coeff_table& binomial_coeff; |
447 | | const ripser& parent; |
448 | | |
449 | | public: |
450 | | simplex_boundary_enumerator(const diameter_entry_t _simplex, const index_t _dim, |
451 | | const ripser& _parent) |
452 | 0 | : idx_below(get_index(_simplex)), idx_above(0), j(_parent.n - 1), k(_dim), |
453 | 0 | simplex(_simplex), modulus(_parent.modulus), binomial_coeff(_parent.binomial_coeff), |
454 | 0 | parent(_parent) {} |
455 | | |
456 | | simplex_boundary_enumerator(const index_t _dim, const ripser& _parent) |
457 | 0 | : simplex_boundary_enumerator(-1, _dim, _parent) {} |
458 | | |
459 | 0 | void set_simplex(const diameter_entry_t _simplex, const index_t _dim) { |
460 | 0 | idx_below = get_index(_simplex); |
461 | 0 | idx_above = 0; |
462 | 0 | j = parent.n - 1; |
463 | 0 | k = _dim; |
464 | 0 | simplex = _simplex; |
465 | 0 | dim = _dim; |
466 | 0 | } |
467 | | |
468 | 0 | bool has_next() { return (k >= 0); } |
469 | | |
470 | 0 | diameter_entry_t next() { |
471 | 0 | j = parent.get_max_vertex(idx_below, k + 1, j); |
472 | |
|
473 | 0 | index_t face_index = idx_above - binomial_coeff(j, k + 1) + idx_below; |
474 | |
|
475 | 0 | value_t face_diameter = parent.compute_diameter(face_index, dim - 1); |
476 | |
|
477 | 0 | coefficient_t face_coefficient = |
478 | 0 | (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus; |
479 | |
|
480 | 0 | idx_below -= binomial_coeff(j, k + 1); |
481 | 0 | idx_above += binomial_coeff(j, k); |
482 | |
|
483 | 0 | --k; |
484 | |
|
485 | 0 | return diameter_entry_t(face_diameter, face_index, face_coefficient); |
486 | 0 | } |
487 | | }; |
488 | | |
489 | 0 | diameter_entry_t get_zero_pivot_facet(const diameter_entry_t simplex, const index_t dim) { |
490 | 0 | static simplex_boundary_enumerator facets(0, *this); |
491 | 0 | facets.set_simplex(simplex, dim); |
492 | 0 | while (facets.has_next()) { |
493 | 0 | diameter_entry_t facet = facets.next(); |
494 | 0 | if (get_diameter(facet) == get_diameter(simplex)) return facet; |
495 | 0 | } |
496 | 0 | return diameter_entry_t(-1); |
497 | 0 | } |
498 | | |
499 | 0 | diameter_entry_t get_zero_pivot_cofacet(const diameter_entry_t simplex, const index_t dim) { |
500 | 0 | static simplex_coboundary_enumerator cofacets(*this); |
501 | 0 | cofacets.set_simplex(simplex, dim); |
502 | 0 | while (cofacets.has_next()) { |
503 | 0 | diameter_entry_t cofacet = cofacets.next(); |
504 | 0 | if (get_diameter(cofacet) == get_diameter(simplex)) return cofacet; |
505 | 0 | } |
506 | 0 | return diameter_entry_t(-1); |
507 | 0 | } |
508 | | |
509 | 0 | diameter_entry_t get_zero_apparent_facet(const diameter_entry_t simplex, const index_t dim) { |
510 | 0 | diameter_entry_t facet = get_zero_pivot_facet(simplex, dim); |
511 | 0 | return ((get_index(facet) != -1) && |
512 | 0 | (get_index(get_zero_pivot_cofacet(facet, dim - 1)) == get_index(simplex))) |
513 | 0 | ? facet |
514 | 0 | : diameter_entry_t(-1); |
515 | 0 | } |
516 | | |
517 | 0 | diameter_entry_t get_zero_apparent_cofacet(const diameter_entry_t simplex, const index_t dim) { |
518 | 0 | diameter_entry_t cofacet = get_zero_pivot_cofacet(simplex, dim); |
519 | 0 | return ((get_index(cofacet) != -1) && |
520 | 0 | (get_index(get_zero_pivot_facet(cofacet, dim + 1)) == get_index(simplex))) |
521 | 0 | ? cofacet |
522 | 0 | : diameter_entry_t(-1); |
523 | 0 | } |
524 | | |
525 | 0 | bool is_in_zero_apparent_pair(const diameter_entry_t simplex, const index_t dim) { |
526 | 0 | return (get_index(get_zero_apparent_cofacet(simplex, dim)) != -1) || |
527 | 0 | (get_index(get_zero_apparent_facet(simplex, dim)) != -1); |
528 | 0 | } |
529 | | |
530 | | void assemble_columns_to_reduce(std::vector<diameter_index_t>& simplices, |
531 | | std::vector<diameter_index_t>& columns_to_reduce, |
532 | 0 | entry_hash_map& pivot_column_index, index_t dim) { |
533 | |
|
534 | | #ifdef INDICATE_PROGRESS |
535 | | std::cerr << clear_line << "assembling columns" << std::flush; |
536 | | std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; |
537 | | #endif |
538 | |
|
539 | 0 | columns_to_reduce.clear(); |
540 | 0 | std::vector<diameter_index_t> next_simplices; |
541 | |
|
542 | 0 | simplex_coboundary_enumerator cofacets(*this); |
543 | |
|
544 | 0 | for (diameter_index_t& simplex : simplices) { |
545 | 0 | cofacets.set_simplex(diameter_entry_t(simplex, 1), dim - 1); |
546 | |
|
547 | 0 | while (cofacets.has_next(false)) { |
548 | | #ifdef INDICATE_PROGRESS |
549 | | if (std::chrono::steady_clock::now() > next) { |
550 | | std::cerr << clear_line << "assembling " << next_simplices.size() |
551 | | << " columns (processing " << std::distance(&simplices[0], &simplex) |
552 | | << "/" << simplices.size() << " simplices)" << std::flush; |
553 | | next = std::chrono::steady_clock::now() + time_step; |
554 | | } |
555 | | #endif |
556 | 0 | auto cofacet = cofacets.next(); |
557 | 0 | if (get_diameter(cofacet) <= threshold) { |
558 | 0 | next_simplices.push_back({get_diameter(cofacet), get_index(cofacet)}); |
559 | 0 | if (!is_in_zero_apparent_pair(cofacet, dim) && |
560 | 0 | (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end())) |
561 | 0 | columns_to_reduce.push_back({get_diameter(cofacet), get_index(cofacet)}); |
562 | 0 | } |
563 | 0 | } |
564 | 0 | } |
565 | |
|
566 | 0 | simplices.swap(next_simplices); |
567 | |
|
568 | | #ifdef INDICATE_PROGRESS |
569 | | std::cerr << clear_line << "sorting " << columns_to_reduce.size() << " columns" |
570 | | << std::flush; |
571 | | #endif |
572 | |
|
573 | 0 | std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), |
574 | 0 | greater_diameter_or_smaller_index<diameter_index_t>); |
575 | | #ifdef INDICATE_PROGRESS |
576 | | std::cerr << clear_line << std::flush; |
577 | | #endif |
578 | 0 | } |
579 | | |
580 | | void compute_dim_0_pairs(std::vector<diameter_index_t>& edges, |
581 | 0 | std::vector<diameter_index_t>& columns_to_reduce) { |
582 | | #ifdef PRINT_PERSISTENCE_PAIRS |
583 | | std::cout << "persistence intervals in dim 0:" << std::endl; |
584 | | #endif |
585 | |
|
586 | 0 | union_find dset(n); |
587 | |
|
588 | 0 | edges = get_edges(); |
589 | 0 | std::sort(edges.rbegin(), edges.rend(), |
590 | 0 | greater_diameter_or_smaller_index<diameter_index_t>); |
591 | 0 | std::vector<index_t> vertices_of_edge(2); |
592 | 0 | for (auto e : edges) { |
593 | 0 | get_simplex_vertices(get_index(e), 1, n, vertices_of_edge.rbegin()); |
594 | 0 | index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]); |
595 | |
|
596 | 0 | if (u != v) { |
597 | 0 | value_t birth = 0.0f; |
598 | 0 | value_t death = get_diameter(e); |
599 | |
|
600 | 0 | if (death != 0) { |
601 | | #ifdef PRINT_PERSISTENCE_PAIRS |
602 | | std::cout << " [" << birth << "," << death << ")" << std::endl; |
603 | | #endif |
604 | 0 | recorder_.emit(/*dim=*/0, birth, death); |
605 | 0 | } |
606 | 0 | dset.link(u, v); |
607 | 0 | } else if (get_index(get_zero_apparent_cofacet(e, 1)) == -1) |
608 | 0 | columns_to_reduce.push_back(e); |
609 | 0 | } |
610 | 0 | std::reverse(columns_to_reduce.begin(), columns_to_reduce.end()); |
611 | |
|
612 | | #ifdef PRINT_PERSISTENCE_PAIRS |
613 | | for (index_t i = 0; i < n; ++i) |
614 | | if (dset.find(i) == i) std::cout << " [0, )" << std::endl; |
615 | | #endif |
616 | 0 | for (index_t i = 0; i < n; ++i) { |
617 | 0 | if (dset.find(i) == i) { |
618 | 0 | recorder_.emit(/*dim=*/0, /*birth=*/0.0f, /*death=*/-1.0f); |
619 | 0 | } |
620 | 0 | } |
621 | 0 | } |
622 | | |
623 | 0 | template <typename Column> diameter_entry_t pop_pivot(Column& column) { |
624 | 0 | diameter_entry_t pivot(-1); |
625 | | #ifdef USE_COEFFICIENTS |
626 | | while (!column.empty()) { |
627 | | if (get_coefficient(pivot) == 0) |
628 | | pivot = column.top(); |
629 | | else if (get_index(column.top()) != get_index(pivot)) |
630 | | return pivot; |
631 | | else |
632 | | set_coefficient(pivot, |
633 | | (get_coefficient(pivot) + get_coefficient(column.top())) % modulus); |
634 | | column.pop(); |
635 | | } |
636 | | return (get_coefficient(pivot) == 0) ? -1 : pivot; |
637 | | #else |
638 | 0 | while (!column.empty()) { |
639 | 0 | pivot = column.top(); |
640 | 0 | column.pop(); |
641 | 0 | if (column.empty() || get_index(column.top()) != get_index(pivot)) return pivot; |
642 | 0 | column.pop(); |
643 | 0 | } |
644 | 0 | return -1; |
645 | 0 | #endif |
646 | 0 | } |
647 | | |
648 | 0 | template <typename Column> diameter_entry_t get_pivot(Column& column) { |
649 | 0 | diameter_entry_t result = pop_pivot(column); |
650 | 0 | if (get_index(result) != -1) column.push(result); |
651 | 0 | return result; |
652 | 0 | } |
653 | | |
654 | | template <typename Column> |
655 | | diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, |
656 | | Column& working_coboundary, const index_t& dim, |
657 | 0 | entry_hash_map& pivot_column_index) { |
658 | 0 | static simplex_coboundary_enumerator cofacets(*this); |
659 | 0 | bool check_for_emergent_pair = true; |
660 | 0 | cofacet_entries.clear(); |
661 | 0 | cofacets.set_simplex(simplex, dim); |
662 | 0 | while (cofacets.has_next()) { |
663 | 0 | diameter_entry_t cofacet = cofacets.next(); |
664 | 0 | if (get_diameter(cofacet) <= threshold) { |
665 | 0 | cofacet_entries.push_back(cofacet); |
666 | 0 | if (check_for_emergent_pair && (get_diameter(simplex) == get_diameter(cofacet))) { |
667 | 0 | if ((pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) && |
668 | 0 | (get_index(get_zero_apparent_facet(cofacet, dim + 1)) == -1)) |
669 | 0 | return cofacet; |
670 | 0 | check_for_emergent_pair = false; |
671 | 0 | } |
672 | 0 | } |
673 | 0 | } |
674 | 0 | for (auto cofacet : cofacet_entries) working_coboundary.push(cofacet); |
675 | 0 | return get_pivot(working_coboundary); |
676 | 0 | } |
677 | | |
678 | | template <typename Column> |
679 | | void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim, |
680 | 0 | Column& working_reduction_column, Column& working_coboundary) { |
681 | 0 | static simplex_coboundary_enumerator cofacets(*this); |
682 | 0 | working_reduction_column.push(simplex); |
683 | 0 | cofacets.set_simplex(simplex, dim); |
684 | 0 | while (cofacets.has_next()) { |
685 | 0 | diameter_entry_t cofacet = cofacets.next(); |
686 | 0 | if (get_diameter(cofacet) <= threshold) working_coboundary.push(cofacet); |
687 | 0 | } |
688 | 0 | } |
689 | | |
690 | | template <typename Column> |
691 | | void add_coboundary(compressed_sparse_matrix<diameter_entry_t>& reduction_matrix, |
692 | | const std::vector<diameter_index_t>& columns_to_reduce, |
693 | | const size_t index_column_to_add, const coefficient_t factor, |
694 | | const size_t& dim, Column& working_reduction_column, |
695 | 0 | Column& working_coboundary) { |
696 | 0 | diameter_entry_t column_to_add(columns_to_reduce[index_column_to_add], factor); |
697 | 0 | add_simplex_coboundary(column_to_add, dim, working_reduction_column, working_coboundary); |
698 | |
|
699 | 0 | for (diameter_entry_t simplex : reduction_matrix.subrange(index_column_to_add)) { |
700 | 0 | set_coefficient(simplex, get_coefficient(simplex) * factor % modulus); |
701 | 0 | add_simplex_coboundary(simplex, dim, working_reduction_column, working_coboundary); |
702 | 0 | } |
703 | 0 | } |
704 | | |
705 | | void compute_pairs(const std::vector<diameter_index_t>& columns_to_reduce, |
706 | 0 | entry_hash_map& pivot_column_index, const index_t dim) { |
707 | |
|
708 | | #ifdef PRINT_PERSISTENCE_PAIRS |
709 | | std::cout << "persistence intervals in dim " << dim << ":" << std::endl; |
710 | | #endif |
711 | |
|
712 | 0 | compressed_sparse_matrix<diameter_entry_t> reduction_matrix; |
713 | |
|
714 | | #ifdef INDICATE_PROGRESS |
715 | | std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; |
716 | | #endif |
717 | 0 | for (size_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size(); |
718 | 0 | ++index_column_to_reduce) { |
719 | |
|
720 | 0 | diameter_entry_t column_to_reduce(columns_to_reduce[index_column_to_reduce], 1); |
721 | 0 | value_t diameter = get_diameter(column_to_reduce); |
722 | |
|
723 | 0 | reduction_matrix.append_column(); |
724 | |
|
725 | 0 | std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>, |
726 | 0 | greater_diameter_or_smaller_index_comp<diameter_entry_t>> |
727 | 0 | working_reduction_column, working_coboundary; |
728 | |
|
729 | 0 | diameter_entry_t e, pivot = init_coboundary_and_get_pivot( |
730 | 0 | column_to_reduce, working_coboundary, dim, pivot_column_index); |
731 | |
|
732 | 0 | while (true) { |
733 | | #ifdef INDICATE_PROGRESS |
734 | | if (std::chrono::steady_clock::now() > next) { |
735 | | std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1 |
736 | | << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")" |
737 | | << std::flush; |
738 | | next = std::chrono::steady_clock::now() + time_step; |
739 | | } |
740 | | #endif |
741 | 0 | if (get_index(pivot) != -1) { |
742 | 0 | auto pair = pivot_column_index.find(get_entry(pivot)); |
743 | 0 | if (pair != pivot_column_index.end()) { |
744 | 0 | entry_t other_pivot = pair->first; |
745 | 0 | index_t index_column_to_add = pair->second; |
746 | 0 | coefficient_t factor = |
747 | 0 | modulus - get_coefficient(pivot) * |
748 | 0 | multiplicative_inverse[get_coefficient(other_pivot)] % |
749 | 0 | modulus; |
750 | |
|
751 | 0 | add_coboundary(reduction_matrix, columns_to_reduce, index_column_to_add, |
752 | 0 | factor, dim, working_reduction_column, working_coboundary); |
753 | |
|
754 | 0 | pivot = get_pivot(working_coboundary); |
755 | 0 | } else if (get_index(e = get_zero_apparent_facet(pivot, dim + 1)) != -1) { |
756 | 0 | set_coefficient(e, modulus - get_coefficient(e)); |
757 | |
|
758 | 0 | add_simplex_coboundary(e, dim, working_reduction_column, working_coboundary); |
759 | |
|
760 | 0 | pivot = get_pivot(working_coboundary); |
761 | 0 | } else { |
762 | 0 | value_t death = get_diameter(pivot); |
763 | 0 | if (death > diameter * ratio) { |
764 | | #ifdef PRINT_PERSISTENCE_PAIRS |
765 | | #ifdef INDICATE_PROGRESS |
766 | | std::cerr << clear_line << std::flush; |
767 | | #endif |
768 | | std::cout << " [" << diameter << "," << death << ")" << std::endl; |
769 | | #endif |
770 | 0 | recorder_.emit(/*dim=*/dim, diameter, death); |
771 | 0 | } |
772 | 0 | pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); |
773 | |
|
774 | 0 | while (true) { |
775 | 0 | diameter_entry_t e = pop_pivot(working_reduction_column); |
776 | 0 | if (get_index(e) == -1) break; |
777 | 0 | assert(get_coefficient(e) > 0); |
778 | 0 | reduction_matrix.push_back(e); |
779 | 0 | } |
780 | 0 | break; |
781 | 0 | } |
782 | 0 | } else { |
783 | | #ifdef PRINT_PERSISTENCE_PAIRS |
784 | | #ifdef INDICATE_PROGRESS |
785 | | std::cerr << clear_line << std::flush; |
786 | | #endif |
787 | | std::cout << " [" << diameter << ", )" << std::endl; |
788 | | #endif |
789 | 0 | recorder_.emit(/*dim=*/dim, diameter, /*death=*/-1.0f); |
790 | 0 | break; |
791 | 0 | } |
792 | 0 | } |
793 | 0 | } |
794 | | #ifdef INDICATE_PROGRESS |
795 | | std::cerr << clear_line << std::flush; |
796 | | #endif |
797 | 0 | } |
798 | | |
799 | | std::vector<diameter_index_t> get_edges(); |
800 | | |
801 | 0 | void compute_barcodes() { |
802 | 0 | std::vector<diameter_index_t> simplices, columns_to_reduce; |
803 | |
|
804 | 0 | compute_dim_0_pairs(simplices, columns_to_reduce); |
805 | |
|
806 | 0 | for (index_t dim = 1; dim <= dim_max; ++dim) { |
807 | 0 | entry_hash_map pivot_column_index; |
808 | 0 | pivot_column_index.reserve(columns_to_reduce.size()); |
809 | |
|
810 | 0 | compute_pairs(columns_to_reduce, pivot_column_index, dim); |
811 | |
|
812 | 0 | if (dim < dim_max) |
813 | 0 | assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, |
814 | 0 | dim + 1); |
815 | 0 | } |
816 | 0 | } |
817 | | }; |
818 | | |
819 | | template <> class ripser<compressed_lower_distance_matrix>::simplex_coboundary_enumerator { |
820 | | index_t idx_below, idx_above, j, k; |
821 | | std::vector<index_t> vertices; |
822 | | diameter_entry_t simplex; |
823 | | const coefficient_t modulus; |
824 | | const compressed_lower_distance_matrix& dist; |
825 | | const binomial_coeff_table& binomial_coeff; |
826 | | const ripser<compressed_lower_distance_matrix>& parent; |
827 | | |
828 | | public: |
829 | | simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim, |
830 | | const ripser<compressed_lower_distance_matrix>& _parent) |
831 | | : modulus(_parent.modulus), dist(_parent.dist), |
832 | 0 | binomial_coeff(_parent.binomial_coeff), parent(_parent) { |
833 | 0 | if (get_index(_simplex) != -1) |
834 | 0 | parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.rbegin()); |
835 | 0 | } |
836 | | |
837 | | simplex_coboundary_enumerator(const ripser<compressed_lower_distance_matrix>& _parent) |
838 | 0 | : modulus(_parent.modulus), dist(_parent.dist), |
839 | 0 | binomial_coeff(_parent.binomial_coeff), parent(_parent) {} |
840 | | |
841 | 0 | void set_simplex(const diameter_entry_t _simplex, const index_t _dim) { |
842 | 0 | idx_below = get_index(_simplex); |
843 | 0 | idx_above = 0; |
844 | 0 | j = parent.n - 1; |
845 | 0 | k = _dim + 1; |
846 | 0 | simplex = _simplex; |
847 | 0 | vertices.resize(_dim + 1); |
848 | 0 | parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.rbegin()); |
849 | 0 | } |
850 | | |
851 | 0 | bool has_next(bool all_cofacets = true) { |
852 | 0 | return (j >= k && (all_cofacets || binomial_coeff(j, k) > idx_below)); |
853 | 0 | } |
854 | | |
855 | 0 | diameter_entry_t next() { |
856 | 0 | while ((binomial_coeff(j, k) <= idx_below)) { |
857 | 0 | idx_below -= binomial_coeff(j, k); |
858 | 0 | idx_above += binomial_coeff(j, k + 1); |
859 | 0 | --j; |
860 | 0 | --k; |
861 | 0 | assert(k != -1); |
862 | 0 | } |
863 | 0 | value_t cofacet_diameter = get_diameter(simplex); |
864 | 0 | for (index_t i : vertices) cofacet_diameter = std::max(cofacet_diameter, dist(j, i)); |
865 | 0 | index_t cofacet_index = idx_above + binomial_coeff(j--, k + 1) + idx_below; |
866 | 0 | coefficient_t cofacet_coefficient = |
867 | 0 | (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus; |
868 | 0 | return diameter_entry_t(cofacet_diameter, cofacet_index, cofacet_coefficient); |
869 | 0 | } |
870 | | }; |
871 | | |
872 | | template <> class ripser<sparse_distance_matrix>::simplex_coboundary_enumerator { |
873 | | index_t idx_below, idx_above, k; |
874 | | std::vector<index_t> vertices; |
875 | | diameter_entry_t simplex; |
876 | | const coefficient_t modulus; |
877 | | const sparse_distance_matrix& dist; |
878 | | const binomial_coeff_table& binomial_coeff; |
879 | | std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_it; |
880 | | std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_end; |
881 | | index_diameter_t neighbor; |
882 | | const ripser<sparse_distance_matrix>& parent; |
883 | | |
884 | | public: |
885 | | simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim, |
886 | | const ripser<sparse_distance_matrix>& _parent) |
887 | | : modulus(_parent.modulus), dist(_parent.dist), |
888 | 0 | binomial_coeff(_parent.binomial_coeff), parent(_parent) { |
889 | 0 | if (get_index(_simplex) != -1) set_simplex(_simplex, _dim); |
890 | 0 | } |
891 | | |
892 | | simplex_coboundary_enumerator(const ripser<sparse_distance_matrix>& _parent) |
893 | | : modulus(_parent.modulus), dist(_parent.dist), |
894 | 0 | binomial_coeff(_parent.binomial_coeff), parent(_parent) {} |
895 | | |
896 | 0 | void set_simplex(const diameter_entry_t _simplex, const index_t _dim) { |
897 | 0 | idx_below = get_index(_simplex); |
898 | 0 | idx_above = 0; |
899 | 0 | k = _dim + 1; |
900 | 0 | simplex = _simplex; |
901 | 0 | vertices.resize(_dim + 1); |
902 | 0 | parent.get_simplex_vertices(idx_below, _dim, parent.n, vertices.rbegin()); |
903 | 0 |
|
904 | 0 | neighbor_it.resize(_dim + 1); |
905 | 0 | neighbor_end.resize(_dim + 1); |
906 | 0 | for (index_t i = 0; i <= _dim; ++i) { |
907 | 0 | auto v = vertices[i]; |
908 | 0 | neighbor_it[i] = dist.neighbors[v].rbegin(); |
909 | 0 | neighbor_end[i] = dist.neighbors[v].rend(); |
910 | 0 | } |
911 | 0 | } |
912 | | |
913 | 0 | bool has_next(bool all_cofacets = true) { |
914 | 0 | for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) { |
915 | 0 | neighbor = *it0; |
916 | 0 | for (size_t idx = 1; idx < neighbor_it.size(); ++idx) { |
917 | 0 | auto &it = neighbor_it[idx], end = neighbor_end[idx]; |
918 | 0 | while (get_index(*it) > get_index(neighbor)) |
919 | 0 | if (++it == end) return false; |
920 | 0 | if (get_index(*it) != get_index(neighbor)) |
921 | 0 | goto continue_outer; |
922 | 0 | else |
923 | 0 | neighbor = std::max(neighbor, *it); |
924 | 0 | } |
925 | 0 | while (k > 0 && vertices[k - 1] > get_index(neighbor)) { |
926 | 0 | if (!all_cofacets) return false; |
927 | 0 | idx_below -= binomial_coeff(vertices[k - 1], k); |
928 | 0 | idx_above += binomial_coeff(vertices[k - 1], k + 1); |
929 | 0 | --k; |
930 | 0 | } |
931 | 0 | return true; |
932 | 0 | continue_outer:; |
933 | 0 | } |
934 | 0 | return false; |
935 | 0 | } |
936 | | |
937 | 0 | diameter_entry_t next() { |
938 | 0 | ++neighbor_it[0]; |
939 | 0 | value_t cofacet_diameter = std::max(get_diameter(simplex), get_diameter(neighbor)); |
940 | 0 | index_t cofacet_index = idx_above + binomial_coeff(get_index(neighbor), k + 1) + idx_below; |
941 | 0 | coefficient_t cofacet_coefficient = |
942 | 0 | (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus; |
943 | 0 | return diameter_entry_t(cofacet_diameter, cofacet_index, cofacet_coefficient); |
944 | 0 | } |
945 | | }; |
946 | | |
947 | 0 | template <> std::vector<diameter_index_t> ripser<compressed_lower_distance_matrix>::get_edges() { |
948 | 0 | std::vector<diameter_index_t> edges; |
949 | 0 | std::vector<index_t> vertices(2); |
950 | 0 | for (index_t index = binomial_coeff(n, 2); index-- > 0;) { |
951 | 0 | get_simplex_vertices(index, 1, dist.size(), vertices.rbegin()); |
952 | 0 | value_t length = dist(vertices[0], vertices[1]); |
953 | 0 | if (length <= threshold) edges.push_back({length, index}); |
954 | 0 | } |
955 | 0 | return edges; |
956 | 0 | } |
957 | | |
958 | 0 | template <> std::vector<diameter_index_t> ripser<sparse_distance_matrix>::get_edges() { |
959 | 0 | std::vector<diameter_index_t> edges; |
960 | 0 | for (index_t i = 0; i < n; ++i) |
961 | 0 | for (auto n : dist.neighbors[i]) { |
962 | 0 | index_t j = get_index(n); |
963 | 0 | if (i > j) edges.push_back({get_diameter(n), get_edge_index(i, j)}); |
964 | 0 | } |
965 | 0 | return edges; |
966 | 0 | } |
967 | | |
968 | | void ripser_run_from_compressed_lower( |
969 | | compressed_lower_distance_matrix &&dist, |
970 | | index_t dim_max, |
971 | | value_t threshold, |
972 | | float ratio, |
973 | | interval_recorder recorder) |
974 | 0 | { |
975 | 0 | coefficient_t modulus = 2; // Z/2Z |
976 | |
|
977 | 0 | ripser<compressed_lower_distance_matrix> r( |
978 | 0 | std::move(dist), |
979 | 0 | dim_max, |
980 | 0 | threshold, |
981 | 0 | ratio, |
982 | 0 | modulus, |
983 | 0 | recorder); |
984 | |
|
985 | 0 | r.compute_barcodes(); |
986 | 0 | } |
987 | | |
988 | | /* The below code is only used for the original ripser implementation |
989 | | * to create executabels. */ |
990 | | #ifdef RIPSEREXE |
991 | | |
992 | | enum file_format { |
993 | | LOWER_DISTANCE_MATRIX, |
994 | | UPPER_DISTANCE_MATRIX, |
995 | | DISTANCE_MATRIX, |
996 | | POINT_CLOUD, |
997 | | DIPHA, |
998 | | SPARSE, |
999 | | BINARY |
1000 | | }; |
1001 | | |
1002 | | static const uint16_t endian_check(0xff00); |
1003 | | static const bool is_big_endian = *reinterpret_cast<const uint8_t*>(&endian_check); |
1004 | | |
1005 | | template <typename T> T read(std::istream& input_stream) { |
1006 | | T result; |
1007 | | char* p = reinterpret_cast<char*>(&result); |
1008 | | if (input_stream.read(p, sizeof(T)).gcount() != sizeof(T)) return T(); |
1009 | | if (is_big_endian) std::reverse(p, p + sizeof(T)); |
1010 | | return result; |
1011 | | } |
1012 | | |
1013 | | euclidean_distance_matrix read_point_cloud(std::istream& input_stream) { |
1014 | | std::vector<std::vector<value_t>> points; |
1015 | | |
1016 | | std::string line; |
1017 | | value_t value; |
1018 | | while (std::getline(input_stream, line)) { |
1019 | | std::vector<value_t> point; |
1020 | | std::istringstream s(line); |
1021 | | while (s >> value) { |
1022 | | point.push_back(value); |
1023 | | s.ignore(); |
1024 | | } |
1025 | | if (!point.empty()) points.push_back(point); |
1026 | | assert(point.size() == points.front().size()); |
1027 | | } |
1028 | | |
1029 | | euclidean_distance_matrix eucl_dist(std::move(points)); |
1030 | | index_t n = eucl_dist.size(); |
1031 | | std::cout << "point cloud with " << n << " points in dimension " |
1032 | | << eucl_dist.points.front().size() << std::endl; |
1033 | | |
1034 | | return eucl_dist; |
1035 | | } |
1036 | | |
1037 | | sparse_distance_matrix read_sparse_distance_matrix(std::istream& input_stream) { |
1038 | | std::vector<std::vector<index_diameter_t>> neighbors; |
1039 | | index_t num_edges = 0; |
1040 | | |
1041 | | std::string line; |
1042 | | while (std::getline(input_stream, line)) { |
1043 | | std::istringstream s(line); |
1044 | | size_t i, j; |
1045 | | value_t value; |
1046 | | s >> i; |
1047 | | s >> j; |
1048 | | s >> value; |
1049 | | if (i != j) { |
1050 | | neighbors.resize(std::max({neighbors.size(), i + 1, j + 1})); |
1051 | | neighbors[i].push_back({j, value}); |
1052 | | neighbors[j].push_back({i, value}); |
1053 | | ++num_edges; |
1054 | | } |
1055 | | } |
1056 | | |
1057 | | for (size_t i = 0; i < neighbors.size(); ++i) |
1058 | | std::sort(neighbors[i].begin(), neighbors[i].end()); |
1059 | | |
1060 | | return sparse_distance_matrix(std::move(neighbors), num_edges); |
1061 | | } |
1062 | | |
1063 | | compressed_lower_distance_matrix read_lower_distance_matrix(std::istream& input_stream) { |
1064 | | std::vector<value_t> distances; |
1065 | | value_t value; |
1066 | | while (input_stream >> value) { |
1067 | | distances.push_back(value); |
1068 | | input_stream.ignore(); |
1069 | | } |
1070 | | |
1071 | | return compressed_lower_distance_matrix(std::move(distances)); |
1072 | | } |
1073 | | |
1074 | | compressed_lower_distance_matrix read_upper_distance_matrix(std::istream& input_stream) { |
1075 | | std::vector<value_t> distances; |
1076 | | value_t value; |
1077 | | while (input_stream >> value) { |
1078 | | distances.push_back(value); |
1079 | | input_stream.ignore(); |
1080 | | } |
1081 | | |
1082 | | return compressed_lower_distance_matrix(compressed_upper_distance_matrix(std::move(distances))); |
1083 | | } |
1084 | | |
1085 | | compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream) { |
1086 | | std::vector<value_t> distances; |
1087 | | |
1088 | | std::string line; |
1089 | | value_t value; |
1090 | | for (int i = 0; std::getline(input_stream, line); ++i) { |
1091 | | std::istringstream s(line); |
1092 | | for (int j = 0; j < i && s >> value; ++j) { |
1093 | | distances.push_back(value); |
1094 | | s.ignore(); |
1095 | | } |
1096 | | } |
1097 | | |
1098 | | return compressed_lower_distance_matrix(std::move(distances)); |
1099 | | } |
1100 | | |
1101 | | compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { |
1102 | | if (read<int64_t>(input_stream) != 8067171840) { |
1103 | | std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl; |
1104 | | exit(-1); |
1105 | | } |
1106 | | |
1107 | | if (read<int64_t>(input_stream) != 7) { |
1108 | | std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl; |
1109 | | exit(-1); |
1110 | | } |
1111 | | |
1112 | | index_t n = read<int64_t>(input_stream); |
1113 | | |
1114 | | std::vector<value_t> distances; |
1115 | | |
1116 | | for (int i = 0; i < n; ++i) |
1117 | | for (int j = 0; j < n; ++j) |
1118 | | if (i > j) |
1119 | | distances.push_back(read<double>(input_stream)); |
1120 | | else |
1121 | | read<double>(input_stream); |
1122 | | |
1123 | | return compressed_lower_distance_matrix(std::move(distances)); |
1124 | | } |
1125 | | |
1126 | | compressed_lower_distance_matrix read_binary(std::istream& input_stream) { |
1127 | | std::vector<value_t> distances; |
1128 | | while (!input_stream.eof()) distances.push_back(read<value_t>(input_stream)); |
1129 | | return compressed_lower_distance_matrix(std::move(distances)); |
1130 | | } |
1131 | | |
1132 | | compressed_lower_distance_matrix read_file(std::istream& input_stream, const file_format format) { |
1133 | | switch (format) { |
1134 | | case LOWER_DISTANCE_MATRIX: |
1135 | | return read_lower_distance_matrix(input_stream); |
1136 | | case UPPER_DISTANCE_MATRIX: |
1137 | | return read_upper_distance_matrix(input_stream); |
1138 | | case DISTANCE_MATRIX: |
1139 | | return read_distance_matrix(input_stream); |
1140 | | case POINT_CLOUD: |
1141 | | return read_point_cloud(input_stream); |
1142 | | case DIPHA: |
1143 | | return read_dipha(input_stream); |
1144 | | default: |
1145 | | return read_binary(input_stream); |
1146 | | } |
1147 | | } |
1148 | | |
1149 | | void print_usage_and_exit(int exit_code) { |
1150 | | std::cerr |
1151 | | << "Usage: " |
1152 | | << "ripser " |
1153 | | << "[options] [filename]" << std::endl |
1154 | | << std::endl |
1155 | | << "Options:" << std::endl |
1156 | | << std::endl |
1157 | | << " --help print this screen" << std::endl |
1158 | | << " --format use the specified file format for the input. Options are:" |
1159 | | << std::endl |
1160 | | << " lower-distance (lower triangular distance matrix; default)" |
1161 | | << std::endl |
1162 | | << " upper-distance (upper triangular distance matrix)" << std::endl |
1163 | | << " distance (full distance matrix)" << std::endl |
1164 | | << " point-cloud (point cloud in Euclidean space)" << std::endl |
1165 | | << " dipha (distance matrix in DIPHA file format)" << std::endl |
1166 | | << " sparse (sparse distance matrix in sparse triplet format)" |
1167 | | << std::endl |
1168 | | << " binary (lower triangular distance matrix in binary format)" |
1169 | | << std::endl |
1170 | | << " --dim <k> compute persistent homology up to dimension k" << std::endl |
1171 | | << " --threshold <t> compute Rips complexes up to diameter t" << std::endl |
1172 | | #ifdef USE_COEFFICIENTS |
1173 | | << " --modulus <p> compute homology with coefficients in the prime field Z/pZ" |
1174 | | << std::endl |
1175 | | #endif |
1176 | | << " --ratio <r> only show persistence pairs with death/birth ratio > r" << std::endl |
1177 | | << std::endl; |
1178 | | exit(exit_code); |
1179 | | } |
1180 | | |
1181 | | int main(int argc, char** argv) { |
1182 | | const char* filename = nullptr; |
1183 | | |
1184 | | file_format format = DISTANCE_MATRIX; |
1185 | | |
1186 | | index_t dim_max = 1; |
1187 | | value_t threshold = std::numeric_limits<value_t>::max(); |
1188 | | float ratio = 1; |
1189 | | coefficient_t modulus = 2; |
1190 | | |
1191 | | for (index_t i = 1; i < argc; ++i) { |
1192 | | const std::string arg(argv[i]); |
1193 | | if (arg == "--help") { |
1194 | | print_usage_and_exit(0); |
1195 | | } else if (arg == "--dim") { |
1196 | | std::string parameter = std::string(argv[++i]); |
1197 | | size_t next_pos; |
1198 | | dim_max = std::stol(parameter, &next_pos); |
1199 | | if (next_pos != parameter.size()) print_usage_and_exit(-1); |
1200 | | } else if (arg == "--threshold") { |
1201 | | std::string parameter = std::string(argv[++i]); |
1202 | | size_t next_pos; |
1203 | | threshold = std::stof(parameter, &next_pos); |
1204 | | if (next_pos != parameter.size()) print_usage_and_exit(-1); |
1205 | | } else if (arg == "--ratio") { |
1206 | | std::string parameter = std::string(argv[++i]); |
1207 | | size_t next_pos; |
1208 | | ratio = std::stof(parameter, &next_pos); |
1209 | | if (next_pos != parameter.size()) print_usage_and_exit(-1); |
1210 | | } else if (arg == "--format") { |
1211 | | std::string parameter = std::string(argv[++i]); |
1212 | | if (parameter.rfind("lower", 0) == 0) |
1213 | | format = LOWER_DISTANCE_MATRIX; |
1214 | | else if (parameter.rfind("upper", 0) == 0) |
1215 | | format = UPPER_DISTANCE_MATRIX; |
1216 | | else if (parameter.rfind("dist", 0) == 0) |
1217 | | format = DISTANCE_MATRIX; |
1218 | | else if (parameter.rfind("point", 0) == 0) |
1219 | | format = POINT_CLOUD; |
1220 | | else if (parameter == "dipha") |
1221 | | format = DIPHA; |
1222 | | else if (parameter == "sparse") |
1223 | | format = SPARSE; |
1224 | | else if (parameter == "binary") |
1225 | | format = BINARY; |
1226 | | else |
1227 | | print_usage_and_exit(-1); |
1228 | | #ifdef USE_COEFFICIENTS |
1229 | | } else if (arg == "--modulus") { |
1230 | | std::string parameter = std::string(argv[++i]); |
1231 | | size_t next_pos; |
1232 | | modulus = std::stol(parameter, &next_pos); |
1233 | | if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1); |
1234 | | #endif |
1235 | | } else { |
1236 | | if (filename) { print_usage_and_exit(-1); } |
1237 | | filename = argv[i]; |
1238 | | } |
1239 | | } |
1240 | | |
1241 | | std::ifstream file_stream(filename); |
1242 | | if (filename && file_stream.fail()) { |
1243 | | std::cerr << "couldn't open file " << filename << std::endl; |
1244 | | exit(-1); |
1245 | | } |
1246 | | |
1247 | | if (format == SPARSE) { |
1248 | | sparse_distance_matrix dist = |
1249 | | read_sparse_distance_matrix(filename ? file_stream : std::cin); |
1250 | | std::cout << "sparse distance matrix with " << dist.size() << " points and " |
1251 | | << dist.num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" |
1252 | | << std::endl; |
1253 | | |
1254 | | ripser<sparse_distance_matrix>(std::move(dist), dim_max, threshold, ratio, modulus) |
1255 | | .compute_barcodes(); |
1256 | | } else if (format == POINT_CLOUD && threshold < std::numeric_limits<value_t>::max()) { |
1257 | | sparse_distance_matrix dist(read_point_cloud(filename ? file_stream : std::cin), threshold); |
1258 | | ripser<sparse_distance_matrix>(std::move(dist), dim_max, threshold, ratio, modulus) |
1259 | | .compute_barcodes(); |
1260 | | } else { |
1261 | | compressed_lower_distance_matrix dist = |
1262 | | read_file(filename ? file_stream : std::cin, format); |
1263 | | |
1264 | | value_t min = std::numeric_limits<value_t>::infinity(), |
1265 | | max = -std::numeric_limits<value_t>::infinity(), max_finite = max; |
1266 | | int num_edges = 0; |
1267 | | |
1268 | | value_t enclosing_radius = std::numeric_limits<value_t>::infinity(); |
1269 | | if (threshold == std::numeric_limits<value_t>::max()) { |
1270 | | for (size_t i = 0; i < dist.size(); ++i) { |
1271 | | value_t r_i = -std::numeric_limits<value_t>::infinity(); |
1272 | | for (size_t j = 0; j < dist.size(); ++j) r_i = std::max(r_i, dist(i, j)); |
1273 | | enclosing_radius = std::min(enclosing_radius, r_i); |
1274 | | } |
1275 | | } |
1276 | | |
1277 | | for (auto d : dist.distances) { |
1278 | | min = std::min(min, d); |
1279 | | max = std::max(max, d); |
1280 | | if (d != std::numeric_limits<value_t>::infinity()) max_finite = std::max(max_finite, d); |
1281 | | if (d <= threshold) ++num_edges; |
1282 | | } |
1283 | | std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl; |
1284 | | |
1285 | | if (threshold == std::numeric_limits<value_t>::max()) { |
1286 | | std::cout << "distance matrix with " << dist.size() |
1287 | | << " points, using threshold at enclosing radius " << enclosing_radius |
1288 | | << std::endl; |
1289 | | ripser<compressed_lower_distance_matrix>(std::move(dist), dim_max, enclosing_radius, |
1290 | | ratio, modulus) |
1291 | | .compute_barcodes(); |
1292 | | } else { |
1293 | | std::cout << "sparse distance matrix with " << dist.size() << " points and " |
1294 | | << num_edges << "/" << (dist.size() * dist.size() - 1) / 2 << " entries" |
1295 | | << std::endl; |
1296 | | |
1297 | | ripser<sparse_distance_matrix>(sparse_distance_matrix(std::move(dist), threshold), |
1298 | | dim_max, threshold, ratio, modulus) |
1299 | | .compute_barcodes(); |
1300 | | } |
1301 | | exit(0); |
1302 | | } |
1303 | | } |
1304 | | |
1305 | | #endif |