Coverage Report

Created: 2026-06-20 07:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fluent-bit/lib/ripser-1.2.1/ripser.cpp
Line
Count
Source
1
/*
2
3
 Ripser: a lean C++ code for computation of Vietoris-Rips persistence barcodes
4
5
 MIT License
6
7
 Copyright (c) 2015–2021 Ulrich Bauer
8
 Modifications Copyright (c) 2025 The Fluent Bit Authors
9
10
 Permission is hereby granted, free of charge, to any person obtaining a copy
11
 of this software and associated documentation files (the "Software"), to deal
12
 in the Software without restriction, including without limitation the rights
13
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
 copies of the Software, and to permit persons to whom the Software is
15
 furnished to do so, subject to the following conditions:
16
17
 The above copyright notice and this permission notice shall be included in all
18
 copies or substantial portions of the Software.
19
20
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
 SOFTWARE.
27
28
 You are under no obligation whatsoever to provide any bug fixes, patches, or
29
 upgrades to the features, functionality or performance of the source code
30
 ("Enhancements") to anyone; however, if you choose to make your Enhancements
31
 available either publicly, or directly to the author of this software, without
32
 imposing a separate written license agreement for such Enhancements, then you
33
 hereby grant the following license: a non-exclusive, royalty-free perpetual
34
 license to install, use, modify, prepare derivative works, incorporate into
35
 other computer software, distribute, and sublicense such enhancements or
36
 derivative works thereof, in binary and source code form.
37
38
*/
39
40
//#define USE_COEFFICIENTS
41
42
//#define INDICATE_PROGRESS
43
// #define PRINT_PERSISTENCE_PAIRS
44
45
//#define USE_ROBINHOOD_HASHMAP
46
47
#include <algorithm>
48
#include <atomic>
49
#include <cassert>
50
#include <chrono>
51
#include <cmath>
52
#include <cstdlib>
53
#include <fstream>
54
#include <iostream>
55
#include <numeric>
56
#include <queue>
57
#include <sstream>
58
#include <unordered_map>
59
60
// Include ripser core implementation.
61
// Either include a header-only version or include ripser.cpp directly.
62
// Only one compilation unit should compile ripser.cpp.
63
#include "ripser_internal.hpp"
64
65
#ifdef USE_ROBINHOOD_HASHMAP
66
67
#include "robin-hood-hashing/src/include/robin_hood.h"
68
69
template <class Key, class T, class H, class E>
70
using hash_map = robin_hood::unordered_map<Key, T, H, E>;
71
template <class Key> using hash = robin_hood::hash<Key>;
72
73
#else
74
75
template <class Key, class T, class H, class E> using hash_map = std::unordered_map<Key, T, H, E>;
76
template <class Key> using hash = std::hash<Key>;
77
78
#endif
79
80
#ifdef INDICATE_PROGRESS
81
static const std::chrono::milliseconds time_step(40);
82
#endif
83
84
static const std::string clear_line("\r\033[K");
85
86
static const size_t num_coefficient_bits = 8;
87
88
static const index_t max_simplex_index =
89
    (1l << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1;
90
91
0
void check_overflow(index_t i) {
92
0
  if
93
#ifdef USE_COEFFICIENTS
94
      (i > max_simplex_index)
95
#else
96
0
      (i < 0)
97
0
#endif
98
0
    throw std::overflow_error("simplex index " + std::to_string((uint64_t)i) +
99
0
                              " in filtration is larger than maximum index " +
100
0
                              std::to_string(max_simplex_index));
101
0
}
102
103
class binomial_coeff_table {
104
  std::vector<std::vector<index_t>> B;
105
106
public:
107
0
  binomial_coeff_table(index_t n, index_t k) : B(n + 1) {
108
0
    for (index_t i = 0; i <= n; ++i) {
109
0
      B[i].resize(k + 1, 0);
110
0
      B[i][0] = 1;
111
0
      for (index_t j = 1; j < std::min(i, k + 1); ++j)
112
0
        B[i][j] = B[i - 1][j - 1] + B[i - 1][j];
113
0
      if (i <= k) B[i][i] = 1;
114
0
      check_overflow(B[i][std::min(i >> 1, k)]);
115
0
    }
116
0
  }
117
118
0
  index_t operator()(index_t n, index_t k) const {
119
0
    assert(n < B.size() && k < B[n].size() && n >= k - 1);
120
0
    return B[n][k];
121
0
  }
122
};
123
124
0
bool is_prime(const coefficient_t n) {
125
0
  if (!(n & 1) || n < 2) return n == 2;
126
0
  for (coefficient_t p = 3; p <= n / p; p += 2)
127
0
    if (!(n % p)) return false;
128
0
  return true;
129
0
}
130
131
0
std::vector<coefficient_t> multiplicative_inverse_vector(const coefficient_t m) {
132
0
  std::vector<coefficient_t> inverse(m);
133
0
  inverse[1] = 1;
134
  // m = a * (m / a) + m % a
135
  // Multipying with inverse(a) * inverse(m % a):
136
  // 0 = inverse(m % a) * (m / a) + inverse(a)  (mod m)
137
0
  for (coefficient_t a = 2; a < m; ++a) inverse[a] = m - (inverse[m % a] * (m / a)) % m;
138
0
  return inverse;
139
0
}
140
141
#ifdef USE_COEFFICIENTS
142
143
struct __attribute__((packed)) entry_t {
144
  index_t index : 8 * sizeof(index_t) - num_coefficient_bits;
145
  coefficient_t coefficient : num_coefficient_bits;
146
  entry_t(index_t _index, coefficient_t _coefficient)
147
      : index(_index), coefficient(_coefficient) {}
148
  entry_t(index_t _index) : index(_index), coefficient(0) {}
149
  entry_t() : index(0), coefficient(0) {}
150
};
151
152
static_assert(sizeof(entry_t) == sizeof(index_t), "size of entry_t is not the same as index_t");
153
154
entry_t make_entry(index_t i, coefficient_t c) { return entry_t(i, c); }
155
index_t get_index(const entry_t& e) { return e.index; }
156
index_t get_coefficient(const entry_t& e) { return e.coefficient; }
157
void set_coefficient(entry_t& e, const coefficient_t c) { e.coefficient = c; }
158
159
std::ostream& operator<<(std::ostream& stream, const entry_t& e) {
160
  stream << get_index(e) << ":" << get_coefficient(e);
161
  return stream;
162
}
163
164
#else
165
166
typedef index_t entry_t;
167
0
const index_t get_index(const entry_t& i) { return i; }
168
0
index_t get_coefficient(const entry_t& i) { return 1; }
169
0
entry_t make_entry(index_t _index, coefficient_t _value) { return entry_t(_index); }
170
0
void set_coefficient(entry_t& e, const coefficient_t c) {}
171
172
#endif
173
174
0
const entry_t& get_entry(const entry_t& e) { return e; }
175
176
typedef std::pair<value_t, index_t> diameter_index_t;
177
0
value_t get_diameter(const diameter_index_t& i) { return i.first; }
178
0
index_t get_index(const diameter_index_t& i) { return i.second; }
179
180
typedef std::pair<index_t, value_t> index_diameter_t;
181
0
index_t get_index(const index_diameter_t& i) { return i.first; }
182
0
value_t get_diameter(const index_diameter_t& i) { return i.second; }
183
184
struct diameter_entry_t : std::pair<value_t, entry_t> {
185
  using std::pair<value_t, entry_t>::pair;
186
  diameter_entry_t(value_t _diameter, index_t _index, coefficient_t _coefficient)
187
0
      : diameter_entry_t(_diameter, make_entry(_index, _coefficient)) {}
188
  diameter_entry_t(const diameter_index_t& _diameter_index, coefficient_t _coefficient)
189
0
      : diameter_entry_t(get_diameter(_diameter_index),
190
0
                         make_entry(get_index(_diameter_index), _coefficient)) {}
191
  diameter_entry_t(const diameter_index_t& _diameter_index)
192
0
      : diameter_entry_t(get_diameter(_diameter_index),
193
0
                         make_entry(get_index(_diameter_index), 0)) {}
194
0
  diameter_entry_t(const index_t& _index) : diameter_entry_t(0, _index, 0) {}
195
};
196
197
0
const entry_t& get_entry(const diameter_entry_t& p) { return p.second; }
198
0
entry_t& get_entry(diameter_entry_t& p) { return p.second; }
199
0
const index_t get_index(const diameter_entry_t& p) { return get_index(get_entry(p)); }
200
0
const coefficient_t get_coefficient(const diameter_entry_t& p) {
201
0
  return get_coefficient(get_entry(p));
202
0
}
203
0
const value_t& get_diameter(const diameter_entry_t& p) { return p.first; }
204
0
void set_coefficient(diameter_entry_t& p, const coefficient_t c) {
205
0
  set_coefficient(get_entry(p), c);
206
0
}
207
208
template <typename Entry> struct greater_diameter_or_smaller_index_comp {
209
0
  bool operator()(const Entry& a, const Entry& b) {
210
0
    return greater_diameter_or_smaller_index(a, b);
211
0
  }
212
};
213
214
0
template <typename Entry> bool greater_diameter_or_smaller_index(const Entry& a, const Entry& b) {
215
0
  return (get_diameter(a) > get_diameter(b)) ||
216
0
         ((get_diameter(a) == get_diameter(b)) && (get_index(a) < get_index(b)));
217
0
}
Unexecuted instantiation: bool greater_diameter_or_smaller_index<std::__1::pair<float, long> >(std::__1::pair<float, long> const&, std::__1::pair<float, long> const&)
Unexecuted instantiation: bool greater_diameter_or_smaller_index<diameter_entry_t>(diameter_entry_t const&, diameter_entry_t const&)
218
219
0
template <> void compressed_lower_distance_matrix::init_rows() {
220
0
  value_t* pointer = &distances[0];
221
0
  for (size_t i = 1; i < size(); ++i) {
222
0
    rows[i] = pointer;
223
0
    pointer += i;
224
0
  }
225
0
}
226
227
0
template <> void compressed_upper_distance_matrix::init_rows() {
228
0
  value_t* pointer = &distances[0] - 1;
229
0
  for (size_t i = 0; i < size() - 1; ++i) {
230
0
    rows[i] = pointer;
231
0
    pointer += size() - i - 2;
232
0
  }
233
0
}
234
235
template <>
236
0
value_t compressed_lower_distance_matrix::operator()(const index_t i, const index_t j) const {
237
0
  return i == j ? 0 : i < j ? rows[j][i] : rows[i][j];
238
0
}
239
240
template <>
241
0
value_t compressed_upper_distance_matrix::operator()(const index_t i, const index_t j) const {
242
0
  return i == j ? 0 : i > j ? rows[j][i] : rows[i][j];
243
0
}
244
245
struct sparse_distance_matrix {
246
  std::vector<std::vector<index_diameter_t>> neighbors;
247
248
  index_t num_edges;
249
250
  sparse_distance_matrix(std::vector<std::vector<index_diameter_t>>&& _neighbors,
251
                         index_t _num_edges)
252
0
      : neighbors(std::move(_neighbors)), num_edges(_num_edges) {}
253
254
  template <typename DistanceMatrix>
255
  sparse_distance_matrix(const DistanceMatrix& mat, const value_t threshold)
256
      : neighbors(mat.size()), num_edges(0) {
257
258
    for (size_t i = 0; i < size(); ++i)
259
      for (size_t j = 0; j < size(); ++j)
260
        if (i != j) {
261
          auto d = mat(i, j);
262
          if (d <= threshold) {
263
            ++num_edges;
264
            neighbors[i].push_back({j, d});
265
          }
266
        }
267
  }
268
269
0
  value_t operator()(const index_t i, const index_t j) const {
270
0
    auto neighbor =
271
0
        std::lower_bound(neighbors[i].begin(), neighbors[i].end(), index_diameter_t{j, 0});
272
0
    return (neighbor != neighbors[i].end() && get_index(*neighbor) == j)
273
0
               ? get_diameter(*neighbor)
274
0
               : std::numeric_limits<value_t>::infinity();
275
0
  }
276
277
0
  size_t size() const { return neighbors.size(); }
278
};
279
280
struct euclidean_distance_matrix {
281
  std::vector<std::vector<value_t>> points;
282
283
  euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points)
284
0
      : points(std::move(_points)) {
285
0
    for (auto p : points) { assert(p.size() == points.front().size()); }
286
0
  }
287
288
0
  value_t operator()(const index_t i, const index_t j) const {
289
0
    assert(i < points.size());
290
0
    assert(j < points.size());
291
0
    return std::sqrt(std::inner_product(
292
0
        points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus<value_t>(),
293
0
        [](value_t u, value_t v) { return (u - v) * (u - v); }));
294
0
  }
295
296
0
  size_t size() const { return points.size(); }
297
};
298
299
class union_find {
300
  std::vector<index_t> parent;
301
  std::vector<uint8_t> rank;
302
303
public:
304
0
  union_find(const index_t n) : parent(n), rank(n, 0) {
305
0
    for (index_t i = 0; i < n; ++i) parent[i] = i;
306
0
  }
307
308
0
  index_t find(index_t x) {
309
0
    index_t y = x, z;
310
0
    while ((z = parent[y]) != y) y = z;
311
0
    while ((z = parent[x]) != y) {
312
0
      parent[x] = y;
313
0
      x = z;
314
0
    }
315
0
    return z;
316
0
  }
317
318
0
  void link(index_t x, index_t y) {
319
0
    if ((x = find(x)) == (y = find(y))) return;
320
0
    if (rank[x] > rank[y])
321
0
      parent[y] = x;
322
0
    else {
323
0
      parent[x] = y;
324
0
      if (rank[x] == rank[y]) ++rank[y];
325
0
    }
326
0
  }
327
};
328
329
0
template <typename T> T begin(std::pair<T, T>& p) { return p.first; }
330
0
template <typename T> T end(std::pair<T, T>& p) { return p.second; }
331
332
template <typename ValueType> class compressed_sparse_matrix {
333
  std::vector<size_t> bounds;
334
  std::vector<ValueType> entries;
335
336
  typedef typename std::vector<ValueType>::iterator iterator;
337
  typedef std::pair<iterator, iterator> iterator_pair;
338
339
public:
340
0
  size_t size() const { return bounds.size(); }
341
342
0
  iterator_pair subrange(const index_t index) {
343
0
    return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]),
344
0
            entries.begin() + bounds[index]};
345
0
  }
346
347
0
  void append_column() { bounds.push_back(entries.size()); }
348
349
0
  void push_back(const ValueType e) {
350
0
    assert(0 < size());
351
0
    entries.push_back(e);
352
0
    ++bounds.back();
353
0
  }
354
};
355
356
template <class Predicate>
357
0
index_t get_max(index_t top, const index_t bottom, const Predicate pred) {
358
0
  if (!pred(top)) {
359
0
    index_t count = top - bottom;
360
0
    while (count > 0) {
361
0
      index_t step = count >> 1, mid = top - step;
362
0
      if (!pred(mid)) {
363
0
        top = mid - 1;
364
0
        count -= step + 1;
365
0
      } else
366
0
        count = step;
367
0
    }
368
0
  }
369
0
  return top;
370
0
}
Unexecuted instantiation: long get_max<ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const::{lambda(long)#1}>(long, long, ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const::{lambda(long)#1})
Unexecuted instantiation: long get_max<ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const::{lambda(long)#1}>(long, long, ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const::{lambda(long)#1})
371
372
template <typename DistanceMatrix> class ripser {
373
  const DistanceMatrix dist;
374
  const index_t n, dim_max;
375
  const value_t threshold;
376
  const float ratio;
377
  const coefficient_t modulus;
378
  const binomial_coeff_table binomial_coeff;
379
  const std::vector<coefficient_t> multiplicative_inverse;
380
  mutable std::vector<diameter_entry_t> cofacet_entries;
381
  mutable std::vector<index_t> vertices;
382
  interval_recorder recorder_;
383
384
  struct entry_hash {
385
0
    std::size_t operator()(const entry_t& e) const { return hash<index_t>()(::get_index(e)); }
386
  };
387
388
  struct equal_index {
389
0
    bool operator()(const entry_t& e, const entry_t& f) const {
390
0
      return ::get_index(e) == ::get_index(f);
391
0
    }
392
  };
393
394
  typedef hash_map<entry_t, size_t, entry_hash, equal_index> entry_hash_map;
395
396
public:
397
  ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio,
398
         coefficient_t _modulus, interval_recorder recorder = {})
399
0
      : dist(std::move(_dist)), n(dist.size()),
400
0
        dim_max(std::min(_dim_max, index_t(dist.size() - 2))), threshold(_threshold),
401
0
        ratio(_ratio), modulus(_modulus), binomial_coeff(n, dim_max + 2),
402
0
        multiplicative_inverse(multiplicative_inverse_vector(_modulus)),
403
0
        recorder_(recorder) {}
404
405
0
  index_t get_max_vertex(const index_t idx, const index_t k, const index_t n) const {
406
0
    return get_max(n, k - 1, [&](index_t w) -> bool { return (binomial_coeff(w, k) <= idx); });
407
0
  }
Unexecuted instantiation: ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_max_vertex(long, long, long) const
Unexecuted instantiation: ripser<sparse_distance_matrix>::get_max_vertex(long, long, long) const
408
409
0
  index_t get_edge_index(const index_t i, const index_t j) const {
410
0
    return binomial_coeff(i, 2) + j;
411
0
  }
412
413
  template <typename OutputIterator>
414
  OutputIterator get_simplex_vertices(index_t idx, const index_t dim, index_t n,
415
0
                                      OutputIterator out) const {
416
0
    --n;
417
0
    for (index_t k = dim + 1; k > 0; --k) {
418
0
      n = get_max_vertex(idx, k, n);
419
0
      *out++ = n;
420
0
      idx -= binomial_coeff(n, k);
421
0
    }
422
0
    return out;
423
0
  }
Unexecuted instantiation: std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > ripser<compressed_distance_matrix<(compressed_matrix_layout)0> >::get_simplex_vertices<std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > >(long, long, long, std::__1::reverse_iterator<std::__1::__wrap_iter<long*> >) const
Unexecuted instantiation: std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > ripser<sparse_distance_matrix>::get_simplex_vertices<std::__1::reverse_iterator<std::__1::__wrap_iter<long*> > >(long, long, long, std::__1::reverse_iterator<std::__1::__wrap_iter<long*> >) const
424
425
0
  value_t compute_diameter(const index_t index, const index_t dim) const {
426
0
    value_t diam = -std::numeric_limits<value_t>::infinity();
427
428
0
    vertices.resize(dim + 1);
429
0
    get_simplex_vertices(index, dim, dist.size(), vertices.rbegin());
430
431
0
    for (index_t i = 0; i <= dim; ++i)
432
0
      for (index_t j = 0; j < i; ++j) {
433
0
        diam = std::max(diam, dist(vertices[i], vertices[j]));
434
0
      }
435
0
    return diam;
436
0
  }
437
438
  class simplex_coboundary_enumerator;
439
440
  class simplex_boundary_enumerator {
441
  private:
442
    index_t idx_below, idx_above, j, k;
443
    diameter_entry_t simplex;
444
    index_t dim;
445
    const coefficient_t modulus;
446
    const binomial_coeff_table& binomial_coeff;
447
    const ripser& parent;
448
449
  public:
450
    simplex_boundary_enumerator(const diameter_entry_t _simplex, const index_t _dim,
451
                                const ripser& _parent)
452
0
        : idx_below(get_index(_simplex)), idx_above(0), j(_parent.n - 1), k(_dim),
453
0
          simplex(_simplex), modulus(_parent.modulus), binomial_coeff(_parent.binomial_coeff),
454
0
          parent(_parent) {}
455
456
    simplex_boundary_enumerator(const index_t _dim, const ripser& _parent)
457
0
        : simplex_boundary_enumerator(-1, _dim, _parent) {}
458
459
0
    void set_simplex(const diameter_entry_t _simplex, const index_t _dim) {
460
0
      idx_below = get_index(_simplex);
461
0
      idx_above = 0;
462
0
      j = parent.n - 1;
463
0
      k = _dim;
464
0
      simplex = _simplex;
465
0
      dim = _dim;
466
0
    }
467
468
0
    bool has_next() { return (k >= 0); }
469
470
0
    diameter_entry_t next() {
471
0
      j = parent.get_max_vertex(idx_below, k + 1, j);
472
473
0
      index_t face_index = idx_above - binomial_coeff(j, k + 1) + idx_below;
474
475
0
      value_t face_diameter = parent.compute_diameter(face_index, dim - 1);
476
477
0
      coefficient_t face_coefficient =
478
0
          (k & 1 ? -1 + modulus : 1) * get_coefficient(simplex) % modulus;
479
480
0
      idx_below -= binomial_coeff(j, k + 1);
481
0
      idx_above += binomial_coeff(j, k);
482
483
0
      --k;
484
485
0
      return diameter_entry_t(face_diameter, face_index, face_coefficient);
486
0
    }
487
  };
488
489
0
  diameter_entry_t get_zero_pivot_facet(const diameter_entry_t simplex, const index_t dim) {
490
0
    static simplex_boundary_enumerator facets(0, *this);
491
0
    facets.set_simplex(simplex, dim);
492
0
    while (facets.has_next()) {
493
0
      diameter_entry_t facet = facets.next();
494
0
      if (get_diameter(facet) == get_diameter(simplex)) return facet;
495
0
    }
496
0
    return diameter_entry_t(-1);
497
0
  }
498
499
0
  diameter_entry_t get_zero_pivot_cofacet(const diameter_entry_t simplex, const index_t dim) {
500
0
    static simplex_coboundary_enumerator cofacets(*this);
501
0
    cofacets.set_simplex(simplex, dim);
502
0
    while (cofacets.has_next()) {
503
0
      diameter_entry_t cofacet = cofacets.next();
504
0
      if (get_diameter(cofacet) == get_diameter(simplex)) return cofacet;
505
0
    }
506
0
    return diameter_entry_t(-1);
507
0
  }
508
509
0
  diameter_entry_t get_zero_apparent_facet(const diameter_entry_t simplex, const index_t dim) {
510
0
    diameter_entry_t facet = get_zero_pivot_facet(simplex, dim);
511
0
    return ((get_index(facet) != -1) &&
512
0
            (get_index(get_zero_pivot_cofacet(facet, dim - 1)) == get_index(simplex)))
513
0
               ? facet
514
0
               : diameter_entry_t(-1);
515
0
  }
516
517
0
  diameter_entry_t get_zero_apparent_cofacet(const diameter_entry_t simplex, const index_t dim) {
518
0
    diameter_entry_t cofacet = get_zero_pivot_cofacet(simplex, dim);
519
0
    return ((get_index(cofacet) != -1) &&
520
0
            (get_index(get_zero_pivot_facet(cofacet, dim + 1)) == get_index(simplex)))
521
0
               ? cofacet
522
0
               : diameter_entry_t(-1);
523
0
  }
524
525
0
  bool is_in_zero_apparent_pair(const diameter_entry_t simplex, const index_t dim) {
526
0
    return (get_index(get_zero_apparent_cofacet(simplex, dim)) != -1) ||
527
0
           (get_index(get_zero_apparent_facet(simplex, dim)) != -1);
528
0
  }
529
530
  void assemble_columns_to_reduce(std::vector<diameter_index_t>& simplices,
531
                                  std::vector<diameter_index_t>& columns_to_reduce,
532
0
                                  entry_hash_map& pivot_column_index, index_t dim) {
533
534
#ifdef INDICATE_PROGRESS
535
    std::cerr << clear_line << "assembling columns" << std::flush;
536
    std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step;
537
#endif
538
539
0
    columns_to_reduce.clear();
540
0
    std::vector<diameter_index_t> next_simplices;
541
542
0
    simplex_coboundary_enumerator cofacets(*this);
543
544
0
    for (diameter_index_t& simplex : simplices) {
545
0
      cofacets.set_simplex(diameter_entry_t(simplex, 1), dim - 1);
546
547
0
      while (cofacets.has_next(false)) {
548
#ifdef INDICATE_PROGRESS
549
        if (std::chrono::steady_clock::now() > next) {
550
          std::cerr << clear_line << "assembling " << next_simplices.size()
551
                    << " columns (processing " << std::distance(&simplices[0], &simplex)
552
                    << "/" << simplices.size() << " simplices)" << std::flush;
553
          next = std::chrono::steady_clock::now() + time_step;
554
        }
555
#endif
556
0
        auto cofacet = cofacets.next();
557
0
        if (get_diameter(cofacet) <= threshold) {
558
0
          next_simplices.push_back({get_diameter(cofacet), get_index(cofacet)});
559
0
          if (!is_in_zero_apparent_pair(cofacet, dim) &&
560
0
              (pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()))
561
0
            columns_to_reduce.push_back({get_diameter(cofacet), get_index(cofacet)});
562
0
        }
563
0
      }
564
0
    }
565
566
0
    simplices.swap(next_simplices);
567
568
#ifdef INDICATE_PROGRESS
569
    std::cerr << clear_line << "sorting " << columns_to_reduce.size() << " columns"
570
              << std::flush;
571
#endif
572
573
0
    std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
574
0
              greater_diameter_or_smaller_index<diameter_index_t>);
575
#ifdef INDICATE_PROGRESS
576
    std::cerr << clear_line << std::flush;
577
#endif
578
0
  }
579
580
  void compute_dim_0_pairs(std::vector<diameter_index_t>& edges,
581
0
                           std::vector<diameter_index_t>& columns_to_reduce) {
582
#ifdef PRINT_PERSISTENCE_PAIRS
583
    std::cout << "persistence intervals in dim 0:" << std::endl;
584
#endif
585
586
0
    union_find dset(n);
587
588
0
    edges = get_edges();
589
0
    std::sort(edges.rbegin(), edges.rend(),
590
0
              greater_diameter_or_smaller_index<diameter_index_t>);
591
0
    std::vector<index_t> vertices_of_edge(2);
592
0
    for (auto e : edges) {
593
0
      get_simplex_vertices(get_index(e), 1, n, vertices_of_edge.rbegin());
594
0
      index_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]);
595
596
0
      if (u != v) {
597
0
        value_t birth = 0.0f;
598
0
        value_t death = get_diameter(e);
599
600
0
        if (death != 0) {
601
#ifdef PRINT_PERSISTENCE_PAIRS
602
          std::cout << " [" << birth << "," << death << ")" << std::endl;
603
#endif
604
0
          recorder_.emit(/*dim=*/0, birth, death);
605
0
        }
606
0
        dset.link(u, v);
607
0
      } else if (get_index(get_zero_apparent_cofacet(e, 1)) == -1)
608
0
        columns_to_reduce.push_back(e);
609
0
    }
610
0
    std::reverse(columns_to_reduce.begin(), columns_to_reduce.end());
611
612
#ifdef PRINT_PERSISTENCE_PAIRS
613
    for (index_t i = 0; i < n; ++i)
614
      if (dset.find(i) == i) std::cout << " [0, )" << std::endl;
615
#endif
616
0
    for (index_t i = 0; i < n; ++i) {
617
0
      if (dset.find(i) == i) {
618
0
        recorder_.emit(/*dim=*/0, /*birth=*/0.0f, /*death=*/-1.0f);
619
0
      }
620
0
    }
621
0
  }
622
623
0
  template <typename Column> diameter_entry_t pop_pivot(Column& column) {
624
0
    diameter_entry_t pivot(-1);
625
#ifdef USE_COEFFICIENTS
626
    while (!column.empty()) {
627
      if (get_coefficient(pivot) == 0)
628
        pivot = column.top();
629
      else if (get_index(column.top()) != get_index(pivot))
630
        return pivot;
631
      else
632
        set_coefficient(pivot,
633
                        (get_coefficient(pivot) + get_coefficient(column.top())) % modulus);
634
      column.pop();
635
    }
636
    return (get_coefficient(pivot) == 0) ? -1 : pivot;
637
#else
638
0
    while (!column.empty()) {
639
0
      pivot = column.top();
640
0
      column.pop();
641
0
      if (column.empty() || get_index(column.top()) != get_index(pivot)) return pivot;
642
0
      column.pop();
643
0
    }
644
0
    return -1;
645
0
#endif
646
0
  }
647
648
0
  template <typename Column> diameter_entry_t get_pivot(Column& column) {
649
0
    diameter_entry_t result = pop_pivot(column);
650
0
    if (get_index(result) != -1) column.push(result);
651
0
    return result;
652
0
  }
653
654
  template <typename Column>
655
  diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex,
656
                                                 Column& working_coboundary, const index_t& dim,
657
0
                                                 entry_hash_map& pivot_column_index) {
658
0
    static simplex_coboundary_enumerator cofacets(*this);
659
0
    bool check_for_emergent_pair = true;
660
0
    cofacet_entries.clear();
661
0
    cofacets.set_simplex(simplex, dim);
662
0
    while (cofacets.has_next()) {
663
0
      diameter_entry_t cofacet = cofacets.next();
664
0
      if (get_diameter(cofacet) <= threshold) {
665
0
        cofacet_entries.push_back(cofacet);
666
0
        if (check_for_emergent_pair && (get_diameter(simplex) == get_diameter(cofacet))) {
667
0
          if ((pivot_column_index.find(get_entry(cofacet)) == pivot_column_index.end()) &&
668
0
              (get_index(get_zero_apparent_facet(cofacet, dim + 1)) == -1))
669
0
            return cofacet;
670
0
          check_for_emergent_pair = false;
671
0
        }
672
0
      }
673
0
    }
674
0
    for (auto cofacet : cofacet_entries) working_coboundary.push(cofacet);
675
0
    return get_pivot(working_coboundary);
676
0
  }
677
678
  template <typename Column>
679
  void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim,
680
0
                              Column& working_reduction_column, Column& working_coboundary) {
681
0
    static simplex_coboundary_enumerator cofacets(*this);
682
0
    working_reduction_column.push(simplex);
683
0
    cofacets.set_simplex(simplex, dim);
684
0
    while (cofacets.has_next()) {
685
0
      diameter_entry_t cofacet = cofacets.next();
686
0
      if (get_diameter(cofacet) <= threshold) working_coboundary.push(cofacet);
687
0
    }
688
0
  }
689
690
  template <typename Column>
691
  void add_coboundary(compressed_sparse_matrix<diameter_entry_t>& reduction_matrix,
692
                      const std::vector<diameter_index_t>& columns_to_reduce,
693
                      const size_t index_column_to_add, const coefficient_t factor,
694
                      const size_t& dim, Column& working_reduction_column,
695
0
                      Column& working_coboundary) {
696
0
    diameter_entry_t column_to_add(columns_to_reduce[index_column_to_add], factor);
697
0
    add_simplex_coboundary(column_to_add, dim, working_reduction_column, working_coboundary);
698
699
0
    for (diameter_entry_t simplex : reduction_matrix.subrange(index_column_to_add)) {
700
0
      set_coefficient(simplex, get_coefficient(simplex) * factor % modulus);
701
0
      add_simplex_coboundary(simplex, dim, working_reduction_column, working_coboundary);
702
0
    }
703
0
  }
704
705
  void compute_pairs(const std::vector<diameter_index_t>& columns_to_reduce,
706
0
                     entry_hash_map& pivot_column_index, const index_t dim) {
707
708
#ifdef PRINT_PERSISTENCE_PAIRS
709
    std::cout << "persistence intervals in dim " << dim << ":" << std::endl;
710
#endif
711
712
0
    compressed_sparse_matrix<diameter_entry_t> reduction_matrix;
713
714
#ifdef INDICATE_PROGRESS
715
    std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step;
716
#endif
717
0
    for (size_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size();
718
0
         ++index_column_to_reduce) {
719
720
0
      diameter_entry_t column_to_reduce(columns_to_reduce[index_column_to_reduce], 1);
721
0
      value_t diameter = get_diameter(column_to_reduce);
722
723
0
      reduction_matrix.append_column();
724
725
0
      std::priority_queue<diameter_entry_t, std::vector<diameter_entry_t>,
726
0
                          greater_diameter_or_smaller_index_comp<diameter_entry_t>>
727
0
          working_reduction_column, working_coboundary;
728
729
0
      diameter_entry_t e, pivot = init_coboundary_and_get_pivot(
730
0
                              column_to_reduce, working_coboundary, dim, pivot_column_index);
731
732
0
      while (true) {
733
#ifdef INDICATE_PROGRESS
734
        if (std::chrono::steady_clock::now() > next) {
735
          std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1
736
                    << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")"
737
                    << std::flush;
738
          next = std::chrono::steady_clock::now() + time_step;
739
        }
740
#endif
741
0
        if (get_index(pivot) != -1) {
742
0
          auto pair = pivot_column_index.find(get_entry(pivot));
743
0
          if (pair != pivot_column_index.end()) {
744
0
            entry_t other_pivot = pair->first;
745
0
            index_t index_column_to_add = pair->second;
746
0
            coefficient_t factor =
747
0
                modulus - get_coefficient(pivot) *
748
0
                              multiplicative_inverse[get_coefficient(other_pivot)] %
749
0
                              modulus;
750
751
0
            add_coboundary(reduction_matrix, columns_to_reduce, index_column_to_add,
752
0
                           factor, dim, working_reduction_column, working_coboundary);
753
754
0
            pivot = get_pivot(working_coboundary);
755
0
          } else if (get_index(e = get_zero_apparent_facet(pivot, dim + 1)) != -1) {
756
0
            set_coefficient(e, modulus - get_coefficient(e));
757
758
0
            add_simplex_coboundary(e, dim, working_reduction_column, working_coboundary);
759
760
0
            pivot = get_pivot(working_coboundary);
761
0
          } else {
762
0
            value_t death = get_diameter(pivot);
763
0
            if (death > diameter * ratio) {
764
#ifdef PRINT_PERSISTENCE_PAIRS
765
#ifdef INDICATE_PROGRESS
766
              std::cerr << clear_line << std::flush;
767
#endif
768
              std::cout << " [" << diameter << "," << death << ")" << std::endl;
769
#endif
770
0
              recorder_.emit(/*dim=*/dim, diameter, death);
771
0
            }
772
0
            pivot_column_index.insert({get_entry(pivot), index_column_to_reduce});
773
774
0
            while (true) {
775
0
              diameter_entry_t e = pop_pivot(working_reduction_column);
776
0
              if (get_index(e) == -1) break;
777
0
              assert(get_coefficient(e) > 0);
778
0
              reduction_matrix.push_back(e);
779
0
            }
780
0
            break;
781
0
          }
782
0
        } else {
783
#ifdef PRINT_PERSISTENCE_PAIRS
784
#ifdef INDICATE_PROGRESS
785
          std::cerr << clear_line << std::flush;
786
#endif
787
          std::cout << " [" << diameter << ", )" << std::endl;
788
#endif
789
0
          recorder_.emit(/*dim=*/dim, diameter, /*death=*/-1.0f);
790
0
          break;
791
0
        }
792
0
      }
793
0
    }
794
#ifdef INDICATE_PROGRESS
795
    std::cerr << clear_line << std::flush;
796
#endif
797
0
  }
798
799
  std::vector<diameter_index_t> get_edges();
800
801
0
  void compute_barcodes() {
802
0
    std::vector<diameter_index_t> simplices, columns_to_reduce;
803
804
0
    compute_dim_0_pairs(simplices, columns_to_reduce);
805
806
0
    for (index_t dim = 1; dim <= dim_max; ++dim) {
807
0
      entry_hash_map pivot_column_index;
808
0
      pivot_column_index.reserve(columns_to_reduce.size());
809
810
0
      compute_pairs(columns_to_reduce, pivot_column_index, dim);
811
812
0
      if (dim < dim_max)
813
0
        assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index,
814
0
                                   dim + 1);
815
0
    }
816
0
  }
817
};
818
819
template <> class ripser<compressed_lower_distance_matrix>::simplex_coboundary_enumerator {
820
  index_t idx_below, idx_above, j, k;
821
  std::vector<index_t> vertices;
822
  diameter_entry_t simplex;
823
  const coefficient_t modulus;
824
  const compressed_lower_distance_matrix& dist;
825
  const binomial_coeff_table& binomial_coeff;
826
  const ripser<compressed_lower_distance_matrix>& parent;
827
828
public:
829
  simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim,
830
                                const ripser<compressed_lower_distance_matrix>& _parent)
831
      : modulus(_parent.modulus), dist(_parent.dist),
832
0
        binomial_coeff(_parent.binomial_coeff), parent(_parent) {
833
0
    if (get_index(_simplex) != -1)
834
0
      parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.rbegin());
835
0
  }
836
837
  simplex_coboundary_enumerator(const ripser<compressed_lower_distance_matrix>& _parent)
838
0
    : modulus(_parent.modulus), dist(_parent.dist),
839
0
      binomial_coeff(_parent.binomial_coeff), parent(_parent) {}
840
841
0
  void set_simplex(const diameter_entry_t _simplex, const index_t _dim) {
842
0
    idx_below = get_index(_simplex);
843
0
    idx_above = 0;
844
0
    j = parent.n - 1;
845
0
    k = _dim + 1;
846
0
    simplex = _simplex;
847
0
    vertices.resize(_dim + 1);
848
0
    parent.get_simplex_vertices(get_index(_simplex), _dim, parent.n, vertices.rbegin());
849
0
  }
850
851
0
  bool has_next(bool all_cofacets = true) {
852
0
    return (j >= k && (all_cofacets || binomial_coeff(j, k) > idx_below));
853
0
  }
854
855
0
  diameter_entry_t next() {
856
0
    while ((binomial_coeff(j, k) <= idx_below)) {
857
0
      idx_below -= binomial_coeff(j, k);
858
0
      idx_above += binomial_coeff(j, k + 1);
859
0
      --j;
860
0
      --k;
861
0
      assert(k != -1);
862
0
    }
863
0
    value_t cofacet_diameter = get_diameter(simplex);
864
0
    for (index_t i : vertices) cofacet_diameter = std::max(cofacet_diameter, dist(j, i));
865
0
    index_t cofacet_index = idx_above + binomial_coeff(j--, k + 1) + idx_below;
866
0
    coefficient_t cofacet_coefficient =
867
0
        (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus;
868
0
    return diameter_entry_t(cofacet_diameter, cofacet_index, cofacet_coefficient);
869
0
  }
870
};
871
872
template <> class ripser<sparse_distance_matrix>::simplex_coboundary_enumerator {
873
  index_t idx_below, idx_above, k;
874
  std::vector<index_t> vertices;
875
  diameter_entry_t simplex;
876
  const coefficient_t modulus;
877
  const sparse_distance_matrix& dist;
878
  const binomial_coeff_table& binomial_coeff;
879
  std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_it;
880
  std::vector<std::vector<index_diameter_t>::const_reverse_iterator> neighbor_end;
881
  index_diameter_t neighbor;
882
  const ripser<sparse_distance_matrix>& parent;
883
884
public:
885
  simplex_coboundary_enumerator(const diameter_entry_t _simplex, const index_t _dim,
886
                                const ripser<sparse_distance_matrix>& _parent)
887
      : modulus(_parent.modulus), dist(_parent.dist),
888
0
        binomial_coeff(_parent.binomial_coeff), parent(_parent) {
889
0
    if (get_index(_simplex) != -1) set_simplex(_simplex, _dim);
890
0
  }
891
892
  simplex_coboundary_enumerator(const ripser<sparse_distance_matrix>& _parent)
893
      : modulus(_parent.modulus), dist(_parent.dist),
894
0
  binomial_coeff(_parent.binomial_coeff), parent(_parent) {}
895
896
0
  void set_simplex(const diameter_entry_t _simplex, const index_t _dim) {
897
0
    idx_below = get_index(_simplex);
898
0
    idx_above = 0;
899
0
    k = _dim + 1;
900
0
    simplex = _simplex;
901
0
    vertices.resize(_dim + 1);
902
0
    parent.get_simplex_vertices(idx_below, _dim, parent.n, vertices.rbegin());
903
0
904
0
    neighbor_it.resize(_dim + 1);
905
0
    neighbor_end.resize(_dim + 1);
906
0
    for (index_t i = 0; i <= _dim; ++i) {
907
0
      auto v = vertices[i];
908
0
      neighbor_it[i] = dist.neighbors[v].rbegin();
909
0
      neighbor_end[i] = dist.neighbors[v].rend();
910
0
    }
911
0
  }
912
913
0
  bool has_next(bool all_cofacets = true) {
914
0
    for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) {
915
0
      neighbor = *it0;
916
0
      for (size_t idx = 1; idx < neighbor_it.size(); ++idx) {
917
0
        auto &it = neighbor_it[idx], end = neighbor_end[idx];
918
0
        while (get_index(*it) > get_index(neighbor))
919
0
          if (++it == end) return false;
920
0
        if (get_index(*it) != get_index(neighbor))
921
0
          goto continue_outer;
922
0
        else
923
0
          neighbor = std::max(neighbor, *it);
924
0
      }
925
0
      while (k > 0 && vertices[k - 1] > get_index(neighbor)) {
926
0
        if (!all_cofacets) return false;
927
0
        idx_below -= binomial_coeff(vertices[k - 1], k);
928
0
        idx_above += binomial_coeff(vertices[k - 1], k + 1);
929
0
        --k;
930
0
      }
931
0
      return true;
932
0
    continue_outer:;
933
0
    }
934
0
    return false;
935
0
  }
936
937
0
  diameter_entry_t next() {
938
0
    ++neighbor_it[0];
939
0
    value_t cofacet_diameter = std::max(get_diameter(simplex), get_diameter(neighbor));
940
0
    index_t cofacet_index = idx_above + binomial_coeff(get_index(neighbor), k + 1) + idx_below;
941
0
    coefficient_t cofacet_coefficient =
942
0
        (k & 1 ? modulus - 1 : 1) * get_coefficient(simplex) % modulus;
943
0
    return diameter_entry_t(cofacet_diameter, cofacet_index, cofacet_coefficient);
944
0
  }
945
};
946
947
0
template <> std::vector<diameter_index_t> ripser<compressed_lower_distance_matrix>::get_edges() {
948
0
  std::vector<diameter_index_t> edges;
949
0
  std::vector<index_t> vertices(2);
950
0
  for (index_t index = binomial_coeff(n, 2); index-- > 0;) {
951
0
    get_simplex_vertices(index, 1, dist.size(), vertices.rbegin());
952
0
    value_t length = dist(vertices[0], vertices[1]);
953
0
    if (length <= threshold) edges.push_back({length, index});
954
0
  }
955
0
  return edges;
956
0
}
957
958
0
template <> std::vector<diameter_index_t> ripser<sparse_distance_matrix>::get_edges() {
959
0
  std::vector<diameter_index_t> edges;
960
0
  for (index_t i = 0; i < n; ++i)
961
0
    for (auto n : dist.neighbors[i]) {
962
0
      index_t j = get_index(n);
963
0
      if (i > j) edges.push_back({get_diameter(n), get_edge_index(i, j)});
964
0
    }
965
0
  return edges;
966
0
}
967
968
void ripser_run_from_compressed_lower(
969
    compressed_lower_distance_matrix &&dist,
970
    index_t dim_max,
971
    value_t threshold,
972
    float ratio,
973
    interval_recorder recorder)
974
0
{
975
0
    coefficient_t modulus = 2; // Z/2Z
976
977
0
    ripser<compressed_lower_distance_matrix> r(
978
0
        std::move(dist),
979
0
        dim_max,
980
0
        threshold,
981
0
        ratio,
982
0
        modulus,
983
0
        recorder);
984
985
0
    r.compute_barcodes();
986
0
}
987
988
/* The below code is only used for the original ripser implementation
989
 * to create executabels. */
990
#ifdef RIPSEREXE
991
992
enum file_format {
993
  LOWER_DISTANCE_MATRIX,
994
  UPPER_DISTANCE_MATRIX,
995
  DISTANCE_MATRIX,
996
  POINT_CLOUD,
997
  DIPHA,
998
  SPARSE,
999
  BINARY
1000
};
1001
1002
static const uint16_t endian_check(0xff00);
1003
static const bool is_big_endian = *reinterpret_cast<const uint8_t*>(&endian_check);
1004
1005
template <typename T> T read(std::istream& input_stream) {
1006
  T result;
1007
  char* p = reinterpret_cast<char*>(&result);
1008
  if (input_stream.read(p, sizeof(T)).gcount() != sizeof(T)) return T();
1009
  if (is_big_endian) std::reverse(p, p + sizeof(T));
1010
  return result;
1011
}
1012
1013
euclidean_distance_matrix read_point_cloud(std::istream& input_stream) {
1014
  std::vector<std::vector<value_t>> points;
1015
1016
  std::string line;
1017
  value_t value;
1018
  while (std::getline(input_stream, line)) {
1019
    std::vector<value_t> point;
1020
    std::istringstream s(line);
1021
    while (s >> value) {
1022
      point.push_back(value);
1023
      s.ignore();
1024
    }
1025
    if (!point.empty()) points.push_back(point);
1026
    assert(point.size() == points.front().size());
1027
  }
1028
1029
  euclidean_distance_matrix eucl_dist(std::move(points));
1030
  index_t n = eucl_dist.size();
1031
  std::cout << "point cloud with " << n << " points in dimension "
1032
            << eucl_dist.points.front().size() << std::endl;
1033
1034
  return eucl_dist;
1035
}
1036
1037
sparse_distance_matrix read_sparse_distance_matrix(std::istream& input_stream) {
1038
  std::vector<std::vector<index_diameter_t>> neighbors;
1039
  index_t num_edges = 0;
1040
1041
  std::string line;
1042
  while (std::getline(input_stream, line)) {
1043
    std::istringstream s(line);
1044
    size_t i, j;
1045
    value_t value;
1046
    s >> i;
1047
    s >> j;
1048
    s >> value;
1049
    if (i != j) {
1050
      neighbors.resize(std::max({neighbors.size(), i + 1, j + 1}));
1051
      neighbors[i].push_back({j, value});
1052
      neighbors[j].push_back({i, value});
1053
      ++num_edges;
1054
    }
1055
  }
1056
1057
  for (size_t i = 0; i < neighbors.size(); ++i)
1058
    std::sort(neighbors[i].begin(), neighbors[i].end());
1059
1060
  return sparse_distance_matrix(std::move(neighbors), num_edges);
1061
}
1062
1063
compressed_lower_distance_matrix read_lower_distance_matrix(std::istream& input_stream) {
1064
  std::vector<value_t> distances;
1065
  value_t value;
1066
  while (input_stream >> value) {
1067
    distances.push_back(value);
1068
    input_stream.ignore();
1069
  }
1070
1071
  return compressed_lower_distance_matrix(std::move(distances));
1072
}
1073
1074
compressed_lower_distance_matrix read_upper_distance_matrix(std::istream& input_stream) {
1075
  std::vector<value_t> distances;
1076
  value_t value;
1077
  while (input_stream >> value) {
1078
    distances.push_back(value);
1079
    input_stream.ignore();
1080
  }
1081
1082
  return compressed_lower_distance_matrix(compressed_upper_distance_matrix(std::move(distances)));
1083
}
1084
1085
compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream) {
1086
  std::vector<value_t> distances;
1087
1088
  std::string line;
1089
  value_t value;
1090
  for (int i = 0; std::getline(input_stream, line); ++i) {
1091
    std::istringstream s(line);
1092
    for (int j = 0; j < i && s >> value; ++j) {
1093
      distances.push_back(value);
1094
      s.ignore();
1095
    }
1096
  }
1097
1098
  return compressed_lower_distance_matrix(std::move(distances));
1099
}
1100
1101
compressed_lower_distance_matrix read_dipha(std::istream& input_stream) {
1102
  if (read<int64_t>(input_stream) != 8067171840) {
1103
    std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl;
1104
    exit(-1);
1105
  }
1106
1107
  if (read<int64_t>(input_stream) != 7) {
1108
    std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl;
1109
    exit(-1);
1110
  }
1111
1112
  index_t n = read<int64_t>(input_stream);
1113
1114
  std::vector<value_t> distances;
1115
1116
  for (int i = 0; i < n; ++i)
1117
    for (int j = 0; j < n; ++j)
1118
      if (i > j)
1119
        distances.push_back(read<double>(input_stream));
1120
      else
1121
        read<double>(input_stream);
1122
1123
  return compressed_lower_distance_matrix(std::move(distances));
1124
}
1125
1126
compressed_lower_distance_matrix read_binary(std::istream& input_stream) {
1127
  std::vector<value_t> distances;
1128
  while (!input_stream.eof()) distances.push_back(read<value_t>(input_stream));
1129
  return compressed_lower_distance_matrix(std::move(distances));
1130
}
1131
1132
compressed_lower_distance_matrix read_file(std::istream& input_stream, const file_format format) {
1133
  switch (format) {
1134
  case LOWER_DISTANCE_MATRIX:
1135
    return read_lower_distance_matrix(input_stream);
1136
  case UPPER_DISTANCE_MATRIX:
1137
    return read_upper_distance_matrix(input_stream);
1138
  case DISTANCE_MATRIX:
1139
    return read_distance_matrix(input_stream);
1140
  case POINT_CLOUD:
1141
    return read_point_cloud(input_stream);
1142
  case DIPHA:
1143
    return read_dipha(input_stream);
1144
  default:
1145
    return read_binary(input_stream);
1146
  }
1147
}
1148
1149
void print_usage_and_exit(int exit_code) {
1150
  std::cerr
1151
      << "Usage: "
1152
      << "ripser "
1153
      << "[options] [filename]" << std::endl
1154
      << std::endl
1155
      << "Options:" << std::endl
1156
      << std::endl
1157
      << "  --help           print this screen" << std::endl
1158
      << "  --format         use the specified file format for the input. Options are:"
1159
      << std::endl
1160
      << "                     lower-distance (lower triangular distance matrix; default)"
1161
      << std::endl
1162
      << "                     upper-distance (upper triangular distance matrix)" << std::endl
1163
      << "                     distance       (full distance matrix)" << std::endl
1164
      << "                     point-cloud    (point cloud in Euclidean space)" << std::endl
1165
      << "                     dipha          (distance matrix in DIPHA file format)" << std::endl
1166
      << "                     sparse         (sparse distance matrix in sparse triplet format)"
1167
      << std::endl
1168
      << "                     binary         (lower triangular distance matrix in binary format)"
1169
      << std::endl
1170
      << "  --dim <k>        compute persistent homology up to dimension k" << std::endl
1171
      << "  --threshold <t>  compute Rips complexes up to diameter t" << std::endl
1172
#ifdef USE_COEFFICIENTS
1173
      << "  --modulus <p>    compute homology with coefficients in the prime field Z/pZ"
1174
      << std::endl
1175
#endif
1176
      << "  --ratio <r>      only show persistence pairs with death/birth ratio > r" << std::endl
1177
      << std::endl;
1178
  exit(exit_code);
1179
}
1180
1181
int main(int argc, char** argv) {
1182
  const char* filename = nullptr;
1183
1184
  file_format format = DISTANCE_MATRIX;
1185
1186
  index_t dim_max = 1;
1187
  value_t threshold = std::numeric_limits<value_t>::max();
1188
  float ratio = 1;
1189
  coefficient_t modulus = 2;
1190
1191
  for (index_t i = 1; i < argc; ++i) {
1192
    const std::string arg(argv[i]);
1193
    if (arg == "--help") {
1194
      print_usage_and_exit(0);
1195
    } else if (arg == "--dim") {
1196
      std::string parameter = std::string(argv[++i]);
1197
      size_t next_pos;
1198
      dim_max = std::stol(parameter, &next_pos);
1199
      if (next_pos != parameter.size()) print_usage_and_exit(-1);
1200
    } else if (arg == "--threshold") {
1201
      std::string parameter = std::string(argv[++i]);
1202
      size_t next_pos;
1203
      threshold = std::stof(parameter, &next_pos);
1204
      if (next_pos != parameter.size()) print_usage_and_exit(-1);
1205
    } else if (arg == "--ratio") {
1206
      std::string parameter = std::string(argv[++i]);
1207
      size_t next_pos;
1208
      ratio = std::stof(parameter, &next_pos);
1209
      if (next_pos != parameter.size()) print_usage_and_exit(-1);
1210
    } else if (arg == "--format") {
1211
      std::string parameter = std::string(argv[++i]);
1212
      if (parameter.rfind("lower", 0) == 0)
1213
        format = LOWER_DISTANCE_MATRIX;
1214
      else if (parameter.rfind("upper", 0) == 0)
1215
        format = UPPER_DISTANCE_MATRIX;
1216
      else if (parameter.rfind("dist", 0) == 0)
1217
        format = DISTANCE_MATRIX;
1218
      else if (parameter.rfind("point", 0) == 0)
1219
        format = POINT_CLOUD;
1220
      else if (parameter == "dipha")
1221
        format = DIPHA;
1222
      else if (parameter == "sparse")
1223
        format = SPARSE;
1224
      else if (parameter == "binary")
1225
        format = BINARY;
1226
      else
1227
        print_usage_and_exit(-1);
1228
#ifdef USE_COEFFICIENTS
1229
    } else if (arg == "--modulus") {
1230
      std::string parameter = std::string(argv[++i]);
1231
      size_t next_pos;
1232
      modulus = std::stol(parameter, &next_pos);
1233
      if (next_pos != parameter.size() || !is_prime(modulus)) print_usage_and_exit(-1);
1234
#endif
1235
    } else {
1236
      if (filename) { print_usage_and_exit(-1); }
1237
      filename = argv[i];
1238
    }
1239
  }
1240
1241
  std::ifstream file_stream(filename);
1242
  if (filename && file_stream.fail()) {
1243
    std::cerr << "couldn't open file " << filename << std::endl;
1244
    exit(-1);
1245
  }
1246
1247
  if (format == SPARSE) {
1248
    sparse_distance_matrix dist =
1249
        read_sparse_distance_matrix(filename ? file_stream : std::cin);
1250
    std::cout << "sparse distance matrix with " << dist.size() << " points and "
1251
              << dist.num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries"
1252
              << std::endl;
1253
1254
    ripser<sparse_distance_matrix>(std::move(dist), dim_max, threshold, ratio, modulus)
1255
        .compute_barcodes();
1256
  } else if (format == POINT_CLOUD && threshold < std::numeric_limits<value_t>::max()) {
1257
    sparse_distance_matrix dist(read_point_cloud(filename ? file_stream : std::cin), threshold);
1258
    ripser<sparse_distance_matrix>(std::move(dist), dim_max, threshold, ratio, modulus)
1259
        .compute_barcodes();
1260
  } else {
1261
    compressed_lower_distance_matrix dist =
1262
        read_file(filename ? file_stream : std::cin, format);
1263
1264
    value_t min = std::numeric_limits<value_t>::infinity(),
1265
            max = -std::numeric_limits<value_t>::infinity(), max_finite = max;
1266
    int num_edges = 0;
1267
1268
    value_t enclosing_radius = std::numeric_limits<value_t>::infinity();
1269
    if (threshold == std::numeric_limits<value_t>::max()) {
1270
      for (size_t i = 0; i < dist.size(); ++i) {
1271
        value_t r_i = -std::numeric_limits<value_t>::infinity();
1272
        for (size_t j = 0; j < dist.size(); ++j) r_i = std::max(r_i, dist(i, j));
1273
        enclosing_radius = std::min(enclosing_radius, r_i);
1274
      }
1275
    }
1276
1277
    for (auto d : dist.distances) {
1278
      min = std::min(min, d);
1279
      max = std::max(max, d);
1280
      if (d != std::numeric_limits<value_t>::infinity()) max_finite = std::max(max_finite, d);
1281
      if (d <= threshold) ++num_edges;
1282
    }
1283
    std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl;
1284
1285
    if (threshold == std::numeric_limits<value_t>::max()) {
1286
      std::cout << "distance matrix with " << dist.size()
1287
                << " points, using threshold at enclosing radius " << enclosing_radius
1288
                << std::endl;
1289
      ripser<compressed_lower_distance_matrix>(std::move(dist), dim_max, enclosing_radius,
1290
                                               ratio, modulus)
1291
          .compute_barcodes();
1292
    } else {
1293
      std::cout << "sparse distance matrix with " << dist.size() << " points and "
1294
                << num_edges << "/" << (dist.size() * dist.size() - 1) / 2 << " entries"
1295
                << std::endl;
1296
1297
      ripser<sparse_distance_matrix>(sparse_distance_matrix(std::move(dist), threshold),
1298
                                     dim_max, threshold, ratio, modulus)
1299
          .compute_barcodes();
1300
    }
1301
    exit(0);
1302
  }
1303
}
1304
1305
#endif