Coverage Report

Created: 2025-11-16 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/re2/re2/sparse_set.h
Line
Count
Source
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#ifndef RE2_SPARSE_SET_H_
6
#define RE2_SPARSE_SET_H_
7
8
// DESCRIPTION
9
//
10
// SparseSet(m) is a set of integers in [0, m).
11
// It requires sizeof(int)*m memory, but it provides
12
// fast iteration through the elements in the set and fast clearing
13
// of the set.
14
//
15
// Insertion and deletion are constant time operations.
16
//
17
// Allocating the set is a constant time operation
18
// when memory allocation is a constant time operation.
19
//
20
// Clearing the set is a constant time operation (unusual!).
21
//
22
// Iterating through the set is an O(n) operation, where n
23
// is the number of items in the set (not O(m)).
24
//
25
// The set iterator visits entries in the order they were first
26
// inserted into the set.  It is safe to add items to the set while
27
// using an iterator: the iterator will visit indices added to the set
28
// during the iteration, but will not re-visit indices whose values
29
// change after visiting.  Thus SparseSet can be a convenient
30
// implementation of a work queue.
31
//
32
// The SparseSet implementation is NOT thread-safe.  It is up to the
33
// caller to make sure only one thread is accessing the set.  (Typically
34
// these sets are temporary values and used in situations where speed is
35
// important.)
36
//
37
// The SparseSet interface does not present all the usual STL bells and
38
// whistles.
39
//
40
// Implemented with reference to Briggs & Torczon, An Efficient
41
// Representation for Sparse Sets, ACM Letters on Programming Languages
42
// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
43
//
44
// This is a specialization of sparse array; see sparse_array.h.
45
46
// IMPLEMENTATION
47
//
48
// See sparse_array.h for implementation details.
49
50
#include <assert.h>
51
#include <stdint.h>
52
53
#include <algorithm>
54
#include <memory>
55
#include <utility>
56
57
#include "re2/pod_array.h"
58
59
// Doing this simplifies the logic below.
60
#ifndef __has_feature
61
#define __has_feature(x) 0
62
#endif
63
64
#if __has_feature(memory_sanitizer)
65
#include <sanitizer/msan_interface.h>
66
#endif
67
68
namespace re2 {
69
70
template<typename Value>
71
class SparseSetT {
72
 public:
73
  SparseSetT();
74
  explicit SparseSetT(int max_size);
75
  ~SparseSetT();
76
77
  typedef int* iterator;
78
  typedef const int* const_iterator;
79
80
  // Return the number of entries in the set.
81
  int size() const {
82
    return size_;
83
  }
84
85
  // Indicate whether the set is empty.
86
2.50k
  int empty() const {
87
2.50k
    return size_ == 0;
88
2.50k
  }
89
90
  // Iterate over the set.
91
12.5M
  iterator begin() {
92
12.5M
    return dense_.data();
93
12.5M
  }
94
908M
  iterator end() {
95
908M
    return dense_.data() + size_;
96
908M
  }
97
98
  const_iterator begin() const {
99
    return dense_.data();
100
  }
101
  const_iterator end() const {
102
    return dense_.data() + size_;
103
  }
104
105
  // Change the maximum size of the set.
106
  // Invalidates all iterators.
107
  void resize(int new_max_size);
108
109
  // Return the maximum size of the set.
110
  // Indices can be in the range [0, max_size).
111
3.39G
  int max_size() const {
112
3.39G
    if (dense_.data() != NULL)
113
3.39G
      return dense_.size();
114
0
    else
115
0
      return 0;
116
3.39G
  }
117
118
  // Clear the set.
119
23.2M
  void clear() {
120
23.2M
    size_ = 0;
121
23.2M
  }
122
123
  // Check whether index i is in the set.
124
  bool contains(int i) const;
125
126
  // Comparison function for sorting.
127
  // Can sort the sparse set so that future iterations
128
  // will visit indices in increasing order using
129
  // std::sort(arr.begin(), arr.end(), arr.less);
130
  static bool less(int a, int b);
131
132
 public:
133
  // Insert index i into the set.
134
129M
  iterator insert(int i) {
135
129M
    return InsertInternal(true, i);
136
129M
  }
137
138
  // Insert index i into the set.
139
  // Fast but unsafe: only use if contains(i) is false.
140
1.18G
  iterator insert_new(int i) {
141
1.18G
    return InsertInternal(false, i);
142
1.18G
  }
143
144
 private:
145
1.31G
  iterator InsertInternal(bool allow_existing, int i) {
146
1.31G
    DebugCheckInvariants();
147
1.31G
    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
148
0
      assert(false && "illegal index");
149
      // Semantically, end() would be better here, but we already know
150
      // the user did something stupid, so begin() insulates them from
151
      // dereferencing an invalid pointer.
152
0
      return begin();
153
0
    }
154
1.31G
    if (!allow_existing) {
155
1.18G
      assert(!contains(i));
156
1.18G
      create_index(i);
157
1.18G
    } else {
158
129M
      if (!contains(i))
159
98.3M
        create_index(i);
160
129M
    }
161
1.31G
    DebugCheckInvariants();
162
1.31G
    return dense_.data() + sparse_[i];
163
1.31G
  }
164
165
  // Add the index i to the set.
166
  // Only use if contains(i) is known to be false.
167
  // This function is private, only intended as a helper
168
  // for other methods.
169
  void create_index(int i);
170
171
  // In debug mode, verify that some invariant properties of the class
172
  // are being maintained. This is called at the end of the constructor
173
  // and at the beginning and end of all public non-const member functions.
174
  void DebugCheckInvariants() const;
175
176
  // Initializes memory for elements [min, max).
177
166k
  void MaybeInitializeMemory(int min, int max) {
178
#if __has_feature(memory_sanitizer)
179
    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
180
#elif defined(RE2_ON_VALGRIND)
181
    for (int i = min; i < max; i++) {
182
      sparse_[i] = 0xababababU;
183
    }
184
#endif
185
166k
  }
186
187
  int size_ = 0;
188
  PODArray<int> sparse_;
189
  PODArray<int> dense_;
190
};
191
192
template<typename Value>
193
SparseSetT<Value>::SparseSetT() = default;
194
195
// Change the maximum size of the set.
196
// Invalidates all iterators.
197
template<typename Value>
198
void SparseSetT<Value>::resize(int new_max_size) {
199
  DebugCheckInvariants();
200
  if (new_max_size > max_size()) {
201
    const int old_max_size = max_size();
202
203
    // Construct these first for exception safety.
204
    PODArray<int> a(new_max_size);
205
    PODArray<int> b(new_max_size);
206
207
    std::copy_n(sparse_.data(), old_max_size, a.data());
208
    std::copy_n(dense_.data(), old_max_size, b.data());
209
210
    sparse_ = std::move(a);
211
    dense_ = std::move(b);
212
213
    MaybeInitializeMemory(old_max_size, new_max_size);
214
  }
215
  if (size_ > new_max_size)
216
    size_ = new_max_size;
217
  DebugCheckInvariants();
218
}
219
220
// Check whether index i is in the set.
221
template<typename Value>
222
2.08G
bool SparseSetT<Value>::contains(int i) const {
223
2.08G
  assert(i >= 0);
224
2.08G
  assert(i < max_size());
225
2.08G
  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
226
0
    return false;
227
0
  }
228
  // Unsigned comparison avoids checking sparse_[i] < 0.
229
2.08G
  return (uint32_t)sparse_[i] < (uint32_t)size_ &&
230
902M
         dense_[sparse_[i]] == i;
231
2.08G
}
232
233
template<typename Value>
234
1.27G
void SparseSetT<Value>::create_index(int i) {
235
1.27G
  assert(!contains(i));
236
1.27G
  assert(size_ < max_size());
237
1.27G
  sparse_[i] = size_;
238
1.27G
  dense_[size_] = i;
239
1.27G
  size_++;
240
1.27G
}
241
242
template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
243
166k
    sparse_(max_size), dense_(max_size) {
244
166k
  MaybeInitializeMemory(size_, max_size);
245
166k
  DebugCheckInvariants();
246
166k
}
247
248
166k
template<typename Value> SparseSetT<Value>::~SparseSetT() {
249
166k
  DebugCheckInvariants();
250
166k
}
251
252
2.62G
template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
253
2.62G
  assert(0 <= size_);
254
  assert(size_ <= max_size());
255
2.62G
}
256
257
// Comparison function for sorting.
258
template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
259
  return a < b;
260
}
261
262
typedef SparseSetT<void> SparseSet;
263
264
}  // namespace re2
265
266
#endif  // RE2_SPARSE_SET_H_