Coverage Report

Created: 2023-05-06 07:03

/src/re2/re2/sparse_set.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#ifndef RE2_SPARSE_SET_H_
6
#define RE2_SPARSE_SET_H_
7
8
// DESCRIPTION
9
//
10
// SparseSet(m) is a set of integers in [0, m).
11
// It requires sizeof(int)*m memory, but it provides
12
// fast iteration through the elements in the set and fast clearing
13
// of the set.
14
//
15
// Insertion and deletion are constant time operations.
16
//
17
// Allocating the set is a constant time operation
18
// when memory allocation is a constant time operation.
19
//
20
// Clearing the set is a constant time operation (unusual!).
21
//
22
// Iterating through the set is an O(n) operation, where n
23
// is the number of items in the set (not O(m)).
24
//
25
// The set iterator visits entries in the order they were first
26
// inserted into the set.  It is safe to add items to the set while
27
// using an iterator: the iterator will visit indices added to the set
28
// during the iteration, but will not re-visit indices whose values
29
// change after visiting.  Thus SparseSet can be a convenient
30
// implementation of a work queue.
31
//
32
// The SparseSet implementation is NOT thread-safe.  It is up to the
33
// caller to make sure only one thread is accessing the set.  (Typically
34
// these sets are temporary values and used in situations where speed is
35
// important.)
36
//
37
// The SparseSet interface does not present all the usual STL bells and
38
// whistles.
39
//
40
// Implemented with reference to Briggs & Torczon, An Efficient
41
// Representation for Sparse Sets, ACM Letters on Programming Languages
42
// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
43
//
44
// This is a specialization of sparse array; see sparse_array.h.
45
46
// IMPLEMENTATION
47
//
48
// See sparse_array.h for implementation details.
49
50
// Doing this simplifies the logic below.
51
#ifndef __has_feature
52
#define __has_feature(x) 0
53
#endif
54
55
#include <assert.h>
56
#include <stdint.h>
57
#if __has_feature(memory_sanitizer)
58
#include <sanitizer/msan_interface.h>
59
#endif
60
#include <algorithm>
61
#include <memory>
62
#include <utility>
63
64
#include "re2/pod_array.h"
65
66
namespace re2 {
67
68
template<typename Value>
69
class SparseSetT {
70
 public:
71
  SparseSetT();
72
  explicit SparseSetT(int max_size);
73
  ~SparseSetT();
74
75
  typedef int* iterator;
76
  typedef const int* const_iterator;
77
78
  // Return the number of entries in the set.
79
  int size() const {
80
    return size_;
81
  }
82
83
  // Indicate whether the set is empty.
84
  int empty() const {
85
    return size_ == 0;
86
  }
87
88
  // Iterate over the set.
89
20.6M
  iterator begin() {
90
20.6M
    return dense_.data();
91
20.6M
  }
92
158M
  iterator end() {
93
158M
    return dense_.data() + size_;
94
158M
  }
95
96
  const_iterator begin() const {
97
    return dense_.data();
98
  }
99
  const_iterator end() const {
100
    return dense_.data() + size_;
101
  }
102
103
  // Change the maximum size of the set.
104
  // Invalidates all iterators.
105
  void resize(int new_max_size);
106
107
  // Return the maximum size of the set.
108
  // Indices can be in the range [0, max_size).
109
634M
  int max_size() const {
110
634M
    if (dense_.data() != NULL)
111
634M
      return dense_.size();
112
0
    else
113
0
      return 0;
114
634M
  }
115
116
  // Clear the set.
117
45.3M
  void clear() {
118
45.3M
    size_ = 0;
119
45.3M
  }
120
121
  // Check whether index i is in the set.
122
  bool contains(int i) const;
123
124
  // Comparison function for sorting.
125
  // Can sort the sparse set so that future iterations
126
  // will visit indices in increasing order using
127
  // std::sort(arr.begin(), arr.end(), arr.less);
128
  static bool less(int a, int b);
129
130
 public:
131
  // Insert index i into the set.
132
113M
  iterator insert(int i) {
133
113M
    return InsertInternal(true, i);
134
113M
  }
135
136
  // Insert index i into the set.
137
  // Fast but unsafe: only use if contains(i) is false.
138
138M
  iterator insert_new(int i) {
139
138M
    return InsertInternal(false, i);
140
138M
  }
141
142
 private:
143
251M
  iterator InsertInternal(bool allow_existing, int i) {
144
251M
    DebugCheckInvariants();
145
251M
    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
146
0
      assert(false && "illegal index");
147
      // Semantically, end() would be better here, but we already know
148
      // the user did something stupid, so begin() insulates them from
149
      // dereferencing an invalid pointer.
150
0
      return begin();
151
0
    }
152
251M
    if (!allow_existing) {
153
138M
      assert(!contains(i));
154
138M
      create_index(i);
155
138M
    } else {
156
113M
      if (!contains(i))
157
89.3M
        create_index(i);
158
113M
    }
159
251M
    DebugCheckInvariants();
160
251M
    return dense_.data() + sparse_[i];
161
251M
  }
162
163
  // Add the index i to the set.
164
  // Only use if contains(i) is known to be false.
165
  // This function is private, only intended as a helper
166
  // for other methods.
167
  void create_index(int i);
168
169
  // In debug mode, verify that some invariant properties of the class
170
  // are being maintained. This is called at the end of the constructor
171
  // and at the beginning and end of all public non-const member functions.
172
  void DebugCheckInvariants() const;
173
174
  // Initializes memory for elements [min, max).
175
54.7k
  void MaybeInitializeMemory(int min, int max) {
176
#if __has_feature(memory_sanitizer)
177
    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
178
#elif defined(RE2_ON_VALGRIND)
179
    for (int i = min; i < max; i++) {
180
      sparse_[i] = 0xababababU;
181
    }
182
#endif
183
54.7k
  }
184
185
  int size_ = 0;
186
  PODArray<int> sparse_;
187
  PODArray<int> dense_;
188
};
189
190
template<typename Value>
191
SparseSetT<Value>::SparseSetT() = default;
192
193
// Change the maximum size of the set.
194
// Invalidates all iterators.
195
template<typename Value>
196
void SparseSetT<Value>::resize(int new_max_size) {
197
  DebugCheckInvariants();
198
  if (new_max_size > max_size()) {
199
    const int old_max_size = max_size();
200
201
    // Construct these first for exception safety.
202
    PODArray<int> a(new_max_size);
203
    PODArray<int> b(new_max_size);
204
205
    std::copy_n(sparse_.data(), old_max_size, a.data());
206
    std::copy_n(dense_.data(), old_max_size, b.data());
207
208
    sparse_ = std::move(a);
209
    dense_ = std::move(b);
210
211
    MaybeInitializeMemory(old_max_size, new_max_size);
212
  }
213
  if (size_ > new_max_size)
214
    size_ = new_max_size;
215
  DebugCheckInvariants();
216
}
217
218
// Check whether index i is in the set.
219
template<typename Value>
220
382M
bool SparseSetT<Value>::contains(int i) const {
221
382M
  assert(i >= 0);
222
382M
  assert(i < max_size());
223
382M
  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
224
0
    return false;
225
0
  }
226
  // Unsigned comparison avoids checking sparse_[i] < 0.
227
382M
  return (uint32_t)sparse_[i] < (uint32_t)size_ &&
228
382M
         dense_[sparse_[i]] == i;
229
382M
}
230
231
template<typename Value>
232
227M
void SparseSetT<Value>::create_index(int i) {
233
227M
  assert(!contains(i));
234
227M
  assert(size_ < max_size());
235
227M
  sparse_[i] = size_;
236
227M
  dense_[size_] = i;
237
227M
  size_++;
238
227M
}
239
240
template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
241
54.7k
    sparse_(max_size), dense_(max_size) {
242
54.7k
  MaybeInitializeMemory(size_, max_size);
243
54.7k
  DebugCheckInvariants();
244
54.7k
}
245
246
54.7k
template<typename Value> SparseSetT<Value>::~SparseSetT() {
247
54.7k
  DebugCheckInvariants();
248
54.7k
}
249
250
503M
template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
251
503M
  assert(0 <= size_);
252
503M
  assert(size_ <= max_size());
253
503M
}
254
255
// Comparison function for sorting.
256
template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
257
  return a < b;
258
}
259
260
typedef SparseSetT<void> SparseSet;
261
262
}  // namespace re2
263
264
#endif  // RE2_SPARSE_SET_H_