Coverage Report

Created: 2023-03-02 15:38

/src/re2/re2/sparse_array.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#ifndef RE2_SPARSE_ARRAY_H_
6
#define RE2_SPARSE_ARRAY_H_
7
8
// DESCRIPTION
9
//
10
// SparseArray<T>(m) is a map from integers in [0, m) to T values.
11
// It requires (sizeof(T)+sizeof(int))*m memory, but it provides
12
// fast iteration through the elements in the array and fast clearing
13
// of the array.  The array has a concept of certain elements being
14
// uninitialized (having no value).
15
//
16
// Insertion and deletion are constant time operations.
17
//
18
// Allocating the array is a constant time operation
19
// when memory allocation is a constant time operation.
20
//
21
// Clearing the array is a constant time operation (unusual!).
22
//
23
// Iterating through the array is an O(n) operation, where n
24
// is the number of items in the array (not O(m)).
25
//
26
// The array iterator visits entries in the order they were first
27
// inserted into the array.  It is safe to add items to the array while
28
// using an iterator: the iterator will visit indices added to the array
29
// during the iteration, but will not re-visit indices whose values
30
// change after visiting.  Thus SparseArray can be a convenient
31
// implementation of a work queue.
32
//
33
// The SparseArray implementation is NOT thread-safe.  It is up to the
34
// caller to make sure only one thread is accessing the array.  (Typically
35
// these arrays are temporary values and used in situations where speed is
36
// important.)
37
//
38
// The SparseArray interface does not present all the usual STL bells and
39
// whistles.
40
//
41
// Implemented with reference to Briggs & Torczon, An Efficient
42
// Representation for Sparse Sets, ACM Letters on Programming Languages
43
// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
44
//
45
// Briggs & Torczon popularized this technique, but it had been known
46
// long before their paper.  They point out that Aho, Hopcroft, and
47
// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's
48
// 1986 Programming Pearls both hint at the technique in exercises to the
49
// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1
50
// exercise 8).
51
//
52
// Briggs & Torczon describe a sparse set implementation.  I have
53
// trivially generalized it to create a sparse array (actually the original
54
// target of the AHU and Bentley exercises).
55
56
// IMPLEMENTATION
57
//
58
// SparseArray is an array dense_ and an array sparse_ of identical size.
59
// At any point, the number of elements in the sparse array is size_.
60
//
61
// The array dense_ contains the size_ elements in the sparse array (with
62
// their indices),
63
// in the order that the elements were first inserted.  This array is dense:
64
// the size_ pairs are dense_[0] through dense_[size_-1].
65
//
66
// The array sparse_ maps from indices in [0,m) to indices in [0,size_).
67
// For indices present in the array, dense_[sparse_[i]].index_ == i.
68
// For indices not present in the array, sparse_ can contain any value at all,
69
// perhaps outside the range [0, size_) but perhaps not.
70
//
71
// The lax requirement on sparse_ values makes clearing the array very easy:
72
// set size_ to 0.  Lookups are slightly more complicated.
73
// An index i has a value in the array if and only if:
74
//   sparse_[i] is in [0, size_) AND
75
//   dense_[sparse_[i]].index_ == i.
76
// If both these properties hold, only then it is safe to refer to
77
//   dense_[sparse_[i]].value_
78
// as the value associated with index i.
79
//
80
// To insert a new entry, set sparse_[i] to size_,
81
// initialize dense_[size_], and then increment size_.
82
//
83
// To make the sparse array as efficient as possible for non-primitive types,
84
// elements may or may not be destroyed when they are deleted from the sparse
85
// array through a call to resize(). They immediately become inaccessible, but
86
// they are only guaranteed to be destroyed when the SparseArray destructor is
87
// called.
88
//
89
// A moved-from SparseArray will be empty.
90
91
// Doing this simplifies the logic below.
92
#ifndef __has_feature
93
#define __has_feature(x) 0
94
#endif
95
96
#include <assert.h>
97
#include <stdint.h>
98
#if __has_feature(memory_sanitizer)
99
#include <sanitizer/msan_interface.h>
100
#endif
101
#include <algorithm>
102
#include <memory>
103
#include <utility>
104
105
#include "re2/pod_array.h"
106
107
namespace re2 {
108
109
template<typename Value>
110
class SparseArray {
111
 public:
112
8.68k
  SparseArray();
113
  explicit SparseArray(int max_size);
114
  ~SparseArray();
115
116
  // IndexValue pairs: exposed in SparseArray::iterator.
117
  class IndexValue;
118
119
  typedef IndexValue* iterator;
120
  typedef const IndexValue* const_iterator;
121
122
  SparseArray(const SparseArray& src);
123
  SparseArray(SparseArray&& src);
124
125
  SparseArray& operator=(const SparseArray& src);
126
  SparseArray& operator=(SparseArray&& src);
127
128
  // Return the number of entries in the array.
129
161M
  int size() const {
130
161M
    return size_;
131
161M
  }
re2::SparseArray<re2::NFA::Thread*>::size() const
Line
Count
Source
129
75.3k
  int size() const {
130
75.3k
    return size_;
131
75.3k
  }
re2::SparseArray<int>::size() const
Line
Count
Source
129
161M
  int size() const {
130
161M
    return size_;
131
161M
  }
132
133
  // Indicate whether the array is empty.
134
  int empty() const {
135
    return size_ == 0;
136
  }
137
138
  // Iterate over the array.
139
151M
  iterator begin() {
140
151M
    return dense_.data();
141
151M
  }
re2::SparseArray<int>::begin()
Line
Count
Source
139
151M
  iterator begin() {
140
151M
    return dense_.data();
141
151M
  }
re2::SparseArray<re2::NFA::Thread*>::begin()
Line
Count
Source
139
85.6k
  iterator begin() {
140
85.6k
    return dense_.data();
141
85.6k
  }
142
390M
  iterator end() {
143
390M
    return dense_.data() + size_;
144
390M
  }
re2::SparseArray<int>::end()
Line
Count
Source
142
161M
  iterator end() {
143
161M
    return dense_.data() + size_;
144
161M
  }
re2::SparseArray<re2::NFA::Thread*>::end()
Line
Count
Source
142
228M
  iterator end() {
143
228M
    return dense_.data() + size_;
144
228M
  }
145
146
  const_iterator begin() const {
147
    return dense_.data();
148
  }
149
  const_iterator end() const {
150
    return dense_.data() + size_;
151
  }
152
153
  // Change the maximum size of the array.
154
  // Invalidates all iterators.
155
  void resize(int new_max_size);
156
157
  // Return the maximum size of the array.
158
  // Indices can be in the range [0, max_size).
159
4.72G
  int max_size() const {
160
4.72G
    if (dense_.data() != NULL)
161
4.72G
      return dense_.size();
162
17.3k
    else
163
17.3k
      return 0;
164
4.72G
  }
re2::SparseArray<re2::NFA::Thread*>::max_size() const
Line
Count
Source
159
592M
  int max_size() const {
160
592M
    if (dense_.data() != NULL)
161
592M
      return dense_.size();
162
17.3k
    else
163
17.3k
      return 0;
164
592M
  }
re2::SparseArray<int>::max_size() const
Line
Count
Source
159
4.13G
  int max_size() const {
160
4.13G
    if (dense_.data() != NULL)
161
4.13G
      return dense_.size();
162
0
    else
163
0
      return 0;
164
4.13G
  }
165
166
  // Clear the array.
167
239k
  void clear() {
168
239k
    size_ = 0;
169
239k
  }
re2::SparseArray<re2::NFA::Thread*>::clear()
Line
Count
Source
167
239k
  void clear() {
168
239k
    size_ = 0;
169
239k
  }
Unexecuted instantiation: re2::SparseArray<int>::clear()
170
171
  // Check whether index i is in the array.
172
  bool has_index(int i) const;
173
174
  // Comparison function for sorting.
175
  // Can sort the sparse array so that future iterations
176
  // will visit indices in increasing order using
177
  // std::sort(arr.begin(), arr.end(), arr.less);
178
  static bool less(const IndexValue& a, const IndexValue& b);
179
180
 public:
181
  // Set the value at index i to v.
182
  iterator set(int i, const Value& v) {
183
    return SetInternal(true, i, v);
184
  }
185
186
  // Set the value at new index i to v.
187
  // Fast but unsafe: only use if has_index(i) is false.
188
628M
  iterator set_new(int i, const Value& v) {
189
628M
    return SetInternal(false, i, v);
190
628M
  }
re2::SparseArray<re2::NFA::Thread*>::set_new(int, re2::NFA::Thread* const&)
Line
Count
Source
188
228M
  iterator set_new(int i, const Value& v) {
189
228M
    return SetInternal(false, i, v);
190
228M
  }
re2::SparseArray<int>::set_new(int, int const&)
Line
Count
Source
188
399M
  iterator set_new(int i, const Value& v) {
189
399M
    return SetInternal(false, i, v);
190
399M
  }
191
192
  // Set the value at index i to v.
193
  // Fast but unsafe: only use if has_index(i) is true.
194
  iterator set_existing(int i, const Value& v) {
195
    return SetExistingInternal(i, v);
196
  }
197
198
  // Get the value at index i.
199
  // Fast but unsafe: only use if has_index(i) is true.
200
1.10G
  Value& get_existing(int i) {
201
1.10G
    assert(has_index(i));
202
1.10G
    return dense_[sparse_[i]].value_;
203
1.10G
  }
re2::SparseArray<re2::NFA::Thread*>::get_existing(int)
Line
Count
Source
200
228M
  Value& get_existing(int i) {
201
228M
    assert(has_index(i));
202
228M
    return dense_[sparse_[i]].value_;
203
228M
  }
re2::SparseArray<int>::get_existing(int)
Line
Count
Source
200
875M
  Value& get_existing(int i) {
201
875M
    assert(has_index(i));
202
875M
    return dense_[sparse_[i]].value_;
203
875M
  }
204
  const Value& get_existing(int i) const {
205
    assert(has_index(i));
206
    return dense_[sparse_[i]].value_;
207
  }
208
209
 private:
210
628M
  iterator SetInternal(bool allow_existing, int i, const Value& v) {
211
628M
    DebugCheckInvariants();
212
628M
    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
213
0
      assert(false && "illegal index");
214
      // Semantically, end() would be better here, but we already know
215
      // the user did something stupid, so begin() insulates them from
216
      // dereferencing an invalid pointer.
217
0
      return begin();
218
0
    }
219
628M
    if (!allow_existing) {
220
628M
      assert(!has_index(i));
221
628M
      create_index(i);
222
628M
    } else {
223
0
      if (!has_index(i))
224
0
        create_index(i);
225
0
    }
226
628M
    return SetExistingInternal(i, v);
227
628M
  }
re2::SparseArray<re2::NFA::Thread*>::SetInternal(bool, int, re2::NFA::Thread* const&)
Line
Count
Source
210
228M
  iterator SetInternal(bool allow_existing, int i, const Value& v) {
211
228M
    DebugCheckInvariants();
212
228M
    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
213
0
      assert(false && "illegal index");
214
      // Semantically, end() would be better here, but we already know
215
      // the user did something stupid, so begin() insulates them from
216
      // dereferencing an invalid pointer.
217
0
      return begin();
218
0
    }
219
228M
    if (!allow_existing) {
220
228M
      assert(!has_index(i));
221
228M
      create_index(i);
222
228M
    } else {
223
0
      if (!has_index(i))
224
0
        create_index(i);
225
0
    }
226
228M
    return SetExistingInternal(i, v);
227
228M
  }
re2::SparseArray<int>::SetInternal(bool, int, int const&)
Line
Count
Source
210
399M
  iterator SetInternal(bool allow_existing, int i, const Value& v) {
211
399M
    DebugCheckInvariants();
212
399M
    if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
213
0
      assert(false && "illegal index");
214
      // Semantically, end() would be better here, but we already know
215
      // the user did something stupid, so begin() insulates them from
216
      // dereferencing an invalid pointer.
217
0
      return begin();
218
0
    }
219
399M
    if (!allow_existing) {
220
399M
      assert(!has_index(i));
221
399M
      create_index(i);
222
399M
    } else {
223
0
      if (!has_index(i))
224
0
        create_index(i);
225
0
    }
226
399M
    return SetExistingInternal(i, v);
227
399M
  }
228
229
628M
  iterator SetExistingInternal(int i, const Value& v) {
230
628M
    DebugCheckInvariants();
231
628M
    assert(has_index(i));
232
628M
    dense_[sparse_[i]].value_ = v;
233
628M
    DebugCheckInvariants();
234
628M
    return dense_.data() + sparse_[i];
235
628M
  }
re2::SparseArray<re2::NFA::Thread*>::SetExistingInternal(int, re2::NFA::Thread* const&)
Line
Count
Source
229
228M
  iterator SetExistingInternal(int i, const Value& v) {
230
228M
    DebugCheckInvariants();
231
228M
    assert(has_index(i));
232
228M
    dense_[sparse_[i]].value_ = v;
233
228M
    DebugCheckInvariants();
234
228M
    return dense_.data() + sparse_[i];
235
228M
  }
re2::SparseArray<int>::SetExistingInternal(int, int const&)
Line
Count
Source
229
399M
  iterator SetExistingInternal(int i, const Value& v) {
230
399M
    DebugCheckInvariants();
231
399M
    assert(has_index(i));
232
399M
    dense_[sparse_[i]].value_ = v;
233
399M
    DebugCheckInvariants();
234
399M
    return dense_.data() + sparse_[i];
235
399M
  }
236
237
  // Add the index i to the array.
238
  // Only use if has_index(i) is known to be false.
239
  // Since it doesn't set the value associated with i,
240
  // this function is private, only intended as a helper
241
  // for other methods.
242
  void create_index(int i);
243
244
  // In debug mode, verify that some invariant properties of the class
245
  // are being maintained. This is called at the end of the constructor
246
  // and at the beginning and end of all public non-const member functions.
247
  void DebugCheckInvariants() const;
248
249
  // Initializes memory for elements [min, max).
250
100k
  void MaybeInitializeMemory(int min, int max) {
251
#if __has_feature(memory_sanitizer)
252
    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
253
#elif defined(RE2_ON_VALGRIND)
254
    for (int i = min; i < max; i++) {
255
      sparse_[i] = 0xababababU;
256
    }
257
#endif
258
100k
  }
re2::SparseArray<int>::MaybeInitializeMemory(int, int)
Line
Count
Source
250
92.1k
  void MaybeInitializeMemory(int min, int max) {
251
#if __has_feature(memory_sanitizer)
252
    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
253
#elif defined(RE2_ON_VALGRIND)
254
    for (int i = min; i < max; i++) {
255
      sparse_[i] = 0xababababU;
256
    }
257
#endif
258
92.1k
  }
re2::SparseArray<re2::NFA::Thread*>::MaybeInitializeMemory(int, int)
Line
Count
Source
250
8.68k
  void MaybeInitializeMemory(int min, int max) {
251
#if __has_feature(memory_sanitizer)
252
    __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
253
#elif defined(RE2_ON_VALGRIND)
254
    for (int i = min; i < max; i++) {
255
      sparse_[i] = 0xababababU;
256
    }
257
#endif
258
8.68k
  }
259
260
  int size_ = 0;
261
  PODArray<int> sparse_;
262
  PODArray<IndexValue> dense_;
263
};
264
265
template<typename Value>
266
SparseArray<Value>::SparseArray() = default;
267
268
template<typename Value>
269
SparseArray<Value>::SparseArray(const SparseArray& src)
270
    : size_(src.size_),
271
      sparse_(src.max_size()),
272
46.0k
      dense_(src.max_size()) {
273
46.0k
  std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
274
46.0k
  std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
275
46.0k
}
276
277
template<typename Value>
278
SparseArray<Value>::SparseArray(SparseArray&& src)
279
    : size_(src.size_),
280
      sparse_(std::move(src.sparse_)),
281
      dense_(std::move(src.dense_)) {
282
  src.size_ = 0;
283
}
284
285
template<typename Value>
286
SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
287
  // Construct these first for exception safety.
288
  PODArray<int> a(src.max_size());
289
  PODArray<IndexValue> b(src.max_size());
290
291
  size_ = src.size_;
292
  sparse_ = std::move(a);
293
  dense_ = std::move(b);
294
  std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
295
  std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
296
  return *this;
297
}
298
299
template<typename Value>
300
SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
301
  size_ = src.size_;
302
  sparse_ = std::move(src.sparse_);
303
  dense_ = std::move(src.dense_);
304
  src.size_ = 0;
305
  return *this;
306
}
307
308
// IndexValue pairs: exposed in SparseArray::iterator.
309
template<typename Value>
310
class SparseArray<Value>::IndexValue {
311
 public:
312
630M
  int index() const { return index_; }
re2::SparseArray<re2::NFA::Thread*>::IndexValue::index() const
Line
Count
Source
312
15.3M
  int index() const { return index_; }
re2::SparseArray<int>::IndexValue::index() const
Line
Count
Source
312
615M
  int index() const { return index_; }
313
228M
  Value& value() { return value_; }
Unexecuted instantiation: re2::SparseArray<int>::IndexValue::value()
re2::SparseArray<re2::NFA::Thread*>::IndexValue::value()
Line
Count
Source
313
228M
  Value& value() { return value_; }
314
322M
  const Value& value() const { return value_; }
315
316
 private:
317
  friend class SparseArray;
318
  int index_;
319
  Value value_;
320
};
321
322
// Change the maximum size of the array.
323
// Invalidates all iterators.
324
template<typename Value>
325
8.68k
void SparseArray<Value>::resize(int new_max_size) {
326
8.68k
  DebugCheckInvariants();
327
8.68k
  if (new_max_size > max_size()) {
328
8.68k
    const int old_max_size = max_size();
329
330
    // Construct these first for exception safety.
331
8.68k
    PODArray<int> a(new_max_size);
332
8.68k
    PODArray<IndexValue> b(new_max_size);
333
334
8.68k
    std::copy_n(sparse_.data(), old_max_size, a.data());
335
8.68k
    std::copy_n(dense_.data(), old_max_size, b.data());
336
337
8.68k
    sparse_ = std::move(a);
338
8.68k
    dense_ = std::move(b);
339
340
8.68k
    MaybeInitializeMemory(old_max_size, new_max_size);
341
8.68k
  }
342
8.68k
  if (size_ > new_max_size)
343
0
    size_ = new_max_size;
344
8.68k
  DebugCheckInvariants();
345
8.68k
}
346
347
// Check whether index i is in the array.
348
template<typename Value>
349
4.09G
bool SparseArray<Value>::has_index(int i) const {
350
4.09G
  assert(i >= 0);
351
4.09G
  assert(i < max_size());
352
4.09G
  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
353
0
    return false;
354
0
  }
355
  // Unsigned comparison avoids checking sparse_[i] < 0.
356
4.09G
  return (uint32_t)sparse_[i] < (uint32_t)size_ &&
357
4.09G
         dense_[sparse_[i]].index_ == i;
358
4.09G
}
re2::SparseArray<re2::NFA::Thread*>::has_index(int) const
Line
Count
Source
349
364M
bool SparseArray<Value>::has_index(int i) const {
350
364M
  assert(i >= 0);
351
364M
  assert(i < max_size());
352
364M
  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
353
0
    return false;
354
0
  }
355
  // Unsigned comparison avoids checking sparse_[i] < 0.
356
364M
  return (uint32_t)sparse_[i] < (uint32_t)size_ &&
357
364M
         dense_[sparse_[i]].index_ == i;
358
364M
}
re2::SparseArray<int>::has_index(int) const
Line
Count
Source
349
3.73G
bool SparseArray<Value>::has_index(int i) const {
350
3.73G
  assert(i >= 0);
351
3.73G
  assert(i < max_size());
352
3.73G
  if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
353
0
    return false;
354
0
  }
355
  // Unsigned comparison avoids checking sparse_[i] < 0.
356
3.73G
  return (uint32_t)sparse_[i] < (uint32_t)size_ &&
357
3.73G
         dense_[sparse_[i]].index_ == i;
358
3.73G
}
359
360
template<typename Value>
361
628M
void SparseArray<Value>::create_index(int i) {
362
628M
  assert(!has_index(i));
363
628M
  assert(size_ < max_size());
364
628M
  sparse_[i] = size_;
365
628M
  dense_[size_].index_ = i;
366
628M
  size_++;
367
628M
}
re2::SparseArray<re2::NFA::Thread*>::create_index(int)
Line
Count
Source
361
228M
void SparseArray<Value>::create_index(int i) {
362
228M
  assert(!has_index(i));
363
228M
  assert(size_ < max_size());
364
228M
  sparse_[i] = size_;
365
228M
  dense_[size_].index_ = i;
366
228M
  size_++;
367
228M
}
re2::SparseArray<int>::create_index(int)
Line
Count
Source
361
399M
void SparseArray<Value>::create_index(int i) {
362
399M
  assert(!has_index(i));
363
399M
  assert(size_ < max_size());
364
399M
  sparse_[i] = size_;
365
399M
  dense_[size_].index_ = i;
366
399M
  size_++;
367
399M
}
368
369
template<typename Value> SparseArray<Value>::SparseArray(int max_size) :
370
92.1k
    sparse_(max_size), dense_(max_size) {
371
92.1k
  MaybeInitializeMemory(size_, max_size);
372
92.1k
  DebugCheckInvariants();
373
92.1k
}
374
375
146k
template<typename Value> SparseArray<Value>::~SparseArray() {
376
146k
  DebugCheckInvariants();
377
146k
}
re2::SparseArray<int>::~SparseArray()
Line
Count
Source
375
138k
template<typename Value> SparseArray<Value>::~SparseArray() {
376
138k
  DebugCheckInvariants();
377
138k
}
re2::SparseArray<re2::NFA::Thread*>::~SparseArray()
Line
Count
Source
375
8.68k
template<typename Value> SparseArray<Value>::~SparseArray() {
376
8.68k
  DebugCheckInvariants();
377
8.68k
}
378
379
1.88G
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
380
1.88G
  assert(0 <= size_);
381
1.88G
  assert(size_ <= max_size());
382
1.88G
}
re2::SparseArray<int>::DebugCheckInvariants() const
Line
Count
Source
379
1.19G
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
380
1.19G
  assert(0 <= size_);
381
1.19G
  assert(size_ <= max_size());
382
1.19G
}
re2::SparseArray<re2::NFA::Thread*>::DebugCheckInvariants() const
Line
Count
Source
379
686M
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
380
686M
  assert(0 <= size_);
381
686M
  assert(size_ <= max_size());
382
686M
}
383
384
// Comparison function for sorting.
385
template<typename Value> bool SparseArray<Value>::less(const IndexValue& a,
386
2.43G
                                                       const IndexValue& b) {
387
2.43G
  return a.index_ < b.index_;
388
2.43G
}
389
390
}  // namespace re2
391
392
#endif  // RE2_SPARSE_ARRAY_H_