/src/re2/re2/sparse_set.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2006 The RE2 Authors.  All Rights Reserved.  | 
2  |  | // Use of this source code is governed by a BSD-style  | 
3  |  | // license that can be found in the LICENSE file.  | 
4  |  |  | 
5  |  | #ifndef RE2_SPARSE_SET_H_  | 
6  |  | #define RE2_SPARSE_SET_H_  | 
7  |  |  | 
8  |  | // DESCRIPTION  | 
9  |  | //  | 
10  |  | // SparseSet(m) is a set of integers in [0, m).  | 
11  |  | // It requires sizeof(int)*m memory, but it provides  | 
12  |  | // fast iteration through the elements in the set and fast clearing  | 
13  |  | // of the set.  | 
14  |  | //  | 
15  |  | // Insertion and deletion are constant time operations.  | 
16  |  | //  | 
17  |  | // Allocating the set is a constant time operation  | 
18  |  | // when memory allocation is a constant time operation.  | 
19  |  | //  | 
20  |  | // Clearing the set is a constant time operation (unusual!).  | 
21  |  | //  | 
22  |  | // Iterating through the set is an O(n) operation, where n  | 
23  |  | // is the number of items in the set (not O(m)).  | 
24  |  | //  | 
25  |  | // The set iterator visits entries in the order they were first  | 
26  |  | // inserted into the set.  It is safe to add items to the set while  | 
27  |  | // using an iterator: the iterator will visit indices added to the set  | 
28  |  | // during the iteration, but will not re-visit indices whose values  | 
29  |  | // change after visiting.  Thus SparseSet can be a convenient  | 
30  |  | // implementation of a work queue.  | 
31  |  | //  | 
32  |  | // The SparseSet implementation is NOT thread-safe.  It is up to the  | 
33  |  | // caller to make sure only one thread is accessing the set.  (Typically  | 
34  |  | // these sets are temporary values and used in situations where speed is  | 
35  |  | // important.)  | 
36  |  | //  | 
37  |  | // The SparseSet interface does not present all the usual STL bells and  | 
38  |  | // whistles.  | 
39  |  | //  | 
40  |  | // Implemented with reference to Briggs & Torczon, An Efficient  | 
41  |  | // Representation for Sparse Sets, ACM Letters on Programming Languages  | 
42  |  | // and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.  | 
43  |  | //  | 
44  |  | // This is a specialization of sparse array; see sparse_array.h.  | 
45  |  |  | 
46  |  | // IMPLEMENTATION  | 
47  |  | //  | 
48  |  | // See sparse_array.h for implementation details.  | 
49  |  |  | 
50  |  | // Doing this simplifies the logic below.  | 
51  |  | #ifndef __has_feature  | 
52  |  | #define __has_feature(x) 0  | 
53  |  | #endif  | 
54  |  |  | 
55  |  | #include <assert.h>  | 
56  |  | #include <stdint.h>  | 
57  |  | #if __has_feature(memory_sanitizer)  | 
58  |  | #include <sanitizer/msan_interface.h>  | 
59  |  | #endif  | 
60  |  | #include <algorithm>  | 
61  |  | #include <memory>  | 
62  |  | #include <utility>  | 
63  |  |  | 
64  |  | #include "re2/pod_array.h"  | 
65  |  |  | 
66  |  | namespace re2 { | 
67  |  |  | 
68  |  | template<typename Value>  | 
69  |  | class SparseSetT { | 
70  |  |  public:  | 
71  |  |   SparseSetT();  | 
72  |  |   explicit SparseSetT(int max_size);  | 
73  |  |   ~SparseSetT();  | 
74  |  |  | 
75  |  |   typedef int* iterator;  | 
76  |  |   typedef const int* const_iterator;  | 
77  |  |  | 
78  |  |   // Return the number of entries in the set.  | 
79  |  |   int size() const { | 
80  |  |     return size_;  | 
81  |  |   }  | 
82  |  |  | 
83  |  |   // Indicate whether the set is empty.  | 
84  |  |   int empty() const { | 
85  |  |     return size_ == 0;  | 
86  |  |   }  | 
87  |  |  | 
88  |  |   // Iterate over the set.  | 
89  | 20.6M  |   iterator begin() { | 
90  | 20.6M  |     return dense_.data();  | 
91  | 20.6M  |   }  | 
92  | 158M  |   iterator end() { | 
93  | 158M  |     return dense_.data() + size_;  | 
94  | 158M  |   }  | 
95  |  |  | 
96  |  |   const_iterator begin() const { | 
97  |  |     return dense_.data();  | 
98  |  |   }  | 
99  |  |   const_iterator end() const { | 
100  |  |     return dense_.data() + size_;  | 
101  |  |   }  | 
102  |  |  | 
103  |  |   // Change the maximum size of the set.  | 
104  |  |   // Invalidates all iterators.  | 
105  |  |   void resize(int new_max_size);  | 
106  |  |  | 
107  |  |   // Return the maximum size of the set.  | 
108  |  |   // Indices can be in the range [0, max_size).  | 
109  | 634M  |   int max_size() const { | 
110  | 634M  |     if (dense_.data() != NULL)  | 
111  | 634M  |       return dense_.size();  | 
112  | 0  |     else  | 
113  | 0  |       return 0;  | 
114  | 634M  |   }  | 
115  |  |  | 
116  |  |   // Clear the set.  | 
117  | 45.3M  |   void clear() { | 
118  | 45.3M  |     size_ = 0;  | 
119  | 45.3M  |   }  | 
120  |  |  | 
121  |  |   // Check whether index i is in the set.  | 
122  |  |   bool contains(int i) const;  | 
123  |  |  | 
124  |  |   // Comparison function for sorting.  | 
125  |  |   // Can sort the sparse set so that future iterations  | 
126  |  |   // will visit indices in increasing order using  | 
127  |  |   // std::sort(arr.begin(), arr.end(), arr.less);  | 
128  |  |   static bool less(int a, int b);  | 
129  |  |  | 
130  |  |  public:  | 
131  |  |   // Insert index i into the set.  | 
132  | 113M  |   iterator insert(int i) { | 
133  | 113M  |     return InsertInternal(true, i);  | 
134  | 113M  |   }  | 
135  |  |  | 
136  |  |   // Insert index i into the set.  | 
137  |  |   // Fast but unsafe: only use if contains(i) is false.  | 
138  | 138M  |   iterator insert_new(int i) { | 
139  | 138M  |     return InsertInternal(false, i);  | 
140  | 138M  |   }  | 
141  |  |  | 
142  |  |  private:  | 
143  | 251M  |   iterator InsertInternal(bool allow_existing, int i) { | 
144  | 251M  |     DebugCheckInvariants();  | 
145  | 251M  |     if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { | 
146  | 0  |       assert(false && "illegal index");  | 
147  |  |       // Semantically, end() would be better here, but we already know  | 
148  |  |       // the user did something stupid, so begin() insulates them from  | 
149  |  |       // dereferencing an invalid pointer.  | 
150  | 0  |       return begin();  | 
151  | 0  |     }  | 
152  | 251M  |     if (!allow_existing) { | 
153  | 138M  |       assert(!contains(i));  | 
154  | 138M  |       create_index(i);  | 
155  | 138M  |     } else { | 
156  | 113M  |       if (!contains(i))  | 
157  | 89.3M  |         create_index(i);  | 
158  | 113M  |     }  | 
159  | 251M  |     DebugCheckInvariants();  | 
160  | 251M  |     return dense_.data() + sparse_[i];  | 
161  | 251M  |   }  | 
162  |  |  | 
163  |  |   // Add the index i to the set.  | 
164  |  |   // Only use if contains(i) is known to be false.  | 
165  |  |   // This function is private, only intended as a helper  | 
166  |  |   // for other methods.  | 
167  |  |   void create_index(int i);  | 
168  |  |  | 
169  |  |   // In debug mode, verify that some invariant properties of the class  | 
170  |  |   // are being maintained. This is called at the end of the constructor  | 
171  |  |   // and at the beginning and end of all public non-const member functions.  | 
172  |  |   void DebugCheckInvariants() const;  | 
173  |  |  | 
174  |  |   // Initializes memory for elements [min, max).  | 
175  | 54.7k  |   void MaybeInitializeMemory(int min, int max) { | 
176  |  | #if __has_feature(memory_sanitizer)  | 
177  |  |     __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);  | 
178  |  | #elif defined(RE2_ON_VALGRIND)  | 
179  |  |     for (int i = min; i < max; i++) { | 
180  |  |       sparse_[i] = 0xababababU;  | 
181  |  |     }  | 
182  |  | #endif  | 
183  | 54.7k  |   }  | 
184  |  |  | 
185  |  |   int size_ = 0;  | 
186  |  |   PODArray<int> sparse_;  | 
187  |  |   PODArray<int> dense_;  | 
188  |  | };  | 
189  |  |  | 
190  |  | template<typename Value>  | 
191  |  | SparseSetT<Value>::SparseSetT() = default;  | 
192  |  |  | 
193  |  | // Change the maximum size of the set.  | 
194  |  | // Invalidates all iterators.  | 
195  |  | template<typename Value>  | 
196  |  | void SparseSetT<Value>::resize(int new_max_size) { | 
197  |  |   DebugCheckInvariants();  | 
198  |  |   if (new_max_size > max_size()) { | 
199  |  |     const int old_max_size = max_size();  | 
200  |  |  | 
201  |  |     // Construct these first for exception safety.  | 
202  |  |     PODArray<int> a(new_max_size);  | 
203  |  |     PODArray<int> b(new_max_size);  | 
204  |  |  | 
205  |  |     std::copy_n(sparse_.data(), old_max_size, a.data());  | 
206  |  |     std::copy_n(dense_.data(), old_max_size, b.data());  | 
207  |  |  | 
208  |  |     sparse_ = std::move(a);  | 
209  |  |     dense_ = std::move(b);  | 
210  |  |  | 
211  |  |     MaybeInitializeMemory(old_max_size, new_max_size);  | 
212  |  |   }  | 
213  |  |   if (size_ > new_max_size)  | 
214  |  |     size_ = new_max_size;  | 
215  |  |   DebugCheckInvariants();  | 
216  |  | }  | 
217  |  |  | 
218  |  | // Check whether index i is in the set.  | 
219  |  | template<typename Value>  | 
220  | 382M  | bool SparseSetT<Value>::contains(int i) const { | 
221  | 382M  |   assert(i >= 0);  | 
222  | 382M  |   assert(i < max_size());  | 
223  | 382M  |   if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { | 
224  | 0  |     return false;  | 
225  | 0  |   }  | 
226  |  |   // Unsigned comparison avoids checking sparse_[i] < 0.  | 
227  | 382M  |   return (uint32_t)sparse_[i] < (uint32_t)size_ &&  | 
228  | 382M  |          dense_[sparse_[i]] == i;  | 
229  | 382M  | }  | 
230  |  |  | 
231  |  | template<typename Value>  | 
232  | 227M  | void SparseSetT<Value>::create_index(int i) { | 
233  | 227M  |   assert(!contains(i));  | 
234  | 227M  |   assert(size_ < max_size());  | 
235  | 227M  |   sparse_[i] = size_;  | 
236  | 227M  |   dense_[size_] = i;  | 
237  | 227M  |   size_++;  | 
238  | 227M  | }  | 
239  |  |  | 
240  |  | template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :  | 
241  | 54.7k  |     sparse_(max_size), dense_(max_size) { | 
242  | 54.7k  |   MaybeInitializeMemory(size_, max_size);  | 
243  | 54.7k  |   DebugCheckInvariants();  | 
244  | 54.7k  | }  | 
245  |  |  | 
246  | 54.7k  | template<typename Value> SparseSetT<Value>::~SparseSetT() { | 
247  | 54.7k  |   DebugCheckInvariants();  | 
248  | 54.7k  | }  | 
249  |  |  | 
250  | 503M  | template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { | 
251  | 503M  |   assert(0 <= size_);  | 
252  | 503M  |   assert(size_ <= max_size());  | 
253  | 503M  | }  | 
254  |  |  | 
255  |  | // Comparison function for sorting.  | 
256  |  | template<typename Value> bool SparseSetT<Value>::less(int a, int b) { | 
257  |  |   return a < b;  | 
258  |  | }  | 
259  |  |  | 
260  |  | typedef SparseSetT<void> SparseSet;  | 
261  |  |  | 
262  |  | }  // namespace re2  | 
263  |  |  | 
264  |  | #endif  // RE2_SPARSE_SET_H_  |