/src/croaring_fuzzer_cc.cc
Line  | Count  | Source  | 
1  |  | // Copyright 2023 Google LLC  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | //  | 
7  |  | //     http://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
11  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
12  |  | // See the License for the specific language governing permissions and  | 
13  |  | // limitations under the License.  | 
14  |  | //  | 
15  |  | ////////////////////////////////////////////////////////////////////////////////  | 
16  |  |  | 
17  |  | #include <vector>  | 
18  |  |  | 
19  |  | #include "cpp/roaring/roaring.hh"  | 
20  |  | #include "fuzzer/FuzzedDataProvider.h"  | 
21  |  |  | 
22  |  | std::vector<uint32_t> ConsumeVecInRange(FuzzedDataProvider &fdp, size_t length,  | 
23  |  |                                         uint32_t min_value,  | 
24  | 11.3k  |                                         uint32_t max_value) { | 
25  | 11.3k  |     std::vector<uint32_t> result = {0}; | 
26  | 11.3k  |     result.resize(length);  | 
27  | 5.66M  |     std::generate(result.begin(), result.end(), [&]() { | 
28  | 5.66M  |         return fdp.ConsumeIntegralInRange<uint32_t>(min_value, max_value);  | 
29  | 5.66M  |     });  | 
30  | 11.3k  |     return result;  | 
31  | 11.3k  | }  | 
32  |  |  | 
33  | 5.66k  | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { | 
34  |  |     /**  | 
35  |  |      * A bitmap may contain up to 2**32 elements. Later this function will  | 
36  |  |      * output the content to an array where each element uses 32 bits of  | 
37  |  |      * storage. That would use 16 GB. Thus this function is bound to run out of  | 
38  |  |      * memory.  | 
39  |  |      *  | 
40  |  |      * Even without the full serialization to a 32-bit array, a bitmap may still  | 
41  |  |      * use over 512 MB in the normal course of operation: that is to be expected  | 
42  |  |      * since it can represent all sets of integers in [0,2**32]. This function  | 
43  |  |      * may hold several bitmaps in memory at once, so it can require gigabytes  | 
44  |  |      * of memory (without bugs). Hence, unless it has a generous memory  | 
45  |  |      * capacity, this function will run out of memory almost certainly.  | 
46  |  |      *  | 
47  |  |      * For sanity, we may limit the range to, say, 10,000,000 which will use 38  | 
48  |  |      * MB or so. With such a limited range, if we run out of memory, then we can  | 
49  |  |      * almost certain that it has to do with a genuine bug.  | 
50  |  |      */  | 
51  |  |  | 
52  | 5.66k  |     uint32_t range_start = 0;  | 
53  | 5.66k  |     uint32_t range_end = 10'000'000;  | 
54  |  |  | 
55  |  |     /**  | 
56  |  |      * We are not solely dependent on the range [range_start, range_end) because  | 
57  |  |      * ConsumeVecInRange below produce integers in a small range starting at 0.  | 
58  |  |      */  | 
59  |  |  | 
60  | 5.66k  |     FuzzedDataProvider fdp(data, size);  | 
61  |  |     /**  | 
62  |  |      * The next line was ConsumeVecInRange(fdp, 500, 0, 1000) but it would pick  | 
63  |  |      * 500 values at random from 0, 1000, making almost certain that all of the  | 
64  |  |      * values are picked. It seems more useful to pick 500 values in the range  | 
65  |  |      * 0,1000.  | 
66  |  |      */  | 
67  | 5.66k  |     std::vector<uint32_t> bitmap_data_a = ConsumeVecInRange(fdp, 500, 0, 1000);  | 
68  | 5.66k  |     roaring::Roaring a(bitmap_data_a.size(), bitmap_data_a.data());  | 
69  | 5.66k  |     a.runOptimize();  | 
70  | 5.66k  |     a.shrinkToFit();  | 
71  |  |  | 
72  | 5.66k  |     std::vector<uint32_t> bitmap_data_b = ConsumeVecInRange(fdp, 500, 0, 1000);  | 
73  | 5.66k  |     roaring::Roaring b(bitmap_data_b.size(), bitmap_data_b.data());  | 
74  | 5.66k  |     b.runOptimize();  | 
75  | 5.66k  |     b.add(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
76  | 5.66k  |     b.addChecked(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
77  | 5.66k  |     b.addRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
78  | 5.66k  |                fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
79  |  |     // add half of a to b.  | 
80  | 5.66k  |     b.addMany(bitmap_data_a.size() / 2, bitmap_data_a.data());  | 
81  | 5.66k  |     b.remove(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
82  | 5.66k  |     b.removeChecked(  | 
83  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
84  | 5.66k  |     b.removeRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
85  | 5.66k  |                   fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
86  | 5.66k  |     b.removeRangeClosed(  | 
87  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
88  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
89  | 5.66k  |     b.maximum();  | 
90  | 5.66k  |     b.minimum();  | 
91  | 5.66k  |     b.contains(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
92  | 5.66k  |     b.containsRange(  | 
93  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
94  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
95  |  |  | 
96  | 5.66k  |     uint32_t element = 0;  | 
97  | 5.66k  |     a.select(fdp.ConsumeIntegralInRange<uint32_t>(0, 1000), &element);  | 
98  | 5.66k  |     a.intersect(b);  | 
99  | 5.66k  |     a.jaccard_index(b);  | 
100  | 5.66k  |     a.or_cardinality(b);  | 
101  | 5.66k  |     a.andnot_cardinality(b);  | 
102  | 5.66k  |     a.xor_cardinality(b);  | 
103  | 5.66k  |     a.rank(fdp.ConsumeIntegralInRange<uint32_t>(0, 5000));  | 
104  | 5.66k  |     a.getSizeInBytes();  | 
105  |  |  | 
106  | 5.66k  |     roaring::Roaring c = a & b;  | 
107  | 5.66k  |     roaring::Roaring d = a - b;  | 
108  | 5.66k  |     roaring::Roaring e = a | b;  | 
109  | 5.66k  |     roaring::Roaring f = a ^ b;  | 
110  | 5.66k  |     a |= e;  | 
111  | 5.66k  |     a &= b;  | 
112  | 5.66k  |     a -= c;  | 
113  | 5.66k  |     a ^= f;  | 
114  |  |  | 
115  | 5.66k  |     volatile bool is_equal = (a == b);  | 
116  |  |  | 
117  | 5.66k  |     std::vector<uint32_t> b_as_array = {0}; | 
118  | 5.66k  |     b_as_array.resize(b.cardinality());  | 
119  | 5.66k  |     b.isEmpty();  | 
120  | 5.66k  |     b.toUint32Array(b_as_array.data());  | 
121  |  |  | 
122  | 5.66k  |     a.isSubset(b);  | 
123  | 5.66k  |     a.isStrictSubset(b);  | 
124  | 5.66k  |     b.flip(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
125  | 5.66k  |            fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
126  | 5.66k  |     b.flipClosed(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),  | 
127  | 5.66k  |                  fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
128  | 5.66k  |     b.removeRunCompression();  | 
129  |  |  | 
130  |  |     // Move/copy constructors  | 
131  | 5.66k  |     roaring::Roaring copied = b;  | 
132  | 5.66k  |     roaring::Roaring moved = std::move(b);  | 
133  |  |  | 
134  |  |     // Asignment operators  | 
135  | 5.66k  |     b = copied;  | 
136  | 5.66k  |     b = std::move(moved);  | 
137  |  |  | 
138  |  |     // Safe read from serialized  | 
139  | 5.66k  |     std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100);  | 
140  | 5.66k  |     try { | 
141  | 5.66k  |         roaring::Roaring read_safely =  | 
142  | 5.66k  |             roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size());  | 
143  |  |         // The above is guaranteed to be safe. However, read_safely is maybe  | 
144  |  |         // in an improper state and it cannot be used safely (including for  | 
145  |  |         // reserialization).  | 
146  | 5.66k  |     } catch (...) { | 
147  | 5.65k  |     }  | 
148  |  |  | 
149  |  |     // The bitmap b can be serialized and re-read.  | 
150  | 5.66k  |     std::size_t expected_size_in_bytes = b.getSizeInBytes();  | 
151  | 5.66k  |     std::vector<char> buffer(expected_size_in_bytes);  | 
152  | 5.66k  |     std::size_t size_in_bytes = b.write(buffer.data());  | 
153  | 5.66k  |     assert(expected_size_in_bytes == size_in_bytes);  | 
154  | 5.66k  |     roaring::Roaring bread =  | 
155  | 5.66k  |         roaring::Roaring::readSafe(buffer.data(), size_in_bytes);  | 
156  | 5.66k  |     assert(bread == b);  | 
157  |  |  | 
158  | 5.66k  |     f.toString();  | 
159  |  |  | 
160  | 5.66k  |     volatile int unused = 0;  | 
161  |  |  | 
162  | 312k  |     for (roaring::Roaring::const_iterator i = a.begin(); i != a.end(); i++) { | 
163  | 306k  |         unused++;  | 
164  | 306k  |     }  | 
165  |  |  | 
166  | 5.66k  |     roaring::Roaring::const_iterator b_iter = b.begin();  | 
167  | 5.66k  |     b_iter.equalorlarger(  | 
168  | 5.66k  |         fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));  | 
169  |  |  | 
170  | 5.66k  |     return 0;  | 
171  | 5.66k  | }  |