/src/croaring_fuzzer_cc.cc
Line | Count | Source |
1 | | // Copyright 2023 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | //////////////////////////////////////////////////////////////////////////////// |
16 | | |
17 | | #include <vector> |
18 | | |
19 | | #include "cpp/roaring.hh" |
20 | | #include "fuzzer/FuzzedDataProvider.h" |
21 | | |
22 | | std::vector<uint32_t> ConsumeVecInRange(FuzzedDataProvider &fdp, size_t length, |
23 | | uint32_t min_value, |
24 | 10.7k | uint32_t max_value) { |
25 | 10.7k | std::vector<uint32_t> result = {0}; |
26 | 10.7k | result.resize(length); |
27 | 5.36M | std::generate(result.begin(), result.end(), [&]() { |
28 | 5.36M | return fdp.ConsumeIntegralInRange<uint32_t>(min_value, max_value); |
29 | 5.36M | }); |
30 | 10.7k | return result; |
31 | 10.7k | } |
32 | | |
33 | 5.36k | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |
34 | | /** |
35 | | * A bitmap may contain up to 2**32 elements. Later this function will |
36 | | * output the content to an array where each element uses 32 bits of |
37 | | * storage. That would use 16 GB. Thus this function is bound to run out of |
38 | | * memory. |
39 | | * |
40 | | * Even without the full serialization to a 32-bit array, a bitmap may still |
41 | | * use over 512 MB in the normal course of operation: that is to be expected |
42 | | * since it can represent all sets of integers in [0,2**32]. This function |
43 | | * may hold several bitmaps in memory at once, so it can require gigabytes |
44 | | * of memory (without bugs). Hence, unless it has a generous memory |
45 | | * capacity, this function will run out of memory almost certainly. |
46 | | * |
47 | | * For sanity, we may limit the range to, say, 10,000,000 which will use 38 |
48 | | * MB or so. With such a limited range, if we run out of memory, then we can |
49 | | * almost certain that it has to do with a genuine bug. |
50 | | */ |
51 | | |
52 | 5.36k | uint32_t range_start = 0; |
53 | 5.36k | uint32_t range_end = 10'000'000; |
54 | | |
55 | | /** |
56 | | * We are not solely dependent on the range [range_start, range_end) because |
57 | | * ConsumeVecInRange below produce integers in a small range starting at 0. |
58 | | */ |
59 | | |
60 | 5.36k | FuzzedDataProvider fdp(data, size); |
61 | | /** |
62 | | * The next line was ConsumeVecInRange(fdp, 500, 0, 1000) but it would pick |
63 | | * 500 values at random from 0, 1000, making almost certain that all of the |
64 | | * values are picked. It seems more useful to pick 500 values in the range |
65 | | * 0,1000. |
66 | | */ |
67 | 5.36k | std::vector<uint32_t> bitmap_data_a = ConsumeVecInRange(fdp, 500, 0, 1000); |
68 | 5.36k | roaring::Roaring a(bitmap_data_a.size(), bitmap_data_a.data()); |
69 | 5.36k | a.runOptimize(); |
70 | 5.36k | a.shrinkToFit(); |
71 | | |
72 | 5.36k | std::vector<uint32_t> bitmap_data_b = ConsumeVecInRange(fdp, 500, 0, 1000); |
73 | 5.36k | roaring::Roaring b(bitmap_data_b.size(), bitmap_data_b.data()); |
74 | 5.36k | b.runOptimize(); |
75 | 5.36k | b.add(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
76 | 5.36k | b.addChecked(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
77 | 5.36k | b.addRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
78 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
79 | | // add half of a to b. |
80 | 5.36k | b.addMany(bitmap_data_a.size() / 2, bitmap_data_a.data()); |
81 | 5.36k | b.remove(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
82 | 5.36k | b.removeChecked( |
83 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
84 | 5.36k | b.removeRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
85 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
86 | 5.36k | b.removeRangeClosed( |
87 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
88 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
89 | 5.36k | b.maximum(); |
90 | 5.36k | b.minimum(); |
91 | 5.36k | b.contains(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
92 | 5.36k | b.containsRange( |
93 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
94 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
95 | | |
96 | 5.36k | uint32_t element = 0; |
97 | 5.36k | a.select(fdp.ConsumeIntegralInRange<uint32_t>(0, 1000), &element); |
98 | 5.36k | a.intersect(b); |
99 | 5.36k | a.jaccard_index(b); |
100 | 5.36k | a.or_cardinality(b); |
101 | 5.36k | a.andnot_cardinality(b); |
102 | 5.36k | a.xor_cardinality(b); |
103 | 5.36k | a.rank(fdp.ConsumeIntegralInRange<uint32_t>(0, 5000)); |
104 | 5.36k | a.getSizeInBytes(); |
105 | | |
106 | 5.36k | roaring::Roaring c = a & b; |
107 | 5.36k | roaring::Roaring d = a - b; |
108 | 5.36k | roaring::Roaring e = a | b; |
109 | 5.36k | roaring::Roaring f = a ^ b; |
110 | 5.36k | a |= e; |
111 | 5.36k | a &= b; |
112 | 5.36k | a -= c; |
113 | 5.36k | a ^= f; |
114 | | |
115 | 5.36k | volatile bool is_equal = (a == b); |
116 | | |
117 | 5.36k | std::vector<uint32_t> b_as_array = {0}; |
118 | 5.36k | b_as_array.resize(b.cardinality()); |
119 | 5.36k | b.isEmpty(); |
120 | 5.36k | b.toUint32Array(b_as_array.data()); |
121 | | |
122 | 5.36k | a.isSubset(b); |
123 | 5.36k | a.isStrictSubset(b); |
124 | 5.36k | b.flip(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
125 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
126 | 5.36k | b.flipClosed(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), |
127 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
128 | 5.36k | b.removeRunCompression(); |
129 | | |
130 | | // Move/copy constructors |
131 | 5.36k | roaring::Roaring copied = b; |
132 | 5.36k | roaring::Roaring moved = std::move(b); |
133 | | |
134 | | // Asignment operators |
135 | 5.36k | b = copied; |
136 | 5.36k | b = std::move(moved); |
137 | | |
138 | | // Safe read from serialized |
139 | 5.36k | std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100); |
140 | 5.36k | try { |
141 | 5.36k | roaring::Roaring read_safely = |
142 | 5.36k | roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size()); |
143 | | // The above is guaranteed to be safe. However, read_safely is maybe |
144 | | // in an improper state and it cannot be used safely (including for |
145 | | // reserialization). |
146 | 5.36k | } catch (...) { |
147 | 5.35k | } |
148 | | |
149 | | // The bitmap b can be serialized and re-read. |
150 | 5.36k | std::size_t expected_size_in_bytes = b.getSizeInBytes(); |
151 | 5.36k | std::vector<char> buffer(expected_size_in_bytes); |
152 | 5.36k | std::size_t size_in_bytes = b.write(buffer.data()); |
153 | 5.36k | assert(expected_size_in_bytes == size_in_bytes); |
154 | 5.36k | roaring::Roaring bread = |
155 | 5.36k | roaring::Roaring::readSafe(buffer.data(), size_in_bytes); |
156 | 5.36k | assert(bread == b); |
157 | | |
158 | 5.36k | f.toString(); |
159 | | |
160 | 5.36k | volatile int unused = 0; |
161 | | |
162 | 278k | for (roaring::Roaring::const_iterator i = a.begin(); i != a.end(); i++) { |
163 | 272k | unused++; |
164 | 272k | } |
165 | | |
166 | 5.36k | roaring::Roaring::const_iterator b_iter = b.begin(); |
167 | 5.36k | b_iter.equalorlarger( |
168 | 5.36k | fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end)); |
169 | | |
170 | 5.36k | return 0; |
171 | 5.36k | } |