Coverage Report

Created: 2025-07-09 06:54

/src/mercurial/contrib/fuzz/FuzzedDataProvider.h
Line
Count
Source (jump to first uncovered line)
1
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// A single header library providing an utility class to break up an array of
9
// bytes. Whenever run on the same input, provides the same output, as long as
10
// its methods are called in the same order, with the same arguments.
11
//===----------------------------------------------------------------------===//
12
13
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
14
#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
15
16
#include <algorithm>
17
#include <climits>
18
#include <cstddef>
19
#include <cstdint>
20
#include <cstring>
21
#include <initializer_list>
22
#include <string>
23
#include <type_traits>
24
#include <utility>
25
#include <vector>
26
27
// In addition to the comments below, the API is also briefly documented at
28
// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
29
class FuzzedDataProvider
30
{
31
      public:
32
  // |data| is an array of length |size| that the FuzzedDataProvider wraps
33
  // to provide more granular access. |data| must outlive the
34
  // FuzzedDataProvider.
35
  FuzzedDataProvider(const uint8_t *data, size_t size)
36
4.74k
      : data_ptr_(data), remaining_bytes_(size)
37
4.74k
  {
38
4.74k
  }
39
  ~FuzzedDataProvider() = default;
40
41
  // Returns a std::vector containing |num_bytes| of input data. If fewer
42
  // than |num_bytes| of data remain, returns a shorter std::vector
43
  // containing all of the data that's left. Can be used with any byte
44
  // sized type, such as char, unsigned char, uint8_t, etc.
45
  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes)
46
  {
47
    num_bytes = std::min(num_bytes, remaining_bytes_);
48
    return ConsumeBytes<T>(num_bytes, num_bytes);
49
  }
50
51
  // Similar to |ConsumeBytes|, but also appends the terminator value at
52
  // the end of the resulting vector. Useful, when a mutable
53
  // null-terminated C-string is needed, for example. But that is a rare
54
  // case. Better avoid it, if possible, and prefer using |ConsumeBytes|
55
  // or |ConsumeBytesAsString| methods.
56
  template <typename T>
57
  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
58
                                            T terminator = 0)
59
  {
60
    num_bytes = std::min(num_bytes, remaining_bytes_);
61
    std::vector<T> result =
62
        ConsumeBytes<T>(num_bytes + 1, num_bytes);
63
    result.back() = terminator;
64
    return result;
65
  }
66
67
  // Returns a std::string containing |num_bytes| of input data. Using
68
  // this and
69
  // |.c_str()| on the resulting string is the best way to get an
70
  // immutable null-terminated C string. If fewer than |num_bytes| of data
71
  // remain, returns a shorter std::string containing all of the data
72
  // that's left.
73
  std::string ConsumeBytesAsString(size_t num_bytes)
74
4.67k
  {
75
4.67k
    static_assert(sizeof(std::string::value_type) ==
76
4.67k
                      sizeof(uint8_t),
77
4.67k
                  "ConsumeBytesAsString cannot convert the data to "
78
4.67k
                  "a string.");
79
80
4.67k
    num_bytes = std::min(num_bytes, remaining_bytes_);
81
4.67k
    std::string result(
82
4.67k
        reinterpret_cast<const std::string::value_type *>(
83
4.67k
            data_ptr_),
84
4.67k
        num_bytes);
85
4.67k
    Advance(num_bytes);
86
4.67k
    return result;
87
4.67k
  }
88
89
  // Returns a number in the range [min, max] by consuming bytes from the
90
  // input data. The value might not be uniformly distributed in the given
91
  // range. If there's no input data left, always returns |min|. |min|
92
  // must be less than or equal to |max|.
93
  template <typename T> T ConsumeIntegralInRange(T min, T max)
94
364
  {
95
364
    static_assert(std::is_integral<T>::value,
96
364
                  "An integral type is required.");
97
364
    static_assert(sizeof(T) <= sizeof(uint64_t),
98
364
                  "Unsupported integral type.");
99
100
364
    if (min > max)
101
0
      abort();
102
103
    // Use the biggest type possible to hold the range and the
104
    // result.
105
364
    uint64_t range = static_cast<uint64_t>(max) - min;
106
364
    uint64_t result = 0;
107
364
    size_t offset = 0;
108
109
728
    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
110
728
           remaining_bytes_ != 0) {
111
      // Pull bytes off the end of the seed data.
112
      // Experimentally, this seems to allow the fuzzer to
113
      // more easily explore the input space. This makes
114
      // sense, since it works by modifying inputs that caused
115
      // new code to run, and this data is often used to
116
      // encode length of data read by |ConsumeBytes|.
117
      // Separating out read lengths makes it easier modify
118
      // the contents of the data that is actually read.
119
364
      --remaining_bytes_;
120
364
      result =
121
364
          (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
122
364
      offset += CHAR_BIT;
123
364
    }
124
125
    // Avoid division by 0, in case |range + 1| results in overflow.
126
364
    if (range != std::numeric_limits<decltype(range)>::max())
127
364
      result = result % (range + 1);
128
129
364
    return static_cast<T>(min + result);
130
364
  }
131
132
  // Returns a std::string of length from 0 to |max_length|. When it runs
133
  // out of input data, returns what remains of the input. Designed to be
134
  // more stable with respect to a fuzzer inserting characters than just
135
  // picking a random length and then consuming that many bytes with
136
  // |ConsumeBytes|.
137
  std::string ConsumeRandomLengthString(size_t max_length)
138
4.38k
  {
139
    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\",
140
    // and maps "\" followed by anything else to the end of the
141
    // string. As a result of this logic, a fuzzer can insert
142
    // characters into the string, and the string will be lengthened
143
    // to include those new characters, resulting in a more stable
144
    // fuzzer than picking the length of a string independently from
145
    // picking its contents.
146
4.38k
    std::string result;
147
148
    // Reserve the anticipated capaticity to prevent several
149
    // reallocations.
150
4.38k
    result.reserve(std::min(max_length, remaining_bytes_));
151
42.7M
    for (size_t i = 0; i < max_length && remaining_bytes_ != 0;
152
42.7M
         ++i) {
153
42.7M
      char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
154
42.7M
      Advance(1);
155
42.7M
      if (next == '\\' && remaining_bytes_ != 0) {
156
4.75k
        next =
157
4.75k
            ConvertUnsignedToSigned<char>(data_ptr_[0]);
158
4.75k
        Advance(1);
159
4.75k
        if (next != '\\')
160
3.78k
          break;
161
4.75k
      }
162
42.7M
      result += next;
163
42.7M
    }
164
165
4.38k
    result.shrink_to_fit();
166
4.38k
    return result;
167
4.38k
  }
168
169
  // Returns a std::vector containing all remaining bytes of the input
170
  // data.
171
  template <typename T> std::vector<T> ConsumeRemainingBytes()
172
  {
173
    return ConsumeBytes<T>(remaining_bytes_);
174
  }
175
176
  // Returns a std::string containing all remaining bytes of the input
177
  // data. Prefer using |ConsumeRemainingBytes| unless you actually need a
178
  // std::string object.
179
  std::string ConsumeRemainingBytesAsString()
180
4.67k
  {
181
4.67k
    return ConsumeBytesAsString(remaining_bytes_);
182
4.67k
  }
183
184
  // Returns a number in the range [Type's min, Type's max]. The value
185
  // might not be uniformly distributed in the given range. If there's no
186
  // input data left, always returns |min|.
187
  template <typename T> T ConsumeIntegral()
188
364
  {
189
364
    return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
190
364
                                  std::numeric_limits<T>::max());
191
364
  }
192
193
  // Reads one byte and returns a bool, or false when no data remains.
194
  bool ConsumeBool()
195
364
  {
196
364
    return 1 & ConsumeIntegral<uint8_t>();
197
364
  }
198
199
  // Returns a copy of the value selected from the given fixed-size
200
  // |array|.
201
  template <typename T, size_t size>
202
  T PickValueInArray(const T (&array)[size])
203
  {
204
    static_assert(size > 0, "The array must be non empty.");
205
    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
206
  }
207
208
  template <typename T>
209
  T PickValueInArray(std::initializer_list<const T> list)
210
  {
211
    // TODO(Dor1s): switch to static_assert once C++14 is allowed.
212
    if (!list.size())
213
      abort();
214
215
    return *(list.begin() +
216
             ConsumeIntegralInRange<size_t>(0, list.size() - 1));
217
  }
218
219
  // Returns an enum value. The enum must start at 0 and be contiguous. It
220
  // must also contain |kMaxValue| aliased to its largest (inclusive)
221
  // value. Such as: enum class Foo { SomeValue, OtherValue, kMaxValue =
222
  // OtherValue };
223
  template <typename T> T ConsumeEnum()
224
  {
225
    static_assert(std::is_enum<T>::value,
226
                  "|T| must be an enum type.");
227
    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(
228
        0, static_cast<uint32_t>(T::kMaxValue)));
229
  }
230
231
  // Returns a floating point number in the range [0.0, 1.0]. If there's
232
  // no input data left, always returns 0.
233
  template <typename T> T ConsumeProbability()
234
  {
235
    static_assert(std::is_floating_point<T>::value,
236
                  "A floating point type is required.");
237
238
    // Use different integral types for different floating point
239
    // types in order to provide better density of the resulting
240
    // values.
241
    using IntegralType =
242
        typename std::conditional<(sizeof(T) <= sizeof(uint32_t)),
243
                                  uint32_t, uint64_t>::type;
244
245
    T result = static_cast<T>(ConsumeIntegral<IntegralType>());
246
    result /=
247
        static_cast<T>(std::numeric_limits<IntegralType>::max());
248
    return result;
249
  }
250
251
  // Returns a floating point value in the range [Type's lowest, Type's
252
  // max] by consuming bytes from the input data. If there's no input data
253
  // left, always returns approximately 0.
254
  template <typename T> T ConsumeFloatingPoint()
255
  {
256
    return ConsumeFloatingPointInRange<T>(
257
        std::numeric_limits<T>::lowest(),
258
        std::numeric_limits<T>::max());
259
  }
260
261
  // Returns a floating point value in the given range by consuming bytes
262
  // from the input data. If there's no input data left, returns |min|.
263
  // Note that |min| must be less than or equal to |max|.
264
  template <typename T> T ConsumeFloatingPointInRange(T min, T max)
265
  {
266
    if (min > max)
267
      abort();
268
269
    T range = .0;
270
    T result = min;
271
    constexpr T zero(.0);
272
    if (max > zero && min < zero &&
273
        max > min + std::numeric_limits<T>::max()) {
274
      // The diff |max - min| would overflow the given
275
      // floating point type. Use the half of the diff as the
276
      // range and consume a bool to decide whether the result
277
      // is in the first of the second part of the diff.
278
      range = (max / 2.0) - (min / 2.0);
279
      if (ConsumeBool()) {
280
        result += range;
281
      }
282
    } else {
283
      range = max - min;
284
    }
285
286
    return result + range * ConsumeProbability<T>();
287
  }
288
289
  // Reports the remaining bytes available for fuzzed input.
290
  size_t remaining_bytes()
291
0
  {
292
0
    return remaining_bytes_;
293
0
  }
294
295
      private:
296
  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
297
  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
298
299
  void Advance(size_t num_bytes)
300
42.7M
  {
301
42.7M
    if (num_bytes > remaining_bytes_)
302
0
      abort();
303
304
42.7M
    data_ptr_ += num_bytes;
305
42.7M
    remaining_bytes_ -= num_bytes;
306
42.7M
  }
307
308
  template <typename T>
309
  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume)
310
  {
311
    static_assert(sizeof(T) == sizeof(uint8_t),
312
                  "Incompatible data type.");
313
314
    // The point of using the size-based constructor below is to
315
    // increase the odds of having a vector object with capacity
316
    // being equal to the length. That part is always implementation
317
    // specific, but at least both libc++ and libstdc++ allocate the
318
    // requested number of bytes in that constructor, which seems to
319
    // be a natural choice for other implementations as well. To
320
    // increase the odds even more, we also call |shrink_to_fit|
321
    // below.
322
    std::vector<T> result(size);
323
    if (size == 0) {
324
      if (num_bytes_to_consume != 0)
325
        abort();
326
      return result;
327
    }
328
329
    std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
330
    Advance(num_bytes_to_consume);
331
332
    // Even though |shrink_to_fit| is also implementation specific,
333
    // we expect it to provide an additional assurance in case
334
    // vector's constructor allocated a buffer which is larger than
335
    // the actual amount of data we put inside it.
336
    result.shrink_to_fit();
337
    return result;
338
  }
339
340
  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value)
341
42.7M
  {
342
42.7M
    static_assert(sizeof(TS) == sizeof(TU),
343
42.7M
                  "Incompatible data types.");
344
42.7M
    static_assert(!std::numeric_limits<TU>::is_signed,
345
42.7M
                  "Source type must be unsigned.");
346
347
    // TODO(Dor1s): change to `if constexpr` once C++17 becomes
348
    // mainstream.
349
42.7M
    if (std::numeric_limits<TS>::is_modulo)
350
0
      return static_cast<TS>(value);
351
352
    // Avoid using implementation-defined unsigned to signer
353
    // conversions. To learn more, see
354
    // https://stackoverflow.com/questions/13150449.
355
42.7M
    if (value <= std::numeric_limits<TS>::max()) {
356
41.0M
      return static_cast<TS>(value);
357
41.0M
    } else {
358
1.67M
      constexpr auto TS_min = std::numeric_limits<TS>::min();
359
1.67M
      return TS_min + static_cast<char>(value - TS_min);
360
1.67M
    }
361
42.7M
  }
362
363
  const uint8_t *data_ptr_;
364
  size_t remaining_bytes_;
365
};
366
367
#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
368
// no-check-code since this is from a third party