Coverage Report

Created: 2026-01-16 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/simdjson/fuzz/FuzzUtils.h
Line
Count
Source
1
#ifndef SIMDJSON_FUZZUTILS_H
2
#define SIMDJSON_FUZZUTILS_H
3
4
#include <cstdint>
5
#include <vector>
6
#include <string_view>
7
#include <cstring> //memcpy
8
9
// View any object pointer as a pointer to its raw bytes. Nothing is copied; the cast goes through const void*.
10
template <typename T> inline const std::uint8_t* as_bytes(const T* data) {
11
  return static_cast<const std::uint8_t*>(static_cast<const void*>(data));
12
}
13
14
// View any object pointer as a pointer to chars. Nothing is copied; the cast goes through const void*.
15
template <typename T> inline const char* as_chars(const T* data) {
16
  return static_cast<const char*>(static_cast<const void*>(data));
17
}
18
19
20
21
22
// Splits the input into string views, using the four byte separator "\n~~\n", which is human
23
// readable. Makes for nicer debugging of fuzz data.
24
// See https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#magic-separator
25
// for background. Note: don't use memmem, it is not standard C++.
// The returned views point into Data (nothing is copied) and the result always
// holds at least one element (the part after the last separator, possibly empty).
26
26.2k
inline std::vector<std::string_view> split(const char* Data, size_t Size) {
27
28
26.2k
  std::vector<std::string_view> ret;
29
30
26.2k
    using namespace std::literals;
31
26.2k
    constexpr auto sep="\n~~\n"sv;
32
33
26.2k
    std::string_view all(Data,Size);
34
26.2k
    auto pos=all.find(sep);
35
2.86M
    while(pos!=std::string_view::npos) {
36
2.84M
      ret.push_back(all.substr(0,pos));
37
2.84M
      all=all.substr(pos+sep.size());
38
2.84M
      pos=all.find(sep);
39
2.84M
    }
40
26.2k
    ret.push_back(all);
41
26.2k
    return ret;
42
26.2k
}
43
44
// Generic helper to split fuzz data into usable parts, like ints etc.
45
// Note that it does not throw; instead it sets the data pointer to null
46
// when a read asks for more input than is left.
47
struct FuzzData {
48
  // Wraps the bytes [data,data+size) without copying them. data may not be null, even if size is zero (a null Data means "input exhausted", see operator bool).
49
  FuzzData(const uint8_t* data,
50
35.4k
           size_t size) : Data(data),Size(size){}
51
52
  /// Returns an int in the inclusive range [Min,Max], consuming 1, 2 or 4 bytes of input depending on how many values the range holds.
  /// If too little input is left, get() yields 0, so Min is returned and the object is marked as failed.
53
  template<int Min, int Max>
54
37.0k
  int getInt() {
55
37.0k
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
37.0k
    constexpr int range=(Max-Min)+1;
60
37.0k
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
37.0k
    if(range<256)
65
28.8k
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
8.26k
    if(range<65536)
67
8.26k
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
8.26k
  }
int FuzzData::getInt<0, 10>()
Line
Count
Source
54
929
  int getInt() {
55
929
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
929
    constexpr int range=(Max-Min)+1;
60
929
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
929
    if(range<256)
65
929
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
0
    if(range<65536)
67
0
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
0
  }
int FuzzData::getInt<0, 31>()
Line
Count
Source
54
11.4k
  int getInt() {
55
11.4k
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
11.4k
    constexpr int range=(Max-Min)+1;
60
11.4k
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
11.4k
    if(range<256)
65
11.4k
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
0
    if(range<65536)
67
0
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
0
  }
int FuzzData::getInt<0, 7>()
Line
Count
Source
54
11.4k
  int getInt() {
55
11.4k
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
11.4k
    constexpr int range=(Max-Min)+1;
60
11.4k
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
11.4k
    if(range<256)
65
11.4k
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
0
    if(range<65536)
67
0
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
0
  }
int FuzzData::getInt<0, 12>()
Line
Count
Source
54
4.98k
  int getInt() {
55
4.98k
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
4.98k
    constexpr int range=(Max-Min)+1;
60
4.98k
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
4.98k
    if(range<256)
65
4.98k
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
0
    if(range<65536)
67
0
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
0
  }
int FuzzData::getInt<0, 1000>()
Line
Count
Source
54
8.26k
  int getInt() {
55
8.26k
    static_assert (Min<Max,"min must be <max");
56
57
    // make this constexpr, can't overflow because that is UB and is forbidden
58
    // in constexpr evaluation
59
8.26k
    constexpr int range=(Max-Min)+1;
60
8.26k
    constexpr unsigned int urange=range;
61
62
    // don't use std::uniform_int_distribution, we don't want to pay for
63
    // over consumption of random data. Accept the slightly non-uniform distribution.
64
8.26k
    if(range<256)
65
0
      return Min+static_cast<int>(get<uint8_t>()%urange);
66
8.26k
    if(range<65536)
67
8.26k
      return Min+static_cast<int>(get<uint16_t>()%urange);
68
69
0
    return Min+static_cast<int>(get<uint32_t>()%urange);
70
8.26k
  }
71
72
  // Reads sizeof(T) bytes from the front of the input into a T (via memcpy) and advances past them.
  // If fewer bytes remain, returns a value-initialized T, sets Data to null and Size to zero.
  template<typename T>
73
50.4k
  T get() {
74
50.4k
    const auto Nbytes=sizeof(T);
75
50.4k
    T ret{};
76
50.4k
    if(Size<Nbytes) {
77
      //don't throw, signal with null instead.
78
616
      Data=nullptr;
79
616
      Size=0;
80
616
      return ret;
81
616
    }
82
49.7k
    std::memcpy(&ret,Data,Nbytes);
83
49.7k
    Data+=Nbytes;
84
49.7k
    Size-=Nbytes;
85
49.7k
    return ret;
86
50.4k
  }
unsigned char FuzzData::get<unsigned char>()
Line
Count
Source
73
28.8k
  T get() {
74
28.8k
    const auto Nbytes=sizeof(T);
75
28.8k
    T ret{};
76
28.8k
    if(Size<Nbytes) {
77
      //don't throw, signal with null instead.
78
1
      Data=nullptr;
79
1
      Size=0;
80
1
      return ret;
81
1
    }
82
28.8k
    std::memcpy(&ret,Data,Nbytes);
83
28.8k
    Data+=Nbytes;
84
28.8k
    Size-=Nbytes;
85
28.8k
    return ret;
86
28.8k
  }
unsigned long FuzzData::get<unsigned long>()
Line
Count
Source
73
13.3k
  T get() {
74
13.3k
    const auto Nbytes=sizeof(T);
75
13.3k
    T ret{};
76
13.3k
    if(Size<Nbytes) {
77
      //don't throw, signal with null instead.
78
613
      Data=nullptr;
79
613
      Size=0;
80
613
      return ret;
81
613
    }
82
12.7k
    std::memcpy(&ret,Data,Nbytes);
83
12.7k
    Data+=Nbytes;
84
12.7k
    Size-=Nbytes;
85
12.7k
    return ret;
86
13.3k
  }
Unexecuted instantiation: unsigned int FuzzData::get<unsigned int>()
unsigned short FuzzData::get<unsigned short>()
Line
Count
Source
73
8.26k
  T get() {
74
8.26k
    const auto Nbytes=sizeof(T);
75
8.26k
    T ret{};
76
8.26k
    if(Size<Nbytes) {
77
      //don't throw, signal with null instead.
78
2
      Data=nullptr;
79
2
      Size=0;
80
2
      return ret;
81
2
    }
82
8.26k
    std::memcpy(&ret,Data,Nbytes);
83
8.26k
    Data+=Nbytes;
84
8.26k
    Size-=Nbytes;
85
8.26k
    return ret;
86
8.26k
  }
87
88
  // Gets a string view with length in [Min,Max]: first consumes the length via getInt<Min,Max>(), then that many bytes.
  // The view points into the input (nothing is copied). If fewer bytes remain than the chosen length,
  // an empty view is returned and the object is marked as failed (Data is set to null).
89
  template<int Min, int Max>
90
  std::string_view get_stringview() {
91
    static_assert (Min>=0,"Min must be non-negative");
92
    const int len=getInt<Min,Max>();
93
    const unsigned int ulen=static_cast<unsigned int>(len);
94
    // a length equal to the remaining size is fine: it simply consumes everything that is left
    if(ulen<=Size) {
95
      std::string_view ret(chardata(),ulen);
96
      Data+=len;
97
      Size-=ulen;
98
      return ret;
99
    }
100
101
    //mark that there is too little data to fulfill the request
102
    Data=nullptr;
103
    Size=0;
104
105
    return {};
106
  }
107
108
  // Consumes all remaining input and returns it as a string view into the input (empty if nothing is left).
109
8.26k
  std::string_view remainder_as_stringview() {
110
8.26k
    std::string_view ret{chardata(),Size};
111
8.26k
    Data+=Size;
112
8.26k
    Size=0;
113
8.26k
    return ret;
114
8.26k
  }
115
116
  // Consumes the remainder of the data and splits it into string views with split(). Returns an empty vector if no data is left.
117
26.2k
  std::vector<std::string_view> splitIntoStrings() {
118
26.2k
    std::vector<std::string_view> ret;
119
26.2k
    if(Size>0) {
120
26.2k
      ret=split(chardata(),Size);
121
      // all data consumed.
122
26.2k
      Data+=Size;
123
26.2k
      Size=0;
124
26.2k
    }
125
26.2k
    return ret;
126
26.2k
  }
127
128
  // Are we good? False once Data is null, which get() and get_stringview() set when asked for more data than remains.
129
11.4k
  explicit operator bool() const { return Data!=nullptr;}
130
131
  // We are a URBG (UniformRandomBitGenerator) that hands out the input one byte at a time.
132
  // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator
133
  // The type G satisfies UniformRandomBitGenerator if, given
134
  //    T, the type named by G::result_type
135
  //    g, a value of type G
136
  //
137
  //  The following expressions must be valid and have their specified effects
138
  //  Expression  Return type   Requirements
139
  //  G::result_type  T   T is an unsigned integer type
140
  using result_type=uint8_t;
141
  //  G::min()  T   Returns the smallest value that G's operator() may return. The value is strictly less than G::max(). The function must be constexpr.
142
0
  static constexpr result_type min() {return 0;}
143
  //  G::max()  T   Returns the largest value that G's operator() may return. The value is strictly greater than G::min(). The function must be constexpr.
144
0
  static constexpr result_type max() {return 255;}
145
  //  g()   T   Returns a value in the closed interval [G::min(), G::max()]. Has amortized constant complexity.
  // Consumes and returns the next input byte. Note: unlike get(), running out of data here does not set Data to null.
146
0
  result_type operator()() {
147
0
    if(Size==0) {
148
0
      // return something varying, otherwise uniform_int_distribution may get
149
0
      // stuck (failcount is a uint8_t, so the returned sequence wraps around after 255)
150
0
      return failcount++;
151
0
    }
152
0
    const result_type ret=Data[0];
153
0
    Data++;
154
0
    Size--;
155
0
    return ret;
156
0
  }
157
  // Returns the current read position (Data) as a const char*, so that callers can avoid C-style casts.
158
34.6k
  const char* chardata() const {return static_cast<const char*>(static_cast<const void*>(Data));}
159
  // members
160
  const uint8_t* Data; // next unread byte; null once get() or get_stringview() ran out of input
161
  size_t Size; // number of unread bytes left at Data
162
  uint8_t failcount=0; // value handed out (then incremented) by operator() once the input is empty
163
};
164
165
166
#endif // SIMDJSON_FUZZUTILS_H