Coverage Report

Created: 2025-11-24 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/flatbuffers/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
Line
Count
Source
1
/*
2
 * Copyright 2014 Google Inc. All rights reserved.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <assert.h>
18
#include <stddef.h>
19
#include <stdint.h>
20
21
#include <algorithm>
22
#include <clocale>
23
#include <memory>
24
#include <regex>
25
#include <string>
26
27
#include "flatbuffers/idl.h"
28
#include "test_init.h"
29
30
// Accepted size range of the scalar text taken from the fuzzer input;
// inputs outside [kMinInputLength, kMaxInputLength] are skipped.
static constexpr size_t kMinInputLength{1};
static constexpr size_t kMaxInputLength{3000};

// Bit masks applied to the first (flags) byte of the fuzzer input.
static constexpr uint8_t flags_scalar_type{0x0F};  // type of scalar value
static constexpr uint8_t flags_quotes_kind{0x10};  // quote " or '
// reserved for future: json {named} or [unnamed]
// static constexpr uint8_t flags_json_bracer = 0x20;
37
38
// Find all 'subj' sub-strings (overlapping ones included) and replace the
// first character of each sub-string with 'repl'.
//   BreakSequence("testest","tes", 'X') -> "XesXest".
//   BreakSequence("xxx","xx", 'Y') -> "YYx".
// A null or empty 'subj' leaves 's' unchanged.
static void BreakSequence(std::string& s, const char* subj, char repl) {
  // An empty needle is "found" at every offset up to and including s.size(),
  // so writing at that position would run past the last character.
  if (subj == nullptr || *subj == '\0') return;
  for (size_t pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    // 'pos' comes from a successful find of a non-empty needle, so it is a
    // valid index into 's'.
    s[pos] = repl;
  }
}
48
49
// Returns a copy of 's' without the leading and trailing characters that
// belong to the set 'pattern'.
//   StripString("xy{xy}y", "xy") -> "{xy}"
// When 'pos' is given, it receives the offset of the first kept character,
// or 0 if nothing is kept.
static std::string StripString(const std::string& s, const char* pattern,
                               size_t* pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  if (begin == std::string::npos) {
    // Every character belongs to the pattern set (or 's' is empty).
    if (pos) *pos = 0;
    return "";
  }
  if (pos) *pos = begin;

  // A kept character exists, so the backward search must succeed as well.
  const auto end = s.find_last_not_of(pattern);
  assert(end < s.length());
  assert(begin <= end);
  return s.substr(begin, end - begin + 1);
}
64
65
class RegexMatcher {
66
 protected:
67
  virtual bool MatchNumber(const std::string& input) const = 0;
68
69
 public:
70
4.85k
  virtual ~RegexMatcher() = default;
71
72
  struct MatchResult {
73
    size_t pos{0};
74
    size_t len{0};
75
    bool res{false};
76
    bool quoted{false};
77
  };
78
79
4.85k
  MatchResult Match(const std::string& input) const {
80
4.85k
    MatchResult r;
81
    // strip leading and trailing "spaces" accepted by flatbuffer
82
4.85k
    auto test = StripString(input, "\t\r\n ", &r.pos);
83
4.85k
    r.len = test.size();
84
    // check quotes
85
4.85k
    if (test.size() >= 2) {
86
4.23k
      auto fch = test.front();
87
4.23k
      auto lch = test.back();
88
4.23k
      r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
89
4.23k
      if (r.quoted) {
90
        // remove quotes for regex test
91
106
        test = test.substr(1, test.size() - 2);
92
106
      }
93
4.23k
    }
94
    // Fast check:
95
4.85k
    if (test.empty()) return r;
96
    // A string with a valid scalar shouldn't have non-ascii or non-printable
97
    // symbols.
98
374k
    for (auto c : test) {
99
374k
      if ((c < ' ') || (c > '~')) return r;
100
374k
    }
101
    // Check with regex
102
4.19k
    r.res = MatchNumber(test);
103
4.19k
    return r;
104
4.78k
  }
105
106
  bool MatchRegexList(const std::string& input,
107
4.17k
                      const std::vector<std::regex>& re_list) const {
108
4.17k
    auto str = StripString(input, " ");
109
4.17k
    if (str.empty()) return false;
110
8.84k
    for (auto& re : re_list) {
111
8.84k
      std::smatch match;
112
8.84k
      if (std::regex_match(str, match, re)) return true;
113
8.84k
    }
114
2.34k
    return false;
115
4.17k
  }
116
};
117
118
class IntegerRegex : public RegexMatcher {
119
 protected:
120
1.76k
  bool MatchNumber(const std::string& input) const override {
121
1.76k
    static const std::vector<std::regex> re_list = {
122
1.76k
        std::regex{R"(^[-+]?[0-9]+$)", std::regex_constants::optimize},
123
124
1.76k
        std::regex{R"(^[-+]?0[xX][0-9a-fA-F]+$)",
125
1.76k
                   std::regex_constants::optimize}};
126
1.76k
    return MatchRegexList(input, re_list);
127
1.76k
  }
128
129
 public:
130
1.99k
  IntegerRegex() = default;
131
  virtual ~IntegerRegex() = default;
132
};
133
134
class UIntegerRegex : public RegexMatcher {
135
 protected:
136
1.40k
  bool MatchNumber(const std::string& input) const override {
137
1.40k
    static const std::vector<std::regex> re_list = {
138
1.40k
        std::regex{R"(^[+]?[0-9]+$)", std::regex_constants::optimize},
139
1.40k
        std::regex{R"(^[+]?0[xX][0-9a-fA-F]+$)",
140
1.40k
                   std::regex_constants::optimize},
141
        // accept -0 number
142
1.40k
        std::regex{R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize}};
143
1.40k
    return MatchRegexList(input, re_list);
144
1.40k
  }
145
146
 public:
147
1.56k
  UIntegerRegex() = default;
148
  virtual ~UIntegerRegex() = default;
149
};
150
151
class BooleanRegex : public IntegerRegex {
152
 protected:
153
302
  bool MatchNumber(const std::string& input) const override {
154
302
    if (input == "true" || input == "false") return true;
155
288
    return IntegerRegex::MatchNumber(input);
156
302
  }
157
158
 public:
159
364
  BooleanRegex() = default;
160
  virtual ~BooleanRegex() = default;
161
};
162
163
class FloatRegex : public RegexMatcher {
164
 protected:
165
1.01k
  bool MatchNumber(const std::string& input) const override {
166
1.01k
    static const std::vector<std::regex> re_list = {
167
        // hex-float
168
1.01k
        std::regex{
169
1.01k
            R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
170
1.01k
            std::regex_constants::optimize},
171
        // dec-float
172
1.01k
        std::regex{
173
1.01k
            R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
174
1.01k
            std::regex_constants::optimize},
175
176
1.01k
        std::regex{
177
1.01k
            R"(^[-+]?(?:nan|inf|infinity)$)",
178
1.01k
            std::regex_constants::optimize | std::regex_constants::icase}};
179
1.01k
    return MatchRegexList(input, re_list);
180
1.01k
  }
181
182
 public:
183
1.29k
  FloatRegex() = default;
184
  virtual ~FloatRegex() = default;
185
};
186
187
class ScalarReferenceResult {
188
 private:
189
  ScalarReferenceResult(const char* _type, RegexMatcher::MatchResult _matched)
190
4.85k
      : type(_type), matched(_matched) {}
191
192
 public:
193
  // Decode scalar type and check if the input string satisfies the scalar type.
194
4.85k
  static ScalarReferenceResult Check(uint8_t code, const std::string& input) {
195
4.85k
    switch (code) {
196
432
      case 0x0:
197
432
        return {"double", FloatRegex().Match(input)};
198
201
      case 0x1:
199
201
        return {"float", FloatRegex().Match(input)};
200
412
      case 0x2:
201
412
        return {"int8", IntegerRegex().Match(input)};
202
394
      case 0x3:
203
394
        return {"int16", IntegerRegex().Match(input)};
204
465
      case 0x4:
205
465
        return {"int32", IntegerRegex().Match(input)};
206
364
      case 0x5:
207
364
        return {"int64", IntegerRegex().Match(input)};
208
265
      case 0x6:
209
265
        return {"uint8", UIntegerRegex().Match(input)};
210
403
      case 0x7:
211
403
        return {"uint16", UIntegerRegex().Match(input)};
212
412
      case 0x8:
213
412
        return {"uint32", UIntegerRegex().Match(input)};
214
481
      case 0x9:
215
481
        return {"uint64", UIntegerRegex().Match(input)};
216
364
      case 0xA:
217
364
        return {"bool", BooleanRegex().Match(input)};
218
660
      default:
219
660
        return {"float", FloatRegex().Match(input)};
220
4.85k
    };
221
0
  }
222
223
  const char* type;
224
  const RegexMatcher::MatchResult matched;
225
};
226
227
bool Parse(flatbuffers::Parser& parser, const std::string& json,
228
659k
           std::string* _text) {
229
659k
  auto done = parser.ParseJson(json.c_str());
230
659k
  if (done) {
231
187k
    TEST_NULL(GenText(parser, parser.builder_.GetBufferPointer(), _text));
232
471k
  } else {
233
471k
    *_text = parser.error_;
234
471k
  }
235
659k
  return done;
236
659k
}
237
238
// Utility for test run.
// Out-of-class definition of the static member OneTimeTestInit::one_time_init_.
// NOTE(review): the class is presumably declared in "test_init.h" and does its
// setup when this object is constructed — confirm there.
239
OneTimeTestInit OneTimeTestInit::one_time_init_;
240
241
// llvm std::regex have problem with stack overflow, limit maximum length.
242
// ./scalar_fuzzer -max_len=3000
243
4.90k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
244
  // Reserve one byte for Parser flags and one byte for repetition counter.
245
4.90k
  if (size < 3) return 0;
246
4.90k
  const uint8_t flags = data[0];
247
  // normalize to ascii alphabet
248
4.90k
  const int extra_rep_number =
249
4.90k
      std::max(5, (data[1] > '0' ? (data[1] - '0') : 0));
250
4.90k
  data += 2;
251
4.90k
  size -= 2;  // bypass
252
253
  // Guarantee 0-termination.
254
4.90k
  const std::string original(reinterpret_cast<const char*>(data), size);
255
4.90k
  auto input = std::string(original.c_str());  // until '\0'
256
4.90k
  if (input.size() < kMinInputLength || input.size() > kMaxInputLength)
257
54
    return 0;
258
259
  // Break comments in json to avoid complexity with regex matcher.
260
  // The string " 12345 /* text */" will be accepted if insert it to string
261
  // expression: "table X { Y: " + " 12345 /* text */" + "; }.
262
  // But strings like this will complicate regex matcher.
263
  // We reject this by transform "/* text */ 12345" to "@* text */ 12345".
264
4.85k
  BreakSequence(input, "//", '@');  // "//" -> "@/"
265
4.85k
  BreakSequence(input, "/*", '@');  // "/*" -> "@*"
266
  // { "$schema: "text" } is exceptional case.
267
  // This key:value ignored by the parser. Numbers can not have $.
268
4.85k
  BreakSequence(input, "$schema", '@');  // "$schema" -> "@schema"
269
  // Break all known scalar functions (todo: add them to regex?):
270
38.8k
  for (auto f : {"deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan"}) {
271
38.8k
    BreakSequence(input, f, '_');  // ident -> ident
272
38.8k
  }
273
274
  // Extract type of scalar from 'flags' and check if the input string satisfies
275
  // the scalar type.
276
4.85k
  const auto ref_res =
277
4.85k
      ScalarReferenceResult::Check(flags & flags_scalar_type, input);
278
4.85k
  auto& recheck = ref_res.matched;
279
280
  // Create parser
281
4.85k
  flatbuffers::IDLOptions opts;
282
4.85k
  opts.force_defaults = true;
283
4.85k
  opts.output_default_scalars_in_json = true;
284
4.85k
  opts.indent_step = -1;
285
4.85k
  opts.strict_json = true;
286
287
4.85k
  flatbuffers::Parser parser(opts);
288
4.85k
  auto schema =
289
4.85k
      "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
290
4.85k
  TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
291
292
  // The fuzzer can adjust the number repetition if a side-effects have found.
293
  // Each test should pass at least two times to ensure that the parser doesn't
294
  // have any hidden-states or locale-depended effects.
295
340k
  for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
296
    // Each even run (0,2,4..) will test locale independed code.
297
335k
    auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
298
    // Set new locale.
299
335k
    if (use_locale) {
300
169k
      FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
301
169k
    }
302
303
    // Parse original input as-is.
304
335k
    auto orig_scalar = "{\"Y\" : " + input + "}";
305
335k
    std::string orig_back;
306
335k
    auto orig_done = Parse(parser, orig_scalar, &orig_back);
307
308
335k
    if (recheck.res != orig_done) {
309
      // look for "does not fit" or "doesn't fit" or "out of range"
310
38.7k
      auto not_fit =
311
38.7k
          (true == recheck.res)
312
38.7k
              ? ((orig_back.find("does not fit") != std::string::npos) ||
313
0
                 (orig_back.find("out of range") != std::string::npos))
314
38.7k
              : false;
315
316
38.7k
      if (false == not_fit) {
317
0
        TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
318
0
                         recheck.res);
319
0
        TEST_EQ_STR(orig_back.c_str(),
320
0
                    input.substr(recheck.pos, recheck.len).c_str());
321
0
        TEST_EQ_FUNC(orig_done, recheck.res);
322
0
      }
323
38.7k
    }
324
325
    // Try to make quoted string and test it.
326
335k
    std::string qouted_input;
327
335k
    if (true == recheck.quoted) {
328
      // we can't simply remove quotes, they may be nested "'12'".
329
      // Original string "\'12\'" converted to "'12'".
330
      // The string can be an invalid string by JSON rules, but after quotes
331
      // removed can transform to valid.
332
11.3k
      assert(recheck.len >= 2);
333
323k
    } else {
334
323k
      const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
335
323k
      qouted_input = input;  // copy
336
323k
      qouted_input.insert(recheck.pos + recheck.len, 1, quote);
337
323k
      qouted_input.insert(recheck.pos, 1, quote);
338
323k
    }
339
340
    // Test quoted version of the string
341
335k
    if (!qouted_input.empty()) {
342
323k
      auto fix_scalar = "{\"Y\" : " + qouted_input + "}";
343
323k
      std::string fix_back;
344
323k
      auto fix_done = Parse(parser, fix_scalar, &fix_back);
345
346
323k
      if (orig_done != fix_done) {
347
0
        TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
348
0
                         orig_done);
349
0
        TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
350
0
      }
351
323k
      if (orig_done) {
352
90.4k
        TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
353
90.4k
      }
354
323k
      TEST_EQ_FUNC(fix_done, orig_done);
355
323k
    }
356
357
    // Create new parser and test default value
358
335k
    if (true == orig_done) {
359
97.3k
      flatbuffers::Parser def_parser(opts);  // re-use options
360
97.3k
      auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
361
97.3k
                        input + "; } root_type X;" +
362
97.3k
                        "{}";  // <- with empty json {}!
363
364
97.3k
      auto def_done = def_parser.Parse(def_schema.c_str());
365
97.3k
      if (false == def_done) {
366
0
        TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
367
0
                         def_parser.error_.c_str());
368
0
        FLATBUFFERS_ASSERT(false);
369
0
      }
370
      // Compare with print.
371
97.3k
      std::string ref_string, def_string;
372
97.3k
      FLATBUFFERS_ASSERT(
373
97.3k
          !GenText(parser, parser.builder_.GetBufferPointer(), &ref_string));
374
97.3k
      FLATBUFFERS_ASSERT(!GenText(
375
97.3k
          def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
376
97.3k
      if (ref_string != def_string) {
377
0
        TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
378
0
                         ref_string.c_str());
379
0
        FLATBUFFERS_ASSERT(false);
380
0
      }
381
97.3k
    }
382
383
    // Restore locale.
384
335k
    if (use_locale) {
385
169k
      FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C"));
386
169k
    }
387
335k
  }
388
4.85k
  return 0;
389
4.85k
}