/proc/self/cwd/pw_tokenizer/detokenize_fuzzer.cc
Line  | Count  | Source  | 
1  |  | // Copyright 2020 The Pigweed Authors  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not  | 
4  |  | // use this file except in compliance with the License. You may obtain a copy of  | 
5  |  | // the License at  | 
6  |  | //  | 
7  |  | //     https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT  | 
11  |  | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the  | 
12  |  | // License for the specific language governing permissions and limitations under  | 
13  |  | // the License.  | 
14  |  |  | 
15  |  | // This file implements a basic fuzz test for the Detokenizer.  | 
16  |  | // An instance of the Detokenizer is created from a minimal, nearly-empty token  | 
17  |  | // database. Fuzz data is fed to the detokenizer in various supported input  | 
18  |  | // argument formats, chosen at random; the detokenizer then decodes this data  | 
19  |  | // and tries to match it to tokens in the database.  | 
20  |  |  | 
21  |  | #include <cstddef>  | 
22  |  | #include <cstdint>  | 
23  |  | #include <cstring>  | 
24  |  | #include <vector>  | 
25  |  |  | 
26  |  | #include "pw_fuzzer/fuzzed_data_provider.h"  | 
27  |  | #include "pw_preprocessor/util.h"  | 
28  |  | #include "pw_tokenizer/detokenize.h"  | 
29  |  |  | 
30  |  | namespace pw::tokenizer { | 
31  |  | namespace { | 
32  |  |  | 
33  |  | constexpr size_t kFuzzRangeMin = 0;  | 
34  |  | constexpr size_t kFuzzRangeMax = 10000;  | 
35  |  |  | 
36  |  | enum DetokenizeBufferArgumentType : uint8_t { | 
37  |  |   kSpan = 0,  | 
38  |  |   kStringView,  | 
39  |  |   kPtrAndLength,  | 
40  |  |   kMaxValue = kPtrAndLength  | 
41  |  | };  | 
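  |  | // FuzzedDataProvider::ConsumeEnum() requires an enum that starts at 0, is  | 
  |  | // contiguous, and aliases its largest value as kMaxValue, which is why  | 
  |  | // kMaxValue above is an alias for kPtrAndLength.  | 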
42  |  |  | 
43  |  | // To fuzz the detokenizer more effectively, rather than using an empty token  | 
44  |  | // database, we construct a minimal database with four entries from a string  | 
45  |  | // literal that matches the token database format (see token_database.h for  | 
46  |  | // detailed info on the database entry format).  | 
47  |  | constexpr char kBasicData[] =  | 
48  |  |     "TOKENS\0\0"  | 
49  |  |     "\x04\x00\x00\x00"  | 
50  |  |     "\0\0\0\0"  | 
51  |  |     "\x01\x00\x00\x00----"  | 
52  |  |     "\x05\x00\x00\x00----"  | 
53  |  |     "\xFF\x00\x00\x00----"  | 
54  |  |     "\xFF\xEE\xEE\xDD----"  | 
55  |  |     "One\0"  | 
56  |  |     "TWO\0"  | 
57  |  |     "333\0"  | 
58  |  |     "FOUR";  | 
59  |  |  | 
60  |  | }  // namespace  | 
61  |  |  | 
62  | 651  | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { | 
63  | 651  |   static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());  | 
64  |  |  | 
65  | 651  |   FuzzedDataProvider provider(data, size);  | 
66  |  |  | 
67  | 21.3k  |   while (provider.remaining_bytes() != 0) { | 
68  |  |     // Map the first word of the remaining fuzz data to a buffer argument  | 
69  |  |     // type, and feed the Detokenizer with a random length buffer to be  | 
70  |  |     // detokenized in the relevant format. The detokenized string returned  | 
71  |  |     // is itself of little consequence to this test.  | 
72  | 20.7k  |     switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) { | 
73  | 5.43k  |       case kSpan: { | 
74  | 5.43k  |         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(  | 
75  | 5.43k  |             kFuzzRangeMin, kFuzzRangeMax);  | 
76  | 5.43k  |         std::vector<uint8_t> buffer =  | 
77  | 5.43k  |             provider.ConsumeBytes<uint8_t>(consumed_size);  | 
78  | 5.43k  |         if (buffer.empty()) { | 
79  | 71  |           return -1;  | 
80  | 71  |         }  | 
81  | 5.36k  |         auto detokenized_string =  | 
82  | 5.36k  |             detokenizer.Detokenize(span(&buffer[0], buffer.size()));  | 
83  | 5.36k  |         static_cast<void>(detokenized_string);  | 
84  | 5.36k  |         break;  | 
85  | 5.43k  |       }  | 
86  |  |  | 
87  | 9.53k  |       case kStringView: { | 
88  | 9.53k  |         std::string str =  | 
89  | 9.53k  |             provider.ConsumeRandomLengthString(provider.remaining_bytes());  | 
90  | 9.53k  |         auto detokenized_string = detokenizer.Detokenize(str);  | 
91  | 9.53k  |         static_cast<void>(detokenized_string);  | 
92  | 9.53k  |         break;  | 
93  | 5.43k  |       }  | 
94  |  |  | 
95  | 5.77k  |       case kPtrAndLength: { | 
96  | 5.77k  |         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(  | 
97  | 5.77k  |             kFuzzRangeMin, kFuzzRangeMax);  | 
98  | 5.77k  |         std::vector<uint8_t> buffer =  | 
99  | 5.77k  |             provider.ConsumeBytes<uint8_t>(consumed_size);  | 
100  | 5.77k  |         auto detokenized_string =  | 
101  | 5.77k  |             detokenizer.Detokenize(buffer.data(), buffer.size());  | 
102  | 5.77k  |         static_cast<void>(detokenized_string);  | 
103  | 5.77k  |         break;  | 
104  | 5.43k  |       }  | 
105  | 20.7k  |     }  | 
106  | 20.7k  |   }  | 
107  |  |  | 
108  | 580  |   return 0;  | 
109  | 651  | }  | 
110  |  |  | 
111  |  | }  // namespace pw::tokenizer  |
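
For context, below is a minimal sketch (not part of the fuzzer above) of how the
Detokenize() overloads exercised by this fuzz target are typically used. The
DecodeMessage() helper and the encoded buffer are hypothetical, and kBasicData is
assumed to be visible in the calling scope; any bytes containing a token from the
database would do.

    #include <cstdint>
    #include <string>
    #include <vector>

    #include "pw_tokenizer/detokenize.h"

    std::string DecodeMessage(const std::vector<uint8_t>& encoded) {
      // Build a detokenizer from the same minimal database the fuzzer uses.
      static pw::tokenizer::Detokenizer detokenizer(
          pw::tokenizer::TokenDatabase::Create<kBasicData>());
      // The span, string_view, and pointer-plus-length overloads are
      // interchangeable here; this uses pointer plus length.
      pw::tokenizer::DetokenizedString result =
          detokenizer.Detokenize(encoded.data(), encoded.size());
      return result.BestString();  // Best-effort decoded text.
    }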