/proc/self/cwd/pw_tokenizer/detokenize_fuzzer.cc
Line  | Count  | Source  | 
1  |  | // Copyright 2020 The Pigweed Authors  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not  | 
4  |  | // use this file except in compliance with the License. You may obtain a copy of  | 
5  |  | // the License at  | 
6  |  | //  | 
7  |  | //     https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT  | 
11  |  | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the  | 
12  |  | // License for the specific language governing permissions and limitations under  | 
13  |  | // the License.  | 
14  |  |  | 
15  |  | // This file implements a basic fuzz test for the Detokenizer.  | 
16  |  | // An instance of the Detokenizer is created from a minimal, nearly-empty token  | 
17  |  | // database. Fuzz data is fed to the detokenizer in various supported input  | 
18  |  | // argument formats, chosen at random; the detokenizer then decodes this data  | 
19  |  | // and tries to match it to tokens in the database.  | 
20  |  |  | 
21  |  | #include <cstddef>  | 
22  |  | #include <cstdint>  | 
23  |  | #include <cstring>  | 
24  |  | #include <vector>  | 
25  |  |  | 
26  |  | #include "pw_fuzzer/fuzzed_data_provider.h"  | 
27  |  | #include "pw_preprocessor/util.h"  | 
28  |  | #include "pw_tokenizer/detokenize.h"  | 
29  |  |  | 
30  |  | namespace pw::tokenizer { | 
31  |  | namespace { | 
32  |  |  | 
33  |  | constexpr size_t kFuzzRangeMin = 0;  | 
34  |  | constexpr size_t kFuzzRangeMax = 10000;  | 
35  |  |  | 
36  |  | enum DetokenizeBufferArgumentType : uint8_t { | 
37  |  |   kSpan = 0,  | 
38  |  |   kStringView,  | 
39  |  |   kPtrAndLength,  | 
40  |  |   kMaxValue = kPtrAndLength  | 
41  |  | };  | 
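  |  | // FuzzedDataProvider::ConsumeEnum() requires an enum that starts at 0, is  | 
  |  | // contiguous, and aliases its largest value as kMaxValue, which is why  | 
  |  | // kMaxValue above is an alias for kPtrAndLength.  | 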
42  |  |  | 
43  |  | // To fuzz the detokenizer more effectively, rather than using an empty token  | 
44  |  | // database, we construct a minimal database with four entries from a string  | 
45  |  | // literal that matches the token database format (see token_database.h for  | 
46  |  | // detailed info on the database entry format).  | 
47  |  | constexpr char kBasicData[] =  | 
48  |  |     "TOKENS\0\0"  | 
49  |  |     "\x04\x00\x00\x00"  | 
50  |  |     "\0\0\0\0"  | 
51  |  |     "\x01\x00\x00\x00----"  | 
52  |  |     "\x05\x00\x00\x00----"  | 
53  |  |     "\xFF\x00\x00\x00----"  | 
54  |  |     "\xFF\xEE\xEE\xDD----"  | 
55  |  |     "One\0"  | 
56  |  |     "TWO\0"  | 
57  |  |     "333\0"  | 
58  |  |     "FOUR";  | 
59  |  |  | 
60  |  | }  // namespace  | 
61  |  |  | 
62  | 651  | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { | 
63  | 651  |   static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());  | 
64  |  |  | 
65  | 651  |   FuzzedDataProvider provider(data, size);  | 
66  |  |  | 
67  | 21.3k  |   while (provider.remaining_bytes() != 0) { | 
68  |  |     // Map the first word of the remaining fuzz data to a buffer argument  | 
69  |  |     // type, and feed the Detokenizer with a random length buffer to be  | 
70  |  |     // detokenized in the relevant format. The detokenized string returned  | 
71  |  |     // is itself of little consequence to this test.  | 
72  | 20.7k  |     switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) { | 
73  | 5.43k  |       case kSpan: { | 
74  | 5.43k  |         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(  | 
75  | 5.43k  |             kFuzzRangeMin, kFuzzRangeMax);  | 
76  | 5.43k  |         std::vector<uint8_t> buffer =  | 
77  | 5.43k  |             provider.ConsumeBytes<uint8_t>(consumed_size);  | 
78  | 5.43k  |         if (buffer.empty()) { | 
79  | 71  |           return -1;  | 
80  | 71  |         }  | 
81  | 5.36k  |         auto detokenized_string =  | 
82  | 5.36k  |             detokenizer.Detokenize(span(&buffer[0], buffer.size()));  | 
83  | 5.36k  |         static_cast<void>(detokenized_string);  | 
84  | 5.36k  |         break;  | 
85  | 5.43k  |       }  | 
86  |  |  | 
87  | 9.53k  |       case kStringView: { | 
88  | 9.53k  |         std::string str =  | 
89  | 9.53k  |             provider.ConsumeRandomLengthString(provider.remaining_bytes());  | 
90  | 9.53k  |         auto detokenized_string = detokenizer.Detokenize(str);  | 
91  | 9.53k  |         static_cast<void>(detokenized_string);  | 
92  | 9.53k  |         break;  | 
93  | 5.43k  |       }  | 
94  |  |  | 
95  | 5.77k  |       case kPtrAndLength: { | 
96  | 5.77k  |         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(  | 
97  | 5.77k  |             kFuzzRangeMin, kFuzzRangeMax);  | 
98  | 5.77k  |         std::vector<uint8_t> buffer =  | 
99  | 5.77k  |             provider.ConsumeBytes<uint8_t>(consumed_size);  | 
100  | 5.77k  |         auto detokenized_string =  | 
101  | 5.77k  |             detokenizer.Detokenize(buffer.data(), buffer.size());  | 
102  | 5.77k  |         static_cast<void>(detokenized_string);  | 
103  | 5.77k  |         break;  | 
104  | 5.43k  |       }  | 
105  | 20.7k  |     }  | 
106  | 20.7k  |   }  | 
107  |  |  | 
108  | 580  |   return 0;  | 
109  | 651  | }  | 
110  |  |  | 
111  |  | }  // namespace pw::tokenizer  |
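
For context, below is a minimal sketch (not part of the fuzzer above) of how the
Detokenize() overloads exercised by this fuzz target are typically used. The
DecodeMessage() helper and the encoded buffer are hypothetical, and kBasicData is
assumed to be visible in the calling scope; any bytes containing a token from the
database would do.

    #include <cstdint>
    #include <string>
    #include <vector>

    #include "pw_tokenizer/detokenize.h"

    std::string DecodeMessage(const std::vector<uint8_t>& encoded) {
      // Build a detokenizer from the same minimal database the fuzzer uses.
      static pw::tokenizer::Detokenizer detokenizer(
          pw::tokenizer::TokenDatabase::Create<kBasicData>());
      // The span, string_view, and pointer-plus-length overloads are
      // interchangeable here; this uses pointer plus length.
      pw::tokenizer::DetokenizedString result =
          detokenizer.Detokenize(encoded.data(), encoded.size());
      return result.BestString();  // Best-effort decoded text.
    }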