/src/simdjson/fuzz/fuzz_utf8.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * For fuzzing all of the implementations (haswell/fallback/westmere),  | 
3  |  |  * finding any difference between the output of each which would  | 
4  |  |  * indicate inconsistency. Also, it gets the non-default backend  | 
5  |  |  * some fuzzing love.  | 
6  |  |  *  | 
7  |  |  * Copyright Paul Dreik 20200912 for the simdjson project.  | 
8  |  |  */  | 
9  |  |  | 
#include "simdjson.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include "supported_implementations.h"
15  |  |  | 
16  | 0  | extern "C" int VerboseTestOneInput(const uint8_t *Data, size_t Size) { | 
17  | 0  |     static const auto supported_implementations=get_runtime_supported_implementations();  | 
18  | 0  |     for(size_t i = 0; i <= Size; i++) { | 
19  | 0  |         std::cout<<"size: "<<std::dec<<std::setw(8)<<i<<std::endl;  | 
20  | 0  |         std::cout<<"Input: \"";  | 
21  | 0  |         for(size_t j = 0; j < i; j++) { | 
22  | 0  |             std::cout<<"\\x"<<std::hex<<std::setw(2)<<std::setfill('0')<<uint32_t(Data[j]); | 
23  | 0  |         }  | 
24  | 0  |         std::cout<<"\""<<std::endl;  | 
25  | 0  |         for(const auto& e: supported_implementations) { | 
26  | 0  |             if(!e->supported_by_runtime_system()) { continue; } | 
27  | 0  |             const bool current=e->validate_utf8((const char*)Data,i);  | 
28  | 0  |             std::cout<<e->name()<<" returns "<<current<<std::endl;  | 
29  | 0  |         }  | 
30  | 0  |         std::cout<<std::endl;  | 
31  | 0  |     }  | 
32  | 0  |     return 0;  | 
33  | 0  | }  | 
34  |  |  | 
35  | 481  | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { | 
36  |  |  | 
37  |  |     // since this check is expensive, only do it once  | 
38  | 481  |     static const auto supported_implementations=get_runtime_supported_implementations();  | 
39  |  |  | 
40  |  |  | 
41  | 1.44k  |     auto utf8verify=[Data,Size](const simdjson::implementation* impl) -> bool { | 
42  | 1.44k  |         return impl->validate_utf8((const char*)Data,Size);  | 
43  | 1.44k  |     };  | 
44  |  |  | 
45  |  |  | 
46  | 481  |     auto first = supported_implementations.begin();  | 
47  | 481  |     auto last = supported_implementations.end();  | 
48  |  |  | 
49  |  |  | 
50  | 481  |     const bool reference=utf8verify(*first);  | 
51  |  |  | 
52  | 481  |     bool failed=false;  | 
53  | 1.44k  |     for(auto it=first+1; it != last; ++it) { | 
54  | 962  |         const bool current=utf8verify(*it);  | 
55  | 962  |         if(current!=reference) { | 
56  | 0  |             failed=true;  | 
57  | 0  |         }  | 
58  | 962  |     }  | 
59  |  |  | 
60  | 481  |     if(failed) { | 
61  | 0  |         std::cerr<<std::boolalpha<<"Mismatch between implementations of validate_utf8() found:\n";  | 
62  | 0  |         for(const auto& e: supported_implementations) { | 
63  | 0  |             if(!e->supported_by_runtime_system()) { continue; } | 
64  | 0  |             const bool current=utf8verify(e);  | 
65  | 0  |             std::cerr<<e->name()<<" returns "<<current<<std::endl;  | 
66  | 0  |         }  | 
67  | 0  |         std::cerr << "Offending input: \"";  | 
68  | 0  |         for(size_t i = 0; i < Size; i++) { | 
69  | 0  |             std::cerr << "\\x" << std::hex << std::setw(2) << std::setfill('0')  << uint32_t(Data[i]); | 
70  | 0  |         }  | 
71  | 0  |         std::cerr << "\"" <<std::endl;  | 
72  |  | 
  | 
73  | 0  |         VerboseTestOneInput(Data, Size);  | 
74  |  | 
  | 
75  | 0  |         std::abort();  | 
76  | 0  |     }  | 
77  |  |  | 
78  |  |     //all is well  | 
79  | 481  |     return 0;  | 
80  | 481  | }  |