/proc/self/cwd/source/common/json/json_sanitizer.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/common/json/json_sanitizer.h" |
2 | | |
3 | | #include "source/common/common/assert.h" |
4 | | #include "source/common/common/thread.h" |
5 | | #include "source/common/json/json_internal.h" |
6 | | |
7 | | #include "absl/strings/str_format.h" |
8 | | |
9 | | namespace Envoy { |
10 | | namespace Json { |
11 | | |
12 | | // clang-format off |
13 | | // SPELLCHECKER(off) |
14 | | // |
// Lookup table indexed by byte value. A non-zero entry marks a byte that cannot
// be copied verbatim, which forces sanitize() off its fast path.
//
// Performance benchmarks show this is slightly faster as an array of uint32_t
// rather than an array of bool.
static constexpr uint32_t needs_slow_sanitizer[256] = {
  // 0x00-0x1f: control characters, all of which require escapes.
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  // 0x20-0x7f: printable characters starting with space pass through, except
  // for double-quote (0x22), backslash (0x5c) and DEL (0x7f).
  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2f:  !"#$%&'()*+,-./
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3f: 0123456789:;<=>?
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4f: @ABCDEFGHIJKLMNO
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5f: PQRSTUVWXYZ[\]^_
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6f: `abcdefghijklmno
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0x70-0x7f: pqrstuvwxyz{|}~ DEL

  // 0x80-0xff: all of these require calling the slow sanitizer.
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
47 | | // SPELLCHECKER(on) |
48 | | // clang-format on |
49 | | |
50 | 484k | absl::string_view sanitize(std::string& buffer, absl::string_view str) { |
51 | | // Fast-path to see whether any escapes or utf-encoding are needed. If str has |
52 | | // only unescaped ascii characters, we can simply return it. |
53 | | // |
54 | | // Benchmarks show it's faster to just rip through the string with no |
55 | | // conditionals, so we only check the arithmetically ORed condition after the |
56 | | // loop. This avoids branches and allows simpler loop unrolling by the |
57 | | // compiler. |
58 | 484k | static_assert(ARRAY_SIZE(needs_slow_sanitizer) == 256); |
59 | 484k | uint32_t need_slow = 0; |
60 | 16.4M | for (char c : str) { |
61 | | // We need to escape control characters, characters >= 127, and double-quote |
62 | | // and backslash. |
63 | 16.4M | need_slow |= needs_slow_sanitizer[static_cast<uint8_t>(c)]; |
64 | 16.4M | } |
65 | 484k | if (need_slow == 0) { |
66 | 481k | return str; // Fast path, should be executed most of the time. |
67 | 481k | } |
68 | 3.35k | TRY_ASSERT_MAIN_THREAD { |
69 | | // The Nlohmann JSON library supports serialization and is not too slow. A |
70 | | // hand-rolled sanitizer can be a little over 2x faster at the cost of added |
71 | | // production complexity. The main drawback is that this code cannot be used |
72 | | // in the data plane as it throws exceptions. Should this become an issue, |
73 | | // #20428 can be revived which is faster and doesn't throw exceptions, but |
74 | | // adds complexity to the production code base. |
75 | 3.35k | buffer = Nlohmann::Factory::serialize(str); |
76 | 3.35k | return stripDoubleQuotes(buffer); |
77 | 3.35k | } |
78 | 3.35k | END_TRY |
79 | 3.35k | catch (std::exception&) { |
80 | | // If Nlohmann throws an error, emit an octal escape for any character |
81 | | // requiring it. This can occur for invalid utf-8 sequences, and we don't |
82 | | // want to crash the server if such a sequence makes its way into a string |
83 | | // we need to serialize. For example, if admin endpoint /stats?format=json |
84 | | // is called, and a stat name was synthesized from dynamic content such as a |
85 | | // gRPC method. |
86 | 1.77k | buffer.clear(); |
87 | 8.40M | for (char c : str) { |
88 | 8.40M | if (needs_slow_sanitizer[static_cast<uint8_t>(c)]) { |
89 | 7.24M | buffer.append(absl::StrFormat("\\%03o", c)); |
90 | 7.24M | } else { |
91 | 1.16M | buffer.append(1, c); |
92 | 1.16M | } |
93 | 8.40M | } |
94 | 1.77k | } |
95 | | |
96 | 1.77k | return buffer; |
97 | 3.35k | } |
98 | | |
99 | 484k | absl::string_view stripDoubleQuotes(absl::string_view str) { |
100 | 484k | if (str.size() >= 2 && str[0] == '"' && str[str.size() - 1] == '"') { |
101 | 484k | str = str.substr(1, str.size() - 2); |
102 | 484k | } else { |
103 | 0 | ASSERT(false, |
104 | 0 | absl::StrCat("stripDoubleQuotes called on a str that lacks double-quotes: ", str)); |
105 | 0 | } |
106 | 484k | return str; |
107 | 484k | } |
108 | | |
109 | | } // namespace Json |
110 | | } // namespace Envoy |