Line data Source code
1 : #include "source/common/json/json_sanitizer.h"
2 :
3 : #include "source/common/common/assert.h"
4 : #include "source/common/common/thread.h"
5 : #include "source/common/json/json_internal.h"
6 :
7 : #include "absl/strings/str_format.h"
8 :
9 : namespace Envoy {
10 : namespace Json {
11 :
12 : // clang-format off
13 : // SPELLCHECKER(off)
14 : //
15 : // Performance benchmarks show this is slightly faster as an array of uint32_t
16 : // rather than an array of bool.
static constexpr uint32_t needs_slow_sanitizer[256] = {
    // Lookup table indexed by byte value. A non-zero entry means the byte
    // cannot be copied verbatim and the slow sanitizer must be used.

    // 0x00-0x1f: control characters, all of which require escapes.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1f

    // 0x20-0x7f: printable characters pass through, with three exceptions:
    // double-quote (0x22), backslash (0x5c) and DEL (0x7f).
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2f: space, then !"#$%&'()*+,-./
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30-0x3f: 0123456789:;<=>?
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40-0x4f: @ABCDEFGHIJKLMNO
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x50-0x5f: PQRSTUVWXYZ[ backslash ]^_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6f: `abcdefghijklmno
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 0x70-0x7f: pqrstuvwxyz{|}~ DEL

    // 0x80-0xff: all of these require calling the slow sanitizer.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80-0x8f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90-0x9f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0-0xaf
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0-0xbf
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0-0xcf
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0-0xdf
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xe0-0xef
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xf0-0xff
};
47 : // SPELLCHECKER(on)
48 : // clang-format on
49 :
50 3225 : absl::string_view sanitize(std::string& buffer, absl::string_view str) {
51 : // Fast-path to see whether any escapes or utf-encoding are needed. If str has
52 : // only unescaped ascii characters, we can simply return it.
53 : //
54 : // Benchmarks show it's faster to just rip through the string with no
55 : // conditionals, so we only check the arithmetically ORed condition after the
56 : // loop. This avoids branches and allows simpler loop unrolling by the
57 : // compiler.
58 3225 : static_assert(ARRAY_SIZE(needs_slow_sanitizer) == 256);
59 3225 : uint32_t need_slow = 0;
60 161475 : for (char c : str) {
61 : // We need to escape control characters, characters >= 127, and double-quote
62 : // and backslash.
63 161475 : need_slow |= needs_slow_sanitizer[static_cast<uint8_t>(c)];
64 161475 : }
65 3225 : if (need_slow == 0) {
66 291 : return str; // Fast path, should be executed most of the time.
67 291 : }
68 2934 : TRY_ASSERT_MAIN_THREAD {
69 : // The Nlohmann JSON library supports serialization and is not too slow. A
70 : // hand-rolled sanitizer can be a little over 2x faster at the cost of added
71 : // production complexity. The main drawback is that this code cannot be used
72 : // in the data plane as it throws exceptions. Should this become an issue,
73 : // #20428 can be revived which is faster and doesn't throw exceptions, but
74 : // adds complexity to the production code base.
75 2934 : buffer = Nlohmann::Factory::serialize(str);
76 2934 : return stripDoubleQuotes(buffer);
77 2934 : }
78 2934 : END_TRY
79 2934 : catch (std::exception&) {
80 : // If Nlohmann throws an error, emit an octal escape for any character
81 : // requiring it. This can occur for invalid utf-8 sequences, and we don't
82 : // want to crash the server if such a sequence makes its way into a string
83 : // we need to serialize. For example, if admin endpoint /stats?format=json
84 : // is called, and a stat name was synthesized from dynamic content such as a
85 : // gRPC method.
86 1529 : buffer.clear();
87 132196 : for (char c : str) {
88 132196 : if (needs_slow_sanitizer[static_cast<uint8_t>(c)]) {
89 95107 : buffer.append(absl::StrFormat("\\%03o", c));
90 95107 : } else {
91 37089 : buffer.append(1, c);
92 37089 : }
93 132196 : }
94 1529 : }
95 :
96 1529 : return buffer;
97 2934 : }
98 :
99 3101 : absl::string_view stripDoubleQuotes(absl::string_view str) {
100 3101 : if (str.size() >= 2 && str[0] == '"' && str[str.size() - 1] == '"') {
101 3101 : str = str.substr(1, str.size() - 2);
102 3101 : } else {
103 0 : ASSERT(false,
104 0 : absl::StrCat("stripDoubleQuotes called on a str that lacks double-quotes: ", str));
105 0 : }
106 3101 : return str;
107 3101 : }
108 :
109 : } // namespace Json
110 : } // namespace Envoy
|