Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/common/json/json_sanitizer.cc
Line
Count
Source (jump to first uncovered line)
1
#include "source/common/json/json_sanitizer.h"
2
3
#include "source/common/common/assert.h"
4
#include "source/common/common/thread.h"
5
#include "source/common/json/json_internal.h"
6
7
#include "absl/strings/str_format.h"
8
9
namespace Envoy {
10
namespace Json {
11
12
// clang-format off
13
// SPELLCHECKER(off)
14
//
15
// Performance benchmarks show this is slightly faster as an array of uint32_t
16
// rather than an array of bool.
17
// Lookup table indexed by byte value (0-255). A non-zero entry marks a byte
// that cannot be copied through verbatim: sanitize() ORs these entries together
// over the input and only takes its fast path when the result is zero.
static constexpr uint32_t needs_slow_sanitizer[256] = {
18
  // Control-characters 0-31 all require escapes.
19
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
20
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
21
22
  // Pass through printable characters starting with space. Double-quote and
23
  // backslash require an escape.
24
  0, 0, 1 /* " */, 0, 0, 0, 0, 0,         //  !"#$%&'
25
  0, 0, 0, 0, 0, 0, 0, 0,                 // ()*+,-./
26
  0, 0, 0, 0, 0, 0, 0, 0,                 // 01234567
27
  0, 0, 0, 0, 0, 0, 0, 0,                 // 89:;<=>?
28
  0, 0, 0, 0, 0, 0, 0, 0,                 // @ABCDEFG
29
  0, 0, 0, 0, 0, 0, 0, 0,                 // HIJKLMNO
30
  0, 0, 0, 0, 0, 0, 0, 0,                 // PQRSTUVW
31
  0, 0, 0, 0, 1 /* backslash */, 0, 0, 0, // XYZ[\]^_
32
  0, 0, 0, 0, 0, 0, 0, 0,                 // `abcdefg
33
  0, 0, 0, 0, 0, 0, 0, 0,                 // hijklmno
34
  0, 0, 0, 0, 0, 0, 0, 0,                 // pqrstuvw
35
  0, 0, 0, 0, 0, 0, 0, 1,                 // xyz{|}~\177
36
37
  // 0x80-0xff, all of which require calling the slow sanitizer.
38
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
};
47
// SPELLCHECKER(on)
48
// clang-format on
49
50
484k
// Produces a version of `str` in which every byte flagged by
// needs_slow_sanitizer has been escaped.
//
// @param buffer scratch storage; only written when `str` needs escaping.
// @param str the text to sanitize.
// @return `str` itself when no byte needs escaping (buffer is left untouched);
//         otherwise a view into `buffer`, so `buffer` must outlive the result.
absl::string_view sanitize(std::string& buffer, absl::string_view str) {
  // Fast-path to see whether any escapes or utf-encoding are needed. If str has
  // only unescaped ascii characters, we can simply return it.
  //
  // Benchmarks show it's faster to just rip through the string with no
  // conditionals, so we only check the arithmetically ORed condition after the
  // loop. This avoids branches and allows simpler loop unrolling by the
  // compiler.
  static_assert(ARRAY_SIZE(needs_slow_sanitizer) == 256);
  uint32_t need_slow = 0;
  for (char c : str) {
    // We need to escape control characters, characters >= 127, and double-quote
    // and backslash.
    need_slow |= needs_slow_sanitizer[static_cast<uint8_t>(c)];
  }
  if (need_slow == 0) {
    return str; // Fast path, should be executed most of the time.
  }

  TRY_ASSERT_MAIN_THREAD {
    // The Nlohmann JSON library supports serialization and is not too slow. A
    // hand-rolled sanitizer can be a little over 2x faster at the cost of added
    // production complexity. The main drawback is that this code cannot be used
    // in the data plane as it throws exceptions. Should this become an issue,
    // #20428 can be revived which is faster and doesn't throw exceptions, but
    // adds complexity to the production code base.
    buffer = Nlohmann::Factory::serialize(str);
    // The serialized form is stripped of its surrounding double-quotes before
    // being returned.
    return stripDoubleQuotes(buffer);
  }
  END_TRY
  catch (const std::exception&) {
    // If Nlohmann throws an error, emit an octal escape for any character
    // requiring it. This can occur for invalid utf-8 sequences, and we don't
    // want to crash the server if such a sequence makes its way into a string
    // we need to serialize. For example, if admin endpoint /stats?format=json
    // is called, and a stat name was synthesized from dynamic content such as a
    // gRPC method.
    buffer.clear();
    for (char c : str) {
      if (needs_slow_sanitizer[static_cast<uint8_t>(c)]) {
        // Format directly into buffer rather than building a temporary string
        // for every escaped byte.
        absl::StrAppendFormat(&buffer, "\\%03o", c);
      } else {
        buffer.push_back(c);
      }
    }
  }

  return buffer;
}
98
99
484k
// Removes one leading and one trailing double-quote from `str`.
//
// The input is expected to be at least two characters long and to both start
// and end with '"'. If it is not, the ASSERT below fires (where ASSERT is
// active) and `str` is returned unmodified.
absl::string_view stripDoubleQuotes(absl::string_view str) {
  const bool is_quoted = str.size() >= 2 && str.front() == '"' && str.back() == '"';
  if (!is_quoted) {
    ASSERT(false,
           absl::StrCat("stripDoubleQuotes called on a str that lacks double-quotes: ", str));
    return str;
  }
  // Skip the opening quote and stop before the closing one.
  return str.substr(1, str.size() - 2);
}
108
109
} // namespace Json
110
} // namespace Envoy