LCOV - code coverage report
Current view: top level - source/common/json - json_sanitizer.cc (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 32 35 91.4 %
Date: 2024-01-05 06:35:25 Functions: 2 2 100.0 %

          Line data    Source code
       1             : #include "source/common/json/json_sanitizer.h"
       2             : 
       3             : #include "source/common/common/assert.h"
       4             : #include "source/common/common/thread.h"
       5             : #include "source/common/json/json_internal.h"
       6             : 
       7             : #include "absl/strings/str_format.h"
       8             : 
       9             : namespace Envoy {
      10             : namespace Json {
      11             : 
      12             : // clang-format off
      13             : // SPELLCHECKER(off)
      14             : //
      15             : // Performance benchmarks show this is slightly faster as an array of uint32_t
      16             : // rather than an array of bool.
      17             : static constexpr uint32_t needs_slow_sanitizer[256] = {
      18             :   // Control-characters 0-31 all require escapes.
      19             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      20             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      21             : 
      22             :   // Pass through printable characters starting with space. Double-quote and
      23             :   // backslash require an escape.
      24             :   0, 0, 1 /* " */, 0, 0, 0, 0, 0,         //  !"#$%&'
      25             :   0, 0, 0, 0, 0, 0, 0, 0,                 // ()*+,-./
      26             :   0, 0, 0, 0, 0, 0, 0, 0,                 // 01234567
      27             :   0, 0, 0, 0, 0, 0, 0, 0,                 // 89:;<=>?
      28             :   0, 0, 0, 0, 0, 0, 0, 0,                 // @ABCDEFG
      29             :   0, 0, 0, 0, 0, 0, 0, 0,                 // HIJKLMNO
      30             :   0, 0, 0, 0, 0, 0, 0, 0,                 // PQRSTUVW
      31             :   0, 0, 0, 0, 1 /* backslash */, 0, 0, 0, // XYZ[\]^_
      32             :   0, 0, 0, 0, 0, 0, 0, 0,                 // `abcdefg
      33             :   0, 0, 0, 0, 0, 0, 0, 0,                 // hijklmno
      34             :   0, 0, 0, 0, 0, 0, 0, 0,                 // pqrstuvw
      35             :   0, 0, 0, 0, 0, 0, 0, 1,                 // xyz{|}~\177 (DEL, the last entry, is flagged)
      36             : 
      37             :   // 0x80-0xff, all of which require calling the slow sanitizer.
      38             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      39             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      40             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      41             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      42             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      43             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      44             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      45             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      46             : };
      47             : // SPELLCHECKER(on)
      48             : // clang-format on
      49             : 
      50        3225 : absl::string_view sanitize(std::string& buffer, absl::string_view str) {
      51             :   // Returns str with any required escapes applied. The result is either str
      52             :   // itself or a view into buffer, so it is only usable while those stay alive.
      53             :   //
      54             :   // Fast path: if str has only unescaped ascii characters, return it as-is.
      55             :   // Benchmarks show it's faster to just rip through the string with no
      56             :   // conditionals and check the ORed result once after the loop. This avoids
      57             :   // branches and allows simpler loop unrolling by the compiler.
      58        3225 :   static_assert(ARRAY_SIZE(needs_slow_sanitizer) == 256);
      59        3225 :   uint32_t need_slow = 0;
      60      161475 :   for (char c : str) {
      61             :     // The table holds 1 for control characters, characters >= 127, double-quote
      62             :     // and backslash; OR-ing records whether any such character was seen.
      63      161475 :     need_slow |= needs_slow_sanitizer[static_cast<uint8_t>(c)];
      64      161475 :   }
      65        3225 :   if (need_slow == 0) {
      66         291 :     return str; // Fast path, should be executed most of the time.
      67         291 :   }
      68        2934 :   TRY_ASSERT_MAIN_THREAD {
      69             :     // The Nlohmann JSON library supports serialization and is not too slow. A
      70             :     // hand-rolled sanitizer can be a little over 2x faster at the cost of added
      71             :     // production complexity. The main drawback is that this code cannot be used
      72             :     // in the data plane as it throws exceptions. Should this become an issue,
      73             :     // #20428 can be revived which is faster and doesn't throw exceptions, but
      74             :     // adds complexity to the production code base.
      75        2934 :     buffer = Nlohmann::Factory::serialize(str);
      76        2934 :     return stripDoubleQuotes(buffer); // View into buffer without the enclosing quotes.
      77        2934 :   }
      78        2934 :   END_TRY
      79        2934 :   catch (std::exception&) {
      80             :     // If Nlohmann throws an error, rebuild buffer by hand: every character the
      81             :     // table flags is emitted as a backslash followed by its octal value. This
      82             :     // can occur for invalid utf-8 sequences, and we don't want to crash the
      83             :     // server if such a sequence makes its way into a string we need to
      84             :     // serialize. For example, if admin endpoint /stats?format=json is called,
      85             :     // and a stat name was synthesized from dynamic content such as a gRPC method.
      86        1529 :     buffer.clear();
      87      132196 :     for (char c : str) {
      88      132196 :       if (needs_slow_sanitizer[static_cast<uint8_t>(c)]) {
      89       95107 :         buffer.append(absl::StrFormat("\\%03o", c));
      90       95107 :       } else {
      91       37089 :         buffer.append(1, c);
      92       37089 :       }
      93      132196 :     }
      94        1529 :   }
      95             : 
      96        1529 :   return buffer; // Fallback result built in the catch block above.
      97        2934 : }
      98             : 
      99        3101 : absl::string_view stripDoubleQuotes(absl::string_view str) { // Drops one enclosing pair of double-quotes.
     100        3101 :   if (str.size() >= 2 && str[0] == '"' && str[str.size() - 1] == '"') {
     101        3101 :     str = str.substr(1, str.size() - 2); // Keep everything between the first and last character.
     102        3101 :   } else {
     103           0 :     ASSERT(false, // Input was not quoted; what happens next depends on ASSERT (defined elsewhere).
     104           0 :            absl::StrCat("stripDoubleQuotes called on a str that lacks double-quotes: ", str));
     105           0 :   }
     106        3101 :   return str; // Unchanged input if the else branch ran and execution continued.
     107        3101 : }
     108             : 
     109             : } // namespace Json
     110             : } // namespace Envoy

Generated by: LCOV version 1.15