/src/simdjson/include/simdjson/dom/serialization-inl.h

Source (jump to first uncovered line)

#ifndef SIMDJSON_SERIALIZATION_INL_H
#define SIMDJSON_SERIALIZATION_INL_H

#include "simdjson/dom/serialization.h"

#include <cinttypes>
#include <type_traits>

namespace simdjson {
namespace dom {
inline bool parser::print_json(std::ostream &os) const noexcept {
  if (!valid) { return false; }
  simdjson::internal::string_builder<> sb;
  sb.append(doc.root());
  std::string_view answer = sb.str();
  os << answer;
  return true;
}
}
/***
 * Number utility functions
 **/


namespace {
/**@private
 * Escape sequence like \b or \u0001
 * We expect that most compilers will use 8 bytes for this data structure.
 **/
struct escape_sequence {
    // Number of leading characters of `string` that form the escape sequence
    // (the lookup table in this file uses 2 for "\\b"-style and 6 for
    // "\\uXXXX"-style entries).
    uint8_t length;
    // Escape sequence text; only the first `length` characters are copied to
    // the output by the code that consumes this table.
    const char string[7]; // technically, we only ever need 6 characters, we pad to 8
};
/**@private
 * This converts a signed integer into a character sequence.
 * The caller is responsible for providing enough memory (at least
 * 20 characters.)
 * Though various runtime libraries provide itoa functions,
 * it is not part of the C++ standard. The C++17 standard
 * adds the to_chars functions which would do as well, but
 * we want to support C++11.
 */
char *fast_itoa(char *output, int64_t value) noexcept {
  // Work on the unsigned magnitude: converting to uint64_t is a well-defined
  // modular conversion, and negating in unsigned arithmetic avoids the
  // undefined behavior of negating the most negative int64_t.
  uint64_t magnitude = static_cast<uint64_t>(value);
  if (value < 0) {
    *output++ = '-';
    magnitude = ~magnitude + 1; // two's complement negation
  }
  // Digits come out least-significant first, so the scratch area is filled
  // from its end towards its beginning.
  constexpr size_t capacity = 20;
  char digits[capacity];
  size_t first = capacity;
  do {
    digits[--first] = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  // Copy the produced digits right after the optional sign and return the
  // one-past-the-end pointer of what was written (no terminator is added).
  const size_t count = capacity - first;
  std::memcpy(output, digits + first, count);
  return output + count;
}
/**@private
 * This converts an unsigned integer into a character sequence.
 * The caller is responsible for providing enough memory (at least
 * 20 characters: the largest 64-bit unsigned value has 20 digits.)
 * Though various runtime libraries provide itoa functions,
 * it is not part of the C++ standard. The C++17 standard
 * adds the to_chars functions which would do as well, but
 * we want to support C++11.
 */
char *fast_itoa(char *output, uint64_t value) noexcept {
  // Digits come out least-significant first, so the scratch area is filled
  // from its end towards its beginning. 20 characters cover the longest
  // possible 64-bit unsigned value.
  constexpr size_t capacity = 20;
  char digits[capacity];
  size_t first = capacity;
  do {
    digits[--first] = char('0' + (value % 10));
    value /= 10;
  } while (value != 0);
  // Copy the produced digits to the destination and return the
  // one-past-the-end pointer of what was written (no terminator is added).
  const size_t count = capacity - first;
  std::memcpy(output, digits + first, count);
  return output + count;
}
} // anonymous namespace
namespace internal {

/***
 * Minifier/formatter code.
 **/

simdjson_inline void mini_formatter::number(uint64_t x) {
  // Render the decimal digits into a local scratch area, then append them
  // to the output buffer in a single insert.
  char digits[24];
  char *const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

simdjson_inline void mini_formatter::number(int64_t x) {
  // Render the optional sign and decimal digits into a local scratch area,
  // then append them to the output buffer in a single insert.
  char digits[24];
  char *const digits_end = fast_itoa(digits, x);
  buffer.insert(buffer.end(), digits, digits_end);
}

simdjson_inline void mini_formatter::number(double x) {
  // Render the floating-point text into a local scratch area, then append
  // it to the output buffer in a single insert.
  char text[24];
  // The second argument is passed as nullptr: this is currently safe
  // because our to_chars implementation does not check that argument.
  char *const text_end = internal::to_chars(text, nullptr, x);
  buffer.insert(buffer.end(), text, text_end);
}

simdjson_inline void mini_formatter::start_array() {
  // An array opens with a single '[' character.
  one_char('[');
}
simdjson_inline void mini_formatter::end_array() {
  // An array closes with a single ']' character.
  one_char(']');
}
simdjson_inline void mini_formatter::start_object() {
  // An object opens with a single '{' character.
  one_char('{');
}
simdjson_inline void mini_formatter::end_object() {
  // An object closes with a single '}' character.
  one_char('}');
}
simdjson_inline void mini_formatter::comma() {
  // Separator between consecutive values or members; no whitespace is added.
  one_char(',');
}


simdjson_inline void mini_formatter::true_atom() {
  // Append the four characters of the literal (the terminator is excluded).
  const char *const literal = "true";
  const char *const literal_end = literal + 4;
  buffer.insert(buffer.end(), literal, literal_end);
}
simdjson_inline void mini_formatter::false_atom() {
  // Append the five characters of the literal (the terminator is excluded).
  const char *const literal = "false";
  const char *const literal_end = literal + 5;
  buffer.insert(buffer.end(), literal, literal_end);
}
simdjson_inline void mini_formatter::null_atom() {
  // Append the four characters of the literal (the terminator is excluded).
  const char *const literal = "null";
  const char *const literal_end = literal + 4;
  buffer.insert(buffer.end(), literal, literal_end);
}
simdjson_inline void mini_formatter::one_char(char c) {
  // Append exactly one character to the end of the output buffer.
  buffer.push_back(c);
}
simdjson_inline void mini_formatter::key(std::string_view unescaped) {
  // A key is written exactly like a string value (quoted and escaped),
  // immediately followed by the ':' separator.
  this->string(unescaped);
  this->one_char(':');
}
// Appends `unescaped` to the output buffer as a quoted JSON string.
// Characters that need escaping (bytes 0x00-0x1F, '"' and '\\') are replaced
// by their escape sequence; every other byte is copied through unchanged.
simdjson_inline void mini_formatter::string(std::string_view unescaped) {
  one_char('\"');
  size_t i = 0;
  // Fast path for the case where we have no control character, no ", and no backslash.
  // This should include most keys.
  //
  // We would like to use 'bool' but some compilers take offense to bitwise operation
  // with bool types.
  //
  // The table is indexed by byte value (256 entries): it holds 1 for
  // 0x00-0x1F, for '"' (0x22) and for '\\' (0x5C), and 0 everywhere else.
  constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  // Scan eight bytes at a time and stop at the first group that contains
  // at least one byte requiring an escape.
  for(;i + 8 <= unescaped.length(); i += 8) {
    // Poor man's vectorization. This could get much faster if we used SIMD.
    //
    // It is not the case that replacing '|' with '||' would be neutral performance-wise.
    if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
      | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
      | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
      | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
      ) { break; }
  }
  // Finish byte by byte: this pinpoints the offending byte inside the group
  // found above, or walks the tail that is shorter than eight bytes.
  for(;i < unescaped.length(); i++) {
    if(needs_escaping[uint8_t(unescaped[i])]) { break; }
  }
  // The following is also possible and omits a 256-byte table, but it is slower:
  // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
  //      && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}

  // At least for long strings, the following should be fast. We could
  // do better by integrating the checks and the insertion.
  // Here `i` is the length of the prefix that needs no escaping; it is
  // copied verbatim in one insert.
  buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);
  // We caught a character that requires escaping if we enter this loop (slow).
  // Note that we do not restart from the beginning, but rather we continue
  // from the point where we encountered something that requires escaping.
  for (; i < unescaped.length(); i++) {
    switch (unescaped[i]) {
    case '\"':
      {
        // A double quote becomes backslash + quote.
        const char * s = "\\\"";
        buffer.insert(buffer.end(), s, s + 2);
      }
      break;
    case '\\':
      {
        // A backslash becomes two backslashes.
        const char * s = "\\\\";
        buffer.insert(buffer.end(), s, s + 2);
      }
      break;
    default:
      if (uint8_t(unescaped[i]) <= 0x1F) {
        // If packed, this uses 8 * 32 bytes.
        // Note that we expect most compilers to embed this code in the data
        // section.
        // One entry per control character 0x00-0x1F: the short forms
        // \b, \t, \n, \f, \r where they exist, \u00XX otherwise.
        constexpr static escape_sequence escaped[32] = {
          {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
          {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
          {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"},
          {2, "\\f"},     {2, "\\r"},     {6, "\\u000e"}, {6, "\\u000f"},
          {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
          {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
          {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
          {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
        auto u = escaped[uint8_t(unescaped[i])];
        buffer.insert(buffer.end(), u.string, u.string + u.length);
      } else {
        // Any other byte (including bytes >= 0x80) is copied unchanged.
        one_char(unescaped[i]);
      }
    } // switch
  }   // for
  one_char('\"');
}

inline void mini_formatter::clear() {
  buffer.clear();
}

simdjson_inline std::string_view mini_formatter::str() const {
  // Non-owning view over the formatted output; it refers directly to the
  // storage of `buffer`.
  const auto *const first = buffer.data();
  const auto count = buffer.size();
  return std::string_view(first, count);
}


/***
 * String building code.
 **/

// Serializes `value` (a scalar, array or object) through `format`.
// The tape is walked iteratively for the first MAX_DEPTH - 1 levels of
// nesting; containers nested deeper than that are handed to the
// append(array) / append(object) overloads.
template <class serializer>
inline void string_builder<serializer>::append(simdjson::dom::element value) {
  // using tape_type = simdjson::internal::tape_type;
  // Current nesting level relative to `value` (0 = `value` itself).
  size_t depth = 0;
  constexpr size_t MAX_DEPTH = 16;
  // is_object[d] records whether the container open at level d is an object,
  // in which case each value is preceded by a key.
  bool is_object[MAX_DEPTH];
  is_object[0] = false;
  // True once a value has been written in the current container, so that the
  // next value is preceded by a comma.
  bool after_value = false;

  internal::tape_ref iter(value.tape);
  do {
    // print commas after each value
    if (after_value) {
      format.comma();
    }
    // If we are in an object, print the next key and :, and skip to the next
    // value.
    if (is_object[depth]) {
      format.key(iter.get_string_view());
      iter.json_index++;
    }
    switch (iter.tape_ref_type()) {

    // Arrays
    case tape_type::START_ARRAY: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        // is_object[] has no slot for this level: serialize the whole array
        // through the array overload, then undo the depth increment.
        append(simdjson::dom::array(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
        depth--;
        break;
      }

      // Output start [
      format.start_array();
      iter.json_index++;

      // Handle empty [] (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
        format.end_array();
        depth--;
        break;
      }

      is_object[depth] = false;
      after_value = false;
      // Go straight to the first element; `continue` re-evaluates the loop
      // condition, which holds because depth was just incremented.
      continue;
    }

    // Objects
    case tape_type::START_OBJECT: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        // is_object[] has no slot for this level: serialize the whole object
        // through the object overload, then undo the depth increment.
        append(simdjson::dom::object(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
        depth--;
        break;
      }

      // Output start {
      format.start_object();
      iter.json_index++;

      // Handle empty {} (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
        format.end_object();
        depth--;
        break;
      }

      is_object[depth] = true;
      after_value = false;
      // Go straight to the first key; `continue` re-evaluates the loop
      // condition, which holds because depth was just incremented.
      continue;
    }

    // Scalars
    case tape_type::STRING:
      format.string(iter.get_string_view());
      break;
    case tape_type::INT64:
      format.number(iter.next_tape_value<int64_t>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::UINT64:
      format.number(iter.next_tape_value<uint64_t>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::DOUBLE:
      format.number(iter.next_tape_value<double>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::TRUE_VALUE:
      format.true_atom();
      break;
    case tape_type::FALSE_VALUE:
      format.false_atom();
      break;
    case tape_type::NULL_VALUE:
      format.null_atom();
      break;

    // These are impossible
    case tape_type::END_ARRAY:
    case tape_type::END_OBJECT:
    case tape_type::ROOT:
      SIMDJSON_UNREACHABLE();
    }
    // A complete value has been written: step past its last tape entry and
    // remember that a comma is needed before the next value.
    iter.json_index++;
    after_value = true;

    // Handle multiple ends in a row
    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
                          iter.tape_ref_type() == tape_type::END_OBJECT)) {
      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
        format.end_array();
      } else {
        format.end_object();
      }
      depth--;
      iter.json_index++;
    }

    // Stop when we're at depth 0
  } while (depth != 0);
}

template <class serializer>
inline void string_builder<serializer>::append(simdjson::dom::object value) {
  // Writes '{', the members separated by commas (no trailing comma), then '}'.
  format.start_object();
  auto member = value.begin();
  auto last = value.end();
  bool is_first = true;
  for (; member != last; ++member) {
    // Every member except the first is preceded by a separator.
    if (!is_first) {
      format.comma();
    }
    is_first = false;
    append(*member);
  }
  format.end_object();
}

template <class serializer>
inline void string_builder<serializer>::append(simdjson::dom::array value) {
  // Writes '[', the elements separated by commas (no trailing comma), then ']'.
  format.start_array();
  auto item = value.begin();
  auto last = value.end();
  bool is_first = true;
  for (; item != last; ++item) {
    // Every element except the first is preceded by a separator.
    if (!is_first) {
      format.comma();
    }
    is_first = false;
    append(*item);
  }
  format.end_array();
}

template <class serializer>
simdjson_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
  // An object member: the key (with its separator, as produced by
  // format.key) followed by the serialized value.
  auto &member_value = kv.value;
  format.key(kv.key);
  append(member_value);
}

template <class serializer>
simdjson_inline void string_builder<serializer>::clear() {
  // Resetting the builder is delegated entirely to the formatter.
  this->format.clear();
}

template <class serializer>
simdjson_inline std::string_view string_builder<serializer>::str() const {
  // The text produced so far, exactly as exposed by the formatter.
  const std::string_view rendered = format.str();
  return rendered;
}


} // namespace internal
} // namespace simdjson

#endif

Coverage Report

Created: 2023-03-26 06:03

Line	Count	Source (jump to first uncovered line)
1
2		#ifndef SIMDJSON_SERIALIZATION_INL_H
3		#define SIMDJSON_SERIALIZATION_INL_H
4
5		#include "simdjson/dom/serialization.h"
6
7		#include <cinttypes>
8		#include <type_traits>
9
10		namespace simdjson {
11		namespace dom {
12	0	inline bool parser::print_json(std::ostream &os) const noexcept {
13	0	if (!valid) { return false; }
14	0	simdjson::internal::string_builder<> sb;
15	0	sb.append(doc.root());
16	0	std::string_view answer = sb.str();
17	0	os << answer;
18	0	return true;
19	0	}
20		}
21		/***
22		* Number utility functions
23		**/
24
25
26		namespace {
27		/**@private
28		* Escape sequence like \b or \u0001
29		* We expect that most compilers will use 8 bytes for this data structure.
30		**/
31		struct escape_sequence {
32		uint8_t length;
33		const char string[7]; // technically, we only ever need 6 characters, we pad to 8
34		};
35		/**@private
36		* This converts a signed integer into a character sequence.
37		* The caller is responsible for providing enough memory (at least
38		* 20 characters.)
39		* Though various runtime libraries provide itoa functions,
40		* it is not part of the C++ standard. The C++17 standard
41		* adds the to_chars functions which would do as well, but
42		* we want to support C++11.
43		*/
44	0	char *fast_itoa(char *output, int64_t value) noexcept {
45	0	// This is a standard implementation of itoa.
46	0	char buffer[20];
47	0	uint64_t value_positive;
48	0	// In general, negating a signed integer is unsafe.
49	0	if(value < 0) {
50	0	*output++ = '-';
51	0	// Doing value_positive = -value; while avoiding
52	0	// undefined behavior warnings.
53	0	// It assumes two complement's which is universal at this
54	0	// point in time.
55	0	std::memcpy(&value_positive, &value, sizeof(value));
56	0	value_positive = (~value_positive) + 1; // this is a negation
57	0	} else {
58	0	value_positive = value;
59	0	}
60	0	// We work solely with value_positive. It might be easier
61	0	// for an optimizing compiler to deal with an unsigned variable
62	0	// as far as performance goes.
63	0	const char *const end_buffer = buffer + 20;
64	0	char *write_pointer = buffer + 19;
65	0	// A faster approach is possible if we expect large integers:
66	0	// unroll the loop (work in 100s, 1000s) and use some kind of
67	0	// memoization.
68	0	while(value_positive >= 10) {
69	0	*write_pointer-- = char('0' + (value_positive % 10));
70	0	value_positive /= 10;
71	0	}
72	0	*write_pointer = char('0' + value_positive);
73	0	size_t len = end_buffer - write_pointer;
74	0	std::memcpy(output, write_pointer, len);
75	0	return output + len;
76	0	} Unexecuted instantiation: fuzz_parser.cpp:simdjson::(anonymous namespace)::fast_itoa(char, long) Unexecuted instantiation: simdjson.cpp:simdjson::(anonymous namespace)::fast_itoa(char, long)
77		/**@private
78		* This converts an unsigned integer into a character sequence.
79		* The caller is responsible for providing enough memory (at least
80		* 19 characters.)
81		* Though various runtime libraries provide itoa functions,
82		* it is not part of the C++ standard. The C++17 standard
83		* adds the to_chars functions which would do as well, but
84		* we want to support C++11.
85		*/
86	0	char *fast_itoa(char *output, uint64_t value) noexcept {
87	0	// This is a standard implementation of itoa.
88	0	char buffer[20];
89	0	const char *const end_buffer = buffer + 20;
90	0	char *write_pointer = buffer + 19;
91	0	// A faster approach is possible if we expect large integers:
92	0	// unroll the loop (work in 100s, 1000s) and use some kind of
93	0	// memoization.
94	0	while(value >= 10) {
95	0	*write_pointer-- = char('0' + (value % 10));
96	0	value /= 10;
97	0	};
98	0	*write_pointer = char('0' + value);
99	0	size_t len = end_buffer - write_pointer;
100	0	std::memcpy(output, write_pointer, len);
101	0	return output + len;
102	0	} Unexecuted instantiation: fuzz_parser.cpp:simdjson::(anonymous namespace)::fast_itoa(char, unsigned long) Unexecuted instantiation: simdjson.cpp:simdjson::(anonymous namespace)::fast_itoa(char, unsigned long)
103		} // anonymous namespace
104		namespace internal {
105
106		/***
107		* Minifier/formatter code.
108		**/
109
110	0	simdjson_inline void mini_formatter::number(uint64_t x) {
111	0	char number_buffer[24];
112	0	char *newp = fast_itoa(number_buffer, x);
113	0	buffer.insert(buffer.end(), number_buffer, newp);
114	0	}
115
116	0	simdjson_inline void mini_formatter::number(int64_t x) {
117	0	char number_buffer[24];
118	0	char *newp = fast_itoa(number_buffer, x);
119	0	buffer.insert(buffer.end(), number_buffer, newp);
120	0	}
121
122	0	simdjson_inline void mini_formatter::number(double x) {
123	0	char number_buffer[24];
124	0	// Currently, passing the nullptr to the second argument is
125	0	// safe because our implementation does not check the second
126	0	// argument.
127	0	char *newp = internal::to_chars(number_buffer, nullptr, x);
128	0	buffer.insert(buffer.end(), number_buffer, newp);
129	0	}
130
131	0	simdjson_inline void mini_formatter::start_array() { one_char('['); }
132	0	simdjson_inline void mini_formatter::end_array() { one_char(']'); }
133	0	simdjson_inline void mini_formatter::start_object() { one_char('{'); }
134	0	simdjson_inline void mini_formatter::end_object() { one_char('}'); }
135	0	simdjson_inline void mini_formatter::comma() { one_char(','); }
136
137
138	0	simdjson_inline void mini_formatter::true_atom() {
139	0	const char * s = "true";
140	0	buffer.insert(buffer.end(), s, s + 4);
141	0	}
142	0	simdjson_inline void mini_formatter::false_atom() {
143	0	const char * s = "false";
144	0	buffer.insert(buffer.end(), s, s + 5);
145	0	}
146	0	simdjson_inline void mini_formatter::null_atom() {
147	0	const char * s = "null";
148	0	buffer.insert(buffer.end(), s, s + 4);
149	0	}
150	0	simdjson_inline void mini_formatter::one_char(char c) { buffer.push_back(c); }
151	0	simdjson_inline void mini_formatter::key(std::string_view unescaped) {
152	0	string(unescaped);
153	0	one_char(':');
154	0	}
155	0	simdjson_inline void mini_formatter::string(std::string_view unescaped) {
156	0	one_char('\"');
157	0	size_t i = 0;
158	0	// Fast path for the case where we have no control character, no ", and no backslash.
159	0	// This should include most keys.
160	0	//
161	0	// We would like to use 'bool' but some compilers take offense to bitwise operation
162	0	// with bool types.
163	0	constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164	0	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
165	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
167	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
168	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
169	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172	0	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
173	0	for(;i + 8 <= unescaped.length(); i += 8) {
174	0	// Poor's man vectorization. This could get much faster if we used SIMD.
175	0	//
176	0	// It is not the case that replacing '|' with '||' would be neutral performance-wise.
177	0	if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
178	0	| needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
179	0	| needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
180	0	| needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
181	0	) { break; }
182	0	}
183	0	for(;i < unescaped.length(); i++) {
184	0	if(needs_escaping[uint8_t(unescaped[i])]) { break; }
185	0	}
186	0	// The following is also possible and omits a 256-byte table, but it is slower:
187	0	// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
188	0	// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
189	0
190	0	// At least for long strings, the following should be fast. We could
191	0	// do better by integrating the checks and the insertion.
192	0	buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);
193	0	// We caught a control character if we enter this loop (slow).
194	0	// Note that we are do not restart from the beginning, but rather we continue
195	0	// from the point where we encountered something that requires escaping.
196	0	for (; i < unescaped.length(); i++) {
197	0	switch (unescaped[i]) {
198	0	case '\"':
199	0	{
200	0	const char * s = "\\\"";
201	0	buffer.insert(buffer.end(), s, s + 2);
202	0	}
203	0	break;
204	0	case '\\':
205	0	{
206	0	const char * s = "\\\\";
207	0	buffer.insert(buffer.end(), s, s + 2);
208	0	}
209	0	break;
210	0	default:
211	0	if (uint8_t(unescaped[i]) <= 0x1F) {
212	0	// If packed, this uses 8 * 32 bytes.
213	0	// Note that we expect most compilers to embed this code in the data
214	0	// section.
215	0	constexpr static escape_sequence escaped[32] = {
216	0	{6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
217	0	{6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
218	0	{2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"},
219	0	{2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"},
220	0	{6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
221	0	{6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
222	0	{6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
223	0	{6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
224	0	auto u = escaped[uint8_t(unescaped[i])];
225	0	buffer.insert(buffer.end(), u.string, u.string + u.length);
226	0	} else {
227	0	one_char(unescaped[i]);
228	0	}
229	0	} // switch
230	0	} // for
231	0	one_char('\"');
232	0	}
233
234	0	inline void mini_formatter::clear() {
235	0	buffer.clear();
236	0	}
237
238	0	simdjson_inline std::string_view mini_formatter::str() const {
239	0	return std::string_view(buffer.data(), buffer.size());
240	0	}
241
242
243		/***
244		* String building code.
245		**/
246
247		template <class serializer>
248	0	inline void string_builder<serializer>::append(simdjson::dom::element value) {
249	0	// using tape_type = simdjson::internal::tape_type;
250	0	size_t depth = 0;
251	0	constexpr size_t MAX_DEPTH = 16;
252	0	bool is_object[MAX_DEPTH];
253	0	is_object[0] = false;
254	0	bool after_value = false;
255	0
256	0	internal::tape_ref iter(value.tape);
257	0	do {
258	0	// print commas after each value
259	0	if (after_value) {
260	0	format.comma();
261	0	}
262	0	// If we are in an object, print the next key and :, and skip to the next
263	0	// value.
264	0	if (is_object[depth]) {
265	0	format.key(iter.get_string_view());
266	0	iter.json_index++;
267	0	}
268	0	switch (iter.tape_ref_type()) {
269	0
270	0	// Arrays
271	0	case tape_type::START_ARRAY: {
272	0	// If we're too deep, we need to recurse to go deeper.
273	0	depth++;
274	0	if (simdjson_unlikely(depth >= MAX_DEPTH)) {
275	0	append(simdjson::dom::array(iter));
276	0	iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
277	0	depth--;
278	0	break;
279	0	}
280	0
281	0	// Output start [
282	0	format.start_array();
283	0	iter.json_index++;
284	0
285	0	// Handle empty [] (we don't want to come back around and print commas)
286	0	if (iter.tape_ref_type() == tape_type::END_ARRAY) {
287	0	format.end_array();
288	0	depth--;
289	0	break;
290	0	}
291	0
292	0	is_object[depth] = false;
293	0	after_value = false;
294	0	continue;
295	0	}
296	0
297	0	// Objects
298	0	case tape_type::START_OBJECT: {
299	0	// If we're too deep, we need to recurse to go deeper.
300	0	depth++;
301	0	if (simdjson_unlikely(depth >= MAX_DEPTH)) {
302	0	append(simdjson::dom::object(iter));
303	0	iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
304	0	depth--;
305	0	break;
306	0	}
307	0
308	0	// Output start {
309	0	format.start_object();
310	0	iter.json_index++;
311	0
312	0	// Handle empty {} (we don't want to come back around and print commas)
313	0	if (iter.tape_ref_type() == tape_type::END_OBJECT) {
314	0	format.end_object();
315	0	depth--;
316	0	break;
317	0	}
318	0
319	0	is_object[depth] = true;
320	0	after_value = false;
321	0	continue;
322	0	}
323	0
324	0	// Scalars
325	0	case tape_type::STRING:
326	0	format.string(iter.get_string_view());
327	0	break;
328	0	case tape_type::INT64:
329	0	format.number(iter.next_tape_value<int64_t>());
330	0	iter.json_index++; // numbers take up 2 spots, so we need to increment
331	0	// extra
332	0	break;
333	0	case tape_type::UINT64:
334	0	format.number(iter.next_tape_value<uint64_t>());
335	0	iter.json_index++; // numbers take up 2 spots, so we need to increment
336	0	// extra
337	0	break;
338	0	case tape_type::DOUBLE:
339	0	format.number(iter.next_tape_value<double>());
340	0	iter.json_index++; // numbers take up 2 spots, so we need to increment
341	0	// extra
342	0	break;
343	0	case tape_type::TRUE_VALUE:
344	0	format.true_atom();
345	0	break;
346	0	case tape_type::FALSE_VALUE:
347	0	format.false_atom();
348	0	break;
349	0	case tape_type::NULL_VALUE:
350	0	format.null_atom();
351	0	break;
352	0
353	0	// These are impossible
354	0	case tape_type::END_ARRAY:
355	0	case tape_type::END_OBJECT:
356	0	case tape_type::ROOT:
357	0	SIMDJSON_UNREACHABLE();
358	0	}
359	0	iter.json_index++;
360	0	after_value = true;
361	0
362	0	// Handle multiple ends in a row
363	0	while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
364	0	iter.tape_ref_type() == tape_type::END_OBJECT)) {
365	0	if (iter.tape_ref_type() == tape_type::END_ARRAY) {
366	0	format.end_array();
367	0	} else {
368	0	format.end_object();
369	0	}
370	0	depth--;
371	0	iter.json_index++;
372	0	}
373	0
374	0	// Stop when we're at depth 0
375	0	} while (depth != 0);
376	0	}
377
378		template <class serializer>
379	0	inline void string_builder<serializer>::append(simdjson::dom::object value) {
380	0	format.start_object();
381	0	auto pair = value.begin();
382	0	auto end = value.end();
383	0	if (pair != end) {
384	0	append(*pair);
385	0	for (++pair; pair != end; ++pair) {
386	0	format.comma();
387	0	append(*pair);
388	0	}
389	0	}
390	0	format.end_object();
391	0	}
392
393		template <class serializer>
394	0	inline void string_builder<serializer>::append(simdjson::dom::array value) {
395	0	format.start_array();
396	0	auto iter = value.begin();
397	0	auto end = value.end();
398	0	if (iter != end) {
399	0	append(*iter);
400	0	for (++iter; iter != end; ++iter) {
401	0	format.comma();
402	0	append(*iter);
403	0	}
404	0	}
405	0	format.end_array();
406	0	}
407
408		template <class serializer>
409	0	simdjson_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
410	0	format.key(kv.key);
411	0	append(kv.value);
412	0	}
413
414		template <class serializer>
415		simdjson_inline void string_builder<serializer>::clear() {
416		format.clear();
417		}
418
419		template <class serializer>
420	0	simdjson_inline std::string_view string_builder<serializer>::str() const {
421	0	return format.str();
422	0	}
423
424
425		} // namespace internal
426		} // namespace simdjson
427
428		#endif