/src/valijson/thirdparty/rapidjson/include/rapidjson/reader.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Tencent is pleased to support the open source community by making RapidJSON available. |
2 | | // |
3 | | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. |
4 | | // |
5 | | // Licensed under the MIT License (the "License"); you may not use this file except |
6 | | // in compliance with the License. You may obtain a copy of the License at |
7 | | // |
8 | | // http://opensource.org/licenses/MIT |
9 | | // |
10 | | // Unless required by applicable law or agreed to in writing, software distributed |
11 | | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
12 | | // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
13 | | // specific language governing permissions and limitations under the License. |
14 | | |
15 | | #ifndef RAPIDJSON_READER_H_ |
16 | | #define RAPIDJSON_READER_H_ |
17 | | |
18 | | /*! \file reader.h */ |
19 | | |
20 | | #include "allocators.h" |
21 | | #include "stream.h" |
22 | | #include "encodedstream.h" |
23 | | #include "internal/clzll.h" |
24 | | #include "internal/meta.h" |
25 | | #include "internal/stack.h" |
26 | | #include "internal/strtod.h" |
27 | | #include <limits> |
28 | | |
29 | | #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) |
30 | | #include <intrin.h> |
31 | | #pragma intrinsic(_BitScanForward) |
32 | | #endif |
33 | | #ifdef RAPIDJSON_SSE42 |
34 | | #include <nmmintrin.h> |
35 | | #elif defined(RAPIDJSON_SSE2) |
36 | | #include <emmintrin.h> |
37 | | #elif defined(RAPIDJSON_NEON) |
38 | | #include <arm_neon.h> |
39 | | #endif |
40 | | |
41 | | #ifdef __clang__ |
42 | | RAPIDJSON_DIAG_PUSH |
43 | | RAPIDJSON_DIAG_OFF(old-style-cast) |
44 | | RAPIDJSON_DIAG_OFF(padded) |
45 | | RAPIDJSON_DIAG_OFF(switch-enum) |
46 | | #elif defined(_MSC_VER) |
47 | | RAPIDJSON_DIAG_PUSH |
48 | | RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant |
49 | | RAPIDJSON_DIAG_OFF(4702) // unreachable code |
50 | | #endif |
51 | | |
52 | | #ifdef __GNUC__ |
53 | | RAPIDJSON_DIAG_PUSH |
54 | | RAPIDJSON_DIAG_OFF(effc++) |
55 | | #endif |
56 | | |
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing. It is passed as the "value" of an early return so that the
// same macro can be used inside functions returning void.
#define RAPIDJSON_NOTHING /* deliberately empty */
// Returns `value` from the enclosing function when HasParseError() is true.
// The #ifndef guard lets a definition made before this point take precedence.
// The expansion calls HasParseError() unqualified, so it is only usable where
// that name resolves (e.g. inside GenericReader members).
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
    RAPIDJSON_MULTILINEMACRO_END
#endif
// Variant of the early return for functions whose return type is void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
//!@endcond
68 | | |
69 | | /*! \def RAPIDJSON_PARSE_ERROR_NORETURN |
70 | | \ingroup RAPIDJSON_ERRORS |
71 | | \brief Macro to indicate a parse error. |
72 | | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
73 | | \param offset position of the error in JSON input (\c size_t) |
74 | | |
75 | | This macro can be used as a customization point for the internal |
76 | | error handling mechanism of RapidJSON. |
77 | | |
78 | | A common usage model is to throw an exception instead of requiring the |
79 | | caller to explicitly check the \ref rapidjson::GenericReader::Parse's |
80 | | return value: |
81 | | |
82 | | \code |
83 | | #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ |
84 | | throw ParseException(parseErrorCode, #parseErrorCode, offset) |
85 | | |
86 | | #include <stdexcept> // std::runtime_error |
87 | | #include "rapidjson/error/error.h" // rapidjson::ParseResult |
88 | | |
89 | | struct ParseException : std::runtime_error, rapidjson::ParseResult { |
90 | | ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) |
91 | | : std::runtime_error(msg), ParseResult(code, offset) {} |
92 | | }; |
93 | | |
94 | | #include "rapidjson/reader.h" |
95 | | \endcode |
96 | | |
97 | | \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse |
98 | | */ |
// Default definition, used only when the user has not supplied one beforehand.
// It asserts that no error has been recorded yet and then records the error via
// SetParseError(). It does NOT leave the enclosing function: callers follow it
// with one of the RAPIDJSON_PARSE_ERROR_EARLY_RETURN macros.
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
106 | | |
107 | | /*! \def RAPIDJSON_PARSE_ERROR |
108 | | \ingroup RAPIDJSON_ERRORS |
109 | | \brief (Internal) macro to indicate and handle a parse error. |
110 | | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
111 | | \param offset position of the error in JSON input (\c size_t) |
112 | | |
113 | | Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. |
114 | | |
115 | | \see RAPIDJSON_PARSE_ERROR_NORETURN |
116 | | \hideinitializer |
117 | | */ |
// Default definition: record the error with RAPIDJSON_PARSE_ERROR_NORETURN, then
// leave the enclosing function through RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID.
// Because the early return carries no value, this macro is meant for functions
// returning void.
#ifndef RAPIDJSON_PARSE_ERROR
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
125 | | |
126 | | #include "error/error.h" // ParseErrorCode, ParseResult |
127 | | |
128 | | RAPIDJSON_NAMESPACE_BEGIN |
129 | | |
130 | | /////////////////////////////////////////////////////////////////////////////// |
131 | | // ParseFlag |
132 | | |
/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
    \ingroup RAPIDJSON_CONFIG
    \brief User-defined kParseDefaultFlags definition.

    May be defined by the user as any combination of \c ParseFlag values.
    When it is left undefined, it falls back to \c kParseNoFlags.
*/
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Bit flags that select optional parser behaviour; combine them with bitwise OR.
/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
*/
enum ParseFlag {
    kParseNoFlags               = 0x0000,   //!< No flags are set.
    kParseInsituFlag            = 0x0001,   //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag  = 0x0002,   //!< Validate encoding of JSON strings.
    kParseIterativeFlag         = 0x0004,   //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag      = 0x0008,   //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag     = 0x0010,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag          = 0x0020,   //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag  = 0x0040,   //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag    = 0x0080,   //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag         = 0x0100,   //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseEscapedApostropheFlag = 0x0200,   //!< Allow escaped apostrophe in strings.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
160 | | |
161 | | /////////////////////////////////////////////////////////////////////////////// |
162 | | // Handler |
163 | | |
164 | | /*! \class rapidjson::Handler |
165 | | \brief Concept for receiving events from GenericReader upon parsing. |
166 | | The functions return true if no error occurs. If they return false, |
167 | | the event publisher should terminate the process. |
168 | | \code |
169 | | concept Handler { |
170 | | typename Ch; |
171 | | |
172 | | bool Null(); |
173 | | bool Bool(bool b); |
174 | | bool Int(int i); |
175 | | bool Uint(unsigned i); |
176 | | bool Int64(int64_t i); |
177 | | bool Uint64(uint64_t i); |
178 | | bool Double(double d); |
179 | | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
180 | | bool RawNumber(const Ch* str, SizeType length, bool copy); |
181 | | bool String(const Ch* str, SizeType length, bool copy); |
182 | | bool StartObject(); |
183 | | bool Key(const Ch* str, SizeType length, bool copy); |
184 | | bool EndObject(SizeType memberCount); |
185 | | bool StartArray(); |
186 | | bool EndArray(SizeType elementCount); |
187 | | }; |
188 | | \endcode |
189 | | */ |
190 | | /////////////////////////////////////////////////////////////////////////////// |
191 | | // BaseReaderHandler |
192 | | |
193 | | //! Default implementation of Handler. |
194 | | /*! This can be used as base class of any reader handler. |
195 | | \note implements Handler concept |
196 | | */ |
197 | | template<typename Encoding = UTF8<>, typename Derived = void> |
198 | | struct BaseReaderHandler { |
199 | | typedef typename Encoding::Ch Ch; |
200 | | |
201 | | typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override; |
202 | | |
203 | | bool Default() { return true; } |
204 | | bool Null() { return static_cast<Override&>(*this).Default(); } |
205 | | bool Bool(bool) { return static_cast<Override&>(*this).Default(); } |
206 | | bool Int(int) { return static_cast<Override&>(*this).Default(); } |
207 | | bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); } |
208 | | bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); } |
209 | | bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); } |
210 | | bool Double(double) { return static_cast<Override&>(*this).Default(); } |
211 | | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
212 | | bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
213 | | bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); } |
214 | | bool StartObject() { return static_cast<Override&>(*this).Default(); } |
215 | | bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
216 | | bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); } |
217 | | bool StartArray() { return static_cast<Override&>(*this).Default(); } |
218 | | bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); } |
219 | | }; |
220 | | |
221 | | /////////////////////////////////////////////////////////////////////////////// |
222 | | // StreamLocalCopy |
223 | | |
224 | | namespace internal { |
225 | | |
226 | | template<typename Stream, int = StreamTraits<Stream>::copyOptimization> |
227 | | class StreamLocalCopy; |
228 | | |
229 | | //! Do copy optimization. |
230 | | template<typename Stream> |
231 | | class StreamLocalCopy<Stream, 1> { |
232 | | public: |
233 | | StreamLocalCopy(Stream& original) : s(original), original_(original) {} |
234 | | ~StreamLocalCopy() { original_ = s; } |
235 | | |
236 | | Stream s; |
237 | | |
238 | | private: |
239 | | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
240 | | |
241 | | Stream& original_; |
242 | | }; |
243 | | |
244 | | //! Keep reference. |
245 | | template<typename Stream> |
246 | | class StreamLocalCopy<Stream, 0> { |
247 | | public: |
248 | | StreamLocalCopy(Stream& original) : s(original) {} |
249 | | |
250 | | Stream& s; |
251 | | |
252 | | private: |
253 | | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
254 | | }; |
255 | | |
256 | | } // namespace internal |
257 | | |
258 | | /////////////////////////////////////////////////////////////////////////////// |
259 | | // SkipWhitespace |
260 | | |
261 | | //! Skip the JSON white spaces in a stream. |
262 | | /*! \param is A input stream for skipping white spaces. |
263 | | \note This function has SSE2/SSE4.2 specialization. |
264 | | */ |
265 | | template<typename InputStream> |
266 | | void SkipWhitespace(InputStream& is) { |
267 | | internal::StreamLocalCopy<InputStream> copy(is); |
268 | | InputStream& s(copy.s); |
269 | | |
270 | | typename InputStream::Ch c; |
271 | | while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') |
272 | | s.Take(); |
273 | | } |
274 | | |
//! Skip JSON white spaces in the character range [p, end).
/*! \param p Start of the range.
    \param end One past the last character of the range.
    \return Pointer to the first non-whitespace character, or \c end if there is none.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    while (p != end) {
        const char c = *p;
        const bool isWhitespace = (c == ' ' || c == '\n' || c == '\r' || c == '\t');
        if (!isWhitespace)
            break;
        ++p;
    }
    return p;
}
280 | | |
281 | | #ifdef RAPIDJSON_SSE42 |
//! Skip whitespace with the SSE 4.2 string-compare intrinsic (_mm_cmpistri), testing 16 8-bit characters at once.
/*! \param p Start of the text. The scan has no end bound: it stops only at the
        first character that is not one of ' ', '\n', '\r', '\t', so the text
        must contain such a character (for example a terminating '\0').
    \return Pointer to the first non-whitespace character.
    \note Once \c p is 16-byte aligned, whole 16-byte blocks are loaded, so bytes
        located after the first non-whitespace character may be read.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scalar checks until p reaches a multiple of 16, as required by the aligned load below)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string using SIMD
    // The array holds the four whitespace characters followed by zero padding.
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // r is the index of the first byte of s that matches none of the whitespace characters, or 16 if there is none.
        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
        if (r != 16)    // some of characters is non-whitespace
            return p + r;
    }
}
309 | | |
310 | | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
311 | | // Fast return for single non-whitespace |
312 | | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
313 | | ++p; |
314 | | else |
315 | | return p; |
316 | | |
317 | | // The middle of string using SIMD |
318 | | static const char whitespace[16] = " \n\r\t"; |
319 | | const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); |
320 | | |
321 | | for (; p <= end - 16; p += 16) { |
322 | | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
323 | | const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); |
324 | | if (r != 16) // some of characters is non-whitespace |
325 | | return p + r; |
326 | | } |
327 | | |
328 | | return SkipWhitespace(p, end); |
329 | | } |
330 | | |
331 | | #elif defined(RAPIDJSON_SSE2) |
332 | | |
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. The scan has no end bound: it stops only at the
        first character that is not one of ' ', '\n', '\r', '\t', so the text
        must contain such a character (for example a terminating '\0').
    \return Pointer to the first non-whitespace character.
    \note Once \c p is 16-byte aligned, whole 16-byte blocks are loaded, so bytes
        located after the first non-whitespace character may be read.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scalar checks until p reaches a multiple of 16, as required by the aligned load below)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string
    // One 16-byte row per whitespace character, each row filled with that character.
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // x gets 0xFF in every byte position where s holds one of the four whitespace characters.
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // One bit per byte, inverted: a set bit in r marks a non-whitespace byte.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            // __builtin_ffs is 1-based, hence the "- 1".
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
377 | | |
378 | | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
379 | | // Fast return for single non-whitespace |
380 | | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
381 | | ++p; |
382 | | else |
383 | | return p; |
384 | | |
385 | | // The rest of string |
386 | | #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } |
387 | | static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; |
388 | | #undef C16 |
389 | | |
390 | | const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); |
391 | | const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); |
392 | | const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); |
393 | | const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); |
394 | | |
395 | | for (; p <= end - 16; p += 16) { |
396 | | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
397 | | __m128i x = _mm_cmpeq_epi8(s, w0); |
398 | | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); |
399 | | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); |
400 | | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); |
401 | | unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); |
402 | | if (r != 0) { // some of characters may be non-whitespace |
403 | | #ifdef _MSC_VER // Find the index of first non-whitespace |
404 | | unsigned long offset; |
405 | | _BitScanForward(&offset, r); |
406 | | return p + offset; |
407 | | #else |
408 | | return p + __builtin_ffs(r) - 1; |
409 | | #endif |
410 | | } |
411 | | } |
412 | | |
413 | | return SkipWhitespace(p, end); |
414 | | } |
415 | | |
416 | | #elif defined(RAPIDJSON_NEON) |
417 | | |
//! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. The scan has no end bound: it stops only at the
        first character that is not one of ' ', '\n', '\r', '\t', so the text
        must contain such a character (for example a terminating '\0').
    \return Pointer to the first non-whitespace character.
    \note Once \c p is 16-byte aligned, whole 16-byte blocks are loaded, so bytes
        located after the first non-whitespace character may be read.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // One vector per whitespace character, with that character in all 16 lanes.
    const uint8x16_t w0 = vmovq_n_u8(' ');
    const uint8x16_t w1 = vmovq_n_u8('\n');
    const uint8x16_t w2 = vmovq_n_u8('\r');
    const uint8x16_t w3 = vmovq_n_u8('\t');

    for (;; p += 16) {
        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        // x gets 0xFF in every byte position where s holds one of the four whitespace characters.
        uint8x16_t x = vceqq_u8(s, w0);
        x = vorrq_u8(x, vceqq_u8(s, w1));
        x = vorrq_u8(x, vceqq_u8(s, w2));
        x = vorrq_u8(x, vceqq_u8(s, w3));

        x = vmvnq_u8(x);                       // Negate
        // Byte-reverse each 64-bit half so that the earliest byte ends up most
        // significant; a leading-zero count then locates the first set byte.
        x = vrev64q_u8(x);                     // Rev in 64
        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract

        if (low == 0) {
            if (high != 0) {
                uint32_t lz = internal::clzll(high);
                // lz >> 3 converts the bit count to a byte index; "+ 8" skips the all-whitespace low half.
                return p + 8 + (lz >> 3);
            }
        } else {
            uint32_t lz = internal::clzll(low);
            return p + (lz >> 3);
        }
    }
}
462 | | |
463 | | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
464 | | // Fast return for single non-whitespace |
465 | | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
466 | | ++p; |
467 | | else |
468 | | return p; |
469 | | |
470 | | const uint8x16_t w0 = vmovq_n_u8(' '); |
471 | | const uint8x16_t w1 = vmovq_n_u8('\n'); |
472 | | const uint8x16_t w2 = vmovq_n_u8('\r'); |
473 | | const uint8x16_t w3 = vmovq_n_u8('\t'); |
474 | | |
475 | | for (; p <= end - 16; p += 16) { |
476 | | const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); |
477 | | uint8x16_t x = vceqq_u8(s, w0); |
478 | | x = vorrq_u8(x, vceqq_u8(s, w1)); |
479 | | x = vorrq_u8(x, vceqq_u8(s, w2)); |
480 | | x = vorrq_u8(x, vceqq_u8(s, w3)); |
481 | | |
482 | | x = vmvnq_u8(x); // Negate |
483 | | x = vrev64q_u8(x); // Rev in 64 |
484 | | uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract |
485 | | uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract |
486 | | |
487 | | if (low == 0) { |
488 | | if (high != 0) { |
489 | | uint32_t lz = internal::clzll(high); |
490 | | return p + 8 + (lz >> 3); |
491 | | } |
492 | | } else { |
493 | | uint32_t lz = internal::clzll(low); |
494 | | return p + (lz >> 3); |
495 | | } |
496 | | } |
497 | | |
498 | | return SkipWhitespace(p, end); |
499 | | } |
500 | | |
501 | | #endif // RAPIDJSON_NEON |
502 | | |
503 | | #ifdef RAPIDJSON_SIMD |
504 | | //! Template function specialization for InsituStringStream |
505 | | template<> inline void SkipWhitespace(InsituStringStream& is) { |
506 | | is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_)); |
507 | | } |
508 | | |
509 | | //! Template function specialization for StringStream |
510 | | template<> inline void SkipWhitespace(StringStream& is) { |
511 | | is.src_ = SkipWhitespace_SIMD(is.src_); |
512 | | } |
513 | | |
514 | | template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) { |
515 | | is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); |
516 | | } |
517 | | #endif // RAPIDJSON_SIMD |
518 | | |
519 | | /////////////////////////////////////////////////////////////////////////////// |
520 | | // GenericReader |
521 | | |
522 | | //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. |
523 | | /*! GenericReader parses JSON text from a stream, and send events synchronously to an |
524 | | object implementing Handler concept. |
525 | | |
526 | | It needs to allocate a stack for storing a single decoded string during |
527 | | non-destructive parsing. |
528 | | |
529 | | For in-situ parsing, the decoded string is directly written to the source |
530 | | text string, no temporary buffer is required. |
531 | | |
532 | | A GenericReader object can be reused for parsing multiple JSON texts. |
533 | | |
534 | | \tparam SourceEncoding Encoding of the input stream. |
535 | | \tparam TargetEncoding Encoding of the parse output. |
536 | | \tparam StackAllocator Allocator type for stack. |
537 | | */ |
538 | | template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator> |
539 | | class GenericReader { |
540 | | public: |
541 | | typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type |
542 | | |
543 | | //! Constructor. |
544 | | /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) |
545 | | \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) |
546 | | */ |
547 | | GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : |
548 | | stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {} |
549 | | |
//! Parse JSON text.
/*! Dispatches to IterativeParse() when \c kParseIterativeFlag is set; otherwise
    parses one root value and, unless \c kParseStopWhenDoneFlag is set, requires
    that only whitespace/comments follow it.
    \tparam parseFlags Combination of \ref ParseFlag.
    \tparam InputStream Type of input stream, implementing Stream concept.
    \tparam Handler Type of handler, implementing Handler concept.
    \param is Input stream to be parsed.
    \param handler The handler to receive events.
    \return The parse result recorded for this run (see HasParseError(),
            GetParseErrorCode() and GetErrorOffset()).
*/
template <unsigned parseFlags, typename InputStream, typename Handler>
ParseResult Parse(InputStream& is, Handler& handler) {
    if (parseFlags & kParseIterativeFlag)
        return IterativeParse<parseFlags>(is, handler);

    // Forget any error left over from a previous run.
    parseResult_.Clear();

    // The stack is cleared on every exit path from here on.
    ClearStackOnExit scope(*this);

    SkipWhitespaceAndComments<parseFlags>(is);
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

    // Nothing but whitespace/comments before the terminator: empty document.
    if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
        RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
    }
    else {
        ParseValue<parseFlags>(is, handler);
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

        if (!(parseFlags & kParseStopWhenDoneFlag)) {
            SkipWhitespaceAndComments<parseFlags>(is);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

            // Anything other than the terminator after the root value is an error.
            if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
                RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
            }
        }
    }

    return parseResult_;
}
591 | | |
592 | | //! Parse JSON text (with \ref kParseDefaultFlags) |
593 | | /*! \tparam InputStream Type of input stream, implementing Stream concept |
594 | | \tparam Handler Type of handler, implementing Handler concept. |
595 | | \param is Input stream to be parsed. |
596 | | \param handler The handler to receive events. |
597 | | \return Whether the parsing is successful. |
598 | | */ |
599 | | template <typename InputStream, typename Handler> |
600 | | ParseResult Parse(InputStream& is, Handler& handler) { |
601 | | return Parse<kParseDefaultFlags>(is, handler); |
602 | | } |
603 | | |
604 | | //! Initialize JSON text token-by-token parsing |
605 | | /*! |
606 | | */ |
607 | | void IterativeParseInit() { |
608 | | parseResult_.Clear(); |
609 | | state_ = IterativeParsingStartState; |
610 | | } |
611 | | |
//! Parse one token from JSON text
/*! Advances the iterative state machine until a non-delimiter token has been
    processed, the document is finished, an error occurs, or the input ends.
    \tparam InputStream Type of input stream, implementing Stream concept
    \tparam Handler Type of handler, implementing Handler concept.
    \param is Input stream to be parsed.
    \param handler The handler to receive events.
    \return \c false when an error was reported through HandleError(), \c true otherwise.
*/
template <unsigned parseFlags, typename InputStream, typename Handler>
bool IterativeParseNext(InputStream& is, Handler& handler) {
    while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
        SkipWhitespaceAndComments<parseFlags>(is);

        // Classify the next character, look up the predicted state and perform the transition.
        Token t = Tokenize(is.Peek());
        IterativeParsingState n = Predict(state_, t);
        IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);

        // If we've finished or hit an error...
        if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
            // Report errors.
            if (d == IterativeParsingErrorState) {
                // state_ still holds the state from before the failed transition.
                HandleError(state_, is);
                return false;
            }

            // Transition to the finish state.
            RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
            state_ = d;

            // If StopWhenDone is not set...
            if (!(parseFlags & kParseStopWhenDoneFlag)) {
                // ... and extra non-whitespace data is found...
                SkipWhitespaceAndComments<parseFlags>(is);
                if (is.Peek() != '\0') {
                    // ... this is considered an error.
                    HandleError(state_, is);
                    return false;
                }
            }

            // Success! We are done!
            return true;
        }

        // Transition to the new state.
        state_ = d;

        // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
        if (!IsIterativeParsingDelimiterState(n))
            return true;
    }

    // We reached the end of file.
    stack_.Clear();

    // Running out of input is only acceptable once the finish state has been reached.
    if (state_ != IterativeParsingFinishState) {
        HandleError(state_, is);
        return false;
    }

    return true;
}
673 | | |
674 | | //! Check if token-by-token parsing JSON text is complete |
675 | | /*! \return Whether the JSON has been fully decoded. |
676 | | */ |
677 | | RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const { |
678 | | return IsIterativeParsingCompleteState(state_); |
679 | | } |
680 | | |
681 | | //! Whether a parse error has occurred in the last parsing. |
682 | | bool HasParseError() const { return parseResult_.IsError(); } |
683 | | |
684 | | //! Get the \ref ParseErrorCode of last parsing. |
685 | | ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } |
686 | | |
687 | | //! Get the position of last parsing error in input, 0 otherwise. |
688 | | size_t GetErrorOffset() const { return parseResult_.Offset(); } |
689 | | |
690 | | protected: |
691 | | void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } |
692 | | |
693 | | private: |
694 | | // Prohibit copy constructor & assignment operator. |
695 | | GenericReader(const GenericReader&); |
696 | | GenericReader& operator=(const GenericReader&); |
697 | | |
698 | | void ClearStack() { stack_.Clear(); } |
699 | | |
700 | | // clear stack on any exit from ParseStream, e.g. due to exception |
701 | | struct ClearStackOnExit { |
702 | | explicit ClearStackOnExit(GenericReader& r) : r_(r) {} |
703 | | ~ClearStackOnExit() { r_.ClearStack(); } |
704 | | private: |
705 | | GenericReader& r_; |
706 | | ClearStackOnExit(const ClearStackOnExit&); |
707 | | ClearStackOnExit& operator=(const ClearStackOnExit&); |
708 | | }; |
709 | | |
// Skips whitespace and, when kParseCommentsFlag is set, any number of
// "/* ... */" and "// ..." comments together with the whitespace around them.
// Errors are reported through RAPIDJSON_PARSE_ERROR, so callers check
// HasParseError() afterwards (typically via an EARLY_RETURN macro).
// NOTE(review): Consume() is defined outside this view; it is used here as
// "take the next character if it equals the argument and report whether it did".
template<unsigned parseFlags, typename InputStream>
void SkipWhitespaceAndComments(InputStream& is) {
    SkipWhitespace(is);

    if (parseFlags & kParseCommentsFlag) {
        while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
            if (Consume(is, '*')) {
                // Multi-line comment: scan for the closing "*/".
                while (true) {
                    // Input ended before the comment was closed.
                    if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
                        RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
                    else if (Consume(is, '*')) {
                        if (Consume(is, '/'))
                            break;
                    }
                    else
                        is.Take();
                }
            }
            else if (RAPIDJSON_LIKELY(Consume(is, '/')))
                // One-line comment: discard up to and including the newline, or up to the end of input.
                while (is.Peek() != '\0' && is.Take() != '\n') {}
            else
                // A lone '/' starts neither kind of comment.
                RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());

            SkipWhitespace(is);
        }
    }
}
737 | | |
738 | | // Parse object: { string : value, ... } |
739 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
740 | | void ParseObject(InputStream& is, Handler& handler) { |
741 | | RAPIDJSON_ASSERT(is.Peek() == '{'); |
742 | | is.Take(); // Skip '{' |
743 | | |
744 | | if (RAPIDJSON_UNLIKELY(!handler.StartObject())) |
745 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
746 | | |
747 | | SkipWhitespaceAndComments<parseFlags>(is); |
748 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
749 | | |
750 | | if (Consume(is, '}')) { |
751 | | if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object |
752 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
753 | | return; |
754 | | } |
755 | | |
756 | | for (SizeType memberCount = 0;;) { |
757 | | if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) |
758 | | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); |
759 | | |
760 | | ParseString<parseFlags>(is, handler, true); |
761 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
762 | | |
763 | | SkipWhitespaceAndComments<parseFlags>(is); |
764 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
765 | | |
766 | | if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) |
767 | | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); |
768 | | |
769 | | SkipWhitespaceAndComments<parseFlags>(is); |
770 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
771 | | |
772 | | ParseValue<parseFlags>(is, handler); |
773 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
774 | | |
775 | | SkipWhitespaceAndComments<parseFlags>(is); |
776 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
777 | | |
778 | | ++memberCount; |
779 | | |
780 | | switch (is.Peek()) { |
781 | | case ',': |
782 | | is.Take(); |
783 | | SkipWhitespaceAndComments<parseFlags>(is); |
784 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
785 | | break; |
786 | | case '}': |
787 | | is.Take(); |
788 | | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
789 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
790 | | return; |
791 | | default: |
792 | | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy |
793 | | } |
794 | | |
795 | | if (parseFlags & kParseTrailingCommasFlag) { |
796 | | if (is.Peek() == '}') { |
797 | | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
798 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
799 | | is.Take(); |
800 | | return; |
801 | | } |
802 | | } |
803 | | } |
804 | | } |
805 | | |
806 | | // Parse array: [ value, ... ] |
807 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
808 | | void ParseArray(InputStream& is, Handler& handler) { |
809 | | RAPIDJSON_ASSERT(is.Peek() == '['); |
810 | | is.Take(); // Skip '[' |
811 | | |
812 | | if (RAPIDJSON_UNLIKELY(!handler.StartArray())) |
813 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
814 | | |
815 | | SkipWhitespaceAndComments<parseFlags>(is); |
816 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
817 | | |
818 | | if (Consume(is, ']')) { |
819 | | if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array |
820 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
821 | | return; |
822 | | } |
823 | | |
824 | | for (SizeType elementCount = 0;;) { |
825 | | ParseValue<parseFlags>(is, handler); |
826 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
827 | | |
828 | | ++elementCount; |
829 | | SkipWhitespaceAndComments<parseFlags>(is); |
830 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
831 | | |
832 | | if (Consume(is, ',')) { |
833 | | SkipWhitespaceAndComments<parseFlags>(is); |
834 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
835 | | } |
836 | | else if (Consume(is, ']')) { |
837 | | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
838 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
839 | | return; |
840 | | } |
841 | | else |
842 | | RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); |
843 | | |
844 | | if (parseFlags & kParseTrailingCommasFlag) { |
845 | | if (is.Peek() == ']') { |
846 | | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
847 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
848 | | is.Take(); |
849 | | return; |
850 | | } |
851 | | } |
852 | | } |
853 | | } |
854 | | |
855 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
856 | | void ParseNull(InputStream& is, Handler& handler) { |
857 | | RAPIDJSON_ASSERT(is.Peek() == 'n'); |
858 | | is.Take(); |
859 | | |
860 | | if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { |
861 | | if (RAPIDJSON_UNLIKELY(!handler.Null())) |
862 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
863 | | } |
864 | | else |
865 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
866 | | } |
867 | | |
868 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
869 | | void ParseTrue(InputStream& is, Handler& handler) { |
870 | | RAPIDJSON_ASSERT(is.Peek() == 't'); |
871 | | is.Take(); |
872 | | |
873 | | if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { |
874 | | if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) |
875 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
876 | | } |
877 | | else |
878 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
879 | | } |
880 | | |
881 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
882 | | void ParseFalse(InputStream& is, Handler& handler) { |
883 | | RAPIDJSON_ASSERT(is.Peek() == 'f'); |
884 | | is.Take(); |
885 | | |
886 | | if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { |
887 | | if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) |
888 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
889 | | } |
890 | | else |
891 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
892 | | } |
893 | | |
894 | | template<typename InputStream> |
895 | | RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { |
896 | | if (RAPIDJSON_LIKELY(is.Peek() == expect)) { |
897 | | is.Take(); |
898 | | return true; |
899 | | } |
900 | | else |
901 | | return false; |
902 | | } |
903 | | |
904 | | // Helper function to parse four hexadecimal digits in \uXXXX in ParseString(). |
905 | | template<typename InputStream> |
906 | | unsigned ParseHex4(InputStream& is, size_t escapeOffset) { |
907 | | unsigned codepoint = 0; |
908 | | for (int i = 0; i < 4; i++) { |
909 | | Ch c = is.Peek(); |
910 | | codepoint <<= 4; |
911 | | codepoint += static_cast<unsigned>(c); |
912 | | if (c >= '0' && c <= '9') |
913 | | codepoint -= '0'; |
914 | | else if (c >= 'A' && c <= 'F') |
915 | | codepoint -= 'A' - 10; |
916 | | else if (c >= 'a' && c <= 'f') |
917 | | codepoint -= 'a' - 10; |
918 | | else { |
919 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); |
920 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); |
921 | | } |
922 | | is.Take(); |
923 | | } |
924 | | return codepoint; |
925 | | } |
926 | | |
927 | | template <typename CharType> |
928 | | class StackStream { |
929 | | public: |
930 | | typedef CharType Ch; |
931 | | |
932 | | StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {} |
933 | | RAPIDJSON_FORCEINLINE void Put(Ch c) { |
934 | | *stack_.template Push<Ch>() = c; |
935 | | ++length_; |
936 | | } |
937 | | |
938 | | RAPIDJSON_FORCEINLINE void* Push(SizeType count) { |
939 | | length_ += count; |
940 | | return stack_.template Push<Ch>(count); |
941 | | } |
942 | | |
943 | | size_t Length() const { return length_; } |
944 | | |
945 | | Ch* Pop() { |
946 | | return stack_.template Pop<Ch>(length_); |
947 | | } |
948 | | |
949 | | private: |
950 | | StackStream(const StackStream&); |
951 | | StackStream& operator=(const StackStream&); |
952 | | |
953 | | internal::Stack<StackAllocator>& stack_; |
954 | | SizeType length_; |
955 | | }; |
956 | | |
957 | | // Parse string and generate String event. Different code paths for kParseInsituFlag. |
958 | | template<unsigned parseFlags, typename InputStream, typename Handler> |
959 | | void ParseString(InputStream& is, Handler& handler, bool isKey = false) { |
960 | | internal::StreamLocalCopy<InputStream> copy(is); |
961 | | InputStream& s(copy.s); |
962 | | |
963 | | RAPIDJSON_ASSERT(s.Peek() == '\"'); |
964 | | s.Take(); // Skip '\"' |
965 | | |
966 | | bool success = false; |
967 | | if (parseFlags & kParseInsituFlag) { |
968 | | typename InputStream::Ch *head = s.PutBegin(); |
969 | | ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s); |
970 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
971 | | size_t length = s.PutEnd(head) - 1; |
972 | | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); |
973 | | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); |
974 | | success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); |
975 | | } |
976 | | else { |
977 | | StackStream<typename TargetEncoding::Ch> stackStream(stack_); |
978 | | ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream); |
979 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
980 | | SizeType length = static_cast<SizeType>(stackStream.Length()) - 1; |
981 | | const typename TargetEncoding::Ch* const str = stackStream.Pop(); |
982 | | success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); |
983 | | } |
984 | | if (RAPIDJSON_UNLIKELY(!success)) |
985 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); |
986 | | } |
987 | | |
988 | | // Parse string to an output is |
989 | | // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. |
990 | | template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream> |
991 | | RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { |
992 | | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
993 | | #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
994 | | static const char escape[256] = { |
995 | | Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/', |
996 | | Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, |
997 | | 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, |
998 | | 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
999 | | Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 |
1000 | | }; |
1001 | | #undef Z16 |
1002 | | //!@endcond |
1003 | | |
1004 | | for (;;) { |
1005 | | // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. |
1006 | | if (!(parseFlags & kParseValidateEncodingFlag)) |
1007 | | ScanCopyUnescapedString(is, os); |
1008 | | |
1009 | | Ch c = is.Peek(); |
1010 | | if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape |
1011 | | size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset |
1012 | | is.Take(); |
1013 | | Ch e = is.Peek(); |
1014 | | if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) { |
1015 | | is.Take(); |
1016 | | os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)])); |
1017 | | } |
1018 | | else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe |
1019 | | is.Take(); |
1020 | | os.Put('\''); |
1021 | | } |
1022 | | else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode |
1023 | | is.Take(); |
1024 | | unsigned codepoint = ParseHex4(is, escapeOffset); |
1025 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
1026 | | if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) { |
1027 | | // high surrogate, check if followed by valid low surrogate |
1028 | | if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) { |
1029 | | // Handle UTF-16 surrogate pair |
1030 | | if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) |
1031 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
1032 | | unsigned codepoint2 = ParseHex4(is, escapeOffset); |
1033 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
1034 | | if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) |
1035 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
1036 | | codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; |
1037 | | } |
1038 | | // single low surrogate |
1039 | | else |
1040 | | { |
1041 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
1042 | | } |
1043 | | } |
1044 | | TEncoding::Encode(os, codepoint); |
1045 | | } |
1046 | | else |
1047 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); |
1048 | | } |
1049 | | else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote |
1050 | | is.Take(); |
1051 | | os.Put('\0'); // null-terminate the string |
1052 | | return; |
1053 | | } |
1054 | | else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF |
1055 | | if (c == '\0') |
1056 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); |
1057 | | else |
1058 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell()); |
1059 | | } |
1060 | | else { |
1061 | | size_t offset = is.Tell(); |
1062 | | if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? |
1063 | | !Transcoder<SEncoding, TEncoding>::Validate(is, os) : |
1064 | | !Transcoder<SEncoding, TEncoding>::Transcode(is, os)))) |
1065 | | RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); |
1066 | | } |
1067 | | } |
1068 | | } |
1069 | | |
1070 | | template<typename InputStream, typename OutputStream> |
1071 | | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { |
1072 | | // Do nothing for generic version |
1073 | | } |
1074 | | |
1075 | | #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) |
1076 | | // StringStream -> StackStream<char> |
1077 | | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { |
1078 | | const char* p = is.src_; |
1079 | | |
1080 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1081 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1082 | | while (p != nextAligned) |
1083 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1084 | | is.src_ = p; |
1085 | | return; |
1086 | | } |
1087 | | else |
1088 | | os.Put(*p++); |
1089 | | |
1090 | | // The rest of string using SIMD |
1091 | | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1092 | | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1093 | | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1094 | | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1095 | | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1096 | | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1097 | | |
1098 | | for (;; p += 16) { |
1099 | | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1100 | | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1101 | | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1102 | | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1103 | | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1104 | | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1105 | | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1106 | | SizeType length; |
1107 | | #ifdef _MSC_VER // Find the index of first escaped |
1108 | | unsigned long offset; |
1109 | | _BitScanForward(&offset, r); |
1110 | | length = offset; |
1111 | | #else |
1112 | | length = static_cast<SizeType>(__builtin_ffs(r) - 1); |
1113 | | #endif |
1114 | | if (length != 0) { |
1115 | | char* q = reinterpret_cast<char*>(os.Push(length)); |
1116 | | for (size_t i = 0; i < length; i++) |
1117 | | q[i] = p[i]; |
1118 | | |
1119 | | p += length; |
1120 | | } |
1121 | | break; |
1122 | | } |
1123 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); |
1124 | | } |
1125 | | |
1126 | | is.src_ = p; |
1127 | | } |
1128 | | |
1129 | | // InsituStringStream -> InsituStringStream |
1130 | | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { |
1131 | | RAPIDJSON_ASSERT(&is == &os); |
1132 | | (void)os; |
1133 | | |
1134 | | if (is.src_ == is.dst_) { |
1135 | | SkipUnescapedString(is); |
1136 | | return; |
1137 | | } |
1138 | | |
1139 | | char* p = is.src_; |
1140 | | char *q = is.dst_; |
1141 | | |
1142 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1143 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1144 | | while (p != nextAligned) |
1145 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1146 | | is.src_ = p; |
1147 | | is.dst_ = q; |
1148 | | return; |
1149 | | } |
1150 | | else |
1151 | | *q++ = *p++; |
1152 | | |
1153 | | // The rest of string using SIMD |
1154 | | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1155 | | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1156 | | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1157 | | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1158 | | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1159 | | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1160 | | |
1161 | | for (;; p += 16, q += 16) { |
1162 | | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1163 | | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1164 | | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1165 | | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1166 | | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1167 | | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1168 | | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1169 | | size_t length; |
1170 | | #ifdef _MSC_VER // Find the index of first escaped |
1171 | | unsigned long offset; |
1172 | | _BitScanForward(&offset, r); |
1173 | | length = offset; |
1174 | | #else |
1175 | | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1176 | | #endif |
1177 | | for (const char* pend = p + length; p != pend; ) |
1178 | | *q++ = *p++; |
1179 | | break; |
1180 | | } |
1181 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); |
1182 | | } |
1183 | | |
1184 | | is.src_ = p; |
1185 | | is.dst_ = q; |
1186 | | } |
1187 | | |
1188 | | // When read/write pointers are the same for insitu stream, just skip unescaped characters |
1189 | | static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { |
1190 | | RAPIDJSON_ASSERT(is.src_ == is.dst_); |
1191 | | char* p = is.src_; |
1192 | | |
1193 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1194 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1195 | | for (; p != nextAligned; p++) |
1196 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1197 | | is.src_ = is.dst_ = p; |
1198 | | return; |
1199 | | } |
1200 | | |
1201 | | // The rest of string using SIMD |
1202 | | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1203 | | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1204 | | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1205 | | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1206 | | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1207 | | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1208 | | |
1209 | | for (;; p += 16) { |
1210 | | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1211 | | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1212 | | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1213 | | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1214 | | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1215 | | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1216 | | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1217 | | size_t length; |
1218 | | #ifdef _MSC_VER // Find the index of first escaped |
1219 | | unsigned long offset; |
1220 | | _BitScanForward(&offset, r); |
1221 | | length = offset; |
1222 | | #else |
1223 | | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1224 | | #endif |
1225 | | p += length; |
1226 | | break; |
1227 | | } |
1228 | | } |
1229 | | |
1230 | | is.src_ = is.dst_ = p; |
1231 | | } |
1232 | | #elif defined(RAPIDJSON_NEON) |
1233 | | // StringStream -> StackStream<char> |
1234 | | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { |
1235 | | const char* p = is.src_; |
1236 | | |
1237 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1238 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1239 | | while (p != nextAligned) |
1240 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1241 | | is.src_ = p; |
1242 | | return; |
1243 | | } |
1244 | | else |
1245 | | os.Put(*p++); |
1246 | | |
1247 | | // The rest of string using SIMD |
1248 | | const uint8x16_t s0 = vmovq_n_u8('"'); |
1249 | | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1250 | | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1251 | | const uint8x16_t s3 = vmovq_n_u8(32); |
1252 | | |
1253 | | for (;; p += 16) { |
1254 | | const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); |
1255 | | uint8x16_t x = vceqq_u8(s, s0); |
1256 | | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1257 | | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1258 | | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1259 | | |
1260 | | x = vrev64q_u8(x); // Rev in 64 |
1261 | | uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract |
1262 | | uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract |
1263 | | |
1264 | | SizeType length = 0; |
1265 | | bool escaped = false; |
1266 | | if (low == 0) { |
1267 | | if (high != 0) { |
1268 | | uint32_t lz = internal::clzll(high); |
1269 | | length = 8 + (lz >> 3); |
1270 | | escaped = true; |
1271 | | } |
1272 | | } else { |
1273 | | uint32_t lz = internal::clzll(low); |
1274 | | length = lz >> 3; |
1275 | | escaped = true; |
1276 | | } |
1277 | | if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped |
1278 | | if (length != 0) { |
1279 | | char* q = reinterpret_cast<char*>(os.Push(length)); |
1280 | | for (size_t i = 0; i < length; i++) |
1281 | | q[i] = p[i]; |
1282 | | |
1283 | | p += length; |
1284 | | } |
1285 | | break; |
1286 | | } |
1287 | | vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s); |
1288 | | } |
1289 | | |
1290 | | is.src_ = p; |
1291 | | } |
1292 | | |
1293 | | // InsituStringStream -> InsituStringStream |
1294 | | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { |
1295 | | RAPIDJSON_ASSERT(&is == &os); |
1296 | | (void)os; |
1297 | | |
1298 | | if (is.src_ == is.dst_) { |
1299 | | SkipUnescapedString(is); |
1300 | | return; |
1301 | | } |
1302 | | |
1303 | | char* p = is.src_; |
1304 | | char *q = is.dst_; |
1305 | | |
1306 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1307 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1308 | | while (p != nextAligned) |
1309 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1310 | | is.src_ = p; |
1311 | | is.dst_ = q; |
1312 | | return; |
1313 | | } |
1314 | | else |
1315 | | *q++ = *p++; |
1316 | | |
1317 | | // The rest of string using SIMD |
1318 | | const uint8x16_t s0 = vmovq_n_u8('"'); |
1319 | | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1320 | | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1321 | | const uint8x16_t s3 = vmovq_n_u8(32); |
1322 | | |
1323 | | for (;; p += 16, q += 16) { |
1324 | | const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); |
1325 | | uint8x16_t x = vceqq_u8(s, s0); |
1326 | | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1327 | | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1328 | | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1329 | | |
1330 | | x = vrev64q_u8(x); // Rev in 64 |
1331 | | uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract |
1332 | | uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract |
1333 | | |
1334 | | SizeType length = 0; |
1335 | | bool escaped = false; |
1336 | | if (low == 0) { |
1337 | | if (high != 0) { |
1338 | | uint32_t lz = internal::clzll(high); |
1339 | | length = 8 + (lz >> 3); |
1340 | | escaped = true; |
1341 | | } |
1342 | | } else { |
1343 | | uint32_t lz = internal::clzll(low); |
1344 | | length = lz >> 3; |
1345 | | escaped = true; |
1346 | | } |
1347 | | if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped |
1348 | | for (const char* pend = p + length; p != pend; ) { |
1349 | | *q++ = *p++; |
1350 | | } |
1351 | | break; |
1352 | | } |
1353 | | vst1q_u8(reinterpret_cast<uint8_t *>(q), s); |
1354 | | } |
1355 | | |
1356 | | is.src_ = p; |
1357 | | is.dst_ = q; |
1358 | | } |
1359 | | |
1360 | | // When read/write pointers are the same for insitu stream, just skip unescaped characters |
1361 | | static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { |
1362 | | RAPIDJSON_ASSERT(is.src_ == is.dst_); |
1363 | | char* p = is.src_; |
1364 | | |
1365 | | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1366 | | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1367 | | for (; p != nextAligned; p++) |
1368 | | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1369 | | is.src_ = is.dst_ = p; |
1370 | | return; |
1371 | | } |
1372 | | |
1373 | | // The rest of string using SIMD |
1374 | | const uint8x16_t s0 = vmovq_n_u8('"'); |
1375 | | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1376 | | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1377 | | const uint8x16_t s3 = vmovq_n_u8(32); |
1378 | | |
1379 | | for (;; p += 16) { |
1380 | | const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); |
1381 | | uint8x16_t x = vceqq_u8(s, s0); |
1382 | | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1383 | | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1384 | | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1385 | | |
1386 | | x = vrev64q_u8(x); // Rev in 64 |
1387 | | uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract |
1388 | | uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract |
1389 | | |
1390 | | if (low == 0) { |
1391 | | if (high != 0) { |
1392 | | uint32_t lz = internal::clzll(high); |
1393 | | p += 8 + (lz >> 3); |
1394 | | break; |
1395 | | } |
1396 | | } else { |
1397 | | uint32_t lz = internal::clzll(low); |
1398 | | p += lz >> 3; |
1399 | | break; |
1400 | | } |
1401 | | } |
1402 | | |
1403 | | is.src_ = is.dst_ = p; |
1404 | | } |
1405 | | #endif // RAPIDJSON_NEON |
1406 | | |
1407 | | template<typename InputStream, typename StackCharacter, bool backup, bool pushOnTake> |
1408 | | class NumberStream; |
1409 | | |
1410 | | template<typename InputStream, typename StackCharacter> |
1411 | | class NumberStream<InputStream, StackCharacter, false, false> { |
1412 | | public: |
1413 | | typedef typename InputStream::Ch Ch; |
1414 | | |
1415 | | NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } |
1416 | | |
1417 | | RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } |
1418 | | RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } |
1419 | | RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } |
1420 | | RAPIDJSON_FORCEINLINE void Push(char) {} |
1421 | | |
1422 | | size_t Tell() { return is.Tell(); } |
1423 | | size_t Length() { return 0; } |
1424 | | const StackCharacter* Pop() { return 0; } |
1425 | | |
1426 | | protected: |
1427 | | NumberStream& operator=(const NumberStream&); |
1428 | | |
1429 | | InputStream& is; |
1430 | | }; |
1431 | | |
1432 | | template<typename InputStream, typename StackCharacter> |
1433 | | class NumberStream<InputStream, StackCharacter, true, false> : public NumberStream<InputStream, StackCharacter, false, false> { |
1434 | | typedef NumberStream<InputStream, StackCharacter, false, false> Base; |
1435 | | public: |
1436 | | NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s), stackStream(reader.stack_) {} |
1437 | | |
1438 | | RAPIDJSON_FORCEINLINE Ch TakePush() { |
1439 | | stackStream.Put(static_cast<StackCharacter>(Base::is.Peek())); |
1440 | | return Base::is.Take(); |
1441 | | } |
1442 | | |
1443 | | RAPIDJSON_FORCEINLINE void Push(StackCharacter c) { |
1444 | | stackStream.Put(c); |
1445 | | } |
1446 | | |
1447 | | size_t Length() { return stackStream.Length(); } |
1448 | | |
1449 | | const StackCharacter* Pop() { |
1450 | | stackStream.Put('\0'); |
1451 | | return stackStream.Pop(); |
1452 | | } |
1453 | | |
1454 | | private: |
1455 | | StackStream<StackCharacter> stackStream; |
1456 | | }; |
1457 | | |
1458 | | template<typename InputStream, typename StackCharacter> |
1459 | | class NumberStream<InputStream, StackCharacter, true, true> : public NumberStream<InputStream, StackCharacter, true, false> { |
1460 | | typedef NumberStream<InputStream, StackCharacter, true, false> Base; |
1461 | | public: |
1462 | | NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s) {} |
1463 | | |
1464 | | RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } |
1465 | | }; |
1466 | | |
// Parses a number starting at the current position of `is` and reports it to `handler`.
//
// Parsing order: optional minus, integer part, optional fraction, optional exponent.
// When kParseNanAndInfFlag is set, the spellings NaN, Inf and Infinity are accepted as well.
//
// The value is accumulated in the narrowest representation that still fits,
//     i (unsigned)  ->  i64 (uint64_t)  ->  d (double)
// and the matching event is emitted: Uint/Int, Uint64/Int64 or Double.
// With kParseNumbersAsStringsFlag the raw text is handed to handler.RawNumber() instead.
//
// Errors are reported through RAPIDJSON_PARSE_ERROR with one of:
//   kParseErrorValueInvalid, kParseErrorNumberMissFraction, kParseErrorNumberMissExponent,
//   kParseErrorNumberTooBig, or kParseErrorTermination when the handler returns false.
1467 | | template<unsigned parseFlags, typename InputStream, typename Handler>
1468 | | void ParseNumber(InputStream& is, Handler& handler) {
1469 | | typedef typename internal::SelectIf<internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>, typename TargetEncoding::Ch, char>::Type NumberCharacter;
1470 | | 
1471 | | internal::StreamLocalCopy<InputStream> copy(is);
// Stream wrapper selection.
// Third argument: in numbers-as-strings mode it is true when not parsing in situ; otherwise it is
// true when full precision is requested.
// Fourth argument: true only for numbers-as-strings without in-situ parsing, which selects the
// specialization whose Take() also pushes each character to the buffer.
1472 | | NumberStream<InputStream, NumberCharacter,
1473 | | ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1474 | | ((parseFlags & kParseInsituFlag) == 0) :
1475 | | ((parseFlags & kParseFullPrecisionFlag) != 0),
1476 | | (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1477 | | (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1478 | | 
1479 | | size_t startOffset = s.Tell();
1480 | | double d = 0.0;
1481 | | bool useNanOrInf = false;
1482 | | 
1483 | | // Parse minus
1484 | | bool minus = Consume(s, '-');
1485 | | 
1486 | | // Parse int: zero / ( digit1-9 *DIGIT )
1487 | | unsigned i = 0;
1488 | | uint64_t i64 = 0;
1489 | | bool use64bit = false;
1490 | | int significandDigit = 0;
1491 | | if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1492 | | i = 0;
1493 | | s.TakePush();
1494 | | }
1495 | | else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1496 | | i = static_cast<unsigned>(s.TakePush() - '0');
1497 | | 
// Accumulate into the 32-bit i for as long as the next digit is guaranteed to fit.
// The limit depends on the sign: magnitudes up to 2^31 when negative, up to 2^32 - 1 otherwise.
// When the next digit would not fit, the value is carried over to i64 and parsing continues below.
1498 | | if (minus)
1499 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1500 | | if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1501 | | if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1502 | | i64 = i;
1503 | | use64bit = true;
1504 | | break;
1505 | | }
1506 | | }
1507 | | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1508 | | significandDigit++;
1509 | | }
1510 | | else
1511 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1512 | | if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1513 | | if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1514 | | i64 = i;
1515 | | use64bit = true;
1516 | | break;
1517 | | }
1518 | | }
1519 | | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1520 | | significandDigit++;
1521 | | }
1522 | | }
1523 | | // Parse NaN or Infinity here
1524 | | else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1525 | | if (Consume(s, 'N')) {
1526 | | if (Consume(s, 'a') && Consume(s, 'N')) {
1527 | | d = std::numeric_limits<double>::quiet_NaN();
1528 | | useNanOrInf = true;
1529 | | }
1530 | | }
1531 | | else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1532 | | if (Consume(s, 'n') && Consume(s, 'f')) {
1533 | | d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1534 | | useNanOrInf = true;
1535 | | 
// Inf may be followed by the letters inity to spell Infinity; a partial suffix is an error.
1536 | | if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1537 | | && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1538 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1539 | | }
1540 | | }
1541 | | }
1542 | | 
1543 | | if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1544 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1545 | | }
1546 | | }
1547 | | else
1548 | | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1549 | | 
1550 | | // Parse 64bit int
1551 | | bool useDouble = false;
// Same scheme as the 32-bit loops, with 64-bit limits. When the next digit would not fit,
// the value accumulated so far is converted to the double d and parsing continues as a double.
// NOTE(review): the thresholds look like floor(2^63 / 10) and floor((2^64 - 1) / 10), assuming
// RAPIDJSON_UINT64_C2 joins a high and a low 32-bit half - confirm against the macro definition.
1552 | | if (use64bit) {
1553 | | if (minus)
1554 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1555 | | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1556 | | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1557 | | d = static_cast<double>(i64);
1558 | | useDouble = true;
1559 | | break;
1560 | | }
1561 | | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1562 | | significandDigit++;
1563 | | }
1564 | | else
1565 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1566 | | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1567 | | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1568 | | d = static_cast<double>(i64);
1569 | | useDouble = true;
1570 | | break;
1571 | | }
1572 | | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1573 | | significandDigit++;
1574 | | }
1575 | | }
1576 | | 
1577 | | // Force double for big integer
1578 | | if (useDouble) {
1579 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1580 | | d = d * 10 + (s.TakePush() - '0');
1581 | | }
1582 | | }
1583 | | 
1584 | | // Parse frac = decimal-point 1*DIGIT
1585 | | int expFrac = 0;
1586 | | size_t decimalPosition;
1587 | | if (Consume(s, '.')) {
// Buffer length at the decimal point; passed to StrtodFullPrecision() further down.
1588 | | decimalPosition = s.Length();
1589 | | 
1590 | | if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1591 | | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
1592 | | 
1593 | | if (!useDouble) {
1594 | | #if RAPIDJSON_64BIT
1595 | | // Use i64 to store significand in 64-bit architecture
1596 | | if (!use64bit)
1597 | | i64 = i;
1598 | | 
1599 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1600 | | if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1601 | | break;
1602 | | else {
1603 | | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1604 | | --expFrac;
1605 | | if (i64 != 0)
1606 | | significandDigit++;
1607 | | }
1608 | | }
1609 | | 
1610 | | d = static_cast<double>(i64);
1611 | | #else
1612 | | // Use double to store significand in 32-bit architecture
1613 | | d = static_cast<double>(use64bit ? i64 : i);
1614 | | #endif
1615 | | useDouble = true;
1616 | | }
1617 | | 
// Remaining fraction digits: only while fewer than 17 significant digits have been counted
// are they accumulated into d (and expFrac decremented); later digits are consumed and pushed
// to the buffer but do not change d.
1618 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1619 | | if (significandDigit < 17) {
1620 | | d = d * 10.0 + (s.TakePush() - '0');
1621 | | --expFrac;
1622 | | if (RAPIDJSON_LIKELY(d > 0.0))
1623 | | significandDigit++;
1624 | | }
1625 | | else
1626 | | s.TakePush();
1627 | | }
1628 | | }
1629 | | else
1630 | | decimalPosition = s.Length(); // decimal position at the end of integer.
1631 | | 
1632 | | // Parse exp = e [ minus / plus ] 1*DIGIT
1633 | | int exp = 0;
1634 | | if (Consume(s, 'e') || Consume(s, 'E')) {
1635 | | if (!useDouble) {
1636 | | d = static_cast<double>(use64bit ? i64 : i);
1637 | | useDouble = true;
1638 | | }
1639 | | 
1640 | | bool expMinus = false;
1641 | | if (Consume(s, '+'))
1642 | | ;
1643 | | else if (Consume(s, '-'))
1644 | | expMinus = true;
1645 | | 
1646 | | if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1647 | | exp = static_cast<int>(s.Take() - '0');
1648 | | if (expMinus) {
1649 | | // (exp + expFrac) must not underflow int => we're detecting when -exp gets
1650 | | // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
1651 | | // underflow territory):
1652 | | //
1653 | | // -(exp * 10 + 9) + expFrac >= INT_MIN
1654 | | // <=> exp <= (expFrac - INT_MIN - 9) / 10
1655 | | RAPIDJSON_ASSERT(expFrac <= 0);
1656 | | int maxExp = (expFrac + 2147483639) / 10;
1657 | | 
1658 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1659 | | exp = exp * 10 + static_cast<int>(s.Take() - '0');
1660 | | if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
1661 | | while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1662 | | s.Take();
1663 | | }
1664 | | }
1665 | | }
1666 | | else { // positive exp
// A positive exponent above this bound is reported as kParseErrorNumberTooBig.
1667 | | int maxExp = 308 - expFrac;
1668 | | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1669 | | exp = exp * 10 + static_cast<int>(s.Take() - '0');
1670 | | if (RAPIDJSON_UNLIKELY(exp > maxExp))
1671 | | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1672 | | }
1673 | | }
1674 | | }
1675 | | else
1676 | | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1677 | | 
1678 | | if (expMinus)
1679 | | exp = -exp;
1680 | | }
1681 | | 
1682 | | // Finish parsing, call event according to the type of number.
1683 | | bool cont = true;
1684 | | 
1685 | | if (parseFlags & kParseNumbersAsStringsFlag) {
1686 | | if (parseFlags & kParseInsituFlag) {
1687 | | s.Pop(); // Pop stack no matter if it will be used or not.
1688 | | typename InputStream::Ch* head = is.PutBegin();
1689 | | const size_t length = s.Tell() - startOffset;
1690 | | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1691 | | // unable to insert the \0 character here, it will erase the comma after this number
1692 | | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1693 | | cont = handler.RawNumber(str, SizeType(length), false);
1694 | | }
1695 | | else {
// Transcode the buffered characters one by one into a second stack-backed stream and
// hand the NUL-terminated result to RawNumber(); the reported length excludes the terminator.
1696 | | SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1697 | | GenericStringStream<UTF8<NumberCharacter> > srcStream(s.Pop());
1698 | | StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1699 | | while (numCharsToCopy--) {
1700 | | Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(srcStream, dstStream);
1701 | | }
1702 | | dstStream.Put('\0');
1703 | | const typename TargetEncoding::Ch* str = dstStream.Pop();
1704 | | const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1705 | | cont = handler.RawNumber(str, SizeType(length), true);
1706 | | }
1707 | | }
1708 | | else {
1709 | | size_t length = s.Length();
1710 | | const NumberCharacter* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1711 | | 
1712 | | if (useDouble) {
1713 | | int p = exp + expFrac;
1714 | | if (parseFlags & kParseFullPrecisionFlag)
1715 | | d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1716 | | else
1717 | | d = internal::StrtodNormalPrecision(d, p);
1718 | | 
1719 | | // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
1720 | | if (d > (std::numeric_limits<double>::max)()) {
1721 | | // Overflow
1722 | | // TODO: internal::StrtodX should report overflow (or underflow)
1723 | | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1724 | | }
1725 | | 
1726 | | cont = handler.Double(minus ? -d : d);
1727 | | }
1728 | | else if (useNanOrInf) {
1729 | | cont = handler.Double(d);
1730 | | }
1731 | | else {
// Negative integers are produced by negating in unsigned arithmetic (~x + 1)
// and then casting the result to the signed type.
1732 | | if (use64bit) {
1733 | | if (minus)
1734 | | cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1735 | | else
1736 | | cont = handler.Uint64(i64);
1737 | | }
1738 | | else {
1739 | | if (minus)
1740 | | cont = handler.Int(static_cast<int32_t>(~i + 1));
1741 | | else
1742 | | cont = handler.Uint(i);
1743 | | }
1744 | | }
1745 | | }
1746 | | if (RAPIDJSON_UNLIKELY(!cont))
1747 | | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
1748 | | }
1749 | | |
1750 | | // Parse any JSON value
// Dispatches on the first character of the value: n, t, f, a double quote, an opening curly
// bracket or an opening square bracket select the matching parser. Every other character is
// handed to ParseNumber(), which reports kParseErrorValueInvalid when it is not the start of a number.
1751 | | template<unsigned parseFlags, typename InputStream, typename Handler>
1752 | | void ParseValue(InputStream& is, Handler& handler) {
1753 | | switch (is.Peek()) {
1754 | | case 'n': ParseNull <parseFlags>(is, handler); break;
1755 | | case 't': ParseTrue <parseFlags>(is, handler); break;
1756 | | case 'f': ParseFalse <parseFlags>(is, handler); break;
1757 | | case '"': ParseString<parseFlags>(is, handler); break;
1758 | | case '{': ParseObject<parseFlags>(is, handler); break;
1759 | | case '[': ParseArray <parseFlags>(is, handler); break;
1760 | | default :
1761 | | ParseNumber<parseFlags>(is, handler);
1762 | | break;
1763 | | 
1764 | | }
1765 | | }
1766 | | |
1767 | | // Iterative Parsing |
1768 | | |
1769 | | // States
// The enumerator order is significant:
//  - Predict() uses the numeric value as the row index of its transition table;
//  - IsIterativeParsingCompleteState() treats every value up to IterativeParsingErrorState as complete,
//    so the two sink states must stay first;
//  - IsIterativeParsingDelimiterState() treats every value from IterativeParsingElementDelimiterState
//    upwards as a delimiter state, so the delimiter states must stay last.
// cIterativeParsingStateCount is the number of states and sizes the transition table.
1770 | | enum IterativeParsingState {
1771 | | IterativeParsingFinishState = 0, // sink states at top
1772 | | IterativeParsingErrorState, // sink states at top
1773 | | IterativeParsingStartState,
1774 | | 
1775 | | // Object states
1776 | | IterativeParsingObjectInitialState,
1777 | | IterativeParsingMemberKeyState,
1778 | | IterativeParsingMemberValueState,
1779 | | IterativeParsingObjectFinishState,
1780 | | 
1781 | | // Array states
1782 | | IterativeParsingArrayInitialState,
1783 | | IterativeParsingElementState,
1784 | | IterativeParsingArrayFinishState,
1785 | | 
1786 | | // Single value state
1787 | | IterativeParsingValueState,
1788 | | 
1789 | | // Delimiter states (at bottom)
1790 | | IterativeParsingElementDelimiterState,
1791 | | IterativeParsingMemberDelimiterState,
1792 | | IterativeParsingKeyValueDelimiterState,
1793 | | 
1794 | | cIterativeParsingStateCount
1795 | | };
1796 | | |
1797 | | // Tokens
// One-character lookahead classes produced by Tokenize(). The numeric value is the column index
// of the transition table in Predict(), so the order here must match the column order there.
// kTokenCount is the number of tokens and sizes the table columns.
1798 | | enum Token {
1799 | | LeftBracketToken = 0,
1800 | | RightBracketToken,
1801 | | 
1802 | | LeftCurlyBracketToken,
1803 | | RightCurlyBracketToken,
1804 | | 
1805 | | CommaToken,
1806 | | ColonToken,
1807 | | 
1808 | | StringToken,
1809 | | FalseToken,
1810 | | TrueToken,
1811 | | NullToken,
1812 | | NumberToken,
1813 | | 
1814 | | kTokenCount
1815 | | };
1816 | | |
// Classifies one lookahead character as a Token using a 256-entry lookup table.
// Only the structural characters, the double quote and the letters f, n and t have dedicated
// entries; every other character, including invalid ones, maps to NumberToken.
// For character types wider than one byte, code units of 256 or more also map to NumberToken.
1817 | | RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1818 | | 
1819 | | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// N and N16 are shorthand used only to lay out the table; both are undefined right after it.
1820 | | #define N NumberToken
1821 | | #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1822 | | // Maps from ASCII to Token
1823 | | static const unsigned char tokenMap[256] = {
1824 | | N16, // 00~0F
1825 | | N16, // 10~1F
1826 | | N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1827 | | N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1828 | | N16, // 40~4F
1829 | | N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1830 | | N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1831 | | N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1832 | | N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1833 | | };
1834 | | #undef N
1835 | | #undef N16
1836 | | //!@endcond
1837 | | 
// Single-byte characters always index the table directly; wider characters only when below 256.
1838 | | if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1839 | | return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1840 | | else
1841 | | return NumberToken;
1842 | | }
1843 | | |
// Looks up the candidate next state for `state` when the lookahead token is `token`.
// The table is indexed as G[state][token]: one row per IterativeParsingState in enum order and
// one column per Token in enum order. IterativeParsingErrorState marks a token that is not
// accepted in that state. The result is only a candidate: Transit() carries out the transition
// and may return a different state (for example after popping a saved state).
1844 | | RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
1845 | | // current state x one lookahead token -> new state
1846 | | static const char G[cIterativeParsingStateCount][kTokenCount] = {
1847 | | // Finish(sink state)
1848 | | {
1849 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1850 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1851 | | IterativeParsingErrorState
1852 | | },
1853 | | // Error(sink state)
1854 | | {
1855 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1856 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1857 | | IterativeParsingErrorState
1858 | | },
1859 | | // Start
1860 | | {
1861 | | IterativeParsingArrayInitialState, // Left bracket
1862 | | IterativeParsingErrorState, // Right bracket
1863 | | IterativeParsingObjectInitialState, // Left curly bracket
1864 | | IterativeParsingErrorState, // Right curly bracket
1865 | | IterativeParsingErrorState, // Comma
1866 | | IterativeParsingErrorState, // Colon
1867 | | IterativeParsingValueState, // String
1868 | | IterativeParsingValueState, // False
1869 | | IterativeParsingValueState, // True
1870 | | IterativeParsingValueState, // Null
1871 | | IterativeParsingValueState // Number
1872 | | },
1873 | | // ObjectInitial
1874 | | {
1875 | | IterativeParsingErrorState, // Left bracket
1876 | | IterativeParsingErrorState, // Right bracket
1877 | | IterativeParsingErrorState, // Left curly bracket
1878 | | IterativeParsingObjectFinishState, // Right curly bracket
1879 | | IterativeParsingErrorState, // Comma
1880 | | IterativeParsingErrorState, // Colon
1881 | | IterativeParsingMemberKeyState, // String
1882 | | IterativeParsingErrorState, // False
1883 | | IterativeParsingErrorState, // True
1884 | | IterativeParsingErrorState, // Null
1885 | | IterativeParsingErrorState // Number
1886 | | },
1887 | | // MemberKey
1888 | | {
1889 | | IterativeParsingErrorState, // Left bracket
1890 | | IterativeParsingErrorState, // Right bracket
1891 | | IterativeParsingErrorState, // Left curly bracket
1892 | | IterativeParsingErrorState, // Right curly bracket
1893 | | IterativeParsingErrorState, // Comma
1894 | | IterativeParsingKeyValueDelimiterState, // Colon
1895 | | IterativeParsingErrorState, // String
1896 | | IterativeParsingErrorState, // False
1897 | | IterativeParsingErrorState, // True
1898 | | IterativeParsingErrorState, // Null
1899 | | IterativeParsingErrorState // Number
1900 | | },
1901 | | // MemberValue
1902 | | {
1903 | | IterativeParsingErrorState, // Left bracket
1904 | | IterativeParsingErrorState, // Right bracket
1905 | | IterativeParsingErrorState, // Left curly bracket
1906 | | IterativeParsingObjectFinishState, // Right curly bracket
1907 | | IterativeParsingMemberDelimiterState, // Comma
1908 | | IterativeParsingErrorState, // Colon
1909 | | IterativeParsingErrorState, // String
1910 | | IterativeParsingErrorState, // False
1911 | | IterativeParsingErrorState, // True
1912 | | IterativeParsingErrorState, // Null
1913 | | IterativeParsingErrorState // Number
1914 | | },
1915 | | // ObjectFinish(sink state)
1916 | | {
1917 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1918 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1919 | | IterativeParsingErrorState
1920 | | },
1921 | | // ArrayInitial
1922 | | {
1923 | | IterativeParsingArrayInitialState, // Left bracket(push Element state)
1924 | | IterativeParsingArrayFinishState, // Right bracket
1925 | | IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1926 | | IterativeParsingErrorState, // Right curly bracket
1927 | | IterativeParsingErrorState, // Comma
1928 | | IterativeParsingErrorState, // Colon
1929 | | IterativeParsingElementState, // String
1930 | | IterativeParsingElementState, // False
1931 | | IterativeParsingElementState, // True
1932 | | IterativeParsingElementState, // Null
1933 | | IterativeParsingElementState // Number
1934 | | },
1935 | | // Element
1936 | | {
1937 | | IterativeParsingErrorState, // Left bracket
1938 | | IterativeParsingArrayFinishState, // Right bracket
1939 | | IterativeParsingErrorState, // Left curly bracket
1940 | | IterativeParsingErrorState, // Right curly bracket
1941 | | IterativeParsingElementDelimiterState, // Comma
1942 | | IterativeParsingErrorState, // Colon
1943 | | IterativeParsingErrorState, // String
1944 | | IterativeParsingErrorState, // False
1945 | | IterativeParsingErrorState, // True
1946 | | IterativeParsingErrorState, // Null
1947 | | IterativeParsingErrorState // Number
1948 | | },
1949 | | // ArrayFinish(sink state)
1950 | | {
1951 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1952 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1953 | | IterativeParsingErrorState
1954 | | },
1955 | | // Single Value (sink state)
1956 | | {
1957 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1958 | | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1959 | | IterativeParsingErrorState
1960 | | },
1961 | | // ElementDelimiter
1962 | | {
1963 | | IterativeParsingArrayInitialState, // Left bracket(push Element state)
1964 | | IterativeParsingArrayFinishState, // Right bracket
1965 | | IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1966 | | IterativeParsingErrorState, // Right curly bracket
1967 | | IterativeParsingErrorState, // Comma
1968 | | IterativeParsingErrorState, // Colon
1969 | | IterativeParsingElementState, // String
1970 | | IterativeParsingElementState, // False
1971 | | IterativeParsingElementState, // True
1972 | | IterativeParsingElementState, // Null
1973 | | IterativeParsingElementState // Number
1974 | | },
1975 | | // MemberDelimiter
1976 | | {
1977 | | IterativeParsingErrorState, // Left bracket
1978 | | IterativeParsingErrorState, // Right bracket
1979 | | IterativeParsingErrorState, // Left curly bracket
1980 | | IterativeParsingObjectFinishState, // Right curly bracket
1981 | | IterativeParsingErrorState, // Comma
1982 | | IterativeParsingErrorState, // Colon
1983 | | IterativeParsingMemberKeyState, // String
1984 | | IterativeParsingErrorState, // False
1985 | | IterativeParsingErrorState, // True
1986 | | IterativeParsingErrorState, // Null
1987 | | IterativeParsingErrorState // Number
1988 | | },
1989 | | // KeyValueDelimiter
1990 | | {
1991 | | IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1992 | | IterativeParsingErrorState, // Right bracket
1993 | | IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1994 | | IterativeParsingErrorState, // Right curly bracket
1995 | | IterativeParsingErrorState, // Comma
1996 | | IterativeParsingErrorState, // Colon
1997 | | IterativeParsingMemberValueState, // String
1998 | | IterativeParsingMemberValueState, // False
1999 | | IterativeParsingMemberValueState, // True
2000 | | IterativeParsingMemberValueState, // Null
2001 | | IterativeParsingMemberValueState // Number
2002 | | },
2003 | | }; // End of G
2004 | | 
2005 | | return static_cast<IterativeParsingState>(G[state][token]);
2006 | | }
2007 | | |
2008 | | // Make an advance in the token stream and state based on the candidate destination state which was returned by Predict().
2009 | | // May return a new state on state pop.
//
// src     - state the parser is currently in.
// token   - lookahead token that produced dst (only checked by an assertion here).
// dst     - candidate state proposed by Predict() for (src, token).
// is      - input stream; the token is consumed here or by the value parser that is called.
// handler - receives StartObject/StartArray/EndObject/EndArray and the value events.
//
// Returns the state actually entered: dst itself, the state restored from the stack when an
// object or array is closed (IterativeParsingFinishState for the root), or
// IterativeParsingErrorState when parsing failed or the handler asked to stop.
2010 | | template <unsigned parseFlags, typename InputStream, typename Handler>
2011 | | RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
2012 | | (void)token;
2013 | | 
2014 | | switch (dst) {
2015 | | case IterativeParsingErrorState:
2016 | | return dst;
2017 | | 
2018 | | case IterativeParsingObjectInitialState:
2019 | | case IterativeParsingArrayInitialState:
2020 | | {
2021 | | // Push the state(Element or MemberValue) if we are nested in another array or value of member.
2022 | | // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2023 | | IterativeParsingState n = src;
2024 | | if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2025 | | n = IterativeParsingElementState;
2026 | | else if (src == IterativeParsingKeyValueDelimiterState)
2027 | | n = IterativeParsingMemberValueState;
2028 | | // Push current state.
2029 | | *stack_.template Push<SizeType>(1) = n;
2030 | | // Initialize and push the member/element count.
2031 | | *stack_.template Push<SizeType>(1) = 0;
2032 | | // Call handler
2033 | | bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2034 | | // The handler short-circuits the parsing.
2035 | | if (!hr) {
2036 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2037 | | return IterativeParsingErrorState;
2038 | | }
2039 | | else {
2040 | | is.Take();
2041 | | return dst;
2042 | | }
2043 | | }
2044 | | 
2045 | | case IterativeParsingMemberKeyState:
2046 | | ParseString<parseFlags>(is, handler, true);
2047 | | if (HasParseError())
2048 | | return IterativeParsingErrorState;
2049 | | else
2050 | | return dst;
2051 | | 
2052 | | case IterativeParsingKeyValueDelimiterState:
2053 | | RAPIDJSON_ASSERT(token == ColonToken);
2054 | | is.Take();
2055 | | return dst;
2056 | | 
2057 | | case IterativeParsingMemberValueState:
2058 | | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2059 | | ParseValue<parseFlags>(is, handler);
2060 | | if (HasParseError()) {
2061 | | return IterativeParsingErrorState;
2062 | | }
2063 | | return dst;
2064 | | 
2065 | | case IterativeParsingElementState:
2066 | | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2067 | | ParseValue<parseFlags>(is, handler);
2068 | | if (HasParseError()) {
2069 | | return IterativeParsingErrorState;
2070 | | }
2071 | | return dst;
2072 | | 
2073 | | case IterativeParsingMemberDelimiterState:
2074 | | case IterativeParsingElementDelimiterState:
2075 | | is.Take();
2076 | | // Update member/element count.
2077 | | *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2078 | | return dst;
2079 | | 
2080 | | case IterativeParsingObjectFinishState:
2081 | | {
2082 | | // Transit from delimiter is only allowed when trailing commas are enabled
2083 | | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2084 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
2085 | | return IterativeParsingErrorState;
2086 | | }
2087 | | // Get member count.
2088 | | SizeType c = *stack_.template Pop<SizeType>(1);
2089 | | // If the object is not empty, count the last member.
2090 | | if (src == IterativeParsingMemberValueState)
2091 | | ++c;
2092 | | // Restore the state.
2093 | | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2094 | | // Transit to Finish state if this is the topmost scope.
2095 | | if (n == IterativeParsingStartState)
2096 | | n = IterativeParsingFinishState;
2097 | | // Call handler
2098 | | bool hr = handler.EndObject(c);
2099 | | // The handler short-circuits the parsing.
2100 | | if (!hr) {
2101 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2102 | | return IterativeParsingErrorState;
2103 | | }
2104 | | else {
2105 | | is.Take();
2106 | | return n;
2107 | | }
2108 | | }
2109 | | 
2110 | | case IterativeParsingArrayFinishState:
2111 | | {
2112 | | // Transit from delimiter is only allowed when trailing commas are enabled
2113 | | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2114 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
2115 | | return IterativeParsingErrorState;
2116 | | }
2117 | | // Get element count.
2118 | | SizeType c = *stack_.template Pop<SizeType>(1);
2119 | | // If the array is not empty, count the last element.
2120 | | if (src == IterativeParsingElementState)
2121 | | ++c;
2122 | | // Restore the state.
2123 | | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2124 | | // Transit to Finish state if this is the topmost scope.
2125 | | if (n == IterativeParsingStartState)
2126 | | n = IterativeParsingFinishState;
2127 | | // Call handler
2128 | | bool hr = handler.EndArray(c);
2129 | | // The handler short-circuits the parsing.
2130 | | if (!hr) {
2131 | | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2132 | | return IterativeParsingErrorState;
2133 | | }
2134 | | else {
2135 | | is.Take();
2136 | | return n;
2137 | | }
2138 | | }
2139 | | 
2140 | | default:
2141 | | // This branch is for IterativeParsingValueState actually.
2142 | | // Use `default:` rather than
2143 | | // `case IterativeParsingValueState:` is for code coverage.
2144 | | 
2145 | | // The IterativeParsingStartState is not enumerated in this switch-case.
2146 | | // It is impossible for that case. And it can be caught by following assertion.
2147 | | 
2148 | | // The IterativeParsingFinishState is not enumerated in this switch-case either.
2149 | | // It is a "derivative" state which cannot be triggered from Predict() directly.
2150 | | // Therefore it cannot happen here. And it can be caught by following assertion.
2151 | | RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2152 | | 
2153 | | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2154 | | ParseValue<parseFlags>(is, handler);
2155 | | if (HasParseError()) {
2156 | | return IterativeParsingErrorState;
2157 | | }
2158 | | return IterativeParsingFinishState;
2159 | | }
2160 | | }
2161 | | |
// Records a parse error that matches the state in which the parser got stuck.
// If an error has already been recorded (for example by a value parser), it is left untouched.
// Otherwise src selects the error code and the current stream offset is attached to it.
// The default branch is expected to be reached only for IterativeParsingElementState, which is asserted.
2162 | | template <typename InputStream>
2163 | | void HandleError(IterativeParsingState src, InputStream& is) {
2164 | | if (HasParseError()) {
2165 | | // Error flag has been set.
2166 | | return;
2167 | | }
2168 | | 
2169 | | switch (src) {
2170 | | case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2171 | | case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2172 | | case IterativeParsingObjectInitialState:
2173 | | case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2174 | | case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2175 | | case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2176 | | case IterativeParsingKeyValueDelimiterState:
2177 | | case IterativeParsingArrayInitialState:
2178 | | case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2179 | | default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
2180 | | }
2181 | | }
2182 | | |
// True when s is one of the delimiter states. Relies on the delimiter states being the last
// enumerators of IterativeParsingState, starting at IterativeParsingElementDelimiterState.
2183 | | RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2184 | | return s >= IterativeParsingElementDelimiterState;
2185 | | }
2186 | | |
// True when s is IterativeParsingFinishState or IterativeParsingErrorState. Relies on these two
// sink states being the first enumerators of IterativeParsingState.
2187 | | RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2188 | | return s <= IterativeParsingErrorState;
2189 | | }
2190 | | |
// Parses a whole document from `is` with the table-driven state machine and returns parseResult_.
//
// Each round peeks one character, classifies it with Tokenize(), asks Predict() for the candidate
// state and lets Transit() consume input and call the handler. The loop ends at a NUL character,
// on an error state, or (with kParseStopWhenDoneFlag) as soon as the root value is complete.
// Reaching the end of input in any state other than IterativeParsingFinishState is reported
// through HandleError().
// NOTE(review): ClearStackOnExit presumably clears stack_ when the scope is left, and
// RAPIDJSON_PARSE_ERROR_EARLY_RETURN presumably returns its argument once an error is recorded;
// both are defined outside this part of the file - confirm.
2191 | | template <unsigned parseFlags, typename InputStream, typename Handler>
2192 | | ParseResult IterativeParse(InputStream& is, Handler& handler) {
2193 | | parseResult_.Clear();
2194 | | ClearStackOnExit scope(*this);
2195 | | IterativeParsingState state = IterativeParsingStartState;
2196 | | 
2197 | | SkipWhitespaceAndComments<parseFlags>(is);
2198 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2199 | | while (is.Peek() != '\0') {
2200 | | Token t = Tokenize(is.Peek());
2201 | | IterativeParsingState n = Predict(state, t);
2202 | | IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2203 | | 
// HandleError() receives the state before the failed transition so it can pick a fitting error code.
2204 | | if (d == IterativeParsingErrorState) {
2205 | | HandleError(state, is);
2206 | | break;
2207 | | }
2208 | | 
2209 | | state = d;
2210 | | 
2211 | | // Do not further consume streams if a root JSON has been parsed.
2212 | | if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2213 | | break;
2214 | | 
2215 | | SkipWhitespaceAndComments<parseFlags>(is);
2216 | | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2217 | | }
2218 | | 
2219 | | // Handle the end of file.
2220 | | if (state != IterativeParsingFinishState)
2221 | | HandleError(state, is);
2222 | | 
2223 | | return parseResult_;
2224 | | }
2225 | | |
2226 | | static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
// Besides decoded strings, stack_ also holds the buffered number text used by ParseNumber() and
// the saved state / element-count pairs pushed by Transit() during iterative parsing.
2227 | | internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
2228 | | ParseResult parseResult_; //!< Outcome of the current parse; cleared at the start of IterativeParse().
2229 | | IterativeParsingState state_; //!< NOTE(review): not referenced in this part of the file; presumably the state kept between calls of an incremental parsing API - confirm.
2230 | | }; // class GenericReader |
2231 | | |
2232 | | //! Reader with UTF8 encoding and default allocator. |
2233 | | typedef GenericReader<UTF8<>, UTF8<> > Reader; |
2234 | | |
2235 | | RAPIDJSON_NAMESPACE_END |
2236 | | |
2237 | | #if defined(__clang__) || defined(_MSC_VER) |
2238 | | RAPIDJSON_DIAG_POP |
2239 | | #endif |
2240 | | |
2241 | | |
2242 | | #ifdef __GNUC__ |
2243 | | RAPIDJSON_DIAG_POP |
2244 | | #endif |
2245 | | |
2246 | | #endif // RAPIDJSON_READER_H_ |