/src/brpc/src/butil/third_party/rapidjson/encodings.h
Line | Count | Source |
1 | | // Tencent is pleased to support the open source community by making RapidJSON available. |
2 | | // |
3 | | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. |
4 | | // |
5 | | // Licensed under the MIT License (the "License"); you may not use this file except |
6 | | // in compliance with the License. You may obtain a copy of the License at |
7 | | // |
8 | | // http://opensource.org/licenses/MIT |
9 | | // |
10 | | // Unless required by applicable law or agreed to in writing, software distributed |
11 | | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
12 | | // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
13 | | // specific language governing permissions and limitations under the License. |
14 | | |
15 | | #ifndef RAPIDJSON_ENCODINGS_H_ |
16 | | #define RAPIDJSON_ENCODINGS_H_ |
17 | | |
18 | | #include "rapidjson.h" |
19 | | |
20 | | #ifdef _MSC_VER |
21 | | RAPIDJSON_DIAG_PUSH |
22 | | RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data |
23 | | RAPIDJSON_DIAG_OFF(4702) // unreachable code |
24 | | #elif defined(__GNUC__) |
25 | | RAPIDJSON_DIAG_PUSH |
26 | | RAPIDJSON_DIAG_OFF(effc++) |
27 | | RAPIDJSON_DIAG_OFF(overflow) |
28 | | #endif |
29 | | |
30 | | BUTIL_RAPIDJSON_NAMESPACE_BEGIN |
31 | | |
32 | | /////////////////////////////////////////////////////////////////////////////// |
33 | | // Encoding |
34 | | |
35 | | /*! \class rapidjson::Encoding |
36 | | \brief Concept for encoding of Unicode characters. |
37 | | |
38 | | \code |
39 | | concept Encoding { |
40 | | typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. |
41 | | |
42 | | enum { supportUnicode = 1 }; // or 0 if not supporting unicode |
43 | | |
44 | | //! \brief Encode a Unicode codepoint to an output stream. |
45 | | //! \param os Output stream. |
46 | | //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive. |
47 | | template<typename OutputStream> |
48 | | static void Encode(OutputStream& os, unsigned codepoint); |
49 | | |
50 | | //! \brief Decode a Unicode codepoint from an input stream. |
51 | | //! \param is Input stream. |
52 | | //! \param codepoint Output of the unicode codepoint. |
53 | | //! \return true if a valid codepoint can be decoded from the stream. |
54 | | template <typename InputStream> |
55 | | static bool Decode(InputStream& is, unsigned* codepoint); |
56 | | |
57 | | //! \brief Validate one Unicode codepoint from an encoded stream. |
58 | | //! \param is Input stream to obtain codepoint. |
59 | | //! \param os Output for copying one codepoint. |
60 | | //! \return true if it is valid. |
61 | | //! \note This function only validates and copies the codepoint without actually decoding it. |
62 | | template <typename InputStream, typename OutputStream> |
63 | | static bool Validate(InputStream& is, OutputStream& os); |
64 | | |
65 | | // The following functions deal with byte streams. |
66 | | |
67 | | //! Take a character from input byte stream, skip BOM if exist. |
68 | | template <typename InputByteStream> |
69 | | static Ch TakeBOM(InputByteStream& is); |
70 | | |
71 | | //! Take a character from input byte stream. |
72 | | template <typename InputByteStream> |
73 | | static Ch Take(InputByteStream& is); |
74 | | |
75 | | //! Put BOM to output byte stream. |
76 | | template <typename OutputByteStream> |
77 | | static void PutBOM(OutputByteStream& os); |
78 | | |
79 | | //! Put a character to output byte stream. |
80 | | template <typename OutputByteStream> |
81 | | static void Put(OutputByteStream& os, Ch c); |
82 | | }; |
83 | | \endcode |
84 | | */ |
85 | | |
86 | | /////////////////////////////////////////////////////////////////////////////// |
87 | | // UTF8 |
88 | | |
//! UTF-8 encoding.
/*! http://en.wikipedia.org/wiki/UTF-8
    http://tools.ietf.org/html/rfc3629
    \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
    \note implements Encoding concept
*/
template<typename CharType = char>
struct UTF8 {
    typedef CharType Ch;

    enum { supportUnicode = 1 };

    //! Encode one codepoint as a 1- to 4-unit UTF-8 sequence.
    /*! \param os Output stream; each code unit is written with os.Put().
        \param codepoint Codepoint to encode. Values above 0x10FFFF trip RAPIDJSON_ASSERT.
    */
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint) {
        if (codepoint <= 0x7F)
            os.Put(static_cast<Ch>(codepoint & 0xFF));
        else if (codepoint <= 0x7FF) {
            os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
            os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
        }
        else if (codepoint <= 0xFFFF) {
            os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
        }
        else {
            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
            os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
        }
    }

    //! Decode one codepoint from a UTF-8 sequence.
    /*! \param is Input stream; code units are read with is.Take().
        \param codepoint Receives the decoded codepoint. Holds no meaningful value when false is returned.
        \return true if the units read form a sequence accepted by the GetRange() class table.
        \note Trailing units selected by the lead byte are consumed even when one of them is invalid.
    */
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint) {
// The helper macros carry a RAPIDJSON_ prefix so that defining and undefining them
// cannot clobber a macro of the same short name belonging to the including code.
#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
        Ch c = is.Take();
        if (!(c & 0x80)) {
            *codepoint = static_cast<unsigned char>(c);
            return true;
        }

        unsigned char type = GetRange(static_cast<unsigned char>(c));
        if (type >= 32) {
            // A continuation byte in lead position has class 0x20 or 0x40; shifting an int
            // by that many bits is undefined, and the sequence is rejected below anyway.
            *codepoint = 0;
        }
        else {
            // Keep only the payload bits of the lead byte.
            *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
        }
        bool result = true;
        switch (type) {
        case 2: RAPIDJSON_TAIL(); return result;
        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        default: return false;
        }
#undef RAPIDJSON_COPY
#undef RAPIDJSON_TRANS
#undef RAPIDJSON_TAIL
    }

    //! Copy one UTF-8 sequence from \c is to \c os and report whether it is well-formed.
    /*! \param is Input stream to obtain the sequence from.
        \param os Output stream receiving every code unit that is read.
        \return true if the copied units are accepted by the GetRange() class table.
        \note The sequence is copied without being decoded.
    */
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os) {
#define RAPIDJSON_COPY() os.Put(c = is.Take())
#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
        Ch c;
        RAPIDJSON_COPY();
        if (!(c & 0x80))
            return true;

        bool result = true;
        switch (GetRange(static_cast<unsigned char>(c))) {
        case 2: RAPIDJSON_TAIL(); return result;
        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
        default: return false;
        }
#undef RAPIDJSON_COPY
#undef RAPIDJSON_TRANS
#undef RAPIDJSON_TAIL
    }

    //! Look up the character class of one byte.
    /*! \param c Byte to classify.
        \return Class value from the 256-entry table below.
    */
    static unsigned char GetRange(unsigned char c) {
        // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
        // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
        static const unsigned char type[] = {
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
            0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
            8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
            10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
        };
        return type[c];
    }

    //! Take a character from input byte stream, skip BOM (EF BB BF) if it exists.
    /*! \return The first unit that breaks the BOM pattern, or the unit following a complete BOM. */
    template <typename InputByteStream>
    static CharType TakeBOM(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        Ch c = Take(is);
        if (static_cast<unsigned char>(c) != 0xEFu) return c;
        c = is.Take();
        if (static_cast<unsigned char>(c) != 0xBBu) return c;
        c = is.Take();
        if (static_cast<unsigned char>(c) != 0xBFu) return c;
        c = is.Take();
        return c;
    }

    //! Take a character from input byte stream.
    template <typename InputByteStream>
    static Ch Take(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        return is.Take();
    }

    //! Put the UTF-8 BOM (EF BB BF) to output byte stream.
    template <typename OutputByteStream>
    static void PutBOM(OutputByteStream& os) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        // Cast explicitly so the stream receives its own unit type rather than an unsigned int.
        os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
        os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
        os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
    }

    //! Put a character to output byte stream.
    template <typename OutputByteStream>
    static void Put(OutputByteStream& os, Ch c) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        os.Put(static_cast<typename OutputByteStream::Ch>(c));
    }
};
227 | | |
228 | | /////////////////////////////////////////////////////////////////////////////// |
229 | | // UTF16 |
230 | | |
//! UTF-16 encoding.
/*! http://en.wikipedia.org/wiki/UTF-16
    http://tools.ietf.org/html/rfc2781
    \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
    \note implements Encoding concept

    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
    For streaming, use UTF16LE and UTF16BE, which handle endianness.
*/
template<typename CharType = wchar_t>
struct UTF16 {
    typedef CharType Ch;
    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);

    enum { supportUnicode = 1 };

    //! Encode one codepoint as one code unit or as a surrogate pair.
    /*! \param os Output stream whose unit type is at least 2 bytes wide.
        \param codepoint Codepoint to encode. Surrogate values (0xD800-0xDFFF) and values
               above 0x10FFFF trip RAPIDJSON_ASSERT.
    */
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
        if (codepoint <= 0xFFFF) {
            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
            os.Put(static_cast<typename OutputStream::Ch>(codepoint));
        }
        else {
            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
            const unsigned v = codepoint - 0x10000;
            // Both halves are cast to the stream's unit type; the low half used to be passed as unsigned.
            os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
            os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
        }
    }

    //! Decode one codepoint from one code unit or from a surrogate pair.
    /*! \param is Input stream whose unit type is at least 2 bytes wide.
        \param codepoint Receives the decoded codepoint. Not meaningful when false is returned.
        \return false for a lone low surrogate, or for a high surrogate that is not followed
                by a low surrogate (the following unit is consumed in that case).
    */
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
        Ch c = is.Take();
        if (c < 0xD800 || c > 0xDFFF) {
            *codepoint = static_cast<unsigned>(c);
            return true;
        }
        else if (c <= 0xDBFF) {
            // High surrogate: its 10 payload bits form the upper half of the offset.
            *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
            c = is.Take();
            *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
            *codepoint += 0x10000;
            return c >= 0xDC00 && c <= 0xDFFF;
        }
        return false;
    }

    //! Copy one codepoint (one unit or a surrogate pair) from \c is to \c os and validate it.
    /*! \return true under the same conditions as Decode(); every unit read is also written to \c os. */
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
        Ch c;
        os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
        if (c < 0xD800 || c > 0xDFFF)
            return true;
        else if (c <= 0xDBFF) {
            os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
            return c >= 0xDC00 && c <= 0xDFFF;
        }
        return false;
    }
};
295 | | |
296 | | //! UTF-16 little endian encoding. |
297 | | template<typename CharType = wchar_t> |
298 | | struct UTF16LE : UTF16<CharType> { |
299 | | template <typename InputByteStream> |
300 | | static CharType TakeBOM(InputByteStream& is) { |
301 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
302 | | CharType c = Take(is); |
303 | | return (unsigned short)c == 0xFEFFu ? Take(is) : c; |
304 | | } |
305 | | |
306 | | template <typename InputByteStream> |
307 | | static CharType Take(InputByteStream& is) { |
308 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
309 | | CharType c = (unsigned char)is.Take(); |
310 | | c |= (unsigned char)is.Take() << 8; |
311 | | return c; |
312 | | } |
313 | | |
314 | | template <typename OutputByteStream> |
315 | | static void PutBOM(OutputByteStream& os) { |
316 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
317 | | os.Put(0xFFu); os.Put(0xFEu); |
318 | | } |
319 | | |
320 | | template <typename OutputByteStream> |
321 | | static void Put(OutputByteStream& os, CharType c) { |
322 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
323 | | os.Put(c & 0xFFu); |
324 | | os.Put((c >> 8) & 0xFFu); |
325 | | } |
326 | | }; |
327 | | |
328 | | //! UTF-16 big endian encoding. |
329 | | template<typename CharType = wchar_t> |
330 | | struct UTF16BE : UTF16<CharType> { |
331 | | template <typename InputByteStream> |
332 | | static CharType TakeBOM(InputByteStream& is) { |
333 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
334 | | CharType c = Take(is); |
335 | | return (unsigned short)c == 0xFEFFu ? Take(is) : c; |
336 | | } |
337 | | |
338 | | template <typename InputByteStream> |
339 | | static CharType Take(InputByteStream& is) { |
340 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
341 | | CharType c = (unsigned char)is.Take() << 8; |
342 | | c |= (unsigned char)is.Take(); |
343 | | return c; |
344 | | } |
345 | | |
346 | | template <typename OutputByteStream> |
347 | | static void PutBOM(OutputByteStream& os) { |
348 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
349 | | os.Put(0xFEu); os.Put(0xFFu); |
350 | | } |
351 | | |
352 | | template <typename OutputByteStream> |
353 | | static void Put(OutputByteStream& os, CharType c) { |
354 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
355 | | os.Put((c >> 8) & 0xFFu); |
356 | | os.Put(c & 0xFFu); |
357 | | } |
358 | | }; |
359 | | |
360 | | /////////////////////////////////////////////////////////////////////////////// |
361 | | // UTF32 |
362 | | |
//! UTF-32 encoding.
/*! http://en.wikipedia.org/wiki/UTF-32
    \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
    \note implements Encoding concept

    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
    For streaming, use UTF32LE and UTF32BE, which handle endianness.
*/
template<typename CharType = unsigned>
struct UTF32 {
    typedef CharType Ch;
    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);

    enum { supportUnicode = 1 };

    //! Put one codepoint to \c os as a single code unit.
    /*! \param os Output stream whose unit type is at least 4 bytes wide.
        \param codepoint Codepoint to write. Values above 0x10FFFF trip RAPIDJSON_ASSERT.
    */
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
        os.Put(static_cast<typename OutputStream::Ch>(codepoint));
    }

    //! Take one code unit from \c is as a codepoint.
    /*! \param codepoint Receives the unit converted to unsigned.
        \return true if the unit lies in [0, 0x10FFFF].
    */
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
        Ch c = is.Take();
        *codepoint = static_cast<unsigned>(c);
        return IsInRange(c);
    }

    //! Copy one code unit from \c is to \c os and report whether it lies in [0, 0x10FFFF].
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
        Ch c;
        os.Put(c = is.Take());
        return IsInRange(c);
    }

    //! Range test shared by Decode() and Validate().
    /*! A plain `c <= 0x10FFFF` accepts every negative value when Ch is a signed type,
        so the bits above bit 20 (which include the sign bit) are checked first.
    */
    static bool IsInRange(Ch c) {
        return (c & ~static_cast<Ch>(0x1FFFFF)) == 0 && c <= 0x10FFFF;
    }
};
401 | | |
402 | | //! UTF-32 little endian enocoding. |
403 | | template<typename CharType = unsigned> |
404 | | struct UTF32LE : UTF32<CharType> { |
405 | | template <typename InputByteStream> |
406 | | static CharType TakeBOM(InputByteStream& is) { |
407 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
408 | | CharType c = Take(is); |
409 | | return (unsigned)c == 0x0000FEFFu ? Take(is) : c; |
410 | | } |
411 | | |
412 | | template <typename InputByteStream> |
413 | | static CharType Take(InputByteStream& is) { |
414 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
415 | | CharType c = (unsigned char)is.Take(); |
416 | | c |= (unsigned char)is.Take() << 8; |
417 | | c |= (unsigned char)is.Take() << 16; |
418 | | c |= (unsigned char)is.Take() << 24; |
419 | | return c; |
420 | | } |
421 | | |
422 | | template <typename OutputByteStream> |
423 | | static void PutBOM(OutputByteStream& os) { |
424 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
425 | | os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u); |
426 | | } |
427 | | |
428 | | template <typename OutputByteStream> |
429 | | static void Put(OutputByteStream& os, CharType c) { |
430 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
431 | | os.Put(c & 0xFFu); |
432 | | os.Put((c >> 8) & 0xFFu); |
433 | | os.Put((c >> 16) & 0xFFu); |
434 | | os.Put((c >> 24) & 0xFFu); |
435 | | } |
436 | | }; |
437 | | |
438 | | //! UTF-32 big endian encoding. |
439 | | template<typename CharType = unsigned> |
440 | | struct UTF32BE : UTF32<CharType> { |
441 | | template <typename InputByteStream> |
442 | | static CharType TakeBOM(InputByteStream& is) { |
443 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
444 | | CharType c = Take(is); |
445 | | return (unsigned)c == 0x0000FEFFu ? Take(is) : c; |
446 | | } |
447 | | |
448 | | template <typename InputByteStream> |
449 | | static CharType Take(InputByteStream& is) { |
450 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
451 | | CharType c = (unsigned char)is.Take() << 24; |
452 | | c |= (unsigned char)is.Take() << 16; |
453 | | c |= (unsigned char)is.Take() << 8; |
454 | | c |= (unsigned char)is.Take(); |
455 | | return c; |
456 | | } |
457 | | |
458 | | template <typename OutputByteStream> |
459 | | static void PutBOM(OutputByteStream& os) { |
460 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
461 | | os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu); |
462 | | } |
463 | | |
464 | | template <typename OutputByteStream> |
465 | | static void Put(OutputByteStream& os, CharType c) { |
466 | | RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
467 | | os.Put((c >> 24) & 0xFFu); |
468 | | os.Put((c >> 16) & 0xFFu); |
469 | | os.Put((c >> 8) & 0xFFu); |
470 | | os.Put(c & 0xFFu); |
471 | | } |
472 | | }; |
473 | | |
474 | | /////////////////////////////////////////////////////////////////////////////// |
475 | | // ASCII |
476 | | |
//! ASCII encoding.
/*! http://en.wikipedia.org/wiki/ASCII
    \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
    \note implements Encoding concept
*/
template<typename CharType = char>
struct ASCII {
    typedef CharType Ch;

    enum { supportUnicode = 0 };

    //! Put \c codepoint to \c os as a single unit; values above 0x7F trip RAPIDJSON_ASSERT.
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint) {
        RAPIDJSON_ASSERT(codepoint <= 0x7F);
        const Ch unit = static_cast<Ch>(codepoint & 0xFF);
        os.Put(unit);
    }

    //! Take one unit from \c is, store its low 8 bits in \c *codepoint, and report whether it is 7-bit.
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint) {
        const unsigned char byte = static_cast<unsigned char>(is.Take());
        *codepoint = byte;
        return byte < 0x80;
    }

    //! Copy one unit (as an unsigned char) from \c is to \c os and report whether it is 7-bit.
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os) {
        const unsigned char byte = static_cast<unsigned char>(is.Take());
        os.Put(byte);
        return byte < 0x80;
    }

    //! Take a character from input byte stream. ASCII has no BOM, so nothing is skipped.
    template <typename InputByteStream>
    static CharType TakeBOM(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        return Take(is);
    }

    //! Take a character from input byte stream.
    template <typename InputByteStream>
    static Ch Take(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        return is.Take();
    }

    //! ASCII has no BOM; nothing is written.
    template <typename OutputByteStream>
    static void PutBOM(OutputByteStream& os) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        (void)os;
    }

    //! Put a character to output byte stream.
    template <typename OutputByteStream>
    static void Put(OutputByteStream& os, Ch c) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        typedef typename OutputByteStream::Ch OutCh;
        os.Put(static_cast<OutCh>(c));
    }
};
533 | | |
534 | | /////////////////////////////////////////////////////////////////////////////// |
535 | | // AutoUTF |
536 | | |
//! Runtime-specified UTF encoding type of a stream.
enum UTFType {
    //! UTF-8.
    kUTF8 = 0,
    //! UTF-16 little endian.
    kUTF16LE = 1,
    //! UTF-16 big endian.
    kUTF16BE = 2,
    //! UTF-32 little endian.
    kUTF32LE = 3,
    //! UTF-32 big endian.
    kUTF32BE = 4
};
545 | | |
546 | | //! Dynamically select encoding according to stream's runtime-specified UTF encoding type. |
547 | | /*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). |
548 | | */ |
549 | | template<typename CharType> |
550 | | struct AutoUTF { |
551 | | typedef CharType Ch; |
552 | | |
553 | | enum { supportUnicode = 1 }; |
554 | | |
555 | | #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x |
556 | | |
557 | | template<typename OutputStream> |
558 | | RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { |
559 | | typedef void (*EncodeFunc)(OutputStream&, unsigned); |
560 | | static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; |
561 | | (*f[os.GetType()])(os, codepoint); |
562 | | } |
563 | | |
564 | | template <typename InputStream> |
565 | | RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { |
566 | | typedef bool (*DecodeFunc)(InputStream&, unsigned*); |
567 | | static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; |
568 | | return (*f[is.GetType()])(is, codepoint); |
569 | | } |
570 | | |
571 | | template <typename InputStream, typename OutputStream> |
572 | | RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { |
573 | | typedef bool (*ValidateFunc)(InputStream&, OutputStream&); |
574 | | static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; |
575 | | return (*f[is.GetType()])(is, os); |
576 | | } |
577 | | |
578 | | #undef RAPIDJSON_ENCODINGS_FUNC |
579 | | }; |
580 | | |
581 | | /////////////////////////////////////////////////////////////////////////////// |
582 | | // Transcoder |
583 | | |
//! Encoding conversion.
/*! \tparam SourceEncoding Encoding used to decode the input stream.
    \tparam TargetEncoding Encoding used to encode into the output stream.
*/
template<typename SourceEncoding, typename TargetEncoding>
struct Transcoder {
    //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
    /*! \return false if the source encoding fails to decode; nothing is written in that case. */
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
        unsigned cp;
        const bool decoded = SourceEncoding::Decode(is, &cp);
        if (decoded)
            TargetEncoding::Encode(os, cp);
        return decoded;
    }

    //! Validate one Unicode codepoint from an encoded stream.
    /*! The encodings differ, so validating means transcoding. */
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
        return Transcode(is, os);
    }
};

//! Specialization of Transcoder with same source and target encoding.
template<typename Encoding>
struct Transcoder<Encoding, Encoding> {
    //! Copy a single code unit from \c is to \c os; always returns true.
    /*! \note Unlike the primary template, this moves one code unit, not one codepoint. */
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
        os.Put(is.Take());
        return true;
    }

    //! Validate one codepoint by delegating to the encoding's own Validate().
    template<typename InputStream, typename OutputStream>
    RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
        const bool valid = Encoding::Validate(is, os);
        return valid;
    }
};
618 | | |
619 | | BUTIL_RAPIDJSON_NAMESPACE_END |
620 | | |
621 | | #if defined(__GNUC__) || defined(_MSV_VER) |
622 | | RAPIDJSON_DIAG_POP |
623 | | #endif |
624 | | |
625 | | #endif // RAPIDJSON_ENCODINGS_H_ |