Coverage Report

Created: 2026-01-09 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rapidjson/include/rapidjson/encodedstream.h
Line
Count
Source
1
// Tencent is pleased to support the open source community by making RapidJSON available.
2
// 
3
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4
//
5
// Licensed under the MIT License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// http://opensource.org/licenses/MIT
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed 
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
13
// specific language governing permissions and limitations under the License.
14
15
#ifndef RAPIDJSON_ENCODEDSTREAM_H_
16
#define RAPIDJSON_ENCODEDSTREAM_H_
17
18
#include "stream.h"
19
#include "memorystream.h"
20
21
#ifdef __GNUC__
22
RAPIDJSON_DIAG_PUSH
23
RAPIDJSON_DIAG_OFF(effc++)
24
#endif
25
26
#ifdef __clang__
27
RAPIDJSON_DIAG_PUSH
28
RAPIDJSON_DIAG_OFF(padded)
29
#endif
30
31
RAPIDJSON_NAMESPACE_BEGIN
32
33
//! Input byte stream wrapper with a statically bound encoding. Keeps one decoded character of look-ahead.
34
/*!
35
    \tparam Encoding Encoding used to interpret the bytes of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
36
    \tparam InputByteStream Type of input byte stream. For example, FileReadStream. Its Ch type must be one byte wide.
37
*/
38
template <typename Encoding, typename InputByteStream>
39
class EncodedInputStream {
40
    RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); // the wrapped stream must deliver single-byte units
41
public:
42
    typedef typename Encoding::Ch Ch;
43
44
    EncodedInputStream(InputByteStream& is) : is_(is) { 
45
        current_ = Encoding::TakeBOM(is_); // primes the look-ahead; TakeBOM is defined elsewhere and presumably skips a leading BOM
46
    }
47
48
    Ch Peek() const { return current_; } // returns the look-ahead character without touching the wrapped stream
49
    Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } // returns the look-ahead character, then decodes the next one into it
50
    size_t Tell() const { return is_.Tell(); } // forwards to the wrapped stream, which has already been read to fill the look-ahead
51
52
    // Not implemented: output operations are unsupported on an input stream; each one asserts.
53
    void Put(Ch) { RAPIDJSON_ASSERT(false); }
54
    void Flush() { RAPIDJSON_ASSERT(false); } 
55
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
56
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
57
58
private:
59
    EncodedInputStream(const EncodedInputStream&); // copying is disabled: declared private with no definition in this header
60
    EncodedInputStream& operator=(const EncodedInputStream&); // assignment is disabled the same way
61
62
    InputByteStream& is_; // wrapped byte stream, held by reference
63
    Ch current_; // one-character look-ahead returned by Peek()
64
};
65
66
//! Specialized for UTF8 MemoryStream: Peek/Take/Tell forward straight to the stream, with no look-ahead member.
67
template <>
68
class EncodedInputStream<UTF8<>, MemoryStream> {
69
public:
70
    typedef UTF8<>::Ch Ch;
71
72
0
    EncodedInputStream(MemoryStream& is) : is_(is) { // skips leading UTF-8 BOM bytes (EF BB BF)
73
0
        if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); // NOTE(review): the three checks are independent of each other,
74
0
        if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); // so a leading 0xBB or 0xBF is consumed even without a preceding 0xEF,
75
0
        if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); // and a lone 0xEF is consumed even when BB BF does not follow.
76
0
    }
77
0
    Ch Peek() const { return is_.Peek(); } // forwards to the wrapped stream
78
0
    Ch Take() { return is_.Take(); } // forwards to the wrapped stream
79
0
    size_t Tell() const { return is_.Tell(); } // forwards to the wrapped stream
80
81
    // Not implemented: in this specialization these are silent no-ops (they do not assert).
82
0
    void Put(Ch) {}
83
0
    void Flush() {} 
84
0
    Ch* PutBegin() { return 0; }
85
0
    size_t PutEnd(Ch*) { return 0; }
86
87
    MemoryStream& is_; // wrapped stream, held by reference; note that this member is public here
88
89
private:
90
    EncodedInputStream(const EncodedInputStream&); // copying is disabled: declared private with no definition in this header
91
    EncodedInputStream& operator=(const EncodedInputStream&); // assignment is disabled the same way
92
};
93
94
//! Output byte stream wrapper with statically bound encoding.
95
/*!
96
    \tparam Encoding Encoding used to write characters to the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
97
    \tparam OutputByteStream Type of output byte stream. For example, FileWriteStream. Its Ch type must be one byte wide.
98
*/
99
template <typename Encoding, typename OutputByteStream>
100
class EncodedOutputStream {
101
    RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); // the wrapped stream must accept single-byte units
102
public:
103
    typedef typename Encoding::Ch Ch;
104
105
    EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { 
106
        if (putBOM) // defaults to true, so Encoding::PutBOM runs unless the caller opts out
107
            Encoding::PutBOM(os_);
108
    }
109
110
    void Put(Ch c) { Encoding::Put(os_, c);  } // hands c to Encoding::Put, which writes to the wrapped stream
111
    void Flush() { os_.Flush(); } // forwards to the wrapped stream
112
113
    // Not implemented: input operations and PutBegin/PutEnd are unsupported; each one asserts.
114
    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
115
    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
116
    size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; }
117
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
118
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
119
120
private:
121
    EncodedOutputStream(const EncodedOutputStream&); // copying is disabled: declared private with no definition in this header
122
    EncodedOutputStream& operator=(const EncodedOutputStream&); // assignment is disabled the same way
123
124
    OutputByteStream& os_; // wrapped byte stream, held by reference
125
};
126
127
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x // initializer list for tables indexed by a UTFType value; the order presumably mirrors the UTFType enumerators (declared elsewhere)
128
129
//! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
130
/*!
131
    \tparam CharType Type of character for reading.
132
    \tparam InputByteStream type of input byte stream to be wrapped. Its Ch must be one byte wide; Peek4(), Take() and Tell() are called on it.
133
*/
134
template <typename CharType, typename InputByteStream>
135
class AutoUTFInputStream {
136
    RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); // the wrapped stream must deliver single-byte units
137
public:
138
    typedef CharType Ch;
139
140
    //! Constructor. Detects the encoding, selects the matching decoder and reads the first character.
141
    /*!
142
        \param is input stream to be wrapped.
143
        \param type UTF encoding type to fall back on if none is detected from the stream.
144
    */
145
    AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
146
        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);        
147
        DetectType(); // may overwrite type_ and consume BOM bytes from the stream
148
        static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; // one decoder per encoding, indexed by type_
149
        takeFunc_ = f[type_];
150
        current_ = takeFunc_(*is_); // prime the one-character look-ahead
151
    }
152
153
    UTFType GetType() const { return type_; } // detected encoding, or the constructor's fallback if detection left it unchanged
154
    bool HasBOM() const { return hasBOM_; } // true only if DetectType() matched one of the BOM patterns
155
156
    Ch Peek() const { return current_; } // returns the look-ahead character without touching the wrapped stream
157
    Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } // returns the look-ahead character, then decodes the next one into it
158
    size_t Tell() const { return is_->Tell(); } // forwards to the wrapped stream
159
160
    // Not implemented: output operations are unsupported on an input stream; each one asserts.
161
    void Put(Ch) { RAPIDJSON_ASSERT(false); }
162
    void Flush() { RAPIDJSON_ASSERT(false); } 
163
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
164
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
165
166
private:
167
    AutoUTFInputStream(const AutoUTFInputStream&); // copying is disabled: declared private with no definition in this header
168
    AutoUTFInputStream& operator=(const AutoUTFInputStream&); // assignment is disabled the same way
169
170
    // Detect encoding type with BOM or RFC 4627. Leaves type_ at the caller's value when neither test matches.
171
    void DetectType() {
172
        // BOM (Byte Order Mark). The 4-byte marks are tested before the 2-byte ones because FF FE 00 00 also starts with FF FE.
173
        // 00 00 FE FF  UTF-32BE  (bom == 0xFFFE0000)
174
        // FF FE 00 00  UTF-32LE  (bom == 0x0000FEFF)
175
        // FE FF        UTF-16BE  (low 16 bits == 0xFFFE)
176
        // FF FE        UTF-16LE  (low 16 bits == 0xFEFF)
177
        // EF BB BF     UTF-8     (low 24 bits == 0xBFBBEF)
178
179
        const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); // Peek4 is defined elsewhere; presumably null when four bytes are not available
180
        if (!c)
181
            return; // nothing to inspect: keep the type passed to the constructor
182
183
        unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); // c[0] becomes the lowest byte. NOTE(review): c[3] is promoted to int, so with 32-bit int a value >= 0x80 is shifted into the sign bit
184
        hasBOM_ = false; // already false from the constructor's initializer list
185
        if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
186
        else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
187
        else if ((bom & 0xFFFF) == 0xFFFE)      { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();                           }
188
        else if ((bom & 0xFFFF) == 0xFEFF)      { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();                           }
189
        else if ((bom & 0xFFFFFF) == 0xBFBBEF)  { type_ = kUTF8;    hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();              }
190
191
        // RFC 4627: Section 3
192
        // "Since the first two characters of a JSON text will always be ASCII
193
        // characters [RFC0020], it is possible to determine whether an octet
194
        // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
195
        // at the pattern of nulls in the first four octets."
196
        // 00 00 00 xx  UTF-32BE  (pattern 0x08)
197
        // 00 xx 00 xx  UTF-16BE  (pattern 0x0A)
198
        // xx 00 00 00  UTF-32LE  (pattern 0x01)
199
        // xx 00 xx 00  UTF-16LE  (pattern 0x05)
200
        // xx xx xx xx  UTF-8     (pattern 0x0F)
201
202
        if (!hasBOM_) {
203
            int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); // bit i is set when byte i is non-zero
204
            switch (pattern) {
205
            case 0x08: type_ = kUTF32BE; break;
206
            case 0x0A: type_ = kUTF16BE; break;
207
            case 0x01: type_ = kUTF32LE; break;
208
            case 0x05: type_ = kUTF16LE; break;
209
            case 0x0F: type_ = kUTF8;    break;
210
            default: break; // Use type defined by user.
211
            }
212
        }
213
214
        // Runtime check whether the size of character type is sufficient. It is enforced only by assertion.
215
        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
216
        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
217
    }
218
219
    typedef Ch (*TakeFunc)(InputByteStream& is); // signature of the per-encoding Take functions stored in the constructor's table
220
    InputByteStream* is_; // wrapped byte stream, held by pointer
221
    UTFType type_; // encoding in effect: the constructor argument unless DetectType() replaced it
222
    Ch current_; // one-character look-ahead returned by Peek()
223
    TakeFunc takeFunc_; // decoder selected from type_ in the constructor
224
    bool hasBOM_; // whether a BOM was matched (and consumed) by DetectType()
225
};
226
227
//! Output stream wrapper with dynamically bound encoding, chosen by the caller at construction (nothing is detected).
228
/*!
229
    \tparam CharType Type of character for writing.
230
    \tparam OutputByteStream type of output byte stream to be wrapped. Its Ch type must be one byte wide.
231
*/
232
template <typename CharType, typename OutputByteStream>
233
class AutoUTFOutputStream {
234
    RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); // the wrapped stream must accept single-byte units
235
public:
236
    typedef CharType Ch;
237
238
    //! Constructor. Selects the encoder for the requested type and optionally writes a BOM.
239
    /*!
240
        \param os output stream to be wrapped.
241
        \param type UTF encoding type. Asserted to lie between kUTF8 and kUTF32BE.
242
        \param putBOM Whether to write BOM at the beginning of the stream.
243
    */
244
    AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
245
        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
246
247
        // Runtime check whether the size of character type is sufficient. It is enforced only by assertion.
248
        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
249
        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
250
251
        static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; // one encoder per encoding, indexed by type_
252
        putFunc_ = f[type_];
253
254
        if (putBOM) // no default here: the caller must state whether a BOM is written
255
            PutBOM();
256
    }
257
258
    UTFType GetType() const { return type_; } // the encoding passed to the constructor
259
260
    void Put(Ch c) { putFunc_(*os_, c); } // encodes c with the encoder selected in the constructor
261
    void Flush() { os_->Flush(); } 
262
263
    // Not implemented: input operations and PutBegin/PutEnd are unsupported; each one asserts.
264
    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
265
    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
266
    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
267
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
268
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
269
270
private:
271
    AutoUTFOutputStream(const AutoUTFOutputStream&); // copying is disabled: declared private with no definition in this header
272
    AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); // assignment is disabled the same way
273
274
    void PutBOM() { 
275
        typedef void (*PutBOMFunc)(OutputByteStream&); // signature of the per-encoding PutBOM functions
276
        static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; // same ordering as the Put table in the constructor
277
        f[type_](*os_); // dispatch on the encoding chosen at construction
278
    }
279
280
    typedef void (*PutFunc)(OutputByteStream&, Ch); // signature of the per-encoding Put functions
281
282
    OutputByteStream* os_; // wrapped byte stream, held by pointer
283
    UTFType type_; // encoding chosen at construction
284
    PutFunc putFunc_; // encoder selected from type_ in the constructor
285
};
286
287
#undef RAPIDJSON_ENCODINGS_FUNC
288
289
RAPIDJSON_NAMESPACE_END
290
291
#ifdef __clang__
292
RAPIDJSON_DIAG_POP
293
#endif
294
295
#ifdef __GNUC__
296
RAPIDJSON_DIAG_POP
297
#endif
298
299
#endif // RAPIDJSON_ENCODEDSTREAM_H_