Coverage Report

Created: 2025-12-03 08:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_json_streaming_parser.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  JSon streaming parser
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2017, Even Rouault <even.rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
/*! @cond Doxygen_Suppress */
14
15
#include <assert.h>
16
#include <ctype.h>   // isdigit...
17
#include <stdio.h>   // snprintf
18
#include <string.h>  // strlen
19
#include <vector>
20
#include <string>
21
22
#include "cpl_conv.h"
23
#include "cpl_string.h"
24
#include "cpl_json_streaming_parser.h"
25
26
/************************************************************************/
27
/*                       CPLJSonStreamingParser()                       */
28
/************************************************************************/
29
30
/** Build a parser whose state stack contains only the INIT state. */
CPLJSonStreamingParser::CPLJSonStreamingParser()
{
    // Parse() dispatches on the top of m_aState, so seed it with INIT.
    m_aState.emplace_back(INIT);
}
34
35
/************************************************************************/
36
/*                      ~CPLJSonStreamingParser()                       */
37
/************************************************************************/
38
39
/** Destructor. There is nothing to release explicitly. */
CPLJSonStreamingParser::~CPLJSonStreamingParser() = default;
42
43
/************************************************************************/
44
/*                           SetMaxDepth()                              */
45
/************************************************************************/
46
47
void CPLJSonStreamingParser::SetMaxDepth(size_t nVal)
48
0
{
49
0
    m_nMaxDepth = nVal;
50
0
}
51
52
/************************************************************************/
53
/*                         SetMaxStringSize()                           */
54
/************************************************************************/
55
56
void CPLJSonStreamingParser::SetMaxStringSize(size_t nVal)
57
0
{
58
0
    m_nMaxStringSize = nVal;
59
0
}
60
61
/************************************************************************/
62
/*                                Reset()                               */
63
/************************************************************************/
64
65
void CPLJSonStreamingParser::Reset()
66
0
{
67
0
    m_bExceptionOccurred = false;
68
0
    m_bElementFound = false;
69
0
    m_nLastChar = 0;
70
0
    m_nLineCounter = 1;
71
0
    m_nCharCounter = 1;
72
0
    m_aState.clear();
73
0
    m_aState.push_back(INIT);
74
0
    m_osToken.clear();
75
0
    m_abArrayState.clear();
76
0
    m_aeObjectState.clear();
77
0
    m_bInStringEscape = false;
78
0
    m_bInUnicode = false;
79
0
    m_osUnicodeHex.clear();
80
0
}
81
82
/************************************************************************/
83
/*                              AdvanceChar()                           */
84
/************************************************************************/
85
86
void CPLJSonStreamingParser::AdvanceChar(const char *&pStr, size_t &nLength)
87
81.0M
{
88
81.0M
    if (*pStr == 13 && m_nLastChar != 10)
89
395k
    {
90
395k
        m_nLineCounter++;
91
395k
        m_nCharCounter = 0;
92
395k
    }
93
80.6M
    else if (*pStr == 10 && m_nLastChar != 13)
94
2.74M
    {
95
2.74M
        m_nLineCounter++;
96
2.74M
        m_nCharCounter = 0;
97
2.74M
    }
98
81.0M
    m_nLastChar = *pStr;
99
100
81.0M
    pStr++;
101
81.0M
    nLength--;
102
81.0M
    m_nCharCounter++;
103
81.0M
}
104
105
/************************************************************************/
106
/*                               SkipSpace()                            */
107
/************************************************************************/
108
109
void CPLJSonStreamingParser::SkipSpace(const char *&pStr, size_t &nLength)
110
12.7M
{
111
29.6M
    while (nLength > 0 && isspace(static_cast<unsigned char>(*pStr)))
112
16.9M
    {
113
16.9M
        AdvanceChar(pStr, nLength);
114
16.9M
    }
115
12.7M
}
116
117
/************************************************************************/
118
/*                             EmitException()                          */
119
/************************************************************************/
120
121
bool CPLJSonStreamingParser::EmitException(const char *pszMessage)
122
10.5k
{
123
10.5k
    m_bExceptionOccurred = true;
124
10.5k
    CPLString osMsg;
125
10.5k
    osMsg.Printf("At line %d, character %d: %s", m_nLineCounter, m_nCharCounter,
126
10.5k
                 pszMessage);
127
10.5k
    Exception(osMsg.c_str());
128
10.5k
    return false;
129
10.5k
}
130
131
/************************************************************************/
132
/*                             StopParsing()                            */
133
/************************************************************************/
134
135
void CPLJSonStreamingParser::StopParsing()
136
14.4k
{
137
14.4k
    m_bStopParsing = true;
138
14.4k
}
139
140
/************************************************************************/
141
/*                          EmitUnexpectedChar()                        */
142
/************************************************************************/
143
144
bool CPLJSonStreamingParser::EmitUnexpectedChar(char ch,
145
                                                const char *pszExpecting)
146
6.15k
{
147
6.15k
    char szMessage[64];
148
6.15k
    if (pszExpecting)
149
1.27k
    {
150
1.27k
        snprintf(szMessage, sizeof(szMessage),
151
1.27k
                 "Unexpected character (%c). Expecting %s", ch, pszExpecting);
152
1.27k
    }
153
4.88k
    else
154
4.88k
    {
155
4.88k
        snprintf(szMessage, sizeof(szMessage), "Unexpected character (%c)", ch);
156
4.88k
    }
157
6.15k
    return EmitException(szMessage);
158
6.15k
}
159
160
/************************************************************************/
161
/*                            IsValidNewToken()                         */
162
/************************************************************************/
163
164
/**
 * Tell whether a character can start a new value.
 *
 * Accepted: '[' and '{', '"', '-', '.' and decimal digits, 't', 'f', 'n',
 * and 'i', 'I', 'N' (StartNewToken() treats the last three as the start of
 * a number).
 */
static bool IsValidNewToken(char ch)
{
    switch (ch)
    {
        case '[':
        case '{':
        case '"':
        case '-':
        case '.':
        case 't':
        case 'f':
        case 'n':
        case 'i':
        case 'I':
        case 'N':
            return true;
        default:
            return isdigit(static_cast<unsigned char>(ch)) != 0;
    }
}
170
171
/************************************************************************/
172
/*                             StartNewToken()                          */
173
/************************************************************************/
174
175
bool CPLJSonStreamingParser::StartNewToken(const char *&pStr, size_t &nLength)
176
4.69M
{
177
4.69M
    char ch = *pStr;
178
4.69M
    if (ch == '{')
179
706k
    {
180
706k
        if (m_aState.size() == m_nMaxDepth)
181
11
        {
182
11
            return EmitException("Too many nested objects and/or arrays");
183
11
        }
184
706k
        StartObject();
185
706k
        m_aeObjectState.push_back(WAITING_KEY);
186
706k
        m_aState.push_back(OBJECT);
187
706k
        AdvanceChar(pStr, nLength);
188
706k
    }
189
3.98M
    else if (ch == '"')
190
2.83M
    {
191
2.83M
        m_aState.push_back(STRING);
192
2.83M
        AdvanceChar(pStr, nLength);
193
2.83M
    }
194
1.15M
    else if (ch == '[')
195
490k
    {
196
490k
        if (m_aState.size() == m_nMaxDepth)
197
10
        {
198
10
            return EmitException("Too many nested objects and/or arrays");
199
10
        }
200
490k
        StartArray();
201
490k
        m_abArrayState.push_back(ArrayState::INIT);
202
490k
        m_aState.push_back(ARRAY);
203
490k
        AdvanceChar(pStr, nLength);
204
490k
    }
205
664k
    else if (ch == '-' || ch == '.' ||
206
600k
             isdigit(static_cast<unsigned char>(ch)) || ch == 'i' ||
207
401k
             ch == 'I' || ch == 'N')
208
263k
    {
209
263k
        m_aState.push_back(NUMBER);
210
263k
    }
211
400k
    else if (ch == 't')
212
67.3k
    {
213
67.3k
        m_aState.push_back(STATE_TRUE);
214
67.3k
    }
215
333k
    else if (ch == 'f')
216
684
    {
217
684
        m_aState.push_back(STATE_FALSE);
218
684
    }
219
332k
    else if (ch == 'n')
220
332k
    {
221
332k
        m_aState.push_back(STATE_NULL); /* might be nan */
222
332k
    }
223
0
    else
224
0
    {
225
0
        assert(false);
226
0
    }
227
4.69M
    return true;
228
4.69M
}
229
230
/************************************************************************/
231
/*                       CheckAndEmitTrueFalseOrNull()                  */
232
/************************************************************************/
233
234
bool CPLJSonStreamingParser::CheckAndEmitTrueFalseOrNull(char ch)
235
400k
{
236
400k
    State eCurState = currentState();
237
238
400k
    if (eCurState == STATE_TRUE)
239
67.1k
    {
240
67.1k
        if (m_osToken == "true")
241
67.0k
        {
242
67.0k
            Boolean(true);
243
67.0k
        }
244
101
        else
245
101
        {
246
101
            return EmitUnexpectedChar(ch);
247
101
        }
248
67.1k
    }
249
333k
    else if (eCurState == STATE_FALSE)
250
524
    {
251
524
        if (m_osToken == "false")
252
445
        {
253
445
            Boolean(false);
254
445
        }
255
79
        else
256
79
        {
257
79
            return EmitUnexpectedChar(ch);
258
79
        }
259
524
    }
260
332k
    else /* if( eCurState == STATE_NULL ) */
261
332k
    {
262
332k
        if (m_osToken == "null")
263
332k
        {
264
332k
            Null();
265
332k
        }
266
63
        else
267
63
        {
268
63
            return EmitUnexpectedChar(ch);
269
63
        }
270
332k
    }
271
400k
    m_aState.pop_back();
272
400k
    m_osToken.clear();
273
400k
    return true;
274
400k
}
275
276
/************************************************************************/
277
/*                           CheckStackEmpty()                          */
278
/************************************************************************/
279
280
bool CPLJSonStreamingParser::CheckStackEmpty()
281
235
{
282
235
    if (!m_aeObjectState.empty())
283
235
    {
284
235
        return EmitException("Unterminated object");
285
235
    }
286
0
    else if (!m_abArrayState.empty())
287
0
    {
288
0
        return EmitException("Unterminated array");
289
0
    }
290
0
    return true;
291
235
}
292
293
/************************************************************************/
294
/*                           IsHighSurrogate()                          */
295
/************************************************************************/
296
297
/**
 * Tell whether a value is a UTF-16 high (leading) surrogate, that is in the
 * range [0xD800, 0xDBFF].
 *
 * The whole value is tested, not only its low 16 bits, so that code points
 * above 0xFFFF (e.g. 0x1D800) are not mistaken for surrogates.
 */
static bool IsHighSurrogate(unsigned uc)
{
    return uc >= 0xD800U && uc <= 0xDBFFU;
}
301
302
/************************************************************************/
303
/*                           IsLowSurrogate()                           */
304
/************************************************************************/
305
306
/**
 * Tell whether a value is a UTF-16 low (trailing) surrogate, that is in the
 * range [0xDC00, 0xDFFF].
 *
 * The whole value is tested, not only its low 16 bits, so that code points
 * above 0xFFFF (e.g. 0x1DC00) are not mistaken for surrogates.
 */
static bool IsLowSurrogate(unsigned uc)
{
    return uc >= 0xDC00U && uc <= 0xDFFFU;
}
310
311
/************************************************************************/
312
/*                         GetSurrogatePair()                           */
313
/************************************************************************/
314
315
/**
 * Combine a surrogate pair into a single code point.
 *
 * The low 10 bits of each half are concatenated (high half first) and the
 * result is offset by 0x10000.
 *
 * @param hi high (leading) surrogate.
 * @param lo low (trailing) surrogate.
 */
static unsigned GetSurrogatePair(unsigned hi, unsigned lo)
{
    constexpr unsigned nTenBitsMask = 0x3FF;
    const unsigned nHighBits = (hi & nTenBitsMask) << 10;
    const unsigned nLowBits = lo & nTenBitsMask;
    // The two halves do not overlap, so OR is equivalent to addition.
    return 0x10000 + (nHighBits | nLowBits);
}
319
320
/************************************************************************/
321
/*                            IsHexDigit()                              */
322
/************************************************************************/
323
324
/** Tell whether a character is one of 0-9, a-f or A-F. */
static bool IsHexDigit(char ch)
{
    if (ch >= '0' && ch <= '9')
        return true;
    if (ch >= 'a' && ch <= 'f')
        return true;
    return ch >= 'A' && ch <= 'F';
}
329
330
/************************************************************************/
331
/*                           HexToDecimal()                             */
332
/************************************************************************/
333
334
/**
 * Return the numeric value of a hexadecimal digit.
 *
 * The input is not validated: any character that is neither 0-9 nor a-f is
 * processed as if it were in A-F.
 */
static unsigned HexToDecimal(char ch)
{
    const int nCode = ch;
    if (nCode >= '0' && nCode <= '9')
        return static_cast<unsigned>(nCode - '0');
    if (nCode >= 'a' && nCode <= 'f')
        return static_cast<unsigned>(10 + nCode - 'a');
    // Upper-case digit (or unchecked input).
    return static_cast<unsigned>(10 + nCode - 'A');
}
343
344
/************************************************************************/
345
/*                            getUCSChar()                              */
346
/************************************************************************/
347
348
static unsigned getUCSChar(const std::string &unicode4HexChar)
349
470k
{
350
470k
    return (HexToDecimal(unicode4HexChar[0]) << 12) |
351
470k
           (HexToDecimal(unicode4HexChar[1]) << 8) |
352
470k
           (HexToDecimal(unicode4HexChar[2]) << 4) |
353
470k
           (HexToDecimal(unicode4HexChar[3]));
354
470k
}
355
356
/************************************************************************/
357
/*                           DecodeUnicode()                            */
358
/************************************************************************/
359
360
void CPLJSonStreamingParser::DecodeUnicode()
361
220k
{
362
220k
    constexpr char szReplacementUTF8[] = "\xEF\xBF\xBD";
363
220k
    unsigned nUCSChar;
364
220k
    if (m_osUnicodeHex.size() == 8)
365
29.4k
    {
366
29.4k
        unsigned nUCSHigh = getUCSChar(m_osUnicodeHex);
367
29.4k
        assert(IsHighSurrogate(nUCSHigh));
368
29.4k
        unsigned nUCSLow = getUCSChar(m_osUnicodeHex.substr(4));
369
29.4k
        if (IsLowSurrogate(nUCSLow))
370
1.06k
        {
371
1.06k
            nUCSChar = GetSurrogatePair(nUCSHigh, nUCSLow);
372
1.06k
        }
373
28.4k
        else
374
28.4k
        {
375
            /* Invalid code point. Insert the replacement char */
376
28.4k
            nUCSChar = 0xFFFFFFFFU;
377
28.4k
        }
378
29.4k
    }
379
190k
    else
380
190k
    {
381
190k
        assert(m_osUnicodeHex.size() == 4);
382
190k
        nUCSChar = getUCSChar(m_osUnicodeHex);
383
190k
    }
384
385
220k
    if (nUCSChar < 0x80)
386
92.0k
    {
387
92.0k
        m_osToken += static_cast<char>(nUCSChar);
388
92.0k
    }
389
128k
    else if (nUCSChar < 0x800)
390
1.07k
    {
391
1.07k
        m_osToken += static_cast<char>(0xC0 | (nUCSChar >> 6));
392
1.07k
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
393
1.07k
    }
394
127k
    else if (IsLowSurrogate(nUCSChar) || IsHighSurrogate(nUCSChar))
395
62.1k
    {
396
        /* Invalid code point. Insert the replacement char */
397
62.1k
        m_osToken += szReplacementUTF8;
398
62.1k
    }
399
65.1k
    else if (nUCSChar < 0x10000)
400
35.6k
    {
401
35.6k
        m_osToken += static_cast<char>(0xE0 | (nUCSChar >> 12));
402
35.6k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
403
35.6k
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
404
35.6k
    }
405
29.4k
    else if (nUCSChar < 0x110000)
406
1.06k
    {
407
1.06k
        m_osToken += static_cast<char>(0xF0 | ((nUCSChar >> 18) & 0x07));
408
1.06k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 12) & 0x3F));
409
1.06k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
410
1.06k
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
411
1.06k
    }
412
28.4k
    else
413
28.4k
    {
414
        /* Invalid code point. Insert the replacement char */
415
28.4k
        m_osToken += szReplacementUTF8;
416
28.4k
    }
417
418
220k
    m_bInUnicode = false;
419
220k
    m_osUnicodeHex.clear();
420
220k
}
421
422
/************************************************************************/
423
/*                              Parse()                                 */
424
/************************************************************************/
425
426
bool CPLJSonStreamingParser::Parse(std::string_view sStr, bool bFinished)
427
29.2k
{
428
29.2k
    const char *pStr = sStr.data();
429
29.2k
    size_t nLength = sStr.size();
430
12.7M
    while (true)
431
12.7M
    {
432
12.7M
        if (m_bExceptionOccurred || m_bStopParsing)
433
14.6k
            return false;
434
12.7M
        State eCurState = currentState();
435
12.7M
        if (eCurState == INIT)
436
27.9k
        {
437
27.9k
            SkipSpace(pStr, nLength);
438
27.9k
            if (nLength == 0)
439
1.42k
                return true;
440
26.5k
            if (m_bElementFound || !IsValidNewToken(*pStr))
441
287
            {
442
287
                return EmitUnexpectedChar(*pStr);
443
287
            }
444
26.2k
            if (!StartNewToken(pStr, nLength))
445
0
            {
446
0
                return false;
447
0
            }
448
26.2k
            m_bElementFound = true;
449
26.2k
        }
450
12.6M
        else if (eCurState == NUMBER)
451
263k
        {
452
263k
            if (m_osToken.empty())
453
263k
            {
454
                // Optimization to avoid using temporary buffer
455
263k
                auto nPos =
456
263k
                    std::string_view(pStr, nLength).find_first_of(" \t\r\n,}]");
457
263k
                if (nPos != std::string::npos)
458
263k
                {
459
263k
                    Number(std::string_view(pStr, nPos));
460
263k
                    m_aState.pop_back();
461
263k
                    pStr += nPos;
462
263k
                    nLength -= nPos;
463
263k
                    SkipSpace(pStr, nLength);
464
263k
                    continue;
465
263k
                }
466
263k
            }
467
468
7.34k
            while (nLength)
469
7.16k
            {
470
7.16k
                char ch = *pStr;
471
7.16k
                if (ch == '+' || ch == '-' ||
472
7.01k
                    isdigit(static_cast<unsigned char>(ch)) || ch == '.' ||
473
614
                    ch == 'e' || ch == 'E')
474
6.60k
                {
475
6.60k
                    if (m_osToken.size() == 1024)
476
1
                    {
477
1
                        return EmitException("Too many characters in number");
478
1
                    }
479
6.60k
                    m_osToken += ch;
480
6.60k
                }
481
560
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
482
483
                         ch == '}' || ch == ']')
483
83
                {
484
83
                    SkipSpace(pStr, nLength);
485
83
                    break;
486
83
                }
487
477
                else
488
477
                {
489
477
                    CPLString extendedToken(m_osToken + ch);
490
477
                    if ((STARTS_WITH_CI("Infinity", extendedToken) &&
491
149
                         m_osToken.size() + 1 <= strlen("Infinity")) ||
492
328
                        (STARTS_WITH_CI("-Infinity", extendedToken) &&
493
21
                         m_osToken.size() + 1 <= strlen("-Infinity")) ||
494
311
                        (STARTS_WITH_CI("NaN", extendedToken) &&
495
86
                         m_osToken.size() + 1 <= strlen("NaN")))
496
252
                    {
497
252
                        m_osToken += ch;
498
252
                    }
499
225
                    else
500
225
                    {
501
225
                        return EmitUnexpectedChar(ch);
502
225
                    }
503
477
                }
504
6.85k
                AdvanceChar(pStr, nLength);
505
6.85k
            }
506
507
270
            if (nLength != 0 || bFinished)
508
207
            {
509
207
                const char firstCh = m_osToken[0];
510
207
                if (firstCh == 'i' || firstCh == 'I')
511
19
                {
512
19
                    if (!EQUAL(m_osToken.c_str(), "Infinity"))
513
11
                    {
514
11
                        return EmitException("Invalid number");
515
11
                    }
516
19
                }
517
188
                else if (firstCh == '-')
518
46
                {
519
46
                    if (m_osToken[1] == 'i' || m_osToken[1] == 'I')
520
1
                    {
521
1
                        if (!EQUAL(m_osToken.c_str(), "-Infinity"))
522
1
                        {
523
1
                            return EmitException("Invalid number");
524
1
                        }
525
1
                    }
526
46
                }
527
142
                else if (firstCh == 'n' || firstCh == 'N')
528
9
                {
529
9
                    if (m_osToken[1] == 'a' || m_osToken[1] == 'A')
530
9
                    {
531
9
                        if (!EQUAL(m_osToken.c_str(), "NaN"))
532
4
                        {
533
4
                            return EmitException("Invalid number");
534
4
                        }
535
9
                    }
536
9
                }
537
538
191
                Number(m_osToken);
539
191
                m_osToken.clear();
540
191
                m_aState.pop_back();
541
191
            }
542
543
254
            if (nLength == 0)
544
178
            {
545
178
                if (bFinished)
546
115
                {
547
115
                    return CheckStackEmpty();
548
115
                }
549
63
                return true;
550
178
            }
551
254
        }
552
12.4M
        else if (eCurState == STRING)
553
2.83M
        {
554
2.83M
            bool bEOS = false;
555
556
2.83M
            if (m_osToken.empty() && !m_bInStringEscape && !m_bInUnicode)
557
2.83M
            {
558
                // Optimization to avoid using temporary buffer
559
2.83M
                auto nPos =
560
2.83M
                    std::string_view(pStr, nLength).find_first_of("\"\\");
561
2.83M
                if (nPos != std::string::npos && pStr[nPos] == '"')
562
2.73M
                {
563
2.73M
                    if (nPos > m_nMaxStringSize)
564
0
                    {
565
0
                        return EmitException("Too many characters in number");
566
0
                    }
567
2.73M
                    if (!m_aeObjectState.empty() &&
568
2.73M
                        m_aeObjectState.back() == IN_KEY)
569
1.88M
                    {
570
1.88M
                        StartObjectMember(std::string_view(pStr, nPos));
571
1.88M
                    }
572
854k
                    else
573
854k
                    {
574
854k
                        String(std::string_view(pStr, nPos));
575
854k
                    }
576
2.73M
                    m_aState.pop_back();
577
2.73M
                    pStr += nPos + 1;
578
2.73M
                    nLength -= nPos + 1;
579
2.73M
                    SkipSpace(pStr, nLength);
580
2.73M
                    if (nLength != 0)
581
2.73M
                        continue;
582
151
                    bEOS = true;
583
151
                }
584
2.83M
            }
585
586
53.8M
            while (nLength)
587
53.8M
            {
588
53.8M
                if (m_osToken.size() == m_nMaxStringSize)
589
0
                {
590
0
                    return EmitException("Too many characters in number");
591
0
                }
592
593
53.8M
                char ch = *pStr;
594
53.8M
                if (m_bInUnicode)
595
1.12M
                {
596
1.12M
                    if (m_osUnicodeHex.size() == 8)
597
29.4k
                    {
598
29.4k
                        DecodeUnicode();
599
29.4k
                    }
600
1.09M
                    else if (m_osUnicodeHex.size() == 4)
601
128k
                    {
602
                        /* Start of next surrogate pair ? */
603
128k
                        if (m_nLastChar == '\\')
604
36.0k
                        {
605
36.0k
                            if (ch == 'u')
606
29.5k
                            {
607
29.5k
                                AdvanceChar(pStr, nLength);
608
29.5k
                                continue;
609
29.5k
                            }
610
6.47k
                            else
611
6.47k
                            {
612
                                /* will be replacement character */
613
6.47k
                                DecodeUnicode();
614
6.47k
                                m_bInStringEscape = true;
615
6.47k
                            }
616
36.0k
                        }
617
92.9k
                        else if (m_nLastChar == 'u')
618
29.5k
                        {
619
29.5k
                            if (IsHexDigit(ch))
620
29.4k
                            {
621
29.4k
                                m_osUnicodeHex += ch;
622
29.4k
                            }
623
65
                            else
624
65
                            {
625
65
                                char szMessage[64];
626
65
                                snprintf(szMessage, sizeof(szMessage),
627
65
                                         "Illegal character in unicode "
628
65
                                         "sequence (\\%c)",
629
65
                                         ch);
630
65
                                return EmitException(szMessage);
631
65
                            }
632
29.4k
                            AdvanceChar(pStr, nLength);
633
29.4k
                            continue;
634
29.5k
                        }
635
63.3k
                        else if (ch == '\\')
636
36.0k
                        {
637
36.0k
                            AdvanceChar(pStr, nLength);
638
36.0k
                            continue;
639
36.0k
                        }
640
27.2k
                        else
641
27.2k
                        {
642
                            /* will be replacement character */
643
27.2k
                            DecodeUnicode();
644
27.2k
                        }
645
128k
                    }
646
971k
                    else
647
971k
                    {
648
971k
                        if (IsHexDigit(ch))
649
970k
                        {
650
970k
                            m_osUnicodeHex += ch;
651
970k
                            if (m_osUnicodeHex.size() == 4 &&
652
220k
                                !IsHighSurrogate(getUCSChar(m_osUnicodeHex)))
653
157k
                            {
654
157k
                                DecodeUnicode();
655
157k
                            }
656
970k
                        }
657
237
                        else
658
237
                        {
659
237
                            char szMessage[64];
660
237
                            snprintf(szMessage, sizeof(szMessage),
661
237
                                     "Illegal character in unicode "
662
237
                                     "sequence (\\%c)",
663
237
                                     ch);
664
237
                            return EmitException(szMessage);
665
237
                        }
666
970k
                        AdvanceChar(pStr, nLength);
667
970k
                        continue;
668
971k
                    }
669
1.12M
                }
670
671
52.8M
                if (m_bInStringEscape)
672
535k
                {
673
535k
                    if (ch == '"' || ch == '\\' || ch == '/')
674
116k
                        m_osToken += ch;
675
419k
                    else if (ch == 'b')
676
9.72k
                        m_osToken += '\b';
677
409k
                    else if (ch == 'f')
678
5.59k
                        m_osToken += '\f';
679
403k
                    else if (ch == 'n')
680
138k
                        m_osToken += '\n';
681
265k
                    else if (ch == 'r')
682
27.8k
                        m_osToken += '\r';
683
237k
                    else if (ch == 't')
684
16.7k
                        m_osToken += '\t';
685
221k
                    else if (ch == 'u')
686
220k
                    {
687
220k
                        m_bInUnicode = true;
688
220k
                    }
689
294
                    else
690
294
                    {
691
294
                        char szMessage[32];
692
294
                        snprintf(szMessage, sizeof(szMessage),
693
294
                                 "Illegal escape sequence (\\%c)", ch);
694
294
                        return EmitException(szMessage);
695
294
                    }
696
535k
                    m_bInStringEscape = false;
697
535k
                    AdvanceChar(pStr, nLength);
698
535k
                    continue;
699
535k
                }
700
52.2M
                else if (ch == '\\')
701
529k
                {
702
529k
                    m_bInStringEscape = true;
703
529k
                    AdvanceChar(pStr, nLength);
704
529k
                    continue;
705
529k
                }
706
51.7M
                else if (ch == '"')
707
92.5k
                {
708
92.5k
                    bEOS = true;
709
92.5k
                    AdvanceChar(pStr, nLength);
710
92.5k
                    SkipSpace(pStr, nLength);
711
712
92.5k
                    if (!m_aeObjectState.empty() &&
713
92.5k
                        m_aeObjectState.back() == IN_KEY)
714
60.3k
                    {
715
60.3k
                        StartObjectMember(m_osToken);
716
60.3k
                    }
717
32.1k
                    else
718
32.1k
                    {
719
32.1k
                        String(m_osToken);
720
32.1k
                    }
721
92.5k
                    m_osToken.clear();
722
92.5k
                    m_aState.pop_back();
723
724
92.5k
                    break;
725
92.5k
                }
726
727
51.6M
                m_osToken += ch;
728
51.6M
                AdvanceChar(pStr, nLength);
729
51.6M
            }
730
731
95.8k
            if (nLength == 0)
732
3.36k
            {
733
3.36k
                if (bFinished)
734
1.09k
                {
735
1.09k
                    if (!bEOS)
736
1.00k
                    {
737
1.00k
                        return EmitException("Unterminated string");
738
1.00k
                    }
739
92
                    return CheckStackEmpty();
740
1.09k
                }
741
2.27k
                return true;
742
3.36k
            }
743
95.8k
        }
744
9.59M
        else if (eCurState == ARRAY)
745
1.50M
        {
746
1.50M
            SkipSpace(pStr, nLength);
747
1.50M
            if (nLength == 0)
748
259
            {
749
259
                if (bFinished)
750
110
                {
751
110
                    return EmitException("Unterminated array");
752
110
                }
753
149
                return true;
754
259
            }
755
756
1.50M
            char ch = *pStr;
757
1.50M
            if (ch == ',')
758
295k
            {
759
295k
                if (m_abArrayState.back() != ArrayState::AFTER_VALUE)
760
52
                {
761
52
                    return EmitUnexpectedChar(ch, "','");
762
52
                }
763
295k
                m_abArrayState.back() = ArrayState::AFTER_COMMA;
764
295k
                AdvanceChar(pStr, nLength);
765
295k
            }
766
1.21M
            else if (ch == ']')
767
429k
            {
768
429k
                if (m_abArrayState.back() == ArrayState::AFTER_COMMA)
769
34
                {
770
34
                    return EmitException("Missing value");
771
34
                }
772
773
429k
                EndArray();
774
429k
                AdvanceChar(pStr, nLength);
775
429k
                m_abArrayState.pop_back();
776
429k
                m_aState.pop_back();
777
429k
            }
778
783k
            else if (IsValidNewToken(ch))
779
782k
            {
780
782k
                if (m_abArrayState.back() == ArrayState::AFTER_VALUE)
781
385
                {
782
385
                    return EmitException(
783
385
                        "Unexpected state: ',' or ']' expected");
784
385
                }
785
782k
                m_abArrayState.back() = ArrayState::AFTER_VALUE;
786
787
782k
                StartArrayMember();
788
782k
                if (!StartNewToken(pStr, nLength))
789
9
                {
790
9
                    return false;
791
9
                }
792
782k
            }
793
745
            else
794
745
            {
795
745
                return EmitUnexpectedChar(ch);
796
745
            }
797
1.50M
        }
798
8.08M
        else if (eCurState == OBJECT)
799
7.68M
        {
800
7.68M
            SkipSpace(pStr, nLength);
801
7.68M
            if (nLength == 0)
802
432
            {
803
432
                if (bFinished)
804
157
                {
805
157
                    return EmitException("Unterminated object");
806
157
                }
807
275
                return true;
808
432
            }
809
810
7.68M
            char ch = *pStr;
811
7.68M
            if (ch == ',')
812
1.25M
            {
813
1.25M
                if (m_aeObjectState.back() != IN_VALUE)
814
243
                {
815
243
                    return EmitUnexpectedChar(ch, "','");
816
243
                }
817
818
1.25M
                m_aeObjectState.back() = WAITING_KEY;
819
1.25M
                AdvanceChar(pStr, nLength);
820
1.25M
            }
821
6.42M
            else if (ch == ':')
822
1.94M
            {
823
1.94M
                if (m_aeObjectState.back() != IN_KEY)
824
233
                {
825
233
                    return EmitUnexpectedChar(ch, "':'");
826
233
                }
827
1.94M
                m_aeObjectState.back() = KEY_FINISHED;
828
1.94M
                AdvanceChar(pStr, nLength);
829
1.94M
            }
830
4.48M
            else if (ch == '}')
831
597k
            {
832
597k
                if (m_aeObjectState.back() == WAITING_KEY ||
833
584k
                    m_aeObjectState.back() == IN_VALUE)
834
597k
                {
835
                    // nothing
836
597k
                }
837
111
                else
838
111
                {
839
111
                    return EmitException("Missing value");
840
111
                }
841
842
597k
                EndObject();
843
597k
                AdvanceChar(pStr, nLength);
844
597k
                m_aeObjectState.pop_back();
845
597k
                m_aState.pop_back();
846
597k
            }
847
3.88M
            else if (IsValidNewToken(ch))
848
3.88M
            {
849
3.88M
                if (m_aeObjectState.back() == WAITING_KEY)
850
1.94M
                {
851
1.94M
                    if (ch != '"')
852
743
                    {
853
743
                        return EmitUnexpectedChar(ch, "'\"'");
854
743
                    }
855
1.94M
                    m_aeObjectState.back() = IN_KEY;
856
1.94M
                }
857
1.94M
                else if (m_aeObjectState.back() == KEY_FINISHED)
858
1.94M
                {
859
1.94M
                    m_aeObjectState.back() = IN_VALUE;
860
1.94M
                }
861
1.55k
                else
862
1.55k
                {
863
1.55k
                    return EmitException("Unexpected state");
864
1.55k
                }
865
3.88M
                if (!StartNewToken(pStr, nLength))
866
12
                {
867
12
                    return false;
868
12
                }
869
3.88M
            }
870
2.83k
            else
871
2.83k
            {
872
2.83k
                return EmitUnexpectedChar(ch);
873
2.83k
            }
874
7.68M
        }
875
400k
        else /* if( eCurState == STATE_TRUE || eCurState == STATE_FALSE ||
876
                    eCurState == STATE_NULL ) */
877
400k
        {
878
2.00M
            while (nLength)
879
2.00M
            {
880
2.00M
                char ch = *pStr;
881
2.00M
                if (eCurState == STATE_NULL && (ch == 'a' || ch == 'A') &&
882
107
                    m_osToken.size() == 1)
883
52
                {
884
52
                    m_aState.back() = NUMBER;
885
52
                    break;
886
52
                }
887
2.00M
                if (isalpha(static_cast<unsigned char>(ch)))
888
1.60M
                {
889
1.60M
                    m_osToken += ch;
890
1.60M
                    if (eCurState == STATE_TRUE &&
891
268k
                        (m_osToken.size() > strlen("true") ||
892
268k
                         memcmp(m_osToken.c_str(), "true", m_osToken.size()) !=
893
268k
                             0))
894
139
                    {
895
139
                        return EmitUnexpectedChar(*pStr);
896
139
                    }
897
1.60M
                    else if (eCurState == STATE_FALSE &&
898
2.61k
                             (m_osToken.size() > strlen("false") ||
899
2.61k
                              memcmp(m_osToken.c_str(), "false",
900
2.61k
                                     m_osToken.size()) != 0))
901
132
                    {
902
132
                        return EmitUnexpectedChar(*pStr);
903
132
                    }
904
1.60M
                    else if (eCurState == STATE_NULL &&
905
1.33M
                             (m_osToken.size() > strlen("null") ||
906
1.33M
                              memcmp(m_osToken.c_str(), "null",
907
1.33M
                                     m_osToken.size()) != 0))
908
138
                    {
909
138
                        return EmitUnexpectedChar(*pStr);
910
138
                    }
911
1.60M
                }
912
400k
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
913
19.4k
                         ch == '}' || ch == ']')
914
400k
                {
915
400k
                    SkipSpace(pStr, nLength);
916
400k
                    break;
917
400k
                }
918
136
                else
919
136
                {
920
136
                    return EmitUnexpectedChar(ch);
921
136
                }
922
1.60M
                AdvanceChar(pStr, nLength);
923
1.60M
            }
924
400k
            if (m_aState.back() == NUMBER)
925
52
            {
926
52
                continue;
927
52
            }
928
400k
            if (nLength == 0)
929
118
            {
930
118
                if (bFinished)
931
53
                {
932
53
                    if (!CheckAndEmitTrueFalseOrNull(0))
933
25
                        return false;
934
28
                    return CheckStackEmpty();
935
53
                }
936
65
                return true;
937
118
            }
938
939
400k
            if (!CheckAndEmitTrueFalseOrNull(*pStr))
940
218
                return false;
941
400k
        }
942
12.7M
    }
943
29.2k
}
944
945
/************************************************************************/
946
/*                       GetSerializedString()                          */
947
/************************************************************************/
948
949
std::string CPLJSonStreamingParser::GetSerializedString(std::string_view s)
950
0
{
951
0
    std::string osStr("\"");
952
0
    for (char ch : s)
953
0
    {
954
0
        if (ch == '\b')
955
0
            osStr += "\\b";
956
0
        else if (ch == '\f')
957
0
            osStr += "\\f";
958
0
        else if (ch == '\n')
959
0
            osStr += "\\n";
960
0
        else if (ch == '\r')
961
0
            osStr += "\\r";
962
0
        else if (ch == '\t')
963
0
            osStr += "\\t";
964
0
        else if (ch == '"')
965
0
            osStr += "\\\"";
966
0
        else if (ch == '\\')
967
0
            osStr += "\\\\";
968
0
        else if (static_cast<unsigned char>(ch) < ' ')
969
0
            osStr += CPLSPrintf("\\u%04X", ch);
970
0
        else
971
0
            osStr += ch;
972
0
    }
973
0
    osStr += "\"";
974
0
    return osStr;
975
0
}
976
977
/*! @endcond */