Coverage Report

Created: 2026-06-30 08:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_json_streaming_parser.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  JSon streaming parser
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2017, Even Rouault <even.rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
/*! @cond Doxygen_Suppress */
14
15
#include <assert.h>
16
#include <ctype.h>   // isdigit...
17
#include <stdio.h>   // snprintf
18
#include <string.h>  // strlen
19
#include <vector>
20
#include <string>
21
22
#include "cpl_conv.h"
23
#include "cpl_string.h"
24
#include "cpl_json_streaming_parser.h"
25
26
/************************************************************************/
27
/*                       CPLJSonStreamingParser()                       */
28
/************************************************************************/
29
30
/** Construct a parser whose state stack initially holds only INIT. */
CPLJSonStreamingParser::CPLJSonStreamingParser()
{
    m_aState.push_back(INIT);
}
34
35
/************************************************************************/
36
/*                      ~CPLJSonStreamingParser()                       */
37
/************************************************************************/
38
39
/** Destructor. The body performs no explicit cleanup. */
CPLJSonStreamingParser::~CPLJSonStreamingParser() = default;
42
43
/************************************************************************/
44
/*                            SetMaxDepth()                             */
45
/************************************************************************/
46
47
void CPLJSonStreamingParser::SetMaxDepth(size_t nVal)
48
0
{
49
0
    m_nMaxDepth = nVal;
50
0
}
51
52
/************************************************************************/
53
/*                          SetMaxStringSize()                          */
54
/************************************************************************/
55
56
void CPLJSonStreamingParser::SetMaxStringSize(size_t nVal)
57
0
{
58
0
    m_nMaxStringSize = nVal;
59
0
}
60
61
/************************************************************************/
62
/*                               Reset()                                */
63
/************************************************************************/
64
65
void CPLJSonStreamingParser::Reset()
66
0
{
67
0
    m_bExceptionOccurred = false;
68
0
    m_bElementFound = false;
69
0
    m_nLastChar = 0;
70
0
    m_nLineCounter = 1;
71
0
    m_nCharCounter = 1;
72
0
    m_aState.clear();
73
0
    m_aState.push_back(INIT);
74
0
    m_osToken.clear();
75
0
    m_abArrayState.clear();
76
0
    m_aeObjectState.clear();
77
0
    m_bInStringEscape = false;
78
0
    m_bInUnicode = false;
79
0
    m_osUnicodeHex.clear();
80
0
}
81
82
/************************************************************************/
83
/*                            AdvanceChar()                             */
84
/************************************************************************/
85
86
void CPLJSonStreamingParser::AdvanceChar(const char *&pStr, size_t &nLength)
87
106M
{
88
106M
    if (*pStr == 13 && m_nLastChar != 10)
89
434k
    {
90
434k
        m_nLineCounter++;
91
434k
        m_nCharCounter = 0;
92
434k
    }
93
106M
    else if (*pStr == 10 && m_nLastChar != 13)
94
3.56M
    {
95
3.56M
        m_nLineCounter++;
96
3.56M
        m_nCharCounter = 0;
97
3.56M
    }
98
106M
    m_nLastChar = *pStr;
99
100
106M
    pStr++;
101
106M
    nLength--;
102
106M
    m_nCharCounter++;
103
106M
}
104
105
/************************************************************************/
106
/*                             SkipSpace()                              */
107
/************************************************************************/
108
109
void CPLJSonStreamingParser::SkipSpace(const char *&pStr, size_t &nLength)
110
20.5M
{
111
37.8M
    while (nLength > 0 && isspace(static_cast<unsigned char>(*pStr)))
112
17.2M
    {
113
17.2M
        AdvanceChar(pStr, nLength);
114
17.2M
    }
115
20.5M
}
116
117
/************************************************************************/
118
/*                           EmitException()                            */
119
/************************************************************************/
120
121
bool CPLJSonStreamingParser::EmitException(const char *pszMessage)
122
13.4k
{
123
13.4k
    m_bExceptionOccurred = true;
124
13.4k
    CPLString osMsg;
125
13.4k
    osMsg.Printf("At line %d, character %d: %s", m_nLineCounter, m_nCharCounter,
126
13.4k
                 pszMessage);
127
13.4k
    Exception(osMsg.c_str());
128
13.4k
    return false;
129
13.4k
}
130
131
/************************************************************************/
132
/*                            StopParsing()                             */
133
/************************************************************************/
134
135
void CPLJSonStreamingParser::StopParsing()
136
18.9k
{
137
18.9k
    m_bStopParsing = true;
138
18.9k
}
139
140
/************************************************************************/
141
/*                         EmitUnexpectedChar()                         */
142
/************************************************************************/
143
144
bool CPLJSonStreamingParser::EmitUnexpectedChar(char ch,
145
                                                const char *pszExpecting)
146
8.01k
{
147
8.01k
    char szMessage[64];
148
8.01k
    if (pszExpecting)
149
1.28k
    {
150
1.28k
        snprintf(szMessage, sizeof(szMessage),
151
1.28k
                 "Unexpected character (%c). Expecting %s", ch, pszExpecting);
152
1.28k
    }
153
6.73k
    else
154
6.73k
    {
155
6.73k
        snprintf(szMessage, sizeof(szMessage), "Unexpected character (%c)", ch);
156
6.73k
    }
157
8.01k
    return EmitException(szMessage);
158
8.01k
}
159
160
/************************************************************************/
161
/*                          IsValidNewToken()                           */
162
/************************************************************************/
163
164
/** Return whether ch may be the first character of a value.
 *
 * Accepted: '[' and '{', '"', '-', '.', a decimal digit, and the letters
 * t, f, n, i, I and N.
 */
static bool IsValidNewToken(char ch)
{
    switch (ch)
    {
        case '[':
        case '{':
        case '"':
        case '-':
        case '.':
        case 't':
        case 'f':
        case 'n':
        case 'i':
        case 'I':
        case 'N':
            return true;
        default:
            return isdigit(static_cast<unsigned char>(ch)) != 0;
    }
}
170
171
/************************************************************************/
172
/*                           StartNewToken()                            */
173
/************************************************************************/
174
175
bool CPLJSonStreamingParser::StartNewToken(const char *&pStr, size_t &nLength)
176
7.53M
{
177
7.53M
    char ch = *pStr;
178
7.53M
    if (ch == '{')
179
1.23M
    {
180
1.23M
        if (m_aState.size() == m_nMaxDepth)
181
11
        {
182
11
            return EmitException("Too many nested objects and/or arrays");
183
11
        }
184
1.23M
        StartObject();
185
1.23M
        m_aeObjectState.push_back(WAITING_KEY);
186
1.23M
        m_aState.push_back(OBJECT);
187
1.23M
        AdvanceChar(pStr, nLength);
188
1.23M
    }
189
6.29M
    else if (ch == '"')
190
4.63M
    {
191
4.63M
        m_aState.push_back(STRING);
192
4.63M
        AdvanceChar(pStr, nLength);
193
4.63M
    }
194
1.66M
    else if (ch == '[')
195
629k
    {
196
629k
        if (m_aState.size() == m_nMaxDepth)
197
13
        {
198
13
            return EmitException("Too many nested objects and/or arrays");
199
13
        }
200
629k
        StartArray();
201
629k
        m_abArrayState.push_back(ArrayState::INIT);
202
629k
        m_aState.push_back(ARRAY);
203
629k
        AdvanceChar(pStr, nLength);
204
629k
    }
205
1.03M
    else if (ch == '-' || ch == '.' ||
206
945k
             isdigit(static_cast<unsigned char>(ch)) || ch == 'i' ||
207
669k
             ch == 'I' || ch == 'N')
208
363k
    {
209
363k
        m_aState.push_back(NUMBER);
210
363k
    }
211
669k
    else if (ch == 't')
212
90.8k
    {
213
90.8k
        m_aState.push_back(STATE_TRUE);
214
90.8k
    }
215
578k
    else if (ch == 'f')
216
890
    {
217
890
        m_aState.push_back(STATE_FALSE);
218
890
    }
219
577k
    else if (ch == 'n')
220
577k
    {
221
577k
        m_aState.push_back(STATE_NULL); /* might be nan */
222
577k
    }
223
0
    else
224
0
    {
225
0
        assert(false);
226
0
    }
227
7.53M
    return true;
228
7.53M
}
229
230
/************************************************************************/
231
/*                    CheckAndEmitTrueFalseOrNull()                     */
232
/************************************************************************/
233
234
bool CPLJSonStreamingParser::CheckAndEmitTrueFalseOrNull(char ch)
235
668k
{
236
668k
    State eCurState = currentState();
237
238
668k
    if (eCurState == STATE_TRUE)
239
90.5k
    {
240
90.5k
        if (m_osToken == "true")
241
90.4k
        {
242
90.4k
            Boolean(true);
243
90.4k
        }
244
113
        else
245
113
        {
246
113
            return EmitUnexpectedChar(ch);
247
113
        }
248
90.5k
    }
249
577k
    else if (eCurState == STATE_FALSE)
250
714
    {
251
714
        if (m_osToken == "false")
252
669
        {
253
669
            Boolean(false);
254
669
        }
255
45
        else
256
45
        {
257
45
            return EmitUnexpectedChar(ch);
258
45
        }
259
714
    }
260
577k
    else /* if( eCurState == STATE_NULL ) */
261
577k
    {
262
577k
        if (m_osToken == "null")
263
576k
        {
264
576k
            Null();
265
576k
        }
266
119
        else
267
119
        {
268
119
            return EmitUnexpectedChar(ch);
269
119
        }
270
577k
    }
271
668k
    m_aState.pop_back();
272
668k
    m_osToken.clear();
273
668k
    return true;
274
668k
}
275
276
/************************************************************************/
277
/*                          CheckStackEmpty()                           */
278
/************************************************************************/
279
280
bool CPLJSonStreamingParser::CheckStackEmpty()
281
364
{
282
364
    if (!m_aeObjectState.empty())
283
364
    {
284
364
        return EmitException("Unterminated object");
285
364
    }
286
0
    else if (!m_abArrayState.empty())
287
0
    {
288
0
        return EmitException("Unterminated array");
289
0
    }
290
0
    return true;
291
364
}
292
293
/************************************************************************/
294
/*                          IsHighSurrogate()                           */
295
/************************************************************************/
296
297
/** Return whether uc is a UTF-16 high (leading) surrogate, U+D800..U+DBFF.
 *
 * A range test is used rather than masking with 0xFC00: the mask ignores
 * bits above bit 15, so a supplementary code point such as U+1D800 would
 * otherwise be reported as a surrogate.
 */
static bool IsHighSurrogate(unsigned uc)
{
    return uc >= 0xD800U && uc <= 0xDBFFU;
}
301
302
/************************************************************************/
303
/*                           IsLowSurrogate()                           */
304
/************************************************************************/
305
306
/** Return whether uc is a UTF-16 low (trailing) surrogate, U+DC00..U+DFFF.
 *
 * A range test is used rather than masking with 0xFC00: the mask ignores
 * bits above bit 15, so a supplementary code point such as U+1DC00 would
 * otherwise be reported as a surrogate.
 */
static bool IsLowSurrogate(unsigned uc)
{
    return uc >= 0xDC00U && uc <= 0xDFFFU;
}
310
311
/************************************************************************/
312
/*                          GetSurrogatePair()                          */
313
/************************************************************************/
314
315
/** Combine a UTF-16 surrogate pair into a code point.
 *
 * Only the low 10 bits of each argument are used; the arguments are not
 * validated.
 *
 * @param hi high (leading) surrogate.
 * @param lo low (trailing) surrogate.
 * @return 0x10000 + (low 10 bits of hi) * 0x400 + (low 10 bits of lo).
 */
static unsigned GetSurrogatePair(unsigned hi, unsigned lo)
{
    const unsigned nHighBits = (hi & 0x3FF) << 10;
    const unsigned nLowBits = lo & 0x3FF;
    return nHighBits + nLowBits + 0x10000;
}
319
320
/************************************************************************/
321
/*                             IsHexDigit()                             */
322
/************************************************************************/
323
324
/** Return whether ch is one of 0-9, a-f or A-F. */
static bool IsHexDigit(char ch)
{
    if (ch >= '0' && ch <= '9')
        return true;
    if (ch >= 'a' && ch <= 'f')
        return true;
    return ch >= 'A' && ch <= 'F';
}
329
330
/************************************************************************/
331
/*                            HexToDecimal()                            */
332
/************************************************************************/
333
334
/** Convert one hexadecimal digit to its numeric value (0..15).
 *
 * The input is not validated: any character that is neither 0-9 nor a-f is
 * treated as if it were in A-F, so callers are expected to check it with
 * IsHexDigit() first.
 */
static unsigned HexToDecimal(char ch)
{
    if (ch >= '0' && ch <= '9')
        return static_cast<unsigned>(ch - '0');
    if (ch >= 'a' && ch <= 'f')
        return static_cast<unsigned>(10 + ch - 'a');
    // if (ch >= 'A' && ch <= 'F' )
    return static_cast<unsigned>(10 + ch - 'A');
}
343
344
/************************************************************************/
345
/*                             getUCSChar()                             */
346
/************************************************************************/
347
348
static unsigned getUCSChar(const std::string &unicode4HexChar)
349
767k
{
350
767k
    return (HexToDecimal(unicode4HexChar[0]) << 12) |
351
767k
           (HexToDecimal(unicode4HexChar[1]) << 8) |
352
767k
           (HexToDecimal(unicode4HexChar[2]) << 4) |
353
767k
           (HexToDecimal(unicode4HexChar[3]));
354
767k
}
355
356
/************************************************************************/
357
/*                           DecodeUnicode()                            */
358
/************************************************************************/
359
360
void CPLJSonStreamingParser::DecodeUnicode()
361
369k
{
362
369k
    constexpr char szReplacementUTF8[] = "\xEF\xBF\xBD";
363
369k
    unsigned nUCSChar;
364
369k
    if (m_osUnicodeHex.size() == 8)
365
28.0k
    {
366
28.0k
        unsigned nUCSHigh = getUCSChar(m_osUnicodeHex);
367
28.0k
        assert(IsHighSurrogate(nUCSHigh));
368
28.0k
        unsigned nUCSLow = getUCSChar(m_osUnicodeHex.substr(4));
369
28.0k
        if (IsLowSurrogate(nUCSLow))
370
1.24k
        {
371
1.24k
            nUCSChar = GetSurrogatePair(nUCSHigh, nUCSLow);
372
1.24k
        }
373
26.8k
        else
374
26.8k
        {
375
            /* Invalid code point. Insert the replacement char */
376
26.8k
            nUCSChar = 0xFFFFFFFFU;
377
26.8k
        }
378
28.0k
    }
379
341k
    else
380
341k
    {
381
341k
        assert(m_osUnicodeHex.size() == 4);
382
341k
        nUCSChar = getUCSChar(m_osUnicodeHex);
383
341k
    }
384
385
369k
    if (nUCSChar < 0x80)
386
243k
    {
387
243k
        m_osToken += static_cast<char>(nUCSChar);
388
243k
    }
389
126k
    else if (nUCSChar < 0x800)
390
844
    {
391
844
        m_osToken += static_cast<char>(0xC0 | (nUCSChar >> 6));
392
844
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
393
844
    }
394
125k
    else if (IsLowSurrogate(nUCSChar) || IsHighSurrogate(nUCSChar))
395
62.5k
    {
396
        /* Invalid code point. Insert the replacement char */
397
62.5k
        m_osToken += szReplacementUTF8;
398
62.5k
    }
399
62.6k
    else if (nUCSChar < 0x10000)
400
34.5k
    {
401
34.5k
        m_osToken += static_cast<char>(0xE0 | (nUCSChar >> 12));
402
34.5k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
403
34.5k
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
404
34.5k
    }
405
28.0k
    else if (nUCSChar < 0x110000)
406
1.24k
    {
407
1.24k
        m_osToken += static_cast<char>(0xF0 | ((nUCSChar >> 18) & 0x07));
408
1.24k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 12) & 0x3F));
409
1.24k
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
410
1.24k
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
411
1.24k
    }
412
26.8k
    else
413
26.8k
    {
414
        /* Invalid code point. Insert the replacement char */
415
26.8k
        m_osToken += szReplacementUTF8;
416
26.8k
    }
417
418
369k
    m_bInUnicode = false;
419
369k
    m_osUnicodeHex.clear();
420
369k
}
421
422
/************************************************************************/
423
/*                               Parse()                                */
424
/************************************************************************/
425
426
bool CPLJSonStreamingParser::Parse(std::string_view sStr, bool bFinished)
427
38.4k
{
428
38.4k
    const char *pStr = sStr.data();
429
38.4k
    size_t nLength = sStr.size();
430
20.5M
    while (true)
431
20.5M
    {
432
20.5M
        if (m_bExceptionOccurred || m_bStopParsing)
433
19.0k
            return false;
434
20.5M
        State eCurState = currentState();
435
20.5M
        if (eCurState == INIT)
436
36.2k
        {
437
36.2k
            SkipSpace(pStr, nLength);
438
36.2k
            if (nLength == 0)
439
1.90k
                return true;
440
34.3k
            if (m_bElementFound || !IsValidNewToken(*pStr))
441
718
            {
442
718
                return EmitUnexpectedChar(*pStr);
443
718
            }
444
33.5k
            if (!StartNewToken(pStr, nLength))
445
0
            {
446
0
                return false;
447
0
            }
448
33.5k
            m_bElementFound = true;
449
33.5k
        }
450
20.5M
        else if (eCurState == NUMBER)
451
363k
        {
452
363k
            if (m_osToken.empty())
453
363k
            {
454
                // Optimization to avoid using temporary buffer
455
363k
                auto nPos =
456
363k
                    std::string_view(pStr, nLength).find_first_of(" \t\r\n,}]");
457
363k
                if (nPos != std::string::npos)
458
363k
                {
459
363k
                    Number(std::string_view(pStr, nPos));
460
363k
                    m_aState.pop_back();
461
363k
                    pStr += nPos;
462
363k
                    nLength -= nPos;
463
363k
                    SkipSpace(pStr, nLength);
464
363k
                    continue;
465
363k
                }
466
363k
            }
467
468
8.31k
            while (nLength)
469
8.09k
            {
470
8.09k
                char ch = *pStr;
471
8.09k
                if (ch == '+' || ch == '-' ||
472
6.98k
                    isdigit(static_cast<unsigned char>(ch)) || ch == '.' ||
473
1.18k
                    ch == 'e' || ch == 'E')
474
6.99k
                {
475
6.99k
                    if (m_osToken.size() == 1024)
476
0
                    {
477
0
                        return EmitException("Too many characters in number");
478
0
                    }
479
6.99k
                    m_osToken += ch;
480
6.99k
                }
481
1.10k
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
482
896
                         ch == '}' || ch == ']')
483
221
                {
484
221
                    SkipSpace(pStr, nLength);
485
221
                    break;
486
221
                }
487
879
                else
488
879
                {
489
879
                    CPLString extendedToken(m_osToken + ch);
490
879
                    if ((STARTS_WITH_CI("Infinity", extendedToken) &&
491
152
                         m_osToken.size() + 1 <= strlen("Infinity")) ||
492
727
                        (STARTS_WITH_CI("-Infinity", extendedToken) &&
493
31
                         m_osToken.size() + 1 <= strlen("-Infinity")) ||
494
702
                        (STARTS_WITH_CI("NaN", extendedToken) &&
495
347
                         m_osToken.size() + 1 <= strlen("NaN")))
496
524
                    {
497
524
                        m_osToken += ch;
498
524
                    }
499
355
                    else
500
355
                    {
501
355
                        return EmitUnexpectedChar(ch);
502
355
                    }
503
879
                }
504
7.52k
                AdvanceChar(pStr, nLength);
505
7.52k
            }
506
507
436
            if (nLength != 0 || bFinished)
508
363
            {
509
363
                const char firstCh = m_osToken[0];
510
363
                if (firstCh == 'i' || firstCh == 'I')
511
19
                {
512
19
                    if (!EQUAL(m_osToken.c_str(), "Infinity"))
513
18
                    {
514
18
                        return EmitException("Invalid number");
515
18
                    }
516
19
                }
517
344
                else if (firstCh == '-')
518
56
                {
519
56
                    if (m_osToken[1] == 'i' || m_osToken[1] == 'I')
520
1
                    {
521
1
                        if (!EQUAL(m_osToken.c_str(), "-Infinity"))
522
1
                        {
523
1
                            return EmitException("Invalid number");
524
1
                        }
525
1
                    }
526
56
                }
527
288
                else if (firstCh == 'n' || firstCh == 'N')
528
130
                {
529
130
                    if (m_osToken[1] == 'a' || m_osToken[1] == 'A')
530
130
                    {
531
130
                        if (!EQUAL(m_osToken.c_str(), "NaN"))
532
18
                        {
533
18
                            return EmitException("Invalid number");
534
18
                        }
535
130
                    }
536
130
                }
537
538
326
                Number(m_osToken);
539
326
                m_osToken.clear();
540
326
                m_aState.pop_back();
541
326
            }
542
543
399
            if (nLength == 0)
544
212
            {
545
212
                if (bFinished)
546
139
                {
547
139
                    return CheckStackEmpty();
548
139
                }
549
73
                return true;
550
212
            }
551
399
        }
552
20.1M
        else if (eCurState == STRING)
553
4.63M
        {
554
4.63M
            bool bEOS = false;
555
556
4.63M
            if (m_osToken.empty() && !m_bInStringEscape && !m_bInUnicode)
557
4.63M
            {
558
                // Optimization to avoid using temporary buffer
559
4.63M
                auto nPos =
560
4.63M
                    std::string_view(pStr, nLength).find_first_of("\"\\");
561
4.63M
                if (nPos != std::string::npos && pStr[nPos] == '"')
562
4.42M
                {
563
4.42M
                    if (nPos > m_nMaxStringSize)
564
0
                    {
565
0
                        return EmitException("Too many characters in number");
566
0
                    }
567
4.42M
                    if (!m_aeObjectState.empty() &&
568
4.42M
                        m_aeObjectState.back() == IN_KEY)
569
3.02M
                    {
570
3.02M
                        StartObjectMember(std::string_view(pStr, nPos));
571
3.02M
                    }
572
1.39M
                    else
573
1.39M
                    {
574
1.39M
                        String(std::string_view(pStr, nPos));
575
1.39M
                    }
576
4.42M
                    m_aState.pop_back();
577
4.42M
                    pStr += nPos + 1;
578
4.42M
                    nLength -= nPos + 1;
579
4.42M
                    SkipSpace(pStr, nLength);
580
4.42M
                    if (nLength != 0)
581
4.42M
                        continue;
582
257
                    bEOS = true;
583
257
                }
584
4.63M
            }
585
586
72.9M
            while (nLength)
587
72.9M
            {
588
72.9M
                if (m_osToken.size() == m_nMaxStringSize)
589
0
                {
590
0
                    return EmitException("Too many characters in number");
591
0
                }
592
593
72.9M
                char ch = *pStr;
594
72.9M
                if (m_bInUnicode)
595
1.71M
                {
596
1.71M
                    if (m_osUnicodeHex.size() == 8)
597
28.0k
                    {
598
28.0k
                        DecodeUnicode();
599
28.0k
                    }
600
1.68M
                    else if (m_osUnicodeHex.size() == 4)
601
125k
                    {
602
                        /* Start of next surrogate pair ? */
603
125k
                        if (m_nLastChar == '\\')
604
34.1k
                        {
605
34.1k
                            if (ch == 'u')
606
28.1k
                            {
607
28.1k
                                AdvanceChar(pStr, nLength);
608
28.1k
                                continue;
609
28.1k
                            }
610
5.91k
                            else
611
5.91k
                            {
612
                                /* will be replacement character */
613
5.91k
                                DecodeUnicode();
614
5.91k
                                m_bInStringEscape = true;
615
5.91k
                            }
616
34.1k
                        }
617
91.8k
                        else if (m_nLastChar == 'u')
618
28.1k
                        {
619
28.1k
                            if (IsHexDigit(ch))
620
28.1k
                            {
621
28.1k
                                m_osUnicodeHex += ch;
622
28.1k
                            }
623
58
                            else
624
58
                            {
625
58
                                char szMessage[64];
626
58
                                snprintf(szMessage, sizeof(szMessage),
627
58
                                         "Illegal character in unicode "
628
58
                                         "sequence (\\%c)",
629
58
                                         ch);
630
58
                                return EmitException(szMessage);
631
58
                            }
632
28.1k
                            AdvanceChar(pStr, nLength);
633
28.1k
                            continue;
634
28.1k
                        }
635
63.6k
                        else if (ch == '\\')
636
34.1k
                        {
637
34.1k
                            AdvanceChar(pStr, nLength);
638
34.1k
                            continue;
639
34.1k
                        }
640
29.5k
                        else
641
29.5k
                        {
642
                            /* will be replacement character */
643
29.5k
                            DecodeUnicode();
644
29.5k
                        }
645
125k
                    }
646
1.56M
                    else
647
1.56M
                    {
648
1.56M
                        if (IsHexDigit(ch))
649
1.56M
                        {
650
1.56M
                            m_osUnicodeHex += ch;
651
1.56M
                            if (m_osUnicodeHex.size() == 4 &&
652
369k
                                !IsHighSurrogate(getUCSChar(m_osUnicodeHex)))
653
305k
                            {
654
305k
                                DecodeUnicode();
655
305k
                            }
656
1.56M
                        }
657
245
                        else
658
245
                        {
659
245
                            char szMessage[64];
660
245
                            snprintf(szMessage, sizeof(szMessage),
661
245
                                     "Illegal character in unicode "
662
245
                                     "sequence (\\%c)",
663
245
                                     ch);
664
245
                            return EmitException(szMessage);
665
245
                        }
666
1.56M
                        AdvanceChar(pStr, nLength);
667
1.56M
                        continue;
668
1.56M
                    }
669
1.71M
                }
670
671
71.3M
                if (m_bInStringEscape)
672
976k
                {
673
976k
                    if (ch == '"' || ch == '\\' || ch == '/')
674
285k
                        m_osToken += ch;
675
691k
                    else if (ch == 'b')
676
9.62k
                        m_osToken += '\b';
677
681k
                    else if (ch == 'f')
678
6.08k
                        m_osToken += '\f';
679
675k
                    else if (ch == 'n')
680
252k
                        m_osToken += '\n';
681
423k
                    else if (ch == 'r')
682
27.5k
                        m_osToken += '\r';
683
395k
                    else if (ch == 't')
684
25.4k
                        m_osToken += '\t';
685
369k
                    else if (ch == 'u')
686
369k
                    {
687
369k
                        m_bInUnicode = true;
688
369k
                    }
689
265
                    else
690
265
                    {
691
265
                        char szMessage[32];
692
265
                        snprintf(szMessage, sizeof(szMessage),
693
265
                                 "Illegal escape sequence (\\%c)", ch);
694
265
                        return EmitException(szMessage);
695
265
                    }
696
976k
                    m_bInStringEscape = false;
697
976k
                    AdvanceChar(pStr, nLength);
698
976k
                    continue;
699
976k
                }
700
70.3M
                else if (ch == '\\')
701
970k
                {
702
970k
                    m_bInStringEscape = true;
703
970k
                    AdvanceChar(pStr, nLength);
704
970k
                    continue;
705
970k
                }
706
69.3M
                else if (ch == '"')
707
204k
                {
708
204k
                    bEOS = true;
709
204k
                    AdvanceChar(pStr, nLength);
710
204k
                    SkipSpace(pStr, nLength);
711
712
204k
                    if (!m_aeObjectState.empty() &&
713
204k
                        m_aeObjectState.back() == IN_KEY)
714
122k
                    {
715
122k
                        StartObjectMember(m_osToken);
716
122k
                    }
717
82.6k
                    else
718
82.6k
                    {
719
82.6k
                        String(m_osToken);
720
82.6k
                    }
721
204k
                    m_osToken.clear();
722
204k
                    m_aState.pop_back();
723
724
204k
                    break;
725
204k
                }
726
727
69.1M
                m_osToken += ch;
728
69.1M
                AdvanceChar(pStr, nLength);
729
69.1M
            }
730
731
209k
            if (nLength == 0)
732
5.13k
            {
733
5.13k
                if (bFinished)
734
1.50k
                {
735
1.50k
                    if (!bEOS)
736
1.31k
                    {
737
1.31k
                        return EmitException("Unterminated string");
738
1.31k
                    }
739
190
                    return CheckStackEmpty();
740
1.50k
                }
741
3.62k
                return true;
742
5.13k
            }
743
209k
        }
744
15.5M
        else if (eCurState == ARRAY)
745
2.33M
        {
746
2.33M
            SkipSpace(pStr, nLength);
747
2.33M
            if (nLength == 0)
748
299
            {
749
299
                if (bFinished)
750
108
                {
751
108
                    return EmitException("Unterminated array");
752
108
                }
753
191
                return true;
754
299
            }
755
756
2.33M
            char ch = *pStr;
757
2.33M
            if (ch == ',')
758
579k
            {
759
579k
                if (m_abArrayState.back() != ArrayState::AFTER_VALUE)
760
81
                {
761
81
                    return EmitUnexpectedChar(ch, "','");
762
81
                }
763
579k
                m_abArrayState.back() = ArrayState::AFTER_COMMA;
764
579k
                AdvanceChar(pStr, nLength);
765
579k
            }
766
1.75M
            else if (ch == ']')
767
554k
            {
768
554k
                if (m_abArrayState.back() == ArrayState::AFTER_COMMA)
769
59
                {
770
59
                    return EmitException("Missing value");
771
59
                }
772
773
554k
                EndArray();
774
554k
                AdvanceChar(pStr, nLength);
775
554k
                m_abArrayState.pop_back();
776
554k
                m_aState.pop_back();
777
554k
            }
778
1.20M
            else if (IsValidNewToken(ch))
779
1.20M
            {
780
1.20M
                if (m_abArrayState.back() == ArrayState::AFTER_VALUE)
781
633
                {
782
633
                    return EmitException(
783
633
                        "Unexpected state: ',' or ']' expected");
784
633
                }
785
1.20M
                m_abArrayState.back() = ArrayState::AFTER_VALUE;
786
787
1.20M
                StartArrayMember();
788
1.20M
                if (!StartNewToken(pStr, nLength))
789
10
                {
790
10
                    return false;
791
10
                }
792
1.20M
            }
793
880
            else
794
880
            {
795
880
                return EmitUnexpectedChar(ch);
796
880
            }
797
2.33M
        }
798
13.1M
        else if (eCurState == OBJECT)
799
12.5M
        {
800
12.5M
            SkipSpace(pStr, nLength);
801
12.5M
            if (nLength == 0)
802
524
            {
803
524
                if (bFinished)
804
190
                {
805
190
                    return EmitException("Unterminated object");
806
190
                }
807
334
                return true;
808
524
            }
809
810
12.5M
            char ch = *pStr;
811
12.5M
            if (ch == ',')
812
1.96M
            {
813
1.96M
                if (m_aeObjectState.back() != IN_VALUE)
814
215
                {
815
215
                    return EmitUnexpectedChar(ch, "','");
816
215
                }
817
818
1.96M
                m_aeObjectState.back() = WAITING_KEY;
819
1.96M
                AdvanceChar(pStr, nLength);
820
1.96M
            }
821
10.5M
            else if (ch == ':')
822
3.14M
            {
823
3.14M
                if (m_aeObjectState.back() != IN_KEY)
824
276
                {
825
276
                    return EmitUnexpectedChar(ch, "':'");
826
276
                }
827
3.14M
                m_aeObjectState.back() = KEY_FINISHED;
828
3.14M
                AdvanceChar(pStr, nLength);
829
3.14M
            }
830
7.40M
            else if (ch == '}')
831
1.10M
            {
832
1.10M
                if (m_aeObjectState.back() == WAITING_KEY ||
833
1.05M
                    m_aeObjectState.back() == IN_VALUE)
834
1.10M
                {
835
                    // nothing
836
1.10M
                }
837
90
                else
838
90
                {
839
90
                    return EmitException("Missing value");
840
90
                }
841
842
1.10M
                EndObject();
843
1.10M
                AdvanceChar(pStr, nLength);
844
1.10M
                m_aeObjectState.pop_back();
845
1.10M
                m_aState.pop_back();
846
1.10M
            }
847
6.30M
            else if (IsValidNewToken(ch))
848
6.29M
            {
849
6.29M
                if (m_aeObjectState.back() == WAITING_KEY)
850
3.15M
                {
851
3.15M
                    if (ch != '"')
852
713
                    {
853
713
                        return EmitUnexpectedChar(ch, "'\"'");
854
713
                    }
855
3.15M
                    m_aeObjectState.back() = IN_KEY;
856
3.15M
                }
857
3.14M
                else if (m_aeObjectState.back() == KEY_FINISHED)
858
3.14M
                {
859
3.14M
                    m_aeObjectState.back() = IN_VALUE;
860
3.14M
                }
861
1.80k
                else
862
1.80k
                {
863
1.80k
                    return EmitException("Unexpected state");
864
1.80k
                }
865
6.29M
                if (!StartNewToken(pStr, nLength))
866
14
                {
867
14
                    return false;
868
14
                }
869
6.29M
            }
870
3.88k
            else
871
3.88k
            {
872
3.88k
                return EmitUnexpectedChar(ch);
873
3.88k
            }
874
12.5M
        }
875
669k
        else /* if( eCurState == STATE_TRUE || eCurState == STATE_FALSE ||
876
                    eCurState == STATE_NULL ) */
877
669k
        {
878
3.34M
            while (nLength)
879
3.34M
            {
880
3.34M
                char ch = *pStr;
881
3.34M
                if (eCurState == STATE_NULL && (ch == 'a' || ch == 'A') &&
882
242
                    m_osToken.size() == 1)
883
220
                {
884
220
                    m_aState.back() = NUMBER;
885
220
                    break;
886
220
                }
887
3.34M
                if (isalpha(static_cast<unsigned char>(ch)))
888
2.67M
                {
889
2.67M
                    m_osToken += ch;
890
2.67M
                    if (eCurState == STATE_TRUE &&
891
362k
                        (m_osToken.size() > strlen("true") ||
892
362k
                         memcmp(m_osToken.c_str(), "true", m_osToken.size()) !=
893
362k
                             0))
894
202
                    {
895
202
                        return EmitUnexpectedChar(*pStr);
896
202
                    }
897
2.67M
                    else if (eCurState == STATE_FALSE &&
898
3.81k
                             (m_osToken.size() > strlen("false") ||
899
3.79k
                              memcmp(m_osToken.c_str(), "false",
900
3.79k
                                     m_osToken.size()) != 0))
901
148
                    {
902
148
                        return EmitUnexpectedChar(*pStr);
903
148
                    }
904
2.67M
                    else if (eCurState == STATE_NULL &&
905
2.30M
                             (m_osToken.size() > strlen("null") ||
906
2.30M
                              memcmp(m_osToken.c_str(), "null",
907
2.30M
                                     m_osToken.size()) != 0))
908
151
                    {
909
151
                        return EmitUnexpectedChar(*pStr);
910
151
                    }
911
2.67M
                }
912
668k
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
913
509k
                         ch == '}' || ch == ']')
914
668k
                {
915
668k
                    SkipSpace(pStr, nLength);
916
668k
                    break;
917
668k
                }
918
120
                else
919
120
                {
920
120
                    return EmitUnexpectedChar(ch);
921
120
                }
922
2.67M
                AdvanceChar(pStr, nLength);
923
2.67M
            }
924
668k
            if (m_aState.back() == NUMBER)
925
220
            {
926
220
                continue;
927
220
            }
928
668k
            if (nLength == 0)
929
164
            {
930
164
                if (bFinished)
931
87
                {
932
87
                    if (!CheckAndEmitTrueFalseOrNull(0))
933
52
                        return false;
934
35
                    return CheckStackEmpty();
935
87
                }
936
77
                return true;
937
164
            }
938
939
668k
            if (!CheckAndEmitTrueFalseOrNull(*pStr))
940
225
                return false;
941
668k
        }
942
20.5M
    }
943
38.4k
}
944
945
/************************************************************************/
946
/*                        GetSerializedString()                         */
947
/************************************************************************/
948
949
std::string CPLJSonStreamingParser::GetSerializedString(std::string_view s)
950
0
{
951
0
    std::string osStr("\"");
952
0
    for (char ch : s)
953
0
    {
954
0
        if (ch == '\b')
955
0
            osStr += "\\b";
956
0
        else if (ch == '\f')
957
0
            osStr += "\\f";
958
0
        else if (ch == '\n')
959
0
            osStr += "\\n";
960
0
        else if (ch == '\r')
961
0
            osStr += "\\r";
962
0
        else if (ch == '\t')
963
0
            osStr += "\\t";
964
0
        else if (ch == '"')
965
0
            osStr += "\\\"";
966
0
        else if (ch == '\\')
967
0
            osStr += "\\\\";
968
0
        else if (static_cast<unsigned char>(ch) < ' ')
969
0
            osStr += CPLSPrintf("\\u%04X", ch);
970
0
        else
971
0
            osStr += ch;
972
0
    }
973
0
    osStr += "\"";
974
0
    return osStr;
975
0
}
976
977
/*! @endcond */