Coverage Report

Created: 2025-11-16 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_json_streaming_parser.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  JSon streaming parser
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2017, Even Rouault <even.rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
/*! @cond Doxygen_Suppress */
14
15
#include <assert.h>
16
#include <ctype.h>   // isdigit...
17
#include <stdio.h>   // snprintf
18
#include <string.h>  // strlen
19
#include <vector>
20
#include <string>
21
22
#include "cpl_conv.h"
23
#include "cpl_string.h"
24
#include "cpl_json_streaming_parser.h"
25
26
/************************************************************************/
27
/*                       CPLJSonStreamingParser()                       */
28
/************************************************************************/
29
30
// Construct a parser whose state stack contains only the INIT state, i.e.
// one that is waiting for the first token of a document.
CPLJSonStreamingParser::CPLJSonStreamingParser()
{
    m_aState.emplace_back(INIT);
}
34
35
/************************************************************************/
36
/*                      ~CPLJSonStreamingParser()                       */
37
/************************************************************************/
38
39
// Nothing to release explicitly: the members clean up after themselves.
CPLJSonStreamingParser::~CPLJSonStreamingParser() = default;
42
43
/************************************************************************/
44
/*                           SetMaxDepth()                              */
45
/************************************************************************/
46
47
void CPLJSonStreamingParser::SetMaxDepth(size_t nVal)
48
0
{
49
0
    m_nMaxDepth = nVal;
50
0
}
51
52
/************************************************************************/
53
/*                         SetMaxStringSize()                           */
54
/************************************************************************/
55
56
void CPLJSonStreamingParser::SetMaxStringSize(size_t nVal)
57
0
{
58
0
    m_nMaxStringSize = nVal;
59
0
}
60
61
/************************************************************************/
62
/*                                Reset()                               */
63
/************************************************************************/
64
65
void CPLJSonStreamingParser::Reset()
66
0
{
67
0
    m_bExceptionOccurred = false;
68
0
    m_bElementFound = false;
69
0
    m_nLastChar = 0;
70
0
    m_nLineCounter = 1;
71
0
    m_nCharCounter = 1;
72
0
    m_aState.clear();
73
0
    m_aState.push_back(INIT);
74
0
    m_osToken.clear();
75
0
    m_abArrayState.clear();
76
0
    m_aeObjectState.clear();
77
0
    m_bInStringEscape = false;
78
0
    m_bInUnicode = false;
79
0
    m_osUnicodeHex.clear();
80
0
}
81
82
/************************************************************************/
83
/*                              AdvanceChar()                           */
84
/************************************************************************/
85
86
void CPLJSonStreamingParser::AdvanceChar(const char *&pStr, size_t &nLength)
87
0
{
88
0
    if (*pStr == 13 && m_nLastChar != 10)
89
0
    {
90
0
        m_nLineCounter++;
91
0
        m_nCharCounter = 0;
92
0
    }
93
0
    else if (*pStr == 10 && m_nLastChar != 13)
94
0
    {
95
0
        m_nLineCounter++;
96
0
        m_nCharCounter = 0;
97
0
    }
98
0
    m_nLastChar = *pStr;
99
100
0
    pStr++;
101
0
    nLength--;
102
0
    m_nCharCounter++;
103
0
}
104
105
/************************************************************************/
106
/*                               SkipSpace()                            */
107
/************************************************************************/
108
109
void CPLJSonStreamingParser::SkipSpace(const char *&pStr, size_t &nLength)
110
0
{
111
0
    while (nLength > 0 && isspace(static_cast<unsigned char>(*pStr)))
112
0
    {
113
0
        AdvanceChar(pStr, nLength);
114
0
    }
115
0
}
116
117
/************************************************************************/
118
/*                             EmitException()                          */
119
/************************************************************************/
120
121
bool CPLJSonStreamingParser::EmitException(const char *pszMessage)
122
0
{
123
0
    m_bExceptionOccurred = true;
124
0
    CPLString osMsg;
125
0
    osMsg.Printf("At line %d, character %d: %s", m_nLineCounter, m_nCharCounter,
126
0
                 pszMessage);
127
0
    Exception(osMsg.c_str());
128
0
    return false;
129
0
}
130
131
/************************************************************************/
132
/*                             StopParsing()                            */
133
/************************************************************************/
134
135
void CPLJSonStreamingParser::StopParsing()
136
0
{
137
0
    m_bStopParsing = true;
138
0
}
139
140
/************************************************************************/
141
/*                          EmitUnexpectedChar()                        */
142
/************************************************************************/
143
144
bool CPLJSonStreamingParser::EmitUnexpectedChar(char ch,
145
                                                const char *pszExpecting)
146
0
{
147
0
    char szMessage[64];
148
0
    if (pszExpecting)
149
0
    {
150
0
        snprintf(szMessage, sizeof(szMessage),
151
0
                 "Unexpected character (%c). Expecting %s", ch, pszExpecting);
152
0
    }
153
0
    else
154
0
    {
155
0
        snprintf(szMessage, sizeof(szMessage), "Unexpected character (%c)", ch);
156
0
    }
157
0
    return EmitException(szMessage);
158
0
}
159
160
/************************************************************************/
161
/*                            IsValidNewToken()                         */
162
/************************************************************************/
163
164
// Tell whether ch can be the first character of a value: an array, an
// object, a string, a number (including the Infinity / NaN spellings), or
// one of the true / false / null literals.
static bool IsValidNewToken(char ch)
{
    switch (ch)
    {
        case '[':
        case '{':
        case '"':
        case '-':
        case '.':
        case 't':
        case 'f':
        case 'n':
        case 'i':
        case 'I':
        case 'N':
            return true;

        default:
            return isdigit(static_cast<unsigned char>(ch)) != 0;
    }
}
170
171
/************************************************************************/
172
/*                             StartNewToken()                          */
173
/************************************************************************/
174
175
bool CPLJSonStreamingParser::StartNewToken(const char *&pStr, size_t &nLength)
176
0
{
177
0
    char ch = *pStr;
178
0
    if (ch == '{')
179
0
    {
180
0
        if (m_aState.size() == m_nMaxDepth)
181
0
        {
182
0
            return EmitException("Too many nested objects and/or arrays");
183
0
        }
184
0
        StartObject();
185
0
        m_aeObjectState.push_back(WAITING_KEY);
186
0
        m_aState.push_back(OBJECT);
187
0
        AdvanceChar(pStr, nLength);
188
0
    }
189
0
    else if (ch == '"')
190
0
    {
191
0
        m_aState.push_back(STRING);
192
0
        AdvanceChar(pStr, nLength);
193
0
    }
194
0
    else if (ch == '[')
195
0
    {
196
0
        if (m_aState.size() == m_nMaxDepth)
197
0
        {
198
0
            return EmitException("Too many nested objects and/or arrays");
199
0
        }
200
0
        StartArray();
201
0
        m_abArrayState.push_back(ArrayState::INIT);
202
0
        m_aState.push_back(ARRAY);
203
0
        AdvanceChar(pStr, nLength);
204
0
    }
205
0
    else if (ch == '-' || ch == '.' ||
206
0
             isdigit(static_cast<unsigned char>(ch)) || ch == 'i' ||
207
0
             ch == 'I' || ch == 'N')
208
0
    {
209
0
        m_aState.push_back(NUMBER);
210
0
    }
211
0
    else if (ch == 't')
212
0
    {
213
0
        m_aState.push_back(STATE_TRUE);
214
0
    }
215
0
    else if (ch == 'f')
216
0
    {
217
0
        m_aState.push_back(STATE_FALSE);
218
0
    }
219
0
    else if (ch == 'n')
220
0
    {
221
0
        m_aState.push_back(STATE_NULL); /* might be nan */
222
0
    }
223
0
    else
224
0
    {
225
0
        assert(false);
226
0
    }
227
0
    return true;
228
0
}
229
230
/************************************************************************/
231
/*                       CheckAndEmitTrueFalseOrNull()                  */
232
/************************************************************************/
233
234
bool CPLJSonStreamingParser::CheckAndEmitTrueFalseOrNull(char ch)
235
0
{
236
0
    State eCurState = currentState();
237
238
0
    if (eCurState == STATE_TRUE)
239
0
    {
240
0
        if (m_osToken == "true")
241
0
        {
242
0
            Boolean(true);
243
0
        }
244
0
        else
245
0
        {
246
0
            return EmitUnexpectedChar(ch);
247
0
        }
248
0
    }
249
0
    else if (eCurState == STATE_FALSE)
250
0
    {
251
0
        if (m_osToken == "false")
252
0
        {
253
0
            Boolean(false);
254
0
        }
255
0
        else
256
0
        {
257
0
            return EmitUnexpectedChar(ch);
258
0
        }
259
0
    }
260
0
    else /* if( eCurState == STATE_NULL ) */
261
0
    {
262
0
        if (m_osToken == "null")
263
0
        {
264
0
            Null();
265
0
        }
266
0
        else
267
0
        {
268
0
            return EmitUnexpectedChar(ch);
269
0
        }
270
0
    }
271
0
    m_aState.pop_back();
272
0
    m_osToken.clear();
273
0
    return true;
274
0
}
275
276
/************************************************************************/
277
/*                           CheckStackEmpty()                          */
278
/************************************************************************/
279
280
bool CPLJSonStreamingParser::CheckStackEmpty()
281
0
{
282
0
    if (!m_aeObjectState.empty())
283
0
    {
284
0
        return EmitException("Unterminated object");
285
0
    }
286
0
    else if (!m_abArrayState.empty())
287
0
    {
288
0
        return EmitException("Unterminated array");
289
0
    }
290
0
    return true;
291
0
}
292
293
/************************************************************************/
294
/*                           IsHighSurrogate()                          */
295
/************************************************************************/
296
297
// Tell whether uc is a UTF-16 high (leading) surrogate, i.e. lies in
// [0xD800, 0xDBFF].
//
// The full value is compared rather than only its 16 low bits, so that code
// points above the BMP whose low bits happen to look like a surrogate
// (e.g. U+1D800) are not misclassified.
static bool IsHighSurrogate(unsigned uc)
{
    return uc >= 0xD800U && uc <= 0xDBFFU;
}
301
302
/************************************************************************/
303
/*                           IsLowSurrogate()                           */
304
/************************************************************************/
305
306
// Tell whether uc is a UTF-16 low (trailing) surrogate, i.e. lies in
// [0xDC00, 0xDFFF].
//
// The full value is compared rather than only its 16 low bits, so that code
// points above the BMP whose low bits happen to look like a surrogate
// (e.g. U+1DC00 or U+10FFFF) are not misclassified.
static bool IsLowSurrogate(unsigned uc)
{
    return uc >= 0xDC00U && uc <= 0xDFFFU;
}
310
311
/************************************************************************/
312
/*                         GetSurrogatePair()                           */
313
/************************************************************************/
314
315
// Combine a UTF-16 surrogate pair into a code point: the 10 low bits of hi
// become bits 10..19, the 10 low bits of lo become bits 0..9, and 0x10000
// is added to the result.
static unsigned GetSurrogatePair(unsigned hi, unsigned lo)
{
    const unsigned nHighBits = (hi & 0x3FF) << 10;
    const unsigned nLowBits = lo & 0x3FF;
    return 0x10000 + (nHighBits | nLowBits);
}
319
320
/************************************************************************/
321
/*                            IsHexDigit()                              */
322
/************************************************************************/
323
324
// Tell whether ch is one of 0-9, a-f or A-F.
static bool IsHexDigit(char ch)
{
    const auto IsBetween = [ch](char chFirst, char chLast)
    { return chFirst <= ch && ch <= chLast; };

    return IsBetween('0', '9') || IsBetween('a', 'f') || IsBetween('A', 'F');
}
329
330
/************************************************************************/
331
/*                           HexToDecimal()                             */
332
/************************************************************************/
333
334
// Return the numeric value (0..15) of hexadecimal digit ch.
//
// ch is expected to satisfy IsHexDigit(); anything that is neither a
// decimal digit nor a-f is handled as if it were an upper-case A-F digit.
static unsigned HexToDecimal(char ch)
{
    if ('0' <= ch && ch <= '9')
    {
        return static_cast<unsigned>(ch - '0');
    }

    const char chBase = ('a' <= ch && ch <= 'f') ? 'a' : 'A';
    return static_cast<unsigned>(10 + ch - chBase);
}
343
344
/************************************************************************/
345
/*                            getUCSChar()                              */
346
/************************************************************************/
347
348
static unsigned getUCSChar(const std::string &unicode4HexChar)
349
0
{
350
0
    return (HexToDecimal(unicode4HexChar[0]) << 12) |
351
0
           (HexToDecimal(unicode4HexChar[1]) << 8) |
352
0
           (HexToDecimal(unicode4HexChar[2]) << 4) |
353
0
           (HexToDecimal(unicode4HexChar[3]));
354
0
}
355
356
/************************************************************************/
357
/*                           DecodeUnicode()                            */
358
/************************************************************************/
359
360
void CPLJSonStreamingParser::DecodeUnicode()
361
0
{
362
0
    constexpr char szReplacementUTF8[] = "\xEF\xBF\xBD";
363
0
    unsigned nUCSChar;
364
0
    if (m_osUnicodeHex.size() == 8)
365
0
    {
366
0
        unsigned nUCSHigh = getUCSChar(m_osUnicodeHex);
367
0
        assert(IsHighSurrogate(nUCSHigh));
368
0
        unsigned nUCSLow = getUCSChar(m_osUnicodeHex.substr(4));
369
0
        if (IsLowSurrogate(nUCSLow))
370
0
        {
371
0
            nUCSChar = GetSurrogatePair(nUCSHigh, nUCSLow);
372
0
        }
373
0
        else
374
0
        {
375
            /* Invalid code point. Insert the replacement char */
376
0
            nUCSChar = 0xFFFFFFFFU;
377
0
        }
378
0
    }
379
0
    else
380
0
    {
381
0
        assert(m_osUnicodeHex.size() == 4);
382
0
        nUCSChar = getUCSChar(m_osUnicodeHex);
383
0
    }
384
385
0
    if (nUCSChar < 0x80)
386
0
    {
387
0
        m_osToken += static_cast<char>(nUCSChar);
388
0
    }
389
0
    else if (nUCSChar < 0x800)
390
0
    {
391
0
        m_osToken += static_cast<char>(0xC0 | (nUCSChar >> 6));
392
0
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
393
0
    }
394
0
    else if (IsLowSurrogate(nUCSChar) || IsHighSurrogate(nUCSChar))
395
0
    {
396
        /* Invalid code point. Insert the replacement char */
397
0
        m_osToken += szReplacementUTF8;
398
0
    }
399
0
    else if (nUCSChar < 0x10000)
400
0
    {
401
0
        m_osToken += static_cast<char>(0xE0 | (nUCSChar >> 12));
402
0
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
403
0
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
404
0
    }
405
0
    else if (nUCSChar < 0x110000)
406
0
    {
407
0
        m_osToken += static_cast<char>(0xF0 | ((nUCSChar >> 18) & 0x07));
408
0
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 12) & 0x3F));
409
0
        m_osToken += static_cast<char>(0x80 | ((nUCSChar >> 6) & 0x3F));
410
0
        m_osToken += static_cast<char>(0x80 | (nUCSChar & 0x3F));
411
0
    }
412
0
    else
413
0
    {
414
        /* Invalid code point. Insert the replacement char */
415
0
        m_osToken += szReplacementUTF8;
416
0
    }
417
418
0
    m_bInUnicode = false;
419
0
    m_osUnicodeHex.clear();
420
0
}
421
422
/************************************************************************/
423
/*                              Parse()                                 */
424
/************************************************************************/
425
426
bool CPLJSonStreamingParser::Parse(std::string_view sStr, bool bFinished)
427
0
{
428
0
    const char *pStr = sStr.data();
429
0
    size_t nLength = sStr.size();
430
0
    while (true)
431
0
    {
432
0
        if (m_bExceptionOccurred || m_bStopParsing)
433
0
            return false;
434
0
        State eCurState = currentState();
435
0
        if (eCurState == INIT)
436
0
        {
437
0
            SkipSpace(pStr, nLength);
438
0
            if (nLength == 0)
439
0
                return true;
440
0
            if (m_bElementFound || !IsValidNewToken(*pStr))
441
0
            {
442
0
                return EmitUnexpectedChar(*pStr);
443
0
            }
444
0
            if (!StartNewToken(pStr, nLength))
445
0
            {
446
0
                return false;
447
0
            }
448
0
            m_bElementFound = true;
449
0
        }
450
0
        else if (eCurState == NUMBER)
451
0
        {
452
0
            if (m_osToken.empty())
453
0
            {
454
                // Optimization to avoid using temporary buffer
455
0
                auto nPos =
456
0
                    std::string_view(pStr, nLength).find_first_of(" \t\r\n,}]");
457
0
                if (nPos != std::string::npos)
458
0
                {
459
0
                    Number(std::string_view(pStr, nPos));
460
0
                    m_aState.pop_back();
461
0
                    pStr += nPos;
462
0
                    nLength -= nPos;
463
0
                    SkipSpace(pStr, nLength);
464
0
                    continue;
465
0
                }
466
0
            }
467
468
0
            while (nLength)
469
0
            {
470
0
                char ch = *pStr;
471
0
                if (ch == '+' || ch == '-' ||
472
0
                    isdigit(static_cast<unsigned char>(ch)) || ch == '.' ||
473
0
                    ch == 'e' || ch == 'E')
474
0
                {
475
0
                    if (m_osToken.size() == 1024)
476
0
                    {
477
0
                        return EmitException("Too many characters in number");
478
0
                    }
479
0
                    m_osToken += ch;
480
0
                }
481
0
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
482
0
                         ch == '}' || ch == ']')
483
0
                {
484
0
                    SkipSpace(pStr, nLength);
485
0
                    break;
486
0
                }
487
0
                else
488
0
                {
489
0
                    CPLString extendedToken(m_osToken + ch);
490
0
                    if ((STARTS_WITH_CI("Infinity", extendedToken) &&
491
0
                         m_osToken.size() + 1 <= strlen("Infinity")) ||
492
0
                        (STARTS_WITH_CI("-Infinity", extendedToken) &&
493
0
                         m_osToken.size() + 1 <= strlen("-Infinity")) ||
494
0
                        (STARTS_WITH_CI("NaN", extendedToken) &&
495
0
                         m_osToken.size() + 1 <= strlen("NaN")))
496
0
                    {
497
0
                        m_osToken += ch;
498
0
                    }
499
0
                    else
500
0
                    {
501
0
                        return EmitUnexpectedChar(ch);
502
0
                    }
503
0
                }
504
0
                AdvanceChar(pStr, nLength);
505
0
            }
506
507
0
            if (nLength != 0 || bFinished)
508
0
            {
509
0
                const char firstCh = m_osToken[0];
510
0
                if (firstCh == 'i' || firstCh == 'I')
511
0
                {
512
0
                    if (!EQUAL(m_osToken.c_str(), "Infinity"))
513
0
                    {
514
0
                        return EmitException("Invalid number");
515
0
                    }
516
0
                }
517
0
                else if (firstCh == '-')
518
0
                {
519
0
                    if (m_osToken[1] == 'i' || m_osToken[1] == 'I')
520
0
                    {
521
0
                        if (!EQUAL(m_osToken.c_str(), "-Infinity"))
522
0
                        {
523
0
                            return EmitException("Invalid number");
524
0
                        }
525
0
                    }
526
0
                }
527
0
                else if (firstCh == 'n' || firstCh == 'N')
528
0
                {
529
0
                    if (m_osToken[1] == 'a' || m_osToken[1] == 'A')
530
0
                    {
531
0
                        if (!EQUAL(m_osToken.c_str(), "NaN"))
532
0
                        {
533
0
                            return EmitException("Invalid number");
534
0
                        }
535
0
                    }
536
0
                }
537
538
0
                Number(m_osToken);
539
0
                m_osToken.clear();
540
0
                m_aState.pop_back();
541
0
            }
542
543
0
            if (nLength == 0)
544
0
            {
545
0
                if (bFinished)
546
0
                {
547
0
                    return CheckStackEmpty();
548
0
                }
549
0
                return true;
550
0
            }
551
0
        }
552
0
        else if (eCurState == STRING)
553
0
        {
554
0
            bool bEOS = false;
555
556
0
            if (m_osToken.empty() && !m_bInStringEscape && !m_bInUnicode)
557
0
            {
558
                // Optimization to avoid using temporary buffer
559
0
                auto nPos =
560
0
                    std::string_view(pStr, nLength).find_first_of("\"\\");
561
0
                if (nPos != std::string::npos && pStr[nPos] == '"')
562
0
                {
563
0
                    if (nPos > m_nMaxStringSize)
564
0
                    {
565
0
                        return EmitException("Too many characters in number");
566
0
                    }
567
0
                    if (!m_aeObjectState.empty() &&
568
0
                        m_aeObjectState.back() == IN_KEY)
569
0
                    {
570
0
                        StartObjectMember(std::string_view(pStr, nPos));
571
0
                    }
572
0
                    else
573
0
                    {
574
0
                        String(std::string_view(pStr, nPos));
575
0
                    }
576
0
                    m_aState.pop_back();
577
0
                    pStr += nPos + 1;
578
0
                    nLength -= nPos + 1;
579
0
                    SkipSpace(pStr, nLength);
580
0
                    if (nLength != 0)
581
0
                        continue;
582
0
                    bEOS = true;
583
0
                }
584
0
            }
585
586
0
            while (nLength)
587
0
            {
588
0
                if (m_osToken.size() == m_nMaxStringSize)
589
0
                {
590
0
                    return EmitException("Too many characters in number");
591
0
                }
592
593
0
                char ch = *pStr;
594
0
                if (m_bInUnicode)
595
0
                {
596
0
                    if (m_osUnicodeHex.size() == 8)
597
0
                    {
598
0
                        DecodeUnicode();
599
0
                    }
600
0
                    else if (m_osUnicodeHex.size() == 4)
601
0
                    {
602
                        /* Start of next surrogate pair ? */
603
0
                        if (m_nLastChar == '\\')
604
0
                        {
605
0
                            if (ch == 'u')
606
0
                            {
607
0
                                AdvanceChar(pStr, nLength);
608
0
                                continue;
609
0
                            }
610
0
                            else
611
0
                            {
612
                                /* will be replacement character */
613
0
                                DecodeUnicode();
614
0
                                m_bInStringEscape = true;
615
0
                            }
616
0
                        }
617
0
                        else if (m_nLastChar == 'u')
618
0
                        {
619
0
                            if (IsHexDigit(ch))
620
0
                            {
621
0
                                m_osUnicodeHex += ch;
622
0
                            }
623
0
                            else
624
0
                            {
625
0
                                char szMessage[64];
626
0
                                snprintf(szMessage, sizeof(szMessage),
627
0
                                         "Illegal character in unicode "
628
0
                                         "sequence (\\%c)",
629
0
                                         ch);
630
0
                                return EmitException(szMessage);
631
0
                            }
632
0
                            AdvanceChar(pStr, nLength);
633
0
                            continue;
634
0
                        }
635
0
                        else if (ch == '\\')
636
0
                        {
637
0
                            AdvanceChar(pStr, nLength);
638
0
                            continue;
639
0
                        }
640
0
                        else
641
0
                        {
642
                            /* will be replacement character */
643
0
                            DecodeUnicode();
644
0
                        }
645
0
                    }
646
0
                    else
647
0
                    {
648
0
                        if (IsHexDigit(ch))
649
0
                        {
650
0
                            m_osUnicodeHex += ch;
651
0
                            if (m_osUnicodeHex.size() == 4 &&
652
0
                                !IsHighSurrogate(getUCSChar(m_osUnicodeHex)))
653
0
                            {
654
0
                                DecodeUnicode();
655
0
                            }
656
0
                        }
657
0
                        else
658
0
                        {
659
0
                            char szMessage[64];
660
0
                            snprintf(szMessage, sizeof(szMessage),
661
0
                                     "Illegal character in unicode "
662
0
                                     "sequence (\\%c)",
663
0
                                     ch);
664
0
                            return EmitException(szMessage);
665
0
                        }
666
0
                        AdvanceChar(pStr, nLength);
667
0
                        continue;
668
0
                    }
669
0
                }
670
671
0
                if (m_bInStringEscape)
672
0
                {
673
0
                    if (ch == '"' || ch == '\\' || ch == '/')
674
0
                        m_osToken += ch;
675
0
                    else if (ch == 'b')
676
0
                        m_osToken += '\b';
677
0
                    else if (ch == 'f')
678
0
                        m_osToken += '\f';
679
0
                    else if (ch == 'n')
680
0
                        m_osToken += '\n';
681
0
                    else if (ch == 'r')
682
0
                        m_osToken += '\r';
683
0
                    else if (ch == 't')
684
0
                        m_osToken += '\t';
685
0
                    else if (ch == 'u')
686
0
                    {
687
0
                        m_bInUnicode = true;
688
0
                    }
689
0
                    else
690
0
                    {
691
0
                        char szMessage[32];
692
0
                        snprintf(szMessage, sizeof(szMessage),
693
0
                                 "Illegal escape sequence (\\%c)", ch);
694
0
                        return EmitException(szMessage);
695
0
                    }
696
0
                    m_bInStringEscape = false;
697
0
                    AdvanceChar(pStr, nLength);
698
0
                    continue;
699
0
                }
700
0
                else if (ch == '\\')
701
0
                {
702
0
                    m_bInStringEscape = true;
703
0
                    AdvanceChar(pStr, nLength);
704
0
                    continue;
705
0
                }
706
0
                else if (ch == '"')
707
0
                {
708
0
                    bEOS = true;
709
0
                    AdvanceChar(pStr, nLength);
710
0
                    SkipSpace(pStr, nLength);
711
712
0
                    if (!m_aeObjectState.empty() &&
713
0
                        m_aeObjectState.back() == IN_KEY)
714
0
                    {
715
0
                        StartObjectMember(m_osToken);
716
0
                    }
717
0
                    else
718
0
                    {
719
0
                        String(m_osToken);
720
0
                    }
721
0
                    m_osToken.clear();
722
0
                    m_aState.pop_back();
723
724
0
                    break;
725
0
                }
726
727
0
                m_osToken += ch;
728
0
                AdvanceChar(pStr, nLength);
729
0
            }
730
731
0
            if (nLength == 0)
732
0
            {
733
0
                if (bFinished)
734
0
                {
735
0
                    if (!bEOS)
736
0
                    {
737
0
                        return EmitException("Unterminated string");
738
0
                    }
739
0
                    return CheckStackEmpty();
740
0
                }
741
0
                return true;
742
0
            }
743
0
        }
744
0
        else if (eCurState == ARRAY)
745
0
        {
746
0
            SkipSpace(pStr, nLength);
747
0
            if (nLength == 0)
748
0
            {
749
0
                if (bFinished)
750
0
                {
751
0
                    return EmitException("Unterminated array");
752
0
                }
753
0
                return true;
754
0
            }
755
756
0
            char ch = *pStr;
757
0
            if (ch == ',')
758
0
            {
759
0
                if (m_abArrayState.back() != ArrayState::AFTER_VALUE)
760
0
                {
761
0
                    return EmitUnexpectedChar(ch, "','");
762
0
                }
763
0
                m_abArrayState.back() = ArrayState::AFTER_COMMA;
764
0
                AdvanceChar(pStr, nLength);
765
0
            }
766
0
            else if (ch == ']')
767
0
            {
768
0
                if (m_abArrayState.back() == ArrayState::AFTER_COMMA)
769
0
                {
770
0
                    return EmitException("Missing value");
771
0
                }
772
773
0
                EndArray();
774
0
                AdvanceChar(pStr, nLength);
775
0
                m_abArrayState.pop_back();
776
0
                m_aState.pop_back();
777
0
            }
778
0
            else if (IsValidNewToken(ch))
779
0
            {
780
0
                if (m_abArrayState.back() == ArrayState::AFTER_VALUE)
781
0
                {
782
0
                    return EmitException(
783
0
                        "Unexpected state: ',' or ']' expected");
784
0
                }
785
0
                m_abArrayState.back() = ArrayState::AFTER_VALUE;
786
787
0
                StartArrayMember();
788
0
                if (!StartNewToken(pStr, nLength))
789
0
                {
790
0
                    return false;
791
0
                }
792
0
            }
793
0
            else
794
0
            {
795
0
                return EmitUnexpectedChar(ch);
796
0
            }
797
0
        }
798
0
        else if (eCurState == OBJECT)
799
0
        {
800
0
            SkipSpace(pStr, nLength);
801
0
            if (nLength == 0)
802
0
            {
803
0
                if (bFinished)
804
0
                {
805
0
                    return EmitException("Unterminated object");
806
0
                }
807
0
                return true;
808
0
            }
809
810
0
            char ch = *pStr;
811
0
            if (ch == ',')
812
0
            {
813
0
                if (m_aeObjectState.back() != IN_VALUE)
814
0
                {
815
0
                    return EmitUnexpectedChar(ch, "','");
816
0
                }
817
818
0
                m_aeObjectState.back() = WAITING_KEY;
819
0
                AdvanceChar(pStr, nLength);
820
0
            }
821
0
            else if (ch == ':')
822
0
            {
823
0
                if (m_aeObjectState.back() != IN_KEY)
824
0
                {
825
0
                    return EmitUnexpectedChar(ch, "':'");
826
0
                }
827
0
                m_aeObjectState.back() = KEY_FINISHED;
828
0
                AdvanceChar(pStr, nLength);
829
0
            }
830
0
            else if (ch == '}')
831
0
            {
832
0
                if (m_aeObjectState.back() == WAITING_KEY ||
833
0
                    m_aeObjectState.back() == IN_VALUE)
834
0
                {
835
                    // nothing
836
0
                }
837
0
                else
838
0
                {
839
0
                    return EmitException("Missing value");
840
0
                }
841
842
0
                EndObject();
843
0
                AdvanceChar(pStr, nLength);
844
0
                m_aeObjectState.pop_back();
845
0
                m_aState.pop_back();
846
0
            }
847
0
            else if (IsValidNewToken(ch))
848
0
            {
849
0
                if (m_aeObjectState.back() == WAITING_KEY)
850
0
                {
851
0
                    if (ch != '"')
852
0
                    {
853
0
                        return EmitUnexpectedChar(ch, "'\"'");
854
0
                    }
855
0
                    m_aeObjectState.back() = IN_KEY;
856
0
                }
857
0
                else if (m_aeObjectState.back() == KEY_FINISHED)
858
0
                {
859
0
                    m_aeObjectState.back() = IN_VALUE;
860
0
                }
861
0
                else
862
0
                {
863
0
                    return EmitException("Unexpected state");
864
0
                }
865
0
                if (!StartNewToken(pStr, nLength))
866
0
                {
867
0
                    return false;
868
0
                }
869
0
            }
870
0
            else
871
0
            {
872
0
                return EmitUnexpectedChar(ch);
873
0
            }
874
0
        }
875
0
        else /* if( eCurState == STATE_TRUE || eCurState == STATE_FALSE ||
876
                    eCurState == STATE_NULL ) */
877
0
        {
878
0
            while (nLength)
879
0
            {
880
0
                char ch = *pStr;
881
0
                if (eCurState == STATE_NULL && (ch == 'a' || ch == 'A') &&
882
0
                    m_osToken.size() == 1)
883
0
                {
884
0
                    m_aState.back() = NUMBER;
885
0
                    break;
886
0
                }
887
0
                if (isalpha(static_cast<unsigned char>(ch)))
888
0
                {
889
0
                    m_osToken += ch;
890
0
                    if (eCurState == STATE_TRUE &&
891
0
                        (m_osToken.size() > strlen("true") ||
892
0
                         memcmp(m_osToken.c_str(), "true", m_osToken.size()) !=
893
0
                             0))
894
0
                    {
895
0
                        return EmitUnexpectedChar(*pStr);
896
0
                    }
897
0
                    else if (eCurState == STATE_FALSE &&
898
0
                             (m_osToken.size() > strlen("false") ||
899
0
                              memcmp(m_osToken.c_str(), "false",
900
0
                                     m_osToken.size()) != 0))
901
0
                    {
902
0
                        return EmitUnexpectedChar(*pStr);
903
0
                    }
904
0
                    else if (eCurState == STATE_NULL &&
905
0
                             (m_osToken.size() > strlen("null") ||
906
0
                              memcmp(m_osToken.c_str(), "null",
907
0
                                     m_osToken.size()) != 0))
908
0
                    {
909
0
                        return EmitUnexpectedChar(*pStr);
910
0
                    }
911
0
                }
912
0
                else if (isspace(static_cast<unsigned char>(ch)) || ch == ',' ||
913
0
                         ch == '}' || ch == ']')
914
0
                {
915
0
                    SkipSpace(pStr, nLength);
916
0
                    break;
917
0
                }
918
0
                else
919
0
                {
920
0
                    return EmitUnexpectedChar(ch);
921
0
                }
922
0
                AdvanceChar(pStr, nLength);
923
0
            }
924
0
            if (m_aState.back() == NUMBER)
925
0
            {
926
0
                continue;
927
0
            }
928
0
            if (nLength == 0)
929
0
            {
930
0
                if (bFinished)
931
0
                {
932
0
                    if (!CheckAndEmitTrueFalseOrNull(0))
933
0
                        return false;
934
0
                    return CheckStackEmpty();
935
0
                }
936
0
                return true;
937
0
            }
938
939
0
            if (!CheckAndEmitTrueFalseOrNull(*pStr))
940
0
                return false;
941
0
        }
942
0
    }
943
0
}
944
945
/************************************************************************/
946
/*                       GetSerializedString()                          */
947
/************************************************************************/
948
949
// Returns a copy of s wrapped in double quotes, with the characters that
// cannot appear verbatim inside a JSON string literal replaced by escape
// sequences:
//   - backspace, form feed, line feed, carriage return and tab become the
//     two-character escapes \b, \f, \n, \r and \t;
//   - the double quote and the backslash are prefixed with a backslash;
//   - any other byte below 0x20 is formatted with the "\u%04X" pattern;
//   - every remaining byte is appended unchanged.
// The input is processed byte by byte; no decoding of multi-byte sequences
// is performed here.
std::string CPLJSonStreamingParser::GetSerializedString(std::string_view s)
950
0
{
951
0
    // Start with the opening quote of the literal.
    std::string osStr("\"");
952
0
    for (char ch : s)
953
0
    {
954
0
        // Control characters that have a dedicated short escape.
        if (ch == '\b')
955
0
            osStr += "\\b";
956
0
        else if (ch == '\f')
957
0
            osStr += "\\f";
958
0
        else if (ch == '\n')
959
0
            osStr += "\\n";
960
0
        else if (ch == '\r')
961
0
            osStr += "\\r";
962
0
        else if (ch == '\t')
963
0
            osStr += "\\t";
964
0
        // Characters that would otherwise terminate the literal or start an
        // escape sequence.
        else if (ch == '"')
965
0
            osStr += "\\\"";
966
0
        else if (ch == '\\')
967
0
            osStr += "\\\\";
968
0
        // Remaining control characters. The comparison is done on the
        // unsigned value so that bytes >= 0x80 (negative where char is
        // signed) do not match this branch and fall through to the verbatim
        // copy below. Consequently ch is in [0, 0x1F] when passed to
        // CPLSPrintf().
        else if (static_cast<unsigned char>(ch) < ' ')
969
0
            osStr += CPLSPrintf("\\u%04X", ch);
970
0
        else
971
0
            osStr += ch;
972
0
    }
973
0
    // Closing quote of the literal.
    osStr += "\"";
974
0
    return osStr;
975
0
}
976
977
/*! @endcond */