Coverage Report

Created: 2025-06-09 07:42

/src/gdal/port/cpl_minixml.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implementation of MiniXML Parser and handling.
5
 * Author:   Frank Warmerdam, warmerdam@pobox.com
6
 *
7
 **********************************************************************
8
 * Copyright (c) 2001, Frank Warmerdam
9
 * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 **********************************************************************
13
 *
14
 * Independent Security Audit 2003/04/05 Andrey Kiselev:
15
 *   Completed audit of this module. Any documents may be parsed without
16
 *   buffer overflows and stack corruptions.
17
 *
18
 * Security Audit 2003/03/28 warmerda:
19
 *   Completed security audit.  I believe that this module may be safely used
20
 *   to parse, and serialize arbitrary documents provided by a potentially
21
 *   hostile source.
22
 *
23
 */
24
25
#include "cpl_minixml.h"
26
27
#include <cctype>
28
#include <climits>
29
#include <cstddef>
30
#include <cstdio>
31
#include <cstring>
32
33
#include <algorithm>
34
35
#include "cpl_conv.h"
36
#include "cpl_error.h"
37
#include "cpl_string.h"
38
#include "cpl_vsi.h"
39
40
// Kinds of lexical token produced by ReadToken().
typedef enum
{
    TNone,           // End of input, or a tokenizing failure.
    TString,         // Quoted value, CDATA content, or text outside an element.
    TOpen,           // '<' seen while not inside an element.
    TClose,          // '>' seen while inside an element.
    TEqual,          // '=' seen while inside an element.
    TToken,          // Name-like token read while inside an element.
    TSlashClose,     // "/>" terminator.
    TQuestionClose,  // "?>" terminator.
    TComment,        // Content of a "<!-- ... -->" comment.
    TLiteral         // Verbatim "<!DOCTYPE ...>" declaration.
} XMLTokenType;
53
54
typedef struct
55
{
56
    CPLXMLNode *psFirstNode;
57
    CPLXMLNode *psLastChild;
58
} StackContext;
59
60
typedef struct
61
{
62
    const char *pszInput;
63
    int nInputOffset;
64
    int nInputLine;
65
    bool bInElement;
66
    XMLTokenType eTokenType;
67
    char *pszToken;
68
    size_t nTokenMaxSize;
69
    size_t nTokenSize;
70
71
    int nStackMaxSize;
72
    int nStackSize;
73
    StackContext *papsStack;
74
75
    CPLXMLNode *psFirstNode;
76
    CPLXMLNode *psLastNode;
77
} ParseContext;
78
79
static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
80
                                     const char *pszText);
81
82
/************************************************************************/
83
/*                              ReadChar()                              */
84
/************************************************************************/
85
86
// Return the next input character and advance the cursor.
// At the terminating NUL the cursor stays put, so repeated calls at end of
// input keep returning '\0'. Each line feed (char 10) consumed increments
// the line counter.
static CPL_INLINE char ReadChar(ParseContext *psContext)

{
    const char chCurrent = psContext->pszInput[psContext->nInputOffset];

    // Never step past the end of the input.
    if (chCurrent == '\0')
        return chCurrent;

    psContext->nInputOffset++;
    if (chCurrent == 10)
        psContext->nInputLine++;

    return chCurrent;
}
98
99
/************************************************************************/
100
/*                             UnreadChar()                             */
101
/************************************************************************/
102
103
// Push back the character most recently returned by ReadChar().
// Unreading '\0' is a no-op, since ReadChar() does not advance on it.
// chToUnread must be the character just before the cursor (asserted).
static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)

{
    if (chToUnread != '\0')
    {
        CPLAssert(chToUnread ==
                  psContext->pszInput[psContext->nInputOffset - 1]);

        psContext->nInputOffset--;

        // Undo the line accounting done by ReadChar().
        if (chToUnread == 10)
            psContext->nInputLine--;
    }
}
116
117
/************************************************************************/
118
/*                           ReallocToken()                             */
119
/************************************************************************/
120
121
// Double the capacity of the token buffer.
// Returns true on success. On failure (capacity would exceed INT_MAX, or
// the reallocation fails) an out-of-memory error is emitted, the token
// buffer is freed, psContext->pszToken is set to nullptr and false is
// returned.
static bool ReallocToken(ParseContext *psContext)
{
    if (psContext->nTokenMaxSize <= INT_MAX / 2)
    {
        psContext->nTokenMaxSize *= 2;
        void *pGrown =
            VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize);
        if (pGrown != nullptr)
        {
            psContext->pszToken = static_cast<char *>(pGrown);
            return true;
        }

        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Out of memory allocating %d bytes",
                 static_cast<int>(psContext->nTokenMaxSize));
    }
    else
    {
        // Doubling would go beyond what an int can express.
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Out of memory allocating %d*2 bytes",
                 static_cast<int>(psContext->nTokenMaxSize));
    }

    // Common failure tail: drop the buffer so that it cannot be reused.
    VSIFree(psContext->pszToken);
    psContext->pszToken = nullptr;
    return false;
}
148
149
/************************************************************************/
150
/*                             AddToToken()                             */
151
/************************************************************************/
152
153
// Append chNewChar to the current token, keeping it NUL-terminated.
// The buffer is grown through ReallocToken() when fewer than two spare
// bytes would remain. Returns false if that growth fails.
static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)

{
    const bool bBufferFull =
        psContext->nTokenSize >= psContext->nTokenMaxSize - 2;
    if (bBufferFull && !ReallocToken(psContext))
        return false;

    char *pszTail = psContext->pszToken + psContext->nTokenSize;
    pszTail[0] = chNewChar;
    pszTail[1] = '\0';
    psContext->nTokenSize++;
    return true;
}
166
167
// Append chNewChar to the current token; on allocation failure jump to the
// "fail" label of the enclosing function (which must therefore define one).
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement: it needs the caller's trailing ';' and is safe to use
// as the unbraced body of an if/else.
// TODO(schwehr): Remove the goto.
#define AddToToken(psContext, chNewChar)                                       \
    do                                                                         \
    {                                                                          \
        if (!_AddToToken((psContext), (chNewChar)))                            \
            goto fail;                                                         \
    } while (0)
171
172
/************************************************************************/
173
/*                             ReadToken()                              */
174
/************************************************************************/
175
176
static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)
177
178
10.3M
{
179
10.3M
    psContext->nTokenSize = 0;
180
10.3M
    psContext->pszToken[0] = '\0';
181
182
10.3M
    char chNext = ReadChar(psContext);
183
15.0M
    while (isspace(static_cast<unsigned char>(chNext)))
184
4.63M
        chNext = ReadChar(psContext);
185
186
    /* -------------------------------------------------------------------- */
187
    /*      Handle comments.                                                */
188
    /* -------------------------------------------------------------------- */
189
10.3M
    if (chNext == '<' &&
190
10.3M
        STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
191
0
    {
192
0
        psContext->eTokenType = TComment;
193
194
        // Skip "!--" characters.
195
0
        ReadChar(psContext);
196
0
        ReadChar(psContext);
197
0
        ReadChar(psContext);
198
199
0
        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
200
0
                               "-->") &&
201
0
               (chNext = ReadChar(psContext)) != '\0')
202
0
            AddToToken(psContext, chNext);
203
204
        // Skip "-->" characters.
205
0
        ReadChar(psContext);
206
0
        ReadChar(psContext);
207
0
        ReadChar(psContext);
208
0
    }
209
    /* -------------------------------------------------------------------- */
210
    /*      Handle DOCTYPE.                                                 */
211
    /* -------------------------------------------------------------------- */
212
10.3M
    else if (chNext == '<' &&
213
10.3M
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
214
10.3M
                            "!DOCTYPE"))
215
121
    {
216
121
        bool bInQuotes = false;
217
121
        psContext->eTokenType = TLiteral;
218
219
121
        AddToToken(psContext, '<');
220
121
        do
221
28.9k
        {
222
28.9k
            chNext = ReadChar(psContext);
223
28.9k
            if (chNext == '\0')
224
3
            {
225
3
                eLastErrorType = CE_Failure;
226
3
                CPLError(eLastErrorType, CPLE_AppDefined,
227
3
                         "Parse error in DOCTYPE on or before line %d, "
228
3
                         "reached end of file without '>'.",
229
3
                         psContext->nInputLine);
230
231
3
                break;
232
3
            }
233
234
            /* The markup declaration block within a DOCTYPE tag consists of:
235
             * - a left square bracket [
236
             * - a list of declarations
237
             * - a right square bracket ]
238
             * Example:
239
             * <!DOCTYPE RootElement [ ...declarations... ]>
240
             */
241
28.9k
            if (chNext == '[')
242
129
            {
243
129
                AddToToken(psContext, chNext);
244
245
129
                do
246
39.8k
                {
247
39.8k
                    chNext = ReadChar(psContext);
248
39.8k
                    if (chNext == ']')
249
23
                        break;
250
39.7k
                    AddToToken(psContext, chNext);
251
39.7k
                } while (chNext != '\0' &&
252
39.7k
                         !STARTS_WITH_CI(psContext->pszInput +
253
129
                                             psContext->nInputOffset,
254
129
                                         "]>"));
255
256
129
                if (chNext == '\0')
257
36
                {
258
36
                    eLastErrorType = CE_Failure;
259
36
                    CPLError(eLastErrorType, CPLE_AppDefined,
260
36
                             "Parse error in DOCTYPE on or before line %d, "
261
36
                             "reached end of file without ']'.",
262
36
                             psContext->nInputLine);
263
36
                    break;
264
36
                }
265
266
93
                if (chNext != ']')
267
70
                {
268
70
                    chNext = ReadChar(psContext);
269
70
                    AddToToken(psContext, chNext);
270
271
                    // Skip ">" character, will be consumed below.
272
70
                    chNext = ReadChar(psContext);
273
70
                }
274
93
            }
275
276
28.8k
            if (chNext == '\"')
277
89
                bInQuotes = !bInQuotes;
278
279
28.8k
            if (chNext == '>' && !bInQuotes)
280
82
            {
281
82
                AddToToken(psContext, '>');
282
82
                break;
283
82
            }
284
285
28.7k
            AddToToken(psContext, chNext);
286
28.7k
        } while (true);
287
121
    }
288
    /* -------------------------------------------------------------------- */
289
    /*      Handle CDATA.                                                   */
290
    /* -------------------------------------------------------------------- */
291
10.3M
    else if (chNext == '<' &&
292
10.3M
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
293
10.3M
                            "![CDATA["))
294
1.75k
    {
295
1.75k
        psContext->eTokenType = TString;
296
297
        // Skip !CDATA[
298
1.75k
        ReadChar(psContext);
299
1.75k
        ReadChar(psContext);
300
1.75k
        ReadChar(psContext);
301
1.75k
        ReadChar(psContext);
302
1.75k
        ReadChar(psContext);
303
1.75k
        ReadChar(psContext);
304
1.75k
        ReadChar(psContext);
305
1.75k
        ReadChar(psContext);
306
307
13.7k
        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
308
13.7k
                               "]]>") &&
309
13.7k
               (chNext = ReadChar(psContext)) != '\0')
310
12.0k
            AddToToken(psContext, chNext);
311
312
        // Skip "]]>" characters.
313
1.75k
        ReadChar(psContext);
314
1.75k
        ReadChar(psContext);
315
1.75k
        ReadChar(psContext);
316
1.75k
    }
317
    /* -------------------------------------------------------------------- */
318
    /*      Simple single tokens of interest.                               */
319
    /* -------------------------------------------------------------------- */
320
10.3M
    else if (chNext == '<' && !psContext->bInElement)
321
1.44M
    {
322
1.44M
        psContext->eTokenType = TOpen;
323
1.44M
        psContext->bInElement = true;
324
1.44M
    }
325
8.94M
    else if (chNext == '>' && psContext->bInElement)
326
890k
    {
327
890k
        psContext->eTokenType = TClose;
328
890k
        psContext->bInElement = false;
329
890k
    }
330
8.05M
    else if (chNext == '=' && psContext->bInElement)
331
1.87M
    {
332
1.87M
        psContext->eTokenType = TEqual;
333
1.87M
    }
334
6.18M
    else if (chNext == '\0')
335
96.7k
    {
336
96.7k
        psContext->eTokenType = TNone;
337
96.7k
    }
338
    /* -------------------------------------------------------------------- */
339
    /*      Handle the /> token terminator.                                 */
340
    /* -------------------------------------------------------------------- */
341
6.08M
    else if (chNext == '/' && psContext->bInElement &&
342
6.08M
             psContext->pszInput[psContext->nInputOffset] == '>')
343
541k
    {
344
541k
        chNext = ReadChar(psContext);
345
541k
        (void)chNext;
346
541k
        CPLAssert(chNext == '>');
347
348
541k
        psContext->eTokenType = TSlashClose;
349
541k
        psContext->bInElement = false;
350
541k
    }
351
    /* -------------------------------------------------------------------- */
352
    /*      Handle the ?> token terminator.                                 */
353
    /* -------------------------------------------------------------------- */
354
5.54M
    else if (chNext == '?' && psContext->bInElement &&
355
5.54M
             psContext->pszInput[psContext->nInputOffset] == '>')
356
19
    {
357
19
        chNext = ReadChar(psContext);
358
19
        (void)chNext;
359
19
        CPLAssert(chNext == '>');
360
361
19
        psContext->eTokenType = TQuestionClose;
362
19
        psContext->bInElement = false;
363
19
    }
364
    /* -------------------------------------------------------------------- */
365
    /*      Collect a quoted string.                                        */
366
    /* -------------------------------------------------------------------- */
367
5.54M
    else if (psContext->bInElement && chNext == '"')
368
47.5k
    {
369
47.5k
        psContext->eTokenType = TString;
370
371
255k
        while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
372
208k
            AddToToken(psContext, chNext);
373
374
47.5k
        if (chNext != '"')
375
445
        {
376
445
            psContext->eTokenType = TNone;
377
445
            eLastErrorType = CE_Failure;
378
445
            CPLError(
379
445
                eLastErrorType, CPLE_AppDefined,
380
445
                "Parse error on line %d, reached EOF before closing quote.",
381
445
                psContext->nInputLine);
382
445
        }
383
384
        // Do we need to unescape it?
385
47.5k
        if (strchr(psContext->pszToken, '&') != nullptr)
386
0
        {
387
0
            int nLength = 0;
388
0
            char *pszUnescaped =
389
0
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
390
0
            strcpy(psContext->pszToken, pszUnescaped);
391
0
            CPLFree(pszUnescaped);
392
0
            psContext->nTokenSize = strlen(psContext->pszToken);
393
0
        }
394
47.5k
    }
395
5.49M
    else if (psContext->bInElement && chNext == '\'')
396
1.82M
    {
397
1.82M
        psContext->eTokenType = TString;
398
399
32.4M
        while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
400
30.6M
            AddToToken(psContext, chNext);
401
402
1.82M
        if (chNext != '\'')
403
30
        {
404
30
            psContext->eTokenType = TNone;
405
30
            eLastErrorType = CE_Failure;
406
30
            CPLError(
407
30
                eLastErrorType, CPLE_AppDefined,
408
30
                "Parse error on line %d, reached EOF before closing quote.",
409
30
                psContext->nInputLine);
410
30
        }
411
412
        // Do we need to unescape it?
413
1.82M
        if (strchr(psContext->pszToken, '&') != nullptr)
414
0
        {
415
0
            int nLength = 0;
416
0
            char *pszUnescaped =
417
0
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
418
0
            strcpy(psContext->pszToken, pszUnescaped);
419
0
            CPLFree(pszUnescaped);
420
0
            psContext->nTokenSize = strlen(psContext->pszToken);
421
0
        }
422
1.82M
    }
423
    /* -------------------------------------------------------------------- */
424
    /*      Collect an unquoted string, terminated by a open angle          */
425
    /*      bracket.                                                        */
426
    /* -------------------------------------------------------------------- */
427
3.67M
    else if (!psContext->bInElement)
428
352k
    {
429
352k
        psContext->eTokenType = TString;
430
431
352k
        AddToToken(psContext, chNext);
432
19.0M
        while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
433
18.7M
            AddToToken(psContext, chNext);
434
352k
        UnreadChar(psContext, chNext);
435
436
        // Do we need to unescape it?
437
352k
        if (strchr(psContext->pszToken, '&') != nullptr)
438
42.1k
        {
439
42.1k
            int nLength = 0;
440
42.1k
            char *pszUnescaped =
441
42.1k
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
442
42.1k
            strcpy(psContext->pszToken, pszUnescaped);
443
42.1k
            CPLFree(pszUnescaped);
444
42.1k
            psContext->nTokenSize = strlen(psContext->pszToken);
445
42.1k
        }
446
352k
    }
447
448
    /* -------------------------------------------------------------------- */
449
    /*      Collect a regular token terminated by white space, or           */
450
    /*      special character(s) like an equal sign.                        */
451
    /* -------------------------------------------------------------------- */
452
3.32M
    else
453
3.32M
    {
454
3.32M
        psContext->eTokenType = TToken;
455
456
        // Add the first character to the token regardless of what it is.
457
3.32M
        AddToToken(psContext, chNext);
458
459
3.32M
        for (chNext = ReadChar(psContext);
460
24.5M
             (chNext >= 'A' && chNext <= 'Z') ||
461
24.5M
             (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
462
24.5M
             chNext == '_' || chNext == '.' || chNext == ':' ||
463
24.5M
             (chNext >= '0' && chNext <= '9');
464
21.2M
             chNext = ReadChar(psContext))
465
21.2M
        {
466
21.2M
            AddToToken(psContext, chNext);
467
21.2M
        }
468
469
3.32M
        UnreadChar(psContext, chNext);
470
3.32M
    }
471
472
10.3M
    return psContext->eTokenType;
473
474
0
fail:
475
0
    psContext->eTokenType = TNone;
476
0
    return TNone;
477
10.3M
}
478
479
/************************************************************************/
480
/*                              PushNode()                              */
481
/************************************************************************/
482
483
static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
484
                     CPLErr &eLastErrorType)
485
486
1.00M
{
487
1.00M
    if (psContext->nStackMaxSize <= psContext->nStackSize)
488
97.9k
    {
489
        // Somewhat arbitrary number.
490
97.9k
        if (psContext->nStackMaxSize >= 10000)
491
0
        {
492
0
            eLastErrorType = CE_Failure;
493
0
            CPLError(CE_Failure, CPLE_NotSupported,
494
0
                     "XML element depth beyond 10000. Giving up");
495
0
            VSIFree(psContext->papsStack);
496
0
            psContext->papsStack = nullptr;
497
0
            return false;
498
0
        }
499
97.9k
        psContext->nStackMaxSize += 10;
500
501
97.9k
        StackContext *papsStack = static_cast<StackContext *>(
502
97.9k
            VSIRealloc(psContext->papsStack,
503
97.9k
                       sizeof(StackContext) * psContext->nStackMaxSize));
504
97.9k
        if (papsStack == nullptr)
505
0
        {
506
0
            eLastErrorType = CE_Failure;
507
0
            CPLError(CE_Failure, CPLE_OutOfMemory,
508
0
                     "Out of memory allocating %d bytes",
509
0
                     static_cast<int>(sizeof(StackContext)) *
510
0
                         psContext->nStackMaxSize);
511
0
            VSIFree(psContext->papsStack);
512
0
            psContext->papsStack = nullptr;
513
0
            return false;
514
0
        }
515
97.9k
        psContext->papsStack = papsStack;
516
97.9k
    }
517
#ifdef DEBUG
518
    // To make Coverity happy, but cannot happen.
519
    if (psContext->papsStack == nullptr)
520
        return false;
521
#endif
522
523
1.00M
    psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
524
1.00M
    psContext->papsStack[psContext->nStackSize].psLastChild = nullptr;
525
1.00M
    psContext->nStackSize++;
526
527
1.00M
    return true;
528
1.00M
}
529
530
/************************************************************************/
531
/*                             AttachNode()                             */
532
/*                                                                      */
533
/*      Attach the passed node as a child of the current node.          */
534
/*      Special handling exists for adding siblings to psFirst if       */
535
/*      there is nothing on the stack.                                  */
536
/************************************************************************/
537
538
// Link psNode into the tree being built.
// - The very first node becomes the head of the root-level list.
// - With an empty element stack, psNode is appended to the root-level list
//   as a sibling.
// - Otherwise psNode is appended to the children of the element on top of
//   the stack.
static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)

{
    // First node of the document.
    if (psContext->psFirstNode == nullptr)
    {
        psContext->psFirstNode = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    // No open element: chain as a root-level sibling.
    if (psContext->nStackSize == 0)
    {
        psContext->psLastNode->psNext = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    // Append to the children of the innermost open element.
    StackContext &sTop = psContext->papsStack[psContext->nStackSize - 1];
    CPLXMLNode *const psParent = sTop.psFirstNode;
    if (psParent->psChild == nullptr)
    {
        psParent->psChild = psNode;
    }
    else
    {
        sTop.psLastChild->psNext = psNode;
    }
    sTop.psLastChild = psNode;
}
567
568
/************************************************************************/
569
/*                         CPLParseXMLString()                          */
570
/************************************************************************/
571
572
/**
573
 * \brief Parse an XML string into tree form.
574
 *
575
 * The passed document is parsed into a CPLXMLNode tree representation.
576
 * If the document is not well formed XML then NULL is returned, and errors
577
 * are reported via CPLError().  No validation beyond wellformedness is
578
 * done.  The CPLParseXMLFile() convenience function can be used to parse
579
 * from a file.
580
 *
581
 * The returned document tree is owned by the caller and should be freed
582
 * with CPLDestroyXMLNode() when no longer needed.
583
 *
584
 * If the document has more than one "root level" element then those after the
585
 * first will be attached to the first as siblings (via the psNext pointers)
586
 * even though there is no common parent.  A document with no XML structure
587
 * (no angle brackets for instance) would be considered well formed, and
588
 * returned as a single CXT_Text node.
589
 *
590
 * @param pszString the document to parse.
591
 *
592
 * @return parsed tree or NULL on error.
593
 */
594
595
CPLXMLNode *CPLParseXMLString(const char *pszString)
596
597
98.4k
{
598
98.4k
    if (pszString == nullptr)
599
0
    {
600
0
        CPLError(CE_Failure, CPLE_AppDefined,
601
0
                 "CPLParseXMLString() called with NULL pointer.");
602
0
        return nullptr;
603
0
    }
604
605
    // Save back error context.
606
98.4k
    const CPLErr eErrClass = CPLGetLastErrorType();
607
98.4k
    const CPLErrorNum nErrNum = CPLGetLastErrorNo();
608
98.4k
    const CPLString osErrMsg = CPLGetLastErrorMsg();
609
610
    // Reset it now.
611
98.4k
    CPLErrorSetState(CE_None, CPLE_AppDefined, "");
612
613
    /* -------------------------------------------------------------------- */
614
    /*      Check for a UTF-8 BOM and skip if found                         */
615
    /*                                                                      */
616
    /*      TODO: BOM is variable-length parameter and depends on encoding. */
617
    /*            Add BOM detection for other encodings.                    */
618
    /* -------------------------------------------------------------------- */
619
620
    // Used to skip to actual beginning of XML data.
621
98.4k
    if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
622
98.4k
        (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
623
98.4k
        (static_cast<unsigned char>(pszString[2]) == 0xBF))
624
0
    {
625
0
        pszString += 3;
626
0
    }
627
628
    /* -------------------------------------------------------------------- */
629
    /*      Initialize parse context.                                       */
630
    /* -------------------------------------------------------------------- */
631
98.4k
    ParseContext sContext;
632
98.4k
    sContext.pszInput = pszString;
633
98.4k
    sContext.nInputOffset = 0;
634
98.4k
    sContext.nInputLine = 0;
635
98.4k
    sContext.bInElement = false;
636
98.4k
    sContext.nTokenMaxSize = 10;
637
98.4k
    sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
638
98.4k
    if (sContext.pszToken == nullptr)
639
0
        return nullptr;
640
98.4k
    sContext.nTokenSize = 0;
641
98.4k
    sContext.eTokenType = TNone;
642
98.4k
    sContext.nStackMaxSize = 0;
643
98.4k
    sContext.nStackSize = 0;
644
98.4k
    sContext.papsStack = nullptr;
645
98.4k
    sContext.psFirstNode = nullptr;
646
98.4k
    sContext.psLastNode = nullptr;
647
648
#ifdef DEBUG
649
    bool bRecoverableError = true;
650
#endif
651
98.4k
    CPLErr eLastErrorType = CE_None;
652
653
    /* ==================================================================== */
654
    /*      Loop reading tokens.                                            */
655
    /* ==================================================================== */
656
4.76M
    while (ReadToken(&sContext, eLastErrorType) != TNone)
657
4.67M
    {
658
4.67M
    loop_beginning:
659
        /* --------------------------------------------------------------------
660
         */
661
        /*      Create a new element. */
662
        /* --------------------------------------------------------------------
663
         */
664
4.67M
        if (sContext.eTokenType == TOpen)
665
1.44M
        {
666
1.44M
            if (ReadToken(&sContext, eLastErrorType) != TToken)
667
367
            {
668
367
                eLastErrorType = CE_Failure;
669
367
                CPLError(eLastErrorType, CPLE_AppDefined,
670
367
                         "Line %d: Didn't find element token after "
671
367
                         "open angle bracket.",
672
367
                         sContext.nInputLine);
673
367
                break;
674
367
            }
675
676
1.44M
            CPLXMLNode *psElement = nullptr;
677
1.44M
            if (sContext.pszToken[0] != '/')
678
1.00M
            {
679
1.00M
                psElement =
680
1.00M
                    _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
681
1.00M
                if (!psElement)
682
0
                    break;
683
1.00M
                AttachNode(&sContext, psElement);
684
1.00M
                if (!PushNode(&sContext, psElement, eLastErrorType))
685
0
                    break;
686
1.00M
            }
687
435k
            else
688
435k
            {
689
435k
                if (sContext.nStackSize == 0 ||
690
435k
                    !EQUAL(sContext.pszToken + 1,
691
435k
                           sContext.papsStack[sContext.nStackSize - 1]
692
435k
                               .psFirstNode->pszValue))
693
143
                {
694
#ifdef DEBUG
695
                    // Makes life of fuzzers easier if we accept somewhat
696
                    // corrupted XML like <foo> ... </not_foo>.
697
                    if (CPLTestBool(
698
                            CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
699
                    {
700
                        eLastErrorType = CE_Warning;
701
                        CPLError(
702
                            eLastErrorType, CPLE_AppDefined,
703
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
704
                            sContext.nInputLine, sContext.pszToken,
705
                            sContext.pszToken + 1);
706
                        if (sContext.nStackSize == 0)
707
                            break;
708
                        goto end_processing_close;
709
                    }
710
                    else
711
#endif
712
143
                    {
713
143
                        eLastErrorType = CE_Failure;
714
143
                        CPLError(
715
143
                            eLastErrorType, CPLE_AppDefined,
716
143
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
717
143
                            sContext.nInputLine, sContext.pszToken,
718
143
                            sContext.pszToken + 1);
719
143
                        break;
720
143
                    }
721
143
                }
722
435k
                else
723
435k
                {
724
435k
                    if (strcmp(sContext.pszToken + 1,
725
435k
                               sContext.papsStack[sContext.nStackSize - 1]
726
435k
                                   .psFirstNode->pszValue) != 0)
727
1.14k
                    {
728
                        // TODO: At some point we could just error out like any
729
                        // other sane XML parser would do.
730
1.14k
                        eLastErrorType = CE_Warning;
731
1.14k
                        CPLError(
732
1.14k
                            eLastErrorType, CPLE_AppDefined,
733
1.14k
                            "Line %d: <%.500s> matches <%.500s>, but the case "
734
1.14k
                            "isn't the same.  Going on, but this is invalid "
735
1.14k
                            "XML that might be rejected in future versions.",
736
1.14k
                            sContext.nInputLine,
737
1.14k
                            sContext.papsStack[sContext.nStackSize - 1]
738
1.14k
                                .psFirstNode->pszValue,
739
1.14k
                            sContext.pszToken);
740
1.14k
                    }
741
#ifdef DEBUG
742
                end_processing_close:
743
#endif
744
435k
                    if (ReadToken(&sContext, eLastErrorType) != TClose)
745
0
                    {
746
0
                        eLastErrorType = CE_Failure;
747
0
                        CPLError(eLastErrorType, CPLE_AppDefined,
748
0
                                 "Line %d: Missing close angle bracket "
749
0
                                 "after <%.500s.",
750
0
                                 sContext.nInputLine, sContext.pszToken);
751
0
                        break;
752
0
                    }
753
754
                    // Pop element off stack
755
435k
                    sContext.nStackSize--;
756
435k
                }
757
435k
            }
758
1.44M
        }
759
760
        /* --------------------------------------------------------------------
761
         */
762
        /*      Add an attribute to a token. */
763
        /* --------------------------------------------------------------------
764
         */
765
3.22M
        else if (sContext.eTokenType == TToken)
766
1.87M
        {
767
1.87M
            CPLXMLNode *psAttr =
768
1.87M
                _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
769
1.87M
            if (!psAttr)
770
0
                break;
771
1.87M
            AttachNode(&sContext, psAttr);
772
773
1.87M
            XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
774
1.87M
            if (nextToken != TEqual)
775
3.30k
            {
776
                // Parse stuff like <?valbuddy_schematron
777
                // ../wmtsSimpleGetCapabilities.sch?>
778
3.30k
                if (sContext.nStackSize > 0 &&
779
3.30k
                    sContext.papsStack[sContext.nStackSize - 1]
780
3.30k
                            .psFirstNode->pszValue[0] == '?')
781
32
                {
782
32
                    psAttr->eType = CXT_Text;
783
32
                    if (nextToken == TNone)
784
0
                        break;
785
32
                    goto loop_beginning;
786
32
                }
787
788
3.27k
                eLastErrorType = CE_Failure;
789
3.27k
                CPLError(eLastErrorType, CPLE_AppDefined,
790
3.27k
                         "Line %d: Didn't find expected '=' for value of "
791
3.27k
                         "attribute '%.500s'.",
792
3.27k
                         sContext.nInputLine, psAttr->pszValue);
793
#ifdef DEBUG
794
                // Accepting an attribute without child text
795
                // would break too much assumptions in driver code
796
                bRecoverableError = false;
797
#endif
798
3.27k
                break;
799
3.30k
            }
800
801
1.87M
            if (ReadToken(&sContext, eLastErrorType) == TToken)
802
27
            {
803
                /* TODO: at some point we could just error out like any other */
804
                /* sane XML parser would do */
805
27
                eLastErrorType = CE_Warning;
806
27
                CPLError(eLastErrorType, CPLE_AppDefined,
807
27
                         "Line %d: Attribute value should be single or double "
808
27
                         "quoted.  Going on, but this is invalid XML that "
809
27
                         "might be rejected in future versions.",
810
27
                         sContext.nInputLine);
811
27
            }
812
1.87M
            else if (sContext.eTokenType != TString)
813
496
            {
814
496
                eLastErrorType = CE_Failure;
815
496
                CPLError(eLastErrorType, CPLE_AppDefined,
816
496
                         "Line %d: Didn't find expected attribute value.",
817
496
                         sContext.nInputLine);
818
#ifdef DEBUG
819
                // Accepting an attribute without child text
820
                // would break too much assumptions in driver code
821
                bRecoverableError = false;
822
#endif
823
496
                break;
824
496
            }
825
826
1.87M
            if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
827
0
                break;
828
1.87M
        }
829
830
        /* --------------------------------------------------------------------
831
         */
832
        /*      Close the start section of an element. */
833
        /* --------------------------------------------------------------------
834
         */
835
1.35M
        else if (sContext.eTokenType == TClose)
836
455k
        {
837
455k
            if (sContext.nStackSize == 0)
838
0
            {
839
0
                eLastErrorType = CE_Failure;
840
0
                CPLError(eLastErrorType, CPLE_AppDefined,
841
0
                         "Line %d: Found unbalanced '>'.", sContext.nInputLine);
842
0
                break;
843
0
            }
844
455k
        }
845
846
        /* --------------------------------------------------------------------
847
         */
848
        /*      Close the start section of an element, and pop it */
849
        /*      immediately. */
850
        /* --------------------------------------------------------------------
851
         */
852
896k
        else if (sContext.eTokenType == TSlashClose)
853
541k
        {
854
541k
            if (sContext.nStackSize == 0)
855
0
            {
856
0
                eLastErrorType = CE_Failure;
857
0
                CPLError(eLastErrorType, CPLE_AppDefined,
858
0
                         "Line %d: Found unbalanced '/>'.",
859
0
                         sContext.nInputLine);
860
0
                break;
861
0
            }
862
863
541k
            sContext.nStackSize--;
864
541k
        }
865
        /* --------------------------------------------------------------------
866
         */
867
        /*      Close the start section of a <?...?> element, and pop it */
868
        /*      immediately. */
869
        /* --------------------------------------------------------------------
870
         */
871
354k
        else if (sContext.eTokenType == TQuestionClose)
872
19
        {
873
19
            if (sContext.nStackSize == 0)
874
0
            {
875
0
                eLastErrorType = CE_Failure;
876
0
                CPLError(eLastErrorType, CPLE_AppDefined,
877
0
                         "Line %d: Found unbalanced '?>'.",
878
0
                         sContext.nInputLine);
879
0
                break;
880
0
            }
881
19
            else if (sContext.papsStack[sContext.nStackSize - 1]
882
19
                         .psFirstNode->pszValue[0] != '?')
883
0
            {
884
0
                eLastErrorType = CE_Failure;
885
0
                CPLError(eLastErrorType, CPLE_AppDefined,
886
0
                         "Line %d: Found '?>' without matching '<?'.",
887
0
                         sContext.nInputLine);
888
0
                break;
889
0
            }
890
891
19
            sContext.nStackSize--;
892
19
        }
893
        /* --------------------------------------------------------------------
894
         */
895
        /*      Handle comments.  They are returned as a whole token with the */
896
        /*      prefix and postfix omitted.  No processing of white space */
897
        /*      will be done. */
898
        /* --------------------------------------------------------------------
899
         */
900
354k
        else if (sContext.eTokenType == TComment)
901
0
        {
902
0
            CPLXMLNode *psValue =
903
0
                _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
904
0
            if (!psValue)
905
0
                break;
906
0
            AttachNode(&sContext, psValue);
907
0
        }
908
        /* --------------------------------------------------------------------
909
         */
910
        /*      Handle literals.  They are returned without processing. */
911
        /* --------------------------------------------------------------------
912
         */
913
354k
        else if (sContext.eTokenType == TLiteral)
914
121
        {
915
121
            CPLXMLNode *psValue =
916
121
                _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
917
121
            if (!psValue)
918
0
                break;
919
121
            AttachNode(&sContext, psValue);
920
121
        }
921
        /* --------------------------------------------------------------------
922
         */
923
        /*      Add a text value node as a child of the current element. */
924
        /* --------------------------------------------------------------------
925
         */
926
354k
        else if (sContext.eTokenType == TString && !sContext.bInElement)
927
354k
        {
928
354k
            CPLXMLNode *psValue =
929
354k
                _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
930
354k
            if (!psValue)
931
0
                break;
932
354k
            AttachNode(&sContext, psValue);
933
354k
        }
934
        /* --------------------------------------------------------------------
935
         */
936
        /*      Anything else is an error. */
937
        /* --------------------------------------------------------------------
938
         */
939
24
        else
940
24
        {
941
24
            eLastErrorType = CE_Failure;
942
24
            CPLError(eLastErrorType, CPLE_AppDefined,
943
24
                     "Parse error at line %d, unexpected token:%.500s",
944
24
                     sContext.nInputLine, sContext.pszToken);
945
24
            break;
946
24
        }
947
4.67M
    }
948
949
    /* -------------------------------------------------------------------- */
950
    /*      Did we pop all the way out of our stack?                        */
951
    /* -------------------------------------------------------------------- */
952
98.4k
    if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
953
98.4k
        sContext.papsStack != nullptr)
954
8.29k
    {
955
#ifdef DEBUG
956
        // Makes life of fuzzers easier if we accept somewhat corrupted XML
957
        // like <x> ...
958
        if (bRecoverableError &&
959
            CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
960
        {
961
            eLastErrorType = CE_Warning;
962
        }
963
        else
964
#endif
965
8.29k
        {
966
8.29k
            eLastErrorType = CE_Failure;
967
8.29k
        }
968
8.29k
        CPLError(
969
8.29k
            eLastErrorType, CPLE_AppDefined,
970
8.29k
            "Parse error at EOF, not all elements have been closed, "
971
8.29k
            "starting with %.500s",
972
8.29k
            sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
973
8.29k
    }
974
975
    /* -------------------------------------------------------------------- */
976
    /*      Cleanup                                                         */
977
    /* -------------------------------------------------------------------- */
978
98.4k
    CPLFree(sContext.pszToken);
979
98.4k
    if (sContext.papsStack != nullptr)
980
97.9k
        CPLFree(sContext.papsStack);
981
982
    // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
983
    // has been set we would never get failures
984
98.4k
    if (eLastErrorType == CE_Failure)
985
12.6k
    {
986
12.6k
        CPLDestroyXMLNode(sContext.psFirstNode);
987
12.6k
        sContext.psFirstNode = nullptr;
988
12.6k
        sContext.psLastNode = nullptr;
989
12.6k
    }
990
991
98.4k
    if (eLastErrorType == CE_None)
992
84.6k
    {
993
        // Restore initial error state.
994
84.6k
        CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
995
84.6k
    }
996
997
98.4k
    return sContext.psFirstNode;
998
98.4k
}
999
1000
/************************************************************************/
1001
/*                            _GrowBuffer()                             */
1002
/************************************************************************/
1003
1004
static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)
1005
1006
1.26M
{
1007
1.26M
    if (nNeeded + 1 >= *pnMaxLength)
1008
45.8k
    {
1009
45.8k
        *pnMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
1010
45.8k
        char *pszTextNew =
1011
45.8k
            static_cast<char *>(VSIRealloc(*ppszText, *pnMaxLength));
1012
45.8k
        if (pszTextNew == nullptr)
1013
0
            return false;
1014
45.8k
        *ppszText = pszTextNew;
1015
45.8k
    }
1016
1.26M
    return true;
1017
1.26M
}
1018
1019
/************************************************************************/
1020
/*                        CPLSerializeXMLNode()                         */
1021
/************************************************************************/
1022
1023
// TODO(schwehr): Rewrite this whole thing using C++ string.
1024
// CPLSerializeXMLNode has buffer overflows.
1025
static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
1026
                                char **ppszText, size_t *pnLength,
1027
                                size_t *pnMaxLength)
1028
1029
598k
{
1030
598k
    if (psNode == nullptr)
1031
0
        return true;
1032
1033
    /* -------------------------------------------------------------------- */
1034
    /*      Ensure the buffer is plenty large to hold this additional       */
1035
    /*      string.                                                         */
1036
    /* -------------------------------------------------------------------- */
1037
598k
    *pnLength += strlen(*ppszText + *pnLength);
1038
598k
    if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
1039
598k
                     ppszText, pnMaxLength))
1040
0
        return false;
1041
1042
    /* -------------------------------------------------------------------- */
1043
    /*      Text is just directly emitted.                                  */
1044
    /* -------------------------------------------------------------------- */
1045
598k
    if (psNode->eType == CXT_Text)
1046
268k
    {
1047
268k
        char *pszEscaped =
1048
268k
            CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);
1049
1050
268k
        CPLAssert(psNode->psChild == nullptr);
1051
1052
        // Escaped text might be bigger than expected.
1053
268k
        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1054
0
        {
1055
0
            CPLFree(pszEscaped);
1056
0
            return false;
1057
0
        }
1058
268k
        strcat(*ppszText + *pnLength, pszEscaped);
1059
1060
268k
        CPLFree(pszEscaped);
1061
268k
    }
1062
1063
    /* -------------------------------------------------------------------- */
1064
    /*      Attributes require a little formatting.                         */
1065
    /* -------------------------------------------------------------------- */
1066
329k
    else if (psNode->eType == CXT_Attribute)
1067
15.2k
    {
1068
15.2k
        CPLAssert(psNode->psChild != nullptr &&
1069
15.2k
                  psNode->psChild->eType == CXT_Text);
1070
1071
15.2k
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
1072
15.2k
                 psNode->pszValue);
1073
15.2k
        *pnLength += strlen(*ppszText + *pnLength);
1074
1075
15.2k
        char *pszEscaped =
1076
15.2k
            CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);
1077
1078
15.2k
        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1079
0
        {
1080
0
            CPLFree(pszEscaped);
1081
0
            return false;
1082
0
        }
1083
15.2k
        strcat(*ppszText + *pnLength, pszEscaped);
1084
1085
15.2k
        CPLFree(pszEscaped);
1086
1087
15.2k
        *pnLength += strlen(*ppszText + *pnLength);
1088
15.2k
        if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
1089
0
            return false;
1090
15.2k
        strcat(*ppszText + *pnLength, "\"");
1091
15.2k
    }
1092
1093
    /* -------------------------------------------------------------------- */
1094
    /*      Handle comment output.                                          */
1095
    /* -------------------------------------------------------------------- */
1096
314k
    else if (psNode->eType == CXT_Comment)
1097
0
    {
1098
0
        CPLAssert(psNode->psChild == nullptr);
1099
1100
0
        for (int i = 0; i < nIndent; i++)
1101
0
            (*ppszText)[(*pnLength)++] = ' ';
1102
1103
0
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
1104
0
                 psNode->pszValue);
1105
0
    }
1106
1107
    /* -------------------------------------------------------------------- */
1108
    /*      Handle literal output (like <!DOCTYPE...>)                      */
1109
    /* -------------------------------------------------------------------- */
1110
314k
    else if (psNode->eType == CXT_Literal)
1111
0
    {
1112
0
        CPLAssert(psNode->psChild == nullptr);
1113
1114
0
        for (int i = 0; i < nIndent; i++)
1115
0
            (*ppszText)[(*pnLength)++] = ' ';
1116
1117
0
        strcpy(*ppszText + *pnLength, psNode->pszValue);
1118
0
        strcat(*ppszText + *pnLength, "\n");
1119
0
    }
1120
1121
    /* -------------------------------------------------------------------- */
1122
    /*      Elements actually have to deal with general children, and       */
1123
    /*      various formatting issues.                                      */
1124
    /* -------------------------------------------------------------------- */
1125
314k
    else if (psNode->eType == CXT_Element)
1126
314k
    {
1127
314k
        if (nIndent)
1128
299k
            memset(*ppszText + *pnLength, ' ', nIndent);
1129
314k
        *pnLength += nIndent;
1130
314k
        (*ppszText)[*pnLength] = '\0';
1131
1132
314k
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
1133
314k
                 psNode->pszValue);
1134
1135
314k
        if (psNode->pszValue[0] == '?')
1136
0
        {
1137
0
            for (const CPLXMLNode *psChild = psNode->psChild;
1138
0
                 psChild != nullptr; psChild = psChild->psNext)
1139
0
            {
1140
0
                if (psChild->eType == CXT_Text)
1141
0
                {
1142
0
                    *pnLength += strlen(*ppszText + *pnLength);
1143
0
                    if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1144
0
                        return false;
1145
0
                    strcat(*ppszText + *pnLength, " ");
1146
0
                }
1147
1148
0
                if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1149
0
                                         pnMaxLength))
1150
0
                {
1151
0
                    return false;
1152
0
                }
1153
0
            }
1154
0
            if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1155
0
                return false;
1156
1157
0
            strcat(*ppszText + *pnLength, "?>\n");
1158
0
        }
1159
314k
        else
1160
314k
        {
1161
314k
            bool bHasNonAttributeChildren = false;
1162
            // Serialize *all* the attribute children, regardless of order
1163
314k
            for (const CPLXMLNode *psChild = psNode->psChild;
1164
897k
                 psChild != nullptr; psChild = psChild->psNext)
1165
583k
            {
1166
583k
                if (psChild->eType == CXT_Attribute)
1167
15.2k
                {
1168
15.2k
                    if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1169
15.2k
                                             pnMaxLength))
1170
0
                        return false;
1171
15.2k
                }
1172
567k
                else
1173
567k
                    bHasNonAttributeChildren = true;
1174
583k
            }
1175
1176
314k
            if (!bHasNonAttributeChildren)
1177
460
            {
1178
460
                if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1179
0
                    return false;
1180
1181
460
                strcat(*ppszText + *pnLength, " />\n");
1182
460
            }
1183
313k
            else
1184
313k
            {
1185
313k
                bool bJustText = true;
1186
1187
313k
                strcat(*ppszText + *pnLength, ">");
1188
1189
313k
                for (const CPLXMLNode *psChild = psNode->psChild;
1190
896k
                     psChild != nullptr; psChild = psChild->psNext)
1191
583k
                {
1192
583k
                    if (psChild->eType == CXT_Attribute)
1193
15.2k
                        continue;
1194
1195
567k
                    if (psChild->eType != CXT_Text && bJustText)
1196
50.1k
                    {
1197
50.1k
                        bJustText = false;
1198
50.1k
                        *pnLength += strlen(*ppszText + *pnLength);
1199
50.1k
                        if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1200
0
                            return false;
1201
50.1k
                        strcat(*ppszText + *pnLength, "\n");
1202
50.1k
                    }
1203
1204
567k
                    if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
1205
567k
                                             pnLength, pnMaxLength))
1206
0
                        return false;
1207
567k
                }
1208
1209
313k
                *pnLength += strlen(*ppszText + *pnLength);
1210
313k
                if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
1211
313k
                                     nIndent,
1212
313k
                                 ppszText, pnMaxLength))
1213
0
                    return false;
1214
1215
313k
                if (!bJustText)
1216
50.1k
                {
1217
50.1k
                    if (nIndent)
1218
34.9k
                        memset(*ppszText + *pnLength, ' ', nIndent);
1219
50.1k
                    *pnLength += nIndent;
1220
50.1k
                    (*ppszText)[*pnLength] = '\0';
1221
50.1k
                }
1222
1223
313k
                *pnLength += strlen(*ppszText + *pnLength);
1224
313k
                snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
1225
313k
                         "</%s>\n", psNode->pszValue);
1226
313k
            }
1227
314k
        }
1228
314k
    }
1229
1230
598k
    return true;
1231
598k
}
1232
1233
/************************************************************************/
1234
/*                        CPLSerializeXMLTree()                         */
1235
/************************************************************************/
1236
1237
/**
1238
 * \brief Convert tree into string document.
1239
 *
1240
 * This function converts a CPLXMLNode tree representation of a document
1241
 * into a flat string representation.  White space indentation is used
1242
 * to visually preserve the tree structure of the document.  The returned
1243
 * document becomes owned by the caller and should be freed with CPLFree()
1244
 * when no longer needed.
1245
 *
1246
 * @param psNode the node to serialize.
1247
 *
1248
 * @return the document on success or NULL on failure.
1249
 */
1250
1251
char *CPLSerializeXMLTree(const CPLXMLNode *psNode)
1252
1253
15.2k
{
1254
15.2k
    size_t nMaxLength = 100;
1255
15.2k
    char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
1256
15.2k
    if (pszText == nullptr)
1257
0
        return nullptr;
1258
1259
15.2k
    size_t nLength = 0;
1260
30.4k
    for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
1261
15.2k
         psThis = psThis->psNext)
1262
15.2k
    {
1263
15.2k
        if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
1264
0
        {
1265
0
            VSIFree(pszText);
1266
0
            return nullptr;
1267
0
        }
1268
15.2k
    }
1269
1270
15.2k
    return pszText;
1271
15.2k
}
1272
1273
/************************************************************************/
1274
/*                          CPLCreateXMLNode()                          */
1275
/************************************************************************/
1276
1277
#ifdef DEBUG
1278
// Debug-only sink: _CPLCreateXMLNode() stores every parentless node it
// creates here, so that static analyzers see a reference to the node.
static CPLXMLNode *psDummyStaticNode;
1279
#endif
1280
1281
/**
1282
 * \brief Create a document tree item.
1283
 *
1284
 * Create a single CPLXMLNode object with the desired value and type, and
1285
 * attach it as a child of the indicated parent.
1286
 *
1287
 * @param poParent the parent to which this node should be attached as a
1288
 * child.  May be NULL to keep as free standing.
1289
 * @param eType the type of the newly created node
1290
 * @param pszText the value of the newly created node
1291
 *
1292
 * @return the newly created node, now owned by the caller (or parent node).
1293
 */
1294
1295
CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                             const char *pszText)
{
    // Delegate to the variant that may return NULL, and report such a
    // failure as a fatal out-of-memory error.
    CPLXMLNode *const psCreated = _CPLCreateXMLNode(poParent, eType, pszText);
    if (psCreated == nullptr)
        CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");

    return psCreated;
}
1306
1307
/************************************************************************/
1308
/*                         _CPLCreateXMLNode()                          */
1309
/************************************************************************/
1310
1311
/* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
1312
/* situation */
1313
1314
static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                                     const char *pszText)
{
    /* -------------------------------------------------------------------- */
    /*      Allocate a zero-initialized node.                               */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *psNode =
        static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
    if (psNode == nullptr)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
        return nullptr;
    }

    /* -------------------------------------------------------------------- */
    /*      Fill it in.  A NULL text is stored as an empty string.          */
    /* -------------------------------------------------------------------- */
    psNode->eType = eType;
    psNode->pszValue = VSIStrdup(pszText != nullptr ? pszText : "");
    if (psNode->pszValue == nullptr)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Cannot allocate psNode->pszValue");
        VSIFree(psNode);
        return nullptr;
    }

    /* -------------------------------------------------------------------- */
    /*      Link into the parent's child list, if a parent was given.       */
    /* -------------------------------------------------------------------- */
    if (poParent != nullptr)
    {
        const bool bIsAttribute = (eType == CXT_Attribute);
        CPLXMLNode *psPrev = poParent->psChild;

        if (psPrev == nullptr)
        {
            // First child of the parent.
            poParent->psChild = psNode;
        }
        else if (bIsAttribute && psPrev->psNext == nullptr &&
                 psPrev->eType == CXT_Text)
        {
            // The only existing child is text: the attribute goes in front.
            psNode->psNext = psPrev;
            poParent->psChild = psNode;
        }
        else
        {
            // Walk to the last child.  An attribute stops early so that it
            // is inserted just before the first text node found after the
            // head of the list.
            for (; psPrev->psNext != nullptr; psPrev = psPrev->psNext)
            {
                if (bIsAttribute && psPrev->psNext->eType == CXT_Text)
                {
                    psNode->psNext = psPrev->psNext;
                    break;
                }
            }

            psPrev->psNext = psNode;
        }
    }
#ifdef DEBUG
    else
    {
        // Coverity sometimes fails to see that a non-NULL parent is passed
        // and assumes this branch runs, then reports the new object as
        // leaked by the caller.  Storing it in a static is an ugly hack
        // meant to make it believe someone references the node.
        psDummyStaticNode = psNode;
    }
#endif

    return psNode;
}
1387
1388
/************************************************************************/
1389
/*                         CPLDestroyXMLNode()                          */
1390
/************************************************************************/
1391
1392
/**
1393
 * \brief Destroy a tree.
1394
 *
1395
 * This function frees resources associated with a CPLXMLNode and all its
1396
 * children nodes.
1397
 *
1398
 * @param psNode the tree to free.
1399
 */
1400
1401
void CPLDestroyXMLNode(CPLXMLNode *psNode)
{
    // Non-recursive teardown: before a node is released, its child chain is
    // spliced in front of its remaining siblings, so the whole tree ends up
    // being walked as one flat psNext list.
    CPLXMLNode *psCur = psNode;
    while (psCur != nullptr)
    {
        if (psCur->pszValue != nullptr)
            CPLFree(psCur->pszValue);

        CPLXMLNode *psFollowing = psCur->psNext;
        CPLXMLNode *const psFirstChild = psCur->psChild;
        if (psFirstChild != nullptr)
        {
            if (psFollowing != nullptr)
            {
                // Hook the pending siblings after the last child.
                CPLXMLNode *psTail = psFirstChild;
                while (psTail->psNext != nullptr)
                    psTail = psTail->psNext;
                psTail->psNext = psFollowing;
            }
            // Children are visited before the siblings.
            psFollowing = psFirstChild;
        }

        CPLFree(psCur);
        psCur = psFollowing;
    }
}
1431
1432
/************************************************************************/
1433
/*                           CPLSearchXMLNode()                         */
1434
/************************************************************************/
1435
1436
/**
1437
 * \brief Search for a node in document.
1438
 *
1439
 * Searches the children (and potentially siblings) of the document
1440
 * passed in for the named element or attribute.  To search following
1441
 * siblings as well as children, prefix the pszElement name with an equal
1442
 * sign.  This function does a pre-order traversal of the document tree.
1443
 * So it will first match against the current node, then its first child,
1444
 * that child's first child, and so on.
1445
 *
1446
 * Use CPLGetXMLNode() to find a specific child, or along a specific
1447
 * node path.
1448
 *
1449
 * @param psRoot the subtree to search.  This should be a node of type
1450
 * CXT_Element.  NULL is safe.
1451
 *
1452
 * @param pszElement the name of the element or attribute to search for.
1453
 *
1454
 * @return The matching node or NULL on failure.
1455
 */
1456
1457
CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)
{
    if (psRoot == nullptr || pszElement == nullptr)
        return nullptr;

    // A leading '=' requests that the following siblings of psRoot be
    // searched as well; the marker itself is not part of the name.
    const bool bAlsoSiblings = (pszElement[0] == '=');
    if (bAlsoSiblings)
        ++pszElement;

    // Only element and attribute nodes are matched by name.
    if ((psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute) &&
        EQUAL(pszElement, psRoot->pszValue))
    {
        return psRoot;
    }

    // Look at each child, descending into those that have children.
    for (CPLXMLNode *psIter = psRoot->psChild; psIter != nullptr;
         psIter = psIter->psNext)
    {
        const bool bNameable =
            psIter->eType == CXT_Element || psIter->eType == CXT_Attribute;
        if (bNameable && EQUAL(pszElement, psIter->pszValue))
            return psIter;

        if (psIter->psChild == nullptr)
            continue;

        CPLXMLNode *psHit = CPLSearchXMLNode(psIter, pszElement);
        if (psHit != nullptr)
            return psHit;
    }

    if (!bAlsoSiblings)
        return nullptr;

    // Side search: try every following sibling of the starting node.
    for (CPLXMLNode *psSibling = psRoot->psNext; psSibling != nullptr;
         psSibling = psSibling->psNext)
    {
        CPLXMLNode *psHit = CPLSearchXMLNode(psSibling, pszElement);
        if (psHit != nullptr)
            return psHit;
    }

    return nullptr;
}
1514
1515
/************************************************************************/
1516
/*                           CPLGetXMLNode()                            */
1517
/************************************************************************/
1518
1519
/**
1520
 * \brief Find node by path.
1521
 *
1522
 * Searches the document or subdocument indicated by psRoot for an element
1523
 * (or attribute) with the given path.  The path should consist of a set of
1524
 * element names separated by dots, not including the name of the root
1525
 * element (psRoot).  If the requested element is not found NULL is returned.
1526
 *
1527
 * Attribute names may only appear as the last item in the path.
1528
 *
1529
 * The search is done from the root nodes children, but all intermediate
1530
 * nodes in the path must be specified.  Searching for "name" would only find
1531
 * a name element or attribute if it is a direct child of the root, not at any
1532
 * level in the subdocument.
1533
 *
1534
 * If the pszPath is prefixed by "=" then the search will begin with the
1535
 * root node, and its siblings, instead of the root nodes children.  This
1536
 * is particularly useful when searching within a whole document which is
1537
 * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1538
 *
1539
 * @param psRoot the subtree in which to search.  This should be a node of
1540
 * type CXT_Element.  NULL is safe.
1541
 *
1542
 * @param pszPath the list of element names in the path (dot separated).
1543
 *
1544
 * @return the requested element node, or NULL if not found.
1545
 */
1546
1547
CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)
{
    if (psRoot == nullptr || pszPath == nullptr)
        return nullptr;

    // A leading '=' means the first path component is looked up among
    // psRoot and its siblings rather than among psRoot's children.
    bool bStartAtRoot = false;
    if (pszPath[0] == '=')
    {
        bStartAtRoot = true;
        ++pszPath;
    }

    // Single-component paths (no '.') use a stack-based token list so that
    // no tokenizer allocation is needed.
    const char *const apszSingle[2] = {pszPath, nullptr};
    char **papszSplit = nullptr;
    const char *const *papszNames = apszSingle;
    if (strchr(pszPath, '.') != nullptr)
    {
        papszSplit = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
        papszNames = papszSplit;
    }

    CPLXMLNode *psCurrent = psRoot;
    for (int i = 0; papszNames[i] != nullptr && psCurrent != nullptr; ++i)
    {
        CPLXMLNode *psCandidate =
            bStartAtRoot ? psCurrent : psCurrent->psChild;
        bStartAtRoot = false;  // Only applies to the first component.

        // Skip text nodes and nodes whose name does not match.
        while (psCandidate != nullptr &&
               (psCandidate->eType == CXT_Text ||
                !EQUAL(papszNames[i], psCandidate->pszValue)))
        {
            psCandidate = psCandidate->psNext;
        }

        // nullptr here ends the loop and becomes the return value.
        psCurrent = psCandidate;
    }

    if (papszSplit != nullptr)
        CSLDestroy(papszSplit);
    return psCurrent;
}
1614
1615
/************************************************************************/
1616
/*                           CPLGetXMLValue()                           */
1617
/************************************************************************/
1618
1619
/**
1620
 * \brief Fetch element/attribute value.
1621
 *
1622
 * Searches the document for the element/attribute value associated with
1623
 * the path.  The corresponding node is internally found with CPLGetXMLNode()
1624
 * (see there for details on path handling).  Once found, the value is
1625
 * considered to be the first CXT_Text child of the node.
1626
 *
1627
 * If the attribute/element search fails, or if the found node has no
1628
 * value then the passed default value is returned.
1629
 *
1630
 * The returned value points to memory within the document tree, and should
1631
 * not be altered or freed.
1632
 *
1633
 * @param psRoot the subtree in which to search.  This should be a node of
1634
 * type CXT_Element.  NULL is safe.
1635
 *
1636
 * @param pszPath the list of element names in the path (dot separated).  An
1637
 * empty path means get the value of the psRoot node.
1638
 *
1639
 * @param pszDefault the value to return if a corresponding value is not
1640
 * found, may be NULL.
1641
 *
1642
 * @return the requested value or pszDefault if not found.
1643
 */
1644
1645
const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
1646
                           const char *pszDefault)
1647
1648
3.06M
{
1649
3.06M
    const CPLXMLNode *psTarget = nullptr;
1650
1651
3.06M
    if (pszPath == nullptr || *pszPath == '\0')
1652
35
        psTarget = psRoot;
1653
3.06M
    else
1654
3.06M
        psTarget = CPLGetXMLNode(psRoot, pszPath);
1655
1656
3.06M
    if (psTarget == nullptr)
1657
1.83M
        return pszDefault;
1658
1659
1.23M
    if (psTarget->eType == CXT_Attribute)
1660
953k
    {
1661
953k
        CPLAssert(psTarget->psChild != nullptr &&
1662
953k
                  psTarget->psChild->eType == CXT_Text);
1663
1664
953k
        return psTarget->psChild->pszValue;
1665
953k
    }
1666
1667
281k
    if (psTarget->eType == CXT_Element)
1668
281k
    {
1669
        // Find first non-attribute child, and verify it is a single text
1670
        // with no siblings.
1671
1672
281k
        psTarget = psTarget->psChild;
1673
1674
283k
        while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
1675
2.71k
            psTarget = psTarget->psNext;
1676
1677
281k
        if (psTarget != nullptr && psTarget->eType == CXT_Text &&
1678
281k
            psTarget->psNext == nullptr)
1679
280k
            return psTarget->pszValue;
1680
281k
    }
1681
1682
20
    return pszDefault;
1683
281k
}
1684
1685
/************************************************************************/
1686
/*                           CPLAddXMLChild()                           */
1687
/************************************************************************/
1688
1689
/**
1690
 * \brief Add child node to parent.
1691
 *
1692
 * The passed child is added to the list of children of the indicated
1693
 * parent.  Normally the child is added at the end of the parents child
1694
 * list, but attributes (CXT_Attribute) will be inserted after any other
1695
 * attributes but before any other element type.  Ownership of the child
1696
 * node is effectively assumed by the parent node.   If the child has
1697
 * siblings (its psNext is not NULL) they will be trimmed, but if the child
1698
 * has children they are carried with it.
1699
 *
1700
 * @param psParent the node to attach the child to.  May not be NULL.
1701
 *
1702
 * @param psChild the child to add to the parent.  May not be NULL.  Should
1703
 * not be a child of any other parent.
1704
 */
1705
1706
void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
{
    // NOTE(review): psChild->psNext is only overwritten on the two
    // attribute-insertion paths below; when psChild becomes the first child
    // of an empty parent or is appended at the end, its psNext is left as is.
    CPLXMLNode *const psHead = psParent->psChild;
    if (psHead == nullptr)
    {
        psParent->psChild = psChild;
        return;
    }

    const bool bChildIsAttr = (psChild->eType == CXT_Attribute);

    // An attribute goes to the head of the list when the current first
    // child is not itself an attribute.
    if (bChildIsAttr && psHead->eType != CXT_Attribute)
    {
        psChild->psNext = psHead;
        psParent->psChild = psChild;
        return;
    }

    // Walk to the last node; an attribute is inserted as soon as the next
    // node is found not to be an attribute.
    CPLXMLNode *psTail = psHead;
    while (psTail->psNext != nullptr)
    {
        if (bChildIsAttr && psTail->psNext->eType != CXT_Attribute)
        {
            psChild->psNext = psTail->psNext;
            psTail->psNext = psChild;
            return;
        }
        psTail = psTail->psNext;
    }

    psTail->psNext = psChild;
}
1741
1742
/************************************************************************/
1743
/*                        CPLRemoveXMLChild()                           */
1744
/************************************************************************/
1745
1746
/**
1747
 * \brief Remove child node from parent.
1748
 *
1749
 * The passed child is removed from the child list of the passed parent,
1750
 * but the child is not destroyed.  The child retains ownership of its
1751
 * own children, but is cleanly removed from the child list of the parent.
1752
 *
1753
 * @param psParent the node to the child is attached to.
1754
 *
1755
 * @param psChild the child to remove.
1756
 *
1757
 * @return TRUE on success or FALSE if the child was not found.
1758
 */
1759
1760
int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
{
    if (psParent == nullptr)
        return FALSE;

    // ppsLink always addresses the pointer that leads to the node being
    // examined (the parent's psChild, or the previous sibling's psNext), so
    // unlinking is the same operation for the first child and later ones.
    for (CPLXMLNode **ppsLink = &psParent->psChild; *ppsLink != nullptr;
         ppsLink = &(*ppsLink)->psNext)
    {
        if (*ppsLink != psChild)
            continue;

        *ppsLink = psChild->psNext;
        psChild->psNext = nullptr;  // Fully detach from the old chain.
        return TRUE;
    }

    return FALSE;
}
1785
1786
/************************************************************************/
1787
/*                          CPLAddXMLSibling()                          */
1788
/************************************************************************/
1789
1790
/**
1791
 * \brief Add new sibling.
1792
 *
1793
 * The passed psNewSibling is added to the end of siblings of the
1794
 * psOlderSibling node.  That is, it is added to the end of the psNext
1795
 * chain.  There is no special handling if psNewSibling is an attribute.
1796
 * If this is required, use CPLAddXMLChild().
1797
 *
1798
 * @param psOlderSibling the node to attach the sibling after.
1799
 *
1800
 * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1801
 * chain.
1802
 */
1803
1804
void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)
{
    if (psOlderSibling == nullptr)
        return;

    // Advance to the last node of the psNext chain and append there.
    CPLXMLNode *psTail = psOlderSibling;
    for (; psTail->psNext != nullptr; psTail = psTail->psNext)
    {
    }

    psTail->psNext = psNewSibling;
}
1815
1816
/************************************************************************/
1817
/*                    CPLCreateXMLElementAndValue()                     */
1818
/************************************************************************/
1819
1820
/**
1821
 * \brief Create an element and text value.
1822
 *
1823
 * This function is a convenient short form for:
1824
 *
1825
 * \code
1826
 *     CPLXMLNode *psTextNode;
1827
 *     CPLXMLNode *psElementNode;
1828
 *
1829
 *     psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1830
 *     psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1831
 *
1832
 *     return psElementNode;
1833
 * \endcode
1834
 *
1835
 * It creates a CXT_Element node, with a CXT_Text child, and
1836
 * attaches the element to the passed parent.
1837
 *
1838
 * @param psParent the parent node to which the resulting node should
1839
 * be attached.  May be NULL to keep as freestanding.
1840
 *
1841
 * @param pszName the element name to create.
1842
 * @param pszValue the text to attach to the element. Must not be NULL.
1843
 *
1844
 * @return the pointer to the new element node.
1845
 */
1846
1847
CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
                                        const char *pszName,
                                        const char *pszValue)
{
    // Create the element under psParent first, then hang the text node
    // carrying pszValue beneath it.
    CPLXMLNode *const psElement =
        CPLCreateXMLNode(psParent, CXT_Element, pszName);

    CPLCreateXMLNode(psElement, CXT_Text, pszValue);

    return psElement;
}
1858
1859
/************************************************************************/
1860
/*                    CPLAddXMLAttributeAndValue()                      */
1861
/************************************************************************/
1862
1863
/**
1864
 * \brief Create an attribute and text value.
1865
 *
1866
 * This function is a convenient short form for:
1867
 *
1868
 * \code
1869
 *   CPLXMLNode *psAttributeNode;
1870
 *
1871
 *   psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
1872
 *   CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
1873
 * \endcode
1874
 *
1875
 * It creates a CXT_Attribute node, with a CXT_Text child, and
1876
 * attaches the element to the passed parent.
1877
 *
1878
 * @param psParent the parent node to which the resulting node should
1879
 * be attached.  Must not be NULL.
1880
 * @param pszName the attribute name to create.
1881
 * @param pszValue the text to attach to the attribute. Must not be NULL.
1882
 *
1883
 * @since GDAL 2.0
1884
 */
1885
1886
void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
                                const char *pszValue)
{
    CPLAssert(psParent != nullptr);

    // The attribute node is created under psParent and immediately used as
    // the parent of the text node holding its value.
    CPLCreateXMLNode(CPLCreateXMLNode(psParent, CXT_Attribute, pszName),
                     CXT_Text, pszValue);
}
1894
1895
/************************************************************************/
1896
/*                          CPLCloneXMLTree()                           */
1897
/************************************************************************/
1898
1899
/**
1900
 * \brief Copy tree.
1901
 *
1902
 * Creates a deep copy of a CPLXMLNode tree.
1903
 *
1904
 * @param psTree the tree to duplicate.
1905
 *
1906
 * @return a copy of the whole tree.
1907
 */
1908
1909
CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)
{
    CPLXMLNode *psHead = nullptr;

    // ppsSlot addresses the pointer that must receive the next copy: first
    // the returned head, afterwards the psNext of the previous copy.
    CPLXMLNode **ppsSlot = &psHead;

    for (const CPLXMLNode *psSrc = psTree; psSrc != nullptr;
         psSrc = psSrc->psNext)
    {
        CPLXMLNode *psDup =
            CPLCreateXMLNode(nullptr, psSrc->eType, psSrc->pszValue);
        *ppsSlot = psDup;

        // Children are duplicated recursively; siblings by this loop.
        if (psSrc->psChild != nullptr)
            psDup->psChild = CPLCloneXMLTree(psSrc->psChild);

        ppsSlot = &psDup->psNext;
    }

    return psHead;
}
1933
1934
/************************************************************************/
1935
/*                           CPLSetXMLValue()                           */
1936
/************************************************************************/
1937
1938
/**
1939
 * \brief Set element value by path.
1940
 *
1941
 * Find (or create) the target element or attribute specified in the
1942
 * path, and assign it the indicated value.
1943
 *
1944
 * Any path elements that do not already exist will be created.  The target
1945
 * nodes value (the first CXT_Text child) will be replaced with the provided
1946
 * value.
1947
 *
1948
 * If the target node is an attribute instead of an element, the name
1949
 * should be prefixed with a #.
1950
 *
1951
 * Example:
1952
 *   CPLSetXMLValue( psRoot, "Citation.Id.Description", "DOQ dataset" );
1953
 *   CPLSetXMLValue( psRoot, "Citation.Id.Description.#name", "doq" );
1954
 *
1955
 * @param psRoot the subdocument to be updated.
1956
 *
1957
 * @param pszPath the dot separated path to the target element/attribute.
1958
 *
1959
 * @param pszValue the text value to assign.
1960
 *
1961
 * @return TRUE on success.
1962
 */
1963
1964
int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
                   const char *pszValue)
{
    char **papszNames = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);

    // Descend one path component at a time, creating missing nodes.
    for (int i = 0; papszNames[i] != nullptr; ++i)
    {
        // Children are only looked up / created under element nodes.
        if (psRoot->eType != CXT_Element)
        {
            CSLDestroy(papszNames);
            return FALSE;
        }

        // A '#' prefix marks the component as an attribute name.
        const char *pszName = papszNames[i];
        const bool bWantAttribute = (pszName[0] == '#');
        if (bWantAttribute)
            ++pszName;

        // First non-text child with a matching name, if any.
        CPLXMLNode *psMatch = psRoot->psChild;
        while (psMatch != nullptr &&
               (psMatch->eType == CXT_Text ||
                !EQUAL(pszName, psMatch->pszValue)))
        {
            psMatch = psMatch->psNext;
        }

        if (psMatch == nullptr)
        {
            psMatch = CPLCreateXMLNode(
                psRoot, bWantAttribute ? CXT_Attribute : CXT_Element, pszName);
        }

        psRoot = psMatch;
    }

    CSLDestroy(papszNames);

    // Locate the first text child of the target node, if there is one.
    CPLXMLNode *psText = psRoot->psChild;
    while (psText != nullptr && psText->eType != CXT_Text)
        psText = psText->psNext;

    // Replace the existing text in place, or add a new text child.
    if (psText != nullptr)
    {
        CPLFree(psText->pszValue);
        psText->pszValue = CPLStrdup(pszValue);
    }
    else
    {
        CPLCreateXMLNode(psRoot, CXT_Text, pszValue);
    }

    return TRUE;
}
2032
2033
/************************************************************************/
2034
/*                        CPLStripXMLNamespace()                        */
2035
/************************************************************************/
2036
2037
/**
2038
 * \brief Strip indicated namespaces.
2039
 *
2040
 * The subdocument (psRoot) is recursively examined, and any elements
2041
 * with the indicated namespace prefix will have the namespace prefix
2042
 * stripped from the element names.  If the passed namespace is NULL, then
2043
 * all namespace prefixes will be stripped.
2044
 *
2045
 * Nodes other than elements should remain unaffected.  The changes are
2046
 * made "in place", and should not alter any node locations, only the
2047
 * pszValue field of affected nodes.
2048
 *
2049
 * @param psRoot the document to operate on.
2050
 * @param pszNamespace the name space prefix (not including colon), or NULL.
2051
 * @param bRecurse TRUE to recurse over whole document, or FALSE to only
2052
 * operate on the passed node.
2053
 */
2054
2055
void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
2056
                          int bRecurse)
2057
2058
9.83k
{
2059
9.83k
    size_t nNameSpaceLen = (pszNamespace) ? strlen(pszNamespace) : 0;
2060
2061
30.4k
    while (psRoot != nullptr)
2062
20.6k
    {
2063
20.6k
        if (psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute)
2064
10.2k
        {
2065
10.2k
            if (pszNamespace != nullptr)
2066
0
            {
2067
0
                if (EQUALN(pszNamespace, psRoot->pszValue, nNameSpaceLen) &&
2068
0
                    psRoot->pszValue[nNameSpaceLen] == ':')
2069
0
                {
2070
0
                    memmove(psRoot->pszValue,
2071
0
                            psRoot->pszValue + nNameSpaceLen + 1,
2072
0
                            strlen(psRoot->pszValue + nNameSpaceLen + 1) + 1);
2073
0
                }
2074
0
            }
2075
10.2k
            else
2076
10.2k
            {
2077
82.3k
                for (const char *pszCheck = psRoot->pszValue; *pszCheck != '\0';
2078
72.1k
                     pszCheck++)
2079
72.4k
                {
2080
72.4k
                    if (*pszCheck == ':')
2081
326
                    {
2082
326
                        memmove(psRoot->pszValue, pszCheck + 1,
2083
326
                                strlen(pszCheck + 1) + 1);
2084
326
                        break;
2085
326
                    }
2086
72.4k
                }
2087
10.2k
            }
2088
10.2k
        }
2089
2090
20.6k
        if (bRecurse)
2091
20.6k
        {
2092
20.6k
            if (psRoot->psChild != nullptr)
2093
9.81k
                CPLStripXMLNamespace(psRoot->psChild, pszNamespace, 1);
2094
2095
20.6k
            psRoot = psRoot->psNext;
2096
20.6k
        }
2097
0
        else
2098
0
        {
2099
0
            break;
2100
0
        }
2101
20.6k
    }
2102
9.83k
}
2103
2104
/************************************************************************/
2105
/*                          CPLParseXMLFile()                           */
2106
/************************************************************************/
2107
2108
/**
2109
 * \brief Parse XML file into tree.
2110
 *
2111
 * The named file is opened, loaded into memory as a big string, and
2112
 * parsed with CPLParseXMLString().  Errors in reading the file or parsing
2113
 * the XML will be reported by CPLError().
2114
 *
2115
 * The "large file" API is used, so XML files can come from virtualized
2116
 * files.
2117
 *
2118
 * @param pszFilename the file to open.
2119
 *
2120
 * @return NULL on failure, or the document tree on success.
2121
 */
2122
2123
CPLXMLNode *CPLParseXMLFile(const char *pszFilename)
{
    // Load the whole file into a single buffer.
    GByte *pabyContent = nullptr;
    const bool bIngested =
        VSIIngestFile(nullptr, pszFilename, &pabyContent, nullptr, -1) != 0;
    if (!bIngested)
        return nullptr;

    // Parse the buffer as one string, then release it; the returned tree
    // does not reference the buffer after this point as far as this
    // function is concerned.
    char *pszXML = reinterpret_cast<char *>(pabyContent);
    CPLXMLNode *psDocument = CPLParseXMLString(pszXML);
    CPLFree(pszXML);

    return psDocument;
}
2143
2144
/************************************************************************/
2145
/*                     CPLSerializeXMLTreeToFile()                      */
2146
/************************************************************************/
2147
2148
/**
2149
 * \brief Write document tree to a file.
2150
 *
2151
 * The passed document tree is converted into one big string (with
2152
 * CPLSerializeXMLTree()) and then written to the named file.  Errors writing
2153
 * the file will be reported by CPLError().  The source document tree is
2154
 * not altered.  If the output file already exists it will be overwritten.
2155
 *
2156
 * @param psTree the document tree to write.
2157
 * @param pszFilename the name of the file to write to.
2158
 * @return TRUE on success, FALSE otherwise.
2159
 */
2160
2161
int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)
2162
2163
0
{
2164
    /* -------------------------------------------------------------------- */
2165
    /*      Serialize document.                                             */
2166
    /* -------------------------------------------------------------------- */
2167
0
    char *pszDoc = CPLSerializeXMLTree(psTree);
2168
0
    if (pszDoc == nullptr)
2169
0
        return FALSE;
2170
2171
0
    const vsi_l_offset nLength = strlen(pszDoc);
2172
2173
    /* -------------------------------------------------------------------- */
2174
    /*      Create file.                                                    */
2175
    /* -------------------------------------------------------------------- */
2176
0
    VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
2177
0
    if (fp == nullptr)
2178
0
    {
2179
0
        CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
2180
0
                 pszFilename);
2181
0
        CPLFree(pszDoc);
2182
0
        return FALSE;
2183
0
    }
2184
2185
    /* -------------------------------------------------------------------- */
2186
    /*      Write file.                                                     */
2187
    /* -------------------------------------------------------------------- */
2188
0
    if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
2189
0
    {
2190
0
        CPLError(CE_Failure, CPLE_FileIO,
2191
0
                 "Failed to write whole XML document (%.500s).", pszFilename);
2192
0
        CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
2193
0
        CPLFree(pszDoc);
2194
0
        return FALSE;
2195
0
    }
2196
2197
    /* -------------------------------------------------------------------- */
2198
    /*      Cleanup                                                         */
2199
    /* -------------------------------------------------------------------- */
2200
0
    const bool bRet = VSIFCloseL(fp) == 0;
2201
0
    if (!bRet)
2202
0
    {
2203
0
        CPLError(CE_Failure, CPLE_FileIO,
2204
0
                 "Failed to write whole XML document (%.500s).", pszFilename);
2205
0
    }
2206
0
    CPLFree(pszDoc);
2207
2208
0
    return bRet;
2209
0
}
2210
2211
/************************************************************************/
2212
/*                       CPLCleanXMLElementName()                       */
2213
/************************************************************************/
2214
2215
/**
 * \brief Make string into safe XML token.
 *
 * Rewrites the string in place so that it can serve as an XML element
 * name: every character that is not considered legal is overwritten
 * with an underscore.  The length of the string is never changed.
 *
 * Characters that are kept untouched are:
 *  - any byte with the high bit set (e.g. bytes of a multi-byte sequence),
 *  - alphanumeric characters as reported by isalnum(),
 *  - '_' and '.'.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 *                  A null pointer is accepted and ignored.
 */

void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    char *pszCur = pszTarget;
    while (*pszCur != '\0')
    {
        // Work on the unsigned value so that isalnum() is never handed a
        // negative argument.
        const unsigned char chCur = static_cast<unsigned char>(*pszCur);
        const bool bKeep = (chCur & 0x80) != 0 || isalnum(chCur) ||
                           chCur == '_' || chCur == '.';
        if (!bKeep)
            *pszCur = '_';
        ++pszCur;
    }
}
2248
2249
/************************************************************************/
2250
/*                     CPLXMLNodeGetRAMUsageEstimate()                  */
2251
/************************************************************************/
2252
2253
static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
2254
                                            bool bVisitSiblings)
2255
0
{
2256
0
    size_t nRet = sizeof(CPLXMLNode);
2257
    // malloc() aligns on 16-byte boundaries on 64 bit.
2258
0
    nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
2259
0
    if (bVisitSiblings)
2260
0
    {
2261
0
        for (const CPLXMLNode *psIter = psNode->psNext; psIter;
2262
0
             psIter = psIter->psNext)
2263
0
        {
2264
0
            nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
2265
0
        }
2266
0
    }
2267
0
    if (psNode->psChild)
2268
0
    {
2269
0
        nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
2270
0
    }
2271
0
    return nRet;
2272
0
}
2273
2274
/** Return a conservative estimate of the RAM usage of this node, its children
2275
 * and siblings. The returned values is in bytes.
2276
 *
2277
 * @since 3.9
2278
 */
2279
size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
2280
0
{
2281
0
    return CPLXMLNodeGetRAMUsageEstimate(psNode, true);
2282
0
}
2283
2284
/************************************************************************/
2285
/*            CPLXMLTreeCloser::getDocumentElement()                    */
2286
/************************************************************************/
2287
2288
// Return the first top-level node that is an element and whose name does
// not start with '?', or nullptr when the chain starting at get() contains
// no such node.
CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
{
    // Skip the declaration (and any other non-element node) and assume the
    // next element is the root element.
    for (CPLXMLNode *psIter = get(); psIter != nullptr;
         psIter = psIter->psNext)
    {
        if (psIter->eType == CXT_Element && psIter->pszValue[0] != '?')
            return psIter;
    }
    return nullptr;
}