Coverage Report

Created: 2025-06-13 06:29

/src/gdal/port/cpl_minixml.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implementation of MiniXML Parser and handling.
5
 * Author:   Frank Warmerdam, warmerdam@pobox.com
6
 *
7
 **********************************************************************
8
 * Copyright (c) 2001, Frank Warmerdam
9
 * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 **********************************************************************
13
 *
14
 * Independent Security Audit 2003/04/05 Andrey Kiselev:
15
 *   Completed audit of this module. Any documents may be parsed without
16
 *   buffer overflows and stack corruptions.
17
 *
18
 * Security Audit 2003/03/28 warmerda:
19
 *   Completed security audit.  I believe that this module may be safely used
20
 *   to parse, and serialize arbitrary documents provided by a potentially
21
 *   hostile source.
22
 *
23
 */
24
25
#include "cpl_minixml.h"
26
27
#include <cctype>
28
#include <climits>
29
#include <cstddef>
30
#include <cstdio>
31
#include <cstring>
32
33
#include <algorithm>
34
35
#include "cpl_conv.h"
36
#include "cpl_error.h"
37
#include "cpl_string.h"
38
#include "cpl_vsi.h"
39
40
/** Kinds of lexical tokens produced by ReadToken(). */
enum XMLTokenType
{
    TNone,           // End of input, or tokenization failure.
    TString,         // Quoted string, CDATA content or free text.
    TOpen,           // '<' seen outside of an element.
    TClose,          // '>' seen inside of an element.
    TEqual,          // '=' seen inside of an element.
    TToken,          // Name-like token inside of an element.
    TSlashClose,     // "/>" terminator.
    TQuestionClose,  // "?>" terminator.
    TComment,        // Content of a <!-- ... --> comment.
    TLiteral         // Verbatim <!DOCTYPE ...> declaration.
};
53
54
typedef struct
55
{
56
    CPLXMLNode *psFirstNode;
57
    CPLXMLNode *psLastChild;
58
} StackContext;
59
60
typedef struct
61
{
62
    const char *pszInput;
63
    int nInputOffset;
64
    int nInputLine;
65
    bool bInElement;
66
    XMLTokenType eTokenType;
67
    char *pszToken;
68
    size_t nTokenMaxSize;
69
    size_t nTokenSize;
70
71
    int nStackMaxSize;
72
    int nStackSize;
73
    StackContext *papsStack;
74
75
    CPLXMLNode *psFirstNode;
76
    CPLXMLNode *psLastNode;
77
} ParseContext;
78
79
// Forward declaration: node creation helper used by the parser below.
// Callers in this file test the returned pointer against nullptr.
static CPLXMLNode *
_CPLCreateXMLNode(CPLXMLNode *poParent,
                  CPLXMLNodeType eType,
                  const char *pszText);
81
82
/************************************************************************/
83
/*                              ReadChar()                              */
84
/************************************************************************/
85
86
/**
 * Return the character at the current input position and advance past it.
 *
 * The terminating NUL is never consumed: once the end of the input is
 * reached, every further call returns '\0' again. The line counter is
 * incremented when a line feed (character code 10) is read.
 */
static CPL_INLINE char ReadChar(ParseContext *psContext)
{
    const char chLineFeed = 10;
    const char chCurrent = psContext->pszInput[psContext->nInputOffset];

    // Stay on the terminator so that the end of input is sticky.
    if (chCurrent == '\0')
        return chCurrent;

    ++psContext->nInputOffset;
    if (chCurrent == chLineFeed)
        ++psContext->nInputLine;

    return chCurrent;
}
98
99
/************************************************************************/
100
/*                             UnreadChar()                             */
101
/************************************************************************/
102
103
/**
 * Push back the character most recently returned by ReadChar().
 *
 * Nothing is done for '\0', since ReadChar() does not advance past the
 * terminator. Otherwise the input offset is stepped back by one, and the
 * line counter too when the character is a line feed (character code 10).
 *
 * @param psContext   parse context.
 * @param chToUnread  character to push back; asserted to match the input
 *                    character just before the current offset.
 */
static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)
{
    if (chToUnread != '\0')
    {
        CPLAssert(chToUnread ==
                  psContext->pszInput[psContext->nInputOffset - 1]);

        --psContext->nInputOffset;

        const char chLineFeed = 10;
        if (chToUnread == chLineFeed)
            --psContext->nInputLine;
    }
}
116
117
/************************************************************************/
118
/*                           ReallocToken()                             */
119
/************************************************************************/
120
121
/**
 * Double the capacity of the token buffer.
 *
 * Growth is refused once the current capacity exceeds INT_MAX / 2. On any
 * failure an out-of-memory error is emitted, the token buffer is released
 * and psContext->pszToken is set to nullptr.
 *
 * @return true when the buffer was enlarged, false on failure.
 */
static bool ReallocToken(ParseContext *psContext)
{
    char *pszGrown = nullptr;

    if (psContext->nTokenMaxSize > INT_MAX / 2)
    {
        // Doubling would exceed what we accept to allocate.
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Out of memory allocating %d*2 bytes",
                 static_cast<int>(psContext->nTokenMaxSize));
    }
    else
    {
        psContext->nTokenMaxSize *= 2;
        pszGrown = static_cast<char *>(
            VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize));
        if (pszGrown == nullptr)
        {
            CPLError(CE_Failure, CPLE_OutOfMemory,
                     "Out of memory allocating %d bytes",
                     static_cast<int>(psContext->nTokenMaxSize));
        }
    }

    // Common failure exit: drop the old buffer so nobody keeps using it.
    if (pszGrown == nullptr)
    {
        VSIFree(psContext->pszToken);
        psContext->pszToken = nullptr;
        return false;
    }

    psContext->pszToken = pszGrown;
    return true;
}
148
149
/************************************************************************/
150
/*                             AddToToken()                             */
151
/************************************************************************/
152
153
/**
 * Append one character to the current token, keeping it NUL terminated.
 *
 * The buffer is grown through ReallocToken() when fewer than two bytes
 * would remain free.
 *
 * @return false if the buffer could not be grown, true otherwise.
 */
static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)
{
    const bool bMustGrow =
        psContext->nTokenSize >= psContext->nTokenMaxSize - 2;
    if (bMustGrow && !ReallocToken(psContext))
        return false;

    char *pszEnd = psContext->pszToken + psContext->nTokenSize;
    pszEnd[0] = chNewChar;
    pszEnd[1] = '\0';
    ++psContext->nTokenSize;
    return true;
}
166
167
// TODO(schwehr): Remove the goto.
//
// Append chNewChar to the token of psContext, jumping to the enclosing
// function's "fail" label if the token buffer cannot be grown.
//
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement: it can then be used safely as the body of an unbraced
// if / else / while, and must be followed by a semicolon like a function
// call.
#define AddToToken(psContext, chNewChar)                                       \
    do                                                                         \
    {                                                                          \
        if (!_AddToToken(psContext, chNewChar))                                \
            goto fail;                                                         \
    } while (0)
171
172
/************************************************************************/
173
/*                             ReadToken()                              */
174
/************************************************************************/
175
176
static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)
177
178
875
{
179
875
    psContext->nTokenSize = 0;
180
875
    psContext->pszToken[0] = '\0';
181
182
875
    char chNext = ReadChar(psContext);
183
875
    while (isspace(static_cast<unsigned char>(chNext)))
184
0
        chNext = ReadChar(psContext);
185
186
    /* -------------------------------------------------------------------- */
187
    /*      Handle comments.                                                */
188
    /* -------------------------------------------------------------------- */
189
875
    if (chNext == '<' &&
190
875
        STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
191
0
    {
192
0
        psContext->eTokenType = TComment;
193
194
        // Skip "!--" characters.
195
0
        ReadChar(psContext);
196
0
        ReadChar(psContext);
197
0
        ReadChar(psContext);
198
199
0
        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
200
0
                               "-->") &&
201
0
               (chNext = ReadChar(psContext)) != '\0')
202
0
            AddToToken(psContext, chNext);
203
204
        // Skip "-->" characters.
205
0
        ReadChar(psContext);
206
0
        ReadChar(psContext);
207
0
        ReadChar(psContext);
208
0
    }
209
    /* -------------------------------------------------------------------- */
210
    /*      Handle DOCTYPE.                                                 */
211
    /* -------------------------------------------------------------------- */
212
875
    else if (chNext == '<' &&
213
875
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
214
875
                            "!DOCTYPE"))
215
0
    {
216
0
        bool bInQuotes = false;
217
0
        psContext->eTokenType = TLiteral;
218
219
0
        AddToToken(psContext, '<');
220
0
        do
221
0
        {
222
0
            chNext = ReadChar(psContext);
223
0
            if (chNext == '\0')
224
0
            {
225
0
                eLastErrorType = CE_Failure;
226
0
                CPLError(eLastErrorType, CPLE_AppDefined,
227
0
                         "Parse error in DOCTYPE on or before line %d, "
228
0
                         "reached end of file without '>'.",
229
0
                         psContext->nInputLine);
230
231
0
                break;
232
0
            }
233
234
            /* The markup declaration block within a DOCTYPE tag consists of:
235
             * - a left square bracket [
236
             * - a list of declarations
237
             * - a right square bracket ]
238
             * Example:
239
             * <!DOCTYPE RootElement [ ...declarations... ]>
240
             */
241
0
            if (chNext == '[')
242
0
            {
243
0
                AddToToken(psContext, chNext);
244
245
0
                do
246
0
                {
247
0
                    chNext = ReadChar(psContext);
248
0
                    if (chNext == ']')
249
0
                        break;
250
0
                    AddToToken(psContext, chNext);
251
0
                } while (chNext != '\0' &&
252
0
                         !STARTS_WITH_CI(psContext->pszInput +
253
0
                                             psContext->nInputOffset,
254
0
                                         "]>"));
255
256
0
                if (chNext == '\0')
257
0
                {
258
0
                    eLastErrorType = CE_Failure;
259
0
                    CPLError(eLastErrorType, CPLE_AppDefined,
260
0
                             "Parse error in DOCTYPE on or before line %d, "
261
0
                             "reached end of file without ']'.",
262
0
                             psContext->nInputLine);
263
0
                    break;
264
0
                }
265
266
0
                if (chNext != ']')
267
0
                {
268
0
                    chNext = ReadChar(psContext);
269
0
                    AddToToken(psContext, chNext);
270
271
                    // Skip ">" character, will be consumed below.
272
0
                    chNext = ReadChar(psContext);
273
0
                }
274
0
            }
275
276
0
            if (chNext == '\"')
277
0
                bInQuotes = !bInQuotes;
278
279
0
            if (chNext == '>' && !bInQuotes)
280
0
            {
281
0
                AddToToken(psContext, '>');
282
0
                break;
283
0
            }
284
285
0
            AddToToken(psContext, chNext);
286
0
        } while (true);
287
0
    }
288
    /* -------------------------------------------------------------------- */
289
    /*      Handle CDATA.                                                   */
290
    /* -------------------------------------------------------------------- */
291
875
    else if (chNext == '<' &&
292
875
             STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
293
875
                            "![CDATA["))
294
0
    {
295
0
        psContext->eTokenType = TString;
296
297
        // Skip !CDATA[
298
0
        ReadChar(psContext);
299
0
        ReadChar(psContext);
300
0
        ReadChar(psContext);
301
0
        ReadChar(psContext);
302
0
        ReadChar(psContext);
303
0
        ReadChar(psContext);
304
0
        ReadChar(psContext);
305
0
        ReadChar(psContext);
306
307
0
        while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
308
0
                               "]]>") &&
309
0
               (chNext = ReadChar(psContext)) != '\0')
310
0
            AddToToken(psContext, chNext);
311
312
        // Skip "]]>" characters.
313
0
        ReadChar(psContext);
314
0
        ReadChar(psContext);
315
0
        ReadChar(psContext);
316
0
    }
317
    /* -------------------------------------------------------------------- */
318
    /*      Simple single tokens of interest.                               */
319
    /* -------------------------------------------------------------------- */
320
875
    else if (chNext == '<' && !psContext->bInElement)
321
0
    {
322
0
        psContext->eTokenType = TOpen;
323
0
        psContext->bInElement = true;
324
0
    }
325
875
    else if (chNext == '>' && psContext->bInElement)
326
0
    {
327
0
        psContext->eTokenType = TClose;
328
0
        psContext->bInElement = false;
329
0
    }
330
875
    else if (chNext == '=' && psContext->bInElement)
331
0
    {
332
0
        psContext->eTokenType = TEqual;
333
0
    }
334
875
    else if (chNext == '\0')
335
875
    {
336
875
        psContext->eTokenType = TNone;
337
875
    }
338
    /* -------------------------------------------------------------------- */
339
    /*      Handle the /> token terminator.                                 */
340
    /* -------------------------------------------------------------------- */
341
0
    else if (chNext == '/' && psContext->bInElement &&
342
0
             psContext->pszInput[psContext->nInputOffset] == '>')
343
0
    {
344
0
        chNext = ReadChar(psContext);
345
0
        (void)chNext;
346
0
        CPLAssert(chNext == '>');
347
348
0
        psContext->eTokenType = TSlashClose;
349
0
        psContext->bInElement = false;
350
0
    }
351
    /* -------------------------------------------------------------------- */
352
    /*      Handle the ?> token terminator.                                 */
353
    /* -------------------------------------------------------------------- */
354
0
    else if (chNext == '?' && psContext->bInElement &&
355
0
             psContext->pszInput[psContext->nInputOffset] == '>')
356
0
    {
357
0
        chNext = ReadChar(psContext);
358
0
        (void)chNext;
359
0
        CPLAssert(chNext == '>');
360
361
0
        psContext->eTokenType = TQuestionClose;
362
0
        psContext->bInElement = false;
363
0
    }
364
    /* -------------------------------------------------------------------- */
365
    /*      Collect a quoted string.                                        */
366
    /* -------------------------------------------------------------------- */
367
0
    else if (psContext->bInElement && chNext == '"')
368
0
    {
369
0
        psContext->eTokenType = TString;
370
371
0
        while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
372
0
            AddToToken(psContext, chNext);
373
374
0
        if (chNext != '"')
375
0
        {
376
0
            psContext->eTokenType = TNone;
377
0
            eLastErrorType = CE_Failure;
378
0
            CPLError(
379
0
                eLastErrorType, CPLE_AppDefined,
380
0
                "Parse error on line %d, reached EOF before closing quote.",
381
0
                psContext->nInputLine);
382
0
        }
383
384
        // Do we need to unescape it?
385
0
        if (strchr(psContext->pszToken, '&') != nullptr)
386
0
        {
387
0
            int nLength = 0;
388
0
            char *pszUnescaped =
389
0
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
390
0
            strcpy(psContext->pszToken, pszUnescaped);
391
0
            CPLFree(pszUnescaped);
392
0
            psContext->nTokenSize = strlen(psContext->pszToken);
393
0
        }
394
0
    }
395
0
    else if (psContext->bInElement && chNext == '\'')
396
0
    {
397
0
        psContext->eTokenType = TString;
398
399
0
        while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
400
0
            AddToToken(psContext, chNext);
401
402
0
        if (chNext != '\'')
403
0
        {
404
0
            psContext->eTokenType = TNone;
405
0
            eLastErrorType = CE_Failure;
406
0
            CPLError(
407
0
                eLastErrorType, CPLE_AppDefined,
408
0
                "Parse error on line %d, reached EOF before closing quote.",
409
0
                psContext->nInputLine);
410
0
        }
411
412
        // Do we need to unescape it?
413
0
        if (strchr(psContext->pszToken, '&') != nullptr)
414
0
        {
415
0
            int nLength = 0;
416
0
            char *pszUnescaped =
417
0
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
418
0
            strcpy(psContext->pszToken, pszUnescaped);
419
0
            CPLFree(pszUnescaped);
420
0
            psContext->nTokenSize = strlen(psContext->pszToken);
421
0
        }
422
0
    }
423
    /* -------------------------------------------------------------------- */
424
    /*      Collect an unquoted string, terminated by a open angle          */
425
    /*      bracket.                                                        */
426
    /* -------------------------------------------------------------------- */
427
0
    else if (!psContext->bInElement)
428
0
    {
429
0
        psContext->eTokenType = TString;
430
431
0
        AddToToken(psContext, chNext);
432
0
        while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
433
0
            AddToToken(psContext, chNext);
434
0
        UnreadChar(psContext, chNext);
435
436
        // Do we need to unescape it?
437
0
        if (strchr(psContext->pszToken, '&') != nullptr)
438
0
        {
439
0
            int nLength = 0;
440
0
            char *pszUnescaped =
441
0
                CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
442
0
            strcpy(psContext->pszToken, pszUnescaped);
443
0
            CPLFree(pszUnescaped);
444
0
            psContext->nTokenSize = strlen(psContext->pszToken);
445
0
        }
446
0
    }
447
448
    /* -------------------------------------------------------------------- */
449
    /*      Collect a regular token terminated by white space, or           */
450
    /*      special character(s) like an equal sign.                        */
451
    /* -------------------------------------------------------------------- */
452
0
    else
453
0
    {
454
0
        psContext->eTokenType = TToken;
455
456
        // Add the first character to the token regardless of what it is.
457
0
        AddToToken(psContext, chNext);
458
459
0
        for (chNext = ReadChar(psContext);
460
0
             (chNext >= 'A' && chNext <= 'Z') ||
461
0
             (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
462
0
             chNext == '_' || chNext == '.' || chNext == ':' ||
463
0
             (chNext >= '0' && chNext <= '9');
464
0
             chNext = ReadChar(psContext))
465
0
        {
466
0
            AddToToken(psContext, chNext);
467
0
        }
468
469
0
        UnreadChar(psContext, chNext);
470
0
    }
471
472
875
    return psContext->eTokenType;
473
474
0
fail:
475
0
    psContext->eTokenType = TNone;
476
0
    return TNone;
477
875
}
478
479
/************************************************************************/
480
/*                              PushNode()                              */
481
/************************************************************************/
482
483
static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
484
                     CPLErr &eLastErrorType)
485
486
0
{
487
0
    if (psContext->nStackMaxSize <= psContext->nStackSize)
488
0
    {
489
        // Somewhat arbitrary number.
490
0
        if (psContext->nStackMaxSize >= 10000)
491
0
        {
492
0
            eLastErrorType = CE_Failure;
493
0
            CPLError(CE_Failure, CPLE_NotSupported,
494
0
                     "XML element depth beyond 10000. Giving up");
495
0
            VSIFree(psContext->papsStack);
496
0
            psContext->papsStack = nullptr;
497
0
            return false;
498
0
        }
499
0
        psContext->nStackMaxSize += 10;
500
501
0
        StackContext *papsStack = static_cast<StackContext *>(
502
0
            VSIRealloc(psContext->papsStack,
503
0
                       sizeof(StackContext) * psContext->nStackMaxSize));
504
0
        if (papsStack == nullptr)
505
0
        {
506
0
            eLastErrorType = CE_Failure;
507
0
            CPLError(CE_Failure, CPLE_OutOfMemory,
508
0
                     "Out of memory allocating %d bytes",
509
0
                     static_cast<int>(sizeof(StackContext)) *
510
0
                         psContext->nStackMaxSize);
511
0
            VSIFree(psContext->papsStack);
512
0
            psContext->papsStack = nullptr;
513
0
            return false;
514
0
        }
515
0
        psContext->papsStack = papsStack;
516
0
    }
517
0
#ifdef DEBUG
518
    // To make Coverity happy, but cannot happen.
519
0
    if (psContext->papsStack == nullptr)
520
0
        return false;
521
0
#endif
522
523
0
    psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
524
0
    psContext->papsStack[psContext->nStackSize].psLastChild = nullptr;
525
0
    psContext->nStackSize++;
526
527
0
    return true;
528
0
}
529
530
/************************************************************************/
531
/*                             AttachNode()                             */
532
/*                                                                      */
533
/*      Attach the passed node as a child of the current node.          */
534
/*      Special handling exists for adding siblings to psFirst if       */
535
/*      there is nothing on the stack.                                  */
536
/************************************************************************/
537
538
/**
 * Link psNode into the tree being built.
 *
 * - If no node has been attached yet, psNode becomes the first (and last)
 *   top-level node.
 * - Otherwise, if the element stack is empty, psNode is appended to the
 *   chain of top-level siblings.
 * - Otherwise psNode is appended to the children of the element on top of
 *   the stack.
 */
static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)

{
    // First node ever seen: it starts the top-level chain.
    if (psContext->psFirstNode == nullptr)
    {
        psContext->psFirstNode = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    // No open element: new top-level sibling.
    if (psContext->nStackSize == 0)
    {
        psContext->psLastNode->psNext = psNode;
        psContext->psLastNode = psNode;
        return;
    }

    // Child of the innermost open element.
    StackContext &sTop = psContext->papsStack[psContext->nStackSize - 1];
    CPLXMLNode *psParent = sTop.psFirstNode;
    if (psParent->psChild == nullptr)
        psParent->psChild = psNode;
    else
        sTop.psLastChild->psNext = psNode;
    sTop.psLastChild = psNode;
}
567
568
/************************************************************************/
569
/*                         CPLParseXMLString()                          */
570
/************************************************************************/
571
572
/**
573
 * \brief Parse an XML string into tree form.
574
 *
575
 * The passed document is parsed into a CPLXMLNode tree representation.
576
 * If the document is not well formed XML then NULL is returned, and errors
577
 * are reported via CPLError().  No validation beyond wellformedness is
578
 * done.  The CPLParseXMLFile() convenience function can be used to parse
579
 * from a file.
580
 *
581
 * The returned document tree is owned by the caller and should be freed
582
 * with CPLDestroyXMLNode() when no longer needed.
583
 *
584
 * If the document has more than one "root level" element then those after the
585
 * first will be attached to the first as siblings (via the psNext pointers)
586
 * even though there is no common parent.  A document with no XML structure
587
 * (no angle brackets for instance) would be considered well formed, and
588
 * returned as a single CXT_Text node.
589
 *
590
 * @param pszString the document to parse.
591
 *
592
 * @return parsed tree or NULL on error.
593
 */
594
595
CPLXMLNode *CPLParseXMLString(const char *pszString)
596
597
875
{
598
875
    if (pszString == nullptr)
599
0
    {
600
0
        CPLError(CE_Failure, CPLE_AppDefined,
601
0
                 "CPLParseXMLString() called with NULL pointer.");
602
0
        return nullptr;
603
0
    }
604
605
    // Save back error context.
606
875
    const CPLErr eErrClass = CPLGetLastErrorType();
607
875
    const CPLErrorNum nErrNum = CPLGetLastErrorNo();
608
875
    const CPLString osErrMsg = CPLGetLastErrorMsg();
609
610
    // Reset it now.
611
875
    CPLErrorSetState(CE_None, CPLE_AppDefined, "");
612
613
    /* -------------------------------------------------------------------- */
614
    /*      Check for a UTF-8 BOM and skip if found                         */
615
    /*                                                                      */
616
    /*      TODO: BOM is variable-length parameter and depends on encoding. */
617
    /*            Add BOM detection for other encodings.                    */
618
    /* -------------------------------------------------------------------- */
619
620
    // Used to skip to actual beginning of XML data.
621
875
    if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
622
875
        (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
623
875
        (static_cast<unsigned char>(pszString[2]) == 0xBF))
624
0
    {
625
0
        pszString += 3;
626
0
    }
627
628
    /* -------------------------------------------------------------------- */
629
    /*      Initialize parse context.                                       */
630
    /* -------------------------------------------------------------------- */
631
875
    ParseContext sContext;
632
875
    sContext.pszInput = pszString;
633
875
    sContext.nInputOffset = 0;
634
875
    sContext.nInputLine = 0;
635
875
    sContext.bInElement = false;
636
875
    sContext.nTokenMaxSize = 10;
637
875
    sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
638
875
    if (sContext.pszToken == nullptr)
639
0
        return nullptr;
640
875
    sContext.nTokenSize = 0;
641
875
    sContext.eTokenType = TNone;
642
875
    sContext.nStackMaxSize = 0;
643
875
    sContext.nStackSize = 0;
644
875
    sContext.papsStack = nullptr;
645
875
    sContext.psFirstNode = nullptr;
646
875
    sContext.psLastNode = nullptr;
647
648
875
#ifdef DEBUG
649
875
    bool bRecoverableError = true;
650
875
#endif
651
875
    CPLErr eLastErrorType = CE_None;
652
653
    /* ==================================================================== */
654
    /*      Loop reading tokens.                                            */
655
    /* ==================================================================== */
656
875
    while (ReadToken(&sContext, eLastErrorType) != TNone)
657
0
    {
658
0
    loop_beginning:
659
        /* --------------------------------------------------------------------
660
         */
661
        /*      Create a new element. */
662
        /* --------------------------------------------------------------------
663
         */
664
0
        if (sContext.eTokenType == TOpen)
665
0
        {
666
0
            if (ReadToken(&sContext, eLastErrorType) != TToken)
667
0
            {
668
0
                eLastErrorType = CE_Failure;
669
0
                CPLError(eLastErrorType, CPLE_AppDefined,
670
0
                         "Line %d: Didn't find element token after "
671
0
                         "open angle bracket.",
672
0
                         sContext.nInputLine);
673
0
                break;
674
0
            }
675
676
0
            CPLXMLNode *psElement = nullptr;
677
0
            if (sContext.pszToken[0] != '/')
678
0
            {
679
0
                psElement =
680
0
                    _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
681
0
                if (!psElement)
682
0
                    break;
683
0
                AttachNode(&sContext, psElement);
684
0
                if (!PushNode(&sContext, psElement, eLastErrorType))
685
0
                    break;
686
0
            }
687
0
            else
688
0
            {
689
0
                if (sContext.nStackSize == 0 ||
690
0
                    !EQUAL(sContext.pszToken + 1,
691
0
                           sContext.papsStack[sContext.nStackSize - 1]
692
0
                               .psFirstNode->pszValue))
693
0
                {
694
0
#ifdef DEBUG
695
                    // Makes life of fuzzers easier if we accept somewhat
696
                    // corrupted XML like <foo> ... </not_foo>.
697
0
                    if (CPLTestBool(
698
0
                            CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
699
0
                    {
700
0
                        eLastErrorType = CE_Warning;
701
0
                        CPLError(
702
0
                            eLastErrorType, CPLE_AppDefined,
703
0
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
704
0
                            sContext.nInputLine, sContext.pszToken,
705
0
                            sContext.pszToken + 1);
706
0
                        if (sContext.nStackSize == 0)
707
0
                            break;
708
0
                        goto end_processing_close;
709
0
                    }
710
0
                    else
711
0
#endif
712
0
                    {
713
0
                        eLastErrorType = CE_Failure;
714
0
                        CPLError(
715
0
                            eLastErrorType, CPLE_AppDefined,
716
0
                            "Line %d: <%.500s> doesn't have matching <%.500s>.",
717
0
                            sContext.nInputLine, sContext.pszToken,
718
0
                            sContext.pszToken + 1);
719
0
                        break;
720
0
                    }
721
0
                }
722
0
                else
723
0
                {
724
0
                    if (strcmp(sContext.pszToken + 1,
725
0
                               sContext.papsStack[sContext.nStackSize - 1]
726
0
                                   .psFirstNode->pszValue) != 0)
727
0
                    {
728
                        // TODO: At some point we could just error out like any
729
                        // other sane XML parser would do.
730
0
                        eLastErrorType = CE_Warning;
731
0
                        CPLError(
732
0
                            eLastErrorType, CPLE_AppDefined,
733
0
                            "Line %d: <%.500s> matches <%.500s>, but the case "
734
0
                            "isn't the same.  Going on, but this is invalid "
735
0
                            "XML that might be rejected in future versions.",
736
0
                            sContext.nInputLine,
737
0
                            sContext.papsStack[sContext.nStackSize - 1]
738
0
                                .psFirstNode->pszValue,
739
0
                            sContext.pszToken);
740
0
                    }
741
0
#ifdef DEBUG
742
0
                end_processing_close:
743
0
#endif
744
0
                    if (ReadToken(&sContext, eLastErrorType) != TClose)
745
0
                    {
746
0
                        eLastErrorType = CE_Failure;
747
0
                        CPLError(eLastErrorType, CPLE_AppDefined,
748
0
                                 "Line %d: Missing close angle bracket "
749
0
                                 "after <%.500s.",
750
0
                                 sContext.nInputLine, sContext.pszToken);
751
0
                        break;
752
0
                    }
753
754
                    // Pop element off stack
755
0
                    sContext.nStackSize--;
756
0
                }
757
0
            }
758
0
        }
759
760
        /* --------------------------------------------------------------------
761
         */
762
        /*      Add an attribute to a token. */
763
        /* --------------------------------------------------------------------
764
         */
765
0
        else if (sContext.eTokenType == TToken)
766
0
        {
767
0
            CPLXMLNode *psAttr =
768
0
                _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
769
0
            if (!psAttr)
770
0
                break;
771
0
            AttachNode(&sContext, psAttr);
772
773
0
            XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
774
0
            if (nextToken != TEqual)
775
0
            {
776
                // Parse stuff like <?valbuddy_schematron
777
                // ../wmtsSimpleGetCapabilities.sch?>
778
0
                if (sContext.nStackSize > 0 &&
779
0
                    sContext.papsStack[sContext.nStackSize - 1]
780
0
                            .psFirstNode->pszValue[0] == '?')
781
0
                {
782
0
                    psAttr->eType = CXT_Text;
783
0
                    if (nextToken == TNone)
784
0
                        break;
785
0
                    goto loop_beginning;
786
0
                }
787
788
0
                eLastErrorType = CE_Failure;
789
0
                CPLError(eLastErrorType, CPLE_AppDefined,
790
0
                         "Line %d: Didn't find expected '=' for value of "
791
0
                         "attribute '%.500s'.",
792
0
                         sContext.nInputLine, psAttr->pszValue);
793
0
#ifdef DEBUG
794
                // Accepting an attribute without child text
795
                // would break too much assumptions in driver code
796
0
                bRecoverableError = false;
797
0
#endif
798
0
                break;
799
0
            }
800
801
0
            if (ReadToken(&sContext, eLastErrorType) == TToken)
802
0
            {
803
                /* TODO: at some point we could just error out like any other */
804
                /* sane XML parser would do */
805
0
                eLastErrorType = CE_Warning;
806
0
                CPLError(eLastErrorType, CPLE_AppDefined,
807
0
                         "Line %d: Attribute value should be single or double "
808
0
                         "quoted.  Going on, but this is invalid XML that "
809
0
                         "might be rejected in future versions.",
810
0
                         sContext.nInputLine);
811
0
            }
812
0
            else if (sContext.eTokenType != TString)
813
0
            {
814
0
                eLastErrorType = CE_Failure;
815
0
                CPLError(eLastErrorType, CPLE_AppDefined,
816
0
                         "Line %d: Didn't find expected attribute value.",
817
0
                         sContext.nInputLine);
818
0
#ifdef DEBUG
819
                // Accepting an attribute without child text
820
                // would break too much assumptions in driver code
821
0
                bRecoverableError = false;
822
0
#endif
823
0
                break;
824
0
            }
825
826
0
            if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
827
0
                break;
828
0
        }
829
830
        /* --------------------------------------------------------------------
831
         */
832
        /*      Close the start section of an element. */
833
        /* --------------------------------------------------------------------
834
         */
835
0
        else if (sContext.eTokenType == TClose)
836
0
        {
837
0
            if (sContext.nStackSize == 0)
838
0
            {
839
0
                eLastErrorType = CE_Failure;
840
0
                CPLError(eLastErrorType, CPLE_AppDefined,
841
0
                         "Line %d: Found unbalanced '>'.", sContext.nInputLine);
842
0
                break;
843
0
            }
844
0
        }
845
846
        /* --------------------------------------------------------------------
847
         */
848
        /*      Close the start section of an element, and pop it */
849
        /*      immediately. */
850
        /* --------------------------------------------------------------------
851
         */
852
0
        else if (sContext.eTokenType == TSlashClose)
853
0
        {
854
0
            if (sContext.nStackSize == 0)
855
0
            {
856
0
                eLastErrorType = CE_Failure;
857
0
                CPLError(eLastErrorType, CPLE_AppDefined,
858
0
                         "Line %d: Found unbalanced '/>'.",
859
0
                         sContext.nInputLine);
860
0
                break;
861
0
            }
862
863
0
            sContext.nStackSize--;
864
0
        }
865
        /* --------------------------------------------------------------------
866
         */
867
        /*      Close the start section of a <?...?> element, and pop it */
868
        /*      immediately. */
869
        /* --------------------------------------------------------------------
870
         */
871
0
        else if (sContext.eTokenType == TQuestionClose)
872
0
        {
873
0
            if (sContext.nStackSize == 0)
874
0
            {
875
0
                eLastErrorType = CE_Failure;
876
0
                CPLError(eLastErrorType, CPLE_AppDefined,
877
0
                         "Line %d: Found unbalanced '?>'.",
878
0
                         sContext.nInputLine);
879
0
                break;
880
0
            }
881
0
            else if (sContext.papsStack[sContext.nStackSize - 1]
882
0
                         .psFirstNode->pszValue[0] != '?')
883
0
            {
884
0
                eLastErrorType = CE_Failure;
885
0
                CPLError(eLastErrorType, CPLE_AppDefined,
886
0
                         "Line %d: Found '?>' without matching '<?'.",
887
0
                         sContext.nInputLine);
888
0
                break;
889
0
            }
890
891
0
            sContext.nStackSize--;
892
0
        }
893
        /* --------------------------------------------------------------------
894
         */
895
        /*      Handle comments.  They are returned as a whole token with the */
896
        /*      prefix and postfix omitted.  No processing of white space */
897
        /*      will be done. */
898
        /* --------------------------------------------------------------------
899
         */
900
0
        else if (sContext.eTokenType == TComment)
901
0
        {
902
0
            CPLXMLNode *psValue =
903
0
                _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
904
0
            if (!psValue)
905
0
                break;
906
0
            AttachNode(&sContext, psValue);
907
0
        }
908
        /* --------------------------------------------------------------------
909
         */
910
        /*      Handle literals.  They are returned without processing. */
911
        /* --------------------------------------------------------------------
912
         */
913
0
        else if (sContext.eTokenType == TLiteral)
914
0
        {
915
0
            CPLXMLNode *psValue =
916
0
                _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
917
0
            if (!psValue)
918
0
                break;
919
0
            AttachNode(&sContext, psValue);
920
0
        }
921
        /* --------------------------------------------------------------------
922
         */
923
        /*      Add a text value node as a child of the current element. */
924
        /* --------------------------------------------------------------------
925
         */
926
0
        else if (sContext.eTokenType == TString && !sContext.bInElement)
927
0
        {
928
0
            CPLXMLNode *psValue =
929
0
                _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
930
0
            if (!psValue)
931
0
                break;
932
0
            AttachNode(&sContext, psValue);
933
0
        }
934
        /* --------------------------------------------------------------------
935
         */
936
        /*      Anything else is an error. */
937
        /* --------------------------------------------------------------------
938
         */
939
0
        else
940
0
        {
941
0
            eLastErrorType = CE_Failure;
942
0
            CPLError(eLastErrorType, CPLE_AppDefined,
943
0
                     "Parse error at line %d, unexpected token:%.500s",
944
0
                     sContext.nInputLine, sContext.pszToken);
945
0
            break;
946
0
        }
947
0
    }
948
949
    /* -------------------------------------------------------------------- */
950
    /*      Did we pop all the way out of our stack?                        */
951
    /* -------------------------------------------------------------------- */
952
875
    if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
953
875
        sContext.papsStack != nullptr)
954
0
    {
955
0
#ifdef DEBUG
956
        // Makes life of fuzzers easier if we accept somewhat corrupted XML
957
        // like <x> ...
958
0
        if (bRecoverableError &&
959
0
            CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
960
0
        {
961
0
            eLastErrorType = CE_Warning;
962
0
        }
963
0
        else
964
0
#endif
965
0
        {
966
0
            eLastErrorType = CE_Failure;
967
0
        }
968
0
        CPLError(
969
0
            eLastErrorType, CPLE_AppDefined,
970
0
            "Parse error at EOF, not all elements have been closed, "
971
0
            "starting with %.500s",
972
0
            sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
973
0
    }
974
975
    /* -------------------------------------------------------------------- */
976
    /*      Cleanup                                                         */
977
    /* -------------------------------------------------------------------- */
978
875
    CPLFree(sContext.pszToken);
979
875
    if (sContext.papsStack != nullptr)
980
0
        CPLFree(sContext.papsStack);
981
982
    // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
983
    // has been set we would never get failures
984
875
    if (eLastErrorType == CE_Failure)
985
0
    {
986
0
        CPLDestroyXMLNode(sContext.psFirstNode);
987
0
        sContext.psFirstNode = nullptr;
988
0
        sContext.psLastNode = nullptr;
989
0
    }
990
991
875
    if (eLastErrorType == CE_None)
992
875
    {
993
        // Restore initial error state.
994
875
        CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
995
875
    }
996
997
875
    return sContext.psFirstNode;
998
875
}
999
1000
/************************************************************************/
1001
/*                            _GrowBuffer()                             */
1002
/************************************************************************/
1003
1004
static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)
1005
1006
0
{
1007
0
    if (nNeeded + 1 >= *pnMaxLength)
1008
0
    {
1009
0
        *pnMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
1010
0
        char *pszTextNew =
1011
0
            static_cast<char *>(VSIRealloc(*ppszText, *pnMaxLength));
1012
0
        if (pszTextNew == nullptr)
1013
0
            return false;
1014
0
        *ppszText = pszTextNew;
1015
0
    }
1016
0
    return true;
1017
0
}
1018
1019
/************************************************************************/
1020
/*                        CPLSerializeXMLNode()                         */
1021
/************************************************************************/
1022
1023
// TODO(schwehr): Rewrite this whole thing using C++ string.
1024
// CPLSerializeXMLNode has buffer overflows.
1025
static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
1026
                                char **ppszText, size_t *pnLength,
1027
                                size_t *pnMaxLength)
1028
1029
0
{
1030
0
    if (psNode == nullptr)
1031
0
        return true;
1032
1033
    /* -------------------------------------------------------------------- */
1034
    /*      Ensure the buffer is plenty large to hold this additional       */
1035
    /*      string.                                                         */
1036
    /* -------------------------------------------------------------------- */
1037
0
    *pnLength += strlen(*ppszText + *pnLength);
1038
0
    if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
1039
0
                     ppszText, pnMaxLength))
1040
0
        return false;
1041
1042
    /* -------------------------------------------------------------------- */
1043
    /*      Text is just directly emitted.                                  */
1044
    /* -------------------------------------------------------------------- */
1045
0
    if (psNode->eType == CXT_Text)
1046
0
    {
1047
0
        char *pszEscaped =
1048
0
            CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);
1049
1050
0
        CPLAssert(psNode->psChild == nullptr);
1051
1052
        // Escaped text might be bigger than expected.
1053
0
        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1054
0
        {
1055
0
            CPLFree(pszEscaped);
1056
0
            return false;
1057
0
        }
1058
0
        strcat(*ppszText + *pnLength, pszEscaped);
1059
1060
0
        CPLFree(pszEscaped);
1061
0
    }
1062
1063
    /* -------------------------------------------------------------------- */
1064
    /*      Attributes require a little formatting.                         */
1065
    /* -------------------------------------------------------------------- */
1066
0
    else if (psNode->eType == CXT_Attribute)
1067
0
    {
1068
0
        CPLAssert(psNode->psChild != nullptr &&
1069
0
                  psNode->psChild->eType == CXT_Text);
1070
1071
0
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
1072
0
                 psNode->pszValue);
1073
0
        *pnLength += strlen(*ppszText + *pnLength);
1074
1075
0
        char *pszEscaped =
1076
0
            CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);
1077
1078
0
        if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1079
0
        {
1080
0
            CPLFree(pszEscaped);
1081
0
            return false;
1082
0
        }
1083
0
        strcat(*ppszText + *pnLength, pszEscaped);
1084
1085
0
        CPLFree(pszEscaped);
1086
1087
0
        *pnLength += strlen(*ppszText + *pnLength);
1088
0
        if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
1089
0
            return false;
1090
0
        strcat(*ppszText + *pnLength, "\"");
1091
0
    }
1092
1093
    /* -------------------------------------------------------------------- */
1094
    /*      Handle comment output.                                          */
1095
    /* -------------------------------------------------------------------- */
1096
0
    else if (psNode->eType == CXT_Comment)
1097
0
    {
1098
0
        CPLAssert(psNode->psChild == nullptr);
1099
1100
0
        for (int i = 0; i < nIndent; i++)
1101
0
            (*ppszText)[(*pnLength)++] = ' ';
1102
1103
0
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
1104
0
                 psNode->pszValue);
1105
0
    }
1106
1107
    /* -------------------------------------------------------------------- */
1108
    /*      Handle literal output (like <!DOCTYPE...>)                      */
1109
    /* -------------------------------------------------------------------- */
1110
0
    else if (psNode->eType == CXT_Literal)
1111
0
    {
1112
0
        CPLAssert(psNode->psChild == nullptr);
1113
1114
0
        for (int i = 0; i < nIndent; i++)
1115
0
            (*ppszText)[(*pnLength)++] = ' ';
1116
1117
0
        strcpy(*ppszText + *pnLength, psNode->pszValue);
1118
0
        strcat(*ppszText + *pnLength, "\n");
1119
0
    }
1120
1121
    /* -------------------------------------------------------------------- */
1122
    /*      Elements actually have to deal with general children, and       */
1123
    /*      various formatting issues.                                      */
1124
    /* -------------------------------------------------------------------- */
1125
0
    else if (psNode->eType == CXT_Element)
1126
0
    {
1127
0
        if (nIndent)
1128
0
            memset(*ppszText + *pnLength, ' ', nIndent);
1129
0
        *pnLength += nIndent;
1130
0
        (*ppszText)[*pnLength] = '\0';
1131
1132
0
        snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
1133
0
                 psNode->pszValue);
1134
1135
0
        if (psNode->pszValue[0] == '?')
1136
0
        {
1137
0
            for (const CPLXMLNode *psChild = psNode->psChild;
1138
0
                 psChild != nullptr; psChild = psChild->psNext)
1139
0
            {
1140
0
                if (psChild->eType == CXT_Text)
1141
0
                {
1142
0
                    *pnLength += strlen(*ppszText + *pnLength);
1143
0
                    if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1144
0
                        return false;
1145
0
                    strcat(*ppszText + *pnLength, " ");
1146
0
                }
1147
1148
0
                if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1149
0
                                         pnMaxLength))
1150
0
                {
1151
0
                    return false;
1152
0
                }
1153
0
            }
1154
0
            if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1155
0
                return false;
1156
1157
0
            strcat(*ppszText + *pnLength, "?>\n");
1158
0
        }
1159
0
        else
1160
0
        {
1161
0
            bool bHasNonAttributeChildren = false;
1162
            // Serialize *all* the attribute children, regardless of order
1163
0
            for (const CPLXMLNode *psChild = psNode->psChild;
1164
0
                 psChild != nullptr; psChild = psChild->psNext)
1165
0
            {
1166
0
                if (psChild->eType == CXT_Attribute)
1167
0
                {
1168
0
                    if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1169
0
                                             pnMaxLength))
1170
0
                        return false;
1171
0
                }
1172
0
                else
1173
0
                    bHasNonAttributeChildren = true;
1174
0
            }
1175
1176
0
            if (!bHasNonAttributeChildren)
1177
0
            {
1178
0
                if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1179
0
                    return false;
1180
1181
0
                strcat(*ppszText + *pnLength, " />\n");
1182
0
            }
1183
0
            else
1184
0
            {
1185
0
                bool bJustText = true;
1186
1187
0
                strcat(*ppszText + *pnLength, ">");
1188
1189
0
                for (const CPLXMLNode *psChild = psNode->psChild;
1190
0
                     psChild != nullptr; psChild = psChild->psNext)
1191
0
                {
1192
0
                    if (psChild->eType == CXT_Attribute)
1193
0
                        continue;
1194
1195
0
                    if (psChild->eType != CXT_Text && bJustText)
1196
0
                    {
1197
0
                        bJustText = false;
1198
0
                        *pnLength += strlen(*ppszText + *pnLength);
1199
0
                        if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1200
0
                            return false;
1201
0
                        strcat(*ppszText + *pnLength, "\n");
1202
0
                    }
1203
1204
0
                    if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
1205
0
                                             pnLength, pnMaxLength))
1206
0
                        return false;
1207
0
                }
1208
1209
0
                *pnLength += strlen(*ppszText + *pnLength);
1210
0
                if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
1211
0
                                     nIndent,
1212
0
                                 ppszText, pnMaxLength))
1213
0
                    return false;
1214
1215
0
                if (!bJustText)
1216
0
                {
1217
0
                    if (nIndent)
1218
0
                        memset(*ppszText + *pnLength, ' ', nIndent);
1219
0
                    *pnLength += nIndent;
1220
0
                    (*ppszText)[*pnLength] = '\0';
1221
0
                }
1222
1223
0
                *pnLength += strlen(*ppszText + *pnLength);
1224
0
                snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
1225
0
                         "</%s>\n", psNode->pszValue);
1226
0
            }
1227
0
        }
1228
0
    }
1229
1230
0
    return true;
1231
0
}
1232
1233
/************************************************************************/
1234
/*                        CPLSerializeXMLTree()                         */
1235
/************************************************************************/
1236
1237
/**
1238
 * \brief Convert tree into string document.
1239
 *
1240
 * This function converts a CPLXMLNode tree representation of a document
1241
 * into a flat string representation.  White space indentation is used to
1242
 * visually preserve the tree structure of the document.  The returned
1243
 * document becomes owned by the caller and should be freed with CPLFree()
1244
 * when no longer needed.
1245
 *
1246
 * @param psNode the node to serialize.
1247
 *
1248
 * @return the document on success or NULL on failure.
1249
 */
1250
1251
char *CPLSerializeXMLTree(const CPLXMLNode *psNode)
1252
1253
0
{
1254
0
    size_t nMaxLength = 100;
1255
0
    char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
1256
0
    if (pszText == nullptr)
1257
0
        return nullptr;
1258
1259
0
    size_t nLength = 0;
1260
0
    for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
1261
0
         psThis = psThis->psNext)
1262
0
    {
1263
0
        if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
1264
0
        {
1265
0
            VSIFree(pszText);
1266
0
            return nullptr;
1267
0
        }
1268
0
    }
1269
1270
0
    return pszText;
1271
0
}
1272
1273
/************************************************************************/
1274
/*                          CPLCreateXMLNode()                          */
1275
/************************************************************************/
1276
1277
#ifdef DEBUG
1278
static CPLXMLNode *psDummyStaticNode;
1279
#endif
1280
1281
/**
1282
 * \brief Create a document tree item.
1283
 *
1284
 * Create a single CPLXMLNode object with the desired value and type, and
1285
 * attach it as a child of the indicated parent.
1286
 *
1287
 * @param poParent the parent to which this node should be attached as a
1288
 * child.  May be NULL to keep as free standing.
1289
 * @param eType the type of the newly created node
1290
 * @param pszText the value of the newly created node
1291
 *
1292
 * @return the newly created node, now owned by the caller (or parent node).
1293
 */
1294
1295
CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                             const char *pszText)

{
    // Delegate to the variant that can return NULL, and report a fatal
    // error if it did.
    CPLXMLNode *const psCreated = _CPLCreateXMLNode(poParent, eType, pszText);
    if (psCreated == nullptr)
        CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");
    return psCreated;
}
1306
1307
/************************************************************************/
1308
/*                         _CPLCreateXMLNode()                          */
1309
/************************************************************************/
1310
1311
/* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
1312
/* situation */
1313
1314
static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
                                     const char *pszText)

{
    /* -------------------------------------------------------------------- */
    /*      Allocate a zero-initialized node and copy the value into it.    */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *psNew =
        static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
    if (!psNew)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
        return nullptr;
    }

    psNew->eType = eType;
    // A NULL text is stored as an empty string.
    psNew->pszValue = VSIStrdup(pszText ? pszText : "");
    if (!psNew->pszValue)
    {
        CPLError(CE_Failure, CPLE_OutOfMemory,
                 "Cannot allocate psNode->pszValue");
        VSIFree(psNew);
        return nullptr;
    }

    /* -------------------------------------------------------------------- */
    /*      Free standing node: nothing to link.                            */
    /* -------------------------------------------------------------------- */
    if (poParent == nullptr)
    {
#ifdef DEBUG
        // Coverity sometimes doesn't realize that this function is passed
        // with a non NULL parent and thinks that this branch is taken, leading
        // to creating object being leak by caller. This ugly hack hopefully
        // makes it believe that someone will reference it.
        psDummyStaticNode = psNew;
#endif
        return psNew;
    }

    /* -------------------------------------------------------------------- */
    /*      Link into the parent's child list.                              */
    /* -------------------------------------------------------------------- */
    CPLXMLNode *const psFirst = poParent->psChild;
    if (psFirst == nullptr)
    {
        poParent->psChild = psNew;
        return psNew;
    }

    const bool bIsAttribute = (eType == CXT_Attribute);

    // An attribute goes in front of a lone text child.
    if (bIsAttribute && psFirst->psNext == nullptr &&
        psFirst->eType == CXT_Text)
    {
        psNew->psNext = psFirst;
        poParent->psChild = psNew;
        return psNew;
    }

    // Otherwise walk to the end of the list, except that an attribute is
    // spliced in just before the first text node found after the head.
    CPLXMLNode *psTail = psFirst;
    for (; psTail->psNext != nullptr; psTail = psTail->psNext)
    {
        if (bIsAttribute && psTail->psNext->eType == CXT_Text)
        {
            psNew->psNext = psTail->psNext;
            break;
        }
    }
    psTail->psNext = psNew;

    return psNew;
}
1387
1388
/************************************************************************/
1389
/*                         CPLDestroyXMLNode()                          */
1390
/************************************************************************/
1391
1392
/**
1393
 * \brief Destroy a tree.
1394
 *
1395
 * This function frees resources associated with a CPLXMLNode and all its
1396
 * children nodes.
1397
 *
1398
 * @param psNode the tree to free.
1399
 */
1400
1401
void CPLDestroyXMLNode(CPLXMLNode *psNode)

{
    // Iterative destruction: the children of the node being freed are
    // spliced into the sibling chain right after it, so that the whole tree
    // is consumed as one flat list without recursion.
    CPLXMLNode *psCurrent = psNode;
    while (psCurrent != nullptr)
    {
        if (psCurrent->pszValue != nullptr)
            CPLFree(psCurrent->pszValue);

        CPLXMLNode *const psFirstChild = psCurrent->psChild;
        if (psFirstChild != nullptr)
        {
            CPLXMLNode *const psSibling = psCurrent->psNext;
            if (psSibling != nullptr)
            {
                // Chain the former siblings after the last child.
                CPLXMLNode *psLastChild = psFirstChild;
                while (psLastChild->psNext != nullptr)
                    psLastChild = psLastChild->psNext;
                psLastChild->psNext = psSibling;
            }
            psCurrent->psNext = psFirstChild;
        }

        CPLXMLNode *const psFollowing = psCurrent->psNext;
        CPLFree(psCurrent);
        psCurrent = psFollowing;
    }
}
1431
1432
/************************************************************************/
1433
/*                           CPLSearchXMLNode()                         */
1434
/************************************************************************/
1435
1436
/**
1437
 * \brief Search for a node in document.
1438
 *
1439
 * Searches the children (and potentially siblings) of the document
1440
 * passed in for the named element or attribute.  To search following
1441
 * siblings as well as children, prefix the pszElement name with an equal
1442
 * sign.  This function does an in-order traversal of the document tree.
1443
 * So it will first match against the current node, then its first child,
1444
 * that child's first child, and so on.
1445
 *
1446
 * Use CPLGetXMLNode() to find a specific child, or along a specific
1447
 * node path.
1448
 *
1449
 * @param psRoot the subtree to search.  This should be a node of type
1450
 * CXT_Element.  NULL is safe.
1451
 *
1452
 * @param pszElement the name of the element or attribute to search for.
1453
 *
1454
 * @return The matching node or NULL on failure.
1455
 */
1456
1457
CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)

{
    if (psRoot == nullptr || pszElement == nullptr)
        return nullptr;

    // A leading '=' asks for the following siblings of psRoot to be
    // searched as well; it is not part of the name to match.
    const bool bAlsoSiblings = (*pszElement == '=');
    if (bAlsoSiblings)
        ++pszElement;

    // Only elements and attributes can match, compared with EQUAL().
    const auto IsNamedMatch = [pszElement](const CPLXMLNode *psCandidate)
    {
        return (psCandidate->eType == CXT_Element ||
                psCandidate->eType == CXT_Attribute) &&
               EQUAL(pszElement, psCandidate->pszValue);
    };

    /* -------------------------------------------------------------------- */
    /*      The root itself.                                                */
    /* -------------------------------------------------------------------- */
    if (IsNamedMatch(psRoot))
        return psRoot;

    /* -------------------------------------------------------------------- */
    /*      Each child, then that child's own subtree.                      */
    /* -------------------------------------------------------------------- */
    for (CPLXMLNode *psKid = psRoot->psChild; psKid != nullptr;
         psKid = psKid->psNext)
    {
        if (IsNamedMatch(psKid))
            return psKid;

        if (psKid->psChild == nullptr)
            continue;

        if (CPLXMLNode *psHit = CPLSearchXMLNode(psKid, pszElement))
            return psHit;
    }

    /* -------------------------------------------------------------------- */
    /*      Following siblings, only in side search mode.                   */
    /* -------------------------------------------------------------------- */
    if (!bAlsoSiblings)
        return nullptr;

    for (CPLXMLNode *psSibling = psRoot->psNext; psSibling != nullptr;
         psSibling = psSibling->psNext)
    {
        if (CPLXMLNode *psHit = CPLSearchXMLNode(psSibling, pszElement))
            return psHit;
    }

    return nullptr;
}
1514
1515
/************************************************************************/
1516
/*                           CPLGetXMLNode()                            */
1517
/************************************************************************/
1518
1519
/**
1520
 * \brief Find node by path.
1521
 *
1522
 * Searches the document or subdocument indicated by psRoot for an element
1523
 * (or attribute) with the given path.  The path should consist of a set of
1524
 * element names separated by dots, not including the name of the root
1525
 * element (psRoot).  If the requested element is not found NULL is returned.
1526
 *
1527
 * Attribute names may only appear as the last item in the path.
1528
 *
1529
 * The search is done from the root node's children, but all intermediate
1530
 * nodes in the path must be specified.  Searching for "name" would only find
1531
 * a name element or attribute if it is a direct child of the root, not at any
1532
 * level in the subdocument.
1533
 *
1534
 * If the pszPath is prefixed by "=" then the search will begin with the
1535
 * root node, and its siblings, instead of the root node's children.  This
1536
 * is particularly useful when searching within a whole document which is
1537
 * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1538
 *
1539
 * @param psRoot the subtree in which to search.  This should be a node of
1540
 * type CXT_Element.  NULL is safe.
1541
 *
1542
 * @param pszPath the list of element names in the path (dot separated).
1543
 *
1544
 * @return the requested element node, or NULL if not found.
1545
 */
1546
1547
CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)

{
    if (psRoot == nullptr || pszPath == nullptr)
        return nullptr;

    // A leading '=' means the first path component is matched against the
    // root node and its siblings rather than against the root's children.
    bool bStartAtRoot = (*pszPath == '=');
    if (bStartAtRoot)
        ++pszPath;

    // Slight optimization: a path without any dot is used as is, which
    // avoids the memory allocations done by CSLTokenizeStringComplex.
    const char *const apszSingleName[2] = {pszPath, nullptr};
    const bool bHasDot = strchr(pszPath, '.') != nullptr;
    char **papszSplit = nullptr;
    if (bHasDot)
        papszSplit = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
    const char *const *papszNames = bHasDot ? papszSplit : apszSingleName;

    // Descend one level per path component.
    CPLXMLNode *psCurrent = psRoot;
    for (int iName = 0; psCurrent != nullptr && papszNames[iName] != nullptr;
         ++iName)
    {
        CPLXMLNode *psCandidate =
            bStartAtRoot ? psCurrent : psCurrent->psChild;
        bStartAtRoot = false;

        // Skip text nodes and nodes whose name does not match.
        while (psCandidate != nullptr &&
               (psCandidate->eType == CXT_Text ||
                !EQUAL(papszNames[iName], psCandidate->pszValue)))
        {
            psCandidate = psCandidate->psNext;
        }

        // NULL here ends the loop and becomes the returned value.
        psCurrent = psCandidate;
    }

    if (bHasDot)
        CSLDestroy(papszSplit);
    return psCurrent;
}
1614
1615
/************************************************************************/
1616
/*                           CPLGetXMLValue()                           */
1617
/************************************************************************/
1618
1619
/**
1620
 * \brief Fetch element/attribute value.
1621
 *
1622
 * Searches the document for the element/attribute value associated with
1623
 * the path.  The corresponding node is internally found with CPLGetXMLNode()
1624
 * (see there for details on path handling).  Once found, the value is
1625
 * considered to be the first CXT_Text child of the node.
1626
 *
1627
 * If the attribute/element search fails, or if the found node has no
1628
 * value then the passed default value is returned.
1629
 *
1630
 * The returned value points to memory within the document tree, and should
1631
 * not be altered or freed.
1632
 *
1633
 * @param psRoot the subtree in which to search.  This should be a node of
1634
 * type CXT_Element.  NULL is safe.
1635
 *
1636
 * @param pszPath the list of element names in the path (dot separated).  An
1637
 * empty path means get the value of the psRoot node.
1638
 *
1639
 * @param pszDefault the value to return if a corresponding value is not
1640
 * found, may be NULL.
1641
 *
1642
 * @return the requested value or pszDefault if not found.
1643
 */
1644
1645
const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
1646
                           const char *pszDefault)
1647
1648
0
{
1649
0
    const CPLXMLNode *psTarget = nullptr;
1650
1651
0
    if (pszPath == nullptr || *pszPath == '\0')
1652
0
        psTarget = psRoot;
1653
0
    else
1654
0
        psTarget = CPLGetXMLNode(psRoot, pszPath);
1655
1656
0
    if (psTarget == nullptr)
1657
0
        return pszDefault;
1658
1659
0
    if (psTarget->eType == CXT_Attribute)
1660
0
    {
1661
0
        CPLAssert(psTarget->psChild != nullptr &&
1662
0
                  psTarget->psChild->eType == CXT_Text);
1663
1664
0
        return psTarget->psChild->pszValue;
1665
0
    }
1666
1667
0
    if (psTarget->eType == CXT_Element)
1668
0
    {
1669
        // Find first non-attribute child, and verify it is a single text
1670
        // with no siblings.
1671
1672
0
        psTarget = psTarget->psChild;
1673
1674
0
        while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
1675
0
            psTarget = psTarget->psNext;
1676
1677
0
        if (psTarget != nullptr && psTarget->eType == CXT_Text &&
1678
0
            psTarget->psNext == nullptr)
1679
0
            return psTarget->pszValue;
1680
0
    }
1681
1682
0
    return pszDefault;
1683
0
}
1684
1685
/************************************************************************/
1686
/*                           CPLAddXMLChild()                           */
1687
/************************************************************************/
1688
1689
/**
1690
 * \brief Add child node to parent.
1691
 *
1692
 * The passed child is added to the list of children of the indicated
1693
 * parent.  Normally the child is added at the end of the parents child
1694
 * list, but attributes (CXT_Attribute) will be inserted after any other
1695
 * attributes but before any other element type.  Ownership of the child
1696
 * node is effectively assumed by the parent node.   If the child has
1697
 * siblings (its psNext is not NULL) they will be trimmed, but if the child
1698
 * has children they are carried with it.
1699
 *
1700
 * @param psParent the node to attach the child to.  May not be NULL.
1701
 *
1702
 * @param psChild the child to add to the parent.  May not be NULL.  Should
1703
 * not be a child of any other parent.
1704
 */
1705
1706
void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1707
1708
0
{
1709
0
    if (psParent->psChild == nullptr)
1710
0
    {
1711
0
        psParent->psChild = psChild;
1712
0
        return;
1713
0
    }
1714
1715
    // Insert at head of list if first child is not attribute.
1716
0
    if (psChild->eType == CXT_Attribute &&
1717
0
        psParent->psChild->eType != CXT_Attribute)
1718
0
    {
1719
0
        psChild->psNext = psParent->psChild;
1720
0
        psParent->psChild = psChild;
1721
0
        return;
1722
0
    }
1723
1724
    // Search for end of list.
1725
0
    CPLXMLNode *psSib = nullptr;
1726
0
    for (psSib = psParent->psChild; psSib->psNext != nullptr;
1727
0
         psSib = psSib->psNext)
1728
0
    {
1729
        // Insert attributes if the next node is not an attribute.
1730
0
        if (psChild->eType == CXT_Attribute && psSib->psNext != nullptr &&
1731
0
            psSib->psNext->eType != CXT_Attribute)
1732
0
        {
1733
0
            psChild->psNext = psSib->psNext;
1734
0
            psSib->psNext = psChild;
1735
0
            return;
1736
0
        }
1737
0
    }
1738
1739
0
    psSib->psNext = psChild;
1740
0
}
1741
1742
/************************************************************************/
1743
/*                        CPLRemoveXMLChild()                           */
1744
/************************************************************************/
1745
1746
/**
1747
 * \brief Remove child node from parent.
1748
 *
1749
 * The passed child is removed from the child list of the passed parent,
1750
 * but the child is not destroyed.  The child retains ownership of its
1751
 * own children, but is cleanly removed from the child list of the parent.
1752
 *
1753
 * @param psParent the node to the child is attached to.
1754
 *
1755
 * @param psChild the child to remove.
1756
 *
1757
 * @return TRUE on success or FALSE if the child was not found.
1758
 */
1759
1760
int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1761
1762
0
{
1763
0
    if (psParent == nullptr)
1764
0
        return FALSE;
1765
1766
0
    CPLXMLNode *psLast = nullptr;
1767
0
    CPLXMLNode *psThis = nullptr;
1768
0
    for (psThis = psParent->psChild; psThis != nullptr; psThis = psThis->psNext)
1769
0
    {
1770
0
        if (psThis == psChild)
1771
0
        {
1772
0
            if (psLast == nullptr)
1773
0
                psParent->psChild = psThis->psNext;
1774
0
            else
1775
0
                psLast->psNext = psThis->psNext;
1776
1777
0
            psThis->psNext = nullptr;
1778
0
            return TRUE;
1779
0
        }
1780
0
        psLast = psThis;
1781
0
    }
1782
1783
0
    return FALSE;
1784
0
}
1785
1786
/************************************************************************/
1787
/*                          CPLAddXMLSibling()                          */
1788
/************************************************************************/
1789
1790
/**
1791
 * \brief Add new sibling.
1792
 *
1793
 * The passed psNewSibling is added to the end of siblings of the
1794
 * psOlderSibling node.  That is, it is added to the end of the psNext
1795
 * chain.  There is no special handling if psNewSibling is an attribute.
1796
 * If this is required, use CPLAddXMLChild().
1797
 *
1798
 * @param psOlderSibling the node to attach the sibling after.
1799
 *
1800
 * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1801
 * chain.
1802
 */
1803
1804
void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)
1805
1806
0
{
1807
0
    if (psOlderSibling == nullptr)
1808
0
        return;
1809
1810
0
    while (psOlderSibling->psNext != nullptr)
1811
0
        psOlderSibling = psOlderSibling->psNext;
1812
1813
0
    psOlderSibling->psNext = psNewSibling;
1814
0
}
1815
1816
/************************************************************************/
1817
/*                    CPLCreateXMLElementAndValue()                     */
1818
/************************************************************************/
1819
1820
/**
1821
 * \brief Create an element and text value.
1822
 *
1823
 * This is function is a convenient short form for:
1824
 *
1825
 * \code
1826
 *     CPLXMLNode *psTextNode;
1827
 *     CPLXMLNode *psElementNode;
1828
 *
1829
 *     psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1830
 *     psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1831
 *
1832
 *     return psElementNode;
1833
 * \endcode
1834
 *
1835
 * It creates a CXT_Element node, with a CXT_Text child, and
1836
 * attaches the element to the passed parent.
1837
 *
1838
 * @param psParent the parent node to which the resulting node should
1839
 * be attached.  May be NULL to keep as freestanding.
1840
 *
1841
 * @param pszName the element name to create.
1842
 * @param pszValue the text to attach to the element. Must not be NULL.
1843
 *
1844
 * @return the pointer to the new element node.
1845
 */
1846
1847
CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
1848
                                        const char *pszName,
1849
                                        const char *pszValue)
1850
1851
0
{
1852
0
    CPLXMLNode *psElementNode =
1853
0
        CPLCreateXMLNode(psParent, CXT_Element, pszName);
1854
0
    CPLCreateXMLNode(psElementNode, CXT_Text, pszValue);
1855
1856
0
    return psElementNode;
1857
0
}
1858
1859
/************************************************************************/
1860
/*                     CPLAddXMLAttributeAndValue()                     */
1861
/************************************************************************/
1862
1863
/**
1864
 * \brief Create an attribute and text value.
1865
 *
1866
 * This is function is a convenient short form for:
1867
 *
1868
 * \code
1869
 *   CPLXMLNode *psAttributeNode;
1870
 *
1871
 *   psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
1872
 *   CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
1873
 * \endcode
1874
 *
1875
 * It creates a CXT_Attribute node, with a CXT_Text child, and
1876
 * attaches the element to the passed parent.
1877
 *
1878
 * @param psParent the parent node to which the resulting node should
1879
 * be attached.  Must not be NULL.
1880
 * @param pszName the attribute name to create.
1881
 * @param pszValue the text to attach to the attribute. Must not be NULL.
1882
 *
1883
 * @since GDAL 2.0
1884
 */
1885
1886
void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
1887
                                const char *pszValue)
1888
0
{
1889
0
    CPLAssert(psParent != nullptr);
1890
0
    CPLXMLNode *psAttributeNode =
1891
0
        CPLCreateXMLNode(psParent, CXT_Attribute, pszName);
1892
0
    CPLCreateXMLNode(psAttributeNode, CXT_Text, pszValue);
1893
0
}
1894
1895
/************************************************************************/
1896
/*                          CPLCloneXMLTree()                           */
1897
/************************************************************************/
1898
1899
/**
1900
 * \brief Copy tree.
1901
 *
1902
 * Creates a deep copy of a CPLXMLNode tree.
1903
 *
1904
 * @param psTree the tree to duplicate.
1905
 *
1906
 * @return a copy of the whole tree.
1907
 */
1908
1909
CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)
1910
1911
0
{
1912
0
    CPLXMLNode *psPrevious = nullptr;
1913
0
    CPLXMLNode *psReturn = nullptr;
1914
1915
0
    while (psTree != nullptr)
1916
0
    {
1917
0
        CPLXMLNode *psCopy =
1918
0
            CPLCreateXMLNode(nullptr, psTree->eType, psTree->pszValue);
1919
0
        if (psReturn == nullptr)
1920
0
            psReturn = psCopy;
1921
0
        if (psPrevious != nullptr)
1922
0
            psPrevious->psNext = psCopy;
1923
1924
0
        if (psTree->psChild != nullptr)
1925
0
            psCopy->psChild = CPLCloneXMLTree(psTree->psChild);
1926
1927
0
        psPrevious = psCopy;
1928
0
        psTree = psTree->psNext;
1929
0
    }
1930
1931
0
    return psReturn;
1932
0
}
1933
1934
/************************************************************************/
1935
/*                           CPLSetXMLValue()                           */
1936
/************************************************************************/
1937
1938
/**
1939
 * \brief Set element value by path.
1940
 *
1941
 * Find (or create) the target element or attribute specified in the
1942
 * path, and assign it the indicated value.
1943
 *
1944
 * Any path elements that do not already exist will be created.  The target
1945
 * nodes value (the first CXT_Text child) will be replaced with the provided
1946
 * value.
1947
 *
1948
 * If the target node is an attribute instead of an element, the name
1949
 * should be prefixed with a #.
1950
 *
1951
 * Example:
1952
 *   CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1953
 *   CPLSetXMLValue( "Citation.Id.Description.#name", "doq" );
1954
 *
1955
 * @param psRoot the subdocument to be updated.
1956
 *
1957
 * @param pszPath the dot separated path to the target element/attribute.
1958
 *
1959
 * @param pszValue the text value to assign.
1960
 *
1961
 * @return TRUE on success.
1962
 */
1963
1964
int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
1965
                   const char *pszValue)
1966
1967
0
{
1968
0
    char **papszTokens = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1969
0
    int iToken = 0;
1970
1971
0
    while (papszTokens[iToken] != nullptr)
1972
0
    {
1973
0
        bool bIsAttribute = false;
1974
0
        const char *pszName = papszTokens[iToken];
1975
1976
0
        if (pszName[0] == '#')
1977
0
        {
1978
0
            bIsAttribute = true;
1979
0
            pszName++;
1980
0
        }
1981
1982
0
        if (psRoot->eType != CXT_Element)
1983
0
        {
1984
0
            CSLDestroy(papszTokens);
1985
0
            return FALSE;
1986
0
        }
1987
1988
0
        CPLXMLNode *psChild = nullptr;
1989
0
        for (psChild = psRoot->psChild; psChild != nullptr;
1990
0
             psChild = psChild->psNext)
1991
0
        {
1992
0
            if (psChild->eType != CXT_Text && EQUAL(pszName, psChild->pszValue))
1993
0
                break;
1994
0
        }
1995
1996
0
        if (psChild == nullptr)
1997
0
        {
1998
0
            if (bIsAttribute)
1999
0
                psChild = CPLCreateXMLNode(psRoot, CXT_Attribute, pszName);
2000
0
            else
2001
0
                psChild = CPLCreateXMLNode(psRoot, CXT_Element, pszName);
2002
0
        }
2003
2004
0
        psRoot = psChild;
2005
0
        iToken++;
2006
0
    }
2007
2008
0
    CSLDestroy(papszTokens);
2009
2010
    /* -------------------------------------------------------------------- */
2011
    /*      Find the "text" child if there is one.                          */
2012
    /* -------------------------------------------------------------------- */
2013
0
    CPLXMLNode *psTextChild = psRoot->psChild;
2014
2015
0
    while (psTextChild != nullptr && psTextChild->eType != CXT_Text)
2016
0
        psTextChild = psTextChild->psNext;
2017
2018
    /* -------------------------------------------------------------------- */
2019
    /*      Now set a value node under this node.                           */
2020
    /* -------------------------------------------------------------------- */
2021
2022
0
    if (psTextChild == nullptr)
2023
0
        CPLCreateXMLNode(psRoot, CXT_Text, pszValue);
2024
0
    else
2025
0
    {
2026
0
        CPLFree(psTextChild->pszValue);
2027
0
        psTextChild->pszValue = CPLStrdup(pszValue);
2028
0
    }
2029
2030
0
    return TRUE;
2031
0
}
2032
2033
/************************************************************************/
2034
/*                        CPLStripXMLNamespace()                        */
2035
/************************************************************************/
2036
2037
/**
2038
 * \brief Strip indicated namespaces.
2039
 *
2040
 * The subdocument (psRoot) is recursively examined, and any elements
2041
 * with the indicated namespace prefix will have the namespace prefix
2042
 * stripped from the element names.  If the passed namespace is NULL, then
2043
 * all namespace prefixes will be stripped.
2044
 *
2045
 * Nodes other than elements should remain unaffected.  The changes are
2046
 * made "in place", and should not alter any node locations, only the
2047
 * pszValue field of affected nodes.
2048
 *
2049
 * @param psRoot the document to operate on.
2050
 * @param pszNamespace the name space prefix (not including colon), or NULL.
2051
 * @param bRecurse TRUE to recurse over whole document, or FALSE to only
2052
 * operate on the passed node.
2053
 */
2054
2055
void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
2056
                          int bRecurse)
2057
2058
0
{
2059
0
    size_t nNameSpaceLen = (pszNamespace) ? strlen(pszNamespace) : 0;
2060
2061
0
    while (psRoot != nullptr)
2062
0
    {
2063
0
        if (psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute)
2064
0
        {
2065
0
            if (pszNamespace != nullptr)
2066
0
            {
2067
0
                if (EQUALN(pszNamespace, psRoot->pszValue, nNameSpaceLen) &&
2068
0
                    psRoot->pszValue[nNameSpaceLen] == ':')
2069
0
                {
2070
0
                    memmove(psRoot->pszValue,
2071
0
                            psRoot->pszValue + nNameSpaceLen + 1,
2072
0
                            strlen(psRoot->pszValue + nNameSpaceLen + 1) + 1);
2073
0
                }
2074
0
            }
2075
0
            else
2076
0
            {
2077
0
                for (const char *pszCheck = psRoot->pszValue; *pszCheck != '\0';
2078
0
                     pszCheck++)
2079
0
                {
2080
0
                    if (*pszCheck == ':')
2081
0
                    {
2082
0
                        memmove(psRoot->pszValue, pszCheck + 1,
2083
0
                                strlen(pszCheck + 1) + 1);
2084
0
                        break;
2085
0
                    }
2086
0
                }
2087
0
            }
2088
0
        }
2089
2090
0
        if (bRecurse)
2091
0
        {
2092
0
            if (psRoot->psChild != nullptr)
2093
0
                CPLStripXMLNamespace(psRoot->psChild, pszNamespace, 1);
2094
2095
0
            psRoot = psRoot->psNext;
2096
0
        }
2097
0
        else
2098
0
        {
2099
0
            break;
2100
0
        }
2101
0
    }
2102
0
}
2103
2104
/************************************************************************/
2105
/*                          CPLParseXMLFile()                           */
2106
/************************************************************************/
2107
2108
/**
2109
 * \brief Parse XML file into tree.
2110
 *
2111
 * The named file is opened, loaded into memory as a big string, and
2112
 * parsed with CPLParseXMLString().  Errors in reading the file or parsing
2113
 * the XML will be reported by CPLError().
2114
 *
2115
 * The "large file" API is used, so XML files can come from virtualized
2116
 * files.
2117
 *
2118
 * @param pszFilename the file to open.
2119
 *
2120
 * @return NULL on failure, or the document tree on success.
2121
 */
2122
2123
CPLXMLNode *CPLParseXMLFile(const char *pszFilename)
2124
2125
924
{
2126
    /* -------------------------------------------------------------------- */
2127
    /*      Ingest the file.                                                */
2128
    /* -------------------------------------------------------------------- */
2129
924
    GByte *pabyOut = nullptr;
2130
924
    if (!VSIIngestFile(nullptr, pszFilename, &pabyOut, nullptr, -1))
2131
49
        return nullptr;
2132
2133
875
    char *pszDoc = reinterpret_cast<char *>(pabyOut);
2134
2135
    /* -------------------------------------------------------------------- */
2136
    /*      Parse it.                                                       */
2137
    /* -------------------------------------------------------------------- */
2138
875
    CPLXMLNode *psTree = CPLParseXMLString(pszDoc);
2139
875
    CPLFree(pszDoc);
2140
2141
875
    return psTree;
2142
924
}
2143
2144
/************************************************************************/
2145
/*                     CPLSerializeXMLTreeToFile()                      */
2146
/************************************************************************/
2147
2148
/**
2149
 * \brief Write document tree to a file.
2150
 *
2151
 * The passed document tree is converted into one big string (with
2152
 * CPLSerializeXMLTree()) and then written to the named file.  Errors writing
2153
 * the file will be reported by CPLError().  The source document tree is
2154
 * not altered.  If the output file already exists it will be overwritten.
2155
 *
2156
 * @param psTree the document tree to write.
2157
 * @param pszFilename the name of the file to write to.
2158
 * @return TRUE on success, FALSE otherwise.
2159
 */
2160
2161
int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)
2162
2163
0
{
2164
    /* -------------------------------------------------------------------- */
2165
    /*      Serialize document.                                             */
2166
    /* -------------------------------------------------------------------- */
2167
0
    char *pszDoc = CPLSerializeXMLTree(psTree);
2168
0
    if (pszDoc == nullptr)
2169
0
        return FALSE;
2170
2171
0
    const vsi_l_offset nLength = strlen(pszDoc);
2172
2173
    /* -------------------------------------------------------------------- */
2174
    /*      Create file.                                                    */
2175
    /* -------------------------------------------------------------------- */
2176
0
    VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
2177
0
    if (fp == nullptr)
2178
0
    {
2179
0
        CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
2180
0
                 pszFilename);
2181
0
        CPLFree(pszDoc);
2182
0
        return FALSE;
2183
0
    }
2184
2185
    /* -------------------------------------------------------------------- */
2186
    /*      Write file.                                                     */
2187
    /* -------------------------------------------------------------------- */
2188
0
    if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
2189
0
    {
2190
0
        CPLError(CE_Failure, CPLE_FileIO,
2191
0
                 "Failed to write whole XML document (%.500s).", pszFilename);
2192
0
        CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
2193
0
        CPLFree(pszDoc);
2194
0
        return FALSE;
2195
0
    }
2196
2197
    /* -------------------------------------------------------------------- */
2198
    /*      Cleanup                                                         */
2199
    /* -------------------------------------------------------------------- */
2200
0
    const bool bRet = VSIFCloseL(fp) == 0;
2201
0
    if (!bRet)
2202
0
    {
2203
0
        CPLError(CE_Failure, CPLE_FileIO,
2204
0
                 "Failed to write whole XML document (%.500s).", pszFilename);
2205
0
    }
2206
0
    CPLFree(pszDoc);
2207
2208
0
    return bRet;
2209
0
}
2210
2211
/************************************************************************/
2212
/*                       CPLCleanXMLElementName()                       */
2213
/************************************************************************/
2214
2215
/**
 * \brief Make string into safe XML token.
 *
 * Modifies a string in place to try and make it into a legal
 * XML token that can be used as an element name.  Every character is
 * replaced by an underscore unless it has its high bit set, is
 * alphanumeric according to isalnum(), or is '_' or '.'.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 * NULL is accepted and ignored.
 */

void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    for (char *pszCur = pszTarget; *pszCur != '\0'; ++pszCur)
    {
        // <cctype> functions require a value representable as unsigned char.
        const unsigned char ch = static_cast<unsigned char>(*pszCur);

        const bool bKeep =
            (ch & 0x80) != 0 || isalnum(ch) != 0 || ch == '_' || ch == '.';

        if (!bKeep)
            *pszCur = '_';
    }
}
2248
2249
/************************************************************************/
2250
/*                     CPLXMLNodeGetRAMUsageEstimate()                  */
2251
/************************************************************************/
2252
2253
static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
2254
                                            bool bVisitSiblings)
2255
0
{
2256
0
    size_t nRet = sizeof(CPLXMLNode);
2257
    // malloc() aligns on 16-byte boundaries on 64 bit.
2258
0
    nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
2259
0
    if (bVisitSiblings)
2260
0
    {
2261
0
        for (const CPLXMLNode *psIter = psNode->psNext; psIter;
2262
0
             psIter = psIter->psNext)
2263
0
        {
2264
0
            nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
2265
0
        }
2266
0
    }
2267
0
    if (psNode->psChild)
2268
0
    {
2269
0
        nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
2270
0
    }
2271
0
    return nRet;
2272
0
}
2273
2274
/** Return a conservative estimate of the RAM usage of this node, its children
2275
 * and siblings. The returned values is in bytes.
2276
 *
2277
 * @since 3.9
2278
 */
2279
size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
2280
0
{
2281
0
    return CPLXMLNodeGetRAMUsageEstimate(psNode, true);
2282
0
}
2283
2284
/************************************************************************/
2285
/*            CPLXMLTreeCloser::getDocumentElement()                    */
2286
/************************************************************************/
2287
2288
// Returns the first node, in the sibling chain starting at get(), that is a
// CXT_Element whose name does not start with '?', or nullptr if there is
// none.
CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
{
    // Skip the declaration and assume the next element is the root element.
    for (CPLXMLNode *psNode = get(); psNode != nullptr;
         psNode = psNode->psNext)
    {
        if (psNode->eType == CXT_Element && psNode->pszValue[0] != '?')
            return psNode;
    }
    return nullptr;
}