Coverage Report

Created: 2025-06-13 06:18

/src/gdal/port/cplkeywordparser.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  Common Portability Library
4
 * Purpose:  Implementation of CPLKeywordParser - a class for parsing
5
 *           the keyword format used for files like QuickBird .RPB files.
6
 *           This is a slight variation on the NASAKeywordParser used for
7
 *           the PDS/ISIS2/ISIS3 formats.
8
 * Author:   Frank Warmerdam <warmerdam@pobox.com>
9
 *
10
 ******************************************************************************
11
 * Copyright (c) 2008, Frank Warmerdam <warmerdam@pobox.com>
12
 * Copyright (c) 2009-2010, Even Rouault <even dot rouault at spatialys.com>
13
 *
14
 * SPDX-License-Identifier: MIT
15
 ****************************************************************************/
16
17
//! @cond Doxygen_Suppress
18
19
#include "cpl_port.h"
20
#include "cplkeywordparser.h"
21
22
#include <cctype>
23
#include <cstring>
24
#include <string>
25
26
#include "cpl_string.h"
27
#include "cpl_vsi.h"
28
29
/************************************************************************/
30
/* ==================================================================== */
31
/*                          CPLKeywordParser                           */
32
/* ==================================================================== */
33
/************************************************************************/
34
35
/************************************************************************/
36
/*                         CPLKeywordParser()                          */
37
/************************************************************************/
38
39
0
// Default constructor: uses the compiler-generated definition.
// NOTE(review): member defaults are presumably set by in-class initializers
// in cplkeywordparser.h (not visible here) -- confirm.
CPLKeywordParser::CPLKeywordParser() = default;
40
41
/************************************************************************/
42
/*                        ~CPLKeywordParser()                          */
43
/************************************************************************/
44
45
// Releases the keyword list held by the parser and leaves the member
// pointer cleared.
CPLKeywordParser::~CPLKeywordParser()
{
    char **papszToRelease = papszKeywordList;
    papszKeywordList = nullptr;
    CSLDestroy(papszToRelease);
}
51
52
/************************************************************************/
53
/*                               Ingest()                               */
54
/************************************************************************/
55
56
// Reads the keyword header from fp into osHeaderText, 512 bytes at a time,
// then parses it with ReadGroup().
//
// Reading stops at the first short read, or as soon as a line consisting
// only of "END;" (LF or CRLF line endings) shows up near the end of the
// text accumulated so far.
//
// Returns the result of ReadGroup("", 0), converted to int.
int CPLKeywordParser::Ingest(VSILFILE *fp)
{
    constexpr size_t nChunkSize = 512;
    // Trailing window searched for the END marker. It is 8 bytes larger than
    // one chunk, i.e. the length of "\r\nEND;\r\n", so a marker straddling
    // two consecutive chunks is still inside the window.
    constexpr size_t nTailWindow = 520;

    /* -------------------------------------------------------------------- */
    /*      Accumulate chunks until a stand-alone END; line is seen.        */
    /* -------------------------------------------------------------------- */
    while (true)
    {
        char achBuffer[nChunkSize + 1] = {};
        const size_t nRead = VSIFReadL(achBuffer, 1, nChunkSize, fp);

        // Terminate the buffer so it can be appended as a C string.
        achBuffer[nRead] = '\0';
        osHeaderText += achBuffer;

        // A short read means no further full chunk is available.
        if (nRead < nChunkSize)
            break;

        const char *pszTail =
            osHeaderText.size() > nTailWindow
                ? osHeaderText.c_str() + (osHeaderText.size() - nTailWindow)
                : achBuffer;

        const bool bFoundEnd = strstr(pszTail, "\r\nEND;\r\n") != nullptr ||
                               strstr(pszTail, "\nEND;\n") != nullptr;
        if (bFoundEnd)
            break;
    }

    // Parsing starts at the beginning of the accumulated text.
    pszHeaderNext = osHeaderText.c_str();

    /* -------------------------------------------------------------------- */
    /*      Parse name/value pairs, starting with an empty path prefix.     */
    /* -------------------------------------------------------------------- */
    return ReadGroup("", 0);
}
91
92
/************************************************************************/
93
/*                             ReadGroup()                              */
94
/************************************************************************/
95
96
bool CPLKeywordParser::ReadGroup(const char *pszPathPrefix, int nRecLevel)
97
98
0
{
99
0
    CPLString osName;
100
0
    CPLString osValue;
101
102
    // Arbitrary threshold to avoid stack overflow
103
0
    if (nRecLevel == 100)
104
0
        return false;
105
106
0
    for (; true;)
107
0
    {
108
0
        if (!ReadPair(osName, osValue))
109
0
            return false;
110
111
0
        if (EQUAL(osName, "BEGIN_GROUP") || EQUAL(osName, "GROUP"))
112
0
        {
113
0
            if (!ReadGroup((CPLString(pszPathPrefix) + osValue + ".").c_str(),
114
0
                           nRecLevel + 1))
115
0
                return false;
116
0
        }
117
0
        else if (STARTS_WITH_CI(osName, "END"))
118
0
        {
119
0
            return true;
120
0
        }
121
0
        else
122
0
        {
123
0
            osName = pszPathPrefix + osName;
124
0
            papszKeywordList =
125
0
                CSLSetNameValue(papszKeywordList, osName, osValue);
126
0
        }
127
0
    }
128
0
}
129
130
/************************************************************************/
131
/*                              ReadPair()                              */
132
/*                                                                      */
133
/*      Read a name/value pair from the input stream.  Strip off        */
134
/*      white space, ignore comments, split on '='.                     */
135
/************************************************************************/
136
137
// Reads one "name = value" pair starting at pszHeaderNext.
//
// osName receives the keyword name, osValue the value (both are cleared
// first). The value may be:
//   - a parenthesized list, possibly nested, e.g. ( (0, 0.0), (8299, 4.8) )
//   - a single word, optionally quoted
//   - an unquoted run of text with spaces terminated by ";" at end of line
//     (legacy non-conformant IMD files)
// and may be followed by a units suffix such as <PIXEL/DEGREE>, which is
// appended to the value after a single space.
//
// Returns true when a pair was read, and also for a bare END, End_Group or
// End_Object keyword (osValue is then empty). Returns false when no name
// can be read, when a name other than those is not followed by '=', or when
// a single-word value cannot be read.
bool CPLKeywordParser::ReadPair(CPLString &osName, CPLString &osValue)
{
    osName = "";
    osValue = "";

    if (!ReadWord(osName))
        return false;

    SkipWhite();

    if (EQUAL(osName, "END"))
        return true;

    if (*pszHeaderNext != '=')
    {
        // ISIS3 does not have anything after the end group/object keyword.
        return EQUAL(osName, "End_Group") || EQUAL(osName, "End_Object");
    }

    // Step over the '=' sign.
    pszHeaderNext++;

    SkipWhite();

    // Handle value lists like:     Name   = (Red, Red)
    // or list of lists like: TLCList = ( (0, 0.000000), (8299, 4.811014) );
    if (*pszHeaderNext == '(')
    {
        CPLString osWord;
        int nDepth = 0;
        const char *pszLastPos = pszHeaderNext;

        // Keep appending words until the parenthesis opened first is closed.
        // The position check stops the loop if no progress is being made.
        while (ReadWord(osWord) && pszLastPos != pszHeaderNext)
        {
            SkipWhite();
            pszLastPos = pszHeaderNext;

            osValue += osWord;

            // Track parenthesis nesting, ignoring anything inside double
            // quotes.
            const char *pszIter = osWord.c_str();
            bool bInQuote = false;
            while (*pszIter != '\0')
            {
                if (*pszIter == '"')
                    bInQuote = !bInQuote;
                else if (!bInQuote)
                {
                    if (*pszIter == '(')
                        nDepth++;
                    else if (*pszIter == ')')
                    {
                        nDepth--;
                        if (nDepth == 0)
                            break;
                    }
                }
                pszIter++;
            }
            if (*pszIter == ')' && nDepth == 0)
                break;
        }
    }

    else  // Handle more normal "single word" values.
    {
        // Special case to handle non-conformant IMD files generated by
        // previous GDAL version where we omit to surround values that have
        // spaces with double quotes.
        // So we use a heuristic to handle things like:
        //       key = value with spaces without single or double quotes at
        //       beginning of value;[\r]\n
        const char *pszNextLF = strchr(pszHeaderNext, '\n');
        if (pszNextLF)
        {
            // Remainder of the current line, without the line feed.
            std::string osTxt(pszHeaderNext, pszNextLF - pszHeaderNext);
            const auto nCRPos = osTxt.find('\r');
            const auto nSemiColonPos = osTxt.find(';');
            const auto nQuotePos = osTxt.find('\'');
            const auto nDoubleQuotePos = osTxt.find('"');
            const auto nLTPos = osTxt.find('<');
            // The heuristic applies when:
            //  - the first ';' is the last character of the line, not
            //    counting one optional trailing '\r',
            //  - the line does not start with a single or double quote,
            //  - the line does not contain both '<' and '>' (which would
            //    indicate a units suffix).
            if (nSemiColonPos != std::string::npos &&
                (nCRPos == std::string::npos || (nCRPos + 1 == osTxt.size())) &&
                ((nCRPos != std::string::npos &&
                  (nSemiColonPos + 1 == nCRPos)) ||
                 (nCRPos == std::string::npos &&
                  (nSemiColonPos + 1 == osTxt.size()))) &&
                (nQuotePos == std::string::npos || nQuotePos != 0) &&
                (nDoubleQuotePos == std::string::npos ||
                 nDoubleQuotePos != 0) &&
                (nLTPos == std::string::npos ||
                 osTxt.find('>') == std::string::npos))
            {
                // Consume the whole line; the value is everything before
                // the ';' with trailing spaces removed.
                pszHeaderNext = pszNextLF;
                osTxt.resize(nSemiColonPos);
                osValue = osTxt;
                while (!osValue.empty() && osValue.back() == ' ')
                    osValue.pop_back();
                return true;
            }
        }

        if (!ReadWord(osValue))
            return false;
    }

    SkipWhite();

    // No units keyword?
    if (*pszHeaderNext != '<')
        return true;

    // Append units keyword.  For lines that look like this:
    //  MAP_RESOLUTION               = 4.0 <PIXEL/DEGREE>

    CPLString osWord;

    osValue += " ";

    while (ReadWord(osWord))
    {
        SkipWhite();

        osValue += osWord;
        // ReadWord() succeeds with an empty word when it is positioned on a
        // ';', so guard back(), which is undefined on an empty string.
        if (!osWord.empty() && osWord.back() == '>')
            break;
    }

    return true;
}
267
268
/************************************************************************/
269
/*                              ReadWord()                              */
270
/************************************************************************/
271
272
// Reads the next word at pszHeaderNext into osWord (cleared first).
//
// Leading white space and comments are skipped. A word ends at white space,
// '=', ';' or end of text. Sections enclosed in single or double quotes are
// copied verbatim, quotes included, and may contain any of those
// delimiters. A ';' directly following the word is consumed.
//
// Returns false if the text is exhausted, if the next character is '=', or
// if a quoted section is not closed before the end of the text.
bool CPLKeywordParser::ReadWord(CPLString &osWord)
{
    osWord = "";

    SkipWhite();

    if (*pszHeaderNext == '\0' || *pszHeaderNext == '=')
        return false;

    for (;;)
    {
        const char ch = *pszHeaderNext;
        const bool bAtDelimiter =
            ch == '\0' || ch == '=' || ch == ';' ||
            isspace(static_cast<unsigned char>(ch));
        if (bAtDelimiter)
            break;

        // Every character of the word, opening quotes included, is copied.
        osWord += ch;
        ++pszHeaderNext;

        if (ch != '"' && ch != '\'')
            continue;

        // Quoted section: copy everything up to the matching quote.
        for (; *pszHeaderNext != ch; ++pszHeaderNext)
        {
            if (*pszHeaderNext == '\0')
                return false;  // unterminated quote

            osWord += *pszHeaderNext;
        }

        // Copy the closing quote as well.
        osWord += ch;
        ++pszHeaderNext;
    }

    // Swallow a statement terminator attached to the word.
    if (*pszHeaderNext == ';')
        ++pszHeaderNext;

    return true;
}
322
323
/************************************************************************/
324
/*                             SkipWhite()                              */
325
/************************************************************************/
326
327
void CPLKeywordParser::SkipWhite()
328
329
0
{
330
0
    for (; true;)
331
0
    {
332
        // Skip white space (newline, space, tab, etc )
333
0
        if (isspace(static_cast<unsigned char>(*pszHeaderNext)))
334
0
        {
335
0
            pszHeaderNext++;
336
0
            continue;
337
0
        }
338
339
        // Skip C style comments
340
0
        if (*pszHeaderNext == '/' && pszHeaderNext[1] == '*')
341
0
        {
342
0
            pszHeaderNext += 2;
343
344
0
            while (*pszHeaderNext != '\0' &&
345
0
                   (*pszHeaderNext != '*' || pszHeaderNext[1] != '/'))
346
0
            {
347
0
                pszHeaderNext++;
348
0
            }
349
0
            if (*pszHeaderNext == '\0')
350
0
                break;
351
352
0
            pszHeaderNext += 2;
353
0
            continue;
354
0
        }
355
356
        // Skip # style comments
357
0
        if (*pszHeaderNext == '#')
358
0
        {
359
0
            pszHeaderNext += 1;
360
361
            // consume till end of line.
362
0
            while (*pszHeaderNext != '\0' && *pszHeaderNext != 10 &&
363
0
                   *pszHeaderNext != 13)
364
0
            {
365
0
                pszHeaderNext++;
366
0
            }
367
0
            continue;
368
0
        }
369
370
        // not white space, return.
371
0
        return;
372
0
    }
373
0
}
374
375
/************************************************************************/
376
/*                             GetKeyword()                             */
377
/************************************************************************/
378
379
const char *CPLKeywordParser::GetKeyword(const char *pszPath,
380
                                         const char *pszDefault)
381
382
0
{
383
0
    const char *pszResult = CSLFetchNameValue(papszKeywordList, pszPath);
384
0
    if (pszResult == nullptr)
385
0
        return pszDefault;
386
387
0
    return pszResult;
388
0
}
389
390
//! @endcond