/src/gdal/port/cplkeywordparser.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /******************************************************************************  | 
2  |  |  *  | 
3  |  |  * Project:  Common Portability Library  | 
4  |  |  * Purpose:  Implementation of CPLKeywordParser - a class for parsing  | 
5  |  |  *           the keyword format used for files like QuickBird .RPB files.  | 
6  |  |  *           This is a slight variation on the NASAKeywordParser used for  | 
7  |  |  *           the PDS/ISIS2/ISIS3 formats.  | 
8  |  |  * Author:   Frank Warmerdam <warmerdam@pobox.com>  | 
9  |  |  *  | 
10  |  |  ******************************************************************************  | 
11  |  |  * Copyright (c) 2008, Frank Warmerdam <warmerdam@pobox.com>  | 
12  |  |  * Copyright (c) 2009-2010, Even Rouault <even dot rouault at spatialys.com>  | 
13  |  |  *  | 
14  |  |  * SPDX-License-Identifier: MIT  | 
15  |  |  ****************************************************************************/  | 
16  |  |  | 
17  |  | //! @cond Doxygen_Suppress  | 
18  |  |  | 
19  |  | #include "cpl_port.h"  | 
20  |  | #include "cplkeywordparser.h"  | 
21  |  |  | 
22  |  | #include <cctype>  | 
23  |  | #include <cstring>  | 
24  |  | #include <string>  | 
25  |  |  | 
26  |  | #include "cpl_string.h"  | 
27  |  | #include "cpl_vsi.h"  | 
28  |  |  | 
29  |  | /************************************************************************/  | 
30  |  | /* ==================================================================== */  | 
31  |  | /*                          CPLKeywordParser                           */  | 
32  |  | /* ==================================================================== */  | 
33  |  | /************************************************************************/  | 
34  |  |  | 
35  |  | /************************************************************************/  | 
36  |  | /*                         CPLKeywordParser()                          */  | 
37  |  | /************************************************************************/  | 
38  |  |  | 
39  | 0  | CPLKeywordParser::CPLKeywordParser() = default;  | 
40  |  |  | 
41  |  | /************************************************************************/  | 
42  |  | /*                        ~CPLKeywordParser()                          */  | 
43  |  | /************************************************************************/  | 
44  |  |  | 
45  |  | CPLKeywordParser::~CPLKeywordParser()  | 
46  |  |  | 
47  | 0  | { | 
48  | 0  |     CSLDestroy(papszKeywordList);  | 
49  | 0  |     papszKeywordList = nullptr;  | 
50  | 0  | }  | 
51  |  |  | 
52  |  | /************************************************************************/  | 
53  |  | /*                               Ingest()                               */  | 
54  |  | /************************************************************************/  | 
55  |  |  | 
56  |  | int CPLKeywordParser::Ingest(VSILFILE *fp)  | 
57  |  |  | 
58  | 0  | { | 
59  |  |     /* -------------------------------------------------------------------- */  | 
60  |  |     /*      Read in buffer till we find END all on its own line.            */  | 
61  |  |     /* -------------------------------------------------------------------- */  | 
62  | 0  |     for (; true;)  | 
63  | 0  |     { | 
64  | 0  |         char szChunk[513] = {}; | 
65  | 0  |         const size_t nBytesRead = VSIFReadL(szChunk, 1, 512, fp);  | 
66  |  | 
  | 
67  | 0  |         szChunk[nBytesRead] = '\0';  | 
68  | 0  |         osHeaderText += szChunk;  | 
69  |  | 
  | 
70  | 0  |         if (nBytesRead < 512)  | 
71  | 0  |             break;  | 
72  |  |  | 
73  | 0  |         const char *pszCheck = nullptr;  | 
74  | 0  |         if (osHeaderText.size() > 520)  | 
75  | 0  |             pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520);  | 
76  | 0  |         else  | 
77  | 0  |             pszCheck = szChunk;  | 
78  |  | 
  | 
79  | 0  |         if (strstr(pszCheck, "\r\nEND;\r\n") != nullptr ||  | 
80  | 0  |             strstr(pszCheck, "\nEND;\n") != nullptr)  | 
81  | 0  |             break;  | 
82  | 0  |     }  | 
83  |  | 
  | 
84  | 0  |     pszHeaderNext = osHeaderText.c_str();  | 
85  |  |  | 
86  |  |     /* -------------------------------------------------------------------- */  | 
87  |  |     /*      Process name/value pairs, keeping track of a "path stack".      */  | 
88  |  |     /* -------------------------------------------------------------------- */  | 
89  | 0  |     return ReadGroup("", 0); | 
90  | 0  | }  | 
91  |  |  | 
92  |  | /************************************************************************/  | 
93  |  | /*                             ReadGroup()                              */  | 
94  |  | /************************************************************************/  | 
95  |  |  | 
96  |  | bool CPLKeywordParser::ReadGroup(const char *pszPathPrefix, int nRecLevel)  | 
97  |  |  | 
98  | 0  | { | 
99  | 0  |     CPLString osName;  | 
100  | 0  |     CPLString osValue;  | 
101  |  |  | 
102  |  |     // Arbitrary threshold to avoid stack overflow  | 
103  | 0  |     if (nRecLevel == 100)  | 
104  | 0  |         return false;  | 
105  |  |  | 
106  | 0  |     for (; true;)  | 
107  | 0  |     { | 
108  | 0  |         if (!ReadPair(osName, osValue))  | 
109  | 0  |             return false;  | 
110  |  |  | 
111  | 0  |         if (EQUAL(osName, "BEGIN_GROUP") || EQUAL(osName, "GROUP"))  | 
112  | 0  |         { | 
113  | 0  |             if (!ReadGroup((CPLString(pszPathPrefix) + osValue + ".").c_str(),  | 
114  | 0  |                            nRecLevel + 1))  | 
115  | 0  |                 return false;  | 
116  | 0  |         }  | 
117  | 0  |         else if (STARTS_WITH_CI(osName, "END"))  | 
118  | 0  |         { | 
119  | 0  |             return true;  | 
120  | 0  |         }  | 
121  | 0  |         else  | 
122  | 0  |         { | 
123  | 0  |             osName = pszPathPrefix + osName;  | 
124  | 0  |             papszKeywordList =  | 
125  | 0  |                 CSLSetNameValue(papszKeywordList, osName, osValue);  | 
126  | 0  |         }  | 
127  | 0  |     }  | 
128  | 0  | }  | 
129  |  |  | 
130  |  | /************************************************************************/  | 
131  |  | /*                              ReadPair()                              */  | 
132  |  | /*                                                                      */  | 
133  |  | /*      Read a name/value pair from the input stream.  Strip off        */  | 
134  |  | /*      white space, ignore comments, split on '='.                     */  | 
135  |  | /************************************************************************/  | 
136  |  |  | 
137  |  | bool CPLKeywordParser::ReadPair(CPLString &osName, CPLString &osValue)  | 
138  |  |  | 
139  | 0  | { | 
140  | 0  |     osName = "";  | 
141  | 0  |     osValue = "";  | 
142  |  | 
  | 
143  | 0  |     if (!ReadWord(osName))  | 
144  | 0  |         return false;  | 
145  |  |  | 
146  | 0  |     SkipWhite();  | 
147  |  | 
  | 
148  | 0  |     if (EQUAL(osName, "END"))  | 
149  | 0  |         return TRUE;  | 
150  |  |  | 
151  | 0  |     if (*pszHeaderNext != '=')  | 
152  | 0  |     { | 
153  |  |         // ISIS3 does not have anything after the end group/object keyword.  | 
154  | 0  |         return EQUAL(osName, "End_Group") || EQUAL(osName, "End_Object");  | 
155  | 0  |     }  | 
156  |  |  | 
157  | 0  |     pszHeaderNext++;  | 
158  |  | 
  | 
159  | 0  |     SkipWhite();  | 
160  |  | 
  | 
161  | 0  |     osValue = "";  | 
162  |  |  | 
163  |  |     // Handle value lists like:     Name   = (Red, Red)  | 
164  |  |     // or list of lists like: TLCList = ( (0, 0.000000), (8299, 4.811014) );  | 
165  | 0  |     if (*pszHeaderNext == '(') | 
166  | 0  |     { | 
167  | 0  |         CPLString osWord;  | 
168  | 0  |         int nDepth = 0;  | 
169  | 0  |         const char *pszLastPos = pszHeaderNext;  | 
170  |  | 
  | 
171  | 0  |         while (ReadWord(osWord) && pszLastPos != pszHeaderNext)  | 
172  | 0  |         { | 
173  | 0  |             SkipWhite();  | 
174  | 0  |             pszLastPos = pszHeaderNext;  | 
175  |  | 
  | 
176  | 0  |             osValue += osWord;  | 
177  | 0  |             const char *pszIter = osWord.c_str();  | 
178  | 0  |             bool bInQuote = false;  | 
179  | 0  |             while (*pszIter != '\0')  | 
180  | 0  |             { | 
181  | 0  |                 if (*pszIter == '"')  | 
182  | 0  |                     bInQuote = !bInQuote;  | 
183  | 0  |                 else if (!bInQuote)  | 
184  | 0  |                 { | 
185  | 0  |                     if (*pszIter == '(') | 
186  | 0  |                         nDepth++;  | 
187  | 0  |                     else if (*pszIter == ')')  | 
188  | 0  |                     { | 
189  | 0  |                         nDepth--;  | 
190  | 0  |                         if (nDepth == 0)  | 
191  | 0  |                             break;  | 
192  | 0  |                     }  | 
193  | 0  |                 }  | 
194  | 0  |                 pszIter++;  | 
195  | 0  |             }  | 
196  | 0  |             if (*pszIter == ')' && nDepth == 0)  | 
197  | 0  |                 break;  | 
198  | 0  |         }  | 
199  | 0  |     }  | 
200  |  |  | 
201  | 0  |     else  // Handle more normal "single word" values.  | 
202  | 0  |     { | 
203  |  |         // Special case to handle non-conformant IMD files generated by  | 
204  |  |         // previous GDAL version where we omit to surround values that have  | 
205  |  |         // spaces with double quotes.  | 
206  |  |         // So we use a heuristics to handle things like:  | 
207  |  |         //       key = value with spaces without single or double quotes at  | 
208  |  |         //       beginning of value;[\r]\n  | 
209  | 0  |         const char *pszNextLF = strchr(pszHeaderNext, '\n');  | 
210  | 0  |         if (pszNextLF)  | 
211  | 0  |         { | 
212  | 0  |             std::string osTxt(pszHeaderNext, pszNextLF - pszHeaderNext);  | 
213  | 0  |             const auto nCRPos = osTxt.find('\r'); | 
214  | 0  |             const auto nSemiColonPos = osTxt.find(';'); | 
215  | 0  |             const auto nQuotePos = osTxt.find('\''); | 
216  | 0  |             const auto nDoubleQuotePos = osTxt.find('"'); | 
217  | 0  |             const auto nLTPos = osTxt.find('<'); | 
218  | 0  |             if (nSemiColonPos != std::string::npos &&  | 
219  | 0  |                 (nCRPos == std::string::npos || (nCRPos + 1 == osTxt.size())) &&  | 
220  | 0  |                 ((nCRPos != std::string::npos &&  | 
221  | 0  |                   (nSemiColonPos + 1 == nCRPos)) ||  | 
222  | 0  |                  (nCRPos == std::string::npos &&  | 
223  | 0  |                   (nSemiColonPos + 1 == osTxt.size()))) &&  | 
224  | 0  |                 (nQuotePos == std::string::npos || nQuotePos != 0) &&  | 
225  | 0  |                 (nDoubleQuotePos == std::string::npos ||  | 
226  | 0  |                  nDoubleQuotePos != 0) &&  | 
227  | 0  |                 (nLTPos == std::string::npos ||  | 
228  | 0  |                  osTxt.find('>') == std::string::npos)) | 
229  | 0  |             { | 
230  | 0  |                 pszHeaderNext = pszNextLF;  | 
231  | 0  |                 osTxt.resize(nSemiColonPos);  | 
232  | 0  |                 osValue = osTxt;  | 
233  | 0  |                 while (!osValue.empty() && osValue.back() == ' ')  | 
234  | 0  |                     osValue.pop_back();  | 
235  | 0  |                 return true;  | 
236  | 0  |             }  | 
237  | 0  |         }  | 
238  |  |  | 
239  | 0  |         if (!ReadWord(osValue))  | 
240  | 0  |             return false;  | 
241  | 0  |     }  | 
242  |  |  | 
243  | 0  |     SkipWhite();  | 
244  |  |  | 
245  |  |     // No units keyword?  | 
246  | 0  |     if (*pszHeaderNext != '<')  | 
247  | 0  |         return true;  | 
248  |  |  | 
249  |  |     // Append units keyword.  For lines that like like this:  | 
250  |  |     //  MAP_RESOLUTION               = 4.0 <PIXEL/DEGREE>  | 
251  |  |  | 
252  | 0  |     CPLString osWord;  | 
253  |  | 
  | 
254  | 0  |     osValue += " ";  | 
255  |  | 
  | 
256  | 0  |     while (ReadWord(osWord))  | 
257  | 0  |     { | 
258  | 0  |         SkipWhite();  | 
259  |  | 
  | 
260  | 0  |         osValue += osWord;  | 
261  | 0  |         if (osWord.back() == '>')  | 
262  | 0  |             break;  | 
263  | 0  |     }  | 
264  |  | 
  | 
265  | 0  |     return true;  | 
266  | 0  | }  | 
267  |  |  | 
268  |  | /************************************************************************/  | 
269  |  | /*                              ReadWord()                              */  | 
270  |  | /************************************************************************/  | 
271  |  |  | 
272  |  | bool CPLKeywordParser::ReadWord(CPLString &osWord)  | 
273  |  |  | 
274  | 0  | { | 
275  | 0  |     osWord = "";  | 
276  |  | 
  | 
277  | 0  |     SkipWhite();  | 
278  |  | 
  | 
279  | 0  |     if (*pszHeaderNext == '\0' || *pszHeaderNext == '=')  | 
280  | 0  |         return false;  | 
281  |  |  | 
282  | 0  |     while (*pszHeaderNext != '\0' && *pszHeaderNext != '=' &&  | 
283  | 0  |            *pszHeaderNext != ';' &&  | 
284  | 0  |            !isspace(static_cast<unsigned char>(*pszHeaderNext)))  | 
285  | 0  |     { | 
286  | 0  |         if (*pszHeaderNext == '"')  | 
287  | 0  |         { | 
288  | 0  |             osWord += *(pszHeaderNext++);  | 
289  | 0  |             while (*pszHeaderNext != '"')  | 
290  | 0  |             { | 
291  | 0  |                 if (*pszHeaderNext == '\0')  | 
292  | 0  |                     return false;  | 
293  |  |  | 
294  | 0  |                 osWord += *(pszHeaderNext++);  | 
295  | 0  |             }  | 
296  | 0  |             osWord += *(pszHeaderNext++);  | 
297  | 0  |         }  | 
298  | 0  |         else if (*pszHeaderNext == '\'')  | 
299  | 0  |         { | 
300  | 0  |             osWord += *(pszHeaderNext++);  | 
301  | 0  |             while (*pszHeaderNext != '\'')  | 
302  | 0  |             { | 
303  | 0  |                 if (*pszHeaderNext == '\0')  | 
304  | 0  |                     return false;  | 
305  |  |  | 
306  | 0  |                 osWord += *(pszHeaderNext++);  | 
307  | 0  |             }  | 
308  | 0  |             osWord += *(pszHeaderNext++);  | 
309  | 0  |         }  | 
310  | 0  |         else  | 
311  | 0  |         { | 
312  | 0  |             osWord += *pszHeaderNext;  | 
313  | 0  |             pszHeaderNext++;  | 
314  | 0  |         }  | 
315  | 0  |     }  | 
316  |  |  | 
317  | 0  |     if (*pszHeaderNext == ';')  | 
318  | 0  |         pszHeaderNext++;  | 
319  |  | 
  | 
320  | 0  |     return true;  | 
321  | 0  | }  | 
322  |  |  | 
323  |  | /************************************************************************/  | 
324  |  | /*                             SkipWhite()                              */  | 
325  |  | /************************************************************************/  | 
326  |  |  | 
327  |  | void CPLKeywordParser::SkipWhite()  | 
328  |  |  | 
329  | 0  | { | 
330  | 0  |     for (; true;)  | 
331  | 0  |     { | 
332  |  |         // Skip white space (newline, space, tab, etc )  | 
333  | 0  |         if (isspace(static_cast<unsigned char>(*pszHeaderNext)))  | 
334  | 0  |         { | 
335  | 0  |             pszHeaderNext++;  | 
336  | 0  |             continue;  | 
337  | 0  |         }  | 
338  |  |  | 
339  |  |         // Skip C style comments  | 
340  | 0  |         if (*pszHeaderNext == '/' && pszHeaderNext[1] == '*')  | 
341  | 0  |         { | 
342  | 0  |             pszHeaderNext += 2;  | 
343  |  | 
  | 
344  | 0  |             while (*pszHeaderNext != '\0' &&  | 
345  | 0  |                    (*pszHeaderNext != '*' || pszHeaderNext[1] != '/'))  | 
346  | 0  |             { | 
347  | 0  |                 pszHeaderNext++;  | 
348  | 0  |             }  | 
349  | 0  |             if (*pszHeaderNext == '\0')  | 
350  | 0  |                 break;  | 
351  |  |  | 
352  | 0  |             pszHeaderNext += 2;  | 
353  | 0  |             continue;  | 
354  | 0  |         }  | 
355  |  |  | 
356  |  |         // Skip # style comments  | 
357  | 0  |         if (*pszHeaderNext == '#')  | 
358  | 0  |         { | 
359  | 0  |             pszHeaderNext += 1;  | 
360  |  |  | 
361  |  |             // consume till end of line.  | 
362  | 0  |             while (*pszHeaderNext != '\0' && *pszHeaderNext != 10 &&  | 
363  | 0  |                    *pszHeaderNext != 13)  | 
364  | 0  |             { | 
365  | 0  |                 pszHeaderNext++;  | 
366  | 0  |             }  | 
367  | 0  |             continue;  | 
368  | 0  |         }  | 
369  |  |  | 
370  |  |         // not white space, return.  | 
371  | 0  |         return;  | 
372  | 0  |     }  | 
373  | 0  | }  | 
374  |  |  | 
375  |  | /************************************************************************/  | 
376  |  | /*                             GetKeyword()                             */  | 
377  |  | /************************************************************************/  | 
378  |  |  | 
379  |  | const char *CPLKeywordParser::GetKeyword(const char *pszPath,  | 
380  |  |                                          const char *pszDefault)  | 
381  |  |  | 
382  | 0  | { | 
383  | 0  |     const char *pszResult = CSLFetchNameValue(papszKeywordList, pszPath);  | 
384  | 0  |     if (pszResult == nullptr)  | 
385  | 0  |         return pszDefault;  | 
386  |  |  | 
387  | 0  |     return pszResult;  | 
388  | 0  | }  | 
389  |  |  | 
390  |  | //! @endcond  |