/src/gdal/port/cplkeywordparser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: Common Portability Library |
4 | | * Purpose: Implementation of CPLKeywordParser - a class for parsing |
5 | | * the keyword format used for files like QuickBird .RPB files. |
6 | | * This is a slight variation on the NASAKeywordParser used for |
7 | | * the PDS/ISIS2/ISIS3 formats. |
8 | | * Author: Frank Warmerdam <warmerdam@pobox.com> |
9 | | * |
10 | | ****************************************************************************** |
11 | | * Copyright (c) 2008, Frank Warmerdam <warmerdam@pobox.com> |
12 | | * Copyright (c) 2009-2010, Even Rouault <even dot rouault at spatialys.com> |
13 | | * |
14 | | * SPDX-License-Identifier: MIT |
15 | | ****************************************************************************/ |
16 | | |
17 | | //! @cond Doxygen_Suppress |
18 | | |
19 | | #include "cpl_port.h" |
20 | | #include "cplkeywordparser.h" |
21 | | |
22 | | #include <cctype> |
23 | | #include <cstring> |
24 | | #include <string> |
25 | | |
26 | | #include "cpl_string.h" |
27 | | #include "cpl_vsi.h" |
28 | | |
29 | | /************************************************************************/ |
30 | | /* ==================================================================== */ |
31 | | /* CPLKeywordParser */ |
32 | | /* ==================================================================== */ |
33 | | /************************************************************************/ |
34 | | |
35 | | /************************************************************************/ |
36 | | /* CPLKeywordParser() */ |
37 | | /************************************************************************/ |
38 | | |
39 | 0 | CPLKeywordParser::CPLKeywordParser() = default; |
40 | | |
41 | | /************************************************************************/ |
42 | | /* ~CPLKeywordParser() */ |
43 | | /************************************************************************/ |
44 | | |
45 | | CPLKeywordParser::~CPLKeywordParser() |
46 | | |
47 | 0 | { |
48 | 0 | CSLDestroy(papszKeywordList); |
49 | 0 | papszKeywordList = nullptr; |
50 | 0 | } |
51 | | |
52 | | /************************************************************************/ |
53 | | /* Ingest() */ |
54 | | /************************************************************************/ |
55 | | |
56 | | int CPLKeywordParser::Ingest(VSILFILE *fp) |
57 | | |
58 | 0 | { |
59 | | /* -------------------------------------------------------------------- */ |
60 | | /* Read in buffer till we find END all on its own line. */ |
61 | | /* -------------------------------------------------------------------- */ |
62 | 0 | for (; true;) |
63 | 0 | { |
64 | 0 | char szChunk[513] = {}; |
65 | 0 | const size_t nBytesRead = VSIFReadL(szChunk, 1, 512, fp); |
66 | |
|
67 | 0 | szChunk[nBytesRead] = '\0'; |
68 | 0 | osHeaderText += szChunk; |
69 | |
|
70 | 0 | if (nBytesRead < 512) |
71 | 0 | break; |
72 | | |
73 | 0 | const char *pszCheck = nullptr; |
74 | 0 | if (osHeaderText.size() > 520) |
75 | 0 | pszCheck = osHeaderText.c_str() + (osHeaderText.size() - 520); |
76 | 0 | else |
77 | 0 | pszCheck = szChunk; |
78 | |
|
79 | 0 | if (strstr(pszCheck, "\r\nEND;\r\n") != nullptr || |
80 | 0 | strstr(pszCheck, "\nEND;\n") != nullptr) |
81 | 0 | break; |
82 | 0 | } |
83 | |
|
84 | 0 | pszHeaderNext = osHeaderText.c_str(); |
85 | | |
86 | | /* -------------------------------------------------------------------- */ |
87 | | /* Process name/value pairs, keeping track of a "path stack". */ |
88 | | /* -------------------------------------------------------------------- */ |
89 | 0 | return ReadGroup("", 0); |
90 | 0 | } |
91 | | |
92 | | /************************************************************************/ |
93 | | /* ReadGroup() */ |
94 | | /************************************************************************/ |
95 | | |
96 | | bool CPLKeywordParser::ReadGroup(const char *pszPathPrefix, int nRecLevel) |
97 | | |
98 | 0 | { |
99 | 0 | CPLString osName; |
100 | 0 | CPLString osValue; |
101 | | |
102 | | // Arbitrary threshold to avoid stack overflow |
103 | 0 | if (nRecLevel == 100) |
104 | 0 | return false; |
105 | | |
106 | 0 | for (; true;) |
107 | 0 | { |
108 | 0 | if (!ReadPair(osName, osValue)) |
109 | 0 | return false; |
110 | | |
111 | 0 | if (EQUAL(osName, "BEGIN_GROUP") || EQUAL(osName, "GROUP")) |
112 | 0 | { |
113 | 0 | if (!ReadGroup((CPLString(pszPathPrefix) + osValue + ".").c_str(), |
114 | 0 | nRecLevel + 1)) |
115 | 0 | return false; |
116 | 0 | } |
117 | 0 | else if (STARTS_WITH_CI(osName, "END")) |
118 | 0 | { |
119 | 0 | return true; |
120 | 0 | } |
121 | 0 | else |
122 | 0 | { |
123 | 0 | osName = pszPathPrefix + osName; |
124 | 0 | papszKeywordList = |
125 | 0 | CSLSetNameValue(papszKeywordList, osName, osValue); |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | | /************************************************************************/ |
131 | | /* ReadPair() */ |
132 | | /* */ |
133 | | /* Read a name/value pair from the input stream. Strip off */ |
134 | | /* white space, ignore comments, split on '='. */ |
135 | | /************************************************************************/ |
136 | | |
137 | | bool CPLKeywordParser::ReadPair(CPLString &osName, CPLString &osValue) |
138 | | |
139 | 0 | { |
140 | 0 | osName = ""; |
141 | 0 | osValue = ""; |
142 | |
|
143 | 0 | if (!ReadWord(osName)) |
144 | 0 | return false; |
145 | | |
146 | 0 | SkipWhite(); |
147 | |
|
148 | 0 | if (EQUAL(osName, "END")) |
149 | 0 | return TRUE; |
150 | | |
151 | 0 | if (*pszHeaderNext != '=') |
152 | 0 | { |
153 | | // ISIS3 does not have anything after the end group/object keyword. |
154 | 0 | return EQUAL(osName, "End_Group") || EQUAL(osName, "End_Object"); |
155 | 0 | } |
156 | | |
157 | 0 | pszHeaderNext++; |
158 | |
|
159 | 0 | SkipWhite(); |
160 | |
|
161 | 0 | osValue = ""; |
162 | | |
163 | | // Handle value lists like: Name = (Red, Red) |
164 | | // or list of lists like: TLCList = ( (0, 0.000000), (8299, 4.811014) ); |
165 | 0 | if (*pszHeaderNext == '(') |
166 | 0 | { |
167 | 0 | CPLString osWord; |
168 | 0 | int nDepth = 0; |
169 | 0 | const char *pszLastPos = pszHeaderNext; |
170 | |
|
171 | 0 | while (ReadWord(osWord) && pszLastPos != pszHeaderNext) |
172 | 0 | { |
173 | 0 | SkipWhite(); |
174 | 0 | pszLastPos = pszHeaderNext; |
175 | |
|
176 | 0 | osValue += osWord; |
177 | 0 | const char *pszIter = osWord.c_str(); |
178 | 0 | bool bInQuote = false; |
179 | 0 | while (*pszIter != '\0') |
180 | 0 | { |
181 | 0 | if (*pszIter == '"') |
182 | 0 | bInQuote = !bInQuote; |
183 | 0 | else if (!bInQuote) |
184 | 0 | { |
185 | 0 | if (*pszIter == '(') |
186 | 0 | nDepth++; |
187 | 0 | else if (*pszIter == ')') |
188 | 0 | { |
189 | 0 | nDepth--; |
190 | 0 | if (nDepth == 0) |
191 | 0 | break; |
192 | 0 | } |
193 | 0 | } |
194 | 0 | pszIter++; |
195 | 0 | } |
196 | 0 | if (*pszIter == ')' && nDepth == 0) |
197 | 0 | break; |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | 0 | else // Handle more normal "single word" values. |
202 | 0 | { |
203 | | // Special case to handle non-conformant IMD files generated by |
204 | | // previous GDAL version where we omit to surround values that have |
205 | | // spaces with double quotes. |
206 | | // So we use a heuristics to handle things like: |
207 | | // key = value with spaces without single or double quotes at |
208 | | // beginning of value;[\r]\n |
209 | 0 | const char *pszNextLF = strchr(pszHeaderNext, '\n'); |
210 | 0 | if (pszNextLF) |
211 | 0 | { |
212 | 0 | std::string osTxt(pszHeaderNext, pszNextLF - pszHeaderNext); |
213 | 0 | const auto nCRPos = osTxt.find('\r'); |
214 | 0 | const auto nSemiColonPos = osTxt.find(';'); |
215 | 0 | const auto nQuotePos = osTxt.find('\''); |
216 | 0 | const auto nDoubleQuotePos = osTxt.find('"'); |
217 | 0 | const auto nLTPos = osTxt.find('<'); |
218 | 0 | if (nSemiColonPos != std::string::npos && |
219 | 0 | (nCRPos == std::string::npos || (nCRPos + 1 == osTxt.size())) && |
220 | 0 | ((nCRPos != std::string::npos && |
221 | 0 | (nSemiColonPos + 1 == nCRPos)) || |
222 | 0 | (nCRPos == std::string::npos && |
223 | 0 | (nSemiColonPos + 1 == osTxt.size()))) && |
224 | 0 | (nQuotePos == std::string::npos || nQuotePos != 0) && |
225 | 0 | (nDoubleQuotePos == std::string::npos || |
226 | 0 | nDoubleQuotePos != 0) && |
227 | 0 | (nLTPos == std::string::npos || |
228 | 0 | osTxt.find('>') == std::string::npos)) |
229 | 0 | { |
230 | 0 | pszHeaderNext = pszNextLF; |
231 | 0 | osTxt.resize(nSemiColonPos); |
232 | 0 | osValue = osTxt; |
233 | 0 | while (!osValue.empty() && osValue.back() == ' ') |
234 | 0 | osValue.pop_back(); |
235 | 0 | return true; |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | 0 | if (!ReadWord(osValue)) |
240 | 0 | return false; |
241 | 0 | } |
242 | | |
243 | 0 | SkipWhite(); |
244 | | |
245 | | // No units keyword? |
246 | 0 | if (*pszHeaderNext != '<') |
247 | 0 | return true; |
248 | | |
249 | | // Append units keyword. For lines that like like this: |
250 | | // MAP_RESOLUTION = 4.0 <PIXEL/DEGREE> |
251 | | |
252 | 0 | CPLString osWord; |
253 | |
|
254 | 0 | osValue += " "; |
255 | |
|
256 | 0 | while (ReadWord(osWord)) |
257 | 0 | { |
258 | 0 | SkipWhite(); |
259 | |
|
260 | 0 | osValue += osWord; |
261 | 0 | if (osWord.back() == '>') |
262 | 0 | break; |
263 | 0 | } |
264 | |
|
265 | 0 | return true; |
266 | 0 | } |
267 | | |
268 | | /************************************************************************/ |
269 | | /* ReadWord() */ |
270 | | /************************************************************************/ |
271 | | |
272 | | bool CPLKeywordParser::ReadWord(CPLString &osWord) |
273 | | |
274 | 0 | { |
275 | 0 | osWord = ""; |
276 | |
|
277 | 0 | SkipWhite(); |
278 | |
|
279 | 0 | if (*pszHeaderNext == '\0' || *pszHeaderNext == '=') |
280 | 0 | return false; |
281 | | |
282 | 0 | while (*pszHeaderNext != '\0' && *pszHeaderNext != '=' && |
283 | 0 | *pszHeaderNext != ';' && |
284 | 0 | !isspace(static_cast<unsigned char>(*pszHeaderNext))) |
285 | 0 | { |
286 | 0 | if (*pszHeaderNext == '"') |
287 | 0 | { |
288 | 0 | osWord += *(pszHeaderNext++); |
289 | 0 | while (*pszHeaderNext != '"') |
290 | 0 | { |
291 | 0 | if (*pszHeaderNext == '\0') |
292 | 0 | return false; |
293 | | |
294 | 0 | osWord += *(pszHeaderNext++); |
295 | 0 | } |
296 | 0 | osWord += *(pszHeaderNext++); |
297 | 0 | } |
298 | 0 | else if (*pszHeaderNext == '\'') |
299 | 0 | { |
300 | 0 | osWord += *(pszHeaderNext++); |
301 | 0 | while (*pszHeaderNext != '\'') |
302 | 0 | { |
303 | 0 | if (*pszHeaderNext == '\0') |
304 | 0 | return false; |
305 | | |
306 | 0 | osWord += *(pszHeaderNext++); |
307 | 0 | } |
308 | 0 | osWord += *(pszHeaderNext++); |
309 | 0 | } |
310 | 0 | else |
311 | 0 | { |
312 | 0 | osWord += *pszHeaderNext; |
313 | 0 | pszHeaderNext++; |
314 | 0 | } |
315 | 0 | } |
316 | | |
317 | 0 | if (*pszHeaderNext == ';') |
318 | 0 | pszHeaderNext++; |
319 | |
|
320 | 0 | return true; |
321 | 0 | } |
322 | | |
323 | | /************************************************************************/ |
324 | | /* SkipWhite() */ |
325 | | /************************************************************************/ |
326 | | |
327 | | void CPLKeywordParser::SkipWhite() |
328 | | |
329 | 0 | { |
330 | 0 | for (; true;) |
331 | 0 | { |
332 | | // Skip white space (newline, space, tab, etc ) |
333 | 0 | if (isspace(static_cast<unsigned char>(*pszHeaderNext))) |
334 | 0 | { |
335 | 0 | pszHeaderNext++; |
336 | 0 | continue; |
337 | 0 | } |
338 | | |
339 | | // Skip C style comments |
340 | 0 | if (*pszHeaderNext == '/' && pszHeaderNext[1] == '*') |
341 | 0 | { |
342 | 0 | pszHeaderNext += 2; |
343 | |
|
344 | 0 | while (*pszHeaderNext != '\0' && |
345 | 0 | (*pszHeaderNext != '*' || pszHeaderNext[1] != '/')) |
346 | 0 | { |
347 | 0 | pszHeaderNext++; |
348 | 0 | } |
349 | 0 | if (*pszHeaderNext == '\0') |
350 | 0 | break; |
351 | | |
352 | 0 | pszHeaderNext += 2; |
353 | 0 | continue; |
354 | 0 | } |
355 | | |
356 | | // Skip # style comments |
357 | 0 | if (*pszHeaderNext == '#') |
358 | 0 | { |
359 | 0 | pszHeaderNext += 1; |
360 | | |
361 | | // consume till end of line. |
362 | 0 | while (*pszHeaderNext != '\0' && *pszHeaderNext != 10 && |
363 | 0 | *pszHeaderNext != 13) |
364 | 0 | { |
365 | 0 | pszHeaderNext++; |
366 | 0 | } |
367 | 0 | continue; |
368 | 0 | } |
369 | | |
370 | | // not white space, return. |
371 | 0 | return; |
372 | 0 | } |
373 | 0 | } |
374 | | |
375 | | /************************************************************************/ |
376 | | /* GetKeyword() */ |
377 | | /************************************************************************/ |
378 | | |
379 | | const char *CPLKeywordParser::GetKeyword(const char *pszPath, |
380 | | const char *pszDefault) |
381 | | |
382 | 0 | { |
383 | 0 | const char *pszResult = CSLFetchNameValue(papszKeywordList, pszPath); |
384 | 0 | if (pszResult == nullptr) |
385 | 0 | return pszDefault; |
386 | | |
387 | 0 | return pszResult; |
388 | 0 | } |
389 | | |
390 | | //! @endcond |