/src/gdal/port/cpl_xml_validate.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: CPL - Common Portability Library |
4 | | * Purpose: Implement XML validation against XSD schema |
5 | | * Author: Even Rouault, even.rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2012-2014, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "cpl_port.h" |
14 | | #include "cpl_conv.h" |
15 | | #include "cpl_error.h" |
16 | | |
17 | | #ifdef HAVE_LIBXML2 |
18 | | #include <libxml/xmlversion.h> |
19 | | #if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20622 |
20 | | // We need at least 2.6.20 for xmlSchemaValidateDoc |
21 | | // and xmlParseDoc to accept a const xmlChar* |
22 | | // We could workaround it, but likely not worth the effort for now. |
23 | | // Actually, we need at least 2.6.22, at runtime, to be |
24 | | // able to parse the OGC GML schemas |
25 | | #define HAVE_RECENT_LIBXML2 |
26 | | |
27 | | // libxml2 before 2.8.0 had a bug to parse the OGC GML schemas |
28 | | // We have a workaround for that for versions >= 2.6.20 and < 2.8.0. |
29 | | #if defined(LIBXML_VERSION) && LIBXML_VERSION < 20800 |
30 | | #define HAS_VALIDATION_BUG |
31 | | #endif |
32 | | |
33 | | #else |
34 | | #warning "Not recent enough libxml2 version" |
35 | | #endif |
36 | | #endif |
37 | | |
38 | | #ifdef HAVE_RECENT_LIBXML2 |
39 | | #include <string.h> |
40 | | |
41 | | #if defined(__GNUC__) |
42 | | #pragma GCC diagnostic push |
43 | | #pragma GCC diagnostic ignored "-Wold-style-cast" |
44 | | #endif |
45 | | #if defined(__clang__) |
46 | | #pragma clang diagnostic push |
47 | | #pragma clang diagnostic ignored "-Wunknown-pragmas" |
48 | | #pragma clang diagnostic ignored "-Wdocumentation" |
49 | | #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" |
50 | | #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" |
51 | | #endif |
52 | | |
53 | | #include <libxml/xmlschemas.h> |
54 | | #include <libxml/parserInternals.h> |
55 | | #include <libxml/catalog.h> |
56 | | |
57 | | #if defined(__clang__) |
58 | | #pragma clang diagnostic pop |
59 | | #endif |
60 | | #if defined(__GNUC__) |
61 | | #pragma GCC diagnostic pop |
62 | | #endif |
63 | | |
64 | | #include "cpl_string.h" |
65 | | #include "cpl_hash_set.h" |
66 | | #include "cpl_minixml.h" |
67 | | |
68 | | static xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoader = nullptr; |
69 | | |
70 | | /************************************************************************/ |
71 | | /* CPLFixPath() */ |
72 | | /************************************************************************/ |
73 | | |
// Normalize a path in place.
//
// Every backslash becomes a forward slash (libxml2 wants that on Windows),
// then "dir/name/../rest" sequences are folded into "dir/rest". A "/../"
// whose preceding component starts at offset 0 of the string is left alone.
// The result is never longer than the input, so it always fits in the
// caller's buffer.
static void CPLFixPath(char *pszPath)
{
    for (char *pch = pszPath; *pch != '\0'; ++pch)
    {
        if (*pch == '\\')
            *pch = '/';
    }

    static const char szParentRef[] = "/../";
    std::string osFixed(pszPath);
    for (size_t nParentPos = osFixed.find(szParentRef);
         nParentPos != std::string::npos && nParentPos != 0;
         nParentPos = osFixed.find(szParentRef))
    {
        // Slash that opens the component sitting right before "/../".
        const size_t nPrevSlash = osFixed.rfind('/', nParentPos - 1);
        if (nPrevSlash == std::string::npos || nPrevSlash == 0)
            break;

        // Drop "name/../", keeping the slash at nPrevSlash.
        const size_t nFirstKept = nParentPos + strlen(szParentRef);
        osFixed.erase(nPrevSlash + 1, nFirstKept - (nPrevSlash + 1));
    }
    memcpy(pszPath, osFixed.c_str(), osFixed.size() + 1);
}
100 | | |
101 | | #ifdef HAS_VALIDATION_BUG |
102 | | |
103 | | /************************************************************************/ |
104 | | /* CPLHasLibXMLBugWarningCallback() */ |
105 | | /************************************************************************/ |
106 | | |
// No-op error/warning handler. CPLHasLibXMLBug() installs it so that the
// probe schema it parses does not emit any libxml2 diagnostics.
static void CPLHasLibXMLBugWarningCallback(void *ctx, const char *msg, ...)
{
    static_cast<void>(ctx);
    static_cast<void>(msg);
}
111 | | |
112 | | /************************************************************************/ |
113 | | /* CPLHasLibXMLBug() */ |
114 | | /************************************************************************/ |
115 | | |
116 | | static bool CPLHasLibXMLBug() |
117 | | { |
118 | | static bool bHasLibXMLBug = false; |
119 | | static bool bLibXMLBugChecked = false; |
120 | | if (bLibXMLBugChecked) |
121 | | return bHasLibXMLBug; |
122 | | |
123 | | constexpr char szLibXMLBugTester[] = |
124 | | "<schema targetNamespace=\"http://foo\" " |
125 | | "xmlns:foo=\"http://foo\" xmlns=\"http://www.w3.org/2001/XMLSchema\">" |
126 | | "<simpleType name=\"t1\">" |
127 | | "<list itemType=\"double\"/>" |
128 | | "</simpleType>" |
129 | | "<complexType name=\"t2\">" |
130 | | "<simpleContent>" |
131 | | "<extension base=\"foo:t1\"/>" |
132 | | "</simpleContent>" |
133 | | "</complexType>" |
134 | | "<complexType name=\"t3\">" |
135 | | "<simpleContent>" |
136 | | "<restriction base=\"foo:t2\">" |
137 | | "<length value=\"2\"/>" |
138 | | "</restriction>" |
139 | | "</simpleContent>" |
140 | | "</complexType>" |
141 | | "</schema>"; |
142 | | |
143 | | xmlSchemaParserCtxtPtr pSchemaParserCtxt = |
144 | | xmlSchemaNewMemParserCtxt(szLibXMLBugTester, strlen(szLibXMLBugTester)); |
145 | | |
146 | | xmlSchemaSetParserErrors(pSchemaParserCtxt, CPLHasLibXMLBugWarningCallback, |
147 | | CPLHasLibXMLBugWarningCallback, nullptr); |
148 | | |
149 | | xmlSchemaPtr pSchema = xmlSchemaParse(pSchemaParserCtxt); |
150 | | xmlSchemaFreeParserCtxt(pSchemaParserCtxt); |
151 | | |
152 | | bHasLibXMLBug = pSchema == nullptr; |
153 | | bLibXMLBugChecked = true; |
154 | | |
155 | | if (pSchema) |
156 | | xmlSchemaFree(pSchema); |
157 | | |
158 | | if (bHasLibXMLBug) |
159 | | { |
160 | | CPLDebug("CPL", |
161 | | "LibXML bug found " |
162 | | "(cf https://bugzilla.gnome.org/show_bug.cgi?id=630130). " |
163 | | "Will try to workaround for GML schemas."); |
164 | | } |
165 | | |
166 | | return bHasLibXMLBug; |
167 | | } |
168 | | |
169 | | #endif |
170 | | |
171 | | /************************************************************************/ |
172 | | /* CPLExtractSubSchema() */ |
173 | | /************************************************************************/ |
174 | | |
175 | | static CPLXMLNode *CPLExtractSubSchema(CPLXMLNode *psSubXML, |
176 | | CPLXMLNode *psMainSchema) |
177 | | { |
178 | | if (psSubXML->eType == CXT_Element && |
179 | | strcmp(psSubXML->pszValue, "?xml") == 0) |
180 | | { |
181 | | CPLXMLNode *psNext = psSubXML->psNext; |
182 | | psSubXML->psNext = nullptr; |
183 | | CPLDestroyXMLNode(psSubXML); |
184 | | psSubXML = psNext; |
185 | | } |
186 | | |
187 | | if (psSubXML != nullptr && psSubXML->eType == CXT_Comment) |
188 | | { |
189 | | CPLXMLNode *psNext = psSubXML->psNext; |
190 | | psSubXML->psNext = nullptr; |
191 | | CPLDestroyXMLNode(psSubXML); |
192 | | psSubXML = psNext; |
193 | | } |
194 | | |
195 | | if (psSubXML != nullptr && psSubXML->eType == CXT_Element && |
196 | | (strcmp(psSubXML->pszValue, "schema") == 0 || |
197 | | strcmp(psSubXML->pszValue, "xs:schema") == 0 || |
198 | | strcmp(psSubXML->pszValue, "xsd:schema") == 0) && |
199 | | psSubXML->psNext == nullptr) |
200 | | { |
201 | | CPLXMLNode *psNext = psSubXML->psChild; |
202 | | while (psNext != nullptr && psNext->eType != CXT_Element && |
203 | | psNext->psNext != nullptr && |
204 | | psNext->psNext->eType != CXT_Element) |
205 | | { |
206 | | // Add xmlns: from subschema to main schema if missing. |
207 | | if (psNext->eType == CXT_Attribute && |
208 | | STARTS_WITH(psNext->pszValue, "xmlns:") && |
209 | | CPLGetXMLValue(psMainSchema, psNext->pszValue, nullptr) == |
210 | | nullptr) |
211 | | { |
212 | | CPLXMLNode *psAttr = |
213 | | CPLCreateXMLNode(nullptr, CXT_Attribute, psNext->pszValue); |
214 | | CPLCreateXMLNode(psAttr, CXT_Text, psNext->psChild->pszValue); |
215 | | |
216 | | psAttr->psNext = psMainSchema->psChild; |
217 | | psMainSchema->psChild = psAttr; |
218 | | } |
219 | | psNext = psNext->psNext; |
220 | | } |
221 | | |
222 | | if (psNext != nullptr && psNext->eType != CXT_Element && |
223 | | psNext->psNext != nullptr && psNext->psNext->eType == CXT_Element) |
224 | | { |
225 | | CPLXMLNode *psNext2 = psNext->psNext; |
226 | | psNext->psNext = nullptr; |
227 | | CPLDestroyXMLNode(psSubXML); |
228 | | psSubXML = psNext2; |
229 | | } |
230 | | } |
231 | | |
232 | | return psSubXML; |
233 | | } |
234 | | |
235 | | #ifdef HAS_VALIDATION_BUG |
236 | | /************************************************************************/ |
237 | | /* CPLWorkaroundLibXMLBug() */ |
238 | | /************************************************************************/ |
239 | | |
240 | | // Return TRUE if the current node must be destroyed. |
241 | | static bool CPLWorkaroundLibXMLBug(CPLXMLNode *psIter) |
242 | | { |
243 | | if (psIter->eType == CXT_Element && |
244 | | strcmp(psIter->pszValue, "element") == 0 && |
245 | | strcmp(CPLGetXMLValue(psIter, "name", ""), "QuantityExtent") == 0 && |
246 | | strcmp(CPLGetXMLValue(psIter, "type", ""), "gml:QuantityExtentType") == |
247 | | 0) |
248 | | { |
249 | | CPLXMLNode *psIter2 = psIter->psChild; |
250 | | while (psIter2) |
251 | | { |
252 | | if (psIter2->eType == CXT_Attribute && |
253 | | strcmp(psIter2->pszValue, "type") == 0) |
254 | | { |
255 | | CPLFree(psIter2->psChild->pszValue); |
256 | | if (strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""), |
257 | | "gml:AbstractValue") == 0) |
258 | | // GML 3.2.1. |
259 | | psIter2->psChild->pszValue = |
260 | | CPLStrdup("gml:MeasureOrNilReasonListType"); |
261 | | else |
262 | | psIter2->psChild->pszValue = |
263 | | CPLStrdup("gml:MeasureOrNullListType"); |
264 | | } |
265 | | psIter2 = psIter2->psNext; |
266 | | } |
267 | | } |
268 | | |
269 | | else if (psIter->eType == CXT_Element && |
270 | | strcmp(psIter->pszValue, "element") == 0 && |
271 | | strcmp(CPLGetXMLValue(psIter, "name", ""), "CategoryExtent") == |
272 | | 0 && |
273 | | strcmp(CPLGetXMLValue(psIter, "type", ""), |
274 | | "gml:CategoryExtentType") == 0) |
275 | | { |
276 | | CPLXMLNode *psIter2 = psIter->psChild; |
277 | | while (psIter2) |
278 | | { |
279 | | if (psIter2->eType == CXT_Attribute && |
280 | | strcmp(psIter2->pszValue, "type") == 0) |
281 | | { |
282 | | CPLFree(psIter2->psChild->pszValue); |
283 | | if (strcmp(CPLGetXMLValue(psIter, "substitutionGroup", ""), |
284 | | "gml:AbstractValue") == 0) |
285 | | // GML 3.2.1 |
286 | | psIter2->psChild->pszValue = |
287 | | CPLStrdup("gml:CodeOrNilReasonListType"); |
288 | | else |
289 | | psIter2->psChild->pszValue = |
290 | | CPLStrdup("gml:CodeOrNullListType"); |
291 | | } |
292 | | psIter2 = psIter2->psNext; |
293 | | } |
294 | | } |
295 | | |
296 | | else if (CPLHasLibXMLBug() && psIter->eType == CXT_Element && |
297 | | strcmp(psIter->pszValue, "complexType") == 0 && |
298 | | (strcmp(CPLGetXMLValue(psIter, "name", ""), |
299 | | "QuantityExtentType") == 0 || |
300 | | strcmp(CPLGetXMLValue(psIter, "name", ""), |
301 | | "CategoryExtentType") == 0)) |
302 | | { |
303 | | // Destroy this element. |
304 | | return true; |
305 | | } |
306 | | |
307 | | // For GML 3.2.1 |
308 | | else if (psIter->eType == CXT_Element && |
309 | | strcmp(psIter->pszValue, "complexType") == 0 && |
310 | | strcmp(CPLGetXMLValue(psIter, "name", ""), "VectorType") == 0) |
311 | | { |
312 | | CPLXMLNode *psSimpleContent = |
313 | | CPLCreateXMLNode(nullptr, CXT_Element, "simpleContent"); |
314 | | CPLXMLNode *psExtension = |
315 | | CPLCreateXMLNode(psSimpleContent, CXT_Element, "extension"); |
316 | | CPLXMLNode *psExtensionBase = |
317 | | CPLCreateXMLNode(psExtension, CXT_Attribute, "base"); |
318 | | CPLCreateXMLNode(psExtensionBase, CXT_Text, "gml:doubleList"); |
319 | | CPLXMLNode *psAttributeGroup = |
320 | | CPLCreateXMLNode(psExtension, CXT_Element, "attributeGroup"); |
321 | | CPLXMLNode *psAttributeGroupRef = |
322 | | CPLCreateXMLNode(psAttributeGroup, CXT_Attribute, "ref"); |
323 | | CPLCreateXMLNode(psAttributeGroupRef, CXT_Text, |
324 | | "gml:SRSReferenceGroup"); |
325 | | |
326 | | CPLXMLNode *psName = CPLCreateXMLNode(nullptr, CXT_Attribute, "name"); |
327 | | CPLCreateXMLNode(psName, CXT_Text, "VectorType"); |
328 | | |
329 | | CPLDestroyXMLNode(psIter->psChild); |
330 | | psIter->psChild = psName; |
331 | | psIter->psChild->psNext = psSimpleContent; |
332 | | } |
333 | | |
334 | | else if (psIter->eType == CXT_Element && |
335 | | strcmp(psIter->pszValue, "element") == 0 && |
336 | | (strcmp(CPLGetXMLValue(psIter, "name", ""), "domainOfValidity") == |
337 | | 0 || |
338 | | strcmp(CPLGetXMLValue(psIter, "name", ""), |
339 | | "coordinateOperationAccuracy") == 0 || |
340 | | strcmp(CPLGetXMLValue(psIter, "name", ""), "formulaCitation") == |
341 | | 0)) |
342 | | { |
343 | | CPLXMLNode *psComplexType = |
344 | | CPLCreateXMLNode(nullptr, CXT_Element, "complexType"); |
345 | | CPLXMLNode *psSequence = |
346 | | CPLCreateXMLNode(psComplexType, CXT_Element, "sequence"); |
347 | | CPLXMLNode *psSequenceMinOccurs = |
348 | | CPLCreateXMLNode(psSequence, CXT_Attribute, "minOccurs"); |
349 | | CPLCreateXMLNode(psSequenceMinOccurs, CXT_Text, "0"); |
350 | | CPLXMLNode *psAny = CPLCreateXMLNode(psSequence, CXT_Element, "any"); |
351 | | CPLXMLNode *psAnyMinOccurs = |
352 | | CPLCreateXMLNode(psAny, CXT_Attribute, "minOccurs"); |
353 | | CPLCreateXMLNode(psAnyMinOccurs, CXT_Text, "0"); |
354 | | CPLXMLNode *psAnyProcessContents = |
355 | | CPLCreateXMLNode(psAny, CXT_Attribute, " processContents"); |
356 | | CPLCreateXMLNode(psAnyProcessContents, CXT_Text, "lax"); |
357 | | |
358 | | CPLXMLNode *psName = CPLCreateXMLNode(nullptr, CXT_Attribute, "name"); |
359 | | CPLCreateXMLNode(psName, CXT_Text, CPLGetXMLValue(psIter, "name", "")); |
360 | | |
361 | | CPLDestroyXMLNode(psIter->psChild); |
362 | | psIter->psChild = psName; |
363 | | psIter->psChild->psNext = psComplexType; |
364 | | } |
365 | | |
366 | | return false; |
367 | | } |
368 | | #endif |
369 | | |
370 | | /************************************************************************/ |
371 | | /* CPLLoadSchemaStrInternal() */ |
372 | | /************************************************************************/ |
373 | | |
374 | | static CPLXMLNode *CPLLoadSchemaStrInternal(CPLHashSet *hSetSchemas, |
375 | | const char *pszFile) |
376 | | { |
377 | | if (CPLHashSetLookup(hSetSchemas, pszFile)) |
378 | | return nullptr; |
379 | | |
380 | | CPLHashSetInsert(hSetSchemas, CPLStrdup(pszFile)); |
381 | | |
382 | | CPLDebug("CPL", "Parsing %s", pszFile); |
383 | | |
384 | | CPLXMLNode *psXML = CPLParseXMLFile(pszFile); |
385 | | if (psXML == nullptr) |
386 | | { |
387 | | CPLError(CE_Failure, CPLE_AppDefined, "Cannot open %s", pszFile); |
388 | | return nullptr; |
389 | | } |
390 | | |
391 | | CPLXMLNode *psSchema = CPLGetXMLNode(psXML, "=schema"); |
392 | | if (psSchema == nullptr) |
393 | | { |
394 | | psSchema = CPLGetXMLNode(psXML, "=xs:schema"); |
395 | | } |
396 | | if (psSchema == nullptr) |
397 | | { |
398 | | psSchema = CPLGetXMLNode(psXML, "=xsd:schema"); |
399 | | } |
400 | | if (psSchema == nullptr) |
401 | | { |
402 | | CPLError(CE_Failure, CPLE_AppDefined, "Cannot find schema node in %s", |
403 | | pszFile); |
404 | | CPLDestroyXMLNode(psXML); |
405 | | return nullptr; |
406 | | } |
407 | | |
408 | | CPLXMLNode *psPrev = nullptr; |
409 | | CPLXMLNode *psIter = psSchema->psChild; |
410 | | while (psIter) |
411 | | { |
412 | | bool bDestroyCurrentNode = false; |
413 | | |
414 | | #ifdef HAS_VALIDATION_BUG |
415 | | if (CPLHasLibXMLBug()) |
416 | | bDestroyCurrentNode = CPLWorkaroundLibXMLBug(psIter); |
417 | | #endif |
418 | | |
419 | | // Load the referenced schemas, and integrate them in the main schema. |
420 | | if (psIter->eType == CXT_Element && |
421 | | (strcmp(psIter->pszValue, "include") == 0 || |
422 | | strcmp(psIter->pszValue, "xs:include") == 0 || |
423 | | strcmp(psIter->pszValue, "xsd:include") == 0) && |
424 | | psIter->psChild != nullptr && |
425 | | psIter->psChild->eType == CXT_Attribute && |
426 | | strcmp(psIter->psChild->pszValue, "schemaLocation") == 0) |
427 | | { |
428 | | const char *pszIncludeSchema = psIter->psChild->psChild->pszValue; |
429 | | char *pszFullFilename = |
430 | | CPLStrdup(CPLFormFilenameSafe(CPLGetPathSafe(pszFile).c_str(), |
431 | | pszIncludeSchema, nullptr) |
432 | | .c_str()); |
433 | | |
434 | | CPLFixPath(pszFullFilename); |
435 | | |
436 | | CPLXMLNode *psSubXML = nullptr; |
437 | | |
438 | | // If we haven't yet loaded that schema, do it now. |
439 | | if (!CPLHashSetLookup(hSetSchemas, pszFullFilename)) |
440 | | { |
441 | | psSubXML = |
442 | | CPLLoadSchemaStrInternal(hSetSchemas, pszFullFilename); |
443 | | if (psSubXML == nullptr) |
444 | | { |
445 | | CPLFree(pszFullFilename); |
446 | | CPLDestroyXMLNode(psXML); |
447 | | return nullptr; |
448 | | } |
449 | | } |
450 | | CPLFree(pszFullFilename); |
451 | | pszFullFilename = nullptr; |
452 | | |
453 | | if (psSubXML) |
454 | | { |
455 | | CPLXMLNode *psNext = psIter->psNext; |
456 | | |
457 | | psSubXML = CPLExtractSubSchema(psSubXML, psSchema); |
458 | | if (psSubXML == nullptr) |
459 | | { |
460 | | CPLDestroyXMLNode(psXML); |
461 | | return nullptr; |
462 | | } |
463 | | |
464 | | // Replace <include/> node by the subXML. |
465 | | CPLXMLNode *psIter2 = psSubXML; |
466 | | while (psIter2->psNext) |
467 | | psIter2 = psIter2->psNext; |
468 | | psIter2->psNext = psNext; |
469 | | |
470 | | if (psPrev == nullptr) |
471 | | psSchema->psChild = psSubXML; |
472 | | else |
473 | | psPrev->psNext = psSubXML; |
474 | | |
475 | | psIter->psNext = nullptr; |
476 | | CPLDestroyXMLNode(psIter); |
477 | | |
478 | | psPrev = psIter2; |
479 | | psIter = psNext; |
480 | | continue; |
481 | | } |
482 | | else |
483 | | { |
484 | | // We have already included that file, |
485 | | // so just remove the <include/> node |
486 | | bDestroyCurrentNode = true; |
487 | | } |
488 | | } |
489 | | |
490 | | // Patch the schemaLocation of <import/>. |
491 | | else if (psIter->eType == CXT_Element && |
492 | | (strcmp(psIter->pszValue, "import") == 0 || |
493 | | strcmp(psIter->pszValue, "xs:import") == 0 || |
494 | | strcmp(psIter->pszValue, "xsd:import") == 0)) |
495 | | { |
496 | | CPLXMLNode *psIter2 = psIter->psChild; |
497 | | while (psIter2) |
498 | | { |
499 | | if (psIter2->eType == CXT_Attribute && |
500 | | strcmp(psIter2->pszValue, "schemaLocation") == 0 && |
501 | | psIter2->psChild != nullptr && |
502 | | !STARTS_WITH(psIter2->psChild->pszValue, "http://") && |
503 | | !STARTS_WITH(psIter2->psChild->pszValue, "ftp://") && |
504 | | // If the top file is our warping file, don't alter the path |
505 | | // of the import. |
506 | | strstr(pszFile, "/vsimem/CPLValidateXML_") == nullptr) |
507 | | { |
508 | | char *pszFullFilename = CPLStrdup( |
509 | | CPLFormFilenameSafe(CPLGetPathSafe(pszFile).c_str(), |
510 | | psIter2->psChild->pszValue, nullptr) |
511 | | .c_str()); |
512 | | CPLFixPath(pszFullFilename); |
513 | | CPLFree(psIter2->psChild->pszValue); |
514 | | psIter2->psChild->pszValue = pszFullFilename; |
515 | | } |
516 | | psIter2 = psIter2->psNext; |
517 | | } |
518 | | } |
519 | | |
520 | | if (bDestroyCurrentNode) |
521 | | { |
522 | | CPLXMLNode *psNext = psIter->psNext; |
523 | | if (psPrev == nullptr) |
524 | | psSchema->psChild = psNext; |
525 | | else |
526 | | psPrev->psNext = psNext; |
527 | | |
528 | | psIter->psNext = nullptr; |
529 | | CPLDestroyXMLNode(psIter); |
530 | | |
531 | | psIter = psNext; |
532 | | continue; |
533 | | } |
534 | | |
535 | | psPrev = psIter; |
536 | | psIter = psIter->psNext; |
537 | | } |
538 | | |
539 | | return psXML; |
540 | | } |
541 | | |
542 | | /************************************************************************/ |
543 | | /* CPLMoveImportAtBeginning() */ |
544 | | /************************************************************************/ |
545 | | |
546 | | static void CPLMoveImportAtBeginning(CPLXMLNode *psXML) |
547 | | { |
548 | | CPLXMLNode *psSchema = CPLGetXMLNode(psXML, "=schema"); |
549 | | if (psSchema == nullptr) |
550 | | psSchema = CPLGetXMLNode(psXML, "=xs:schema"); |
551 | | if (psSchema == nullptr) |
552 | | psSchema = CPLGetXMLNode(psXML, "=xsd:schema"); |
553 | | if (psSchema == nullptr) |
554 | | return; |
555 | | |
556 | | CPLXMLNode *psPrev = nullptr; |
557 | | CPLXMLNode *psIter = psSchema->psChild; |
558 | | while (psIter) |
559 | | { |
560 | | if (psPrev != nullptr && psIter->eType == CXT_Element && |
561 | | (strcmp(psIter->pszValue, "import") == 0 || |
562 | | strcmp(psIter->pszValue, "xs:import") == 0 || |
563 | | strcmp(psIter->pszValue, "xsd:import") == 0)) |
564 | | { |
565 | | // Reorder at the beginning. |
566 | | CPLXMLNode *psNext = psIter->psNext; |
567 | | |
568 | | psPrev->psNext = psNext; |
569 | | |
570 | | CPLXMLNode *psFirstChild = psSchema->psChild; |
571 | | psSchema->psChild = psIter; |
572 | | psIter->psNext = psFirstChild; |
573 | | |
574 | | psIter = psNext; |
575 | | continue; |
576 | | } |
577 | | |
578 | | psPrev = psIter; |
579 | | psIter = psIter->psNext; |
580 | | } |
581 | | } |
582 | | |
583 | | /************************************************************************/ |
584 | | /* CPLLoadSchemaStr() */ |
585 | | /************************************************************************/ |
586 | | |
587 | | static char *CPLLoadSchemaStr(const char *pszXSDFilename) |
588 | | { |
589 | | #ifdef HAS_VALIDATION_BUG |
590 | | CPLHasLibXMLBug(); |
591 | | #endif |
592 | | |
593 | | CPLHashSet *hSetSchemas = |
594 | | CPLHashSetNew(CPLHashSetHashStr, CPLHashSetEqualStr, CPLFree); |
595 | | CPLXMLNode *psSchema = |
596 | | CPLLoadSchemaStrInternal(hSetSchemas, pszXSDFilename); |
597 | | |
598 | | char *pszStr = nullptr; |
599 | | if (psSchema) |
600 | | { |
601 | | CPLMoveImportAtBeginning(psSchema); |
602 | | pszStr = CPLSerializeXMLTree(psSchema); |
603 | | CPLDestroyXMLNode(psSchema); |
604 | | } |
605 | | CPLHashSetDestroy(hSetSchemas); |
606 | | return pszStr; |
607 | | } |
608 | | |
609 | | /************************************************************************/ |
610 | | /* CPLFindLocalXSD() */ |
611 | | /************************************************************************/ |
612 | | |
613 | | static CPLString CPLFindLocalXSD(const char *pszXSDFilename) |
614 | | { |
615 | | CPLString osTmp; |
616 | | const char *pszSchemasOpenGIS = |
617 | | CPLGetConfigOption("GDAL_OPENGIS_SCHEMAS", nullptr); |
618 | | if (pszSchemasOpenGIS != nullptr) |
619 | | { |
620 | | int nLen = static_cast<int>(strlen(pszSchemasOpenGIS)); |
621 | | if (nLen > 0 && pszSchemasOpenGIS[nLen - 1] == '/') |
622 | | { |
623 | | osTmp = pszSchemasOpenGIS; |
624 | | osTmp += pszXSDFilename; |
625 | | } |
626 | | else |
627 | | { |
628 | | osTmp = pszSchemasOpenGIS; |
629 | | osTmp += "/"; |
630 | | osTmp += pszXSDFilename; |
631 | | } |
632 | | } |
633 | | else if ((pszSchemasOpenGIS = CPLFindFile("gdal", "SCHEMAS_OPENGIS_NET")) != |
634 | | nullptr) |
635 | | { |
636 | | osTmp = pszSchemasOpenGIS; |
637 | | osTmp += "/"; |
638 | | osTmp += pszXSDFilename; |
639 | | } |
640 | | |
641 | | VSIStatBufL sStatBuf; |
642 | | if (VSIStatExL(osTmp, &sStatBuf, VSI_STAT_EXISTS_FLAG) == 0) |
643 | | return osTmp; |
644 | | return ""; |
645 | | } |
646 | | |
647 | | /************************************************************************/ |
648 | | /* CPLExternalEntityLoader() */ |
649 | | /************************************************************************/ |
650 | | |
// Built-in schema for the http://www.w3.org/XML/1998/namespace namespace:
// the xml:lang, xml:space, xml:base and xml:id attributes and the
// specialAttrs attribute group. CPLExternalEntityLoader() serves it for
// http://www.w3.org/2001/xml.xsd when no local xml.xsd is found.
constexpr char szXML_XSD[] =
    "<schema xmlns=\"http://www.w3.org/2001/XMLSchema\" "
    "targetNamespace=\"http://www.w3.org/XML/1998/namespace\">"
    // xml:lang: a language, or the empty string.
    "<attribute name=\"lang\"><simpleType>"
    "<union memberTypes=\"language\"><simpleType>"
    "<restriction base=\"string\"><enumeration value=\"\"/></restriction>"
    "</simpleType></union>"
    "</simpleType></attribute>"
    // xml:space: "default" or "preserve".
    "<attribute name=\"space\"><simpleType>"
    "<restriction base=\"NCName\">"
    "<enumeration value=\"default\"/><enumeration value=\"preserve\"/>"
    "</restriction>"
    "</simpleType></attribute>"
    "<attribute name=\"base\" type=\"anyURI\"/>"
    "<attribute name=\"id\" type=\"ID\"/>"
    "<attributeGroup name=\"specialAttrs\">"
    "<attribute ref=\"xml:base\"/><attribute ref=\"xml:lang\"/>"
    "<attribute ref=\"xml:space\"/><attribute ref=\"xml:id\"/>"
    "</attributeGroup>"
    "</schema>";
682 | | |
// Built-in, simplified and truncated rendition of
// http://www.w3.org/1999/xlink.xsd (sufficient for GML schemas): the xlink
// attributes and the simpleAttrs attribute group only.
// CPLExternalEntityLoader() serves it when no local xlink.xsd is found.
constexpr char szXLINK_XSD[] =
    "<schema xmlns=\"http://www.w3.org/2001/XMLSchema\" "
    "targetNamespace=\"http://www.w3.org/1999/xlink\" "
    "xmlns:xlink=\"http://www.w3.org/1999/xlink\">"
    // Attribute declarations.
    "<attribute name=\"type\" type=\"string\"/>"
    "<attribute name=\"href\" type=\"anyURI\"/>"
    "<attribute name=\"role\" type=\"anyURI\"/>"
    "<attribute name=\"arcrole\" type=\"anyURI\"/>"
    "<attribute name=\"title\" type=\"string\"/>"
    "<attribute name=\"show\" type=\"string\"/>"
    "<attribute name=\"actuate\" type=\"string\"/>"
    "<attribute name=\"label\" type=\"NCName\"/>"
    "<attribute name=\"from\" type=\"NCName\"/>"
    "<attribute name=\"to\" type=\"NCName\"/>"
    // Attributes of a simple link.
    "<attributeGroup name=\"simpleAttrs\">"
    "<attribute ref=\"xlink:type\" fixed=\"simple\"/>"
    "<attribute ref=\"xlink:href\"/><attribute ref=\"xlink:role\"/>"
    "<attribute ref=\"xlink:arcrole\"/><attribute ref=\"xlink:title\"/>"
    "<attribute ref=\"xlink:show\"/><attribute ref=\"xlink:actuate\"/>"
    "</attributeGroup>"
    "</schema>";
709 | | |
710 | | static xmlParserInputPtr CPLExternalEntityLoader(const char *URL, |
711 | | const char *ID, |
712 | | xmlParserCtxtPtr context) |
713 | | { |
714 | | #if DEBUG_VERBOSE |
715 | | CPLDebug("CPL", "CPLExternalEntityLoader(%s)", URL); |
716 | | #endif |
717 | | // Use libxml2 catalog mechanism to resolve the URL to something else. |
718 | | // xmlChar* pszResolved = xmlCatalogResolveSystem((const xmlChar*)URL); |
719 | | xmlChar *pszResolved = |
720 | | xmlCatalogResolveSystem(reinterpret_cast<const xmlChar *>(URL)); |
721 | | if (pszResolved == nullptr) |
722 | | pszResolved = |
723 | | xmlCatalogResolveURI(reinterpret_cast<const xmlChar *>(URL)); |
724 | | CPLString osURL; |
725 | | if (pszResolved) |
726 | | { |
727 | | CPLDebug("CPL", "Resolving %s in %s", URL, |
728 | | reinterpret_cast<const char *>(pszResolved)); |
729 | | osURL = reinterpret_cast<const char *>(pszResolved); |
730 | | URL = osURL.c_str(); |
731 | | xmlFree(pszResolved); |
732 | | pszResolved = nullptr; |
733 | | } |
734 | | |
735 | | if (STARTS_WITH(URL, "http://")) |
736 | | { |
737 | | // Make sure to use http://schemas.opengis.net/ |
738 | | // when gml/2 or gml/3 is detected. |
739 | | const char *pszGML = strstr(URL, "gml/2"); |
740 | | if (pszGML == nullptr) |
741 | | pszGML = strstr(URL, "gml/3"); |
742 | | if (pszGML != nullptr) |
743 | | { |
744 | | osURL = "http://schemas.opengis.net/"; |
745 | | osURL += pszGML; |
746 | | URL = osURL.c_str(); |
747 | | } |
748 | | else if (strcmp(URL, "http://www.w3.org/2001/xml.xsd") == 0) |
749 | | { |
750 | | std::string osTmp = CPLFindLocalXSD("xml.xsd"); |
751 | | if (!osTmp.empty()) |
752 | | { |
753 | | osURL = std::move(osTmp); |
754 | | URL = osURL.c_str(); |
755 | | } |
756 | | else |
757 | | { |
758 | | CPLDebug("CPL", "Resolving %s to local definition", |
759 | | "http://www.w3.org/2001/xml.xsd"); |
760 | | return xmlNewStringInputStream( |
761 | | context, reinterpret_cast<const xmlChar *>(szXML_XSD)); |
762 | | } |
763 | | } |
764 | | else if (strcmp(URL, "http://www.w3.org/1999/xlink.xsd") == 0) |
765 | | { |
766 | | std::string osTmp = CPLFindLocalXSD("xlink.xsd"); |
767 | | if (!osTmp.empty()) |
768 | | { |
769 | | osURL = std::move(osTmp); |
770 | | URL = osURL.c_str(); |
771 | | } |
772 | | else |
773 | | { |
774 | | CPLDebug("CPL", "Resolving %s to local definition", |
775 | | "http://www.w3.org/1999/xlink.xsd"); |
776 | | return xmlNewStringInputStream( |
777 | | context, reinterpret_cast<const xmlChar *>(szXLINK_XSD)); |
778 | | } |
779 | | } |
780 | | else if (!STARTS_WITH(URL, "http://schemas.opengis.net/")) |
781 | | { |
782 | | CPLDebug("CPL", "Loading %s", URL); |
783 | | return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); |
784 | | } |
785 | | } |
786 | | else if (STARTS_WITH(URL, "ftp://")) |
787 | | { |
788 | | return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); |
789 | | } |
790 | | else if (STARTS_WITH(URL, "file://")) |
791 | | { |
792 | | // Parse file:// URI so as to be able to open them with VSI*L API. |
793 | | if (STARTS_WITH(URL, "file://localhost/")) |
794 | | URL += 16; |
795 | | else |
796 | | URL += 7; |
797 | | |
798 | | if (URL[0] == '/' && URL[1] != '\0' && URL[2] == ':' && URL[3] == '/') |
799 | | { |
800 | | // Windows. |
801 | | ++URL; |
802 | | } |
803 | | else if (URL[0] == '/') |
804 | | { |
805 | | // Unix. |
806 | | } |
807 | | else |
808 | | { |
809 | | return pfnLibXMLOldExtranerEntityLoader(URL, ID, context); |
810 | | } |
811 | | } |
812 | | |
813 | | CPLString osModURL; |
814 | | if (STARTS_WITH(URL, "/vsizip/vsicurl/http%3A//")) |
815 | | { |
816 | | osModURL = "/vsizip/vsicurl/http://"; |
817 | | osModURL += URL + strlen("/vsizip/vsicurl/http%3A//"); |
818 | | } |
819 | | else if (STARTS_WITH(URL, "/vsicurl/http%3A//")) |
820 | | { |
821 | | osModURL = "vsicurl/http://"; |
822 | | osModURL += URL + strlen("/vsicurl/http%3A//"); |
823 | | } |
824 | | else if (STARTS_WITH(URL, "http://schemas.opengis.net/")) |
825 | | { |
826 | | const char *pszAfterOpenGIS = |
827 | | URL + strlen("http://schemas.opengis.net/"); |
828 | | |
829 | | const char *pszSchemasOpenGIS = |
830 | | CPLGetConfigOption("GDAL_OPENGIS_SCHEMAS", nullptr); |
831 | | if (pszSchemasOpenGIS != nullptr) |
832 | | { |
833 | | const int nLen = static_cast<int>(strlen(pszSchemasOpenGIS)); |
834 | | if (nLen > 0 && pszSchemasOpenGIS[nLen - 1] == '/') |
835 | | { |
836 | | osModURL = pszSchemasOpenGIS; |
837 | | osModURL += pszAfterOpenGIS; |
838 | | } |
839 | | else |
840 | | { |
841 | | osModURL = pszSchemasOpenGIS; |
842 | | osModURL += "/"; |
843 | | osModURL += pszAfterOpenGIS; |
844 | | } |
845 | | } |
846 | | else if ((pszSchemasOpenGIS = |
847 | | CPLFindFile("gdal", "SCHEMAS_OPENGIS_NET")) != nullptr) |
848 | | { |
849 | | osModURL = pszSchemasOpenGIS; |
850 | | osModURL += "/"; |
851 | | osModURL += pszAfterOpenGIS; |
852 | | } |
853 | | else if ((pszSchemasOpenGIS = CPLFindFile( |
854 | | "gdal", "SCHEMAS_OPENGIS_NET.zip")) != nullptr) |
855 | | { |
856 | | osModURL = "/vsizip/"; |
857 | | osModURL += pszSchemasOpenGIS; |
858 | | osModURL += "/"; |
859 | | osModURL += pszAfterOpenGIS; |
860 | | } |
861 | | else |
862 | | { |
863 | | osModURL = "/vsizip/vsicurl/" |
864 | | "http://schemas.opengis.net/SCHEMAS_OPENGIS_NET.zip/"; |
865 | | osModURL += pszAfterOpenGIS; |
866 | | } |
867 | | } |
868 | | else |
869 | | { |
870 | | osModURL = URL; |
871 | | } |
872 | | |
873 | | char *pszSchema = CPLLoadSchemaStr(osModURL); |
874 | | if (!pszSchema) |
875 | | return nullptr; |
876 | | |
877 | | xmlParserInputPtr parser = xmlNewStringInputStream( |
878 | | context, reinterpret_cast<const xmlChar *>(pszSchema)); |
879 | | CPLFree(pszSchema); |
880 | | |
881 | | return parser; |
882 | | } |
883 | | |
884 | | /************************************************************************/ |
885 | | /* CPLLibXMLWarningErrorCallback() */ |
886 | | /************************************************************************/ |
887 | | |
888 | | static void CPLLibXMLWarningErrorCallback(void *ctx, const char *msg, ...) |
889 | | { |
890 | | va_list varg; |
891 | | va_start(varg, msg); |
892 | | |
893 | | char *pszStr = reinterpret_cast<char *>(va_arg(varg, char *)); |
894 | | |
895 | | if (strstr(pszStr, "since this namespace was already imported") == nullptr) |
896 | | { |
897 | | const xmlError *pErrorPtr = xmlGetLastError(); |
898 | | const char *pszFilename = static_cast<char *>(ctx); |
899 | | char *pszStrDup = CPLStrdup(pszStr); |
900 | | int nLen = static_cast<int>(strlen(pszStrDup)); |
901 | | if (nLen > 0 && pszStrDup[nLen - 1] == '\n') |
902 | | pszStrDup[nLen - 1] = '\0'; |
903 | | if (pszFilename != nullptr && pszFilename[0] != '<') |
904 | | { |
905 | | CPLError(CE_Failure, CPLE_AppDefined, "libXML: %s:%d: %s", |
906 | | pszFilename, pErrorPtr ? pErrorPtr->line : 0, pszStrDup); |
907 | | } |
908 | | else |
909 | | { |
910 | | CPLError(CE_Failure, CPLE_AppDefined, "libXML: %d: %s", |
911 | | pErrorPtr ? pErrorPtr->line : 0, pszStrDup); |
912 | | } |
913 | | CPLFree(pszStrDup); |
914 | | } |
915 | | |
916 | | va_end(varg); |
917 | | } |
918 | | |
919 | | /************************************************************************/ |
920 | | /* CPLLoadContentFromFile() */ |
921 | | /************************************************************************/ |
922 | | |
923 | | static char *CPLLoadContentFromFile(const char *pszFilename) |
924 | | { |
925 | | VSILFILE *fp = VSIFOpenL(pszFilename, "rb"); |
926 | | if (fp == nullptr) |
927 | | return nullptr; |
928 | | if (VSIFSeekL(fp, 0, SEEK_END) != 0) |
929 | | { |
930 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
931 | | return nullptr; |
932 | | } |
933 | | vsi_l_offset nSize = VSIFTellL(fp); |
934 | | if (VSIFSeekL(fp, 0, SEEK_SET) != 0) |
935 | | { |
936 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
937 | | return nullptr; |
938 | | } |
939 | | if (static_cast<vsi_l_offset>(static_cast<int>(nSize)) != nSize || |
940 | | nSize > INT_MAX - 1) |
941 | | { |
942 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
943 | | return nullptr; |
944 | | } |
945 | | char *pszBuffer = |
946 | | static_cast<char *>(VSIMalloc(static_cast<size_t>(nSize) + 1)); |
947 | | if (pszBuffer == nullptr) |
948 | | { |
949 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
950 | | return nullptr; |
951 | | } |
952 | | if (static_cast<size_t>(VSIFReadL(pszBuffer, 1, static_cast<size_t>(nSize), |
953 | | fp)) != static_cast<size_t>(nSize)) |
954 | | { |
955 | | VSIFree(pszBuffer); |
956 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
957 | | return nullptr; |
958 | | } |
959 | | pszBuffer[nSize] = '\0'; |
960 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fp)); |
961 | | return pszBuffer; |
962 | | } |
963 | | |
964 | | /************************************************************************/ |
965 | | /* CPLLoadXMLSchema() */ |
966 | | /************************************************************************/ |
967 | | |
968 | | typedef void *CPLXMLSchemaPtr; |
969 | | |
970 | | /** |
971 | | * \brief Load a XSD schema. |
972 | | * |
973 | | * The return value should be freed with CPLFreeXMLSchema(). |
974 | | * |
975 | | * @param pszXSDFilename XSD schema to load. |
976 | | * @return a handle to the parsed XML schema, or NULL in case of failure. |
977 | | * |
978 | | * @since GDAL 1.10.0 |
979 | | */ |
980 | | |
981 | | static CPLXMLSchemaPtr CPLLoadXMLSchema(const char *pszXSDFilename) |
982 | | { |
983 | | char *pszStr = CPLLoadSchemaStr(pszXSDFilename); |
984 | | if (pszStr == nullptr) |
985 | | return nullptr; |
986 | | |
987 | | xmlExternalEntityLoader pfnLibXMLOldExtranerEntityLoaderLocal = nullptr; |
988 | | pfnLibXMLOldExtranerEntityLoaderLocal = xmlGetExternalEntityLoader(); |
989 | | pfnLibXMLOldExtranerEntityLoader = pfnLibXMLOldExtranerEntityLoaderLocal; |
990 | | xmlSetExternalEntityLoader(CPLExternalEntityLoader); |
991 | | |
992 | | xmlSchemaParserCtxtPtr pSchemaParserCtxt = |
993 | | xmlSchemaNewMemParserCtxt(pszStr, static_cast<int>(strlen(pszStr))); |
994 | | |
995 | | xmlSchemaSetParserErrors(pSchemaParserCtxt, CPLLibXMLWarningErrorCallback, |
996 | | CPLLibXMLWarningErrorCallback, nullptr); |
997 | | |
998 | | xmlSchemaPtr pSchema = xmlSchemaParse(pSchemaParserCtxt); |
999 | | xmlSchemaFreeParserCtxt(pSchemaParserCtxt); |
1000 | | |
1001 | | xmlSetExternalEntityLoader(pfnLibXMLOldExtranerEntityLoaderLocal); |
1002 | | |
1003 | | CPLFree(pszStr); |
1004 | | |
1005 | | return static_cast<CPLXMLSchemaPtr>(pSchema); |
1006 | | } |
1007 | | |
1008 | | /************************************************************************/ |
1009 | | /* CPLFreeXMLSchema() */ |
1010 | | /************************************************************************/ |
1011 | | |
1012 | | /** |
1013 | | * \brief Free a XSD schema. |
1014 | | * |
1015 | | * @param pSchema a handle to the parsed XML schema. |
1016 | | * |
1017 | | * @since GDAL 1.10.0 |
1018 | | */ |
1019 | | |
1020 | | static void CPLFreeXMLSchema(CPLXMLSchemaPtr pSchema) |
1021 | | { |
1022 | | if (pSchema) |
1023 | | xmlSchemaFree(static_cast<xmlSchemaPtr>(pSchema)); |
1024 | | } |
1025 | | |
1026 | | /************************************************************************/ |
1027 | | /* CPLValidateXML() */ |
1028 | | /************************************************************************/ |
1029 | | |
1030 | | /** |
1031 | | * \brief Validate a XML file against a XML schema. |
1032 | | * |
1033 | | * @param pszXMLFilename the filename of the XML file to validate. |
1034 | | * @param pszXSDFilename the filename of the XSD schema. |
1035 | | * @param papszOptions unused for now. Set to NULL. |
1036 | | * @return TRUE if the XML file validates against the XML schema. |
1037 | | * |
1038 | | * @since GDAL 1.10.0 |
1039 | | */ |
1040 | | |
1041 | | int CPLValidateXML(const char *pszXMLFilename, const char *pszXSDFilename, |
1042 | | CPL_UNUSED CSLConstList papszOptions) |
1043 | | { |
1044 | | char szHeader[2048] = {}; // TODO(schwehr): Get this off of the stack. |
1045 | | CPLString osTmpXSDFilename; |
1046 | | |
1047 | | if (pszXMLFilename[0] == '<') |
1048 | | { |
1049 | | strncpy(szHeader, pszXMLFilename, sizeof(szHeader)); |
1050 | | szHeader[sizeof(szHeader) - 1] = '\0'; |
1051 | | } |
1052 | | else |
1053 | | { |
1054 | | VSILFILE *fpXML = VSIFOpenL(pszXMLFilename, "rb"); |
1055 | | if (fpXML == nullptr) |
1056 | | { |
1057 | | CPLError(CE_Failure, CPLE_OpenFailed, "Cannot open %s", |
1058 | | pszXMLFilename); |
1059 | | return FALSE; |
1060 | | } |
1061 | | const vsi_l_offset nRead = |
1062 | | VSIFReadL(szHeader, 1, sizeof(szHeader) - 1, fpXML); |
1063 | | szHeader[nRead] = '\0'; |
1064 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fpXML)); |
1065 | | } |
1066 | | |
1067 | | // Workaround following bug: |
1068 | | // |
1069 | | // "element FeatureCollection: Schemas validity error : Element |
1070 | | // '{http://www.opengis.net/wfs}FeatureCollection': No matching global |
1071 | | // declaration available for the validation root" |
1072 | | // |
1073 | | // We create a wrapping XSD that imports the WFS .xsd (and possibly the GML |
1074 | | // .xsd too) and the application schema. This is a known libxml2 |
1075 | | // limitation. |
1076 | | if (strstr(szHeader, "<wfs:FeatureCollection") || |
1077 | | (strstr(szHeader, "<FeatureCollection") && |
1078 | | strstr(szHeader, "xmlns:wfs=\"http://www.opengis.net/wfs\""))) |
1079 | | { |
1080 | | const char *pszWFSSchemaNamespace = "http://www.opengis.net/wfs"; |
1081 | | const char *pszWFSSchemaLocation = nullptr; |
1082 | | const char *pszGMLSchemaLocation = nullptr; |
1083 | | if (strstr(szHeader, "wfs/1.0.0/WFS-basic.xsd")) |
1084 | | { |
1085 | | pszWFSSchemaLocation = |
1086 | | "http://schemas.opengis.net/wfs/1.0.0/WFS-basic.xsd"; |
1087 | | } |
1088 | | else if (strstr(szHeader, "wfs/1.1.0/wfs.xsd")) |
1089 | | { |
1090 | | pszWFSSchemaLocation = |
1091 | | "http://schemas.opengis.net/wfs/1.1.0/wfs.xsd"; |
1092 | | } |
1093 | | else if (strstr(szHeader, "wfs/2.0/wfs.xsd")) |
1094 | | { |
1095 | | pszWFSSchemaNamespace = "http://www.opengis.net/wfs/2.0"; |
1096 | | pszWFSSchemaLocation = "http://schemas.opengis.net/wfs/2.0/wfs.xsd"; |
1097 | | } |
1098 | | |
1099 | | VSILFILE *fpXSD = VSIFOpenL(pszXSDFilename, "rb"); |
1100 | | if (fpXSD == nullptr) |
1101 | | { |
1102 | | CPLError(CE_Failure, CPLE_OpenFailed, "Cannot open %s", |
1103 | | pszXSDFilename); |
1104 | | return FALSE; |
1105 | | } |
1106 | | const vsi_l_offset nRead = |
1107 | | VSIFReadL(szHeader, 1, sizeof(szHeader) - 1, fpXSD); |
1108 | | szHeader[nRead] = '\0'; |
1109 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fpXSD)); |
1110 | | |
1111 | | if (strstr(szHeader, "gml/3.1.1") != nullptr && |
1112 | | strstr(szHeader, "gml/3.1.1/base/gml.xsd") == nullptr) |
1113 | | { |
1114 | | pszGMLSchemaLocation = |
1115 | | "http://schemas.opengis.net/gml/3.1.1/base/gml.xsd"; |
1116 | | } |
1117 | | |
1118 | | if (pszWFSSchemaLocation != nullptr) |
1119 | | { |
1120 | | osTmpXSDFilename = CPLSPrintf("/vsimem/CPLValidateXML_%p_%p.xsd", |
1121 | | pszXMLFilename, pszXSDFilename); |
1122 | | char *const pszEscapedXSDFilename = |
1123 | | CPLEscapeString(pszXSDFilename, -1, CPLES_XML); |
1124 | | VSILFILE *const fpMEM = VSIFOpenL(osTmpXSDFilename, "wb"); |
1125 | | CPL_IGNORE_RET_VAL(VSIFPrintfL( |
1126 | | fpMEM, |
1127 | | "<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\">\n")); |
1128 | | CPL_IGNORE_RET_VAL(VSIFPrintfL( |
1129 | | fpMEM, |
1130 | | " <xs:import namespace=\"%s\" schemaLocation=\"%s\"/>\n", |
1131 | | pszWFSSchemaNamespace, pszWFSSchemaLocation)); |
1132 | | CPL_IGNORE_RET_VAL(VSIFPrintfL( |
1133 | | fpMEM, |
1134 | | " <xs:import namespace=\"ignored\" schemaLocation=\"%s\"/>\n", |
1135 | | pszEscapedXSDFilename)); |
1136 | | if (pszGMLSchemaLocation) |
1137 | | CPL_IGNORE_RET_VAL(VSIFPrintfL( |
1138 | | fpMEM, |
1139 | | " <xs:import namespace=\"http://www.opengis.net/gml\" " |
1140 | | "schemaLocation=\"%s\"/>\n", |
1141 | | pszGMLSchemaLocation)); |
1142 | | CPL_IGNORE_RET_VAL(VSIFPrintfL(fpMEM, "</xs:schema>\n")); |
1143 | | CPL_IGNORE_RET_VAL(VSIFCloseL(fpMEM)); |
1144 | | CPLFree(pszEscapedXSDFilename); |
1145 | | } |
1146 | | } |
1147 | | |
1148 | | CPLXMLSchemaPtr pSchema = CPLLoadXMLSchema( |
1149 | | !osTmpXSDFilename.empty() ? osTmpXSDFilename.c_str() : pszXSDFilename); |
1150 | | if (!osTmpXSDFilename.empty()) |
1151 | | VSIUnlink(osTmpXSDFilename); |
1152 | | if (pSchema == nullptr) |
1153 | | return FALSE; |
1154 | | |
1155 | | xmlSchemaValidCtxtPtr pSchemaValidCtxt = |
1156 | | xmlSchemaNewValidCtxt(static_cast<xmlSchemaPtr>(pSchema)); |
1157 | | |
1158 | | if (pSchemaValidCtxt == nullptr) |
1159 | | { |
1160 | | CPLFreeXMLSchema(pSchema); |
1161 | | return FALSE; |
1162 | | } |
1163 | | |
1164 | | xmlSchemaSetValidErrors(pSchemaValidCtxt, CPLLibXMLWarningErrorCallback, |
1165 | | CPLLibXMLWarningErrorCallback, |
1166 | | const_cast<char *>(pszXMLFilename)); |
1167 | | |
1168 | | bool bValid = false; |
1169 | | if (pszXMLFilename[0] == '<') |
1170 | | { |
1171 | | xmlDocPtr pDoc = |
1172 | | xmlParseDoc(reinterpret_cast<const xmlChar *>(pszXMLFilename)); |
1173 | | if (pDoc != nullptr) |
1174 | | { |
1175 | | bValid = xmlSchemaValidateDoc(pSchemaValidCtxt, pDoc) == 0; |
1176 | | } |
1177 | | xmlFreeDoc(pDoc); |
1178 | | } |
1179 | | else if (!STARTS_WITH(pszXMLFilename, "/vsi")) |
1180 | | { |
1181 | | bValid = |
1182 | | xmlSchemaValidateFile(pSchemaValidCtxt, pszXMLFilename, 0) == 0; |
1183 | | } |
1184 | | else |
1185 | | { |
1186 | | char *pszXML = CPLLoadContentFromFile(pszXMLFilename); |
1187 | | if (pszXML != nullptr) |
1188 | | { |
1189 | | xmlDocPtr pDoc = |
1190 | | xmlParseDoc(reinterpret_cast<const xmlChar *>(pszXML)); |
1191 | | if (pDoc != nullptr) |
1192 | | { |
1193 | | bValid = xmlSchemaValidateDoc(pSchemaValidCtxt, pDoc) == 0; |
1194 | | } |
1195 | | xmlFreeDoc(pDoc); |
1196 | | } |
1197 | | CPLFree(pszXML); |
1198 | | } |
1199 | | xmlSchemaFreeValidCtxt(pSchemaValidCtxt); |
1200 | | CPLFreeXMLSchema(pSchema); |
1201 | | |
1202 | | return bValid; |
1203 | | } |
1204 | | |
1205 | | #else // HAVE_RECENT_LIBXML2 |
1206 | | |
1207 | | /************************************************************************/ |
1208 | | /* CPLValidateXML() */ |
1209 | | /************************************************************************/ |
1210 | | |
1211 | | int CPLValidateXML(const char * /* pszXMLFilename */, |
1212 | | const char * /* pszXSDFilename */, |
1213 | | CSLConstList /* papszOptions */) |
1214 | 0 | { |
1215 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
1216 | 0 | "%s not implemented due to missing libxml2 support", |
1217 | 0 | "CPLValidateXML()"); |
1218 | 0 | return FALSE; |
1219 | 0 | } |
1220 | | |
1221 | | #endif // HAVE_RECENT_LIBXML2 |