/src/gdal/ogr/ogrsf_frmts/gmlas/ogr_gmlas.h
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * Project: OGR |
3 | | * Purpose: OGRGMLASDriver implementation |
4 | | * Author: Even Rouault, <even dot rouault at spatialys dot com> |
5 | | * |
6 | | * Initial development funded by the European Earth observation programme |
7 | | * Copernicus |
8 | | * |
9 | | ****************************************************************************** |
10 | | * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #ifndef OGR_GMLAS_INCLUDED |
16 | | #define OGR_GMLAS_INCLUDED |
17 | | |
18 | | #include "xercesc_headers.h" |
19 | | #include "ogr_xerces.h" |
20 | | |
21 | | #include "cpl_vsi_virtual.h" |
22 | | #include "gdal_priv.h" |
23 | | #include "ogrsf_frmts.h" |
24 | | |
25 | | #include "ogr_gmlas_consts.h" |
26 | | |
27 | | #include <list> |
28 | | #include <set> |
29 | | #include <map> |
30 | | #include <vector> |
31 | | |
32 | | typedef std::pair<CPLString, CPLString> PairURIFilename; |
33 | | |
/** Tri-state selector for coordinate swapping: automatic, forced on, or
 *  forced off. Enumerator values are the implicit 0, 1, 2. */
enum GMLASSwapCoordinatesEnum
{
    GMLAS_SWAP_AUTO,
    GMLAS_SWAP_YES,
    GMLAS_SWAP_NO,
};
40 | | |
41 | | GDALDataset *OGRGMLASDriverCreateCopy(const char *pszFilename, |
42 | | GDALDataset *poSrcDS, int /*bStrict*/, |
43 | | char **papszOptions, |
44 | | GDALProgressFunc pfnProgress, |
45 | | void *pProgressData); |
46 | | |
47 | | /************************************************************************/ |
48 | | /* IGMLASInputSourceClosing */ |
49 | | /************************************************************************/ |
50 | | |
51 | | class IGMLASInputSourceClosing /* non final */ |
52 | | { |
53 | | public: |
54 | | virtual ~IGMLASInputSourceClosing(); |
55 | | |
56 | | virtual void notifyClosing(const CPLString &osFilename) = 0; |
57 | | }; |
58 | | |
59 | | /************************************************************************/ |
60 | | /* GMLASResourceCache */ |
61 | | /************************************************************************/ |
62 | | |
/** Base class holding the state shared by the resource caches of this
 *  driver: a cache directory name plus "refresh" and "allow download"
 *  flags. GMLASXSDCache and GMLASXLinkResolver derive from it. */
class GMLASResourceCache /* non final */
{
  protected:
    /** Starts false. NOTE(review): presumably set once the cache directory
     *  has been checked -- confirm in the implementation. */
    bool m_bHasCheckedCacheDirectory = false;

    /** Cache directory name; empty by default. */
    std::string m_osCacheDirectory{};

    /** Refresh flag, written by SetRefreshMode(). */
    bool m_bRefresh = false;

    /** Download permission flag, written by SetAllowDownload(). */
    bool m_bAllowDownload = false;

    /** Set of file names. NOTE(review): by its name, the files already
     *  refreshed -- confirm in the implementation. */
    std::set<std::string> m_aoSetRefreshedFiles{};

    static bool
    RecursivelyCreateDirectoryIfNeeded(const std::string &osDirname);
    bool RecursivelyCreateDirectoryIfNeeded();

    std::string GetCachedFilename(const std::string &osResource);

  public:
    void SetCacheDirectory(const std::string &osCacheDirectory);

    /** Store the refresh flag. */
    void SetRefreshMode(bool bRefresh) { m_bRefresh = bRefresh; }

    /** Store the download permission flag. */
    void SetAllowDownload(bool bVal) { m_bAllowDownload = bVal; }
};
91 | | |
92 | | /************************************************************************/ |
93 | | /* GMLASXSDCache */ |
94 | | /************************************************************************/ |
95 | | |
96 | | class GMLASXSDCache final : public GMLASResourceCache |
97 | | { |
98 | | bool CacheAllGML321(); |
99 | | bool CacheAllISO20070417(); |
100 | | |
101 | | public: |
102 | | VSILFILE *Open(const std::string &osResource, const std::string &osBasePath, |
103 | | std::string &osOutFilename); |
104 | | }; |
105 | | |
106 | | /************************************************************************/ |
107 | | /* GMLASBaseEntityResolver */ |
108 | | /************************************************************************/ |
109 | | |
110 | | class GMLASBaseEntityResolver /* non final*/ : public EntityResolver, |
111 | | public IGMLASInputSourceClosing |
112 | | { |
113 | | protected: |
114 | | std::vector<CPLString> m_aosPathStack{}; |
115 | | GMLASXSDCache &m_oCache; |
116 | | CPLString m_osGMLVersionFound{}; |
117 | | std::set<CPLString> m_oSetSchemaURLs{}; |
118 | | bool m_bFoundNonOfficialGMLSchemaLocation = false; |
119 | | bool m_bSubstituteWithOGCSchemaLocation = false; |
120 | | |
121 | | public: |
122 | | GMLASBaseEntityResolver(const CPLString &osBasePath, GMLASXSDCache &oCache); |
123 | | ~GMLASBaseEntityResolver() override; |
124 | | |
125 | | void SetBasePath(const CPLString &osBasePath); |
126 | | |
127 | | const CPLString &GetGMLVersionFound() const |
128 | 1.03k | { |
129 | 1.03k | return m_osGMLVersionFound; |
130 | 1.03k | } |
131 | | |
132 | | const std::set<CPLString> &GetSchemaURLS() const |
133 | 1.03k | { |
134 | 1.03k | return m_oSetSchemaURLs; |
135 | 1.03k | } |
136 | | |
137 | | void SetSubstituteWithOGCSchemaLocation(bool b) |
138 | 1.09k | { |
139 | 1.09k | m_bSubstituteWithOGCSchemaLocation = b; |
140 | 1.09k | } |
141 | | |
142 | | bool GetFoundNonOfficialGMLSchemaLocation() const |
143 | 1.09k | { |
144 | 1.09k | return m_bFoundNonOfficialGMLSchemaLocation; |
145 | 1.09k | } |
146 | | |
147 | | void notifyClosing(const CPLString &osFilename) override; |
148 | | virtual InputSource *resolveEntity(const XMLCh *const publicId, |
149 | | const XMLCh *const systemId) override; |
150 | | |
151 | | virtual void |
152 | | DoExtraSchemaProcessing(const CPLString &osFilename, |
153 | | const std::shared_ptr<VSIVirtualHandle> &fp); |
154 | | }; |
155 | | |
156 | | /************************************************************************/ |
157 | | /* GMLASInputSource */ |
158 | | /************************************************************************/ |
159 | | |
160 | | class GMLASInputSource final : public InputSource |
161 | | { |
162 | | std::shared_ptr<VSIVirtualHandle> m_fp{}; |
163 | | int m_nCounter = 0; |
164 | | int *m_pnCounter = nullptr; |
165 | | CPLString m_osFilename{}; |
166 | | IGMLASInputSourceClosing *m_cbk = nullptr; |
167 | | |
168 | | CPL_DISALLOW_COPY_ASSIGN(GMLASInputSource) |
169 | | |
170 | | public: |
171 | | GMLASInputSource( |
172 | | const char *pszFilename, const std::shared_ptr<VSIVirtualHandle> &fp, |
173 | | MemoryManager *const manager = XMLPlatformUtils::fgMemoryManager); |
174 | | ~GMLASInputSource() override; |
175 | | |
176 | | BinInputStream *makeStream() const override; |
177 | | |
178 | | void SetClosingCallback(IGMLASInputSourceClosing *cbk); |
179 | | }; |
180 | | |
181 | | /************************************************************************/ |
182 | | /* GMLASErrorHandler */ |
183 | | /************************************************************************/ |
184 | | |
185 | | class GMLASErrorHandler final : public ErrorHandler |
186 | | { |
187 | | public: |
188 | 26.1k | GMLASErrorHandler() = default; |
189 | | |
190 | | void SetSchemaFullCheckingEnabled(bool b) |
191 | 13.5k | { |
192 | 13.5k | m_bSchemaFullChecking = b; |
193 | 13.5k | } |
194 | | |
195 | | void SetHandleMultipleImportsEnabled(bool b) |
196 | 13.5k | { |
197 | 13.5k | m_bHandleMultipleImports = b; |
198 | 13.5k | } |
199 | | |
200 | | void SetHideGMLTypeNotFound(bool b) |
201 | 6.30k | { |
202 | 6.30k | m_bHideGMLTypeNotFound = b; |
203 | 6.30k | } |
204 | | |
205 | | const std::string &GetGMLTypeNotFoundError() const |
206 | 2.25k | { |
207 | 2.25k | return m_osGMLTypeNotFoundError; |
208 | 2.25k | } |
209 | | |
210 | | bool hasFailed() const |
211 | 7.25k | { |
212 | 7.25k | return m_bFailed; |
213 | 7.25k | } |
214 | | |
215 | | void warning(const SAXParseException &e) override; |
216 | | void error(const SAXParseException &e) override; |
217 | | void fatalError(const SAXParseException &e) override; |
218 | | |
219 | | void resetErrors() override |
220 | 18.3k | { |
221 | 18.3k | m_bFailed = false; |
222 | 18.3k | } |
223 | | |
224 | | private: |
225 | | bool m_bFailed = false; |
226 | | bool m_bSchemaFullChecking = false; |
227 | | bool m_bHandleMultipleImports = false; |
228 | | bool m_bHideGMLTypeNotFound = false; |
229 | | std::string m_osGMLTypeNotFoundError{}; |
230 | | |
231 | | void handle(const SAXParseException &e, CPLErr eErr); |
232 | | }; |
233 | | |
234 | | /************************************************************************/ |
235 | | /* GMLASXLinkResolutionConf */ |
236 | | /************************************************************************/ |
237 | | |
238 | | class GMLASXLinkResolutionConf final |
239 | | { |
240 | | public: |
241 | | /* See data/gmlasconf.xsd for documentation of the fields */ |
242 | | |
243 | | typedef enum |
244 | | { |
245 | | RawContent, |
246 | | FieldsFromXPath |
247 | | } ResolutionMode; |
248 | | |
249 | | int m_nTimeOut = 0; |
250 | | |
251 | | int m_nMaxFileSize = MAX_FILE_SIZE_DEFAULT; |
252 | | |
253 | | int m_nMaxGlobalResolutionTime = 0; |
254 | | |
255 | | CPLString m_osProxyServerPort{}; |
256 | | |
257 | | CPLString m_osProxyUserPassword{}; |
258 | | |
259 | | CPLString m_osProxyAuth{}; |
260 | | |
261 | | CPLString m_osCacheDirectory{}; |
262 | | |
263 | | bool m_bDefaultResolutionEnabled = DEFAULT_RESOLUTION_ENABLED_DEFAULT; |
264 | | |
265 | | bool m_bDefaultAllowRemoteDownload = ALLOW_REMOTE_DOWNLOAD_DEFAULT; |
266 | | |
267 | | ResolutionMode m_eDefaultResolutionMode = RawContent; |
268 | | |
269 | | int m_nDefaultResolutionDepth = 1; |
270 | | |
271 | | bool m_bDefaultCacheResults = CACHE_RESULTS_DEFAULT; |
272 | | |
273 | | bool m_bResolveInternalXLinks = INTERNAL_XLINK_RESOLUTION_DEFAULT; |
274 | | |
275 | | struct URLSpecificResolution |
276 | | { |
277 | | CPLString m_osURLPrefix{}; |
278 | | |
279 | | std::vector<std::pair<CPLString, CPLString>> |
280 | | m_aosNameValueHTTPHeaders{}; |
281 | | |
282 | | bool m_bAllowRemoteDownload = false; |
283 | | |
284 | | ResolutionMode m_eResolutionMode = RawContent; |
285 | | |
286 | | int m_nResolutionDepth = 1; |
287 | | |
288 | | bool m_bCacheResults = false; |
289 | | |
290 | | struct XPathDerivedField |
291 | | { |
292 | | CPLString m_osName{}; |
293 | | |
294 | | CPLString m_osType{}; |
295 | | |
296 | | CPLString m_osXPath{}; |
297 | | }; |
298 | | |
299 | | std::vector<XPathDerivedField> m_aoFields{}; |
300 | | }; |
301 | | |
302 | | std::vector<URLSpecificResolution> m_aoURLSpecificRules{}; |
303 | | |
304 | | GMLASXLinkResolutionConf() = default; |
305 | | |
306 | | bool LoadFromXML(CPLXMLNode *psRoot); |
307 | | }; |
308 | | |
309 | | /************************************************************************/ |
310 | | /* GMLASConfiguration */ |
311 | | /************************************************************************/ |
312 | | |
313 | | class GMLASConfiguration final |
314 | | { |
315 | | public: |
316 | | /** Whether remote schemas are allowed to be download. */ |
317 | | bool m_bAllowRemoteSchemaDownload = ALLOW_REMOTE_SCHEMA_DOWNLOAD_DEFAULT; |
318 | | |
319 | | /** Whether a ogr_pkid attribute should always be generated. */ |
320 | | bool m_bAlwaysGenerateOGRId = ALWAYS_GENERATE_OGR_ID_DEFAULT; |
321 | | |
322 | | /** Whether to remove layers found to be unused in initial scan pass */ |
323 | | bool m_bRemoveUnusedLayers = REMOVE_UNUSED_LAYERS_DEFAULT; |
324 | | |
325 | | /** Whether to remove fields found to be unused in initial scan pass */ |
326 | | bool m_bRemoveUnusedFields = REMOVE_UNUSED_FIELDS_DEFAULT; |
327 | | |
328 | | /** Whether repeated strings, integers, reals should be in corresponding |
329 | | OGR array types. */ |
330 | | bool m_bUseArrays = USE_ARRAYS_DEFAULT; |
331 | | |
332 | | /** Whether OGR field null state should be used. */ |
333 | | bool m_bUseNullState = USE_NULL_STATE_DEFAULT; |
334 | | |
335 | | /** Whether geometries should be stored as XML in a OGR string field. */ |
336 | | bool m_bIncludeGeometryXML = INCLUDE_GEOMETRY_XML_DEFAULT; |
337 | | |
338 | | /** Whether, when dealing with schemas that import the |
339 | | GML namespace, and that at least one of them has |
340 | | elements that derive from gml:_Feature or |
341 | | gml:AbstractFeatureonly, only such elements should be |
342 | | instantiated as OGR layers, during the first pass that |
343 | | iterates over top level elements of the imported |
344 | | schemas. */ |
345 | | bool m_bInstantiateGMLFeaturesOnly = INSTANTIATE_GML_FEATURES_ONLY_DEFAULT; |
346 | | |
347 | | /** Maximum length of layer and field identifiers*/ |
348 | | int m_nIdentifierMaxLength = 0; |
349 | | |
350 | | /** Whether case insensitive comparison should be used for identifier |
351 | | * equality testing */ |
352 | | bool m_bCaseInsensitiveIdentifier = CASE_INSENSITIVE_IDENTIFIER_DEFAULT; |
353 | | |
354 | | /** Whether to launder identifiers like postgresql does */ |
355 | | bool m_bPGIdentifierLaundering = PG_IDENTIFIER_LAUNDERING_DEFAULT; |
356 | | |
357 | | /* Maximum number of fields in an element considered for flattening. */ |
358 | | int m_nMaximumFieldsForFlattening = MAXIMUM_FIELDS_FLATTENING_DEFAULT; |
359 | | |
360 | | /** Whether remote XSD schemas should be locally cached. */ |
361 | | bool m_bAllowXSDCache = ALLOW_XSD_CACHE_DEFAULT; |
362 | | |
363 | | /** Cache directory for cached XSD schemas. */ |
364 | | CPLString m_osXSDCacheDirectory{}; |
365 | | |
366 | | /** Whether to enable schema full checking. */ |
367 | | bool m_bSchemaFullChecking = SCHEMA_FULL_CHECKING_DEFAULT; |
368 | | |
369 | | /** Whether to allow multiple imports of the same namespace. */ |
370 | | bool m_bHandleMultipleImports = HANDLE_MULTIPLE_IMPORTS_DEFAULT; |
371 | | |
372 | | /** Whether validation of document against schema should be done. */ |
373 | | bool m_bValidate = VALIDATE_DEFAULT; |
374 | | |
375 | | /** Whether a validation error should prevent dataset opening. */ |
376 | | bool m_bFailIfValidationError = FAIL_IF_VALIDATION_ERROR_DEFAULT; |
377 | | |
378 | | /** Whether technical layers should be exposed. */ |
379 | | bool m_bExposeMetadataLayers = WARN_IF_EXCLUDED_XPATH_FOUND_DEFAULT; |
380 | | |
381 | | /** For flatening rules, map prefix namespace to its URI */ |
382 | | std::map<CPLString, CPLString> m_oMapPrefixToURIFlatteningRules{}; |
383 | | |
384 | | std::vector<CPLString> m_osForcedFlattenedXPath{}; |
385 | | |
386 | | std::vector<CPLString> m_osDisabledFlattenedXPath{}; |
387 | | |
388 | | enum SWEActivationMode |
389 | | { |
390 | | SWE_ACTIVATE_IF_NAMESPACE_FOUND, |
391 | | SWE_ACTIVATE_TRUE, |
392 | | SWE_ACTIVATE_FALSE |
393 | | }; |
394 | | |
395 | | /** If and when activate SWE special processings */ |
396 | | SWEActivationMode m_eSWEActivationMode = SWE_ACTIVATE_IF_NAMESPACE_FOUND; |
397 | | |
398 | | /** If enabling swe:DataRecord parsing */ |
399 | | bool m_bSWEProcessDataRecord = SWE_PROCESS_DATA_RECORD_DEFAULT; |
400 | | |
401 | | /** If enabling swe:DataArray parsing */ |
402 | | bool m_bSWEProcessDataArray = SWE_PROCESS_DATA_ARRAY_DEFAULT; |
403 | | |
404 | | /** For ignored xpaths, map prefix namespace to its URI */ |
405 | | std::map<CPLString, CPLString> m_oMapPrefixToURIIgnoredXPaths{}; |
406 | | |
407 | | /** Ignored xpaths */ |
408 | | std::vector<CPLString> m_aosIgnoredXPaths{}; |
409 | | |
410 | | /** For type constraints, map prefix namespace to its URI */ |
411 | | std::map<CPLString, CPLString> m_oMapPrefixToURITypeConstraints{}; |
412 | | |
413 | | /** Map an XPath to a list of potential types for its children */ |
414 | | std::map<CPLString, std::vector<CPLString>> |
415 | | m_oMapChildrenElementsConstraints{}; |
416 | | |
417 | | /* Beginning of Writer config */ |
418 | | |
419 | | /** Number of spaces for indentation */ |
420 | | int m_nIndentSize = INDENT_SIZE_DEFAULT; |
421 | | |
422 | | CPLString m_osComment{}; |
423 | | |
424 | | /** End of line format: "CRLF" or "LR" */ |
425 | | CPLString m_osLineFormat{}; |
426 | | |
427 | | /** "SHORT", "OGC_URN" or "OGC_URL" */ |
428 | | CPLString m_osSRSNameFormat = szSRSNAME_DEFAULT; |
429 | | |
430 | | /** "WFS2_FEATURECOLLECTION" or "GMLAS_FEATURECOLLECTION" */ |
431 | | CPLString m_osWrapping = szWFS2_FEATURECOLLECTION; |
432 | | |
433 | | /** XML datetime or empty for current time */ |
434 | | CPLString m_osTimestamp{}; |
435 | | |
436 | | /** Path or URL to OGC WFS 2.0 schema. */ |
437 | | CPLString m_osWFS20SchemaLocation = szWFS20_SCHEMALOCATION; |
438 | | |
439 | | /* End of Writer config */ |
440 | | |
441 | | /** Whether a warning should be emitted when an element or attribute is |
442 | | found in the document parsed, but ignored because of the ignored |
443 | | XPath defined. */ |
444 | | std::map<CPLString, bool> m_oMapIgnoredXPathToWarn{}; |
445 | | |
446 | | GMLASXLinkResolutionConf m_oXLinkResolution{}; |
447 | | |
448 | | GMLASConfiguration() = default; |
449 | | |
450 | | bool Load(const char *pszFilename); |
451 | | void Finalize(); |
452 | | |
453 | | static std::string GetDefaultConfFile(bool &bUnlinkAfterUse); |
454 | | }; |
455 | | |
456 | | /************************************************************************/ |
457 | | /* GMLASXLinkResolver */ |
458 | | /************************************************************************/ |
459 | | |
460 | | class GMLASXLinkResolver final : public GMLASResourceCache |
461 | | { |
462 | | GMLASXLinkResolutionConf m_oConf{}; |
463 | | int m_nGlobalResolutionTime = 0; |
464 | | |
465 | | std::map<CPLString, CPLString> m_oMapURLToContent{}; |
466 | | std::map<size_t, std::vector<CPLString>> m_oMapFileSizeToURLs{}; |
467 | | size_t m_nMaxRAMCacheSize = 0; |
468 | | size_t m_nCurrentRAMCacheSize = 0; |
469 | | |
470 | | CPLString FetchRawContent(const CPLString &osURL, const char *pszHeaders); |
471 | | |
472 | | CPLString GetRawContent(const CPLString &osURL, const char *pszHeaders, |
473 | | bool bAllowRemoteDownload, bool bCacheResults); |
474 | | |
475 | | public: |
476 | | GMLASXLinkResolver(); |
477 | | |
478 | | void SetConf(const GMLASXLinkResolutionConf &oConf); |
479 | | |
480 | | const GMLASXLinkResolutionConf &GetConf() const |
481 | 9.68k | { |
482 | 9.68k | return m_oConf; |
483 | 9.68k | } |
484 | | |
485 | | bool IsRawContentResolutionEnabled() const; |
486 | | int GetMatchingResolutionRule(const CPLString &osURL) const; |
487 | | CPLString GetRawContent(const CPLString &osURL); |
488 | | CPLString GetRawContentForRule(const CPLString &osURL, int nIdxRule); |
489 | | }; |
490 | | |
491 | | /************************************************************************/ |
492 | | /* GMLASXPathMatcher */ |
493 | | /************************************************************************/ |
494 | | |
495 | | /** Object to compares a user provided XPath against a set of test XPaths */ |
496 | | class GMLASXPathMatcher final |
497 | | { |
498 | | struct XPathComponent |
499 | | { |
500 | | CPLString m_osValue{}; |
501 | | bool m_bDirectChild = false; |
502 | | }; |
503 | | |
504 | | /** For reference xpaths, map prefix namespace to its URI */ |
505 | | std::map<CPLString, CPLString> m_oMapPrefixToURIReferenceXPaths{}; |
506 | | |
507 | | /** Reference xpaths */ |
508 | | std::vector<CPLString> m_aosReferenceXPathsUncompiled{}; |
509 | | |
510 | | /** Reference xpaths "compiled" */ |
511 | | std::vector<std::vector<XPathComponent>> m_aosReferenceXPaths{}; |
512 | | |
513 | | static bool MatchesRefXPath(const CPLString &osXPath, |
514 | | const std::vector<XPathComponent> &oRefXPath); |
515 | | |
516 | | public: |
517 | | void SetRefXPaths( |
518 | | const std::map<CPLString, CPLString> &oMapPrefixToURIReferenceXPaths, |
519 | | const std::vector<CPLString> &aosReferenceXPaths); |
520 | | |
521 | | void SetDocumentMapURIToPrefix( |
522 | | const std::map<CPLString, CPLString> &oMapURIToPrefix); |
523 | | |
524 | | /** Return true if osXPath matches one of the XPath of |
525 | | m_aosReferenceXPaths */ |
526 | | bool MatchesRefXPath(const CPLString &osXPath, |
527 | | CPLString &osOutMatchedXPath) const; |
528 | | |
529 | | const std::map<CPLString, CPLString> &GetMapPrefixToURI() const |
530 | 0 | { |
531 | 0 | return m_oMapPrefixToURIReferenceXPaths; |
532 | 0 | } |
533 | | }; |
534 | | |
535 | | /************************************************************************/ |
536 | | /* GMLASFieldType */ |
537 | | /************************************************************************/ |
538 | | |
/** XML primitive types handled by the driver, plus one extra entry for
 *  geometries. Enumerators take the implicit values 0..19 in declaration
 *  order. */
enum GMLASFieldType
{
    GMLAS_FT_STRING,
    GMLAS_FT_ID,
    GMLAS_FT_BOOLEAN,
    GMLAS_FT_SHORT,
    GMLAS_FT_INT32,
    GMLAS_FT_INT64,
    GMLAS_FT_FLOAT,
    GMLAS_FT_DOUBLE,
    GMLAS_FT_DECIMAL,
    GMLAS_FT_DATE,
    GMLAS_FT_GYEAR,
    GMLAS_FT_GYEAR_MONTH,
    GMLAS_FT_TIME,
    GMLAS_FT_DATETIME,
    GMLAS_FT_BASE64BINARY,
    GMLAS_FT_HEXBINARY,
    GMLAS_FT_ANYURI,
    GMLAS_FT_ANYTYPE,
    GMLAS_FT_ANYSIMPLETYPE,
    GMLAS_FT_GEOMETRY,  // not an XML primitive type
};
563 | | |
564 | | /************************************************************************/ |
565 | | /* GMLASField */ |
566 | | /************************************************************************/ |
567 | | |
568 | | class GMLASField final |
569 | | { |
570 | | public: |
571 | | typedef enum |
572 | | { |
573 | | /** Field that is going to be instantiated as a OGR field */ |
574 | | REGULAR, |
575 | | |
576 | | /** Non-instanciable field. The corresponding element to the XPath |
577 | | is stored in a child layer that will reference back to the |
578 | | main layer. */ |
579 | | PATH_TO_CHILD_ELEMENT_NO_LINK, |
580 | | |
581 | | /** Field that will store the PKID of a child element */ |
582 | | PATH_TO_CHILD_ELEMENT_WITH_LINK, |
583 | | |
584 | | /** Non-instanciable field. The corresponding element to the XPath |
585 | | is stored in a child layer. And the link between both will be |
586 | | done through a junction table. */ |
587 | | PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE, |
588 | | |
589 | | /** Non-instanciable field. Corresponds to a group of an element. */ |
590 | | GROUP |
591 | | } Category; |
592 | | |
593 | | private: |
594 | | CPLString m_osName{}; /**< Field name */ |
595 | | GMLASFieldType m_eType = GMLAS_FT_STRING; /**< Field type */ |
596 | | OGRwkbGeometryType m_eGeomType = wkbNone; /**< Field geometry type */ |
597 | | CPLString m_osTypeName{}; /**< Original XSD type */ |
598 | | int m_nWidth = 0; /**< Field width */ |
599 | | bool m_bNotNullable = false; /**< If the field is not nullable */ |
600 | | |
601 | | /** If the field is an array (from OGR types point of view) */ |
602 | | bool m_bArray = false; |
603 | | bool m_bList = false; /**< If the field is a list (a xs:list) */ |
604 | | |
605 | | /** Category of the field. */ |
606 | | Category m_eCategory = REGULAR; |
607 | | |
608 | | /** XPath of the field. */ |
609 | | CPLString m_osXPath{}; |
610 | | |
611 | | /** Set of XPath that are linked to this field. |
612 | | This is used for cases where a gml:AbstractGeometry element is |
613 | | referenced. In which case all possible realizations of this |
614 | | element are listed. Will be used with eType == GMLAS_FT_ANYTYPE |
615 | | to store XML blob on parsing. */ |
616 | | std::vector<CPLString> m_aosXPath{}; |
617 | | |
618 | | CPLString m_osFixedValue{}; /**< Value of fixed='' attribute */ |
619 | | CPLString m_osDefaultValue{}; /**< Value of default='' attribute */ |
620 | | |
621 | | /** Minimum number of occurrences. Might be -1 if unset */ |
622 | | int m_nMinOccurs = -1; |
623 | | |
624 | | /** Maximum number of occurrences, or MAXOCCURS_UNLIMITED. Might be |
625 | | -1 if unset. */ |
626 | | int m_nMaxOccurs = -1; |
627 | | |
628 | | /** For a PATH_TO_CHILD_ELEMENT_NO_LINK, whether maxOccurs>1 is on the |
629 | | sequence rather than on the element */ |
630 | | bool m_bRepetitionOnSequence = false; |
631 | | |
632 | | /** In case of m_eType == GMLAS_FT_ANYTYPE whether the current element |
633 | | must be stored in the XML blob (if false, only its children) */ |
634 | | bool m_bIncludeThisEltInBlob = false; |
635 | | |
636 | | /** Only used for PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE. The XPath |
637 | | of the abstract element (the concrete XPath is in m_osXPath). |
638 | | e.g myns:mainElt/myns:subEltAbstract whereas the concrete XPath |
639 | | is myns:mainElt/myns:subEltRealization */ |
640 | | CPLString m_osAbstractElementXPath{}; |
641 | | |
642 | | /** Only used for PATH_TO_CHILD_ELEMENT_WITH_LINK and |
643 | | PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE (and also for |
644 | | PATH_TO_CHILD_ELEMENT_NO_LINK and GROUP but for metadata layers only). |
645 | | The XPath of the child element. */ |
646 | | CPLString m_osRelatedClassXPath{}; |
647 | | |
648 | | /** Only use for PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE. Name of |
649 | | the junction layer to consult for this field. Only used by |
650 | | writer code. */ |
651 | | CPLString m_osJunctionLayer{}; |
652 | | |
653 | | /** Dirty hack to register attributes with fixed values, despite being |
654 | | in the XPath ignored list. Needed to avoid warning when doing validation |
655 | | */ |
656 | | bool m_bIgnored = false; |
657 | | |
658 | | /** Documentation from schema */ |
659 | | CPLString m_osDoc{}; |
660 | | |
661 | | /** For elements within xs:choice */ |
662 | | bool m_bMayAppearOutOfOrder = false; |
663 | | |
664 | | public: |
665 | 327k | GMLASField() = default; |
666 | | |
667 | | void SetName(const CPLString &osName) |
668 | 3.49M | { |
669 | 3.49M | m_osName = osName; |
670 | 3.49M | } |
671 | | |
672 | | void SetType(GMLASFieldType eType, const char *pszTypeName); |
673 | | |
674 | | void SetGeomType(OGRwkbGeometryType eGeomType) |
675 | 18 | { |
676 | 18 | m_eGeomType = eGeomType; |
677 | 18 | } |
678 | | |
679 | | void SetWidth(int nWidth) |
680 | 49.0k | { |
681 | 49.0k | m_nWidth = nWidth; |
682 | 49.0k | } |
683 | | |
684 | | void SetNotNullable(bool bNotNullable) |
685 | 56.4k | { |
686 | 56.4k | m_bNotNullable = bNotNullable; |
687 | 56.4k | } |
688 | | |
689 | | void SetArray(bool bArray) |
690 | 6 | { |
691 | 6 | m_bArray = bArray; |
692 | 6 | } |
693 | | |
694 | | void SetList(bool bList) |
695 | 0 | { |
696 | 0 | m_bList = bList; |
697 | 0 | } |
698 | | |
699 | | void SetXPath(const CPLString &osXPath) |
700 | 327k | { |
701 | 327k | m_osXPath = osXPath; |
702 | 327k | } |
703 | | |
704 | | void AddAlternateXPath(const CPLString &osXPath) |
705 | 26 | { |
706 | 26 | m_aosXPath.push_back(osXPath); |
707 | 26 | } |
708 | | |
709 | | void SetFixedValue(const CPLString &osFixedValue) |
710 | 40 | { |
711 | 40 | m_osFixedValue = osFixedValue; |
712 | 40 | } |
713 | | |
714 | | void SetDefaultValue(const CPLString &osDefaultValue) |
715 | 0 | { |
716 | 0 | m_osDefaultValue = osDefaultValue; |
717 | 0 | } |
718 | | |
719 | | void SetCategory(Category eCategory) |
720 | 218k | { |
721 | 218k | m_eCategory = eCategory; |
722 | 218k | } |
723 | | |
724 | | void SetMinOccurs(int nMinOccurs) |
725 | 308k | { |
726 | 308k | m_nMinOccurs = nMinOccurs; |
727 | 308k | } |
728 | | |
729 | | void SetMaxOccurs(int nMaxOccurs) |
730 | 300k | { |
731 | 300k | m_nMaxOccurs = nMaxOccurs; |
732 | 300k | } |
733 | | |
734 | | void SetRepetitionOnSequence(bool b) |
735 | 0 | { |
736 | 0 | m_bRepetitionOnSequence = b; |
737 | 0 | } |
738 | | |
739 | | void SetIncludeThisEltInBlob(bool b) |
740 | 7.41k | { |
741 | 7.41k | m_bIncludeThisEltInBlob = b; |
742 | 7.41k | } |
743 | | |
744 | | void SetAbstractElementXPath(const CPLString &osName) |
745 | 27.4k | { |
746 | 27.4k | m_osAbstractElementXPath = osName; |
747 | 27.4k | } |
748 | | |
749 | | void SetRelatedClassXPath(const CPLString &osName) |
750 | 218k | { |
751 | 218k | m_osRelatedClassXPath = osName; |
752 | 218k | } |
753 | | |
754 | | void SetJunctionLayer(const CPLString &osName) |
755 | 0 | { |
756 | 0 | m_osJunctionLayer = osName; |
757 | 0 | } |
758 | | |
759 | | void SetIgnored() |
760 | 17 | { |
761 | 17 | m_bIgnored = true; |
762 | 17 | } |
763 | | |
764 | | void SetDocumentation(const CPLString &osDoc) |
765 | 109k | { |
766 | 109k | m_osDoc = osDoc; |
767 | 109k | } |
768 | | |
769 | | void SetMayAppearOutOfOrder(bool b) |
770 | 22 | { |
771 | 22 | m_bMayAppearOutOfOrder = b; |
772 | 22 | } |
773 | | |
774 | | static CPLString |
775 | | MakePKIDFieldXPathFromXLinkHrefXPath(const CPLString &osBaseXPath) |
776 | 0 | { |
777 | 0 | return "{" + osBaseXPath + "}_pkid"; |
778 | 0 | } |
779 | | |
780 | | static CPLString MakeXLinkRawContentFieldXPathFromXLinkHrefXPath( |
781 | | const CPLString &osBaseXPath) |
782 | 0 | { |
783 | 0 | return "{" + osBaseXPath + "}_rawcontent"; |
784 | 0 | } |
785 | | |
786 | | static CPLString |
787 | | MakeXLinkDerivedFieldXPathFromXLinkHrefXPath(const CPLString &osBaseXPath, |
788 | | const CPLString &osName) |
789 | 0 | { |
790 | 0 | return "{" + osBaseXPath + "}_derived_" + osName; |
791 | 0 | } |
792 | | |
793 | | const CPLString &GetName() const |
794 | 9.98M | { |
795 | 9.98M | return m_osName; |
796 | 9.98M | } |
797 | | |
798 | | const CPLString &GetXPath() const |
799 | 12.9M | { |
800 | 12.9M | return m_osXPath; |
801 | 12.9M | } |
802 | | |
803 | | const std::vector<CPLString> &GetAlternateXPaths() const |
804 | 282k | { |
805 | 282k | return m_aosXPath; |
806 | 282k | } |
807 | | |
808 | | GMLASFieldType GetType() const |
809 | 622k | { |
810 | 622k | return m_eType; |
811 | 622k | } |
812 | | |
813 | | OGRwkbGeometryType GetGeomType() const |
814 | 18 | { |
815 | 18 | return m_eGeomType; |
816 | 18 | } |
817 | | |
818 | | const CPLString &GetTypeName() const |
819 | 533k | { |
820 | 533k | return m_osTypeName; |
821 | 533k | } |
822 | | |
823 | | int GetWidth() const |
824 | 224k | { |
825 | 224k | return m_nWidth; |
826 | 224k | } |
827 | | |
828 | | bool IsNotNullable() const |
829 | 304k | { |
830 | 304k | return m_bNotNullable; |
831 | 304k | } |
832 | | |
833 | | bool IsArray() const |
834 | 224k | { |
835 | 224k | return m_bArray; |
836 | 224k | } |
837 | | |
838 | | bool IsList() const |
839 | 284k | { |
840 | 284k | return m_bList; |
841 | 284k | } |
842 | | |
843 | | const CPLString &GetFixedValue() const |
844 | 687k | { |
845 | 687k | return m_osFixedValue; |
846 | 687k | } |
847 | | |
848 | | const CPLString &GetDefaultValue() const |
849 | 666k | { |
850 | 666k | return m_osDefaultValue; |
851 | 666k | } |
852 | | |
853 | | Category GetCategory() const |
854 | 1.38M | { |
855 | 1.38M | return m_eCategory; |
856 | 1.38M | } |
857 | | |
858 | | int GetMinOccurs() const |
859 | 547k | { |
860 | 547k | return m_nMinOccurs; |
861 | 547k | } |
862 | | |
863 | | int GetMaxOccurs() const |
864 | 1.30M | { |
865 | 1.30M | return m_nMaxOccurs; |
866 | 1.30M | } |
867 | | |
868 | | bool GetRepetitionOnSequence() const |
869 | 30.6k | { |
870 | 30.6k | return m_bRepetitionOnSequence; |
871 | 30.6k | } |
872 | | |
873 | | bool GetIncludeThisEltInBlob() const |
874 | 34.3k | { |
875 | 34.3k | return m_bIncludeThisEltInBlob; |
876 | 34.3k | } |
877 | | |
878 | | const CPLString &GetAbstractElementXPath() const |
879 | 35.8k | { |
880 | 35.8k | return m_osAbstractElementXPath; |
881 | 35.8k | } |
882 | | |
883 | | const CPLString &GetJunctionLayer() const |
884 | 0 | { |
885 | 0 | return m_osJunctionLayer; |
886 | 0 | } |
887 | | |
888 | | const CPLString &GetRelatedClassXPath() const |
889 | 544k | { |
890 | 544k | return m_osRelatedClassXPath; |
891 | 544k | } |
892 | | |
893 | | bool IsIgnored() const |
894 | 282k | { |
895 | 282k | return m_bIgnored; |
896 | 282k | } |
897 | | |
898 | | const CPLString &GetDocumentation() const |
899 | 282k | { |
900 | 282k | return m_osDoc; |
901 | 282k | } |
902 | | |
903 | | bool MayAppearOutOfOrder() const |
904 | 31.6k | { |
905 | 31.6k | return m_bMayAppearOutOfOrder; |
906 | 31.6k | } |
907 | | |
908 | | static GMLASFieldType GetTypeFromString(const CPLString &osType); |
909 | | }; |
910 | | |
911 | | /************************************************************************/ |
912 | | /* GMLASFeatureClass */ |
913 | | /************************************************************************/ |
914 | | |
915 | | class GMLASFeatureClass final /** Describes one feature class: name, XPath, fields, nested classes and related flags. */ |
916 | | { |
917 | | /** User facing name */ |
918 | | CPLString m_osName{}; |
919 | | |
920 | | /** XPath to the main element of the feature class */ |
921 | | CPLString m_osXPath{}; |
922 | | |
923 | | /** List of fields */ |
924 | | std::vector<GMLASField> m_aoFields{}; |
925 | | |
926 | | /** Child nested classes */ |
927 | | std::vector<GMLASFeatureClass> m_aoNestedClasses{}; |
928 | | |
929 | | /** Whether this layer corresponds to a (multiply instantiated) xs:group |
930 | | or a repeated sequence */ |
931 | | bool m_bIsRepeatedSequence = false; |
932 | | |
933 | | /** Whether this is a repeated group. Should be set together with |
934 | | * m_bIsRepeatedSequence */ |
935 | | bool m_bIsGroup = false; |
936 | | |
937 | | /** Only used for junction tables. The XPath to the parent table */ |
938 | | CPLString m_osParentXPath{}; |
939 | | |
940 | | /** Only used for junction tables. The XPath to the child table */ |
941 | | CPLString m_osChildXPath{}; |
942 | | |
943 | | /** Whether this corresponds to a top-level XSD element in the schema */ |
944 | | bool m_bIsTopLevelElt = false; |
945 | | |
946 | | /** Documentation from schema */ |
947 | | CPLString m_osDoc{}; |
948 | | |
949 | | public: |
950 | 83.2k | GMLASFeatureClass() = default; |
951 | | |
952 | | void SetName(const CPLString &osName); |
953 | | void SetXPath(const CPLString &osXPath); |
954 | | void AddField(const GMLASField &oField); |
955 | | void PrependFields(const std::vector<GMLASField> &aoFields); |
956 | | void AppendFields(const std::vector<GMLASField> &aoFields); |
957 | | void AddNestedClass(const GMLASFeatureClass &oNestedClass); |
958 | | |
959 | | void SetIsRepeatedSequence(bool bIsRepeatedSequence) |
960 | 2.19k | { |
961 | 2.19k | m_bIsRepeatedSequence = bIsRepeatedSequence; |
962 | 2.19k | } |
963 | | |
964 | | void SetIsGroup(bool bIsGroup) |
965 | 0 | { |
966 | 0 | m_bIsGroup = bIsGroup; |
967 | 0 | } |
968 | | |
969 | | void SetParentXPath(const CPLString &osXPath) |
970 | 27.4k | { |
971 | 27.4k | m_osParentXPath = osXPath; |
972 | 27.4k | } |
973 | | |
974 | | void SetChildXPath(const CPLString &osXPath) |
975 | 27.4k | { |
976 | 27.4k | m_osChildXPath = osXPath; |
977 | 27.4k | } |
978 | | |
979 | | void SetIsTopLevelElt(bool bIsTopLevelElt) |
980 | 9.69k | { |
981 | 9.69k | m_bIsTopLevelElt = bIsTopLevelElt; |
982 | 9.69k | } |
983 | | |
984 | | void SetDocumentation(const CPLString &osDoc) |
985 | 50.3k | { |
986 | 50.3k | m_osDoc = osDoc; |
987 | 50.3k | } |
988 | | |
989 | | const CPLString &GetName() const |
990 | 281k | { |
991 | 281k | return m_osName; |
992 | 281k | } |
993 | | |
994 | | const CPLString &GetXPath() const |
995 | 44.8M | { |
996 | 44.8M | return m_osXPath; |
997 | 44.8M | } |
998 | | |
999 | | const std::vector<GMLASField> &GetFields() const |
1000 | 456k | { |
1001 | 456k | return m_aoFields; |
1002 | 456k | } |
1003 | | |
1004 | | std::vector<GMLASField> &GetFields() /** Non-const overload: gives mutable access to m_aoFields. */ |
1005 | 119k | { |
1006 | 119k | return m_aoFields; |
1007 | 119k | } |
1008 | | |
1009 | | const std::vector<GMLASFeatureClass> &GetNestedClasses() const |
1010 | 40.0k | { |
1011 | 40.0k | return m_aoNestedClasses; |
1012 | 40.0k | } |
1013 | | |
1014 | | std::vector<GMLASFeatureClass> &GetNestedClasses() /** Non-const overload: gives mutable access to m_aoNestedClasses. */ |
1015 | 92.1k | { |
1016 | 92.1k | return m_aoNestedClasses; |
1017 | 92.1k | } |
1018 | | |
1019 | | bool IsRepeatedSequence() const |
1020 | 1.16M | { |
1021 | 1.16M | return m_bIsRepeatedSequence; |
1022 | 1.16M | } |
1023 | | |
1024 | | bool IsGroup() const |
1025 | 541k | { |
1026 | 541k | return m_bIsGroup; |
1027 | 541k | } |
1028 | | |
1029 | | const CPLString &GetParentXPath() const |
1030 | 191k | { |
1031 | 191k | return m_osParentXPath; |
1032 | 191k | } |
1033 | | |
1034 | | const CPLString &GetChildXPath() const |
1035 | 0 | { |
1036 | 0 | return m_osChildXPath; |
1037 | 0 | } |
1038 | | |
1039 | | bool IsTopLevelElt() const |
1040 | 13.7k | { |
1041 | 13.7k | return m_bIsTopLevelElt; |
1042 | 13.7k | } |
1043 | | |
1044 | | const CPLString &GetDocumentation() const |
1045 | 13.8k | { |
1046 | 13.8k | return m_osDoc; |
1047 | 13.8k | } |
1048 | | }; |
1049 | | |
1050 | | /************************************************************************/ |
1051 | | /* GMLASSchemaAnalyzer */ |
1052 | | /************************************************************************/ |
1053 | | |
1054 | | class GMLASSchemaAnalyzer final /** Holds XSD analysis options and results (feature classes, URI-to-prefix map, GML version, schema URLs). */ |
1055 | | { |
1056 | | GMLASXPathMatcher &m_oIgnoredXPathMatcher; |
1057 | | |
1058 | | GMLASXPathMatcher &m_oChildrenElementsConstraintsXPathMatcher; |
1059 | | |
1060 | | GMLASXPathMatcher &m_oForcedFlattenedXPathMatcher; |
1061 | | |
1062 | | GMLASXPathMatcher &m_oDisabledFlattenedXPathMatcher; |
1063 | | |
1064 | | std::map<CPLString, std::vector<CPLString>> |
1065 | | m_oMapChildrenElementsConstraints{}; |
1066 | | |
1067 | | /** Whether repeated strings, integers, reals should be in corresponding |
1068 | | OGR array types. */ |
1069 | | bool m_bUseArrays = true; |
1070 | | |
1071 | | /** Whether OGR field null state should be used. */ |
1072 | | bool m_bUseNullState = false; |
1073 | | |
1074 | | /** Whether, when dealing with schemas that import the |
1075 | | GML namespace, and that at least one of them has |
1076 | | elements that derive from gml:_Feature or |
1077 | | gml:AbstractFeature, only such elements should be |
1078 | | instantiated as OGR layers, during the first pass that |
1079 | | iterates over top level elements of the imported |
1080 | | schemas. */ |
1081 | | bool m_bInstantiateGMLFeaturesOnly = true; |
1082 | | |
1083 | | /** Vector of feature classes */ |
1084 | | std::vector<GMLASFeatureClass> m_aoClasses{}; |
1085 | | |
1086 | | /** Map from a namespace URI to the corresponding prefix */ |
1087 | | std::map<CPLString, CPLString> m_oMapURIToPrefix{}; |
1088 | | |
1089 | | /** Map element XPath to its XSElementDeclaration* */ |
1090 | | std::map<CPLString, XSElementDeclaration *> m_oMapXPathToEltDecl{}; |
1091 | | |
1092 | | typedef std::map<XSElementDeclaration *, |
1093 | | std::vector<XSElementDeclaration *>> |
1094 | | tMapParentEltToChildElt; |
1095 | | /** Map from a base/parent element to a vector of derived/children |
1096 | | elements that are substitutionGroup of it. The map only |
1097 | | addresses the direct derived types, and not the 2nd level or more |
1098 | | derived ones. For that, recursion in the map must be used. */ |
1099 | | tMapParentEltToChildElt m_oMapParentEltToChildElt{}; |
1100 | | |
1101 | | /** Map from a XSModelGroup* object to its XSModelGroupDefinition*. */ |
1102 | | std::map<XSModelGroup *, XSModelGroupDefinition *> m_oMapModelGroupToMGD{}; |
1103 | | |
1104 | | /** Map from (non namespace prefixed) element names to the number of |
1105 | | elements that share the same name (in different namespaces) */ |
1106 | | std::map<CPLString, int> m_oMapEltNamesToInstanceCount{}; |
1107 | | |
1108 | | /** Set of elements that match a OGR layer */ |
1109 | | std::set<XSElementDeclaration *> m_oSetEltsForTopClass{}; |
1110 | | |
1111 | | /** Set of elements that are simple enough to be inlined whenever they |
1112 | | are referenced with cardinality 1. The use case is base:identifier |
1113 | | used by Inspire schemas. */ |
1114 | | std::set<XSElementDeclaration *> m_oSetSimpleEnoughElts{}; |
1115 | | |
1116 | | /** Maximum length of layer and field identifiers */ |
1117 | | int m_nIdentifierMaxLength = 0; |
1118 | | |
1119 | | /** Whether case insensitive comparison should be used for identifier |
1120 | | * equality testing */ |
1121 | | bool m_bCaseInsensitiveIdentifier = CASE_INSENSITIVE_IDENTIFIER_DEFAULT; |
1122 | | |
1123 | | /** Whether to launder identifiers like postgresql does */ |
1124 | | bool m_bPGIdentifierLaundering = PG_IDENTIFIER_LAUNDERING_DEFAULT; |
1125 | | |
1126 | | /** Maximum number of fields in an element considered for flattening. */ |
1127 | | int m_nMaximumFieldsForFlattening = MAXIMUM_FIELDS_FLATTENING_DEFAULT; |
1128 | | |
1129 | | /** GML version found: 2.1.1, 3.1.1 or 3.2.1 or empty */ |
1130 | | CPLString m_osGMLVersionFound{}; |
1131 | | |
1132 | | /** Set of schemas opened */ |
1133 | | std::set<CPLString> m_oSetSchemaURLs{}; |
1134 | | |
1135 | | /** Map from namespace URI to namespace prefix coming from the |
1136 | | * examination of xmlns:foo=bar attributes of the top element of the |
1137 | | * GML document */ |
1138 | | std::map<CPLString, CPLString> m_oMapDocNSURIToPrefix{}; |
1139 | | |
1140 | | bool m_bAlwaysGenerateOGRId = ALWAYS_GENERATE_OGR_ID_DEFAULT; |
1141 | | |
1142 | | static bool IsSame(const XSModelGroup *poModelGroup1, |
1143 | | const XSModelGroup *poModelGroup2); |
1144 | | XSModelGroupDefinition * |
1145 | | GetGroupDefinition(const XSModelGroup *poModelGroup); |
1146 | | bool SetFieldFromAttribute(GMLASField &oField, XSAttributeUse *poAttr, |
1147 | | const CPLString &osXPathPrefix, |
1148 | | const CPLString &osNamePrefix = CPLString()); |
1149 | | void GetConcreteImplementationTypes( |
1150 | | XSElementDeclaration *poParentElt, |
1151 | | std::vector<XSElementDeclaration *> &apoImplEltList); |
1152 | | std::vector<XSElementDeclaration *> |
1153 | | GetConstraintChildrenElements(const CPLString &osFullXPath); |
1154 | | bool FindElementsWithMustBeToLevel( /** NOTE(review): "ToLevel" in the name presumably stands for "TopLevel" - confirm. */ |
1155 | | const CPLString &osParentXPath, XSModelGroup *poModelGroup, |
1156 | | int nRecursionCounter, |
1157 | | std::set<XSElementDeclaration *> &oSetVisitedEltDecl, |
1158 | | std::set<XSModelGroup *> &oSetVisitedModelGroups, |
1159 | | std::vector<XSElementDeclaration *> &oVectorEltsForTopClass, |
1160 | | std::set<CPLString> &aoSetXPathEltsForTopClass, XSModel *poModel, |
1161 | | bool &bSimpleEnoughOut, int &nCountSubEltsOut); |
1162 | | static void BuildMapCountOccurrencesOfSameName( |
1163 | | XSModelGroup *poModelGroup, |
1164 | | std::map<CPLString, int> &oMapCountOccurrencesOfSameName); |
1165 | | bool ExploreModelGroup( |
1166 | | XSModelGroup *psMainModelGroup, XSAttributeUseList *poMainAttrList, |
1167 | | GMLASFeatureClass &oClass, int nRecursionCounter, |
1168 | | std::set<XSModelGroup *> &oSetVisitedModelGroups, XSModel *poModel, |
1169 | | const std::map<CPLString, int> &oMapCountOccurrencesOfSameName); |
1170 | | void SetFieldTypeAndWidthFromDefinition(XSSimpleTypeDefinition *poST, |
1171 | | GMLASField &oField); |
1172 | | CPLString GetPrefix(const CPLString &osNamespaceURI); |
1173 | | CPLString MakeXPath(const CPLString &osNamespace, const CPLString &osName); |
1174 | | bool LaunderFieldNames(GMLASFeatureClass &oClass); |
1175 | | void LaunderClassNames(); |
1176 | | |
1177 | | XSElementDeclaration * |
1178 | | GetTopElementDeclarationFromXPath(const CPLString &osXPath, |
1179 | | XSModel *poModel); |
1180 | | |
1181 | | bool InstantiateClassFromEltDeclaration(XSElementDeclaration *poEltDecl, |
1182 | | XSModel *poModel, bool &bError); |
1183 | | void CreateNonNestedRelationship( |
1184 | | XSElementDeclaration *poElt, |
1185 | | std::vector<XSElementDeclaration *> &apoSubEltList, |
1186 | | GMLASFeatureClass &oClass, int nMaxOccurs, bool bEltNameWillNeedPrefix, |
1187 | | bool bForceJunctionTable, bool bCaseOfConstraintChildren); |
1188 | | |
1189 | | bool IsGMLNamespace(const CPLString &osURI); |
1190 | | |
1191 | | bool DerivesFromGMLFeature(XSElementDeclaration *poEltDecl); |
1192 | | |
1193 | | bool IsIgnoredXPath(const CPLString &osXPath); |
1194 | | |
1195 | | static void |
1196 | | CollectClassesReferences(GMLASFeatureClass &oClass, |
1197 | | std::vector<GMLASFeatureClass *> &aoClasses); |
1198 | | |
1199 | | CPL_DISALLOW_COPY_ASSIGN(GMLASSchemaAnalyzer) |
1200 | | |
1201 | | public: |
1202 | | GMLASSchemaAnalyzer( |
1203 | | GMLASXPathMatcher &oIgnoredXPathMatcher, |
1204 | | GMLASXPathMatcher &oChildrenElementsConstraintsXPathMatcher, |
1205 | | const std::map<CPLString, std::vector<CPLString>> |
1206 | | &oMapChildrenElementsConstraints, |
1207 | | GMLASXPathMatcher &oForcedFlattenedXPathMatcher, |
1208 | | GMLASXPathMatcher &oDisabledFlattenedXPathMatcher); |
1209 | | |
1210 | | void SetUseArrays(bool b) |
1211 | 2.44k | { |
1212 | 2.44k | m_bUseArrays = b; |
1213 | 2.44k | } |
1214 | | |
1215 | | void SetUseNullState(bool b) |
1216 | 2.44k | { |
1217 | 2.44k | m_bUseNullState = b; |
1218 | 2.44k | } |
1219 | | |
1220 | | void SetInstantiateGMLFeaturesOnly(bool b) |
1221 | 2.44k | { |
1222 | 2.44k | m_bInstantiateGMLFeaturesOnly = b; |
1223 | 2.44k | } |
1224 | | |
1225 | | void SetIdentifierMaxLength(int nLength) |
1226 | 2.44k | { |
1227 | 2.44k | m_nIdentifierMaxLength = nLength; |
1228 | 2.44k | } |
1229 | | |
1230 | | void SetCaseInsensitiveIdentifier(bool b) |
1231 | 2.44k | { |
1232 | 2.44k | m_bCaseInsensitiveIdentifier = b; |
1233 | 2.44k | } |
1234 | | |
1235 | | void SetPGIdentifierLaundering(bool b) |
1236 | 2.44k | { |
1237 | 2.44k | m_bPGIdentifierLaundering = b; |
1238 | 2.44k | } |
1239 | | |
1240 | | void SetMaximumFieldsForFlattening(int n) |
1241 | 2.44k | { |
1242 | 2.44k | m_nMaximumFieldsForFlattening = n; |
1243 | 2.44k | } |
1244 | | |
1245 | | void SetMapDocNSURIToPrefix(const std::map<CPLString, CPLString> &oMap) |
1246 | 2.11k | { |
1247 | 2.11k | m_oMapDocNSURIToPrefix = oMap; |
1248 | 2.11k | } |
1249 | | |
1250 | | void SetAlwaysGenerateOGRId(bool b) |
1251 | 2.44k | { |
1252 | 2.44k | m_bAlwaysGenerateOGRId = b; |
1253 | 2.44k | } |
1254 | | |
1255 | | bool Analyze(GMLASXSDCache &oCache, const CPLString &osBaseDirname, |
1256 | | std::vector<PairURIFilename> &aoXSDs, bool bSchemaFullChecking, |
1257 | | bool bHandleMultipleImports); |
1258 | | |
1259 | | const std::vector<GMLASFeatureClass> &GetClasses() const |
1260 | 960 | { |
1261 | 960 | return m_aoClasses; |
1262 | 960 | } |
1263 | | |
1264 | | const std::map<CPLString, CPLString> &GetMapURIToPrefix() const |
1265 | 960 | { |
1266 | 960 | return m_oMapURIToPrefix; |
1267 | 960 | } |
1268 | | |
1269 | | const CPLString &GetGMLVersionFound() const |
1270 | 960 | { |
1271 | 960 | return m_osGMLVersionFound; |
1272 | 960 | } |
1273 | | |
1274 | | const std::set<CPLString> &GetSchemaURLS() const |
1275 | 960 | { |
1276 | 960 | return m_oSetSchemaURLs; |
1277 | 960 | } |
1278 | | |
1279 | | static CPLString BuildJunctionTableXPath(const CPLString &osEltXPath, |
1280 | | const CPLString &osSubEltXPath) /** Joins both XPaths with a '|' separator. */ |
1281 | 36.9k | { |
1282 | 36.9k | return osEltXPath + "|" + osSubEltXPath; |
1283 | 36.9k | } |
1284 | | }; |
1285 | | |
1286 | | /************************************************************************/ |
1287 | | /* OGRGMLASDataSource */ |
1288 | | /************************************************************************/ |
1289 | | |
1290 | | class OGRGMLASLayer; |
1291 | | class GMLASReader; |
1292 | | |
1293 | | class OGRGMLASDataSource final : public GDALDataset /** GDALDataset subclass holding the layers, configuration and reader state declared below. */ |
1294 | | { |
1295 | | struct XercesInitializer |
1296 | | { |
1297 | | XercesInitializer(); |
1298 | | ~XercesInitializer(); |
1299 | | }; |
1300 | | |
1301 | | // MUST be first member, to get destroyed last after we have cleaned up |
1302 | | // all other Xerces dependent objects. |
1303 | | XercesInitializer m_oXercesInitializer{}; |
1304 | | |
1305 | | std::vector<std::unique_ptr<OGRGMLASLayer>> m_apoLayers{}; |
1306 | | std::map<CPLString, CPLString> m_oMapURIToPrefix{}; |
1307 | | CPLString m_osGMLFilename{}; |
1308 | | std::unique_ptr<OGRLayer> m_poFieldsMetadataLayer{}; |
1309 | | std::unique_ptr<OGRLayer> m_poLayersMetadataLayer{}; |
1310 | | std::unique_ptr<OGRLayer> m_poRelationshipsLayer{}; |
1311 | | std::unique_ptr<OGRLayer> m_poOtherMetadataLayer{}; |
1312 | | std::vector<OGRLayer *> m_apoRequestedMetadataLayers{}; |
1313 | | std::shared_ptr<VSIVirtualHandle> m_fpGML{}; |
1314 | | std::shared_ptr<VSIVirtualHandle> m_fpGMLParser{}; |
1315 | | bool m_bLayerInitFinished = false; |
1316 | | bool m_bSchemaFullChecking = false; |
1317 | | bool m_bHandleMultipleImports = false; |
1318 | | bool m_bValidate = false; |
1319 | | bool m_bRemoveUnusedLayers = false; |
1320 | | bool m_bRemoveUnusedFields = false; |
1321 | | bool m_bFirstPassDone = false; |
1322 | | /** Map from a SRS name to a boolean indicating if its coordinate |
1323 | | order is inverted. */ |
1324 | | std::map<CPLString, bool> m_oMapSRSNameToInvertedAxis{}; |
1325 | | |
1326 | | /** Map from geometry field definition to its expected SRSName */ |
1327 | | std::map<OGRGeomFieldDefn *, CPLString> m_oMapGeomFieldDefnToSRSName{}; |
1328 | | |
1329 | | /* Map the ID attribute to the layer it belongs to, e.g. foo.1 -> layer Foo */ |
1330 | | std::map<CPLString, OGRGMLASLayer *> m_oMapElementIdToLayer{}; |
1331 | | |
1332 | | /* Map the ID attribute to the feature PKID (when different from itself) */ |
1333 | | std::map<CPLString, CPLString> m_oMapElementIdToPKID{}; |
1334 | | |
1335 | | std::vector<PairURIFilename> m_aoXSDsManuallyPassed{}; |
1336 | | |
1337 | | /** Default value for srsDimension attribute. */ |
1338 | | int m_nDefaultSrsDimension = 0; |
1339 | | |
1340 | | GMLASConfiguration m_oConf{}; |
1341 | | |
1342 | | /** Schema cache */ |
1343 | | GMLASXSDCache m_oCache{}; |
1344 | | |
1345 | | GMLASXPathMatcher m_oIgnoredXPathMatcher{}; |
1346 | | |
1347 | | GMLASXPathMatcher m_oChildrenElementsConstraintsXPathMatcher{}; |
1348 | | |
1349 | | GMLASXPathMatcher m_oForcedFlattenedXPathMatcher{}; |
1350 | | |
1351 | | GMLASXPathMatcher m_oDisabledFlattenedXPathMatcher{}; |
1352 | | |
1353 | | GMLASSwapCoordinatesEnum m_eSwapCoordinates = GMLAS_SWAP_AUTO; |
1354 | | |
1355 | | /** Base unique identifier */ |
1356 | | CPLString m_osHash{}; |
1357 | | |
1358 | | vsi_l_offset m_nFileSize = 0; |
1359 | | |
1360 | | std::unique_ptr<GMLASReader> m_poReader{}; |
1361 | | |
1362 | | bool m_bEndOfReaderLayers = false; |
1363 | | |
1364 | | int m_nCurMetadataLayerIdx = -1; |
1365 | | |
1366 | | GMLASXLinkResolver m_oXLinkResolver{}; |
1367 | | |
1368 | | CPLString m_osGMLVersionFound{}; |
1369 | | |
1370 | | bool m_bFoundSWE = false; |
1371 | | |
1372 | | // Pointers are also included in m_apoLayers |
1373 | | std::vector<OGRGMLASLayer *> m_apoSWEDataArrayLayersRef{}; |
1374 | | |
1375 | | // Path to gmlasconf.xml. It is a /vsimem temporary file if |
1376 | | // m_bUnlinkConfigFileAfterUse is set. |
1377 | | std::string m_osConfigFile{}; |
1378 | | |
1379 | | // Whether m_osConfigFile should be removed at closing. |
1380 | | bool m_bUnlinkConfigFileAfterUse = false; |
1381 | | |
1382 | | void TranslateClasses(OGRGMLASLayer *poParentLayer, |
1383 | | const GMLASFeatureClass &oFC); |
1384 | | |
1385 | | bool RunFirstPassIfNeeded(GMLASReader *poReader, |
1386 | | GDALProgressFunc pfnProgress, |
1387 | | void *pProgressData); |
1388 | | |
1389 | | void FillOtherMetadataLayer(GDALOpenInfo *poOpenInfo, |
1390 | | const CPLString &osConfigFile, |
1391 | | const std::vector<PairURIFilename> &aoXSDs, |
1392 | | const std::set<CPLString> &oSetSchemaURLs); |
1393 | | |
1394 | | static std::vector<PairURIFilename> |
1395 | | BuildXSDVector(const CPLString &osXSDFilenames); |
1396 | | |
1397 | | void InitReaderWithFirstPassElements(GMLASReader *poReader); |
1398 | | |
1399 | | public: |
1400 | | OGRGMLASDataSource(); |
1401 | | |
1402 | | ~OGRGMLASDataSource() override; |
1403 | | |
1404 | | int GetLayerCount() const override; |
1405 | | const OGRLayer *GetLayer(int) const override; |
1406 | | OGRLayer *GetLayerByName(const char *pszName) override; |
1407 | | |
1408 | | void ResetReading() override; |
1409 | | virtual OGRFeature *GetNextFeature(OGRLayer **ppoBelongingLayer, |
1410 | | double *pdfProgressPct, |
1411 | | GDALProgressFunc pfnProgress, |
1412 | | void *pProgressData) override; |
1413 | | int TestCapability(const char *) const override; |
1414 | | |
1415 | | bool Open(GDALOpenInfo *poOpenInfo); |
1416 | | |
1417 | | std::vector<std::unique_ptr<OGRGMLASLayer>> &GetLayers() |
1418 | 6.18k | { |
1419 | 6.18k | return m_apoLayers; |
1420 | 6.18k | } |
1421 | | |
1422 | | const std::map<CPLString, CPLString> &GetMapURIToPrefix() const |
1423 | 6.18k | { |
1424 | 6.18k | return m_oMapURIToPrefix; |
1425 | 6.18k | } |
1426 | | |
1427 | | const CPLString &GetGMLFilename() const |
1428 | 6.95k | { |
1429 | 6.95k | return m_osGMLFilename; |
1430 | 6.95k | } |
1431 | | |
1432 | | const CPLString &GetGMLVersionFound() const |
1433 | 0 | { |
1434 | 0 | return m_osGMLVersionFound; |
1435 | 0 | } |
1436 | | |
1437 | | OGRLayer *GetFieldsMetadataLayer() |
1438 | 40.0k | { |
1439 | 40.0k | return m_poFieldsMetadataLayer.get(); |
1440 | 40.0k | } |
1441 | | |
1442 | | OGRLayer *GetLayersMetadataLayer() |
1443 | 40.0k | { |
1444 | 40.0k | return m_poLayersMetadataLayer.get(); |
1445 | 40.0k | } |
1446 | | |
1447 | | OGRLayer *GetRelationshipsLayer() |
1448 | 40.0k | { |
1449 | 40.0k | return m_poRelationshipsLayer.get(); |
1450 | 40.0k | } |
1451 | | |
1452 | | OGRGMLASLayer *GetLayerByXPath(const CPLString &osXPath); |
1453 | | |
1454 | | GMLASReader *CreateReader(std::shared_ptr<VSIVirtualHandle> &fpGML, |
1455 | | GDALProgressFunc pfnProgress = nullptr, |
1456 | | void *pProgressData = nullptr); |
1457 | | |
1458 | | GMLASXSDCache &GetCache() |
1459 | 5.36k | { |
1460 | 5.36k | return m_oCache; |
1461 | 5.36k | } |
1462 | | |
1463 | | void PushUnusedGMLFilePointer(std::shared_ptr<VSIVirtualHandle> &fpGML); |
1464 | | std::shared_ptr<VSIVirtualHandle> PopUnusedGMLFilePointer(); |
1465 | | |
1466 | | bool IsLayerInitFinished() const |
1467 | 429k | { |
1468 | 429k | return m_bLayerInitFinished; |
1469 | 429k | } |
1470 | | |
1471 | | GMLASSwapCoordinatesEnum GetSwapCoordinates() const |
1472 | 5.36k | { |
1473 | 5.36k | return m_eSwapCoordinates; |
1474 | 5.36k | } |
1475 | | |
1476 | | const std::map<CPLString, bool> &GetMapIgnoredXPathToWarn() const /** Forwards the map stored in the configuration object m_oConf. */ |
1477 | 5.36k | { |
1478 | 5.36k | return m_oConf.m_oMapIgnoredXPathToWarn; |
1479 | 5.36k | } |
1480 | | |
1481 | | const GMLASXPathMatcher &GetIgnoredXPathMatcher() const |
1482 | 5.36k | { |
1483 | 5.36k | return m_oIgnoredXPathMatcher; |
1484 | 5.36k | } |
1485 | | |
1486 | | const GMLASConfiguration &GetConf() const |
1487 | 475 | { |
1488 | 475 | return m_oConf; |
1489 | 475 | } |
1490 | | |
1491 | | const std::vector<PairURIFilename> &GetXSDsManuallyPassed() const |
1492 | 0 | { |
1493 | 0 | return m_aoXSDsManuallyPassed; |
1494 | 0 | } |
1495 | | }; |
1496 | | |
1497 | | /************************************************************************/ |
1498 | | /* OGRGMLASLayer */ |
1499 | | /************************************************************************/ |
1500 | | |
1501 | | class OGRGMLASLayer final : public OGRLayer /** OGRLayer subclass built around one GMLASFeatureClass (m_oFC). */ |
1502 | | { |
1503 | | friend class OGRGMLASDataSource; |
1504 | | |
1505 | | OGRGMLASDataSource *m_poDS = nullptr; |
1506 | | GMLASFeatureClass m_oFC{}; |
1507 | | mutable bool m_bLayerDefnFinalized = false; |
1508 | | int m_nMaxFieldIndex = 0; |
1509 | | OGRFeatureDefn *m_poFeatureDefn = nullptr; |
1510 | | |
1511 | | /** Map from XPath to corresponding field index in OGR layer |
1512 | | definition */ |
1513 | | std::map<CPLString, int> m_oMapFieldXPathToOGRFieldIdx{}; |
1514 | | |
1515 | | /** Map from XPath to corresponding geometry field index in OGR layer |
1516 | | definition */ |
1517 | | std::map<CPLString, int> m_oMapFieldXPathToOGRGeomFieldIdx{}; |
1518 | | |
1519 | | /** Map from an OGR field index to the corresponding field index in |
1520 | | m_oFC.GetFields() */ |
1521 | | std::map<int, int> m_oMapOGRFieldIdxtoFCFieldIdx{}; |
1522 | | std::map<int, int> m_oMapOGRGeomFieldIdxtoFCFieldIdx{}; /** Presumably the same mapping for OGR geometry field indices - confirm. */ |
1523 | | |
1524 | | /** Map from XPath to corresponding field index in m_oFC.GetFields() */ |
1525 | | std::map<CPLString, int> m_oMapFieldXPathToFCFieldIdx{}; |
1526 | | |
1527 | | bool m_bEOF = false; |
1528 | | std::unique_ptr<GMLASReader> m_poReader{}; |
1529 | | std::shared_ptr<VSIVirtualHandle> m_fpGML{}; |
1530 | | /** OGR field index of the ID field */ |
1531 | | int m_nIDFieldIdx = -1; |
1532 | | /** Whether the ID field is generated, or comes from the XML content */ |
1533 | | bool m_bIDFieldIsGenerated = false; |
1534 | | /** Pointer to parent layer */ |
1535 | | OGRGMLASLayer *m_poParentLayer = nullptr; |
1536 | | /** OGR field index of the field that points to the parent ID */ |
1537 | | int m_nParentIDFieldIdx = -1; |
1538 | | |
1539 | | std::map<CPLString, CPLString> m_oMapSWEFieldToOGRFieldName{}; |
1540 | | |
1541 | | OGRFeature *GetNextRawFeature(); |
1542 | | |
1543 | | bool InitReader(); |
1544 | | |
1545 | | void SetLayerDefnFinalized(bool bVal) |
1546 | 39.9k | { |
1547 | 39.9k | m_bLayerDefnFinalized = bVal; |
1548 | 39.9k | } |
1549 | | |
1550 | | CPLString LaunderFieldName(const CPLString &osFieldName); |
1551 | | |
1552 | | CPLString GetXPathFromOGRFieldIndex(int nIdx) const; |
1553 | | |
1554 | | CPL_DISALLOW_COPY_ASSIGN(OGRGMLASLayer) |
1555 | | |
1556 | | public: |
1557 | | OGRGMLASLayer(OGRGMLASDataSource *poDS, const GMLASFeatureClass &oFC, |
1558 | | OGRGMLASLayer *poParentLayer, bool bAlwaysGenerateOGRPKId); |
1559 | | explicit OGRGMLASLayer(const char *pszLayerName); |
1560 | | ~OGRGMLASLayer() override; |
1561 | | |
1562 | | const char *GetName() const override /** The layer name is whatever GetDescription() returns. */ |
1563 | 1.07M | { |
1564 | 1.07M | return GetDescription(); |
1565 | 1.07M | } |
1566 | | |
1567 | | using OGRLayer::GetLayerDefn; |
1568 | | const OGRFeatureDefn *GetLayerDefn() const override; |
1569 | | void ResetReading() override; |
1570 | | OGRFeature *GetNextFeature() override; |
1571 | | |
1572 | | int TestCapability(const char *) const override /** Always returns FALSE, whatever capability is queried. */ |
1573 | 0 | { |
1574 | 0 | return FALSE; |
1575 | 0 | } |
1576 | | |
1577 | | void SetDataSource(OGRGMLASDataSource *poDS) |
1578 | 5.36k | { |
1579 | 5.36k | m_poDS = poDS; |
1580 | 5.36k | } |
1581 | | |
1582 | | void PostInit(bool bIncludeGeometryXML); |
1583 | | void |
1584 | | ProcessDataRecordCreateFields(CPLXMLNode *psDataRecord, |
1585 | | const std::vector<OGRFeature *> &apoFeatures, |
1586 | | OGRLayer *poFieldsMetadataLayer); |
1587 | | void ProcessDataRecordFillFeature(CPLXMLNode *psDataRecord, |
1588 | | OGRFeature *poFeature); |
1589 | | void |
1590 | | ProcessDataRecordOfDataArrayCreateFields(OGRGMLASLayer *poParentLayer, |
1591 | | CPLXMLNode *psDataRecord, |
1592 | | OGRLayer *poFieldsMetadataLayer); |
1593 | | void CreateCompoundFoldedMappings(); |
1594 | | |
1595 | | const GMLASFeatureClass &GetFeatureClass() const |
1596 | 43.6M | { |
1597 | 43.6M | return m_oFC; |
1598 | 43.6M | } |
1599 | | |
1600 | | int GetOGRFieldIndexFromXPath(const CPLString &osXPath) const; |
1601 | | int GetOGRGeomFieldIndexFromXPath(const CPLString &osXPath) const; |
1602 | | |
1603 | | int GetIDFieldIdx() const |
1604 | 12.4M | { |
1605 | 12.4M | return m_nIDFieldIdx; |
1606 | 12.4M | } |
1607 | | |
1608 | | bool IsGeneratedIDField() const |
1609 | 70.3k | { |
1610 | 70.3k | return m_bIDFieldIsGenerated; |
1611 | 70.3k | } |
1612 | | |
1613 | | OGRGMLASLayer *GetParent() |
1614 | 81.1k | { |
1615 | 81.1k | return m_poParentLayer; |
1616 | 81.1k | } |
1617 | | |
1618 | | int GetParentIDFieldIdx() const |
1619 | 102k | { |
1620 | 102k | return m_nParentIDFieldIdx; |
1621 | 102k | } |
1622 | | |
1623 | | int GetFCFieldIndexFromOGRFieldIdx(int iOGRFieldIdx) const; |
1624 | | int GetFCFieldIndexFromOGRGeomFieldIdx(int iOGRGeomFieldIdx) const; |
1625 | | int GetFCFieldIndexFromXPath(const CPLString &osXPath) const; |
1626 | | |
1627 | | bool EvaluateFilter(OGRFeature *poFeature); |
1628 | | |
1629 | | bool RemoveField(int nIdx); |
1630 | | void InsertNewField(int nInsertPos, const OGRFieldDefn &oFieldDefn, |
1631 | | const CPLString &osXPath); |
1632 | | |
1633 | | CPLString |
1634 | | GetXPathOfFieldLinkForAttrToOtherLayer(const CPLString &osFieldName, |
1635 | | const CPLString &osTargetLayerXPath); |
1636 | | CPLString |
1637 | | CreateLinkForAttrToOtherLayer(const CPLString &osFieldName, |
1638 | | const CPLString &osTargetLayerXPath); |
1639 | | |
1640 | | const std::map<CPLString, int> &GetMapFieldXPathToOGRFieldIdx() const |
1641 | 0 | { |
1642 | 0 | return m_oMapFieldXPathToOGRFieldIdx; |
1643 | 0 | } |
1644 | | }; |
1645 | | |
1646 | | /************************************************************************/ |
1647 | | /* GMLASReader */ |
1648 | | /************************************************************************/ |
1649 | | |
1650 | | class GMLASReader final : public DefaultHandler |
1651 | | { |
1652 | | /** Schema cache */ |
1653 | | GMLASXSDCache &m_oCache; |
1654 | | |
1655 | | /** Object to tell if a XPath must be ignored */ |
1656 | | const GMLASXPathMatcher &m_oIgnoredXPathMatcher; |
1657 | | |
1658 | | /** XLink resolver */ |
1659 | | GMLASXLinkResolver &m_oXLinkResolver; |
1660 | | |
1661 | | /** Whether we should stop parsing */ |
1662 | | bool m_bParsingError = false; |
1663 | | |
1664 | | /** Xerces reader object */ |
1665 | | std::unique_ptr<SAX2XMLReader> m_poSAXReader{}; |
1666 | | |
1667 | | /** Token for Xerces */ |
1668 | | XMLPScanToken m_oToFill{}; |
1669 | | |
1670 | | /** File descriptor */ |
1671 | | std::shared_ptr<VSIVirtualHandle> m_fp{}; |
1672 | | |
1673 | | /** Input source */ |
1674 | | std::unique_ptr<GMLASInputSource> m_GMLInputSource{}; |
1675 | | |
1676 | | /** Whether we are at the first iteration */ |
1677 | | bool m_bFirstIteration = true; |
1678 | | |
1679 | | /** Whether we have reached end of file (or an error) */ |
1680 | | bool m_bEOF = false; |
1681 | | |
1682 | | /** Whether GetNextFeature() has been user interrupted (progress cbk) */ |
1683 | | bool m_bInterrupted = false; |
1684 | | |
1685 | | /** Error handler (for Xerces reader) */ |
1686 | | GMLASErrorHandler m_oErrorHandler{}; |
1687 | | |
1688 | | /** Map URI namespaces to their prefix */ |
1689 | | std::map<CPLString, CPLString> m_oMapURIToPrefix{}; |
1690 | | |
1691 | | /** List of OGR layers */ |
1692 | | std::vector<std::unique_ptr<OGRGMLASLayer>> *m_apoLayers = nullptr; |
1693 | | |
1694 | | /** List of features ready for consumption */ |
1695 | | std::list<std::pair<std::unique_ptr<OGRFeature>, OGRGMLASLayer *>> |
1696 | | m_aoFeaturesReady{}; |
1697 | | |
1698 | | /** OGR field index of the current field */ |
1699 | | int m_nCurFieldIdx = -1; |
1700 | | |
1701 | | /** OGR geometry field index of the current field */ |
1702 | | int m_nCurGeomFieldIdx = -1; |
1703 | | |
1704 | | /** XML nested level of current field */ |
1705 | | int m_nCurFieldLevel = 0; |
1706 | | |
1707 | | /** Whether we should store all content of the current field as XML */ |
1708 | | bool m_bIsXMLBlob = false; |
1709 | | bool m_bIsXMLBlobIncludeUpper = false; |
1710 | | |
1711 | | /** Content of the current field */ |
1712 | | CPLString m_osTextContent{}; |
1713 | | |
1714 | | /** For list field types, list of content */ |
1715 | | CPLStringList m_osTextContentList{}; |
1716 | | /** Estimated memory footprint of m_osTextContentList */ |
1717 | | size_t m_nTextContentListEstimatedSize = 0; |
1718 | | |
1719 | | /** Which layer is of interest for the reader, or NULL for all */ |
1720 | | OGRGMLASLayer *m_poLayerOfInterest = nullptr; |
1721 | | |
1722 | | /** Stack of length of split XPath components */ |
1723 | | std::vector<size_t> m_anStackXPathLength{}; |
1724 | | |
1725 | | /** Current absolute XPath */ |
1726 | | CPLString m_osCurXPath{}; |
1727 | | |
1728 | | /** Current XPath, relative to top-level feature */ |
1729 | | CPLString m_osCurSubXPath{}; |
1730 | | |
1731 | | /** Current XML nesting level */ |
1732 | | int m_nLevel = 0; |
1733 | | |
1734 | | /** Whether we are in a gml:boundedBy element at level 1 */ |
1735 | | bool m_bInGMLBoundedByLevel1 = false; |
1736 | | |
1737 | | /** Default value for srsDimension attribute. */ |
1738 | | int m_nDefaultSrsDimension = 0; |
1739 | | |
1740 | | /** Map layer to global FID */ |
1741 | | std::map<OGRLayer *, int> m_oMapGlobalCounter{}; |
1742 | | |
1743 | | /** Parsing context */ |
1744 | | struct Context |
1745 | | { |
1746 | | /** XML nesting level */ |
1747 | | int m_nLevel = 0; |
1748 | | |
1749 | | /** Current feature */ |
1750 | | OGRFeature *m_poFeature = nullptr; |
1751 | | |
1752 | | /** Layer of m_poFeature */ |
1753 | | OGRGMLASLayer *m_poLayer = nullptr; |
1754 | | |
1755 | | /** Current layer in a repeated group */ |
1756 | | OGRGMLASLayer *m_poGroupLayer = nullptr; |
1757 | | |
1758 | | /** Nesting level of m_poGroupLayer */ |
1759 | | int m_nGroupLayerLevel = -1; |
1760 | | |
1761 | | /** Index of the last processed OGR field in m_poGroupLayer */ |
1762 | | int m_nLastFieldIdxGroupLayer = -1; |
1763 | | |
1764 | | /** Map layer to local FID */ |
1765 | | std::map<OGRLayer *, int> m_oMapCounter{}; |
1766 | | |
1767 | | /** Current XPath, relative to (current) top-level feature */ |
1768 | | CPLString m_osCurSubXPath{}; |
1769 | | |
1770 | | void Dump() const; |
1771 | | }; |
1772 | | |
1773 | | /** Current context */ |
1774 | | Context m_oCurCtxt{}; |
1775 | | |
1776 | | /** Stack of saved contexts */ |
1777 | | std::vector<Context> m_aoStackContext{}; |
1778 | | |
1779 | | /** Context used in m_apsXMLNodeStack */ |
1780 | | struct NodeLastChild |
1781 | | { |
1782 | | /** Node held by this stack entry */ |
1783 | | CPLXMLNode *psNode = nullptr; |
1784 | | |
1785 | | /** Last child of psNode, kept so that appending a child is fast */ |
1786 | | CPLXMLNode *psLastChild = nullptr; |
1787 | | }; |
1788 | | |
1789 | | /** Stack of contexts to build XML tree of GML Geometry */ |
1790 | | std::vector<NodeLastChild> m_apsXMLNodeStack{}; |
1791 | | |
1792 | | /** Counter used to prevent XML billion laugh attacks */ |
1793 | | int m_nEntityCounter = 0; |
1794 | | |
1795 | | /** Maximum allowed XML nesting level */ |
1796 | | int m_nMaxLevel = 100; |
1797 | | |
1798 | | /** Maximum allowed size of XML content in bytes */ |
1799 | | size_t m_nMaxContentSize = 512000000; |
1800 | | |
1801 | | /** Map from a SRS name to a boolean indicating if its coordinate |
1802 | | order is inverted. */ |
1803 | | std::map<CPLString, bool> m_oMapSRSNameToInvertedAxis{}; |
1804 | | |
1805 | | /** Set of geometry fields with unknown SRS */ |
1806 | | std::set<OGRGeomFieldDefn *> m_oSetGeomFieldsWithUnknownSRS{}; |
1807 | | |
1808 | | /** Map from geometry field definition to its expected SRSName. |
1809 | | This is used to know if reprojection must be done */ |
1810 | | std::map<OGRGeomFieldDefn *, CPLString> m_oMapGeomFieldDefnToSRSName{}; |
1811 | | |
1812 | | /** Whether this parsing involves schema validation */ |
1813 | | bool m_bValidate = false; |
1814 | | |
1815 | | /** Entity resolver used during schema validation */ |
1816 | | std::unique_ptr<GMLASBaseEntityResolver> m_poEntityResolver{}; |
1817 | | |
1818 | | /** First level from which warnings about ignored XPath should be |
1819 | | silent. */ |
1820 | | int m_nLevelSilentIgnoredXPath = -1; |
1821 | | |
1822 | | /** Whether a warning should be emitted when an element or attribute is |
1823 | | found in the document parsed, but ignored because of the ignored |
1824 | | XPath defined. */ |
1825 | | std::map<CPLString, bool> m_oMapIgnoredXPathToWarn{}; |
1826 | | |
1827 | | /** Policy to decide when to invert coordinates */ |
1828 | | GMLASSwapCoordinatesEnum m_eSwapCoordinates = GMLAS_SWAP_AUTO; |
1829 | | |
1830 | | /** Initial pass to guess SRS, etc... */ |
1831 | | bool m_bInitialPass = false; |
1832 | | |
1833 | | /** Whether to process swe:DataArray in a special way */ |
1834 | | bool m_bProcessSWEDataArray = false; |
1835 | | |
1836 | | /** Whether to process swe:DataRecord in a special way */ |
1837 | | bool m_bProcessSWEDataRecord = false; |
1838 | | |
1839 | | /** Depth level of the swe:DataArray element */ |
1840 | | int m_nSWEDataArrayLevel = -1; |
1841 | | |
1842 | | /** Name of the field to which the DataArray belongs */ |
1843 | | CPLString m_osSWEDataArrayParentField{}; |
1844 | | |
1845 | | /** Depth level of the swe:DataRecord element */ |
1846 | | int m_nSWEDataRecordLevel = -1; |
1847 | | |
1848 | | OGRLayer *m_poFieldsMetadataLayer = nullptr; |
1849 | | OGRLayer *m_poLayersMetadataLayer = nullptr; |
1850 | | OGRLayer *m_poRelationshipsLayer = nullptr; |
1851 | | |
1852 | | /** Base unique identifier */ |
1853 | | CPLString m_osHash{}; |
1854 | | |
1855 | | vsi_l_offset m_nFileSize = 0; |
1856 | | |
1857 | | bool m_bWarnUnexpected = false; |
1858 | | |
1859 | | /** Map from layer to a map of field XPath to a set of matching |
1860 | | URL specific resolution rule index */ |
1861 | | std::map<OGRGMLASLayer *, std::map<CPLString, std::set<int>>> |
1862 | | m_oMapXLinkFields{}; |
1863 | | |
1864 | | /** Variables that could be local, but that are more efficient to keep |
1865 | | persistent, so as to save many memory allocations/deallocations */ |
1866 | | CPLString m_osLocalname{}; |
1867 | | CPLString m_osNSUri{}; |
1868 | | CPLString m_osNSPrefix{}; |
1869 | | CPLString m_osXPath{}; |
1870 | | CPLString m_osLayerXPath{}; |
1871 | | CPLString m_osAttrNSUri{}; |
1872 | | CPLString m_osAttrNSPrefix{}; |
1873 | | CPLString m_osAttrLocalName{}; |
1874 | | CPLString m_osAttrXPath{}; |
1875 | | CPLString m_osAttrValue{}; |
1876 | | CPLString m_osText{}; |
1877 | | |
1878 | | std::vector<OGRGMLASLayer *> m_apoSWEDataArrayLayersRef{}; |
1879 | | std::vector<std::unique_ptr<OGRGMLASLayer>> m_apoSWEDataArrayLayersOwned{}; |
1880 | | |
1881 | | int m_nSWEDataArrayLayerIdx = 0; |
1882 | | |
1883 | | /* Set of 3 maps used for xlink:href="#xxxx" internal links resolution */ |
1884 | | /* 1) map the ID attribute to the layer it belongs to, e.g. foo.1 -> layer Foo */ |
1885 | | std::map<CPLString, OGRGMLASLayer *> m_oMapElementIdToLayer{}; |
1886 | | /* 2) map the ID attribute to the feature PKID (when different from itself) |
1887 | | */ |
1888 | | std::map<CPLString, CPLString> m_oMapElementIdToPKID{}; |
1889 | | /* 3) map each (layer, field_xpath) to the list of IDs it refers to */ |
1890 | | /* e.g (layer Bar, field_xpath) -> [foo.1, foo.2] */ |
1891 | | std::map<std::pair<OGRGMLASLayer *, CPLString>, std::vector<CPLString>> |
1892 | | m_oMapFieldXPathToLinkValue{}; |
1893 | | |
1894 | | /* Map layer's XPath to layer (for layers that are not group) */ |
1895 | | std::map<CPLString, OGRGMLASLayer *> m_oMapXPathToLayer{}; |
1896 | | |
1897 | | /* Map OGR field XPath to layer (for layers that are group) */ |
1898 | | std::map<CPLString, OGRGMLASLayer *> m_oMapFieldXPathToGroupLayer{}; |
1899 | | |
1900 | | /* Map layer's XPath to layer (for layers that are repeated sequences) */ |
1901 | | std::map<CPLString, std::vector<OGRGMLASLayer *>> |
1902 | | m_oMapXPathToLayerRepeadedSequence{}; |
1903 | | |
1904 | | void SetField(OGRFeature *poFeature, OGRGMLASLayer *poLayer, int nAttrIdx, |
1905 | | const CPLString &osAttrValue); |
1906 | | |
1907 | | void CreateNewFeature(const CPLString &osLocalname); |
1908 | | |
1909 | | void PushFeatureReady(std::unique_ptr<OGRFeature> &&, |
1910 | | OGRGMLASLayer *poLayer); |
1911 | | |
1912 | | void PushContext(const Context &oContext); |
1913 | | void PopContext(); |
1914 | | |
1915 | | void BuildXMLBlobStartElement(const CPLString &osXPath, |
1916 | | const Attributes &attrs); |
1917 | | |
1918 | | OGRGMLASLayer *GetLayerByXPath(const CPLString &osXPath); |
1919 | | |
1920 | | void AttachAsLastChild(CPLXMLNode *psNode); |
1921 | | |
1922 | | void ProcessSWEDataArray(CPLXMLNode *psRoot); |
1923 | | void ProcessSWEDataRecord(CPLXMLNode *psRoot); |
1924 | | void ProcessGeometry(CPLXMLNode *psRoot); |
1925 | | |
1926 | | void ProcessAttributes(const Attributes &attrs); |
1927 | | void ProcessXLinkHref(int nAttrIdx, const CPLString &osAttrXPath, |
1928 | | const CPLString &osAttrValue); |
1929 | | void |
1930 | | ExploreXMLDoc(const CPLString &osAttrXPath, |
1931 | | const GMLASXLinkResolutionConf::URLSpecificResolution &oRule, |
1932 | | CPLXMLNode *psNode, const CPLString &osParentXPath, |
1933 | | const GMLASXPathMatcher &oMatcher, |
1934 | | const std::map<CPLString, size_t> &oMapFieldXPathToIdx); |
1935 | | |
1936 | | void CreateFieldsForURLSpecificRules(); |
1937 | | void CreateFieldsForURLSpecificRule( |
1938 | | OGRGMLASLayer *poLayer, int nFieldIdx, const CPLString &osFieldXPath, |
1939 | | int &nInsertFieldIdx, |
1940 | | const GMLASXLinkResolutionConf::URLSpecificResolution &oRule); |
1941 | | |
1942 | | /** Returns true when this is not the initial pass and m_nCurFieldIdx is non-negative */ bool FillTextContent() const |
1943 | 3.97M | { |
1944 | 3.97M | return !m_bInitialPass && m_nCurFieldIdx >= 0; |
1945 | 3.97M | } |
1946 | | |
1947 | | void ProcessInternalXLinkFirstPass( |
1948 | | bool bRemoveUnusedFields, |
1949 | | std::map<OGRGMLASLayer *, std::set<CPLString>> &oMapUnusedFields); |
1950 | | |
1951 | | CPL_DISALLOW_COPY_ASSIGN(GMLASReader) |
1952 | | |
1953 | | public: |
1954 | | GMLASReader(GMLASXSDCache &oCache, |
1955 | | const GMLASXPathMatcher &oIgnoredXPathMatcher, |
1956 | | GMLASXLinkResolver &oXLinkResolver); |
1957 | | ~GMLASReader() override; |
1958 | | |
1959 | | bool Init(const char *pszFilename, |
1960 | | const std::shared_ptr<VSIVirtualHandle> &fp, |
1961 | | const std::map<CPLString, CPLString> &oMapURIToPrefix, |
1962 | | std::vector<std::unique_ptr<OGRGMLASLayer>> &apoLayers, |
1963 | | bool bValidate, const std::vector<PairURIFilename> &aoXSDs, |
1964 | | bool bSchemaFullChecking, bool bHandleMultipleImports); |
1965 | | |
1966 | | void SetLayerOfInterest(OGRGMLASLayer *poLayer); |
1967 | | |
1968 | | /** Replaces m_oMapIgnoredXPathToWarn with a copy of oMap */ void SetMapIgnoredXPathToWarn(const std::map<CPLString, bool> &oMap) |
1969 | 6.18k | { |
1970 | 6.18k | m_oMapIgnoredXPathToWarn = oMap; |
1971 | 6.18k | } |
1972 | | |
1973 | | /** Sets the policy stored in m_eSwapCoordinates */ void SetSwapCoordinates(GMLASSwapCoordinatesEnum eVal) |
1974 | 5.36k | { |
1975 | 5.36k | m_eSwapCoordinates = eVal; |
1976 | 5.36k | } |
1977 | | |
1978 | | /** Returns a const reference to the m_fp shared handle (no copy of the shared_ptr is made) */ const std::shared_ptr<VSIVirtualHandle> &GetFP() const |
1979 | 314 | { |
1980 | 314 | return m_fp; |
1981 | 314 | } |
1982 | | |
1983 | | /** Returns a const reference to the map from SRS name to its inverted-axis-order flag */ const std::map<CPLString, bool> &GetMapSRSNameToInvertedAxis() const |
1984 | 821 | { |
1985 | 821 | return m_oMapSRSNameToInvertedAxis; |
1986 | 821 | } |
1987 | | |
1988 | | /** Replaces m_oMapSRSNameToInvertedAxis with a copy of oMap */ void SetMapSRSNameToInvertedAxis(const std::map<CPLString, bool> &oMap) |
1989 | 5.36k | { |
1990 | 5.36k | m_oMapSRSNameToInvertedAxis = oMap; |
1991 | 5.36k | } |
1992 | | |
1993 | | /** Returns a const reference to the map from geometry field definition to its expected SRSName */ const std::map<OGRGeomFieldDefn *, CPLString> & |
1994 | | GetMapGeomFieldDefnToSRSName() const |
1995 | 821 | { |
1996 | 821 | return m_oMapGeomFieldDefnToSRSName; |
1997 | 821 | } |
1998 | | |
1999 | | /** Replaces m_oMapGeomFieldDefnToSRSName with a copy of oMap */ void SetMapGeomFieldDefnToSRSName( |
2000 | | const std::map<OGRGeomFieldDefn *, CPLString> &oMap) |
2001 | 5.36k | { |
2002 | 5.36k | m_oMapGeomFieldDefnToSRSName = oMap; |
2003 | 5.36k | } |
2004 | | |
2005 | | /** Returns a const reference to the map from element ID attribute to layer */ const std::map<CPLString, OGRGMLASLayer *> &GetMapElementIdToLayer() const |
2006 | 821 | { |
2007 | 821 | return m_oMapElementIdToLayer; |
2008 | 821 | } |
2009 | | |
2010 | | /** Replaces m_oMapElementIdToLayer with a copy of oMap */ void |
2011 | | SetMapElementIdToLayer(const std::map<CPLString, OGRGMLASLayer *> &oMap) |
2012 | 5.36k | { |
2013 | 5.36k | m_oMapElementIdToLayer = oMap; |
2014 | 5.36k | } |
2015 | | |
2016 | | /** Returns a const reference to the map from element ID attribute to feature PKID */ const std::map<CPLString, CPLString> &GetMapElementIdToPKID() const |
2017 | 821 | { |
2018 | 821 | return m_oMapElementIdToPKID; |
2019 | 821 | } |
2020 | | |
2021 | | /** Replaces m_oMapElementIdToPKID with a copy of oMap */ void SetMapElementIdToPKID(const std::map<CPLString, CPLString> &oMap) |
2022 | 5.36k | { |
2023 | 5.36k | m_oMapElementIdToPKID = oMap; |
2024 | 5.36k | } |
2025 | | |
2026 | | /** Returns the default value for the srsDimension attribute (m_nDefaultSrsDimension) */ int GetDefaultSrsDimension() const |
2027 | 821 | { |
2028 | 821 | return m_nDefaultSrsDimension; |
2029 | 821 | } |
2030 | | |
2031 | | /** Sets the default value for the srsDimension attribute (m_nDefaultSrsDimension) */ void SetDefaultSrsDimension(int nDim) |
2032 | 5.36k | { |
2033 | 5.36k | m_nDefaultSrsDimension = nDim; |
2034 | 5.36k | } |
2035 | | |
2036 | | /** Sets the base unique identifier (m_osHash) to a copy of osHash */ void SetHash(const CPLString &osHash) |
2037 | 6.18k | { |
2038 | 6.18k | m_osHash = osHash; |
2039 | 6.18k | } |
2040 | | |
2041 | | /** Stores nFileSize in m_nFileSize */ void SetFileSize(vsi_l_offset nFileSize) |
2042 | 6.18k | { |
2043 | 6.18k | m_nFileSize = nFileSize; |
2044 | 6.18k | } |
2045 | | |
2046 | | OGRFeature *GetNextFeature(OGRGMLASLayer **ppoBelongingLayer = nullptr, |
2047 | | GDALProgressFunc pfnProgress = nullptr, |
2048 | | void *pProgressData = nullptr); |
2049 | | |
2050 | | virtual void startElement(const XMLCh *const uri, |
2051 | | const XMLCh *const localname, |
2052 | | const XMLCh *const qname, |
2053 | | const Attributes &attrs) override; |
2054 | | virtual void endElement(const XMLCh *const uri, |
2055 | | const XMLCh *const localname, |
2056 | | const XMLCh *const qname) override; |
2057 | | |
2058 | | virtual void characters(const XMLCh *const chars, |
2059 | | const XMLSize_t length) override; |
2060 | | |
2061 | | void startEntity(const XMLCh *const name) override; |
2062 | | |
2063 | | bool RunFirstPass(GDALProgressFunc pfnProgress, void *pProgressData, |
2064 | | bool bRemoveUnusedLayers, bool bRemoveUnusedFields, |
2065 | | bool bProcessSWEDataArray, |
2066 | | OGRLayer *poFieldsMetadataLayer, |
2067 | | OGRLayer *poLayersMetadataLayer, |
2068 | | OGRLayer *poRelationshipsLayer, |
2069 | | std::set<CPLString> &aoSetRemovedLayerNames); |
2070 | | |
2071 | | static bool LoadXSDInParser(SAX2XMLReader *poParser, GMLASXSDCache &oCache, |
2072 | | GMLASBaseEntityResolver &oXSDEntityResolver, |
2073 | | const CPLString &osBaseDirname, |
2074 | | const CPLString &osXSDFilename, |
2075 | | Grammar **ppoGrammar, bool bSchemaFullChecking, |
2076 | | bool bHandleMultipleImports); |
2077 | | |
2078 | | void SetSWEDataArrayLayersRef(const std::vector<OGRGMLASLayer *> &ar); |
2079 | | |
2080 | | /** Sets the m_bProcessSWEDataRecord flag */ void SetProcessDataRecord(bool b) |
2081 | 6.18k | { |
2082 | 6.18k | m_bProcessSWEDataRecord = b; |
2083 | 6.18k | } |
2084 | | |
2085 | | /** Moves m_apoSWEDataArrayLayersOwned out to the caller, who takes ownership of the layers; the member is left moved-from */ std::vector<std::unique_ptr<OGRGMLASLayer>> StealSWEDataArrayLayersOwned() |
2086 | 821 | { |
2087 | 821 | return std::move(m_apoSWEDataArrayLayersOwned); |
2088 | 821 | } |
2089 | | }; |
2090 | | |
2091 | | CPLString OGRGMLASTruncateIdentifier(const CPLString &osName, |
2092 | | int nIdentMaxLength); |
2093 | | |
2094 | | CPLString OGRGMLASAddSerialNumber(const CPLString &osNameIn, int iOccurrence, |
2095 | | size_t nOccurrences, int nIdentMaxLength); |
2096 | | |
2097 | | #endif // OGR_GMLAS_INCLUDED |