/src/gdal/ogr/ogrsf_frmts/gmlas/ogrgmlasxsdcache.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * Project: OGR |
3 | | * Purpose: OGRGMLASDriver implementation |
4 | | * Author: Even Rouault, <even dot rouault at spatialys dot com> |
5 | | * |
6 | | * Initial development funded by the European Earth observation programme |
7 | | * Copernicus |
8 | | * |
9 | | ****************************************************************************** |
10 | | * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "ogr_gmlas.h" |
16 | | |
17 | | #include "cpl_http.h" |
18 | | #include "cpl_sha256.h" |
19 | | |
20 | | /************************************************************************/ |
21 | | /* SetCacheDirectory() */ |
22 | | /************************************************************************/ |
23 | | |
24 | | void GMLASResourceCache::SetCacheDirectory(const std::string &osCacheDirectory) |
25 | 975 | { |
26 | 975 | m_osCacheDirectory = osCacheDirectory; |
27 | 975 | } |
28 | | |
29 | | /************************************************************************/ |
30 | | /* RecursivelyCreateDirectoryIfNeeded() */ |
31 | | /************************************************************************/ |
32 | | |
33 | | bool GMLASResourceCache::RecursivelyCreateDirectoryIfNeeded( |
34 | | const std::string &osDirname) |
35 | 0 | { |
36 | 0 | VSIStatBufL sStat; |
37 | 0 | if (VSIStatL(osDirname.c_str(), &sStat) == 0) |
38 | 0 | { |
39 | 0 | return true; |
40 | 0 | } |
41 | | |
42 | 0 | std::string osParent = CPLGetDirnameSafe(osDirname.c_str()); |
43 | 0 | if (!osParent.empty() && osParent != ".") |
44 | 0 | { |
45 | 0 | if (!RecursivelyCreateDirectoryIfNeeded(osParent.c_str())) |
46 | 0 | return false; |
47 | 0 | } |
48 | 0 | return VSIMkdir(osDirname.c_str(), 0755) == 0; |
49 | 0 | } |
50 | | |
51 | | bool GMLASResourceCache::RecursivelyCreateDirectoryIfNeeded() |
52 | 0 | { |
53 | 0 | if (!m_bHasCheckedCacheDirectory) |
54 | 0 | { |
55 | 0 | m_bHasCheckedCacheDirectory = true; |
56 | 0 | if (!RecursivelyCreateDirectoryIfNeeded(m_osCacheDirectory)) |
57 | 0 | { |
58 | 0 | CPLError(CE_Warning, CPLE_AppDefined, "Cannot create %s", |
59 | 0 | m_osCacheDirectory.c_str()); |
60 | 0 | m_osCacheDirectory.clear(); |
61 | 0 | return false; |
62 | 0 | } |
63 | 0 | } |
64 | 0 | return true; |
65 | 0 | } |
66 | | |
67 | | /************************************************************************/ |
68 | | /* GetCachedFilename() */ |
69 | | /************************************************************************/ |
70 | | |
71 | | std::string GMLASResourceCache::GetCachedFilename(const std::string &osResource) |
72 | 0 | { |
73 | 0 | std::string osLaunderedName(osResource); |
74 | 0 | if (STARTS_WITH(osLaunderedName.c_str(), "http://")) |
75 | 0 | osLaunderedName = osLaunderedName.substr(strlen("http://")); |
76 | 0 | else if (STARTS_WITH(osLaunderedName.c_str(), "https://")) |
77 | 0 | osLaunderedName = osLaunderedName.substr(strlen("https://")); |
78 | 0 | for (size_t i = 0; i < osLaunderedName.size(); i++) |
79 | 0 | { |
80 | 0 | if (!isalnum(static_cast<unsigned char>(osLaunderedName[i])) && |
81 | 0 | osLaunderedName[i] != '.') |
82 | 0 | osLaunderedName[i] = '_'; |
83 | 0 | } |
84 | | |
85 | | // If filename is too long, then truncate it and put a hash at the end |
86 | | // We try to make sure that the whole filename (including the cache path) |
87 | | // fits into 255 characters, for windows compat |
88 | |
|
89 | 0 | const size_t nWindowsMaxFilenameSize = 255; |
90 | | // 60 is arbitrary but should be sufficient for most people. We could |
91 | | // always take into account m_osCacheDirectory.size(), but if we want to |
92 | | // to be able to share caches between computers, then this would be |
93 | | // impractical. |
94 | 0 | const size_t nTypicalMaxSizeForDirName = 60; |
95 | 0 | const size_t nSizeForDirName = |
96 | 0 | (m_osCacheDirectory.size() > nTypicalMaxSizeForDirName && |
97 | 0 | m_osCacheDirectory.size() < nWindowsMaxFilenameSize - strlen(".tmp") - |
98 | 0 | 2 * CPL_SHA256_HASH_SIZE) |
99 | 0 | ? m_osCacheDirectory.size() |
100 | 0 | : nTypicalMaxSizeForDirName; |
101 | 0 | CPLAssert(nWindowsMaxFilenameSize >= nSizeForDirName); |
102 | 0 | const size_t nMaxFilenameSize = nWindowsMaxFilenameSize - nSizeForDirName; |
103 | |
|
104 | 0 | CPLAssert(nMaxFilenameSize >= strlen(".tmp")); |
105 | 0 | if (osLaunderedName.size() >= nMaxFilenameSize - strlen(".tmp")) |
106 | 0 | { |
107 | 0 | GByte abyHash[CPL_SHA256_HASH_SIZE]; |
108 | 0 | CPL_SHA256(osResource.c_str(), osResource.size(), abyHash); |
109 | 0 | char *pszHash = CPLBinaryToHex(CPL_SHA256_HASH_SIZE, abyHash); |
110 | 0 | osLaunderedName.resize(nMaxFilenameSize - strlen(".tmp") - |
111 | 0 | 2 * CPL_SHA256_HASH_SIZE); |
112 | 0 | osLaunderedName += pszHash; |
113 | 0 | CPLFree(pszHash); |
114 | 0 | CPLDebug("GMLAS", "Cached filename truncated to %s", |
115 | 0 | osLaunderedName.c_str()); |
116 | 0 | } |
117 | |
|
118 | 0 | return CPLFormFilenameSafe(m_osCacheDirectory.c_str(), |
119 | 0 | osLaunderedName.c_str(), nullptr); |
120 | 0 | } |
121 | | |
122 | | /************************************************************************/ |
123 | | /* CacheAllGML321() */ |
124 | | /************************************************************************/ |
125 | | |
126 | | bool GMLASXSDCache::CacheAllGML321() |
127 | 0 | { |
128 | | // As of today (2024-01-02), the schemas in https://schemas.opengis.net/gml/3.2.1 |
129 | | // are actually the same as the ones in the https://schemas.opengis.net/gml/gml-3_2_2.zip archive. |
130 | | // Download the later and unzip it for faster fetching of GML schemas. |
131 | |
|
132 | 0 | bool bSuccess = false; |
133 | 0 | CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler); |
134 | |
|
135 | 0 | const char *pszHTTPZIP = "https://schemas.opengis.net/gml/gml-3_2_2.zip"; |
136 | 0 | CPLHTTPResult *psResult = CPLHTTPFetch(pszHTTPZIP, nullptr); |
137 | 0 | if (psResult && psResult->nDataLen) |
138 | 0 | { |
139 | 0 | const std::string osZIPFilename( |
140 | 0 | VSIMemGenerateHiddenFilename("temp.zip")); |
141 | 0 | auto fpZIP = |
142 | 0 | VSIFileFromMemBuffer(osZIPFilename.c_str(), psResult->pabyData, |
143 | 0 | psResult->nDataLen, FALSE); |
144 | 0 | if (fpZIP) |
145 | 0 | { |
146 | 0 | VSIFCloseL(fpZIP); |
147 | |
|
148 | 0 | const std::string osVSIZIPFilename("/vsizip/" + osZIPFilename); |
149 | 0 | const CPLStringList aosFiles( |
150 | 0 | VSIReadDirRecursive(osVSIZIPFilename.c_str())); |
151 | 0 | for (int i = 0; i < aosFiles.size(); ++i) |
152 | 0 | { |
153 | 0 | if (strstr(aosFiles[i], ".xsd")) |
154 | 0 | { |
155 | 0 | const std::string osFilename( |
156 | 0 | std::string("https://schemas.opengis.net/gml/3.2.1/") + |
157 | 0 | CPLGetFilename(aosFiles[i])); |
158 | 0 | const std::string osCachedFileName( |
159 | 0 | GetCachedFilename(osFilename.c_str())); |
160 | |
|
161 | 0 | std::string osTmpfilename(osCachedFileName + ".tmp"); |
162 | 0 | if (CPLCopyFile( |
163 | 0 | osTmpfilename.c_str(), |
164 | 0 | (osVSIZIPFilename + "/" + aosFiles[i]).c_str()) == |
165 | 0 | 0) |
166 | 0 | { |
167 | 0 | VSIRename(osTmpfilename.c_str(), |
168 | 0 | osCachedFileName.c_str()); |
169 | 0 | bSuccess = true; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | } |
173 | 0 | } |
174 | 0 | VSIUnlink(osZIPFilename.c_str()); |
175 | 0 | } |
176 | 0 | CPLHTTPDestroyResult(psResult); |
177 | 0 | if (!bSuccess) |
178 | 0 | { |
179 | 0 | CPLDebugOnce("GMLAS", "Cannot get GML schemas from %s", pszHTTPZIP); |
180 | 0 | } |
181 | 0 | return bSuccess; |
182 | 0 | } |
183 | | |
184 | | /************************************************************************/ |
185 | | /* CacheAllISO20070417() */ |
186 | | /************************************************************************/ |
187 | | |
188 | | bool GMLASXSDCache::CacheAllISO20070417() |
189 | 0 | { |
190 | | // As of today (2024-01-02), the schemas in https://schemas.opengis.net/iso/19139/20070417/ |
191 | | // are actually the same as the ones in the iso19139-20070417_5-v20220526.zip archive |
192 | | // in https://schemas.opengis.net/iso/19139/iso19139-20070417.zip archive. |
193 | | // Download the later and unzip it for faster fetching of ISO schemas. |
194 | |
|
195 | 0 | bool bSuccess = false; |
196 | 0 | CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler); |
197 | |
|
198 | 0 | const char *pszHTTPZIP = |
199 | 0 | "https://schemas.opengis.net/iso/19139/iso19139-20070417.zip"; |
200 | 0 | CPLHTTPResult *psResult = CPLHTTPFetch(pszHTTPZIP, nullptr); |
201 | 0 | if (psResult && psResult->nDataLen) |
202 | 0 | { |
203 | 0 | const std::string osZIPFilename( |
204 | 0 | VSIMemGenerateHiddenFilename("temp.zip")); |
205 | 0 | auto fpZIP = |
206 | 0 | VSIFileFromMemBuffer(osZIPFilename.c_str(), psResult->pabyData, |
207 | 0 | psResult->nDataLen, FALSE); |
208 | 0 | if (fpZIP) |
209 | 0 | { |
210 | 0 | VSIFCloseL(fpZIP); |
211 | |
|
212 | 0 | const std::string osVSIZIPFilename( |
213 | 0 | "/vsizip//vsizip/" + osZIPFilename + |
214 | 0 | "/iso19139-20070417_5-v20220526.zip"); |
215 | 0 | const CPLStringList aosFiles( |
216 | 0 | VSIReadDirRecursive(osVSIZIPFilename.c_str())); |
217 | 0 | for (int i = 0; i < aosFiles.size(); ++i) |
218 | 0 | { |
219 | 0 | if (STARTS_WITH(aosFiles[i], "iso/19139/20070417/") && |
220 | 0 | strstr(aosFiles[i], ".xsd")) |
221 | 0 | { |
222 | 0 | const std::string osFilename( |
223 | 0 | std::string("https://schemas.opengis.net/") + |
224 | 0 | aosFiles[i]); |
225 | 0 | const std::string osCachedFileName( |
226 | 0 | GetCachedFilename(osFilename.c_str())); |
227 | |
|
228 | 0 | std::string osTmpfilename(osCachedFileName + ".tmp"); |
229 | 0 | if (CPLCopyFile( |
230 | 0 | osTmpfilename.c_str(), |
231 | 0 | (osVSIZIPFilename + "/" + aosFiles[i]).c_str()) == |
232 | 0 | 0) |
233 | 0 | { |
234 | 0 | VSIRename(osTmpfilename.c_str(), |
235 | 0 | osCachedFileName.c_str()); |
236 | 0 | bSuccess = true; |
237 | 0 | } |
238 | 0 | } |
239 | 0 | } |
240 | 0 | } |
241 | 0 | VSIUnlink(osZIPFilename.c_str()); |
242 | 0 | } |
243 | 0 | CPLHTTPDestroyResult(psResult); |
244 | 0 | if (!bSuccess) |
245 | 0 | { |
246 | 0 | CPLDebugOnce("GMLAS", "Cannot get ISO schemas from %s", pszHTTPZIP); |
247 | 0 | } |
248 | 0 | return bSuccess; |
249 | 0 | } |
250 | | |
251 | | /************************************************************************/ |
252 | | /* Open() */ |
253 | | /************************************************************************/ |
254 | | |
255 | | VSILFILE *GMLASXSDCache::Open(const std::string &osResource, |
256 | | const std::string &osBasePath, |
257 | | std::string &osOutFilename) |
258 | 0 | { |
259 | 0 | osOutFilename = osResource; |
260 | 0 | if (!STARTS_WITH(osResource.c_str(), "http://") && |
261 | 0 | !STARTS_WITH(osResource.c_str(), "https://") && |
262 | 0 | CPLIsFilenameRelative(osResource.c_str()) && !osResource.empty()) |
263 | 0 | { |
264 | | /* Transform a/b + ../c --> a/c */ |
265 | 0 | std::string osResourceModified(osResource); |
266 | 0 | std::string osBasePathModified(osBasePath); |
267 | 0 | while ((STARTS_WITH(osResourceModified.c_str(), "../") || |
268 | 0 | STARTS_WITH(osResourceModified.c_str(), "..\\")) && |
269 | 0 | !osBasePathModified.empty()) |
270 | 0 | { |
271 | 0 | osBasePathModified = CPLGetDirnameSafe(osBasePathModified.c_str()); |
272 | 0 | osResourceModified = osResourceModified.substr(3); |
273 | 0 | } |
274 | |
|
275 | 0 | osOutFilename = CPLFormFilenameSafe( |
276 | 0 | osBasePathModified.c_str(), osResourceModified.c_str(), nullptr); |
277 | 0 | } |
278 | |
|
279 | 0 | CPLDebug("GMLAS", "Resolving %s (%s) to %s", osResource.c_str(), |
280 | 0 | osBasePath.c_str(), osOutFilename.c_str()); |
281 | |
|
282 | 0 | VSILFILE *fp = nullptr; |
283 | 0 | bool bHasTriedZIPArchive = false; |
284 | 0 | retry: |
285 | 0 | if (!m_osCacheDirectory.empty() && |
286 | 0 | (STARTS_WITH(osOutFilename.c_str(), "http://") || |
287 | 0 | STARTS_WITH(osOutFilename.c_str(), "https://")) && |
288 | 0 | RecursivelyCreateDirectoryIfNeeded()) |
289 | 0 | { |
290 | 0 | const std::string osCachedFileName( |
291 | 0 | GetCachedFilename(osOutFilename.c_str())); |
292 | 0 | if (!m_bRefresh || m_aoSetRefreshedFiles.find(osCachedFileName) != |
293 | 0 | m_aoSetRefreshedFiles.end()) |
294 | 0 | { |
295 | 0 | fp = VSIFOpenL(osCachedFileName.c_str(), "rb"); |
296 | 0 | } |
297 | 0 | if (fp != nullptr) |
298 | 0 | { |
299 | 0 | CPLDebug("GMLAS", "Use cached %s", osCachedFileName.c_str()); |
300 | 0 | } |
301 | 0 | else if (m_bAllowDownload) |
302 | 0 | { |
303 | 0 | if (m_bRefresh) |
304 | 0 | m_aoSetRefreshedFiles.insert(osCachedFileName); |
305 | | |
306 | 0 | else if (!bHasTriedZIPArchive && |
307 | 0 | strstr(osOutFilename.c_str(), |
308 | 0 | "://schemas.opengis.net/gml/3.2.1/") && |
309 | 0 | CPLTestBool(CPLGetConfigOption( |
310 | 0 | "OGR_GMLAS_USE_SCHEMAS_FROM_OGC_ZIP", "YES"))) |
311 | 0 | { |
312 | 0 | bHasTriedZIPArchive = true; |
313 | 0 | if (CacheAllGML321()) |
314 | 0 | goto retry; |
315 | 0 | } |
316 | | |
317 | 0 | else if (!bHasTriedZIPArchive && |
318 | 0 | strstr(osOutFilename.c_str(), |
319 | 0 | "://schemas.opengis.net/iso/19139/20070417/") && |
320 | 0 | CPLTestBool(CPLGetConfigOption( |
321 | 0 | "OGR_GMLAS_USE_SCHEMAS_FROM_OGC_ZIP", "YES"))) |
322 | 0 | { |
323 | 0 | bHasTriedZIPArchive = true; |
324 | 0 | if (CacheAllISO20070417()) |
325 | 0 | goto retry; |
326 | 0 | } |
327 | | |
328 | 0 | CPLHTTPResult *psResult = |
329 | 0 | CPLHTTPFetch(osOutFilename.c_str(), nullptr); |
330 | 0 | if (psResult == nullptr || psResult->nDataLen == 0) |
331 | 0 | { |
332 | 0 | CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s", |
333 | 0 | osResource.c_str()); |
334 | 0 | CPLHTTPDestroyResult(psResult); |
335 | 0 | return nullptr; |
336 | 0 | } |
337 | | |
338 | 0 | std::string osTmpfilename(osCachedFileName + ".tmp"); |
339 | 0 | VSILFILE *fpTmp = VSIFOpenL(osTmpfilename.c_str(), "wb"); |
340 | 0 | if (fpTmp) |
341 | 0 | { |
342 | 0 | const auto nRet = VSIFWriteL(psResult->pabyData, |
343 | 0 | psResult->nDataLen, 1, fpTmp); |
344 | 0 | VSIFCloseL(fpTmp); |
345 | 0 | if (nRet == 1) |
346 | 0 | { |
347 | 0 | VSIRename(osTmpfilename.c_str(), osCachedFileName.c_str()); |
348 | 0 | fp = VSIFOpenL(osCachedFileName.c_str(), "rb"); |
349 | 0 | } |
350 | 0 | } |
351 | |
|
352 | 0 | CPLHTTPDestroyResult(psResult); |
353 | 0 | } |
354 | 0 | } |
355 | 0 | else |
356 | 0 | { |
357 | 0 | if (STARTS_WITH(osOutFilename.c_str(), "http://") || |
358 | 0 | STARTS_WITH(osOutFilename.c_str(), "https://")) |
359 | 0 | { |
360 | 0 | if (m_bAllowDownload) |
361 | 0 | { |
362 | 0 | CPLHTTPResult *psResult = |
363 | 0 | CPLHTTPFetch(osOutFilename.c_str(), nullptr); |
364 | 0 | if (psResult == nullptr || psResult->nDataLen == 0) |
365 | 0 | { |
366 | 0 | CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s", |
367 | 0 | osResource.c_str()); |
368 | 0 | CPLHTTPDestroyResult(psResult); |
369 | 0 | return nullptr; |
370 | 0 | } |
371 | | |
372 | 0 | fp = VSIFileFromMemBuffer(nullptr, psResult->pabyData, |
373 | 0 | psResult->nDataLen, TRUE); |
374 | 0 | if (fp) |
375 | 0 | { |
376 | | // Steal the memory buffer from HTTP result |
377 | 0 | psResult->pabyData = nullptr; |
378 | 0 | psResult->nDataLen = 0; |
379 | 0 | psResult->nDataAlloc = 0; |
380 | 0 | } |
381 | 0 | CPLHTTPDestroyResult(psResult); |
382 | 0 | } |
383 | 0 | } |
384 | 0 | else |
385 | 0 | { |
386 | 0 | fp = VSIFOpenL(osOutFilename.c_str(), "rb"); |
387 | 0 | } |
388 | 0 | } |
389 | | |
390 | 0 | if (fp == nullptr) |
391 | 0 | { |
392 | 0 | CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s", |
393 | 0 | osResource.c_str()); |
394 | 0 | } |
395 | |
|
396 | 0 | return fp; |
397 | 0 | } |