/src/gdal/port/cpl_vsil_curl.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: CPL - Common Portability Library |
4 | | * Purpose: Implement VSI large file api for HTTP/FTP files |
5 | | * Author: Even Rouault, even.rouault at spatialys.com |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2010-2018, Even Rouault <even.rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "cpl_port.h" |
14 | | #include "cpl_vsil_curl_priv.h" |
15 | | #include "cpl_vsil_curl_class.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <array> |
19 | | #include <limits> |
20 | | #include <map> |
21 | | #include <memory> |
22 | | #include <numeric> |
23 | | #include <set> |
24 | | #include <string_view> |
25 | | |
26 | | #include "cpl_aws.h" |
27 | | #include "cpl_json.h" |
28 | | #include "cpl_json_header.h" |
29 | | #include "cpl_minixml.h" |
30 | | #include "cpl_multiproc.h" |
31 | | #include "cpl_string.h" |
32 | | #include "cpl_time.h" |
33 | | #include "cpl_vsi.h" |
34 | | #include "cpl_vsi_virtual.h" |
35 | | #include "cpl_http.h" |
36 | | #include "cpl_mem_cache.h" |
37 | | |
38 | | #ifndef S_IRUSR |
39 | | #define S_IRUSR 00400 |
40 | | #define S_IWUSR 00200 |
41 | | #define S_IXUSR 00100 |
42 | | #define S_IRGRP 00040 |
43 | | #define S_IWGRP 00020 |
44 | | #define S_IXGRP 00010 |
45 | | #define S_IROTH 00004 |
46 | | #define S_IWOTH 00002 |
47 | | #define S_IXOTH 00001 |
48 | | #endif |
49 | | |
50 | | #ifndef HAVE_CURL |
51 | | |
/**
 * Stub compiled when HAVE_CURL is not defined: there is no curl-backed
 * file handler to install, so this function does nothing.
 */
void VSIInstallCurlFileHandler()
{
    // Intentionally empty: not supported in this build.
}
56 | | |
/**
 * Stub compiled when HAVE_CURL is not defined: does nothing.
 */
void VSICurlClearCache()
{
    // Intentionally empty: not supported in this build.
}
61 | | |
/**
 * Stub compiled when HAVE_CURL is not defined: the argument is ignored and
 * nothing is done.
 */
void VSICurlPartialClearCache(const char * /* unused */)
{
    // Intentionally empty: not supported in this build.
}
66 | | |
/**
 * Stub compiled when HAVE_CURL is not defined: does nothing.
 */
void VSICurlAuthParametersChanged()
{
    // Intentionally empty: not supported in this build.
}
71 | | |
/**
 * Stub compiled when HAVE_CURL is not defined: does nothing.
 */
void VSINetworkStatsReset()
{
    // Intentionally empty: not supported in this build.
}
76 | | |
/**
 * Stub compiled when HAVE_CURL is not defined.
 *
 * The options argument is ignored.
 *
 * @return always nullptr in this build.
 */
char *VSINetworkStatsGetAsSerializedJSON(char ** /* papszOptions */)
{
    // Not supported in this build: nothing to serialize.
    return nullptr;
}
82 | | |
83 | | /************************************************************************/ |
84 | | /* VSICurlInstallReadCbk() */ |
85 | | /************************************************************************/ |
86 | | |
87 | | int VSICurlInstallReadCbk(VSILFILE * /* fp */, |
88 | | VSICurlReadCbkFunc /* pfnReadCbk */, |
89 | | void * /* pfnUserData */, |
90 | | int /* bStopOnInterruptUntilUninstall */) |
91 | | { |
92 | | return FALSE; |
93 | | } |
94 | | |
95 | | /************************************************************************/ |
96 | | /* VSICurlUninstallReadCbk() */ |
97 | | /************************************************************************/ |
98 | | |
99 | | int VSICurlUninstallReadCbk(VSILFILE * /* fp */) |
100 | | { |
101 | | return FALSE; |
102 | | } |
103 | | |
104 | | #else |
105 | | |
106 | | //! @cond Doxygen_Suppress |
107 | | #ifndef DOXYGEN_SKIP |
108 | | |
109 | 21.5k | #define ENABLE_DEBUG 1 |
110 | | #define ENABLE_DEBUG_VERBOSE 0 |
111 | | |
112 | | #define unchecked_curl_easy_setopt(handle, opt, param) \ |
113 | 3.53M | CPL_IGNORE_RET_VAL(curl_easy_setopt(handle, opt, param)) |
114 | | |
115 | | constexpr const char *const VSICURL_PREFIXES[] = {"/vsicurl/", "/vsicurl?"}; |
116 | | |
117 | | extern "C" bool CPL_DLL GDALIsInGlobalDestructorFromDLLMain(); |
118 | | |
119 | | /************************************************************************/ |
120 | | /* VSICurlAuthParametersChanged() */ |
121 | | /************************************************************************/ |
122 | | |
123 | | static unsigned int gnGenerationAuthParameters = 0; |
124 | | |
125 | | void VSICurlAuthParametersChanged() |
126 | 0 | { |
127 | 0 | gnGenerationAuthParameters++; |
128 | 0 | } |
129 | | |
130 | | // Do not access those variables directly ! |
131 | | // Use VSICURLGetDownloadChunkSize() and GetMaxRegions() |
132 | | static int N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = 0; |
133 | | static int DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY = 0; |
134 | | |
135 | | /************************************************************************/ |
136 | | /* VSICURLReadGlobalEnvVariables() */ |
137 | | /************************************************************************/ |
138 | | |
139 | | static void VSICURLReadGlobalEnvVariables() |
140 | 308k | { |
141 | 308k | struct Initializer |
142 | 308k | { |
143 | 308k | Initializer() |
144 | 308k | { |
145 | 17 | constexpr int DOWNLOAD_CHUNK_SIZE_DEFAULT = 16384; |
146 | 17 | const char *pszChunkSize = |
147 | 17 | CPLGetConfigOption("CPL_VSIL_CURL_CHUNK_SIZE", nullptr); |
148 | 17 | GIntBig nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT; |
149 | | |
150 | 17 | if (pszChunkSize) |
151 | 0 | { |
152 | 0 | if (CPLParseMemorySize(pszChunkSize, &nChunkSize, nullptr) != |
153 | 0 | CE_None) |
154 | 0 | { |
155 | 0 | CPLError( |
156 | 0 | CE_Warning, CPLE_AppDefined, |
157 | 0 | "Could not parse value for CPL_VSIL_CURL_CHUNK_SIZE. " |
158 | 0 | "Using default value of %d instead.", |
159 | 0 | DOWNLOAD_CHUNK_SIZE_DEFAULT); |
160 | 0 | } |
161 | 0 | } |
162 | | |
163 | 17 | constexpr int MIN_CHUNK_SIZE = 1024; |
164 | 17 | constexpr int MAX_CHUNK_SIZE = 10 * 1024 * 1024; |
165 | 17 | if (nChunkSize < MIN_CHUNK_SIZE || nChunkSize > MAX_CHUNK_SIZE) |
166 | 0 | { |
167 | 0 | nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT; |
168 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
169 | 0 | "Invalid value for CPL_VSIL_CURL_CHUNK_SIZE. " |
170 | 0 | "Allowed range is [%d, %d]. " |
171 | 0 | "Using CPL_VSIL_CURL_CHUNK_SIZE=%d instead", |
172 | 0 | MIN_CHUNK_SIZE, MAX_CHUNK_SIZE, |
173 | 0 | DOWNLOAD_CHUNK_SIZE_DEFAULT); |
174 | 0 | } |
175 | 17 | DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY = |
176 | 17 | static_cast<int>(nChunkSize); |
177 | | |
178 | 17 | constexpr int N_MAX_REGIONS_DEFAULT = 1000; |
179 | 17 | constexpr int CACHE_SIZE_DEFAULT = |
180 | 17 | N_MAX_REGIONS_DEFAULT * DOWNLOAD_CHUNK_SIZE_DEFAULT; |
181 | | |
182 | 17 | const char *pszCacheSize = |
183 | 17 | CPLGetConfigOption("CPL_VSIL_CURL_CACHE_SIZE", nullptr); |
184 | 17 | GIntBig nCacheSize = CACHE_SIZE_DEFAULT; |
185 | | |
186 | 17 | if (pszCacheSize) |
187 | 0 | { |
188 | 0 | if (CPLParseMemorySize(pszCacheSize, &nCacheSize, nullptr) != |
189 | 0 | CE_None) |
190 | 0 | { |
191 | 0 | CPLError( |
192 | 0 | CE_Warning, CPLE_AppDefined, |
193 | 0 | "Could not parse value for CPL_VSIL_CURL_CACHE_SIZE. " |
194 | 0 | "Using default value of " CPL_FRMT_GIB " instead.", |
195 | 0 | nCacheSize); |
196 | 0 | } |
197 | 0 | } |
198 | | |
199 | 17 | const auto nMaxRAM = CPLGetUsablePhysicalRAM(); |
200 | 17 | const auto nMinVal = DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY; |
201 | 17 | auto nMaxVal = static_cast<GIntBig>(INT_MAX) * |
202 | 17 | DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY; |
203 | 17 | if (nMaxRAM > 0 && nMaxVal > nMaxRAM) |
204 | 17 | nMaxVal = nMaxRAM; |
205 | 17 | if (nCacheSize < nMinVal || nCacheSize > nMaxVal) |
206 | 0 | { |
207 | 0 | nCacheSize = nCacheSize < nMinVal ? nMinVal : nMaxVal; |
208 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
209 | 0 | "Invalid value for CPL_VSIL_CURL_CACHE_SIZE. " |
210 | 0 | "Allowed range is [%d, " CPL_FRMT_GIB "]. " |
211 | 0 | "Using CPL_VSIL_CURL_CACHE_SIZE=" CPL_FRMT_GIB |
212 | 0 | " instead", |
213 | 0 | nMinVal, nMaxVal, nCacheSize); |
214 | 0 | } |
215 | 17 | N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = std::max( |
216 | 17 | 1, static_cast<int>(nCacheSize / |
217 | 17 | DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY)); |
218 | 17 | } |
219 | 308k | }; |
220 | | |
221 | 308k | static Initializer initializer; |
222 | 308k | } |
223 | | |
224 | | /************************************************************************/ |
225 | | /* VSICURLGetDownloadChunkSize() */ |
226 | | /************************************************************************/ |
227 | | |
228 | | int VSICURLGetDownloadChunkSize() |
229 | 286k | { |
230 | 286k | VSICURLReadGlobalEnvVariables(); |
231 | 286k | return DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY; |
232 | 286k | } |
233 | | |
234 | | /************************************************************************/ |
235 | | /* GetMaxRegions() */ |
236 | | /************************************************************************/ |
237 | | |
238 | | static int GetMaxRegions() |
239 | 21.6k | { |
240 | 21.6k | VSICURLReadGlobalEnvVariables(); |
241 | 21.6k | return N_MAX_REGIONS_DO_NOT_USE_DIRECTLY; |
242 | 21.6k | } |
243 | | |
244 | | /************************************************************************/ |
245 | | /* VSICurlFindStringSensitiveExceptEscapeSequences() */ |
246 | | /************************************************************************/ |
247 | | |
248 | | static int |
249 | | VSICurlFindStringSensitiveExceptEscapeSequences(CSLConstList papszList, |
250 | | const char *pszTarget) |
251 | | |
252 | 250k | { |
253 | 250k | if (papszList == nullptr) |
254 | 250k | return -1; |
255 | | |
256 | 66 | for (int i = 0; papszList[i] != nullptr; i++) |
257 | 33 | { |
258 | 33 | const char *pszIter1 = papszList[i]; |
259 | 33 | const char *pszIter2 = pszTarget; |
260 | 33 | char ch1 = '\0'; |
261 | 33 | char ch2 = '\0'; |
262 | | /* The comparison is case-sensitive, except for escaped */ |
263 | | /* sequences where letters of the hexadecimal sequence */ |
264 | | /* can be uppercase or lowercase depending on the quoting algorithm */ |
265 | 40 | while (true) |
266 | 40 | { |
267 | 40 | ch1 = *pszIter1; |
268 | 40 | ch2 = *pszIter2; |
269 | 40 | if (ch1 == '\0' || ch2 == '\0') |
270 | 7 | break; |
271 | 33 | if (ch1 == '%' && ch2 == '%' && pszIter1[1] != '\0' && |
272 | 0 | pszIter1[2] != '\0' && pszIter2[1] != '\0' && |
273 | 0 | pszIter2[2] != '\0') |
274 | 0 | { |
275 | 0 | if (!EQUALN(pszIter1 + 1, pszIter2 + 1, 2)) |
276 | 0 | break; |
277 | 0 | pszIter1 += 2; |
278 | 0 | pszIter2 += 2; |
279 | 0 | } |
280 | 33 | if (ch1 != ch2) |
281 | 26 | break; |
282 | 7 | pszIter1++; |
283 | 7 | pszIter2++; |
284 | 7 | } |
285 | 33 | if (ch1 == ch2 && ch1 == '\0') |
286 | 0 | return i; |
287 | 33 | } |
288 | | |
289 | 33 | return -1; |
290 | 33 | } |
291 | | |
292 | | /************************************************************************/ |
293 | | /* VSICurlIsFileInList() */ |
294 | | /************************************************************************/ |
295 | | |
296 | | static int VSICurlIsFileInList(CSLConstList papszList, const char *pszTarget) |
297 | 152k | { |
298 | 152k | int nRet = |
299 | 152k | VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget); |
300 | 152k | if (nRet >= 0) |
301 | 0 | return nRet; |
302 | | |
303 | | // If we didn't find anything, try to URL-escape the target filename. |
304 | 152k | char *pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL); |
305 | 152k | if (strcmp(pszTarget, pszEscaped) != 0) |
306 | 98.0k | { |
307 | 98.0k | nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, |
308 | 98.0k | pszEscaped); |
309 | 98.0k | } |
310 | 152k | CPLFree(pszEscaped); |
311 | 152k | return nRet; |
312 | 152k | } |
313 | | |
314 | | /************************************************************************/ |
315 | | /* StartsWithVSICurlPrefix() */ |
316 | | /************************************************************************/ |
317 | | |
318 | | static bool StartsWithVSICurlPrefix(const char *pszFilename) |
319 | 1.22M | { |
320 | 1.22M | for (const char *pszPrefix : VSICURL_PREFIXES) |
321 | 2.21M | { |
322 | 2.21M | if (STARTS_WITH(pszFilename, pszPrefix)) |
323 | 641k | { |
324 | 641k | return true; |
325 | 641k | } |
326 | 2.21M | } |
327 | 584k | return false; |
328 | 1.22M | } |
329 | | |
330 | | /************************************************************************/ |
331 | | /* VSICurlGetURLFromFilename() */ |
332 | | /************************************************************************/ |
333 | | |
334 | | static std::string VSICurlGetURLFromFilename( |
335 | | const char *pszFilename, CPLHTTPRetryParameters *poRetryParameters, |
336 | | bool *pbUseHead, bool *pbUseRedirectURLIfNoQueryStringParams, |
337 | | bool *pbListDir, bool *pbEmptyDir, CPLStringList *paosHTTPOptions, |
338 | | bool *pbPlanetaryComputerURLSigning, char **ppszPlanetaryComputerCollection) |
339 | 609k | { |
340 | 609k | if (ppszPlanetaryComputerCollection) |
341 | 182k | *ppszPlanetaryComputerCollection = nullptr; |
342 | | |
343 | 609k | if (!StartsWithVSICurlPrefix(pszFilename)) |
344 | 131k | return pszFilename; |
345 | | |
346 | 478k | if (pbPlanetaryComputerURLSigning) |
347 | 182k | { |
348 | | // It may be more convenient sometimes to store Planetary Computer URL |
349 | | // signing as a per-path specific option rather than capturing it in |
350 | | // the filename with the &pc_url_signing=yes option. |
351 | 182k | if (CPLTestBool(VSIGetPathSpecificOption( |
352 | 182k | pszFilename, "VSICURL_PC_URL_SIGNING", "FALSE"))) |
353 | 0 | { |
354 | 0 | *pbPlanetaryComputerURLSigning = true; |
355 | 0 | } |
356 | 182k | } |
357 | | |
358 | 478k | pszFilename += strlen("/vsicurl/"); |
359 | 478k | if (!STARTS_WITH(pszFilename, "http://") && |
360 | 447k | !STARTS_WITH(pszFilename, "https://") && |
361 | 440k | !STARTS_WITH(pszFilename, "ftp://") && |
362 | 406k | !STARTS_WITH(pszFilename, "file://")) |
363 | 404k | { |
364 | 404k | if (*pszFilename == '?') |
365 | 5.55k | pszFilename++; |
366 | 404k | char **papszTokens = CSLTokenizeString2(pszFilename, "&", 0); |
367 | 2.14M | for (int i = 0; papszTokens[i] != nullptr; i++) |
368 | 1.73M | { |
369 | 1.73M | char *pszUnescaped = |
370 | 1.73M | CPLUnescapeString(papszTokens[i], nullptr, CPLES_URL); |
371 | 1.73M | CPLFree(papszTokens[i]); |
372 | 1.73M | papszTokens[i] = pszUnescaped; |
373 | 1.73M | } |
374 | | |
375 | 404k | std::string osURL; |
376 | 404k | std::string osHeaders; |
377 | 2.14M | for (int i = 0; papszTokens[i]; i++) |
378 | 1.73M | { |
379 | 1.73M | char *pszKey = nullptr; |
380 | 1.73M | const char *pszValue = CPLParseNameValue(papszTokens[i], &pszKey); |
381 | 1.73M | if (pszKey && pszValue) |
382 | 1.18M | { |
383 | 1.18M | if (EQUAL(pszKey, "max_retry")) |
384 | 13.4k | { |
385 | 13.4k | if (poRetryParameters) |
386 | 4.88k | poRetryParameters->nMaxRetry = atoi(pszValue); |
387 | 13.4k | } |
388 | 1.16M | else if (EQUAL(pszKey, "retry_delay")) |
389 | 85.9k | { |
390 | 85.9k | if (poRetryParameters) |
391 | 32.5k | poRetryParameters->dfInitialDelay = CPLAtof(pszValue); |
392 | 85.9k | } |
393 | 1.08M | else if (EQUAL(pszKey, "retry_codes")) |
394 | 5.17k | { |
395 | 5.17k | if (poRetryParameters) |
396 | 2.02k | poRetryParameters->osRetryCodes = pszValue; |
397 | 5.17k | } |
398 | 1.07M | else if (EQUAL(pszKey, "use_head")) |
399 | 1.24k | { |
400 | 1.24k | if (pbUseHead) |
401 | 436 | *pbUseHead = CPLTestBool(pszValue); |
402 | 1.24k | } |
403 | 1.07M | else if (EQUAL(pszKey, |
404 | 1.07M | "use_redirect_url_if_no_query_string_params")) |
405 | 3.41k | { |
406 | | /* Undocumented. Used by PLScenes driver */ |
407 | 3.41k | if (pbUseRedirectURLIfNoQueryStringParams) |
408 | 1.23k | *pbUseRedirectURLIfNoQueryStringParams = |
409 | 1.23k | CPLTestBool(pszValue); |
410 | 3.41k | } |
411 | 1.07M | else if (EQUAL(pszKey, "list_dir")) |
412 | 1.35k | { |
413 | 1.35k | if (pbListDir) |
414 | 532 | *pbListDir = CPLTestBool(pszValue); |
415 | 1.35k | } |
416 | 1.07M | else if (EQUAL(pszKey, "empty_dir")) |
417 | 1.15k | { |
418 | 1.15k | if (pbEmptyDir) |
419 | 572 | *pbEmptyDir = CPLTestBool(pszValue); |
420 | 1.15k | } |
421 | 1.06M | else if (EQUAL(pszKey, "header_file")) |
422 | 15.6k | { |
423 | | #if defined(CPL_VSIL_CURL_HEADER_FILE_KVP_DISABLED) |
424 | | constexpr bool CPL_VSIL_CURL_HEADER_FILE_KVP_DISABLED = |
425 | | true; |
426 | | #else |
427 | 15.6k | constexpr bool CPL_VSIL_CURL_HEADER_FILE_KVP_DISABLED = |
428 | 15.6k | false; |
429 | 15.6k | #endif |
430 | 15.6k | if (CPL_VSIL_CURL_HEADER_FILE_KVP_DISABLED) |
431 | 0 | { |
432 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
433 | 0 | "Use of 'header_file' key-value pair in " |
434 | 0 | "/vsicurl? is disabled in this build"); |
435 | 0 | } |
436 | 15.6k | else |
437 | 15.6k | { |
438 | 15.6k | bool bSetValue = false; |
439 | 15.6k | const char *pszAllowHeaderFileKVP = CPLGetConfigOption( |
440 | 15.6k | "CPL_VSIL_CURL_HEADER_FILE_KVP_ENABLED", nullptr); |
441 | 15.6k | if (!pszAllowHeaderFileKVP || |
442 | 0 | pszAllowHeaderFileKVP[0] == 0 || |
443 | 0 | EQUAL(pszAllowHeaderFileKVP, "ONLY_IN_TEMP")) |
444 | 15.6k | { |
445 | 15.6k | if (STARTS_WITH(pszValue, "/vsimem/")) |
446 | 9.40k | { |
447 | 9.40k | bSetValue = !CPLHasUnbalancedPathTraversal( |
448 | 9.40k | pszValue + strlen("/vsimem/")); |
449 | 9.40k | } |
450 | 6.23k | else if (STARTS_WITH(pszValue, "/tmp/")) |
451 | 0 | { |
452 | 0 | bSetValue = !CPLHasUnbalancedPathTraversal( |
453 | 0 | pszValue + strlen("/tmp/")); |
454 | 0 | } |
455 | 6.23k | else |
456 | 6.23k | { |
457 | 6.23k | for (const char *pszEnvVar : {"TEMP", "TMP"}) |
458 | 12.4k | { |
459 | 12.4k | if (const char *pszTemp = |
460 | 12.4k | CPLGetConfigOption(pszEnvVar, |
461 | 12.4k | nullptr)) |
462 | 0 | { |
463 | 0 | std::string osTemp = pszTemp; |
464 | 0 | if (!osTemp.empty() && |
465 | 0 | (osTemp.back() == '/' || |
466 | 0 | osTemp.back() == '\\')) |
467 | 0 | osTemp.pop_back(); |
468 | 0 | if (!osTemp.empty() && |
469 | 0 | cpl::starts_with( |
470 | 0 | std::string_view(pszValue), |
471 | 0 | osTemp) && |
472 | 0 | (pszValue[osTemp.size()] == '/' || |
473 | 0 | pszValue[osTemp.size()] == '\\')) |
474 | 0 | { |
475 | 0 | bSetValue = |
476 | 0 | !CPLHasUnbalancedPathTraversal( |
477 | 0 | pszValue + osTemp.size()); |
478 | 0 | break; |
479 | 0 | } |
480 | 0 | } |
481 | 12.4k | } |
482 | 6.23k | } |
483 | 15.6k | if (!bSetValue) |
484 | 6.55k | { |
485 | 6.55k | CPLError(CE_Failure, CPLE_AppDefined, |
486 | 6.55k | "Use of 'header_file=%s' " |
487 | 6.55k | "key-value pair in /vsicurl? is " |
488 | 6.55k | "disabled because it refers to a " |
489 | 6.55k | "file stored in a non-temporary " |
490 | 6.55k | "location. You may set the " |
491 | 6.55k | "CPL_VSIL_CURL_HEADER_FILE_KVP_" |
492 | 6.55k | "ENABLED configuration option to " |
493 | 6.55k | "YES to remove that restriction.", |
494 | 6.55k | pszValue); |
495 | 6.55k | } |
496 | 15.6k | } |
497 | 0 | else if (CPLTestBool(pszAllowHeaderFileKVP)) |
498 | 0 | { |
499 | 0 | bSetValue = true; |
500 | 0 | } |
501 | 0 | else |
502 | 0 | { |
503 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
504 | 0 | "Use of 'header_file' key-value pair in " |
505 | 0 | "/vsicurl? is disabled by the " |
506 | 0 | "CPL_VSIL_CURL_HEADER_FILE_KVP_ENABLED " |
507 | 0 | "configuration option"); |
508 | 0 | } |
509 | | |
510 | 15.6k | if (bSetValue && paosHTTPOptions) |
511 | 3.30k | { |
512 | 3.30k | paosHTTPOptions->SetNameValue(pszKey, pszValue); |
513 | 3.30k | } |
514 | 15.6k | } |
515 | 15.6k | } |
516 | 1.05M | else if (EQUAL(pszKey, "useragent") || |
517 | 1.05M | EQUAL(pszKey, "referer") || EQUAL(pszKey, "cookie") || |
518 | 1.04M | EQUAL(pszKey, "unsafessl") || |
519 | | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
520 | | EQUAL(pszKey, "timeout") || |
521 | | EQUAL(pszKey, "connecttimeout") || |
522 | | #endif |
523 | 1.04M | EQUAL(pszKey, "low_speed_time") || |
524 | 1.04M | EQUAL(pszKey, "low_speed_limit") || |
525 | 1.04M | EQUAL(pszKey, "proxy") || EQUAL(pszKey, "proxyauth") || |
526 | 950k | EQUAL(pszKey, "proxyuserpwd")) |
527 | 120k | { |
528 | | // Above names are the ones supported by |
529 | | // CPLHTTPSetOptions() |
530 | 120k | if (paosHTTPOptions) |
531 | 47.4k | { |
532 | 47.4k | paosHTTPOptions->SetNameValue(pszKey, pszValue); |
533 | 47.4k | } |
534 | 120k | } |
535 | 933k | else if (EQUAL(pszKey, "url")) |
536 | 55.9k | { |
537 | 55.9k | osURL = pszValue; |
538 | 55.9k | } |
539 | 877k | else if (EQUAL(pszKey, "pc_url_signing")) |
540 | 41.9k | { |
541 | 41.9k | if (pbPlanetaryComputerURLSigning) |
542 | 16.7k | *pbPlanetaryComputerURLSigning = CPLTestBool(pszValue); |
543 | 41.9k | } |
544 | 835k | else if (EQUAL(pszKey, "pc_collection")) |
545 | 10.2k | { |
546 | 10.2k | if (ppszPlanetaryComputerCollection) |
547 | 4.09k | { |
548 | 4.09k | CPLFree(*ppszPlanetaryComputerCollection); |
549 | 4.09k | *ppszPlanetaryComputerCollection = CPLStrdup(pszValue); |
550 | 4.09k | } |
551 | 10.2k | } |
552 | 825k | else if (STARTS_WITH(pszKey, "header.")) |
553 | 34.0k | { |
554 | 34.0k | osHeaders += (pszKey + strlen("header.")); |
555 | 34.0k | osHeaders += ':'; |
556 | 34.0k | osHeaders += pszValue; |
557 | 34.0k | osHeaders += "\r\n"; |
558 | 34.0k | } |
559 | 791k | else |
560 | 791k | { |
561 | 791k | CPLError(CE_Warning, CPLE_NotSupported, |
562 | 791k | "Unsupported option: %s", pszKey); |
563 | 791k | } |
564 | 1.18M | } |
565 | 1.73M | CPLFree(pszKey); |
566 | 1.73M | } |
567 | | |
568 | 404k | if (paosHTTPOptions && !osHeaders.empty()) |
569 | 6.07k | paosHTTPOptions->SetNameValue("HEADERS", osHeaders.c_str()); |
570 | | |
571 | 404k | CSLDestroy(papszTokens); |
572 | 404k | if (osURL.empty()) |
573 | 377k | { |
574 | 377k | CPLError(CE_Failure, CPLE_IllegalArg, "Missing url parameter"); |
575 | 377k | return pszFilename; |
576 | 377k | } |
577 | | |
578 | 27.3k | return osURL; |
579 | 404k | } |
580 | | |
581 | 73.9k | return pszFilename; |
582 | 478k | } |
583 | | |
584 | | namespace cpl |
585 | | { |
586 | | |
587 | | /************************************************************************/ |
588 | | /* VSICurlHandle() */ |
589 | | /************************************************************************/ |
590 | | |
591 | | VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandlerBase *poFSIn, |
592 | | const char *pszFilename, const char *pszURLIn) |
593 | 299k | : poFS(poFSIn), m_osFilename(pszFilename), |
594 | 299k | m_aosHTTPOptions(CPLHTTPGetOptionsFromEnv(pszFilename)), |
595 | 299k | m_oRetryParameters(m_aosHTTPOptions), |
596 | | m_bUseHead( |
597 | 299k | CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_USE_HEAD", "YES"))) |
598 | 299k | { |
599 | 299k | if (pszURLIn) |
600 | 117k | { |
601 | 117k | m_pszURL = CPLStrdup(pszURLIn); |
602 | 117k | } |
603 | 182k | else |
604 | 182k | { |
605 | 182k | char *pszPCCollection = nullptr; |
606 | 182k | m_pszURL = |
607 | 182k | CPLStrdup(VSICurlGetURLFromFilename( |
608 | 182k | pszFilename, &m_oRetryParameters, &m_bUseHead, |
609 | 182k | &m_bUseRedirectURLIfNoQueryStringParams, nullptr, |
610 | 182k | nullptr, &m_aosHTTPOptions, |
611 | 182k | &m_bPlanetaryComputerURLSigning, &pszPCCollection) |
612 | 182k | .c_str()); |
613 | 182k | if (pszPCCollection) |
614 | 2.40k | m_osPlanetaryComputerCollection = pszPCCollection; |
615 | 182k | CPLFree(pszPCCollection); |
616 | 182k | } |
617 | | |
618 | 299k | m_bCached = poFSIn->AllowCachedDataFor(pszFilename); |
619 | 299k | poFS->GetCachedFileProp(m_pszURL, oFileProp); |
620 | 299k | } |
621 | | |
622 | | /************************************************************************/ |
623 | | /* ~VSICurlHandle() */ |
624 | | /************************************************************************/ |
625 | | |
626 | | VSICurlHandle::~VSICurlHandle() |
627 | 299k | { |
628 | 299k | if (m_oThreadAdviseRead.joinable()) |
629 | 0 | { |
630 | 0 | m_oThreadAdviseRead.join(); |
631 | 0 | } |
632 | 299k | if (m_hCurlMultiHandleForAdviseRead) |
633 | 0 | { |
634 | 0 | VSICURLMultiCleanup(m_hCurlMultiHandleForAdviseRead); |
635 | 0 | } |
636 | | |
637 | 299k | if (!m_bCached) |
638 | 0 | { |
639 | 0 | poFS->InvalidateCachedData(m_pszURL); |
640 | 0 | poFS->InvalidateDirContent(CPLGetDirnameSafe(m_osFilename.c_str())); |
641 | 0 | } |
642 | 299k | CPLFree(m_pszURL); |
643 | 299k | } |
644 | | |
645 | | /************************************************************************/ |
646 | | /* SetURL() */ |
647 | | /************************************************************************/ |
648 | | |
649 | | void VSICurlHandle::SetURL(const char *pszURLIn) |
650 | 0 | { |
651 | 0 | CPLFree(m_pszURL); |
652 | 0 | m_pszURL = CPLStrdup(pszURLIn); |
653 | 0 | } |
654 | | |
655 | | /************************************************************************/ |
656 | | /* InstallReadCbk() */ |
657 | | /************************************************************************/ |
658 | | |
659 | | int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn, |
660 | | void *pfnUserDataIn, |
661 | | int bStopOnInterruptUntilUninstallIn) |
662 | 0 | { |
663 | 0 | if (pfnReadCbk != nullptr) |
664 | 0 | return FALSE; |
665 | | |
666 | 0 | pfnReadCbk = pfnReadCbkIn; |
667 | 0 | pReadCbkUserData = pfnUserDataIn; |
668 | 0 | bStopOnInterruptUntilUninstall = |
669 | 0 | CPL_TO_BOOL(bStopOnInterruptUntilUninstallIn); |
670 | 0 | bInterrupted = false; |
671 | 0 | return TRUE; |
672 | 0 | } |
673 | | |
674 | | /************************************************************************/ |
675 | | /* UninstallReadCbk() */ |
676 | | /************************************************************************/ |
677 | | |
678 | | int VSICurlHandle::UninstallReadCbk() |
679 | 0 | { |
680 | 0 | if (pfnReadCbk == nullptr) |
681 | 0 | return FALSE; |
682 | | |
683 | 0 | pfnReadCbk = nullptr; |
684 | 0 | pReadCbkUserData = nullptr; |
685 | 0 | bStopOnInterruptUntilUninstall = false; |
686 | 0 | bInterrupted = false; |
687 | 0 | return TRUE; |
688 | 0 | } |
689 | | |
690 | | /************************************************************************/ |
691 | | /* Seek() */ |
692 | | /************************************************************************/ |
693 | | |
694 | | int VSICurlHandle::Seek(vsi_l_offset nOffset, int nWhence) |
695 | 64.8k | { |
696 | 64.8k | if (nWhence == SEEK_SET) |
697 | 36.4k | { |
698 | 36.4k | curOffset = nOffset; |
699 | 36.4k | } |
700 | 28.4k | else if (nWhence == SEEK_CUR) |
701 | 0 | { |
702 | 0 | curOffset = curOffset + nOffset; |
703 | 0 | } |
704 | 28.4k | else |
705 | 28.4k | { |
706 | 28.4k | curOffset = GetFileSize(false) + nOffset; |
707 | 28.4k | } |
708 | 64.8k | bEOF = false; |
709 | 64.8k | return 0; |
710 | 64.8k | } |
711 | | |
712 | | } // namespace cpl |
713 | | |
714 | | /************************************************************************/ |
715 | | /* VSICurlGetTimeStampFromRFC822DateTime() */ |
716 | | /************************************************************************/ |
717 | | |
718 | | static GIntBig VSICurlGetTimeStampFromRFC822DateTime(const char *pszDT) |
719 | 2.45k | { |
720 | | // Sun, 03 Apr 2016 12:07:27 GMT |
721 | 2.45k | if (strlen(pszDT) >= 5 && pszDT[3] == ',' && pszDT[4] == ' ') |
722 | 2.45k | pszDT += 5; |
723 | 2.45k | int nDay = 0; |
724 | 2.45k | int nYear = 0; |
725 | 2.45k | int nHour = 0; |
726 | 2.45k | int nMinute = 0; |
727 | 2.45k | int nSecond = 0; |
728 | 2.45k | char szMonth[4] = {}; |
729 | 2.45k | szMonth[3] = 0; |
730 | 2.45k | if (sscanf(pszDT, "%02d %03s %04d %02d:%02d:%02d GMT", &nDay, szMonth, |
731 | 2.45k | &nYear, &nHour, &nMinute, &nSecond) == 6) |
732 | 2.45k | { |
733 | 2.45k | static const char *const aszMonthStr[] = {"Jan", "Feb", "Mar", "Apr", |
734 | 2.45k | "May", "Jun", "Jul", "Aug", |
735 | 2.45k | "Sep", "Oct", "Nov", "Dec"}; |
736 | | |
737 | 2.45k | int nMonthIdx0 = -1; |
738 | 14.7k | for (int i = 0; i < 12; i++) |
739 | 14.7k | { |
740 | 14.7k | if (EQUAL(szMonth, aszMonthStr[i])) |
741 | 2.45k | { |
742 | 2.45k | nMonthIdx0 = i; |
743 | 2.45k | break; |
744 | 2.45k | } |
745 | 14.7k | } |
746 | 2.45k | if (nMonthIdx0 >= 0) |
747 | 2.45k | { |
748 | 2.45k | struct tm brokendowntime; |
749 | 2.45k | brokendowntime.tm_year = nYear - 1900; |
750 | 2.45k | brokendowntime.tm_mon = nMonthIdx0; |
751 | 2.45k | brokendowntime.tm_mday = nDay; |
752 | 2.45k | brokendowntime.tm_hour = nHour; |
753 | 2.45k | brokendowntime.tm_min = nMinute; |
754 | 2.45k | brokendowntime.tm_sec = nSecond; |
755 | 2.45k | return CPLYMDHMSToUnixTime(&brokendowntime); |
756 | 2.45k | } |
757 | 2.45k | } |
758 | 0 | return 0; |
759 | 2.45k | } |
760 | | |
761 | | /************************************************************************/ |
762 | | /* VSICURLInitWriteFuncStruct() */ |
763 | | /************************************************************************/ |
764 | | |
765 | | void VSICURLInitWriteFuncStruct(cpl::WriteFuncStruct *psStruct, VSILFILE *fp, |
766 | | VSICurlReadCbkFunc pfnReadCbk, |
767 | | void *pReadCbkUserData) |
768 | 516k | { |
769 | 516k | psStruct->pBuffer = nullptr; |
770 | 516k | psStruct->nSize = 0; |
771 | 516k | psStruct->bIsHTTP = false; |
772 | 516k | psStruct->bMultiRange = false; |
773 | 516k | psStruct->nStartOffset = 0; |
774 | 516k | psStruct->nEndOffset = 0; |
775 | 516k | psStruct->nHTTPCode = 0; |
776 | 516k | psStruct->nFirstHTTPCode = 0; |
777 | 516k | psStruct->nContentLength = 0; |
778 | 516k | psStruct->bFoundContentRange = false; |
779 | 516k | psStruct->bError = false; |
780 | 516k | psStruct->bDetectRangeDownloadingError = true; |
781 | 516k | psStruct->nTimestampDate = 0; |
782 | | |
783 | 516k | psStruct->fp = fp; |
784 | 516k | psStruct->pfnReadCbk = pfnReadCbk; |
785 | 516k | psStruct->pReadCbkUserData = pReadCbkUserData; |
786 | 516k | psStruct->bInterrupted = false; |
787 | 516k | } |
788 | | |
789 | | /************************************************************************/ |
790 | | /* VSICurlHandleWriteFunc() */ |
791 | | /************************************************************************/ |
792 | | |
793 | | size_t VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, |
794 | | void *req) |
795 | 53.3k | { |
796 | 53.3k | cpl::WriteFuncStruct *psStruct = static_cast<cpl::WriteFuncStruct *>(req); |
797 | 53.3k | const size_t nSize = count * nmemb; |
798 | | |
799 | 53.3k | if (psStruct->bInterrupted) |
800 | 50 | { |
801 | 50 | return 0; |
802 | 50 | } |
803 | | |
804 | 53.2k | char *pNewBuffer = static_cast<char *>( |
805 | 53.2k | VSIRealloc(psStruct->pBuffer, psStruct->nSize + nSize + 1)); |
806 | 53.2k | if (pNewBuffer) |
807 | 53.2k | { |
808 | 53.2k | psStruct->pBuffer = pNewBuffer; |
809 | 53.2k | memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize); |
810 | 53.2k | psStruct->pBuffer[psStruct->nSize + nSize] = '\0'; |
811 | 53.2k | if (psStruct->bIsHTTP) |
812 | 21.7k | { |
813 | 21.7k | char *pszLine = psStruct->pBuffer + psStruct->nSize; |
814 | 21.7k | if (STARTS_WITH_CI(pszLine, "HTTP/")) |
815 | 2.45k | { |
816 | 2.45k | char *pszSpace = strchr(pszLine, ' '); |
817 | 2.45k | if (pszSpace) |
818 | 2.45k | { |
819 | 2.45k | const int nHTTPCode = atoi(pszSpace + 1); |
820 | 2.45k | if (psStruct->nFirstHTTPCode == 0) |
821 | 2.45k | psStruct->nFirstHTTPCode = nHTTPCode; |
822 | 2.45k | psStruct->nHTTPCode = nHTTPCode; |
823 | 2.45k | } |
824 | 2.45k | } |
825 | 19.3k | else if (STARTS_WITH_CI(pszLine, "Content-Length: ")) |
826 | 2.45k | { |
827 | 2.45k | psStruct->nContentLength = CPLScanUIntBig( |
828 | 2.45k | pszLine + 16, static_cast<int>(strlen(pszLine + 16))); |
829 | 2.45k | } |
830 | 16.8k | else if (STARTS_WITH_CI(pszLine, "Content-Range: ")) |
831 | 0 | { |
832 | 0 | psStruct->bFoundContentRange = true; |
833 | 0 | } |
834 | 16.8k | else if (STARTS_WITH_CI(pszLine, "Date: ")) |
835 | 2.45k | { |
836 | 2.45k | CPLString osDate = pszLine + strlen("Date: "); |
837 | 2.45k | size_t nSizeLine = osDate.size(); |
838 | 7.37k | while (nSizeLine && (osDate[nSizeLine - 1] == '\r' || |
839 | 4.91k | osDate[nSizeLine - 1] == '\n')) |
840 | 4.91k | { |
841 | 4.91k | osDate.resize(nSizeLine - 1); |
842 | 4.91k | nSizeLine--; |
843 | 4.91k | } |
844 | 2.45k | osDate.Trim(); |
845 | | |
846 | 2.45k | GIntBig nTimestampDate = |
847 | 2.45k | VSICurlGetTimeStampFromRFC822DateTime(osDate.c_str()); |
848 | | #if DEBUG_VERBOSE |
849 | | CPLDebug("VSICURL", "Timestamp = " CPL_FRMT_GIB, |
850 | | nTimestampDate); |
851 | | #endif |
852 | 2.45k | psStruct->nTimestampDate = nTimestampDate; |
853 | 2.45k | } |
854 | | /*if( nSize > 2 && pszLine[nSize - 2] == '\r' && |
855 | | pszLine[nSize - 1] == '\n' ) |
856 | | { |
857 | | pszLine[nSize - 2] = 0; |
858 | | CPLDebug("VSICURL", "%s", pszLine); |
859 | | pszLine[nSize - 2] = '\r'; |
860 | | }*/ |
861 | | |
862 | 21.7k | if (pszLine[0] == '\r' && pszLine[1] == '\n') |
863 | 2.45k | { |
864 | | // Detect servers that don't support range downloading. |
865 | 2.45k | if (psStruct->nHTTPCode == 200 && |
866 | 0 | psStruct->bDetectRangeDownloadingError && |
867 | 0 | !psStruct->bMultiRange && !psStruct->bFoundContentRange && |
868 | 0 | (psStruct->nStartOffset != 0 || |
869 | 0 | psStruct->nContentLength > |
870 | 0 | 10 * (psStruct->nEndOffset - psStruct->nStartOffset + |
871 | 0 | 1))) |
872 | 0 | { |
873 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
874 | 0 | "Range downloading not supported by this " |
875 | 0 | "server!"); |
876 | 0 | psStruct->bError = true; |
877 | 0 | return 0; |
878 | 0 | } |
879 | 2.45k | } |
880 | 21.7k | } |
881 | 31.5k | else |
882 | 31.5k | { |
883 | 31.5k | if (psStruct->pfnReadCbk) |
884 | 0 | { |
885 | 0 | if (!psStruct->pfnReadCbk(psStruct->fp, buffer, nSize, |
886 | 0 | psStruct->pReadCbkUserData)) |
887 | 0 | { |
888 | 0 | psStruct->bInterrupted = true; |
889 | 0 | return 0; |
890 | 0 | } |
891 | 0 | } |
892 | 31.5k | } |
893 | 53.2k | psStruct->nSize += nSize; |
894 | 53.2k | return nmemb; |
895 | 53.2k | } |
896 | 0 | else |
897 | 0 | { |
898 | 0 | return 0; |
899 | 0 | } |
900 | 53.2k | } |
901 | | |
902 | | /************************************************************************/ |
903 | | /* VSICurlIsS3LikeSignedURL() */ |
904 | | /************************************************************************/ |
905 | | |
// Tell whether pszURL looks like a pre-signed URL of a S3-like service.
//
// Returns true when either:
// - the URL contains one of the known host suffixes (AWS S3, Google Cloud
//   Storage, CloudFront), followed by '/' or ':', together with a
//   "Signature=" query parameter, or
// - the URL contains a "X-Amz-Signature=" query parameter, whatever the
//   host is.
// Matching is done with plain case-sensitive substring searches.
static bool VSICurlIsS3LikeSignedURL(const char *pszURL)
{
    const auto Contains = [pszURL](const char *pszNeedle)
    { return strstr(pszURL, pszNeedle) != nullptr; };

    static const char *const apszSignedHostMarkers[] = {
        ".s3.amazonaws.com/",       ".s3.amazonaws.com:",
        ".storage.googleapis.com/", ".storage.googleapis.com:",
        ".cloudfront.net/",         ".cloudfront.net:",
    };

    bool bKnownHost = false;
    for (const char *pszMarker : apszSignedHostMarkers)
    {
        if (Contains(pszMarker))
        {
            bKnownHost = true;
            break;
        }
    }

    if (bKnownHost && (Contains("&Signature=") || Contains("?Signature=")))
        return true;

    return Contains("&X-Amz-Signature=") || Contains("?X-Amz-Signature=");
}
919 | | |
920 | | /************************************************************************/ |
921 | | /* VSICurlGetExpiresFromS3LikeSignedURL() */ |
922 | | /************************************************************************/ |
923 | | |
924 | | static GIntBig VSICurlGetExpiresFromS3LikeSignedURL(const char *pszURL) |
925 | 0 | { |
926 | 0 | const auto GetParamValue = [pszURL](const char *pszKey) -> const char * |
927 | 0 | { |
928 | 0 | for (const char *pszPrefix : {"&", "?"}) |
929 | 0 | { |
930 | 0 | std::string osNeedle(pszPrefix); |
931 | 0 | osNeedle += pszKey; |
932 | 0 | osNeedle += '='; |
933 | 0 | const char *pszStr = strstr(pszURL, osNeedle.c_str()); |
934 | 0 | if (pszStr) |
935 | 0 | return pszStr + osNeedle.size(); |
936 | 0 | } |
937 | 0 | return nullptr; |
938 | 0 | }; |
939 | |
|
940 | 0 | { |
941 | | // Expires= is a Unix timestamp |
942 | 0 | const char *pszExpires = GetParamValue("Expires"); |
943 | 0 | if (pszExpires != nullptr) |
944 | 0 | return CPLAtoGIntBig(pszExpires); |
945 | 0 | } |
946 | | |
947 | | // X-Amz-Expires= is a delay, to be combined with X-Amz-Date= |
948 | 0 | const char *pszAmzExpires = GetParamValue("X-Amz-Expires"); |
949 | 0 | if (pszAmzExpires == nullptr) |
950 | 0 | return 0; |
951 | 0 | const int nDelay = atoi(pszAmzExpires); |
952 | |
|
953 | 0 | const char *pszAmzDate = GetParamValue("X-Amz-Date"); |
954 | 0 | if (pszAmzDate == nullptr) |
955 | 0 | return 0; |
956 | | // pszAmzDate should be YYYYMMDDTHHMMSSZ |
957 | 0 | if (strlen(pszAmzDate) < strlen("YYYYMMDDTHHMMSSZ")) |
958 | 0 | return 0; |
959 | 0 | if (pszAmzDate[strlen("YYYYMMDDTHHMMSSZ") - 1] != 'Z') |
960 | 0 | return 0; |
961 | 0 | struct tm brokendowntime; |
962 | 0 | brokendowntime.tm_year = |
963 | 0 | atoi(std::string(pszAmzDate).substr(0, 4).c_str()) - 1900; |
964 | 0 | brokendowntime.tm_mon = |
965 | 0 | atoi(std::string(pszAmzDate).substr(4, 2).c_str()) - 1; |
966 | 0 | brokendowntime.tm_mday = atoi(std::string(pszAmzDate).substr(6, 2).c_str()); |
967 | 0 | brokendowntime.tm_hour = atoi(std::string(pszAmzDate).substr(9, 2).c_str()); |
968 | 0 | brokendowntime.tm_min = atoi(std::string(pszAmzDate).substr(11, 2).c_str()); |
969 | 0 | brokendowntime.tm_sec = atoi(std::string(pszAmzDate).substr(13, 2).c_str()); |
970 | 0 | return CPLYMDHMSToUnixTime(&brokendowntime) + nDelay; |
971 | 0 | } |
972 | | |
973 | | /************************************************************************/ |
974 | | /* VSICURLMultiPerform() */ |
975 | | /************************************************************************/ |
976 | | |
977 | | void VSICURLMultiPerform(CURLM *hCurlMultiHandle, CURL *hEasyHandle, |
978 | | std::atomic<bool> *pbInterrupt) |
979 | 260k | { |
980 | 260k | int repeats = 0; |
981 | | |
982 | 260k | if (hEasyHandle) |
983 | 260k | curl_multi_add_handle(hCurlMultiHandle, hEasyHandle); |
984 | | |
985 | 260k | void *old_handler = CPLHTTPIgnoreSigPipe(); |
986 | 273k | while (true) |
987 | 273k | { |
988 | 273k | int still_running; |
989 | 273k | while (curl_multi_perform(hCurlMultiHandle, &still_running) == |
990 | 273k | CURLM_CALL_MULTI_PERFORM) |
991 | 0 | { |
992 | | // loop |
993 | 0 | } |
994 | 273k | if (!still_running) |
995 | 260k | { |
996 | 260k | break; |
997 | 260k | } |
998 | | |
999 | | #ifdef undef |
1000 | | CURLMsg *msg; |
1001 | | do |
1002 | | { |
1003 | | int msgq = 0; |
1004 | | msg = curl_multi_info_read(hCurlMultiHandle, &msgq); |
1005 | | if (msg && (msg->msg == CURLMSG_DONE)) |
1006 | | { |
1007 | | CURL *e = msg->easy_handle; |
1008 | | } |
1009 | | } while (msg); |
1010 | | #endif |
1011 | | |
1012 | 13.2k | CPLMultiPerformWait(hCurlMultiHandle, repeats); |
1013 | | |
1014 | 13.2k | if (pbInterrupt && *pbInterrupt) |
1015 | 0 | break; |
1016 | 13.2k | } |
1017 | 260k | CPLHTTPRestoreSigPipeHandler(old_handler); |
1018 | | |
1019 | 260k | if (hEasyHandle) |
1020 | 260k | curl_multi_remove_handle(hCurlMultiHandle, hEasyHandle); |
1021 | 260k | } |
1022 | | |
1023 | | /************************************************************************/ |
1024 | | /* VSICurlDummyWriteFunc() */ |
1025 | | /************************************************************************/ |
1026 | | |
// Placeholder callback with the signature of a curl header/write function.
// It ignores all its arguments and always reports that zero bytes were
// consumed.
// NOTE(review): presumably libcurl treats this as a failed write if it is
// ever invoked with data - confirm against CURLOPT_WRITEFUNCTION docs.
static size_t VSICurlDummyWriteFunc(void * /* buffer */, size_t /* count */,
                                    size_t /* nmemb */, void * /* req */)
{
    constexpr size_t nBytesConsumed = 0;
    return nBytesConsumed;
}
1031 | | |
1032 | | /************************************************************************/ |
1033 | | /* VSICURLResetHeaderAndWriterFunctions() */ |
1034 | | /************************************************************************/ |
1035 | | |
1036 | | void VSICURLResetHeaderAndWriterFunctions(CURL *hCurlHandle) |
1037 | 259k | { |
1038 | 259k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
1039 | 259k | VSICurlDummyWriteFunc); |
1040 | 259k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
1041 | 259k | VSICurlDummyWriteFunc); |
1042 | 259k | } |
1043 | | |
1044 | | /************************************************************************/ |
1045 | | /* Iso8601ToUnixTime() */ |
1046 | | /************************************************************************/ |
1047 | | |
1048 | | static bool Iso8601ToUnixTime(const char *pszDT, GIntBig *pnUnixTime) |
1049 | 0 | { |
1050 | 0 | int nYear; |
1051 | 0 | int nMonth; |
1052 | 0 | int nDay; |
1053 | 0 | int nHour; |
1054 | 0 | int nMinute; |
1055 | 0 | int nSecond; |
1056 | 0 | if (sscanf(pszDT, "%04d-%02d-%02dT%02d:%02d:%02d", &nYear, &nMonth, &nDay, |
1057 | 0 | &nHour, &nMinute, &nSecond) == 6) |
1058 | 0 | { |
1059 | 0 | struct tm brokendowntime; |
1060 | 0 | brokendowntime.tm_year = nYear - 1900; |
1061 | 0 | brokendowntime.tm_mon = nMonth - 1; |
1062 | 0 | brokendowntime.tm_mday = nDay; |
1063 | 0 | brokendowntime.tm_hour = nHour; |
1064 | 0 | brokendowntime.tm_min = nMinute; |
1065 | 0 | brokendowntime.tm_sec = nSecond; |
1066 | 0 | *pnUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); |
1067 | 0 | return true; |
1068 | 0 | } |
1069 | 0 | return false; |
1070 | 0 | } |
1071 | | |
1072 | | namespace cpl |
1073 | | { |
1074 | | |
1075 | | /************************************************************************/ |
1076 | | /* ManagePlanetaryComputerSigning() */ |
1077 | | /************************************************************************/ |
1078 | | |
1079 | | void VSICurlHandle::ManagePlanetaryComputerSigning() const |
1080 | 9.99k | { |
1081 | | // Take global lock |
1082 | 9.99k | static std::mutex goMutex; |
1083 | 9.99k | std::lock_guard<std::mutex> oLock(goMutex); |
1084 | | |
1085 | 9.99k | struct PCSigningInfo |
1086 | 9.99k | { |
1087 | 9.99k | std::string osQueryString{}; |
1088 | 9.99k | GIntBig nExpireTimestamp = 0; |
1089 | 9.99k | }; |
1090 | | |
1091 | 9.99k | PCSigningInfo sSigningInfo; |
1092 | 9.99k | constexpr int knExpirationDelayMargin = 60; |
1093 | | |
1094 | 9.99k | if (!m_osPlanetaryComputerCollection.empty()) |
1095 | 1.75k | { |
1096 | | // key is the name of a collection |
1097 | 1.75k | static lru11::Cache<std::string, PCSigningInfo> goCacheCollection{1024}; |
1098 | | |
1099 | 1.75k | if (goCacheCollection.tryGet(m_osPlanetaryComputerCollection, |
1100 | 1.75k | sSigningInfo) && |
1101 | 0 | time(nullptr) + knExpirationDelayMargin <= |
1102 | 0 | sSigningInfo.nExpireTimestamp) |
1103 | 0 | { |
1104 | 0 | m_osQueryString = sSigningInfo.osQueryString; |
1105 | 0 | } |
1106 | 1.75k | else |
1107 | 1.75k | { |
1108 | 1.75k | const auto psResult = |
1109 | 1.75k | CPLHTTPFetch((std::string(CPLGetConfigOption( |
1110 | 1.75k | "VSICURL_PC_SAS_TOKEN_URL", |
1111 | 1.75k | "https://planetarycomputer.microsoft.com/api/" |
1112 | 1.75k | "sas/v1/token/")) + |
1113 | 1.75k | m_osPlanetaryComputerCollection) |
1114 | 1.75k | .c_str(), |
1115 | 1.75k | nullptr); |
1116 | 1.75k | if (psResult) |
1117 | 1.75k | { |
1118 | 1.75k | const auto aosKeyVals = CPLParseKeyValueJson( |
1119 | 1.75k | reinterpret_cast<const char *>(psResult->pabyData)); |
1120 | 1.75k | const char *pszToken = aosKeyVals.FetchNameValue("token"); |
1121 | 1.75k | if (pszToken) |
1122 | 0 | { |
1123 | 0 | m_osQueryString = '?'; |
1124 | 0 | m_osQueryString += pszToken; |
1125 | |
|
1126 | 0 | sSigningInfo.osQueryString = m_osQueryString; |
1127 | 0 | sSigningInfo.nExpireTimestamp = 0; |
1128 | 0 | const char *pszExpiry = |
1129 | 0 | aosKeyVals.FetchNameValue("msft:expiry"); |
1130 | 0 | if (pszExpiry) |
1131 | 0 | { |
1132 | 0 | Iso8601ToUnixTime(pszExpiry, |
1133 | 0 | &sSigningInfo.nExpireTimestamp); |
1134 | 0 | } |
1135 | 0 | goCacheCollection.insert(m_osPlanetaryComputerCollection, |
1136 | 0 | sSigningInfo); |
1137 | |
|
1138 | 0 | CPLDebug("VSICURL", "Got token from Planetary Computer: %s", |
1139 | 0 | m_osQueryString.c_str()); |
1140 | 0 | } |
1141 | 1.75k | CPLHTTPDestroyResult(psResult); |
1142 | 1.75k | } |
1143 | 1.75k | } |
1144 | 1.75k | } |
1145 | 8.23k | else |
1146 | 8.23k | { |
1147 | | // key is a URL |
1148 | 8.23k | static lru11::Cache<std::string, PCSigningInfo> goCacheURL{1024}; |
1149 | | |
1150 | 8.23k | if (goCacheURL.tryGet(m_pszURL, sSigningInfo) && |
1151 | 0 | time(nullptr) + knExpirationDelayMargin <= |
1152 | 0 | sSigningInfo.nExpireTimestamp) |
1153 | 0 | { |
1154 | 0 | m_osQueryString = sSigningInfo.osQueryString; |
1155 | 0 | } |
1156 | 8.23k | else |
1157 | 8.23k | { |
1158 | 8.23k | const auto psResult = |
1159 | 8.23k | CPLHTTPFetch((std::string(CPLGetConfigOption( |
1160 | 8.23k | "VSICURL_PC_SAS_SIGN_HREF_URL", |
1161 | 8.23k | "https://planetarycomputer.microsoft.com/api/" |
1162 | 8.23k | "sas/v1/sign?href=")) + |
1163 | 8.23k | m_pszURL) |
1164 | 8.23k | .c_str(), |
1165 | 8.23k | nullptr); |
1166 | 8.23k | if (psResult) |
1167 | 8.23k | { |
1168 | 8.23k | const auto aosKeyVals = CPLParseKeyValueJson( |
1169 | 8.23k | reinterpret_cast<const char *>(psResult->pabyData)); |
1170 | 8.23k | const char *pszHref = aosKeyVals.FetchNameValue("href"); |
1171 | 8.23k | if (pszHref && STARTS_WITH(pszHref, m_pszURL)) |
1172 | 0 | { |
1173 | 0 | m_osQueryString = pszHref + strlen(m_pszURL); |
1174 | |
|
1175 | 0 | sSigningInfo.osQueryString = m_osQueryString; |
1176 | 0 | sSigningInfo.nExpireTimestamp = 0; |
1177 | 0 | const char *pszExpiry = |
1178 | 0 | aosKeyVals.FetchNameValue("msft:expiry"); |
1179 | 0 | if (pszExpiry) |
1180 | 0 | { |
1181 | 0 | Iso8601ToUnixTime(pszExpiry, |
1182 | 0 | &sSigningInfo.nExpireTimestamp); |
1183 | 0 | } |
1184 | 0 | goCacheURL.insert(m_pszURL, sSigningInfo); |
1185 | |
|
1186 | 0 | CPLDebug("VSICURL", |
1187 | 0 | "Got signature from Planetary Computer: %s", |
1188 | 0 | m_osQueryString.c_str()); |
1189 | 0 | } |
1190 | 8.23k | CPLHTTPDestroyResult(psResult); |
1191 | 8.23k | } |
1192 | 8.23k | } |
1193 | 8.23k | } |
1194 | 9.99k | } |
1195 | | |
1196 | | /************************************************************************/ |
1197 | | /* UpdateQueryString() */ |
1198 | | /************************************************************************/ |
1199 | | |
1200 | | void VSICurlHandle::UpdateQueryString() const |
1201 | 146k | { |
1202 | 146k | if (m_bPlanetaryComputerURLSigning) |
1203 | 9.99k | { |
1204 | 9.99k | ManagePlanetaryComputerSigning(); |
1205 | 9.99k | } |
1206 | 136k | else |
1207 | 136k | { |
1208 | 136k | const char *pszQueryString = VSIGetPathSpecificOption( |
1209 | 136k | m_osFilename.c_str(), "VSICURL_QUERY_STRING", nullptr); |
1210 | 136k | if (pszQueryString) |
1211 | 0 | { |
1212 | 0 | if (m_osFilename.back() == '?') |
1213 | 0 | { |
1214 | 0 | if (pszQueryString[0] == '?') |
1215 | 0 | m_osQueryString = pszQueryString + 1; |
1216 | 0 | else |
1217 | 0 | m_osQueryString = pszQueryString; |
1218 | 0 | } |
1219 | 0 | else |
1220 | 0 | { |
1221 | 0 | if (pszQueryString[0] == '?') |
1222 | 0 | m_osQueryString = pszQueryString; |
1223 | 0 | else |
1224 | 0 | { |
1225 | 0 | m_osQueryString = "?"; |
1226 | 0 | m_osQueryString.append(pszQueryString); |
1227 | 0 | } |
1228 | 0 | } |
1229 | 0 | } |
1230 | 136k | } |
1231 | 146k | } |
1232 | | |
1233 | | /************************************************************************/ |
1234 | | /* GetFileSizeOrHeaders() */ |
1235 | | /************************************************************************/ |
1236 | | |
1237 | | vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError, |
1238 | | bool bGetHeaders) |
1239 | 259k | { |
1240 | 259k | if (oFileProp.bHasComputedFileSize && !bGetHeaders) |
1241 | 123k | return oFileProp.fileSize; |
1242 | | |
1243 | 136k | NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str()); |
1244 | 136k | NetworkStatisticsFile oContextFile(m_osFilename.c_str()); |
1245 | 136k | NetworkStatisticsAction oContextAction("GetFileSize"); |
1246 | | |
1247 | 136k | oFileProp.bHasComputedFileSize = true; |
1248 | | |
1249 | 136k | CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL); |
1250 | | |
1251 | 136k | UpdateQueryString(); |
1252 | | |
1253 | 136k | std::string osURL(m_pszURL + m_osQueryString); |
1254 | 136k | int nTryCount = 0; |
1255 | 136k | bool bRetryWithGet = false; |
1256 | 136k | bool bRetryWithLimitedRangeGet = false; |
1257 | 136k | bool bS3LikeRedirect = false; |
1258 | 136k | CPLHTTPRetryContext oRetryContext(m_oRetryParameters); |
1259 | | |
1260 | 242k | retry: |
1261 | 242k | ++nTryCount; |
1262 | 242k | CURL *hCurlHandle = curl_easy_init(); |
1263 | | |
1264 | 242k | struct curl_slist *headers = nullptr; |
1265 | 242k | if (bS3LikeRedirect) |
1266 | 504 | { |
1267 | | // Do not propagate authentication sent to the original URL to a S3-like |
1268 | | // redirect. |
1269 | 504 | CPLStringList aosHTTPOptions{}; |
1270 | 504 | for (const auto &pszOption : m_aosHTTPOptions) |
1271 | 1.00k | { |
1272 | 1.00k | if (STARTS_WITH_CI(pszOption, "HTTPAUTH") || |
1273 | 1.00k | STARTS_WITH_CI(pszOption, "HTTP_BEARER")) |
1274 | 0 | continue; |
1275 | 1.00k | aosHTTPOptions.AddString(pszOption); |
1276 | 1.00k | } |
1277 | 504 | headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(), |
1278 | 504 | aosHTTPOptions.List()); |
1279 | 504 | } |
1280 | 242k | else |
1281 | 242k | { |
1282 | 242k | headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(), |
1283 | 242k | m_aosHTTPOptions.List()); |
1284 | 242k | } |
1285 | | |
1286 | 242k | WriteFuncStruct sWriteFuncHeaderData; |
1287 | 242k | VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr, |
1288 | 242k | nullptr); |
1289 | 242k | sWriteFuncHeaderData.bDetectRangeDownloadingError = false; |
1290 | 242k | sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(osURL.c_str(), "http"); |
1291 | | |
1292 | 242k | WriteFuncStruct sWriteFuncData; |
1293 | 242k | VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr); |
1294 | | |
1295 | 242k | std::string osVerb; |
1296 | 242k | std::string osRange; // leave in this scope ! |
1297 | 242k | int nRoundedBufSize = 0; |
1298 | 242k | const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize(); |
1299 | 242k | bool bHasUsedLimitedRangeGet = false; |
1300 | 242k | if (bRetryWithLimitedRangeGet || UseLimitRangeGetInsteadOfHead()) |
1301 | 2.24k | { |
1302 | 2.24k | bHasUsedLimitedRangeGet = true; |
1303 | 2.24k | osVerb = "GET"; |
1304 | 2.24k | const int nBufSize = std::clamp( |
1305 | 2.24k | atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024")), |
1306 | 2.24k | 1024, 10 * 1024 * 1024); |
1307 | 2.24k | nRoundedBufSize = cpl::div_round_up(nBufSize, knDOWNLOAD_CHUNK_SIZE) * |
1308 | 2.24k | knDOWNLOAD_CHUNK_SIZE; |
1309 | | |
1310 | | // so it gets included in Azure signature |
1311 | 2.24k | osRange = CPLSPrintf("Range: bytes=0-%d", nRoundedBufSize - 1); |
1312 | 2.24k | headers = curl_slist_append(headers, osRange.c_str()); |
1313 | 2.24k | } |
1314 | | // HACK for mbtiles driver: http://a.tiles.mapbox.com/v3/ doesn't accept |
1315 | | // HEAD, as it is a redirect to AWS S3 signed URL, but those are only valid |
1316 | | // for a given type of HTTP request, and thus GET. This is valid for any |
1317 | | // signed URL for AWS S3. |
1318 | 240k | else if (bRetryWithGet || |
1319 | 240k | strstr(osURL.c_str(), ".tiles.mapbox.com/") != nullptr || |
1320 | 237k | VSICurlIsS3LikeSignedURL(osURL.c_str()) || !m_bUseHead) |
1321 | 13.1k | { |
1322 | 13.1k | sWriteFuncData.bInterrupted = true; |
1323 | 13.1k | osVerb = "GET"; |
1324 | 13.1k | } |
1325 | 227k | else |
1326 | 227k | { |
1327 | 227k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1); |
1328 | 227k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0); |
1329 | 227k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1); |
1330 | 227k | osVerb = "HEAD"; |
1331 | 227k | } |
1332 | | |
1333 | 242k | bRetryWithLimitedRangeGet = false; |
1334 | | |
1335 | 242k | if (!AllowAutomaticRedirection()) |
1336 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0); |
1337 | | |
1338 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
1339 | 242k | &sWriteFuncHeaderData); |
1340 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
1341 | 242k | VSICurlHandleWriteFunc); |
1342 | | |
1343 | | // Bug with older curl versions (<=7.16.4) and FTP. |
1344 | | // See http://curl.haxx.se/mail/lib-2007-08/0312.html |
1345 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData); |
1346 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
1347 | 242k | VSICurlHandleWriteFunc); |
1348 | | |
1349 | 242k | char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {}; |
1350 | 242k | szCurlErrBuf[0] = '\0'; |
1351 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf); |
1352 | | |
1353 | 242k | headers = GetCurlHeaders(osVerb, headers); |
1354 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
1355 | | |
1356 | 242k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1); |
1357 | | |
1358 | 242k | VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt); |
1359 | | |
1360 | 242k | VSICURLResetHeaderAndWriterFunctions(hCurlHandle); |
1361 | | |
1362 | 242k | curl_slist_free_all(headers); |
1363 | | |
1364 | 242k | oFileProp.eExists = EXIST_UNKNOWN; |
1365 | | |
1366 | 242k | curl_off_t filetime = -1; |
1367 | 242k | GIntBig mtime = 0; |
1368 | 242k | if (curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME_T, &filetime) == |
1369 | 242k | CURLE_OK && |
1370 | 242k | filetime != -1) |
1371 | 225 | { |
1372 | 225 | mtime = static_cast<GIntBig>(filetime); |
1373 | 225 | } |
1374 | | |
1375 | 242k | if (osVerb == "GET") |
1376 | 15.3k | NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize); |
1377 | 227k | else |
1378 | 227k | NetworkStatisticsLogger::LogHEAD(); |
1379 | | |
1380 | 242k | if (STARTS_WITH(osURL.c_str(), "ftp")) |
1381 | 24.7k | { |
1382 | 24.7k | if (sWriteFuncData.pBuffer != nullptr) |
1383 | 0 | { |
1384 | 0 | const char *pszContentLength = |
1385 | 0 | strstr(const_cast<const char *>(sWriteFuncData.pBuffer), |
1386 | 0 | "Content-Length: "); |
1387 | 0 | if (pszContentLength) |
1388 | 0 | { |
1389 | 0 | pszContentLength += strlen("Content-Length: "); |
1390 | 0 | oFileProp.eExists = EXIST_YES; |
1391 | 0 | oFileProp.fileSize = |
1392 | 0 | CPLScanUIntBig(pszContentLength, |
1393 | 0 | static_cast<int>(strlen(pszContentLength))); |
1394 | | if constexpr (ENABLE_DEBUG) |
1395 | 0 | { |
1396 | 0 | CPLDebug(poFS->GetDebugKey(), |
1397 | 0 | "GetFileSize(%s)=" CPL_FRMT_GUIB, osURL.c_str(), |
1398 | 0 | oFileProp.fileSize); |
1399 | 0 | } |
1400 | 0 | } |
1401 | 0 | } |
1402 | 24.7k | } |
1403 | | |
1404 | 242k | double dfSize = 0; |
1405 | 242k | long response_code = -1; |
1406 | 242k | if (oFileProp.eExists != EXIST_YES) |
1407 | 242k | { |
1408 | 242k | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
1409 | | |
1410 | 242k | bool bAlreadyLogged = false; |
1411 | 242k | if (response_code >= 400 && szCurlErrBuf[0] == '\0') |
1412 | 2.17k | { |
1413 | 2.17k | const bool bLogResponse = |
1414 | 2.17k | CPLTestBool(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO")); |
1415 | 2.17k | if (bLogResponse && sWriteFuncData.pBuffer) |
1416 | 0 | { |
1417 | 0 | const char *pszErrorMsg = |
1418 | 0 | static_cast<const char *>(sWriteFuncData.pBuffer); |
1419 | 0 | bAlreadyLogged = true; |
1420 | 0 | CPLDebug( |
1421 | 0 | poFS->GetDebugKey(), |
1422 | 0 | "GetFileSize(%s): response_code=%d, server error msg=%s", |
1423 | 0 | osURL.c_str(), static_cast<int>(response_code), |
1424 | 0 | pszErrorMsg[0] ? pszErrorMsg : "(no message provided)"); |
1425 | 0 | } |
1426 | 2.17k | } |
1427 | 240k | else if (szCurlErrBuf[0] != '\0') |
1428 | 240k | { |
1429 | 240k | bAlreadyLogged = true; |
1430 | 240k | CPLDebug(poFS->GetDebugKey(), |
1431 | 240k | "GetFileSize(%s): response_code=%d, curl error msg=%s", |
1432 | 240k | osURL.c_str(), static_cast<int>(response_code), |
1433 | 240k | szCurlErrBuf); |
1434 | 240k | } |
1435 | | |
1436 | 242k | std::string osEffectiveURL; |
1437 | 242k | { |
1438 | 242k | char *pszEffectiveURL = nullptr; |
1439 | 242k | curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, |
1440 | 242k | &pszEffectiveURL); |
1441 | 242k | if (pszEffectiveURL) |
1442 | 242k | osEffectiveURL = pszEffectiveURL; |
1443 | 242k | } |
1444 | | |
1445 | 242k | if (!osEffectiveURL.empty() && |
1446 | 242k | strstr(osEffectiveURL.c_str(), osURL.c_str()) == nullptr) |
1447 | 56.1k | { |
1448 | | // Moved permanently ? |
1449 | 56.1k | if (sWriteFuncHeaderData.nFirstHTTPCode == 301 || |
1450 | 56.1k | (m_bUseRedirectURLIfNoQueryStringParams && |
1451 | 2.03k | osEffectiveURL.find('?') == std::string::npos)) |
1452 | 109 | { |
1453 | 109 | CPLDebug(poFS->GetDebugKey(), |
1454 | 109 | "Using effective URL %s permanently", |
1455 | 109 | osEffectiveURL.c_str()); |
1456 | 109 | oFileProp.osRedirectURL = osEffectiveURL; |
1457 | 109 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
1458 | 109 | } |
1459 | 56.0k | else |
1460 | 56.0k | { |
1461 | 56.0k | CPLDebug(poFS->GetDebugKey(), |
1462 | 56.0k | "Using effective URL %s temporarily", |
1463 | 56.0k | osEffectiveURL.c_str()); |
1464 | 56.0k | } |
1465 | | |
1466 | | // Is this is a redirect to a S3 URL? |
1467 | 56.1k | if (VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) && |
1468 | 2.15k | !VSICurlIsS3LikeSignedURL(osURL.c_str())) |
1469 | 589 | { |
1470 | | // Note that this is a redirect as we won't notice after the |
1471 | | // retry. |
1472 | 589 | bS3LikeRedirect = true; |
1473 | | |
1474 | 589 | if (!bRetryWithGet && osVerb == "HEAD" && response_code == 403) |
1475 | 0 | { |
1476 | 0 | CPLDebug(poFS->GetDebugKey(), |
1477 | 0 | "Redirected to a AWS S3 signed URL. Retrying " |
1478 | 0 | "with GET request instead of HEAD since the URL " |
1479 | 0 | "might be valid only for GET"); |
1480 | 0 | bRetryWithGet = true; |
1481 | 0 | osURL = std::move(osEffectiveURL); |
1482 | 0 | CPLFree(sWriteFuncData.pBuffer); |
1483 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1484 | 0 | curl_easy_cleanup(hCurlHandle); |
1485 | 0 | goto retry; |
1486 | 0 | } |
1487 | 589 | } |
1488 | 55.5k | else if (oFileProp.osRedirectURL.empty() && nTryCount == 1 && |
1489 | 26.4k | ((response_code >= 300 && response_code < 400) || |
1490 | 26.4k | (osVerb == "HEAD" && response_code == 403))) |
1491 | 1 | { |
1492 | 1 | if (response_code == 403) |
1493 | 1 | { |
1494 | 1 | CPLDebug( |
1495 | 1 | poFS->GetDebugKey(), |
1496 | 1 | "Retrying redirected URL with GET instead of HEAD"); |
1497 | 1 | bRetryWithGet = true; |
1498 | 1 | } |
1499 | 1 | osURL = std::move(osEffectiveURL); |
1500 | 1 | CPLFree(sWriteFuncData.pBuffer); |
1501 | 1 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1502 | 1 | curl_easy_cleanup(hCurlHandle); |
1503 | 1 | goto retry; |
1504 | 1 | } |
1505 | 56.1k | } |
1506 | | |
1507 | 242k | if (bS3LikeRedirect && response_code >= 200 && response_code < 300 && |
1508 | 0 | sWriteFuncHeaderData.nTimestampDate > 0 && |
1509 | 0 | !osEffectiveURL.empty() && |
1510 | 0 | CPLTestBool( |
1511 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE"))) |
1512 | 0 | { |
1513 | 0 | const GIntBig nExpireTimestamp = |
1514 | 0 | VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str()); |
1515 | 0 | if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10) |
1516 | 0 | { |
1517 | 0 | const int nValidity = static_cast<int>( |
1518 | 0 | nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate); |
1519 | 0 | CPLDebug(poFS->GetDebugKey(), |
1520 | 0 | "Will use redirect URL for the next %d seconds", |
1521 | 0 | nValidity); |
1522 | | // As our local clock might not be in sync with server clock, |
1523 | | // figure out the expiration timestamp in local time |
1524 | 0 | oFileProp.bS3LikeRedirect = true; |
1525 | 0 | oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity; |
1526 | 0 | oFileProp.osRedirectURL = osEffectiveURL; |
1527 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
1528 | 0 | } |
1529 | 0 | } |
1530 | | |
1531 | | // Split a string with the raw HTTP response headers as a key/value |
1532 | | // CPLStringList |
1533 | 242k | const auto TokenizeHeaders = [](const char *pszHeaders) -> CPLStringList |
1534 | 242k | { |
1535 | 240k | CPLStringList aosHeaders; |
1536 | 240k | while (pszHeaders) |
1537 | 262 | { |
1538 | 262 | const char *pszDelim = strchr(pszHeaders, ':'); |
1539 | 262 | if (!pszDelim) |
1540 | 131 | break; |
1541 | 131 | const char *pszValue = pszDelim + 1; |
1542 | | |
1543 | | // Skip whitespace after colon |
1544 | 262 | while (*pszValue == ' ' || *pszValue == '\t') |
1545 | 131 | ++pszValue; |
1546 | | |
1547 | | // Find end of value |
1548 | 131 | const char *pszEndOfValue = pszValue; |
1549 | 3.93k | while (*pszEndOfValue && |
1550 | 3.93k | !(*pszEndOfValue == '\r' && pszEndOfValue[1] == '\n')) |
1551 | 3.79k | ++pszEndOfValue; |
1552 | | |
1553 | 131 | aosHeaders.SetNameValue( |
1554 | 131 | std::string(pszHeaders, pszDelim - pszHeaders).c_str(), |
1555 | 131 | std::string(pszValue, pszEndOfValue - pszValue).c_str()); |
1556 | | |
1557 | 131 | if (*pszEndOfValue == '\r' && pszEndOfValue[1] == '\n') |
1558 | 131 | pszHeaders = pszEndOfValue + 2; |
1559 | 0 | else |
1560 | 0 | break; |
1561 | 131 | } |
1562 | 240k | return aosHeaders; |
1563 | 240k | }; |
1564 | | |
1565 | 242k | if (response_code < 300) |
1566 | 240k | { |
1567 | 240k | curl_off_t nSizeTmp = 0; |
1568 | 240k | const CURLcode code = curl_easy_getinfo( |
1569 | 240k | hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &nSizeTmp); |
1570 | 240k | CPL_IGNORE_RET_VAL(dfSize); |
1571 | 240k | dfSize = static_cast<double>(nSizeTmp); |
1572 | 240k | if (code == 0) |
1573 | 240k | { |
1574 | 240k | if (dfSize < 0) |
1575 | 240k | { |
1576 | 240k | if (osVerb == "HEAD" && !bRetryWithGet && |
1577 | 227k | response_code == 200) |
1578 | 0 | { |
1579 | 0 | if (sWriteFuncHeaderData.pBuffer) |
1580 | 0 | { |
1581 | 0 | const CPLStringList aosHeaders( |
1582 | 0 | TokenizeHeaders(sWriteFuncHeaderData.pBuffer)); |
1583 | 0 | if (strcmp(aosHeaders.FetchNameValueDef( |
1584 | 0 | "accept-ranges", ""), |
1585 | 0 | "bytes") == 0) |
1586 | 0 | { |
1587 | 0 | CPLDebug( |
1588 | 0 | poFS->GetDebugKey(), |
1589 | 0 | "HEAD did not provide file size. Retrying " |
1590 | 0 | "with limited range GET"); |
1591 | 0 | bRetryWithLimitedRangeGet = true; |
1592 | 0 | CPLFree(sWriteFuncData.pBuffer); |
1593 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1594 | 0 | curl_easy_cleanup(hCurlHandle); |
1595 | 0 | goto retry; |
1596 | 0 | } |
1597 | 0 | } |
1598 | | |
1599 | 0 | CPLDebug(poFS->GetDebugKey(), |
1600 | 0 | "HEAD did not provide file size. Retrying " |
1601 | 0 | "with GET"); |
1602 | 0 | bRetryWithGet = true; |
1603 | 0 | CPLFree(sWriteFuncData.pBuffer); |
1604 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1605 | 0 | curl_easy_cleanup(hCurlHandle); |
1606 | 0 | goto retry; |
1607 | 0 | } |
1608 | | |
1609 | 240k | if (poFS->GetFSPrefix() == "/vsicurl/" || |
1610 | 65 | poFS->GetFSPrefix() == "/vsicurl?") |
1611 | 240k | { |
1612 | 240k | const CPLStringList aosHeaders( |
1613 | 240k | TokenizeHeaders(sWriteFuncHeaderData.pBuffer)); |
1614 | 240k | if (strcmp(aosHeaders.FetchNameValueDef( |
1615 | 240k | "transfer-encoding", ""), |
1616 | 240k | "chunked") == 0) |
1617 | 0 | { |
1618 | 0 | CPLError( |
1619 | 0 | CE_Failure, CPLE_AppDefined, |
1620 | 0 | "Server does not seem to support range " |
1621 | 0 | "requests. " |
1622 | 0 | "Maybe retry with /vsicurl_streaming/ if the " |
1623 | 0 | "read " |
1624 | 0 | "access pattern is compatible with sequential " |
1625 | 0 | "reading, or download the file entirely"); |
1626 | 0 | } |
1627 | 240k | } |
1628 | 240k | } |
1629 | 94 | else |
1630 | 94 | { |
1631 | 94 | oFileProp.eExists = EXIST_YES; |
1632 | 94 | oFileProp.fileSize = static_cast<GUIntBig>(dfSize); |
1633 | 94 | } |
1634 | 240k | } |
1635 | 240k | } |
1636 | | |
1637 | 242k | if (sWriteFuncHeaderData.pBuffer != nullptr && |
1638 | 2.40k | (response_code == 200 || response_code == 206)) |
1639 | 0 | { |
1640 | 0 | { |
1641 | 0 | const CPLStringList aosHeaders( |
1642 | 0 | TokenizeHeaders(sWriteFuncHeaderData.pBuffer)); |
1643 | 0 | for (const auto &[pszKey, pszValue] : |
1644 | 0 | cpl::IterateNameValue(aosHeaders)) |
1645 | 0 | { |
1646 | 0 | if (bGetHeaders) |
1647 | 0 | { |
1648 | 0 | m_aosHeaders.SetNameValue(pszKey, pszValue); |
1649 | 0 | } |
1650 | 0 | if (EQUAL(pszKey, "Cache-Control") && |
1651 | 0 | EQUAL(pszValue, "no-cache") && |
1652 | 0 | CPLTestBool(CPLGetConfigOption( |
1653 | 0 | "CPL_VSIL_CURL_HONOR_CACHE_CONTROL", "YES"))) |
1654 | 0 | { |
1655 | 0 | m_bCached = false; |
1656 | 0 | } |
1657 | | |
1658 | 0 | else if (EQUAL(pszKey, "ETag")) |
1659 | 0 | { |
1660 | 0 | std::string osValue(pszValue); |
1661 | 0 | if (osValue.size() >= 2 && osValue.front() == '"' && |
1662 | 0 | osValue.back() == '"') |
1663 | 0 | osValue = osValue.substr(1, osValue.size() - 2); |
1664 | 0 | oFileProp.ETag = std::move(osValue); |
1665 | 0 | } |
1666 | | |
1667 | | // Azure Data Lake Storage |
1668 | 0 | else if (EQUAL(pszKey, "x-ms-resource-type")) |
1669 | 0 | { |
1670 | 0 | if (EQUAL(pszValue, "file")) |
1671 | 0 | { |
1672 | 0 | oFileProp.nMode |= S_IFREG; |
1673 | 0 | } |
1674 | 0 | else if (EQUAL(pszValue, "directory")) |
1675 | 0 | { |
1676 | 0 | oFileProp.bIsDirectory = true; |
1677 | 0 | oFileProp.nMode |= S_IFDIR; |
1678 | 0 | } |
1679 | 0 | } |
1680 | 0 | else if (EQUAL(pszKey, "x-ms-permissions")) |
1681 | 0 | { |
1682 | 0 | oFileProp.nMode |= |
1683 | 0 | VSICurlParseUnixPermissions(pszValue); |
1684 | 0 | } |
1685 | | |
1686 | | // https://overturemapswestus2.blob.core.windows.net/release/2024-11-13.0/theme%3Ddivisions/type%3Ddivision_area |
1687 | | // returns a x-ms-meta-hdi_isfolder: true header |
1688 | 0 | else if (EQUAL(pszKey, "x-ms-meta-hdi_isfolder") && |
1689 | 0 | EQUAL(pszValue, "true")) |
1690 | 0 | { |
1691 | 0 | oFileProp.bIsAzureFolder = true; |
1692 | 0 | oFileProp.bIsDirectory = true; |
1693 | 0 | oFileProp.nMode |= S_IFDIR; |
1694 | 0 | } |
1695 | 0 | } |
1696 | 0 | } |
1697 | 0 | } |
1698 | | |
1699 | 242k | if (bHasUsedLimitedRangeGet && response_code == 206) |
1700 | 0 | { |
1701 | 0 | oFileProp.eExists = EXIST_NO; |
1702 | 0 | oFileProp.fileSize = 0; |
1703 | 0 | if (sWriteFuncHeaderData.pBuffer != nullptr) |
1704 | 0 | { |
1705 | 0 | const CPLStringList aosHeaders( |
1706 | 0 | TokenizeHeaders(sWriteFuncHeaderData.pBuffer)); |
1707 | 0 | const char *pszContentRange = |
1708 | 0 | aosHeaders.FetchNameValue("content-range"); |
1709 | | // Trailing space in string intended |
1710 | 0 | if (pszContentRange && |
1711 | 0 | STARTS_WITH_CI(pszContentRange, "bytes ")) |
1712 | 0 | { |
1713 | 0 | pszContentRange += strlen("bytes "); |
1714 | 0 | pszContentRange = strchr(pszContentRange, '/'); |
1715 | 0 | if (pszContentRange) |
1716 | 0 | { |
1717 | 0 | oFileProp.eExists = EXIST_YES; |
1718 | 0 | oFileProp.fileSize = static_cast<GUIntBig>( |
1719 | 0 | CPLAtoGIntBig(pszContentRange + 1)); |
1720 | 0 | } |
1721 | 0 | } |
1722 | | |
1723 | | // Add first bytes to cache |
1724 | 0 | if (sWriteFuncData.pBuffer != nullptr) |
1725 | 0 | { |
1726 | 0 | size_t nOffset = 0; |
1727 | 0 | while (nOffset < sWriteFuncData.nSize) |
1728 | 0 | { |
1729 | 0 | const size_t nToCache = |
1730 | 0 | std::min<size_t>(sWriteFuncData.nSize - nOffset, |
1731 | 0 | knDOWNLOAD_CHUNK_SIZE); |
1732 | 0 | poFS->AddRegion(m_pszURL, nOffset, nToCache, |
1733 | 0 | sWriteFuncData.pBuffer + nOffset); |
1734 | 0 | nOffset += nToCache; |
1735 | 0 | } |
1736 | 0 | } |
1737 | 0 | } |
1738 | 0 | } |
1739 | 242k | else if (IsDirectoryFromExists(osVerb.c_str(), |
1740 | 242k | static_cast<int>(response_code))) |
1741 | 0 | { |
1742 | 0 | oFileProp.eExists = EXIST_YES; |
1743 | 0 | oFileProp.fileSize = 0; |
1744 | 0 | oFileProp.bIsDirectory = true; |
1745 | 0 | } |
1746 | | // 405 = Method not allowed |
1747 | 242k | else if (response_code == 405 && !bRetryWithGet && osVerb == "HEAD") |
1748 | 0 | { |
1749 | 0 | CPLDebug(poFS->GetDebugKey(), |
1750 | 0 | "HEAD not allowed. Retrying with GET"); |
1751 | 0 | bRetryWithGet = true; |
1752 | 0 | CPLFree(sWriteFuncData.pBuffer); |
1753 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1754 | 0 | curl_easy_cleanup(hCurlHandle); |
1755 | 0 | goto retry; |
1756 | 0 | } |
1757 | 242k | else if (response_code == 416) |
1758 | 0 | { |
1759 | 0 | oFileProp.eExists = EXIST_YES; |
1760 | 0 | oFileProp.fileSize = 0; |
1761 | 0 | } |
1762 | 242k | else if (response_code != 200) |
1763 | 242k | { |
1764 | | // Look if we should attempt a retry |
1765 | 242k | if (oRetryContext.CanRetry(static_cast<int>(response_code), |
1766 | 242k | sWriteFuncHeaderData.pBuffer, |
1767 | 242k | szCurlErrBuf)) |
1768 | 106k | { |
1769 | 106k | CPLError(CE_Warning, CPLE_AppDefined, |
1770 | 106k | "HTTP error code: %d - %s. " |
1771 | 106k | "Retrying again in %.1f secs", |
1772 | 106k | static_cast<int>(response_code), m_pszURL, |
1773 | 106k | oRetryContext.GetCurrentDelay()); |
1774 | 106k | CPLSleep(oRetryContext.GetCurrentDelay()); |
1775 | 106k | CPLFree(sWriteFuncData.pBuffer); |
1776 | 106k | CPLFree(sWriteFuncHeaderData.pBuffer); |
1777 | 106k | curl_easy_cleanup(hCurlHandle); |
1778 | 106k | goto retry; |
1779 | 106k | } |
1780 | | |
1781 | 136k | if (sWriteFuncData.pBuffer != nullptr) |
1782 | 2.28k | { |
1783 | 2.28k | if (UseLimitRangeGetInsteadOfHead() && |
1784 | 2.17k | CanRestartOnError(sWriteFuncData.pBuffer, |
1785 | 2.17k | sWriteFuncHeaderData.pBuffer, bSetError)) |
1786 | 0 | { |
1787 | 0 | oFileProp.bHasComputedFileSize = false; |
1788 | 0 | CPLFree(sWriteFuncData.pBuffer); |
1789 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
1790 | 0 | curl_easy_cleanup(hCurlHandle); |
1791 | 0 | return GetFileSizeOrHeaders(bSetError, bGetHeaders); |
1792 | 0 | } |
1793 | 2.28k | else |
1794 | 2.28k | { |
1795 | 2.28k | CPL_IGNORE_RET_VAL(CanRestartOnError( |
1796 | 2.28k | sWriteFuncData.pBuffer, sWriteFuncHeaderData.pBuffer, |
1797 | 2.28k | bSetError)); |
1798 | 2.28k | } |
1799 | 2.28k | } |
1800 | | |
1801 | | // If there was no VSI error thrown in the process, |
1802 | | // fail by reporting the HTTP response code. |
1803 | 136k | if (bSetError && VSIGetLastErrorNo() == 0) |
1804 | 5.36k | { |
1805 | 5.36k | if (strlen(szCurlErrBuf) > 0) |
1806 | 5.16k | { |
1807 | 5.16k | if (response_code == 0) |
1808 | 5.16k | { |
1809 | 5.16k | VSIError(VSIE_HttpError, "CURL error: %s", |
1810 | 5.16k | szCurlErrBuf); |
1811 | 5.16k | } |
1812 | 0 | else |
1813 | 0 | { |
1814 | 0 | VSIError(VSIE_HttpError, "HTTP response code: %d - %s", |
1815 | 0 | static_cast<int>(response_code), szCurlErrBuf); |
1816 | 0 | } |
1817 | 5.16k | } |
1818 | 200 | else |
1819 | 200 | { |
1820 | 200 | VSIError(VSIE_HttpError, "HTTP response code: %d", |
1821 | 200 | static_cast<int>(response_code)); |
1822 | 200 | } |
1823 | 5.36k | } |
1824 | 130k | else |
1825 | 130k | { |
1826 | 130k | if (response_code != 400 && response_code != 404) |
1827 | 129k | { |
1828 | 129k | CPLError(CE_Warning, CPLE_AppDefined, |
1829 | 129k | "HTTP response code on %s: %d", osURL.c_str(), |
1830 | 129k | static_cast<int>(response_code)); |
1831 | 129k | } |
1832 | | // else a CPLDebug() is emitted below |
1833 | 130k | } |
1834 | | |
1835 | 136k | oFileProp.eExists = EXIST_NO; |
1836 | 136k | oFileProp.nHTTPCode = static_cast<int>(response_code); |
1837 | 136k | oFileProp.fileSize = 0; |
1838 | 136k | } |
1839 | 0 | else if (sWriteFuncData.pBuffer != nullptr) |
1840 | 0 | { |
1841 | 0 | ProcessGetFileSizeResult( |
1842 | 0 | reinterpret_cast<const char *>(sWriteFuncData.pBuffer)); |
1843 | 0 | } |
1844 | | |
1845 | | // Try to guess if this is a directory. Generally if this is a |
1846 | | // directory, curl will retry with an URL with slash added. |
1847 | 136k | if (!osEffectiveURL.empty() && |
1848 | 136k | strncmp(osURL.c_str(), osEffectiveURL.c_str(), osURL.size()) == 0 && |
1849 | 87.7k | osEffectiveURL[osURL.size()] == '/' && |
1850 | 1.44k | oFileProp.eExists != EXIST_NO) |
1851 | 0 | { |
1852 | 0 | oFileProp.eExists = EXIST_YES; |
1853 | 0 | oFileProp.fileSize = 0; |
1854 | 0 | oFileProp.bIsDirectory = true; |
1855 | 0 | } |
1856 | 136k | else if (osURL.back() == '/') |
1857 | 953 | { |
1858 | 953 | oFileProp.bIsDirectory = true; |
1859 | 953 | } |
1860 | | |
1861 | 136k | if (!bAlreadyLogged) |
1862 | 2.32k | { |
1863 | 2.32k | CPLDebug(poFS->GetDebugKey(), |
1864 | 2.32k | "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d", |
1865 | 2.32k | osURL.c_str(), oFileProp.fileSize, |
1866 | 2.32k | static_cast<int>(response_code)); |
1867 | 2.32k | } |
1868 | 136k | } |
1869 | | |
1870 | 136k | CPLFree(sWriteFuncData.pBuffer); |
1871 | 136k | CPLFree(sWriteFuncHeaderData.pBuffer); |
1872 | 136k | curl_easy_cleanup(hCurlHandle); |
1873 | | |
1874 | 136k | oFileProp.bHasComputedFileSize = true; |
1875 | 136k | if (mtime > 0) |
1876 | 136 | oFileProp.mTime = mtime; |
1877 | | // Do not update cached file properties if cURL returned a non-HTTP error |
1878 | 136k | if (response_code != 0) |
1879 | 2.17k | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
1880 | | |
1881 | 136k | return oFileProp.fileSize; |
1882 | 242k | } |
1883 | | |
1884 | | /************************************************************************/ |
1885 | | /* Exists() */ |
1886 | | /************************************************************************/ |
1887 | | |
1888 | | bool VSICurlHandle::Exists(bool bSetError) |
1889 | 253k | { |
1890 | 253k | if (oFileProp.eExists == EXIST_UNKNOWN) |
1891 | 118k | { |
1892 | 118k | GetFileSize(bSetError); |
1893 | 118k | } |
1894 | 135k | else if (oFileProp.eExists == EXIST_NO) |
1895 | 135k | { |
1896 | | // If there was no VSI error thrown in the process, |
1897 | | // and we know the HTTP error code of the first request where the |
1898 | | // file could not be retrieved, fail by reporting the HTTP code. |
1899 | 135k | if (bSetError && VSIGetLastErrorNo() == 0 && oFileProp.nHTTPCode) |
1900 | 145 | { |
1901 | 145 | VSIError(VSIE_HttpError, "HTTP response code: %d", |
1902 | 145 | oFileProp.nHTTPCode); |
1903 | 145 | } |
1904 | 135k | } |
1905 | | |
1906 | 253k | return oFileProp.eExists == EXIST_YES; |
1907 | 253k | } |
1908 | | |
1909 | | /************************************************************************/ |
1910 | | /* Tell() */ |
1911 | | /************************************************************************/ |
1912 | | |
1913 | | vsi_l_offset VSICurlHandle::Tell() |
1914 | 29.4k | { |
1915 | 29.4k | return curOffset; |
1916 | 29.4k | } |
1917 | | |
1918 | | /************************************************************************/ |
1919 | | /* GetRedirectURLIfValid() */ |
1920 | | /************************************************************************/ |
1921 | | |
1922 | | std::string |
1923 | | VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired, |
1924 | | CPLStringList &aosHTTPOptions) const |
1925 | 10.6k | { |
1926 | 10.6k | bHasExpired = false; |
1927 | 10.6k | poFS->GetCachedFileProp(m_pszURL, oFileProp); |
1928 | | |
1929 | 10.6k | std::string osURL(m_pszURL + m_osQueryString); |
1930 | 10.6k | if (oFileProp.bS3LikeRedirect) |
1931 | 0 | { |
1932 | 0 | if (time(nullptr) + 1 < oFileProp.nExpireTimestampLocal) |
1933 | 0 | { |
1934 | 0 | CPLDebug(poFS->GetDebugKey(), |
1935 | 0 | "Using redirect URL as it looks to be still valid " |
1936 | 0 | "(%d seconds left)", |
1937 | 0 | static_cast<int>(oFileProp.nExpireTimestampLocal - |
1938 | 0 | time(nullptr))); |
1939 | 0 | osURL = oFileProp.osRedirectURL; |
1940 | 0 | } |
1941 | 0 | else |
1942 | 0 | { |
1943 | 0 | CPLDebug(poFS->GetDebugKey(), |
1944 | 0 | "Redirect URL has expired. Using original URL"); |
1945 | 0 | oFileProp.bS3LikeRedirect = false; |
1946 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
1947 | 0 | bHasExpired = true; |
1948 | 0 | } |
1949 | 0 | } |
1950 | 10.6k | else if (!oFileProp.osRedirectURL.empty()) |
1951 | 0 | { |
1952 | 0 | osURL = oFileProp.osRedirectURL; |
1953 | 0 | bHasExpired = false; |
1954 | 0 | } |
1955 | | |
1956 | 10.6k | if (m_pszURL != osURL) |
1957 | 0 | { |
1958 | 0 | const char *pszAuthorizationHeaderAllowed = VSIGetPathSpecificOption( |
1959 | 0 | m_osFilename.c_str(), |
1960 | 0 | "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT", |
1961 | 0 | "IF_SAME_HOST"); |
1962 | 0 | if (EQUAL(pszAuthorizationHeaderAllowed, "IF_SAME_HOST")) |
1963 | 0 | { |
1964 | 0 | const auto ExtractServer = [](const std::string &s) |
1965 | 0 | { |
1966 | 0 | size_t afterHTTPPos = 0; |
1967 | 0 | if (STARTS_WITH(s.c_str(), "http://")) |
1968 | 0 | afterHTTPPos = strlen("http://"); |
1969 | 0 | else if (STARTS_WITH(s.c_str(), "https://")) |
1970 | 0 | afterHTTPPos = strlen("https://"); |
1971 | 0 | const auto posSlash = s.find('/', afterHTTPPos); |
1972 | 0 | if (posSlash != std::string::npos) |
1973 | 0 | return s.substr(afterHTTPPos, posSlash - afterHTTPPos); |
1974 | 0 | else |
1975 | 0 | return s.substr(afterHTTPPos); |
1976 | 0 | }; |
1977 | |
|
1978 | 0 | if (ExtractServer(osURL) != ExtractServer(m_pszURL)) |
1979 | 0 | { |
1980 | 0 | aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", |
1981 | 0 | "NO"); |
1982 | 0 | } |
1983 | 0 | } |
1984 | 0 | else if (!CPLTestBool(pszAuthorizationHeaderAllowed)) |
1985 | 0 | { |
1986 | 0 | aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", "NO"); |
1987 | 0 | } |
1988 | 0 | } |
1989 | | |
1990 | 10.6k | return osURL; |
1991 | 10.6k | } |
1992 | | |
1993 | | /************************************************************************/ |
1994 | | /* CurrentDownload */ |
1995 | | /************************************************************************/ |
1996 | | |
1997 | | namespace |
1998 | | { |
1999 | | struct CurrentDownload |
2000 | | { |
2001 | | VSICurlFilesystemHandlerBase *m_poFS = nullptr; |
2002 | | std::string m_osURL{}; |
2003 | | vsi_l_offset m_nStartOffset = 0; |
2004 | | int m_nBlocks = 0; |
2005 | | std::string m_osAlreadyDownloadedData{}; |
2006 | | bool m_bHasAlreadyDownloadedData = false; |
2007 | | |
2008 | | CurrentDownload(VSICurlFilesystemHandlerBase *poFS, const char *pszURL, |
2009 | | vsi_l_offset startOffset, int nBlocks) |
2010 | 10.6k | : m_poFS(poFS), m_osURL(pszURL), m_nStartOffset(startOffset), |
2011 | 10.6k | m_nBlocks(nBlocks) |
2012 | 10.6k | { |
2013 | 10.6k | auto res = m_poFS->NotifyStartDownloadRegion(m_osURL, m_nStartOffset, |
2014 | 10.6k | m_nBlocks); |
2015 | 10.6k | m_bHasAlreadyDownloadedData = res.first; |
2016 | 10.6k | m_osAlreadyDownloadedData = std::move(res.second); |
2017 | 10.6k | } |
2018 | | |
2019 | | bool HasAlreadyDownloadedData() const |
2020 | 10.6k | { |
2021 | 10.6k | return m_bHasAlreadyDownloadedData; |
2022 | 10.6k | } |
2023 | | |
2024 | | const std::string &GetAlreadyDownloadedData() const |
2025 | 0 | { |
2026 | 0 | return m_osAlreadyDownloadedData; |
2027 | 0 | } |
2028 | | |
2029 | | void SetData(const std::string &osData) |
2030 | 0 | { |
2031 | 0 | CPLAssert(!m_bHasAlreadyDownloadedData); |
2032 | 0 | m_bHasAlreadyDownloadedData = true; |
2033 | 0 | m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks, |
2034 | 0 | osData); |
2035 | 0 | } |
2036 | | |
2037 | | ~CurrentDownload() |
2038 | 10.6k | { |
2039 | 10.6k | if (!m_bHasAlreadyDownloadedData) |
2040 | 10.6k | m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks, |
2041 | 10.6k | std::string()); |
2042 | 10.6k | } |
2043 | | |
2044 | | CurrentDownload(const CurrentDownload &) = delete; |
2045 | | CurrentDownload &operator=(const CurrentDownload &) = delete; |
2046 | | }; |
2047 | | } // namespace |
2048 | | |
2049 | | /************************************************************************/ |
2050 | | /* NotifyStartDownloadRegion() */ |
2051 | | /************************************************************************/ |
2052 | | |
2053 | | /** Indicate intent at downloading a new region. |
2054 | | * |
2055 | | * If the region is already in download in another thread, then wait for its |
2056 | | * completion. |
2057 | | * |
2058 | | * Returns: |
2059 | | * - (false, empty string) if a new download is needed |
2060 | | * - (true, region_content) if we have been waiting for a download of the same |
2061 | | * region to be completed and got its result. Note that region_content will be |
2062 | | * empty if the download of that region failed. |
2063 | | */ |
2064 | | std::pair<bool, std::string> |
2065 | | VSICurlFilesystemHandlerBase::NotifyStartDownloadRegion( |
2066 | | const std::string &osURL, vsi_l_offset startOffset, int nBlocks) |
2067 | 10.6k | { |
2068 | 10.6k | std::string osId(osURL); |
2069 | 10.6k | osId += '_'; |
2070 | 10.6k | osId += std::to_string(startOffset); |
2071 | 10.6k | osId += '_'; |
2072 | 10.6k | osId += std::to_string(nBlocks); |
2073 | | |
2074 | 10.6k | m_oMutex.lock(); |
2075 | 10.6k | auto oIter = m_oMapRegionInDownload.find(osId); |
2076 | 10.6k | if (oIter != m_oMapRegionInDownload.end()) |
2077 | 0 | { |
2078 | 0 | auto ®ion = *(oIter->second); |
2079 | 0 | std::unique_lock<std::mutex> oRegionLock(region.oMutex); |
2080 | 0 | m_oMutex.unlock(); |
2081 | 0 | region.nWaiters++; |
2082 | 0 | while (region.bDownloadInProgress) |
2083 | 0 | { |
2084 | 0 | region.oCond.wait(oRegionLock); |
2085 | 0 | } |
2086 | 0 | std::string osRet = region.osData; |
2087 | 0 | region.nWaiters--; |
2088 | 0 | region.oCond.notify_one(); |
2089 | 0 | return std::pair<bool, std::string>(true, osRet); |
2090 | 0 | } |
2091 | 10.6k | else |
2092 | 10.6k | { |
2093 | 10.6k | auto poRegionInDownload = std::make_unique<RegionInDownload>(); |
2094 | 10.6k | poRegionInDownload->bDownloadInProgress = true; |
2095 | 10.6k | m_oMapRegionInDownload[osId] = std::move(poRegionInDownload); |
2096 | 10.6k | m_oMutex.unlock(); |
2097 | 10.6k | return std::pair<bool, std::string>(false, std::string()); |
2098 | 10.6k | } |
2099 | 10.6k | } |
2100 | | |
2101 | | /************************************************************************/ |
2102 | | /* NotifyStopDownloadRegion() */ |
2103 | | /************************************************************************/ |
2104 | | |
2105 | | void VSICurlFilesystemHandlerBase::NotifyStopDownloadRegion( |
2106 | | const std::string &osURL, vsi_l_offset startOffset, int nBlocks, |
2107 | | const std::string &osData) |
2108 | 10.6k | { |
2109 | 10.6k | std::string osId(osURL); |
2110 | 10.6k | osId += '_'; |
2111 | 10.6k | osId += std::to_string(startOffset); |
2112 | 10.6k | osId += '_'; |
2113 | 10.6k | osId += std::to_string(nBlocks); |
2114 | | |
2115 | 10.6k | m_oMutex.lock(); |
2116 | 10.6k | auto oIter = m_oMapRegionInDownload.find(osId); |
2117 | 10.6k | CPLAssert(oIter != m_oMapRegionInDownload.end()); |
2118 | 10.6k | auto ®ion = *(oIter->second); |
2119 | 10.6k | { |
2120 | 10.6k | std::unique_lock<std::mutex> oRegionLock(region.oMutex); |
2121 | 10.6k | if (region.nWaiters) |
2122 | 0 | { |
2123 | 0 | region.osData = osData; |
2124 | 0 | region.bDownloadInProgress = false; |
2125 | 0 | region.oCond.notify_all(); |
2126 | |
|
2127 | 0 | while (region.nWaiters) |
2128 | 0 | { |
2129 | 0 | region.oCond.wait(oRegionLock); |
2130 | 0 | } |
2131 | 0 | } |
2132 | 10.6k | } |
2133 | 10.6k | m_oMapRegionInDownload.erase(oIter); |
2134 | 10.6k | m_oMutex.unlock(); |
2135 | 10.6k | } |
2136 | | |
2137 | | /************************************************************************/ |
2138 | | /* DownloadRegion() */ |
2139 | | /************************************************************************/ |
2140 | | |
2141 | | std::string VSICurlHandle::DownloadRegion(const vsi_l_offset startOffset, |
2142 | | const int nBlocks) |
2143 | 10.6k | { |
2144 | 10.6k | if (bInterrupted && bStopOnInterruptUntilUninstall) |
2145 | 0 | return std::string(); |
2146 | | |
2147 | 10.6k | if (oFileProp.eExists == EXIST_NO) |
2148 | 0 | return std::string(); |
2149 | | |
2150 | | // Check if there is not a download of the same region in progress in |
2151 | | // another thread, and if so wait for it to be completed |
2152 | 10.6k | CurrentDownload currentDownload(poFS, m_pszURL, startOffset, nBlocks); |
2153 | 10.6k | if (currentDownload.HasAlreadyDownloadedData()) |
2154 | 0 | { |
2155 | 0 | return currentDownload.GetAlreadyDownloadedData(); |
2156 | 0 | } |
2157 | | |
2158 | 10.6k | begin: |
2159 | 10.6k | CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL); |
2160 | | |
2161 | 10.6k | UpdateQueryString(); |
2162 | | |
2163 | 10.6k | bool bHasExpired = false; |
2164 | | |
2165 | 10.6k | CPLStringList aosHTTPOptions(m_aosHTTPOptions); |
2166 | 10.6k | std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); |
2167 | 10.6k | bool bUsedRedirect = osURL != m_pszURL; |
2168 | | |
2169 | 10.6k | WriteFuncStruct sWriteFuncData; |
2170 | 10.6k | WriteFuncStruct sWriteFuncHeaderData; |
2171 | 10.6k | CPLHTTPRetryContext oRetryContext(m_oRetryParameters); |
2172 | | |
2173 | 10.7k | retry: |
2174 | 10.7k | CURL *hCurlHandle = curl_easy_init(); |
2175 | 10.7k | struct curl_slist *headers = |
2176 | 10.7k | VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); |
2177 | | |
2178 | 10.7k | if (!AllowAutomaticRedirection()) |
2179 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0); |
2180 | | |
2181 | 10.7k | VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk, |
2182 | 10.7k | pReadCbkUserData); |
2183 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData); |
2184 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
2185 | 10.7k | VSICurlHandleWriteFunc); |
2186 | | |
2187 | 10.7k | VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr, |
2188 | 10.7k | nullptr); |
2189 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
2190 | 10.7k | &sWriteFuncHeaderData); |
2191 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
2192 | 10.7k | VSICurlHandleWriteFunc); |
2193 | 10.7k | sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http"); |
2194 | 10.7k | sWriteFuncHeaderData.nStartOffset = startOffset; |
2195 | 10.7k | sWriteFuncHeaderData.nEndOffset = |
2196 | 10.7k | startOffset + |
2197 | 10.7k | static_cast<vsi_l_offset>(nBlocks) * VSICURLGetDownloadChunkSize() - 1; |
2198 | | // Some servers don't like we try to read after end-of-file (#5786). |
2199 | 10.7k | if (oFileProp.bHasComputedFileSize && |
2200 | 0 | sWriteFuncHeaderData.nEndOffset >= oFileProp.fileSize) |
2201 | 0 | { |
2202 | 0 | sWriteFuncHeaderData.nEndOffset = oFileProp.fileSize - 1; |
2203 | 0 | } |
2204 | | |
2205 | 10.7k | char rangeStr[512] = {}; |
2206 | 10.7k | snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
2207 | 10.7k | startOffset, sWriteFuncHeaderData.nEndOffset); |
2208 | | |
2209 | | if constexpr (ENABLE_DEBUG) |
2210 | 10.7k | { |
2211 | 10.7k | CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", rangeStr, |
2212 | 10.7k | osURL.c_str()); |
2213 | 10.7k | } |
2214 | | |
2215 | 10.7k | std::string osHeaderRange; // leave in this scope |
2216 | 10.7k | if (sWriteFuncHeaderData.bIsHTTP) |
2217 | 8.15k | { |
2218 | 8.15k | osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr); |
2219 | | // So it gets included in Azure signature |
2220 | 8.15k | headers = curl_slist_append(headers, osHeaderRange.c_str()); |
2221 | 8.15k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr); |
2222 | 8.15k | } |
2223 | 2.60k | else |
2224 | 2.60k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr); |
2225 | | |
2226 | 10.7k | char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {}; |
2227 | 10.7k | szCurlErrBuf[0] = '\0'; |
2228 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf); |
2229 | | |
2230 | 10.7k | headers = GetCurlHeaders("GET", headers); |
2231 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
2232 | | |
2233 | 10.7k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1); |
2234 | | |
2235 | 10.7k | VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt); |
2236 | | |
2237 | 10.7k | VSICURLResetHeaderAndWriterFunctions(hCurlHandle); |
2238 | | |
2239 | 10.7k | curl_slist_free_all(headers); |
2240 | | |
2241 | 10.7k | NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize); |
2242 | | |
2243 | 10.7k | if (sWriteFuncData.bInterrupted || m_bInterrupt) |
2244 | 0 | { |
2245 | 0 | bInterrupted = true; |
2246 | | |
2247 | | // Notify that the download of the current region is finished |
2248 | 0 | currentDownload.SetData(std::string()); |
2249 | |
|
2250 | 0 | CPLFree(sWriteFuncData.pBuffer); |
2251 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2252 | 0 | curl_easy_cleanup(hCurlHandle); |
2253 | |
|
2254 | 0 | return std::string(); |
2255 | 0 | } |
2256 | | |
2257 | 10.7k | long response_code = 0; |
2258 | 10.7k | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
2259 | | |
2260 | 10.7k | if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0') |
2261 | 10.4k | { |
2262 | 10.4k | CPLDebug(poFS->GetDebugKey(), |
2263 | 10.4k | "DownloadRegion(%s): response_code=%d, msg=%s", osURL.c_str(), |
2264 | 10.4k | static_cast<int>(response_code), szCurlErrBuf); |
2265 | 10.4k | } |
2266 | | |
2267 | 10.7k | long mtime = 0; |
2268 | 10.7k | curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME, &mtime); |
2269 | 10.7k | if (mtime > 0) |
2270 | 0 | { |
2271 | 0 | oFileProp.mTime = mtime; |
2272 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
2273 | 0 | } |
2274 | | |
2275 | | if constexpr (ENABLE_DEBUG) |
2276 | 10.7k | { |
2277 | 10.7k | CPLDebug(poFS->GetDebugKey(), "Got response_code=%ld", response_code); |
2278 | 10.7k | } |
2279 | | |
2280 | 10.7k | if (bUsedRedirect && |
2281 | 0 | (response_code == 403 || |
2282 | | // Below case is in particular for |
2283 | | // gdalinfo |
2284 | | // /vsicurl/https://lpdaac.earthdata.nasa.gov/lp-prod-protected/HLSS30.015/HLS.S30.T10TEK.2020273T190109.v1.5.B8A.tif |
2285 | | // --config GDAL_DISABLE_READDIR_ON_OPEN EMPTY_DIR --config |
2286 | | // GDAL_HTTP_COOKIEFILE /tmp/cookie.txt --config GDAL_HTTP_COOKIEJAR |
2287 | | // /tmp/cookie.txt We got the redirect URL from a HEAD request, but it |
2288 | | // is not valid for a GET. So retry with GET on original URL to get a |
2289 | | // redirect URL valid for it. |
2290 | 0 | (response_code == 400 && |
2291 | 0 | osURL.find(".cloudfront.net") != std::string::npos))) |
2292 | 0 | { |
2293 | 0 | CPLDebug(poFS->GetDebugKey(), |
2294 | 0 | "Got an error with redirect URL. Retrying with original one"); |
2295 | 0 | oFileProp.bS3LikeRedirect = false; |
2296 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
2297 | 0 | bUsedRedirect = false; |
2298 | 0 | osURL = m_pszURL; |
2299 | 0 | CPLFree(sWriteFuncData.pBuffer); |
2300 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2301 | 0 | curl_easy_cleanup(hCurlHandle); |
2302 | 0 | goto retry; |
2303 | 0 | } |
2304 | | |
2305 | 10.7k | if (response_code == 401 && oRetryContext.CanRetry()) |
2306 | 0 | { |
2307 | 0 | CPLDebug(poFS->GetDebugKey(), "Unauthorized, trying to authenticate"); |
2308 | 0 | CPLFree(sWriteFuncData.pBuffer); |
2309 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2310 | 0 | curl_easy_cleanup(hCurlHandle); |
2311 | 0 | if (Authenticate(m_osFilename.c_str())) |
2312 | 0 | goto retry; |
2313 | 0 | return std::string(); |
2314 | 0 | } |
2315 | | |
2316 | 10.7k | UpdateRedirectInfo(hCurlHandle, sWriteFuncHeaderData); |
2317 | | |
2318 | 10.7k | if ((response_code != 200 && response_code != 206 && response_code != 225 && |
2319 | 10.7k | response_code != 226 && response_code != 426) || |
2320 | 0 | sWriteFuncHeaderData.bError) |
2321 | 10.7k | { |
2322 | 10.7k | if (sWriteFuncData.pBuffer != nullptr && |
2323 | 282 | CanRestartOnError( |
2324 | 282 | reinterpret_cast<const char *>(sWriteFuncData.pBuffer), |
2325 | 282 | reinterpret_cast<const char *>(sWriteFuncHeaderData.pBuffer), |
2326 | 282 | true)) |
2327 | 0 | { |
2328 | 0 | CPLFree(sWriteFuncData.pBuffer); |
2329 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2330 | 0 | curl_easy_cleanup(hCurlHandle); |
2331 | 0 | goto begin; |
2332 | 0 | } |
2333 | | |
2334 | | // Look if we should attempt a retry |
2335 | 10.7k | if (oRetryContext.CanRetry(static_cast<int>(response_code), |
2336 | 10.7k | sWriteFuncHeaderData.pBuffer, szCurlErrBuf)) |
2337 | 142 | { |
2338 | 142 | CPLError(CE_Warning, CPLE_AppDefined, |
2339 | 142 | "HTTP error code: %d - %s. " |
2340 | 142 | "Retrying again in %.1f secs", |
2341 | 142 | static_cast<int>(response_code), m_pszURL, |
2342 | 142 | oRetryContext.GetCurrentDelay()); |
2343 | 142 | CPLSleep(oRetryContext.GetCurrentDelay()); |
2344 | 142 | CPLFree(sWriteFuncData.pBuffer); |
2345 | 142 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2346 | 142 | curl_easy_cleanup(hCurlHandle); |
2347 | 142 | goto retry; |
2348 | 142 | } |
2349 | | |
2350 | 10.6k | if (response_code >= 400 && szCurlErrBuf[0] != '\0') |
2351 | 0 | { |
2352 | 0 | if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0) |
2353 | 0 | CPLError( |
2354 | 0 | CE_Failure, CPLE_AppDefined, |
2355 | 0 | "%d: %s, Range downloading not supported by this server!", |
2356 | 0 | static_cast<int>(response_code), szCurlErrBuf); |
2357 | 0 | else |
2358 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", |
2359 | 0 | static_cast<int>(response_code), szCurlErrBuf); |
2360 | 0 | } |
2361 | 10.6k | else if (response_code == 416) /* Range Not Satisfiable */ |
2362 | 0 | { |
2363 | 0 | if (sWriteFuncData.pBuffer) |
2364 | 0 | { |
2365 | 0 | CPLError( |
2366 | 0 | CE_Failure, CPLE_AppDefined, |
2367 | 0 | "%d: Range downloading not supported by this server: %s", |
2368 | 0 | static_cast<int>(response_code), sWriteFuncData.pBuffer); |
2369 | 0 | } |
2370 | 0 | else |
2371 | 0 | { |
2372 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
2373 | 0 | "%d: Range downloading not supported by this server", |
2374 | 0 | static_cast<int>(response_code)); |
2375 | 0 | } |
2376 | 0 | } |
2377 | 10.6k | if (!oFileProp.bHasComputedFileSize && startOffset == 0) |
2378 | 10.6k | { |
2379 | 10.6k | oFileProp.bHasComputedFileSize = true; |
2380 | 10.6k | oFileProp.fileSize = 0; |
2381 | 10.6k | oFileProp.eExists = EXIST_NO; |
2382 | 10.6k | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
2383 | 10.6k | } |
2384 | 10.6k | CPLFree(sWriteFuncData.pBuffer); |
2385 | 10.6k | CPLFree(sWriteFuncHeaderData.pBuffer); |
2386 | 10.6k | curl_easy_cleanup(hCurlHandle); |
2387 | 10.6k | return std::string(); |
2388 | 10.7k | } |
2389 | | |
2390 | 0 | if (!oFileProp.bHasComputedFileSize && sWriteFuncHeaderData.pBuffer) |
2391 | 0 | { |
2392 | | // Try to retrieve the filesize from the HTTP headers |
2393 | | // if in the form: "Content-Range: bytes x-y/filesize". |
2394 | 0 | char *pszContentRange = |
2395 | 0 | strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes "); |
2396 | 0 | if (pszContentRange == nullptr) |
2397 | 0 | pszContentRange = |
2398 | 0 | strstr(sWriteFuncHeaderData.pBuffer, "content-range: bytes "); |
2399 | 0 | if (pszContentRange) |
2400 | 0 | { |
2401 | 0 | char *pszEOL = strchr(pszContentRange, '\n'); |
2402 | 0 | if (pszEOL) |
2403 | 0 | { |
2404 | 0 | *pszEOL = 0; |
2405 | 0 | pszEOL = strchr(pszContentRange, '\r'); |
2406 | 0 | if (pszEOL) |
2407 | 0 | *pszEOL = 0; |
2408 | 0 | char *pszSlash = strchr(pszContentRange, '/'); |
2409 | 0 | if (pszSlash) |
2410 | 0 | { |
2411 | 0 | pszSlash++; |
2412 | 0 | oFileProp.fileSize = CPLScanUIntBig( |
2413 | 0 | pszSlash, static_cast<int>(strlen(pszSlash))); |
2414 | 0 | } |
2415 | 0 | } |
2416 | 0 | } |
2417 | 0 | else if (STARTS_WITH(m_pszURL, "ftp")) |
2418 | 0 | { |
2419 | | // Parse 213 answer for FTP protocol. |
2420 | 0 | char *pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 "); |
2421 | 0 | if (pszSize) |
2422 | 0 | { |
2423 | 0 | pszSize += 4; |
2424 | 0 | char *pszEOL = strchr(pszSize, '\n'); |
2425 | 0 | if (pszEOL) |
2426 | 0 | { |
2427 | 0 | *pszEOL = 0; |
2428 | 0 | pszEOL = strchr(pszSize, '\r'); |
2429 | 0 | if (pszEOL) |
2430 | 0 | *pszEOL = 0; |
2431 | |
|
2432 | 0 | oFileProp.fileSize = CPLScanUIntBig( |
2433 | 0 | pszSize, static_cast<int>(strlen(pszSize))); |
2434 | 0 | } |
2435 | 0 | } |
2436 | 0 | } |
2437 | |
|
2438 | 0 | if (oFileProp.fileSize != 0) |
2439 | 0 | { |
2440 | 0 | oFileProp.eExists = EXIST_YES; |
2441 | |
|
2442 | | if constexpr (ENABLE_DEBUG) |
2443 | 0 | { |
2444 | 0 | CPLDebug(poFS->GetDebugKey(), |
2445 | 0 | "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d", |
2446 | 0 | m_pszURL, oFileProp.fileSize, |
2447 | 0 | static_cast<int>(response_code)); |
2448 | 0 | } |
2449 | |
|
2450 | 0 | oFileProp.bHasComputedFileSize = true; |
2451 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
2452 | 0 | } |
2453 | 0 | } |
2454 | |
|
2455 | 0 | DownloadRegionPostProcess(startOffset, nBlocks, sWriteFuncData.pBuffer, |
2456 | 0 | sWriteFuncData.nSize); |
2457 | |
|
2458 | 0 | std::string osRet; |
2459 | 0 | osRet.assign(sWriteFuncData.pBuffer, sWriteFuncData.nSize); |
2460 | | |
2461 | | // Notify that the download of the current region is finished |
2462 | 0 | currentDownload.SetData(osRet); |
2463 | |
|
2464 | 0 | CPLFree(sWriteFuncData.pBuffer); |
2465 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
2466 | 0 | curl_easy_cleanup(hCurlHandle); |
2467 | |
|
2468 | 0 | return osRet; |
2469 | 10.7k | } |
2470 | | |
2471 | | /************************************************************************/ |
2472 | | /* UpdateRedirectInfo() */ |
2473 | | /************************************************************************/ |
2474 | | |
2475 | | void VSICurlHandle::UpdateRedirectInfo( |
2476 | | CURL *hCurlHandle, const WriteFuncStruct &sWriteFuncHeaderData) |
2477 | 10.7k | { |
2478 | 10.7k | std::string osEffectiveURL; |
2479 | 10.7k | { |
2480 | 10.7k | char *pszEffectiveURL = nullptr; |
2481 | 10.7k | curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, |
2482 | 10.7k | &pszEffectiveURL); |
2483 | 10.7k | if (pszEffectiveURL) |
2484 | 10.7k | osEffectiveURL = pszEffectiveURL; |
2485 | 10.7k | } |
2486 | | |
2487 | 10.7k | if (!oFileProp.bS3LikeRedirect && !osEffectiveURL.empty() && |
2488 | 10.7k | strstr(osEffectiveURL.c_str(), m_pszURL) == nullptr) |
2489 | 369 | { |
2490 | 369 | CPLDebug(poFS->GetDebugKey(), "Effective URL: %s", |
2491 | 369 | osEffectiveURL.c_str()); |
2492 | | |
2493 | 369 | long response_code = 0; |
2494 | 369 | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
2495 | 369 | if (response_code >= 200 && response_code < 300 && |
2496 | 0 | sWriteFuncHeaderData.nTimestampDate > 0 && |
2497 | 0 | VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) && |
2498 | 0 | !VSICurlIsS3LikeSignedURL(m_pszURL) && |
2499 | 0 | CPLTestBool( |
2500 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE"))) |
2501 | 0 | { |
2502 | 0 | GIntBig nExpireTimestamp = |
2503 | 0 | VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str()); |
2504 | 0 | if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10) |
2505 | 0 | { |
2506 | 0 | const int nValidity = static_cast<int>( |
2507 | 0 | nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate); |
2508 | 0 | CPLDebug(poFS->GetDebugKey(), |
2509 | 0 | "Will use redirect URL for the next %d seconds", |
2510 | 0 | nValidity); |
2511 | | // As our local clock might not be in sync with server clock, |
2512 | | // figure out the expiration timestamp in local time. |
2513 | 0 | oFileProp.bS3LikeRedirect = true; |
2514 | 0 | oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity; |
2515 | 0 | oFileProp.osRedirectURL = std::move(osEffectiveURL); |
2516 | 0 | poFS->SetCachedFileProp(m_pszURL, oFileProp); |
2517 | 0 | } |
2518 | 0 | } |
2519 | 369 | } |
2520 | 10.7k | } |
2521 | | |
2522 | | /************************************************************************/ |
2523 | | /* DownloadRegionPostProcess() */ |
2524 | | /************************************************************************/ |
2525 | | |
2526 | | void VSICurlHandle::DownloadRegionPostProcess(const vsi_l_offset startOffset, |
2527 | | const int nBlocks, |
2528 | | const char *pBuffer, size_t nSize) |
2529 | 0 | { |
2530 | 0 | const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize(); |
2531 | 0 | lastDownloadedOffset = startOffset + static_cast<vsi_l_offset>(nBlocks) * |
2532 | 0 | knDOWNLOAD_CHUNK_SIZE; |
2533 | |
|
2534 | 0 | if (nSize > static_cast<size_t>(nBlocks) * knDOWNLOAD_CHUNK_SIZE) |
2535 | 0 | { |
2536 | | if constexpr (ENABLE_DEBUG) |
2537 | 0 | { |
2538 | 0 | CPLDebug( |
2539 | 0 | poFS->GetDebugKey(), |
2540 | 0 | "Got more data than expected : %u instead of %u", |
2541 | 0 | static_cast<unsigned int>(nSize), |
2542 | 0 | static_cast<unsigned int>(nBlocks * knDOWNLOAD_CHUNK_SIZE)); |
2543 | 0 | } |
2544 | 0 | } |
2545 | |
|
2546 | 0 | vsi_l_offset l_startOffset = startOffset; |
2547 | 0 | while (nSize > 0) |
2548 | 0 | { |
2549 | | #if DEBUG_VERBOSE |
2550 | | if constexpr (ENABLE_DEBUG) |
2551 | | { |
2552 | | CPLDebug(poFS->GetDebugKey(), "Add region %u - %u", |
2553 | | static_cast<unsigned int>(startOffset), |
2554 | | static_cast<unsigned int>(std::min( |
2555 | | static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize))); |
2556 | | } |
2557 | | #endif |
2558 | 0 | const size_t nChunkSize = |
2559 | 0 | std::min(static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize); |
2560 | 0 | poFS->AddRegion(m_pszURL, l_startOffset, nChunkSize, pBuffer); |
2561 | 0 | l_startOffset += nChunkSize; |
2562 | 0 | pBuffer += nChunkSize; |
2563 | 0 | nSize -= nChunkSize; |
2564 | 0 | } |
2565 | 0 | } |
2566 | | |
2567 | | /************************************************************************/ |
2568 | | /* Read() */ |
2569 | | /************************************************************************/ |
2570 | | |
2571 | | size_t VSICurlHandle::Read(void *const pBufferIn, size_t const nBytes) |
2572 | 40.6k | { |
2573 | 40.6k | NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str()); |
2574 | 40.6k | NetworkStatisticsFile oContextFile(m_osFilename.c_str()); |
2575 | 40.6k | NetworkStatisticsAction oContextAction("Read"); |
2576 | | |
2577 | 40.6k | size_t nBufferRequestSize = nBytes; |
2578 | 40.6k | if (nBufferRequestSize == 0) |
2579 | 19.0k | return 0; |
2580 | | |
2581 | 21.5k | void *pBuffer = pBufferIn; |
2582 | | |
2583 | | #if DEBUG_VERBOSE |
2584 | | CPLDebug(poFS->GetDebugKey(), "offset=%d, size=%d", |
2585 | | static_cast<int>(curOffset), static_cast<int>(nBufferRequestSize)); |
2586 | | #endif |
2587 | | |
2588 | 21.5k | vsi_l_offset iterOffset = curOffset; |
2589 | 21.5k | const int knMAX_REGIONS = GetMaxRegions(); |
2590 | 21.5k | const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize(); |
2591 | 21.5k | while (nBufferRequestSize) |
2592 | 21.5k | { |
2593 | | // Don't try to read after end of file. |
2594 | 21.5k | poFS->GetCachedFileProp(m_pszURL, oFileProp); |
2595 | 21.5k | if (oFileProp.bHasComputedFileSize && iterOffset >= oFileProp.fileSize) |
2596 | 10.6k | { |
2597 | 10.6k | if (iterOffset == curOffset) |
2598 | 10.6k | { |
2599 | 10.6k | CPLDebug(poFS->GetDebugKey(), |
2600 | 10.6k | "Request at offset " CPL_FRMT_GUIB |
2601 | 10.6k | ", after end of file", |
2602 | 10.6k | iterOffset); |
2603 | 10.6k | } |
2604 | 10.6k | break; |
2605 | 10.6k | } |
2606 | | |
2607 | 10.9k | const vsi_l_offset nOffsetToDownload = |
2608 | 10.9k | (iterOffset / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE; |
2609 | 10.9k | std::string osRegion; |
2610 | 10.9k | std::shared_ptr<std::string> psRegion = |
2611 | 10.9k | poFS->GetRegion(m_pszURL, nOffsetToDownload); |
2612 | 10.9k | if (psRegion != nullptr) |
2613 | 0 | { |
2614 | 0 | osRegion = *psRegion; |
2615 | 0 | } |
2616 | 10.9k | else |
2617 | 10.9k | { |
2618 | 10.9k | if (nOffsetToDownload == lastDownloadedOffset) |
2619 | 0 | { |
2620 | | // In case of consecutive reads (of small size), we use a |
2621 | | // heuristic that we will read the file sequentially, so |
2622 | | // we double the requested size to decrease the number of |
2623 | | // client/server roundtrips. |
2624 | 0 | constexpr int MAX_CHUNK_SIZE_INCREASE_FACTOR = 128; |
2625 | 0 | if (nBlocksToDownload < MAX_CHUNK_SIZE_INCREASE_FACTOR) |
2626 | 0 | nBlocksToDownload *= 2; |
2627 | 0 | } |
2628 | 10.9k | else |
2629 | 10.9k | { |
2630 | | // Random reads. Cancel the above heuristics. |
2631 | 10.9k | nBlocksToDownload = 1; |
2632 | 10.9k | } |
2633 | | |
2634 | | // Ensure that we will request at least the number of blocks |
2635 | | // to satisfy the remaining buffer size to read. |
2636 | 10.9k | const vsi_l_offset nEndOffsetToDownload = |
2637 | 10.9k | ((iterOffset + nBufferRequestSize + knDOWNLOAD_CHUNK_SIZE - 1) / |
2638 | 10.9k | knDOWNLOAD_CHUNK_SIZE) * |
2639 | 10.9k | knDOWNLOAD_CHUNK_SIZE; |
2640 | 10.9k | const int nMinBlocksToDownload = |
2641 | 10.9k | static_cast<int>((nEndOffsetToDownload - nOffsetToDownload) / |
2642 | 10.9k | knDOWNLOAD_CHUNK_SIZE); |
2643 | 10.9k | if (nBlocksToDownload < nMinBlocksToDownload) |
2644 | 64 | nBlocksToDownload = nMinBlocksToDownload; |
2645 | | |
2646 | | // Avoid reading already cached data. |
2647 | | // Note: this might get evicted if concurrent reads are done, but |
2648 | | // this should not cause bugs. Just missed optimization. |
2649 | 11.2k | for (int i = 1; i < nBlocksToDownload; i++) |
2650 | 268 | { |
2651 | 268 | if (poFS->GetRegion(m_pszURL, nOffsetToDownload + |
2652 | 268 | static_cast<vsi_l_offset>(i) * |
2653 | 268 | knDOWNLOAD_CHUNK_SIZE) != |
2654 | 268 | nullptr) |
2655 | 0 | { |
2656 | 0 | nBlocksToDownload = i; |
2657 | 0 | break; |
2658 | 0 | } |
2659 | 268 | } |
2660 | | |
2661 | | // We can't download more than knMAX_REGIONS chunks at a time, |
2662 | | // otherwise the cache will not be big enough to store them and |
2663 | | // copy their content to the target buffer. |
2664 | 10.9k | if (nBlocksToDownload > knMAX_REGIONS) |
2665 | 0 | nBlocksToDownload = knMAX_REGIONS; |
2666 | | |
2667 | 10.9k | osRegion = DownloadRegion(nOffsetToDownload, nBlocksToDownload); |
2668 | 10.9k | if (osRegion.empty()) |
2669 | 10.9k | { |
2670 | 10.9k | if (!bInterrupted) |
2671 | 10.9k | bError = true; |
2672 | 10.9k | return 0; |
2673 | 10.9k | } |
2674 | 10.9k | } |
2675 | | |
2676 | 0 | const vsi_l_offset nRegionOffset = iterOffset - nOffsetToDownload; |
2677 | 0 | if (osRegion.size() < nRegionOffset) |
2678 | 0 | { |
2679 | 0 | if (iterOffset == curOffset) |
2680 | 0 | { |
2681 | 0 | CPLDebug(poFS->GetDebugKey(), |
2682 | 0 | "Request at offset " CPL_FRMT_GUIB |
2683 | 0 | ", after end of file", |
2684 | 0 | iterOffset); |
2685 | 0 | } |
2686 | 0 | break; |
2687 | 0 | } |
2688 | | |
2689 | 0 | const int nToCopy = static_cast<int>( |
2690 | 0 | std::min(static_cast<vsi_l_offset>(nBufferRequestSize), |
2691 | 0 | osRegion.size() - nRegionOffset)); |
2692 | 0 | memcpy(pBuffer, osRegion.data() + nRegionOffset, nToCopy); |
2693 | 0 | pBuffer = static_cast<char *>(pBuffer) + nToCopy; |
2694 | 0 | iterOffset += nToCopy; |
2695 | 0 | nBufferRequestSize -= nToCopy; |
2696 | 0 | if (osRegion.size() < static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE) && |
2697 | 0 | nBufferRequestSize != 0) |
2698 | 0 | { |
2699 | 0 | break; |
2700 | 0 | } |
2701 | 0 | } |
2702 | | |
2703 | 10.6k | const size_t ret = static_cast<size_t>(iterOffset - curOffset); |
2704 | 10.6k | if (ret != nBytes) |
2705 | 10.6k | bEOF = true; |
2706 | | |
2707 | 10.6k | curOffset = iterOffset; |
2708 | | |
2709 | 10.6k | return ret; |
2710 | 21.5k | } |
2711 | | |
2712 | | /************************************************************************/ |
2713 | | /* ReadMultiRange() */ |
2714 | | /************************************************************************/ |
2715 | | |
2716 | | int VSICurlHandle::ReadMultiRange(int const nRanges, void **const ppData, |
2717 | | const vsi_l_offset *const panOffsets, |
2718 | | const size_t *const panSizes) |
2719 | 0 | { |
2720 | 0 | if (bInterrupted && bStopOnInterruptUntilUninstall) |
2721 | 0 | return FALSE; |
2722 | | |
2723 | 0 | poFS->GetCachedFileProp(m_pszURL, oFileProp); |
2724 | 0 | if (oFileProp.eExists == EXIST_NO) |
2725 | 0 | return -1; |
2726 | | |
2727 | 0 | NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str()); |
2728 | 0 | NetworkStatisticsFile oContextFile(m_osFilename.c_str()); |
2729 | 0 | NetworkStatisticsAction oContextAction("ReadMultiRange"); |
2730 | |
|
2731 | 0 | const char *pszMultiRangeStrategy = |
2732 | 0 | CPLGetConfigOption("GDAL_HTTP_MULTIRANGE", ""); |
2733 | 0 | if (EQUAL(pszMultiRangeStrategy, "SINGLE_GET")) |
2734 | 0 | { |
2735 | | // Just in case someone needs it, but the interest of this mode is |
2736 | | // rather dubious now. We could probably remove it |
2737 | 0 | return ReadMultiRangeSingleGet(nRanges, ppData, panOffsets, panSizes); |
2738 | 0 | } |
2739 | 0 | else if (nRanges == 1 || EQUAL(pszMultiRangeStrategy, "SERIAL")) |
2740 | 0 | { |
2741 | 0 | return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets, |
2742 | 0 | panSizes); |
2743 | 0 | } |
2744 | | |
2745 | 0 | UpdateQueryString(); |
2746 | |
|
2747 | 0 | bool bHasExpired = false; |
2748 | |
|
2749 | 0 | CPLStringList aosHTTPOptions(m_aosHTTPOptions); |
2750 | 0 | std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); |
2751 | 0 | if (bHasExpired) |
2752 | 0 | { |
2753 | 0 | return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets, |
2754 | 0 | panSizes); |
2755 | 0 | } |
2756 | | |
2757 | 0 | CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL); |
2758 | 0 | #ifdef CURLPIPE_MULTIPLEX |
2759 | | // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is |
2760 | | // used) |
2761 | | // Not that this does not enable HTTP/1.1 pipeling, which is not |
2762 | | // recommended for example by Google Cloud Storage. |
2763 | | // For HTTP/1.1, parallel connections work better since you can get |
2764 | | // results out of order. |
2765 | 0 | if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES"))) |
2766 | 0 | { |
2767 | 0 | curl_multi_setopt(hMultiHandle, CURLMOPT_PIPELINING, |
2768 | 0 | CURLPIPE_MULTIPLEX); |
2769 | 0 | } |
2770 | 0 | #endif |
2771 | |
|
2772 | 0 | struct CurlErrBuffer |
2773 | 0 | { |
2774 | 0 | std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf; |
2775 | 0 | }; |
2776 | | |
2777 | | // Sort ranges by file offset so the merge loop below can coalesce |
2778 | | // adjacent ranges regardless of the order the caller passed them. |
2779 | | // The ppData buffer pointers travel with their offsets, so the |
2780 | | // distribute logic fills the correct caller buffers after reading. |
2781 | 0 | std::vector<int> anSortOrder(nRanges); |
2782 | 0 | std::iota(anSortOrder.begin(), anSortOrder.end(), 0); |
2783 | 0 | std::sort(anSortOrder.begin(), anSortOrder.end(), [panOffsets](int a, int b) |
2784 | 0 | { return panOffsets[a] < panOffsets[b]; }); |
2785 | |
|
2786 | 0 | std::vector<void *> apSortedData(nRanges); |
2787 | 0 | std::vector<vsi_l_offset> anSortedOffsets(nRanges); |
2788 | 0 | std::vector<size_t> anSortedSizes(nRanges); |
2789 | 0 | for (int i = 0; i < nRanges; ++i) |
2790 | 0 | { |
2791 | 0 | apSortedData[i] = ppData[anSortOrder[i]]; |
2792 | 0 | anSortedOffsets[i] = panOffsets[anSortOrder[i]]; |
2793 | 0 | anSortedSizes[i] = panSizes[anSortOrder[i]]; |
2794 | 0 | } |
2795 | |
|
2796 | 0 | const bool bMergeConsecutiveRanges = CPLTestBool( |
2797 | 0 | CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE")); |
2798 | | |
2799 | | // Build list of merged requests upfront, each with its own retry context |
2800 | 0 | struct MergedRequest |
2801 | 0 | { |
2802 | 0 | int iFirstRange; |
2803 | 0 | int iLastRange; |
2804 | 0 | vsi_l_offset nStartOffset; |
2805 | 0 | size_t nSize; |
2806 | 0 | CPLHTTPRetryContext retryContext; |
2807 | 0 | bool bToRetry = true; // true initially to trigger first attempt |
2808 | |
|
2809 | 0 | MergedRequest(int first, int last, vsi_l_offset start, size_t size, |
2810 | 0 | const CPLHTTPRetryParameters ¶ms) |
2811 | 0 | : iFirstRange(first), iLastRange(last), nStartOffset(start), |
2812 | 0 | nSize(size), retryContext(params) |
2813 | 0 | { |
2814 | 0 | } |
2815 | 0 | }; |
2816 | |
|
2817 | 0 | std::vector<MergedRequest> asMergedRequests; |
2818 | 0 | for (int i = 0; i < nRanges;) |
2819 | 0 | { |
2820 | 0 | size_t nSize = 0; |
2821 | 0 | int iNext = i; |
2822 | | // Identify consecutive ranges |
2823 | 0 | while (bMergeConsecutiveRanges && iNext + 1 < nRanges && |
2824 | 0 | anSortedOffsets[iNext] + anSortedSizes[iNext] == |
2825 | 0 | anSortedOffsets[iNext + 1]) |
2826 | 0 | { |
2827 | 0 | nSize += anSortedSizes[iNext]; |
2828 | 0 | iNext++; |
2829 | 0 | } |
2830 | 0 | nSize += anSortedSizes[iNext]; |
2831 | |
|
2832 | 0 | if (nSize == 0) |
2833 | 0 | { |
2834 | 0 | i = iNext + 1; |
2835 | 0 | continue; |
2836 | 0 | } |
2837 | | |
2838 | 0 | asMergedRequests.emplace_back(i, iNext, anSortedOffsets[i], nSize, |
2839 | 0 | m_oRetryParameters); |
2840 | 0 | i = iNext + 1; |
2841 | 0 | } |
2842 | |
|
2843 | 0 | if (asMergedRequests.empty()) |
2844 | 0 | return 0; |
2845 | | |
2846 | 0 | int nRet = 0; |
2847 | 0 | size_t nTotalDownloaded = 0; |
2848 | | |
2849 | | // Retry loop: re-issue only failed requests that are retryable |
2850 | 0 | while (true) |
2851 | 0 | { |
2852 | 0 | const size_t nRequests = asMergedRequests.size(); |
2853 | 0 | std::vector<CURL *> aHandles(nRequests, nullptr); |
2854 | 0 | std::vector<WriteFuncStruct> asWriteFuncData(nRequests); |
2855 | 0 | std::vector<WriteFuncStruct> asWriteFuncHeaderData(nRequests); |
2856 | 0 | std::vector<char *> apszRanges(nRequests, nullptr); |
2857 | 0 | std::vector<struct curl_slist *> aHeaders(nRequests, nullptr); |
2858 | 0 | std::vector<CurlErrBuffer> asCurlErrors(nRequests); |
2859 | |
|
2860 | 0 | bool bAnyHandle = false; |
2861 | 0 | for (size_t iReq = 0; iReq < nRequests; iReq++) |
2862 | 0 | { |
2863 | 0 | if (!asMergedRequests[iReq].bToRetry) |
2864 | 0 | continue; |
2865 | 0 | asMergedRequests[iReq].bToRetry = false; |
2866 | |
|
2867 | 0 | CURL *hCurlHandle = curl_easy_init(); |
2868 | 0 | aHandles[iReq] = hCurlHandle; |
2869 | 0 | bAnyHandle = true; |
2870 | |
|
2871 | 0 | struct curl_slist *headers = VSICurlSetOptions( |
2872 | 0 | hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); |
2873 | |
|
2874 | 0 | VSICURLInitWriteFuncStruct(&asWriteFuncData[iReq], this, pfnReadCbk, |
2875 | 0 | pReadCbkUserData); |
2876 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, |
2877 | 0 | &asWriteFuncData[iReq]); |
2878 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
2879 | 0 | VSICurlHandleWriteFunc); |
2880 | |
|
2881 | 0 | VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[iReq], nullptr, |
2882 | 0 | nullptr, nullptr); |
2883 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
2884 | 0 | &asWriteFuncHeaderData[iReq]); |
2885 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
2886 | 0 | VSICurlHandleWriteFunc); |
2887 | 0 | asWriteFuncHeaderData[iReq].bIsHTTP = STARTS_WITH(m_pszURL, "http"); |
2888 | 0 | asWriteFuncHeaderData[iReq].nStartOffset = |
2889 | 0 | asMergedRequests[iReq].nStartOffset; |
2890 | 0 | asWriteFuncHeaderData[iReq].nEndOffset = |
2891 | 0 | asMergedRequests[iReq].nStartOffset + |
2892 | 0 | asMergedRequests[iReq].nSize - 1; |
2893 | |
|
2894 | 0 | char rangeStr[512] = {}; |
2895 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
2896 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
2897 | 0 | asWriteFuncHeaderData[iReq].nStartOffset, |
2898 | 0 | asWriteFuncHeaderData[iReq].nEndOffset); |
2899 | |
|
2900 | | if constexpr (ENABLE_DEBUG) |
2901 | 0 | { |
2902 | 0 | CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", |
2903 | 0 | rangeStr, osURL.c_str()); |
2904 | 0 | } |
2905 | |
|
2906 | 0 | if (asWriteFuncHeaderData[iReq].bIsHTTP) |
2907 | 0 | { |
2908 | | // So it gets included in Azure signature |
2909 | 0 | char *pszRange = |
2910 | 0 | CPLStrdup(CPLSPrintf("Range: bytes=%s", rangeStr)); |
2911 | 0 | apszRanges[iReq] = pszRange; |
2912 | 0 | headers = curl_slist_append(headers, pszRange); |
2913 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr); |
2914 | 0 | } |
2915 | 0 | else |
2916 | 0 | { |
2917 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, |
2918 | 0 | rangeStr); |
2919 | 0 | } |
2920 | |
|
2921 | 0 | asCurlErrors[iReq].szCurlErrBuf[0] = '\0'; |
2922 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, |
2923 | 0 | &asCurlErrors[iReq].szCurlErrBuf[0]); |
2924 | |
|
2925 | 0 | headers = GetCurlHeaders("GET", headers); |
2926 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, |
2927 | 0 | headers); |
2928 | 0 | aHeaders[iReq] = headers; |
2929 | 0 | curl_multi_add_handle(hMultiHandle, hCurlHandle); |
2930 | 0 | } |
2931 | |
|
2932 | 0 | if (bAnyHandle) |
2933 | 0 | { |
2934 | 0 | VSICURLMultiPerform(hMultiHandle); |
2935 | 0 | } |
2936 | | |
2937 | | // Process results |
2938 | 0 | bool bRetry = false; |
2939 | 0 | double dfMaxDelay = 0.0; |
2940 | 0 | for (size_t iReq = 0; iReq < nRequests; iReq++) |
2941 | 0 | { |
2942 | 0 | if (!aHandles[iReq]) |
2943 | 0 | continue; |
2944 | | |
2945 | 0 | long response_code = 0; |
2946 | 0 | curl_easy_getinfo(aHandles[iReq], CURLINFO_HTTP_CODE, |
2947 | 0 | &response_code); |
2948 | |
|
2949 | 0 | if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0') |
2950 | 0 | { |
2951 | 0 | char rangeStr[512] = {}; |
2952 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
2953 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
2954 | 0 | asWriteFuncHeaderData[iReq].nStartOffset, |
2955 | 0 | asWriteFuncHeaderData[iReq].nEndOffset); |
2956 | |
|
2957 | 0 | const char *pszErrorMsg = &asCurlErrors[iReq].szCurlErrBuf[0]; |
2958 | 0 | CPLDebug(poFS->GetDebugKey(), |
2959 | 0 | "ReadMultiRange(%s), %s: response_code=%d, msg=%s", |
2960 | 0 | osURL.c_str(), rangeStr, |
2961 | 0 | static_cast<int>(response_code), pszErrorMsg); |
2962 | 0 | } |
2963 | |
|
2964 | 0 | if ((response_code != 206 && response_code != 225) || |
2965 | 0 | asWriteFuncHeaderData[iReq].nEndOffset + 1 != |
2966 | 0 | asWriteFuncHeaderData[iReq].nStartOffset + |
2967 | 0 | asWriteFuncData[iReq].nSize) |
2968 | 0 | { |
2969 | 0 | char rangeStr[512] = {}; |
2970 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
2971 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
2972 | 0 | asWriteFuncHeaderData[iReq].nStartOffset, |
2973 | 0 | asWriteFuncHeaderData[iReq].nEndOffset); |
2974 | | |
2975 | | // Look if we should attempt a retry |
2976 | 0 | if (asMergedRequests[iReq].retryContext.CanRetry( |
2977 | 0 | static_cast<int>(response_code), |
2978 | 0 | asWriteFuncData[iReq].pBuffer, |
2979 | 0 | &asCurlErrors[iReq].szCurlErrBuf[0])) |
2980 | 0 | { |
2981 | 0 | CPLError( |
2982 | 0 | CE_Warning, CPLE_AppDefined, |
2983 | 0 | "HTTP error code for %s range %s: %d. " |
2984 | 0 | "Retrying again in %.1f secs", |
2985 | 0 | osURL.c_str(), rangeStr, |
2986 | 0 | static_cast<int>(response_code), |
2987 | 0 | asMergedRequests[iReq].retryContext.GetCurrentDelay()); |
2988 | 0 | dfMaxDelay = std::max( |
2989 | 0 | dfMaxDelay, |
2990 | 0 | asMergedRequests[iReq].retryContext.GetCurrentDelay()); |
2991 | 0 | asMergedRequests[iReq].bToRetry = true; |
2992 | 0 | bRetry = true; |
2993 | 0 | } |
2994 | 0 | else |
2995 | 0 | { |
2996 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
2997 | 0 | "Request for %s failed with response_code=%ld", |
2998 | 0 | rangeStr, response_code); |
2999 | 0 | nRet = -1; |
3000 | 0 | } |
3001 | 0 | } |
3002 | 0 | else if (nRet == 0) |
3003 | 0 | { |
3004 | 0 | size_t nOffset = 0; |
3005 | 0 | size_t nRemainingSize = asWriteFuncData[iReq].nSize; |
3006 | 0 | nTotalDownloaded += nRemainingSize; |
3007 | 0 | for (int iRange = asMergedRequests[iReq].iFirstRange; |
3008 | 0 | iRange <= asMergedRequests[iReq].iLastRange; iRange++) |
3009 | 0 | { |
3010 | 0 | if (nRemainingSize < anSortedSizes[iRange]) |
3011 | 0 | { |
3012 | 0 | nRet = -1; |
3013 | 0 | break; |
3014 | 0 | } |
3015 | | |
3016 | 0 | if (anSortedSizes[iRange] > 0) |
3017 | 0 | { |
3018 | 0 | memcpy(apSortedData[iRange], |
3019 | 0 | asWriteFuncData[iReq].pBuffer + nOffset, |
3020 | 0 | anSortedSizes[iRange]); |
3021 | 0 | } |
3022 | 0 | nOffset += anSortedSizes[iRange]; |
3023 | 0 | nRemainingSize -= anSortedSizes[iRange]; |
3024 | 0 | } |
3025 | 0 | } |
3026 | |
|
3027 | 0 | curl_multi_remove_handle(hMultiHandle, aHandles[iReq]); |
3028 | 0 | VSICURLResetHeaderAndWriterFunctions(aHandles[iReq]); |
3029 | 0 | curl_easy_cleanup(aHandles[iReq]); |
3030 | 0 | CPLFree(apszRanges[iReq]); |
3031 | 0 | CPLFree(asWriteFuncData[iReq].pBuffer); |
3032 | 0 | CPLFree(asWriteFuncHeaderData[iReq].pBuffer); |
3033 | 0 | if (aHeaders[iReq]) |
3034 | 0 | curl_slist_free_all(aHeaders[iReq]); |
3035 | 0 | } |
3036 | |
|
3037 | 0 | if (!bRetry || nRet != 0) |
3038 | 0 | break; |
3039 | 0 | CPLSleep(dfMaxDelay); |
3040 | 0 | } |
3041 | |
|
3042 | 0 | NetworkStatisticsLogger::LogGET(nTotalDownloaded); |
3043 | |
|
3044 | | if constexpr (ENABLE_DEBUG) |
3045 | 0 | { |
3046 | 0 | CPLDebug(poFS->GetDebugKey(), "Download completed"); |
3047 | 0 | } |
3048 | |
|
3049 | 0 | return nRet; |
3050 | 0 | } |
3051 | | |
3052 | | /************************************************************************/ |
3053 | | /* ReadMultiRangeSingleGet() */ |
3054 | | /************************************************************************/ |
3055 | | |
3056 | | // TODO: the interest of this mode is rather dubious now. We could probably |
3057 | | // remove it |
3058 | | int VSICurlHandle::ReadMultiRangeSingleGet(int const nRanges, |
3059 | | void **const ppData, |
3060 | | const vsi_l_offset *const panOffsets, |
3061 | | const size_t *const panSizes) |
3062 | 0 | { |
3063 | 0 | std::string osRanges; |
3064 | 0 | std::string osFirstRange; |
3065 | 0 | std::string osLastRange; |
3066 | 0 | int nMergedRanges = 0; |
3067 | 0 | vsi_l_offset nTotalReqSize = 0; |
3068 | 0 | for (int i = 0; i < nRanges; i++) |
3069 | 0 | { |
3070 | 0 | std::string osCurRange; |
3071 | 0 | if (i != 0) |
3072 | 0 | osRanges.append(","); |
3073 | 0 | osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]); |
3074 | 0 | while (i + 1 < nRanges && |
3075 | 0 | panOffsets[i] + panSizes[i] == panOffsets[i + 1]) |
3076 | 0 | { |
3077 | 0 | nTotalReqSize += panSizes[i]; |
3078 | 0 | i++; |
3079 | 0 | } |
3080 | 0 | nTotalReqSize += panSizes[i]; |
3081 | 0 | osCurRange.append( |
3082 | 0 | CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i] - 1)); |
3083 | 0 | nMergedRanges++; |
3084 | |
|
3085 | 0 | osRanges += osCurRange; |
3086 | |
|
3087 | 0 | if (nMergedRanges == 1) |
3088 | 0 | osFirstRange = osCurRange; |
3089 | 0 | osLastRange = std::move(osCurRange); |
3090 | 0 | } |
3091 | |
|
3092 | 0 | const char *pszMaxRanges = |
3093 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250"); |
3094 | 0 | int nMaxRanges = atoi(pszMaxRanges); |
3095 | 0 | if (nMaxRanges <= 0) |
3096 | 0 | nMaxRanges = 250; |
3097 | 0 | if (nMergedRanges > nMaxRanges) |
3098 | 0 | { |
3099 | 0 | const int nHalf = nRanges / 2; |
3100 | 0 | const int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes); |
3101 | 0 | if (nRet != 0) |
3102 | 0 | return nRet; |
3103 | 0 | return ReadMultiRange(nRanges - nHalf, ppData + nHalf, |
3104 | 0 | panOffsets + nHalf, panSizes + nHalf); |
3105 | 0 | } |
3106 | | |
3107 | 0 | CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL); |
3108 | 0 | CURL *hCurlHandle = curl_easy_init(); |
3109 | |
|
3110 | 0 | struct curl_slist *headers = |
3111 | 0 | VSICurlSetOptions(hCurlHandle, m_pszURL, m_aosHTTPOptions.List()); |
3112 | |
|
3113 | 0 | WriteFuncStruct sWriteFuncData; |
3114 | 0 | WriteFuncStruct sWriteFuncHeaderData; |
3115 | |
|
3116 | 0 | VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk, |
3117 | 0 | pReadCbkUserData); |
3118 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData); |
3119 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
3120 | 0 | VSICurlHandleWriteFunc); |
3121 | |
|
3122 | 0 | VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr, |
3123 | 0 | nullptr); |
3124 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
3125 | 0 | &sWriteFuncHeaderData); |
3126 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
3127 | 0 | VSICurlHandleWriteFunc); |
3128 | 0 | sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http"); |
3129 | 0 | sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1; |
3130 | 0 | if (nMergedRanges == 1) |
3131 | 0 | { |
3132 | 0 | sWriteFuncHeaderData.nStartOffset = panOffsets[0]; |
3133 | 0 | sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize - 1; |
3134 | 0 | } |
3135 | |
|
3136 | | if constexpr (ENABLE_DEBUG) |
3137 | 0 | { |
3138 | 0 | if (nMergedRanges == 1) |
3139 | 0 | CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", |
3140 | 0 | osRanges.c_str(), m_pszURL); |
3141 | 0 | else |
3142 | 0 | CPLDebug(poFS->GetDebugKey(), |
3143 | 0 | "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...", |
3144 | 0 | osFirstRange.c_str(), osLastRange.c_str(), |
3145 | 0 | static_cast<GUIntBig>(nTotalReqSize), m_pszURL); |
3146 | 0 | } |
3147 | |
|
3148 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str()); |
3149 | |
|
3150 | 0 | char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {}; |
3151 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf); |
3152 | |
|
3153 | 0 | headers = GetCurlHeaders("GET", headers); |
3154 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
3155 | |
|
3156 | 0 | VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle); |
3157 | |
|
3158 | 0 | VSICURLResetHeaderAndWriterFunctions(hCurlHandle); |
3159 | |
|
3160 | 0 | curl_slist_free_all(headers); |
3161 | |
|
3162 | 0 | NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize); |
3163 | |
|
3164 | 0 | if (sWriteFuncData.bInterrupted) |
3165 | 0 | { |
3166 | 0 | bInterrupted = true; |
3167 | |
|
3168 | 0 | CPLFree(sWriteFuncData.pBuffer); |
3169 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
3170 | 0 | curl_easy_cleanup(hCurlHandle); |
3171 | |
|
3172 | 0 | return -1; |
3173 | 0 | } |
3174 | | |
3175 | 0 | long response_code = 0; |
3176 | 0 | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
3177 | |
|
3178 | 0 | if ((response_code != 200 && response_code != 206 && response_code != 225 && |
3179 | 0 | response_code != 226 && response_code != 426) || |
3180 | 0 | sWriteFuncHeaderData.bError) |
3181 | 0 | { |
3182 | 0 | if (response_code >= 400 && szCurlErrBuf[0] != '\0') |
3183 | 0 | { |
3184 | 0 | if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0) |
3185 | 0 | CPLError( |
3186 | 0 | CE_Failure, CPLE_AppDefined, |
3187 | 0 | "%d: %s, Range downloading not supported by this server!", |
3188 | 0 | static_cast<int>(response_code), szCurlErrBuf); |
3189 | 0 | else |
3190 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", |
3191 | 0 | static_cast<int>(response_code), szCurlErrBuf); |
3192 | 0 | } |
3193 | | /* |
3194 | | if( !bHasComputedFileSize && startOffset == 0 ) |
3195 | | { |
3196 | | cachedFileProp->bHasComputedFileSize = bHasComputedFileSize = true; |
3197 | | cachedFileProp->fileSize = fileSize = 0; |
3198 | | cachedFileProp->eExists = eExists = EXIST_NO; |
3199 | | } |
3200 | | */ |
3201 | 0 | CPLFree(sWriteFuncData.pBuffer); |
3202 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
3203 | 0 | curl_easy_cleanup(hCurlHandle); |
3204 | 0 | return -1; |
3205 | 0 | } |
3206 | | |
3207 | 0 | char *pBuffer = sWriteFuncData.pBuffer; |
3208 | 0 | size_t nSize = sWriteFuncData.nSize; |
3209 | | |
3210 | | // TODO(schwehr): Localize after removing gotos. |
3211 | 0 | int nRet = -1; |
3212 | 0 | char *pszBoundary; |
3213 | 0 | std::string osBoundary; |
3214 | 0 | char *pszNext = nullptr; |
3215 | 0 | int iRange = 0; |
3216 | 0 | int iPart = 0; |
3217 | 0 | char *pszEOL = nullptr; |
3218 | | |
3219 | | /* -------------------------------------------------------------------- */ |
3220 | | /* No multipart if a single range has been requested */ |
3221 | | /* -------------------------------------------------------------------- */ |
3222 | |
|
3223 | 0 | if (nMergedRanges == 1) |
3224 | 0 | { |
3225 | 0 | size_t nAccSize = 0; |
3226 | 0 | if (static_cast<vsi_l_offset>(nSize) < nTotalReqSize) |
3227 | 0 | goto end; |
3228 | | |
3229 | 0 | for (int i = 0; i < nRanges; i++) |
3230 | 0 | { |
3231 | 0 | memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]); |
3232 | 0 | nAccSize += panSizes[i]; |
3233 | 0 | } |
3234 | |
|
3235 | 0 | nRet = 0; |
3236 | 0 | goto end; |
3237 | 0 | } |
3238 | | |
3239 | | /* -------------------------------------------------------------------- */ |
3240 | | /* Extract boundary name */ |
3241 | | /* -------------------------------------------------------------------- */ |
3242 | | |
3243 | 0 | pszBoundary = strstr(sWriteFuncHeaderData.pBuffer, |
3244 | 0 | "Content-Type: multipart/byteranges; boundary="); |
3245 | 0 | if (pszBoundary == nullptr) |
3246 | 0 | { |
3247 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "Could not find '%s'", |
3248 | 0 | "Content-Type: multipart/byteranges; boundary="); |
3249 | 0 | goto end; |
3250 | 0 | } |
3251 | | |
3252 | 0 | pszBoundary += strlen("Content-Type: multipart/byteranges; boundary="); |
3253 | |
|
3254 | 0 | pszEOL = strchr(pszBoundary, '\r'); |
3255 | 0 | if (pszEOL) |
3256 | 0 | *pszEOL = 0; |
3257 | 0 | pszEOL = strchr(pszBoundary, '\n'); |
3258 | 0 | if (pszEOL) |
3259 | 0 | *pszEOL = 0; |
3260 | | |
3261 | | /* Remove optional double-quote character around boundary name */ |
3262 | 0 | if (pszBoundary[0] == '"') |
3263 | 0 | { |
3264 | 0 | pszBoundary++; |
3265 | 0 | char *pszLastDoubleQuote = strrchr(pszBoundary, '"'); |
3266 | 0 | if (pszLastDoubleQuote) |
3267 | 0 | *pszLastDoubleQuote = 0; |
3268 | 0 | } |
3269 | |
|
3270 | 0 | osBoundary = "--"; |
3271 | 0 | osBoundary += pszBoundary; |
3272 | | |
3273 | | /* -------------------------------------------------------------------- */ |
3274 | | /* Find the start of the first chunk. */ |
3275 | | /* -------------------------------------------------------------------- */ |
3276 | 0 | pszNext = strstr(pBuffer, osBoundary.c_str()); |
3277 | 0 | if (pszNext == nullptr) |
3278 | 0 | { |
3279 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "No parts found."); |
3280 | 0 | goto end; |
3281 | 0 | } |
3282 | | |
3283 | 0 | pszNext += osBoundary.size(); |
3284 | 0 | while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0') |
3285 | 0 | pszNext++; |
3286 | 0 | if (*pszNext == '\r') |
3287 | 0 | pszNext++; |
3288 | 0 | if (*pszNext == '\n') |
3289 | 0 | pszNext++; |
3290 | | |
3291 | | /* -------------------------------------------------------------------- */ |
3292 | | /* Loop over parts... */ |
3293 | | /* -------------------------------------------------------------------- */ |
3294 | 0 | while (iPart < nRanges) |
3295 | 0 | { |
3296 | | /* -------------------------------------------------------------------- |
3297 | | */ |
3298 | | /* Collect headers. */ |
3299 | | /* -------------------------------------------------------------------- |
3300 | | */ |
3301 | 0 | bool bExpectedRange = false; |
3302 | |
|
3303 | 0 | while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0') |
3304 | 0 | { |
3305 | 0 | pszEOL = strstr(pszNext, "\n"); |
3306 | |
|
3307 | 0 | if (pszEOL == nullptr) |
3308 | 0 | { |
3309 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3310 | 0 | "Error while parsing multipart content (at line %d)", |
3311 | 0 | __LINE__); |
3312 | 0 | goto end; |
3313 | 0 | } |
3314 | | |
3315 | 0 | *pszEOL = '\0'; |
3316 | 0 | bool bRestoreAntislashR = false; |
3317 | 0 | if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r') |
3318 | 0 | { |
3319 | 0 | bRestoreAntislashR = true; |
3320 | 0 | pszEOL[-1] = '\0'; |
3321 | 0 | } |
3322 | |
|
3323 | 0 | if (STARTS_WITH_CI(pszNext, "Content-Range: bytes ")) |
3324 | 0 | { |
3325 | 0 | bExpectedRange = true; /* FIXME */ |
3326 | 0 | } |
3327 | |
|
3328 | 0 | if (bRestoreAntislashR) |
3329 | 0 | pszEOL[-1] = '\r'; |
3330 | 0 | *pszEOL = '\n'; |
3331 | |
|
3332 | 0 | pszNext = pszEOL + 1; |
3333 | 0 | } |
3334 | | |
3335 | 0 | if (!bExpectedRange) |
3336 | 0 | { |
3337 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3338 | 0 | "Error while parsing multipart content (at line %d)", |
3339 | 0 | __LINE__); |
3340 | 0 | goto end; |
3341 | 0 | } |
3342 | | |
3343 | 0 | if (*pszNext == '\r') |
3344 | 0 | pszNext++; |
3345 | 0 | if (*pszNext == '\n') |
3346 | 0 | pszNext++; |
3347 | | |
3348 | | /* -------------------------------------------------------------------- |
3349 | | */ |
3350 | | /* Work out the data block size. */ |
3351 | | /* -------------------------------------------------------------------- |
3352 | | */ |
3353 | 0 | size_t nBytesAvail = nSize - (pszNext - pBuffer); |
3354 | |
|
3355 | 0 | while (true) |
3356 | 0 | { |
3357 | 0 | if (nBytesAvail < panSizes[iRange]) |
3358 | 0 | { |
3359 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3360 | 0 | "Error while parsing multipart content (at line %d)", |
3361 | 0 | __LINE__); |
3362 | 0 | goto end; |
3363 | 0 | } |
3364 | | |
3365 | 0 | memcpy(ppData[iRange], pszNext, panSizes[iRange]); |
3366 | 0 | pszNext += panSizes[iRange]; |
3367 | 0 | nBytesAvail -= panSizes[iRange]; |
3368 | 0 | if (iRange + 1 < nRanges && |
3369 | 0 | panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1]) |
3370 | 0 | { |
3371 | 0 | iRange++; |
3372 | 0 | } |
3373 | 0 | else |
3374 | 0 | { |
3375 | 0 | break; |
3376 | 0 | } |
3377 | 0 | } |
3378 | | |
3379 | 0 | iPart++; |
3380 | 0 | iRange++; |
3381 | |
|
3382 | 0 | while (nBytesAvail > 0 && |
3383 | 0 | (*pszNext != '-' || |
3384 | 0 | strncmp(pszNext, osBoundary.c_str(), osBoundary.size()) != 0)) |
3385 | 0 | { |
3386 | 0 | pszNext++; |
3387 | 0 | nBytesAvail--; |
3388 | 0 | } |
3389 | |
|
3390 | 0 | if (nBytesAvail == 0) |
3391 | 0 | { |
3392 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3393 | 0 | "Error while parsing multipart content (at line %d)", |
3394 | 0 | __LINE__); |
3395 | 0 | goto end; |
3396 | 0 | } |
3397 | | |
3398 | 0 | pszNext += osBoundary.size(); |
3399 | 0 | if (STARTS_WITH(pszNext, "--")) |
3400 | 0 | { |
3401 | | // End of multipart. |
3402 | 0 | break; |
3403 | 0 | } |
3404 | | |
3405 | 0 | if (*pszNext == '\r') |
3406 | 0 | pszNext++; |
3407 | 0 | if (*pszNext == '\n') |
3408 | 0 | pszNext++; |
3409 | 0 | else |
3410 | 0 | { |
3411 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3412 | 0 | "Error while parsing multipart content (at line %d)", |
3413 | 0 | __LINE__); |
3414 | 0 | goto end; |
3415 | 0 | } |
3416 | 0 | } |
3417 | | |
3418 | 0 | if (iPart == nMergedRanges) |
3419 | 0 | nRet = 0; |
3420 | 0 | else |
3421 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3422 | 0 | "Got only %d parts, where %d were expected", iPart, |
3423 | 0 | nMergedRanges); |
3424 | |
|
3425 | 0 | end: |
3426 | 0 | CPLFree(sWriteFuncData.pBuffer); |
3427 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
3428 | 0 | curl_easy_cleanup(hCurlHandle); |
3429 | |
|
3430 | 0 | return nRet; |
3431 | 0 | } |
3432 | | |
3433 | | /************************************************************************/ |
3434 | | /* PRead() */ |
3435 | | /************************************************************************/ |
3436 | | |
3437 | | size_t VSICurlHandle::PRead(void *pBuffer, size_t nSize, |
3438 | | vsi_l_offset nOffset) const |
3439 | 0 | { |
3440 | | // Try to use AdviseRead ranges fetched asynchronously |
3441 | 0 | if (!m_aoAdviseReadRanges.empty()) |
3442 | 0 | { |
3443 | 0 | for (auto &poRange : m_aoAdviseReadRanges) |
3444 | 0 | { |
3445 | 0 | if (nOffset >= poRange->nStartOffset && |
3446 | 0 | nOffset + nSize <= poRange->nStartOffset + poRange->nSize) |
3447 | 0 | { |
3448 | 0 | { |
3449 | 0 | std::unique_lock<std::mutex> oLock(poRange->oMutex); |
3450 | | // coverity[missing_lock:FALSE] |
3451 | 0 | while (!poRange->bDone) |
3452 | 0 | { |
3453 | 0 | poRange->oCV.wait(oLock); |
3454 | 0 | } |
3455 | 0 | } |
3456 | 0 | if (poRange->abyData.empty()) |
3457 | 0 | return 0; |
3458 | | |
3459 | 0 | auto nEndOffset = |
3460 | 0 | poRange->nStartOffset + poRange->abyData.size(); |
3461 | 0 | if (nOffset >= nEndOffset) |
3462 | 0 | return 0; |
3463 | 0 | const size_t nToCopy = static_cast<size_t>( |
3464 | 0 | std::min<vsi_l_offset>(nSize, nEndOffset - nOffset)); |
3465 | 0 | memcpy(pBuffer, |
3466 | 0 | poRange->abyData.data() + |
3467 | 0 | static_cast<size_t>(nOffset - poRange->nStartOffset), |
3468 | 0 | nToCopy); |
3469 | 0 | return nToCopy; |
3470 | 0 | } |
3471 | 0 | } |
3472 | 0 | } |
3473 | | |
3474 | | // poFS has a global mutex |
3475 | 0 | poFS->GetCachedFileProp(m_pszURL, oFileProp); |
3476 | 0 | if (oFileProp.eExists == EXIST_NO) |
3477 | 0 | return static_cast<size_t>(-1); |
3478 | | |
3479 | 0 | NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str()); |
3480 | 0 | NetworkStatisticsFile oContextFile(m_osFilename.c_str()); |
3481 | 0 | NetworkStatisticsAction oContextAction("PRead"); |
3482 | |
|
3483 | 0 | CPLStringList aosHTTPOptions(m_aosHTTPOptions); |
3484 | 0 | std::string osURL; |
3485 | 0 | { |
3486 | 0 | std::lock_guard<std::mutex> oLock(m_oMutex); |
3487 | 0 | UpdateQueryString(); |
3488 | 0 | bool bHasExpired; |
3489 | 0 | osURL = GetRedirectURLIfValid(bHasExpired, aosHTTPOptions); |
3490 | 0 | } |
3491 | |
|
3492 | 0 | CURL *hCurlHandle = curl_easy_init(); |
3493 | |
|
3494 | 0 | struct curl_slist *headers = |
3495 | 0 | VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); |
3496 | |
|
3497 | 0 | WriteFuncStruct sWriteFuncData; |
3498 | 0 | VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr); |
3499 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData); |
3500 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
3501 | 0 | VSICurlHandleWriteFunc); |
3502 | |
|
3503 | 0 | WriteFuncStruct sWriteFuncHeaderData; |
3504 | 0 | VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr, |
3505 | 0 | nullptr); |
3506 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
3507 | 0 | &sWriteFuncHeaderData); |
3508 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
3509 | 0 | VSICurlHandleWriteFunc); |
3510 | 0 | sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http"); |
3511 | 0 | sWriteFuncHeaderData.nStartOffset = nOffset; |
3512 | |
|
3513 | 0 | sWriteFuncHeaderData.nEndOffset = nOffset + nSize - 1; |
3514 | |
|
3515 | 0 | char rangeStr[512] = {}; |
3516 | 0 | snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
3517 | 0 | sWriteFuncHeaderData.nStartOffset, |
3518 | 0 | sWriteFuncHeaderData.nEndOffset); |
3519 | |
|
3520 | | if constexpr (ENABLE_DEBUG) |
3521 | 0 | { |
3522 | 0 | CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", rangeStr, |
3523 | 0 | osURL.c_str()); |
3524 | 0 | } |
3525 | |
|
3526 | 0 | std::string osHeaderRange; |
3527 | 0 | if (sWriteFuncHeaderData.bIsHTTP) |
3528 | 0 | { |
3529 | 0 | osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr); |
3530 | | // So it gets included in Azure signature |
3531 | 0 | headers = curl_slist_append(headers, osHeaderRange.data()); |
3532 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr); |
3533 | 0 | } |
3534 | 0 | else |
3535 | 0 | { |
3536 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr); |
3537 | 0 | } |
3538 | |
|
3539 | 0 | std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf; |
3540 | 0 | szCurlErrBuf[0] = '\0'; |
3541 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, |
3542 | 0 | &szCurlErrBuf[0]); |
3543 | |
|
3544 | 0 | { |
3545 | 0 | std::lock_guard<std::mutex> oLock(m_oMutex); |
3546 | 0 | headers = |
3547 | 0 | const_cast<VSICurlHandle *>(this)->GetCurlHeaders("GET", headers); |
3548 | 0 | } |
3549 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
3550 | |
|
3551 | 0 | CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL); |
3552 | 0 | VSICURLMultiPerform(hMultiHandle, hCurlHandle, &m_bInterrupt); |
3553 | |
|
3554 | 0 | { |
3555 | 0 | std::lock_guard<std::mutex> oLock(m_oMutex); |
3556 | 0 | const_cast<VSICurlHandle *>(this)->UpdateRedirectInfo( |
3557 | 0 | hCurlHandle, sWriteFuncHeaderData); |
3558 | 0 | } |
3559 | |
|
3560 | 0 | long response_code = 0; |
3561 | 0 | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
3562 | |
|
3563 | 0 | if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0') |
3564 | 0 | { |
3565 | 0 | const char *pszErrorMsg = &szCurlErrBuf[0]; |
3566 | 0 | CPLDebug(poFS->GetDebugKey(), "PRead(%s), %s: response_code=%d, msg=%s", |
3567 | 0 | osURL.c_str(), rangeStr, static_cast<int>(response_code), |
3568 | 0 | pszErrorMsg); |
3569 | 0 | } |
3570 | |
|
3571 | 0 | size_t nRet; |
3572 | 0 | if ((response_code != 206 && response_code != 225) || |
3573 | 0 | sWriteFuncData.nSize == 0) |
3574 | 0 | { |
3575 | 0 | if (!m_bInterrupt) |
3576 | 0 | { |
3577 | 0 | CPLDebug(poFS->GetDebugKey(), |
3578 | 0 | "Request for %s failed with response_code=%ld", rangeStr, |
3579 | 0 | response_code); |
3580 | 0 | } |
3581 | 0 | nRet = static_cast<size_t>(-1); |
3582 | 0 | } |
3583 | 0 | else |
3584 | 0 | { |
3585 | 0 | nRet = std::min(sWriteFuncData.nSize, nSize); |
3586 | 0 | if (nRet > 0) |
3587 | 0 | memcpy(pBuffer, sWriteFuncData.pBuffer, nRet); |
3588 | 0 | } |
3589 | |
|
3590 | 0 | VSICURLResetHeaderAndWriterFunctions(hCurlHandle); |
3591 | 0 | curl_easy_cleanup(hCurlHandle); |
3592 | 0 | CPLFree(sWriteFuncData.pBuffer); |
3593 | 0 | CPLFree(sWriteFuncHeaderData.pBuffer); |
3594 | 0 | curl_slist_free_all(headers); |
3595 | |
|
3596 | 0 | NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize); |
3597 | |
|
3598 | | #if 0 |
3599 | | if( ENABLE_DEBUG ) |
3600 | | CPLDebug(poFS->GetDebugKey(), "Download completed"); |
3601 | | #endif |
3602 | |
|
3603 | 0 | return nRet; |
3604 | 0 | } |
3605 | | |
3606 | | /************************************************************************/ |
3607 | | /* GetAdviseReadTotalBytesLimit() */ |
3608 | | /************************************************************************/ |
3609 | | |
3610 | | size_t VSICurlHandle::GetAdviseReadTotalBytesLimit() const |
3611 | 0 | { |
3612 | 0 | return static_cast<size_t>(std::min<unsigned long long>( |
3613 | 0 | std::numeric_limits<size_t>::max(), |
3614 | | // 100 MB |
3615 | 0 | std::strtoull( |
3616 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT", |
3617 | 0 | "104857600"), |
3618 | 0 | nullptr, 10))); |
3619 | 0 | } |
3620 | | |
3621 | | /************************************************************************/ |
3622 | | /* VSICURLMultiInit() */ |
3623 | | /************************************************************************/ |
3624 | | |
3625 | | static CURLM *VSICURLMultiInit() |
3626 | 45 | { |
3627 | 45 | CURLM *hCurlMultiHandle = curl_multi_init(); |
3628 | | |
3629 | 45 | if (const char *pszMAXCONNECTS = |
3630 | 45 | CPLGetConfigOption("GDAL_HTTP_MAX_CACHED_CONNECTIONS", nullptr)) |
3631 | 0 | { |
3632 | 0 | curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAXCONNECTS, |
3633 | 0 | atoi(pszMAXCONNECTS)); |
3634 | 0 | } |
3635 | | |
3636 | 45 | if (const char *pszMAX_TOTAL_CONNECTIONS = |
3637 | 45 | CPLGetConfigOption("GDAL_HTTP_MAX_TOTAL_CONNECTIONS", nullptr)) |
3638 | 0 | { |
3639 | 0 | curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAX_TOTAL_CONNECTIONS, |
3640 | 0 | atoi(pszMAX_TOTAL_CONNECTIONS)); |
3641 | 0 | } |
3642 | | |
3643 | 45 | return hCurlMultiHandle; |
3644 | 45 | } |
3645 | | |
3646 | | /************************************************************************/ |
3647 | | /* AdviseRead() */ |
3648 | | /************************************************************************/ |
3649 | | |
3650 | | void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets, |
3651 | | const size_t *panSizes) |
3652 | 0 | { |
3653 | 0 | if (!CPLTestBool( |
3654 | 0 | CPLGetConfigOption("GDAL_HTTP_ENABLE_ADVISE_READ", "TRUE"))) |
3655 | 0 | return; |
3656 | | |
3657 | 0 | if (m_oThreadAdviseRead.joinable()) |
3658 | 0 | { |
3659 | 0 | m_oThreadAdviseRead.join(); |
3660 | 0 | } |
3661 | | |
3662 | | // Give up if we need to allocate too much memory |
3663 | 0 | vsi_l_offset nMaxSize = 0; |
3664 | 0 | const size_t nLimit = GetAdviseReadTotalBytesLimit(); |
3665 | 0 | for (int i = 0; i < nRanges; ++i) |
3666 | 0 | { |
3667 | 0 | if (panSizes[i] > nLimit - nMaxSize) |
3668 | 0 | { |
3669 | 0 | CPLDebug(poFS->GetDebugKey(), |
3670 | 0 | "Trying to request too many bytes in AdviseRead()"); |
3671 | 0 | return; |
3672 | 0 | } |
3673 | 0 | nMaxSize += panSizes[i]; |
3674 | 0 | } |
3675 | | |
3676 | 0 | UpdateQueryString(); |
3677 | |
|
3678 | 0 | bool bHasExpired = false; |
3679 | 0 | CPLStringList aosHTTPOptions(m_aosHTTPOptions); |
3680 | 0 | const std::string l_osURL( |
3681 | 0 | GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); |
3682 | 0 | if (bHasExpired) |
3683 | 0 | { |
3684 | 0 | return; |
3685 | 0 | } |
3686 | | |
3687 | 0 | const bool bMergeConsecutiveRanges = CPLTestBool( |
3688 | 0 | CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE")); |
3689 | |
|
3690 | 0 | try |
3691 | 0 | { |
3692 | 0 | m_aoAdviseReadRanges.clear(); |
3693 | 0 | m_aoAdviseReadRanges.reserve(nRanges); |
3694 | 0 | for (int i = 0; i < nRanges;) |
3695 | 0 | { |
3696 | 0 | int iNext = i; |
3697 | | // Identify consecutive ranges |
3698 | 0 | constexpr size_t SIZE_COG_MARKERS = 2 * sizeof(uint32_t); |
3699 | 0 | auto nEndOffset = panOffsets[iNext] + panSizes[iNext]; |
3700 | 0 | while (bMergeConsecutiveRanges && iNext + 1 < nRanges && |
3701 | 0 | panOffsets[iNext + 1] > panOffsets[iNext] && |
3702 | 0 | panOffsets[iNext] + panSizes[iNext] + SIZE_COG_MARKERS >= |
3703 | 0 | panOffsets[iNext + 1] && |
3704 | 0 | panOffsets[iNext + 1] + panSizes[iNext + 1] > nEndOffset) |
3705 | 0 | { |
3706 | 0 | iNext++; |
3707 | 0 | nEndOffset = panOffsets[iNext] + panSizes[iNext]; |
3708 | 0 | } |
3709 | 0 | CPLAssert(panOffsets[i] <= nEndOffset); |
3710 | 0 | const size_t nSize = |
3711 | 0 | static_cast<size_t>(nEndOffset - panOffsets[i]); |
3712 | |
|
3713 | 0 | if (nSize == 0) |
3714 | 0 | { |
3715 | 0 | i = iNext + 1; |
3716 | 0 | continue; |
3717 | 0 | } |
3718 | | |
3719 | 0 | auto newAdviseReadRange = |
3720 | 0 | std::make_unique<AdviseReadRange>(m_oRetryParameters); |
3721 | 0 | newAdviseReadRange->nStartOffset = panOffsets[i]; |
3722 | 0 | newAdviseReadRange->nSize = nSize; |
3723 | 0 | newAdviseReadRange->abyData.resize(nSize); |
3724 | 0 | m_aoAdviseReadRanges.push_back(std::move(newAdviseReadRange)); |
3725 | |
|
3726 | 0 | i = iNext + 1; |
3727 | 0 | } |
3728 | 0 | } |
3729 | 0 | catch (const std::exception &) |
3730 | 0 | { |
3731 | 0 | CPLError(CE_Failure, CPLE_OutOfMemory, |
3732 | 0 | "Out of memory in VSICurlHandle::AdviseRead()"); |
3733 | 0 | m_aoAdviseReadRanges.clear(); |
3734 | 0 | } |
3735 | |
|
3736 | 0 | if (m_aoAdviseReadRanges.empty()) |
3737 | 0 | return; |
3738 | | |
3739 | | #ifdef DEBUG |
3740 | | CPLDebug(poFS->GetDebugKey(), "AdviseRead(): fetching %u ranges", |
3741 | | static_cast<unsigned>(m_aoAdviseReadRanges.size())); |
3742 | | #endif |
3743 | | |
3744 | 0 | const auto task = [this, aosHTTPOptions = std::move(aosHTTPOptions)]( |
3745 | 0 | const std::string &osURL) |
3746 | 0 | { |
3747 | 0 | if (!m_hCurlMultiHandleForAdviseRead) |
3748 | 0 | m_hCurlMultiHandleForAdviseRead = VSICURLMultiInit(); |
3749 | |
|
3750 | 0 | NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str()); |
3751 | 0 | NetworkStatisticsFile oContextFile(m_osFilename.c_str()); |
3752 | 0 | NetworkStatisticsAction oContextAction("AdviseRead"); |
3753 | |
|
3754 | 0 | #ifdef CURLPIPE_MULTIPLEX |
3755 | | // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is |
3756 | | // used) |
3757 | | // Not that this does not enable HTTP/1.1 pipeling, which is not |
3758 | | // recommended for example by Google Cloud Storage. |
3759 | | // For HTTP/1.1, parallel connections work better since you can get |
3760 | | // results out of order. |
3761 | 0 | if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES"))) |
3762 | 0 | { |
3763 | 0 | curl_multi_setopt(m_hCurlMultiHandleForAdviseRead, |
3764 | 0 | CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX); |
3765 | 0 | } |
3766 | 0 | #endif |
3767 | |
|
3768 | 0 | size_t nTotalDownloaded = 0; |
3769 | |
|
3770 | 0 | while (true) |
3771 | 0 | { |
3772 | |
|
3773 | 0 | std::vector<CURL *> aHandles; |
3774 | 0 | std::vector<WriteFuncStruct> asWriteFuncData( |
3775 | 0 | m_aoAdviseReadRanges.size()); |
3776 | 0 | std::vector<WriteFuncStruct> asWriteFuncHeaderData( |
3777 | 0 | m_aoAdviseReadRanges.size()); |
3778 | 0 | std::vector<char *> apszRanges; |
3779 | 0 | std::vector<struct curl_slist *> aHeaders; |
3780 | |
|
3781 | 0 | struct CurlErrBuffer |
3782 | 0 | { |
3783 | 0 | std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf; |
3784 | 0 | }; |
3785 | 0 | std::vector<CurlErrBuffer> asCurlErrors( |
3786 | 0 | m_aoAdviseReadRanges.size()); |
3787 | |
|
3788 | 0 | std::map<CURL *, size_t> oMapHandleToIdx; |
3789 | 0 | for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i) |
3790 | 0 | { |
3791 | 0 | if (!m_aoAdviseReadRanges[i]->bToRetry) |
3792 | 0 | { |
3793 | 0 | aHandles.push_back(nullptr); |
3794 | 0 | apszRanges.push_back(nullptr); |
3795 | 0 | aHeaders.push_back(nullptr); |
3796 | 0 | continue; |
3797 | 0 | } |
3798 | 0 | m_aoAdviseReadRanges[i]->bToRetry = false; |
3799 | |
|
3800 | 0 | CURL *hCurlHandle = curl_easy_init(); |
3801 | 0 | oMapHandleToIdx[hCurlHandle] = i; |
3802 | 0 | aHandles.push_back(hCurlHandle); |
3803 | | |
3804 | | // As the multi-range request is likely not the first one, we don't |
3805 | | // need to wait as we already know if pipelining is possible |
3806 | | // unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_PIPEWAIT, 1); |
3807 | |
|
3808 | 0 | struct curl_slist *headers = VSICurlSetOptions( |
3809 | 0 | hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); |
3810 | |
|
3811 | 0 | VSICURLInitWriteFuncStruct(&asWriteFuncData[i], this, |
3812 | 0 | pfnReadCbk, pReadCbkUserData); |
3813 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, |
3814 | 0 | &asWriteFuncData[i]); |
3815 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
3816 | 0 | VSICurlHandleWriteFunc); |
3817 | |
|
3818 | 0 | VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[i], nullptr, |
3819 | 0 | nullptr, nullptr); |
3820 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
3821 | 0 | &asWriteFuncHeaderData[i]); |
3822 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
3823 | 0 | VSICurlHandleWriteFunc); |
3824 | 0 | asWriteFuncHeaderData[i].bIsHTTP = |
3825 | 0 | STARTS_WITH(m_pszURL, "http"); |
3826 | 0 | asWriteFuncHeaderData[i].nStartOffset = |
3827 | 0 | m_aoAdviseReadRanges[i]->nStartOffset; |
3828 | |
|
3829 | 0 | asWriteFuncHeaderData[i].nEndOffset = |
3830 | 0 | m_aoAdviseReadRanges[i]->nStartOffset + |
3831 | 0 | m_aoAdviseReadRanges[i]->nSize - 1; |
3832 | |
|
3833 | 0 | char rangeStr[512] = {}; |
3834 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
3835 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
3836 | 0 | asWriteFuncHeaderData[i].nStartOffset, |
3837 | 0 | asWriteFuncHeaderData[i].nEndOffset); |
3838 | |
|
3839 | | if constexpr (ENABLE_DEBUG) |
3840 | 0 | { |
3841 | 0 | CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", |
3842 | 0 | rangeStr, osURL.c_str()); |
3843 | 0 | } |
3844 | |
|
3845 | 0 | if (asWriteFuncHeaderData[i].bIsHTTP) |
3846 | 0 | { |
3847 | 0 | std::string osHeaderRange( |
3848 | 0 | CPLSPrintf("Range: bytes=%s", rangeStr)); |
3849 | | // So it gets included in Azure signature |
3850 | 0 | char *pszRange = CPLStrdup(osHeaderRange.c_str()); |
3851 | 0 | apszRanges.push_back(pszRange); |
3852 | 0 | headers = curl_slist_append(headers, pszRange); |
3853 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, |
3854 | 0 | nullptr); |
3855 | 0 | } |
3856 | 0 | else |
3857 | 0 | { |
3858 | 0 | apszRanges.push_back(nullptr); |
3859 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, |
3860 | 0 | rangeStr); |
3861 | 0 | } |
3862 | |
|
3863 | 0 | asCurlErrors[i].szCurlErrBuf[0] = '\0'; |
3864 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, |
3865 | 0 | &asCurlErrors[i].szCurlErrBuf[0]); |
3866 | |
|
3867 | 0 | headers = GetCurlHeaders("GET", headers); |
3868 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, |
3869 | 0 | headers); |
3870 | 0 | aHeaders.push_back(headers); |
3871 | 0 | curl_multi_add_handle(m_hCurlMultiHandleForAdviseRead, |
3872 | 0 | hCurlHandle); |
3873 | 0 | } |
3874 | |
|
3875 | 0 | const auto DealWithRequest = [this, &osURL, &nTotalDownloaded, |
3876 | 0 | &oMapHandleToIdx, &asCurlErrors, |
3877 | 0 | &asWriteFuncHeaderData, |
3878 | 0 | &asWriteFuncData](CURL *hCurlHandle) |
3879 | 0 | { |
3880 | 0 | auto oIter = oMapHandleToIdx.find(hCurlHandle); |
3881 | 0 | CPLAssert(oIter != oMapHandleToIdx.end()); |
3882 | 0 | const auto iReq = oIter->second; |
3883 | |
|
3884 | 0 | long response_code = 0; |
3885 | 0 | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, |
3886 | 0 | &response_code); |
3887 | |
|
3888 | 0 | if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0') |
3889 | 0 | { |
3890 | 0 | char rangeStr[512] = {}; |
3891 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
3892 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
3893 | 0 | asWriteFuncHeaderData[iReq].nStartOffset, |
3894 | 0 | asWriteFuncHeaderData[iReq].nEndOffset); |
3895 | |
|
3896 | 0 | const char *pszErrorMsg = |
3897 | 0 | &asCurlErrors[iReq].szCurlErrBuf[0]; |
3898 | 0 | CPLDebug(poFS->GetDebugKey(), |
3899 | 0 | "ReadMultiRange(%s), %s: response_code=%d, msg=%s", |
3900 | 0 | osURL.c_str(), rangeStr, |
3901 | 0 | static_cast<int>(response_code), pszErrorMsg); |
3902 | 0 | } |
3903 | |
|
3904 | 0 | bool bToRetry = false; |
3905 | 0 | if ((response_code != 206 && response_code != 225) || |
3906 | 0 | asWriteFuncHeaderData[iReq].nEndOffset + 1 != |
3907 | 0 | asWriteFuncHeaderData[iReq].nStartOffset + |
3908 | 0 | asWriteFuncData[iReq].nSize) |
3909 | 0 | { |
3910 | 0 | char rangeStr[512] = {}; |
3911 | 0 | snprintf(rangeStr, sizeof(rangeStr), |
3912 | 0 | CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, |
3913 | 0 | asWriteFuncHeaderData[iReq].nStartOffset, |
3914 | 0 | asWriteFuncHeaderData[iReq].nEndOffset); |
3915 | | |
3916 | | // Look if we should attempt a retry |
3917 | 0 | if (m_aoAdviseReadRanges[iReq]->retryContext.CanRetry( |
3918 | 0 | static_cast<int>(response_code), |
3919 | 0 | asWriteFuncData[iReq].pBuffer, |
3920 | 0 | &asCurlErrors[iReq].szCurlErrBuf[0])) |
3921 | 0 | { |
3922 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
3923 | 0 | "HTTP error code for %s range %s: %d. " |
3924 | 0 | "Retrying again in %.1f secs", |
3925 | 0 | osURL.c_str(), rangeStr, |
3926 | 0 | static_cast<int>(response_code), |
3927 | 0 | m_aoAdviseReadRanges[iReq] |
3928 | 0 | ->retryContext.GetCurrentDelay()); |
3929 | 0 | m_aoAdviseReadRanges[iReq]->dfSleepDelay = |
3930 | 0 | m_aoAdviseReadRanges[iReq] |
3931 | 0 | ->retryContext.GetCurrentDelay(); |
3932 | 0 | bToRetry = true; |
3933 | 0 | } |
3934 | 0 | else |
3935 | 0 | { |
3936 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
3937 | 0 | "Request for %s range %s failed with " |
3938 | 0 | "response_code=%ld", |
3939 | 0 | osURL.c_str(), rangeStr, response_code); |
3940 | 0 | } |
3941 | 0 | } |
3942 | 0 | else |
3943 | 0 | { |
3944 | 0 | const size_t nSize = asWriteFuncData[iReq].nSize; |
3945 | 0 | memcpy(&m_aoAdviseReadRanges[iReq]->abyData[0], |
3946 | 0 | asWriteFuncData[iReq].pBuffer, nSize); |
3947 | 0 | m_aoAdviseReadRanges[iReq]->abyData.resize(nSize); |
3948 | |
|
3949 | 0 | nTotalDownloaded += nSize; |
3950 | 0 | } |
3951 | |
|
3952 | 0 | m_aoAdviseReadRanges[iReq]->bToRetry = bToRetry; |
3953 | |
|
3954 | 0 | if (!bToRetry) |
3955 | 0 | { |
3956 | 0 | std::lock_guard<std::mutex> oLock( |
3957 | 0 | m_aoAdviseReadRanges[iReq]->oMutex); |
3958 | 0 | m_aoAdviseReadRanges[iReq]->bDone = true; |
3959 | 0 | m_aoAdviseReadRanges[iReq]->oCV.notify_all(); |
3960 | 0 | } |
3961 | 0 | }; |
3962 | |
|
3963 | 0 | int repeats = 0; |
3964 | |
|
3965 | 0 | void *old_handler = CPLHTTPIgnoreSigPipe(); |
3966 | 0 | while (true) |
3967 | 0 | { |
3968 | 0 | int still_running; |
3969 | 0 | while (curl_multi_perform(m_hCurlMultiHandleForAdviseRead, |
3970 | 0 | &still_running) == |
3971 | 0 | CURLM_CALL_MULTI_PERFORM) |
3972 | 0 | { |
3973 | | // loop |
3974 | 0 | } |
3975 | 0 | if (!still_running) |
3976 | 0 | { |
3977 | 0 | break; |
3978 | 0 | } |
3979 | | |
3980 | 0 | CURLMsg *msg; |
3981 | 0 | do |
3982 | 0 | { |
3983 | 0 | int msgq = 0; |
3984 | 0 | msg = curl_multi_info_read(m_hCurlMultiHandleForAdviseRead, |
3985 | 0 | &msgq); |
3986 | 0 | if (msg && (msg->msg == CURLMSG_DONE)) |
3987 | 0 | { |
3988 | 0 | DealWithRequest(msg->easy_handle); |
3989 | 0 | } |
3990 | 0 | } while (msg); |
3991 | |
|
3992 | 0 | CPLMultiPerformWait(m_hCurlMultiHandleForAdviseRead, repeats); |
3993 | 0 | } |
3994 | 0 | CPLHTTPRestoreSigPipeHandler(old_handler); |
3995 | |
|
3996 | 0 | bool bRetry = false; |
3997 | 0 | double dfDelay = 0.0; |
3998 | 0 | for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i) |
3999 | 0 | { |
4000 | 0 | bool bReqDone; |
4001 | 0 | { |
4002 | | // To please Coverity Scan |
4003 | 0 | std::lock_guard<std::mutex> oLock( |
4004 | 0 | m_aoAdviseReadRanges[i]->oMutex); |
4005 | 0 | bReqDone = m_aoAdviseReadRanges[i]->bDone; |
4006 | 0 | } |
4007 | 0 | if (!bReqDone && !m_aoAdviseReadRanges[i]->bToRetry) |
4008 | 0 | { |
4009 | 0 | DealWithRequest(aHandles[i]); |
4010 | 0 | } |
4011 | 0 | if (m_aoAdviseReadRanges[i]->bToRetry) |
4012 | 0 | dfDelay = std::max(dfDelay, |
4013 | 0 | m_aoAdviseReadRanges[i]->dfSleepDelay); |
4014 | 0 | bRetry = bRetry || m_aoAdviseReadRanges[i]->bToRetry; |
4015 | 0 | if (aHandles[i]) |
4016 | 0 | { |
4017 | 0 | curl_multi_remove_handle(m_hCurlMultiHandleForAdviseRead, |
4018 | 0 | aHandles[i]); |
4019 | 0 | VSICURLResetHeaderAndWriterFunctions(aHandles[i]); |
4020 | 0 | curl_easy_cleanup(aHandles[i]); |
4021 | 0 | } |
4022 | 0 | CPLFree(apszRanges[i]); |
4023 | 0 | CPLFree(asWriteFuncData[i].pBuffer); |
4024 | 0 | CPLFree(asWriteFuncHeaderData[i].pBuffer); |
4025 | 0 | if (aHeaders[i]) |
4026 | 0 | curl_slist_free_all(aHeaders[i]); |
4027 | 0 | } |
4028 | 0 | if (!bRetry) |
4029 | 0 | break; |
4030 | 0 | CPLSleep(dfDelay); |
4031 | 0 | } |
4032 | |
|
4033 | 0 | NetworkStatisticsLogger::LogGET(nTotalDownloaded); |
4034 | 0 | }; |
4035 | |
|
4036 | 0 | m_oThreadAdviseRead = std::thread(task, l_osURL); |
4037 | 0 | } |
4038 | | |
4039 | | /************************************************************************/ |
4040 | | /* Write() */ |
4041 | | /************************************************************************/ |
4042 | | |
4043 | | size_t VSICurlHandle::Write(const void * /* pBuffer */, size_t /* nBytes */) |
4044 | 0 | { |
4045 | 0 | return 0; |
4046 | 0 | } |
4047 | | |
4048 | | /************************************************************************/ |
4049 | | /* ClearErr() */ |
4050 | | /************************************************************************/ |
4051 | | |
4052 | | void VSICurlHandle::ClearErr() |
4053 | | |
4054 | 0 | { |
4055 | 0 | bEOF = false; |
4056 | 0 | bError = false; |
4057 | 0 | } |
4058 | | |
4059 | | /************************************************************************/ |
4060 | | /* Error() */ |
4061 | | /************************************************************************/ |
4062 | | |
4063 | | int VSICurlHandle::Error() |
4064 | | |
4065 | 0 | { |
4066 | 0 | return bError ? TRUE : FALSE; |
4067 | 0 | } |
4068 | | |
4069 | | /************************************************************************/ |
4070 | | /* Eof() */ |
4071 | | /************************************************************************/ |
4072 | | |
4073 | | int VSICurlHandle::Eof() |
4074 | | |
4075 | 40 | { |
4076 | 40 | return bEOF ? TRUE : FALSE; |
4077 | 40 | } |
4078 | | |
4079 | | /************************************************************************/ |
4080 | | /* Flush() */ |
4081 | | /************************************************************************/ |
4082 | | |
4083 | | int VSICurlHandle::Flush() |
4084 | 0 | { |
4085 | 0 | return 0; |
4086 | 0 | } |
4087 | | |
4088 | | /************************************************************************/ |
4089 | | /* Close() */ |
4090 | | /************************************************************************/ |
4091 | | |
4092 | | int VSICurlHandle::Close() |
4093 | 45.8k | { |
4094 | 45.8k | return 0; |
4095 | 45.8k | } |
4096 | | |
4097 | | /************************************************************************/ |
4098 | | /* VSICurlFilesystemHandlerBase() */ |
4099 | | /************************************************************************/ |
4100 | | |
4101 | | VSICurlFilesystemHandlerBase::VSICurlFilesystemHandlerBase() |
4102 | 664 | : oCacheDirList{1024, 0} |
4103 | 664 | { |
4104 | 664 | } |
4105 | | |
4106 | | /************************************************************************/ |
4107 | | /* CachedConnection */ |
4108 | | /************************************************************************/ |
4109 | | |
4110 | | namespace |
4111 | | { |
4112 | | struct CachedConnection |
4113 | | { |
4114 | | CURLM *hCurlMultiHandle = nullptr; |
4115 | | void clear(); |
4116 | | |
4117 | | ~CachedConnection() |
4118 | 45 | { |
4119 | 45 | clear(); |
4120 | 45 | } |
4121 | | }; |
4122 | | } // namespace |
4123 | | |
4124 | | #ifdef _WIN32 |
4125 | | // Currently thread_local and C++ objects don't work well with DLL on Windows |
4126 | | static void FreeCachedConnection(void *pData) |
4127 | | { |
4128 | | delete static_cast< |
4129 | | std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData); |
4130 | | } |
4131 | | |
4132 | | // Per-thread and per-filesystem Curl connection cache. |
4133 | | static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> & |
4134 | | GetConnectionCache() |
4135 | | { |
4136 | | static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> |
4137 | | dummyCache; |
4138 | | int bMemoryErrorOccurred = false; |
4139 | | void *pData = |
4140 | | CPLGetTLSEx(CTLS_VSICURL_CACHEDCONNECTION, &bMemoryErrorOccurred); |
4141 | | if (bMemoryErrorOccurred) |
4142 | | { |
4143 | | return dummyCache; |
4144 | | } |
4145 | | if (pData == nullptr) |
4146 | | { |
4147 | | auto cachedConnection = |
4148 | | new std::map<VSICurlFilesystemHandlerBase *, CachedConnection>(); |
4149 | | CPLSetTLSWithFreeFuncEx(CTLS_VSICURL_CACHEDCONNECTION, cachedConnection, |
4150 | | FreeCachedConnection, &bMemoryErrorOccurred); |
4151 | | if (bMemoryErrorOccurred) |
4152 | | { |
4153 | | delete cachedConnection; |
4154 | | return dummyCache; |
4155 | | } |
4156 | | return *cachedConnection; |
4157 | | } |
4158 | | return *static_cast< |
4159 | | std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData); |
4160 | | } |
4161 | | #else |
4162 | | static thread_local std::map<VSICurlFilesystemHandlerBase *, CachedConnection> |
4163 | | g_tls_connectionCache; |
4164 | | |
4165 | | static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> & |
4166 | | GetConnectionCache() |
4167 | 153k | { |
4168 | 153k | return g_tls_connectionCache; |
4169 | 153k | } |
4170 | | #endif |
4171 | | |
4172 | | /************************************************************************/ |
4173 | | /* clear() */ |
4174 | | /************************************************************************/ |
4175 | | |
4176 | | void CachedConnection::clear() |
4177 | 45 | { |
4178 | 45 | if (hCurlMultiHandle) |
4179 | 45 | { |
4180 | 45 | VSICURLMultiCleanup(hCurlMultiHandle); |
4181 | 45 | hCurlMultiHandle = nullptr; |
4182 | 45 | } |
4183 | 45 | } |
4184 | | |
4185 | | /************************************************************************/ |
4186 | | /* ~VSICurlFilesystemHandlerBase() */ |
4187 | | /************************************************************************/ |
4188 | | |
4189 | | VSICurlFilesystemHandlerBase::~VSICurlFilesystemHandlerBase() |
4190 | 0 | { |
4191 | 0 | VSICurlFilesystemHandlerBase::ClearCache(); |
4192 | 0 | GetConnectionCache().erase(this); |
4193 | |
|
4194 | 0 | if (hMutex != nullptr) |
4195 | 0 | CPLDestroyMutex(hMutex); |
4196 | 0 | hMutex = nullptr; |
4197 | 0 | } |
4198 | | |
4199 | | /************************************************************************/ |
4200 | | /* AllowCachedDataFor() */ |
4201 | | /************************************************************************/ |
4202 | | |
4203 | | bool VSICurlFilesystemHandlerBase::AllowCachedDataFor(const char *pszFilename) |
4204 | 645k | { |
4205 | 645k | bool bCachedAllowed = true; |
4206 | 645k | char **papszTokens = CSLTokenizeString2( |
4207 | 645k | CPLGetConfigOption("CPL_VSIL_CURL_NON_CACHED", ""), ":", 0); |
4208 | 645k | for (int i = 0; papszTokens && papszTokens[i]; i++) |
4209 | 0 | { |
4210 | 0 | if (STARTS_WITH(pszFilename, papszTokens[i])) |
4211 | 0 | { |
4212 | 0 | bCachedAllowed = false; |
4213 | 0 | break; |
4214 | 0 | } |
4215 | 0 | } |
4216 | 645k | CSLDestroy(papszTokens); |
4217 | 645k | return bCachedAllowed; |
4218 | 645k | } |
4219 | | |
4220 | | /************************************************************************/ |
4221 | | /* GetCurlMultiHandleFor() */ |
4222 | | /************************************************************************/ |
4223 | | |
4224 | | CURLM *VSICurlFilesystemHandlerBase::GetCurlMultiHandleFor( |
4225 | | const std::string & /*osURL*/) |
4226 | 153k | { |
4227 | 153k | auto &conn = GetConnectionCache()[this]; |
4228 | 153k | if (conn.hCurlMultiHandle == nullptr) |
4229 | 45 | { |
4230 | 45 | conn.hCurlMultiHandle = VSICURLMultiInit(); |
4231 | 45 | } |
4232 | 153k | return conn.hCurlMultiHandle; |
4233 | 153k | } |
4234 | | |
4235 | | /************************************************************************/ |
4236 | | /* GetRegionCache() */ |
4237 | | /************************************************************************/ |
4238 | | |
4239 | | VSICurlFilesystemHandlerBase::RegionCacheType * |
4240 | | VSICurlFilesystemHandlerBase::GetRegionCache() |
4241 | 11.2k | { |
4242 | | // should be called under hMutex taken |
4243 | 11.2k | if (m_poRegionCacheDoNotUseDirectly == nullptr) |
4244 | 34 | { |
4245 | 34 | m_poRegionCacheDoNotUseDirectly.reset( |
4246 | 34 | new RegionCacheType(static_cast<size_t>(GetMaxRegions()))); |
4247 | 34 | } |
4248 | 11.2k | return m_poRegionCacheDoNotUseDirectly.get(); |
4249 | 11.2k | } |
4250 | | |
4251 | | /************************************************************************/ |
4252 | | /* GetRegion() */ |
4253 | | /************************************************************************/ |
4254 | | |
4255 | | std::shared_ptr<std::string> |
4256 | | VSICurlFilesystemHandlerBase::GetRegion(const char *pszURL, |
4257 | | vsi_l_offset nFileOffsetStart) |
4258 | 11.2k | { |
4259 | 11.2k | CPLMutexHolder oHolder(&hMutex); |
4260 | | |
4261 | 11.2k | const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize(); |
4262 | 11.2k | nFileOffsetStart = |
4263 | 11.2k | (nFileOffsetStart / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE; |
4264 | | |
4265 | 11.2k | std::shared_ptr<std::string> out; |
4266 | 11.2k | if (GetRegionCache()->tryGet( |
4267 | 11.2k | FilenameOffsetPair(std::string(pszURL), nFileOffsetStart), out)) |
4268 | 0 | { |
4269 | 0 | return out; |
4270 | 0 | } |
4271 | | |
4272 | 11.2k | return nullptr; |
4273 | 11.2k | } |
4274 | | |
4275 | | /************************************************************************/ |
4276 | | /* AddRegion() */ |
4277 | | /************************************************************************/ |
4278 | | |
4279 | | void VSICurlFilesystemHandlerBase::AddRegion(const char *pszURL, |
4280 | | vsi_l_offset nFileOffsetStart, |
4281 | | size_t nSize, const char *pData) |
4282 | 0 | { |
4283 | 0 | CPLMutexHolder oHolder(&hMutex); |
4284 | |
|
4285 | 0 | auto value = std::make_shared<std::string>(); |
4286 | 0 | value->assign(pData, nSize); |
4287 | 0 | GetRegionCache()->insert( |
4288 | 0 | FilenameOffsetPair(std::string(pszURL), nFileOffsetStart), |
4289 | 0 | std::move(value)); |
4290 | 0 | } |
4291 | | |
4292 | | /************************************************************************/ |
4293 | | /* GetCachedFileProp() */ |
4294 | | /************************************************************************/ |
4295 | | |
4296 | | bool VSICurlFilesystemHandlerBase::GetCachedFileProp(const char *pszURL, |
4297 | | FileProp &oFileProp) |
4298 | 945k | { |
4299 | 945k | return VSICURLGetCachedFileProp(pszURL, oFileProp); |
4300 | 945k | } |
4301 | | |
4302 | | /************************************************************************/ |
4303 | | /* SetCachedFileProp() */ |
4304 | | /************************************************************************/ |
4305 | | |
4306 | | void VSICurlFilesystemHandlerBase::SetCachedFileProp(const char *pszURL, |
4307 | | FileProp &oFileProp) |
4308 | 15.0k | { |
4309 | 15.0k | VSICURLSetCachedFileProp(pszURL, oFileProp); |
4310 | 15.0k | } |
4311 | | |
4312 | | /************************************************************************/ |
4313 | | /* GetCachedDirList() */ |
4314 | | /************************************************************************/ |
4315 | | |
4316 | | bool VSICurlFilesystemHandlerBase::GetCachedDirList( |
4317 | | const char *pszURL, CachedDirList &oCachedDirList) |
4318 | 366k | { |
4319 | 366k | CPLMutexHolder oHolder(&hMutex); |
4320 | | |
4321 | 366k | return oCacheDirList.tryGet(std::string(pszURL), oCachedDirList) && |
4322 | | // Let a chance to use new auth parameters |
4323 | 292k | gnGenerationAuthParameters == |
4324 | 292k | oCachedDirList.nGenerationAuthParameters; |
4325 | 366k | } |
4326 | | |
4327 | | /************************************************************************/ |
4328 | | /* SetCachedDirList() */ |
4329 | | /************************************************************************/ |
4330 | | |
4331 | | void VSICurlFilesystemHandlerBase::SetCachedDirList( |
4332 | | const char *pszURL, CachedDirList &oCachedDirList) |
4333 | 13.1k | { |
4334 | 13.1k | CPLMutexHolder oHolder(&hMutex); |
4335 | | |
4336 | 13.1k | std::string key(pszURL); |
4337 | 13.1k | CachedDirList oldValue; |
4338 | 13.1k | if (oCacheDirList.tryGet(key, oldValue)) |
4339 | 0 | { |
4340 | 0 | nCachedFilesInDirList -= oldValue.oFileList.size(); |
4341 | 0 | oCacheDirList.remove(key); |
4342 | 0 | } |
4343 | | |
4344 | 13.4k | while ((!oCacheDirList.empty() && |
4345 | 13.3k | nCachedFilesInDirList + oCachedDirList.oFileList.size() > |
4346 | 13.3k | 1024 * 1024) || |
4347 | 13.4k | oCacheDirList.size() == oCacheDirList.getMaxAllowedSize()) |
4348 | 331 | { |
4349 | 331 | std::string oldestKey; |
4350 | 331 | oCacheDirList.getOldestEntry(oldestKey, oldValue); |
4351 | 331 | nCachedFilesInDirList -= oldValue.oFileList.size(); |
4352 | 331 | oCacheDirList.remove(oldestKey); |
4353 | 331 | } |
4354 | 13.1k | oCachedDirList.nGenerationAuthParameters = gnGenerationAuthParameters; |
4355 | | |
4356 | 13.1k | nCachedFilesInDirList += oCachedDirList.oFileList.size(); |
4357 | 13.1k | oCacheDirList.insert(key, oCachedDirList); |
4358 | 13.1k | } |
4359 | | |
4360 | | /************************************************************************/ |
4361 | | /* ExistsInCacheDirList() */ |
4362 | | /************************************************************************/ |
4363 | | |
4364 | | bool VSICurlFilesystemHandlerBase::ExistsInCacheDirList( |
4365 | | const std::string &osDirname, bool *pbIsDir) |
4366 | 0 | { |
4367 | 0 | CachedDirList cachedDirList; |
4368 | 0 | if (GetCachedDirList(osDirname.c_str(), cachedDirList)) |
4369 | 0 | { |
4370 | 0 | if (pbIsDir) |
4371 | 0 | *pbIsDir = !cachedDirList.oFileList.empty(); |
4372 | 0 | return false; |
4373 | 0 | } |
4374 | 0 | else |
4375 | 0 | { |
4376 | 0 | if (pbIsDir) |
4377 | 0 | *pbIsDir = false; |
4378 | 0 | return false; |
4379 | 0 | } |
4380 | 0 | } |
4381 | | |
4382 | | /************************************************************************/ |
4383 | | /* InvalidateCachedData() */ |
4384 | | /************************************************************************/ |
4385 | | |
4386 | | void VSICurlFilesystemHandlerBase::InvalidateCachedData(const char *pszURL) |
4387 | 0 | { |
4388 | 0 | CPLMutexHolder oHolder(&hMutex); |
4389 | |
|
4390 | 0 | VSICURLInvalidateCachedFileProp(pszURL); |
4391 | | |
4392 | | // Invalidate all cached regions for this URL |
4393 | 0 | std::list<FilenameOffsetPair> keysToRemove; |
4394 | 0 | std::string osURL(pszURL); |
4395 | 0 | auto lambda = |
4396 | 0 | [&keysToRemove, |
4397 | 0 | &osURL](const lru11::KeyValuePair<FilenameOffsetPair, |
4398 | 0 | std::shared_ptr<std::string>> &kv) |
4399 | 0 | { |
4400 | 0 | if (kv.key.filename_ == osURL) |
4401 | 0 | keysToRemove.push_back(kv.key); |
4402 | 0 | }; |
4403 | 0 | auto *poRegionCache = GetRegionCache(); |
4404 | 0 | poRegionCache->cwalk(lambda); |
4405 | 0 | for (const auto &key : keysToRemove) |
4406 | 0 | poRegionCache->remove(key); |
4407 | 0 | } |
4408 | | |
4409 | | /************************************************************************/ |
4410 | | /* ClearCache() */ |
4411 | | /************************************************************************/ |
4412 | | |
4413 | | void VSICurlFilesystemHandlerBase::ClearCache() |
4414 | 0 | { |
4415 | 0 | CPLMutexHolder oHolder(&hMutex); |
4416 | |
|
4417 | 0 | GetRegionCache()->clear(); |
4418 | |
|
4419 | 0 | VSICURLDestroyCacheFileProp(); |
4420 | |
|
4421 | 0 | oCacheDirList.clear(); |
4422 | 0 | nCachedFilesInDirList = 0; |
4423 | |
|
4424 | 0 | GetConnectionCache()[this].clear(); |
4425 | 0 | } |
4426 | | |
4427 | | /************************************************************************/ |
4428 | | /* PartialClearCache() */ |
4429 | | /************************************************************************/ |
4430 | | |
4431 | | void VSICurlFilesystemHandlerBase::PartialClearCache( |
4432 | | const char *pszFilenamePrefix) |
4433 | 0 | { |
4434 | 0 | CPLMutexHolder oHolder(&hMutex); |
4435 | |
|
4436 | 0 | std::string osURL = GetURLFromFilename(pszFilenamePrefix); |
4437 | 0 | { |
4438 | 0 | std::list<FilenameOffsetPair> keysToRemove; |
4439 | 0 | auto lambda = |
4440 | 0 | [&keysToRemove, &osURL]( |
4441 | 0 | const lru11::KeyValuePair<FilenameOffsetPair, |
4442 | 0 | std::shared_ptr<std::string>> &kv) |
4443 | 0 | { |
4444 | 0 | if (strncmp(kv.key.filename_.c_str(), osURL.c_str(), |
4445 | 0 | osURL.size()) == 0) |
4446 | 0 | keysToRemove.push_back(kv.key); |
4447 | 0 | }; |
4448 | 0 | auto *poRegionCache = GetRegionCache(); |
4449 | 0 | poRegionCache->cwalk(lambda); |
4450 | 0 | for (const auto &key : keysToRemove) |
4451 | 0 | poRegionCache->remove(key); |
4452 | 0 | } |
4453 | |
|
4454 | 0 | VSICURLInvalidateCachedFilePropPrefix(osURL.c_str()); |
4455 | |
|
4456 | 0 | { |
4457 | 0 | const size_t nLen = strlen(pszFilenamePrefix); |
4458 | 0 | std::list<std::string> keysToRemove; |
4459 | 0 | auto lambda = |
4460 | 0 | [this, &keysToRemove, pszFilenamePrefix, |
4461 | 0 | nLen](const lru11::KeyValuePair<std::string, CachedDirList> &kv) |
4462 | 0 | { |
4463 | 0 | if (strncmp(kv.key.c_str(), pszFilenamePrefix, nLen) == 0) |
4464 | 0 | { |
4465 | 0 | keysToRemove.push_back(kv.key); |
4466 | 0 | nCachedFilesInDirList -= kv.value.oFileList.size(); |
4467 | 0 | } |
4468 | 0 | }; |
4469 | 0 | oCacheDirList.cwalk(lambda); |
4470 | 0 | for (const auto &key : keysToRemove) |
4471 | 0 | oCacheDirList.remove(key); |
4472 | 0 | } |
4473 | 0 | } |
4474 | | |
4475 | | /************************************************************************/ |
4476 | | /* CreateFileHandle() */ |
4477 | | /************************************************************************/ |
4478 | | |
4479 | | VSICurlHandle * |
4480 | | VSICurlFilesystemHandlerBase::CreateFileHandle(const char *pszFilename) |
4481 | 182k | { |
4482 | 182k | return new VSICurlHandle(this, pszFilename); |
4483 | 182k | } |
4484 | | |
4485 | | /************************************************************************/ |
4486 | | /* GetActualURL() */ |
4487 | | /************************************************************************/ |
4488 | | |
4489 | | const char *VSICurlFilesystemHandlerBase::GetActualURL(const char *pszFilename) |
4490 | 0 | { |
4491 | 0 | VSICurlHandle *poHandle = CreateFileHandle(pszFilename); |
4492 | 0 | if (poHandle == nullptr) |
4493 | 0 | return pszFilename; |
4494 | 0 | std::string osURL(poHandle->GetURL()); |
4495 | 0 | delete poHandle; |
4496 | 0 | return CPLSPrintf("%s", osURL.c_str()); |
4497 | 0 | } |
4498 | | |
4499 | | /************************************************************************/ |
4500 | | /* GetOptions() */ |
4501 | | /************************************************************************/ |
4502 | | |
// XML fragment (a sequence of <Option> elements, without an enclosing
// <Options> element) describing the configuration options advertised by the
// /vsicurl/ family of file systems. It is returned as is by
// GetOptionsStatic() and wrapped in <Options>...</Options> by GetOptions().
//
// The macro is a concatenation of adjacent string literals: any literal that
// ends in the middle of a sentence must end with a space, otherwise the words
// on both sides are glued together in the resulting text.
#define VSICURL_OPTIONS                                                        \
    " <Option name='GDAL_HTTP_MAX_RETRY' type='int' "                          \
    "description='Maximum number of retries' default='0'/>"                    \
    " <Option name='GDAL_HTTP_RETRY_DELAY' type='double' "                     \
    "description='Retry delay in seconds' default='30'/>"                      \
    " <Option name='GDAL_HTTP_HEADER_FILE' type='string' "                     \
    "description='Filename of a file that contains HTTP headers to "           \
    "forward to the server'/>"                                                 \
    " <Option name='CPL_VSIL_CURL_USE_HEAD' type='boolean' "                   \
    "description='Whether to use HTTP HEAD verb to retrieve "                  \
    "file information' default='YES'/>"                                        \
    " <Option name='GDAL_HTTP_MULTIRANGE' type='string-select' "               \
    "description='Strategy to apply to run multi-range requests' "             \
    "default='PARALLEL'>"                                                      \
    " <Value>PARALLEL</Value>"                                                 \
    " <Value>SERIAL</Value>"                                                   \
    " </Option>"                                                               \
    " <Option name='GDAL_HTTP_MULTIPLEX' type='boolean' "                      \
    "description='Whether to enable HTTP/2 multiplexing' default='YES'/>"      \
    " <Option name='GDAL_HTTP_MERGE_CONSECUTIVE_RANGES' type='boolean' "       \
    "description='Whether to merge consecutive ranges in multirange "          \
    "requests' default='YES'/>"                                                \
    " <Option name='CPL_VSIL_CURL_NON_CACHED' type='string' "                  \
    "description='Colon-separated list of filenames whose content "            \
    "must not be cached across open attempts'/>"                               \
    " <Option name='CPL_VSIL_CURL_ALLOWED_FILENAME' type='string' "            \
    "description='Single filename that is allowed to be opened'/>"             \
    " <Option name='CPL_VSIL_CURL_ALLOWED_EXTENSIONS' type='string' "          \
    "description='Comma or space separated list of allowed file "              \
    "extensions'/>"                                                            \
    " <Option name='GDAL_DISABLE_READDIR_ON_OPEN' type='string-select' "       \
    "description='Whether to disable establishing the list of files in "       \
    "the directory of the current filename' default='NO'>"                     \
    " <Value>NO</Value>"                                                       \
    " <Value>YES</Value>"                                                      \
    " <Value>EMPTY_DIR</Value>"                                                \
    " </Option>"                                                               \
    " <Option name='VSI_CACHE' type='boolean' "                                \
    "description='Whether to cache in memory the contents of the opened "      \
    "file as soon as they are read' default='NO'/>"                            \
    " <Option name='CPL_VSIL_CURL_CHUNK_SIZE' type='integer' "                 \
    "description='Size in bytes of the minimum amount of data read in a "      \
    "file' default='16384' min='1024' max='10485760'/>"                        \
    " <Option name='CPL_VSIL_CURL_CACHE_SIZE' type='integer' "                 \
    "description='Size in bytes of the global /vsicurl/ cache' "               \
    "default='16384000'/>"                                                     \
    " <Option name='CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE' type='boolean' "     \
    "description='Whether to skip files with Glacier storage class in "        \
    "directory listing.' default='YES'/>"                                      \
    " <Option name='CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT' "             \
    "type='integer' description='Maximum number of bytes AdviseRead() is "     \
    "allowed to fetch at once' default='104857600'/>"                          \
    " <Option name='CPL_VSIL_CURL_HEADER_FILE_KVP_ENABLED' "                   \
    "type='string-select' description='Whether the header-file key-value "     \
    "pair can be used in /vsicurl? filenames' default='ONLY_IN_TEMP'>"         \
    " <Value>ONLY_IN_TEMP</Value>"                                             \
    " <Value>NO</Value>"                                                       \
    " <Value>YES</Value>"                                                      \
    " </Option>"                                                               \
    " <Option name='GDAL_HTTP_MAX_CACHED_CONNECTIONS' type='integer' "         \
    "description='Maximum amount of connections that libcurl may keep alive "  \
    "in its connection cache after use'/>"                                     \
    " <Option name='GDAL_HTTP_MAX_TOTAL_CONNECTIONS' type='integer' "          \
    "description='Maximum number of simultaneously open connections in "       \
    "total'/>"
4568 | | |
4569 | | const char *VSICurlFilesystemHandlerBase::GetOptionsStatic() |
4570 | 0 | { |
4571 | 0 | return VSICURL_OPTIONS; |
4572 | 0 | } |
4573 | | |
4574 | | const char *VSICurlFilesystemHandlerBase::GetOptions() |
4575 | 0 | { |
4576 | 0 | static std::string osOptions(std::string("<Options>") + GetOptionsStatic() + |
4577 | 0 | "</Options>"); |
4578 | 0 | return osOptions.c_str(); |
4579 | 0 | } |
4580 | | |
4581 | | /************************************************************************/ |
4582 | | /* IsAllowedFilename() */ |
4583 | | /************************************************************************/ |
4584 | | |
4585 | | bool VSICurlFilesystemHandlerBase::IsAllowedFilename(const char *pszFilename) |
4586 | 452k | { |
4587 | 452k | const char *pszAllowedFilename = |
4588 | 452k | CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_FILENAME", nullptr); |
4589 | 452k | if (pszAllowedFilename != nullptr) |
4590 | 0 | { |
4591 | 0 | return strcmp(pszFilename, pszAllowedFilename) == 0; |
4592 | 0 | } |
4593 | | |
4594 | | // Consider that only the files whose extension ends up with one that is |
4595 | | // listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server. This can |
4596 | | // speeds up dramatically open experience, in case the server cannot return |
4597 | | // a file list. {noext} can be used as a special token to mean file with no |
4598 | | // extension. |
4599 | | // For example: |
4600 | | // gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" |
4601 | | // /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif |
4602 | 452k | const char *pszAllowedExtensions = |
4603 | 452k | CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", nullptr); |
4604 | 452k | if (pszAllowedExtensions) |
4605 | 0 | { |
4606 | 0 | char **papszExtensions = |
4607 | 0 | CSLTokenizeString2(pszAllowedExtensions, ", ", 0); |
4608 | 0 | const char *queryStart = strchr(pszFilename, '?'); |
4609 | 0 | char *pszFilenameWithoutQuery = nullptr; |
4610 | 0 | if (queryStart != nullptr) |
4611 | 0 | { |
4612 | 0 | pszFilenameWithoutQuery = CPLStrdup(pszFilename); |
4613 | 0 | pszFilenameWithoutQuery[queryStart - pszFilename] = '\0'; |
4614 | 0 | pszFilename = pszFilenameWithoutQuery; |
4615 | 0 | } |
4616 | 0 | const size_t nURLLen = strlen(pszFilename); |
4617 | 0 | bool bFound = false; |
4618 | 0 | for (int i = 0; papszExtensions[i] != nullptr; i++) |
4619 | 0 | { |
4620 | 0 | const size_t nExtensionLen = strlen(papszExtensions[i]); |
4621 | 0 | if (EQUAL(papszExtensions[i], "{noext}")) |
4622 | 0 | { |
4623 | 0 | const char *pszLastSlash = strrchr(pszFilename, '/'); |
4624 | 0 | if (pszLastSlash != nullptr && |
4625 | 0 | strchr(pszLastSlash, '.') == nullptr) |
4626 | 0 | { |
4627 | 0 | bFound = true; |
4628 | 0 | break; |
4629 | 0 | } |
4630 | 0 | } |
4631 | 0 | else if (nURLLen > nExtensionLen && |
4632 | 0 | EQUAL(pszFilename + nURLLen - nExtensionLen, |
4633 | 0 | papszExtensions[i])) |
4634 | 0 | { |
4635 | 0 | bFound = true; |
4636 | 0 | break; |
4637 | 0 | } |
4638 | 0 | } |
4639 | |
|
4640 | 0 | CSLDestroy(papszExtensions); |
4641 | 0 | if (pszFilenameWithoutQuery) |
4642 | 0 | { |
4643 | 0 | CPLFree(pszFilenameWithoutQuery); |
4644 | 0 | } |
4645 | |
|
4646 | 0 | return bFound; |
4647 | 0 | } |
4648 | 452k | return TRUE; |
4649 | 452k | } |
4650 | | |
4651 | | /************************************************************************/ |
4652 | | /* Open() */ |
4653 | | /************************************************************************/ |
4654 | | |
4655 | | VSIVirtualHandleUniquePtr |
4656 | | VSICurlFilesystemHandlerBase::Open(const char *pszFilename, |
4657 | | const char *pszAccess, bool bSetError, |
4658 | | CSLConstList papszOptions) |
4659 | 175k | { |
4660 | 175k | const bool bStartsWithVSICurlPrefix = StartsWithVSICurlPrefix(pszFilename); |
4661 | 175k | if (!bStartsWithVSICurlPrefix && |
4662 | 31.9k | !cpl::starts_with(std::string_view(pszFilename), GetFSPrefix())) |
4663 | 521 | { |
4664 | 521 | return nullptr; |
4665 | 521 | } |
4666 | | |
4667 | 174k | if (strchr(pszAccess, 'w') != nullptr || strchr(pszAccess, '+') != nullptr) |
4668 | 0 | { |
4669 | 0 | if (bSetError) |
4670 | 0 | { |
4671 | 0 | VSIError(VSIE_FileError, |
4672 | 0 | "Only read-only mode is supported for /vsicurl"); |
4673 | 0 | } |
4674 | 0 | return nullptr; |
4675 | 0 | } |
4676 | 174k | if (!papszOptions || |
4677 | 0 | !CPLTestBool(CSLFetchNameValueDef( |
4678 | 0 | papszOptions, "IGNORE_FILENAME_RESTRICTIONS", "NO"))) |
4679 | 174k | { |
4680 | 174k | if (!IsAllowedFilename(pszFilename)) |
4681 | 0 | return nullptr; |
4682 | 174k | } |
4683 | | |
4684 | 174k | bool bListDir = true; |
4685 | 174k | bool bEmptyDir = false; |
4686 | 174k | std::string osURL = |
4687 | 174k | bStartsWithVSICurlPrefix |
4688 | 174k | ? VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr, nullptr, |
4689 | 143k | &bListDir, &bEmptyDir, nullptr, nullptr, |
4690 | 143k | nullptr) |
4691 | 174k | : GetURLFromFilename(pszFilename); |
4692 | | |
4693 | 174k | const char *pszOptionVal = CSLFetchNameValueDef( |
4694 | 174k | papszOptions, "DISABLE_READDIR_ON_OPEN", |
4695 | 174k | VSIGetPathSpecificOption(pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", |
4696 | 174k | "NO")); |
4697 | 174k | const bool bCache = CPLTestBool(CSLFetchNameValueDef( |
4698 | 174k | papszOptions, "CACHE", AllowCachedDataFor(pszFilename) ? "YES" : "NO")); |
4699 | 174k | const bool bSkipReadDir = !bListDir || bEmptyDir || |
4700 | 174k | EQUAL(pszOptionVal, "EMPTY_DIR") || |
4701 | 174k | CPLTestBool(pszOptionVal) || !bCache; |
4702 | | |
4703 | 174k | std::string osFilename(pszFilename); |
4704 | 174k | bool bGotFileList = !bSkipReadDir; |
4705 | 174k | bool bForceExistsCheck = false; |
4706 | 174k | FileProp cachedFileProp; |
4707 | 174k | if (!bSkipReadDir && |
4708 | 174k | !(GetCachedFileProp(osURL.c_str(), cachedFileProp) && |
4709 | 34.4k | cachedFileProp.eExists == EXIST_YES) && |
4710 | 174k | strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr && |
4711 | 147k | !STARTS_WITH(CPLGetExtensionSafe(osFilename.c_str()).c_str(), "zip") && |
4712 | | // Likely a Kerchunk JSON reference file: no need to list siblings |
4713 | 125k | !cpl::ends_with(osFilename, ".nc.zarr")) |
4714 | 125k | { |
4715 | | // 1000 corresponds to the default page size of S3. |
4716 | 125k | constexpr int FILE_COUNT_LIMIT = 1000; |
4717 | 125k | const CPLStringList aosFileList(ReadDirInternal( |
4718 | 125k | (CPLGetDirnameSafe(osFilename.c_str()) + '/').c_str(), |
4719 | 125k | FILE_COUNT_LIMIT, &bGotFileList)); |
4720 | 125k | const bool bFound = |
4721 | 125k | VSICurlIsFileInList(aosFileList.List(), |
4722 | 125k | CPLGetFilename(osFilename.c_str())) != -1; |
4723 | 125k | if (bGotFileList && !bFound && aosFileList.size() < FILE_COUNT_LIMIT) |
4724 | 2.00k | { |
4725 | | // Some file servers are case insensitive, so in case there is a |
4726 | | // match with case difference, do a full check just in case. |
4727 | | // e.g. |
4728 | | // http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/MEGA90N000CB.IMG |
4729 | | // that is queried by |
4730 | | // gdalinfo |
4731 | | // /vsicurl/http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/mega90n000cb.lbl |
4732 | 2.00k | if (aosFileList.FindString(CPLGetFilename(osFilename.c_str())) != |
4733 | 2.00k | -1) |
4734 | 0 | { |
4735 | 0 | bForceExistsCheck = true; |
4736 | 0 | } |
4737 | 2.00k | else |
4738 | 2.00k | { |
4739 | 2.00k | return nullptr; |
4740 | 2.00k | } |
4741 | 2.00k | } |
4742 | 125k | } |
4743 | 172k | if (!bStartsWithVSICurlPrefix) |
4744 | 31.3k | osURL = GetURLFromFilename(pszFilename); |
4745 | 172k | if (GetCachedFileProp(osURL.c_str(), cachedFileProp) && |
4746 | 34.4k | cachedFileProp.eExists == EXIST_YES && cachedFileProp.bIsDirectory) |
4747 | 0 | { |
4748 | 0 | return nullptr; |
4749 | 0 | } |
4750 | | |
4751 | 172k | auto poHandle = |
4752 | 172k | std::unique_ptr<VSICurlHandle>(CreateFileHandle(osFilename.c_str())); |
4753 | 172k | if (poHandle == nullptr) |
4754 | 12.6k | return nullptr; |
4755 | 160k | poHandle->SetCache(bCache); |
4756 | 160k | if (!bGotFileList || bForceExistsCheck) |
4757 | 114k | { |
4758 | | // If we didn't get a filelist, check that the file really exists. |
4759 | 114k | if (!poHandle->Exists(bSetError)) |
4760 | 114k | { |
4761 | 114k | return nullptr; |
4762 | 114k | } |
4763 | 114k | } |
4764 | | |
4765 | 45.8k | if (CPLTestBool(CPLGetConfigOption("VSI_CACHE", "FALSE"))) |
4766 | 0 | return VSIVirtualHandleUniquePtr( |
4767 | 0 | VSICreateCachedFile(poHandle.release())); |
4768 | 45.8k | else |
4769 | 45.8k | return VSIVirtualHandleUniquePtr(poHandle.release()); |
4770 | 45.8k | } |
4771 | | |
4772 | | /************************************************************************/ |
4773 | | /* VSICurlParserFindEOL() */ |
4774 | | /* */ |
4775 | | /* Small helper function for VSICurlParseHTMLFileList() to find */ |
4776 | | /* the end of a line in the directory listing. Either a <br> */ |
4777 | | /* or newline. */ |
4778 | | /************************************************************************/ |
4779 | | |
4780 | | static char *VSICurlParserFindEOL(char *pszData) |
4781 | | |
4782 | 0 | { |
4783 | 0 | while (*pszData != '\0' && *pszData != '\n' && |
4784 | 0 | !STARTS_WITH_CI(pszData, "<br>")) |
4785 | 0 | pszData++; |
4786 | |
|
4787 | 0 | if (*pszData == '\0') |
4788 | 0 | return nullptr; |
4789 | | |
4790 | 0 | return pszData; |
4791 | 0 | } |
4792 | | |
4793 | | /************************************************************************/ |
4794 | | /* ParseFileSize() */ |
4795 | | /************************************************************************/ |
4796 | | |
4797 | | static GUIntBig ParseFileSize(const char *pszStr) |
4798 | 0 | { |
4799 | 0 | GUIntBig nFileSize = 0; |
4800 | 0 | while (*pszStr == ' ') |
4801 | 0 | pszStr++; |
4802 | 0 | if (*pszStr >= '1' && *pszStr <= '9') |
4803 | 0 | { |
4804 | 0 | const char *pszIter = pszStr + 1; |
4805 | 0 | while (*pszIter >= '0' && *pszIter <= '9') |
4806 | 0 | ++pszIter; |
4807 | 0 | if (*pszIter == 0 || *pszIter == ' ' || *pszIter == '\t' || |
4808 | 0 | *pszIter == '\r' || *pszIter == '\n') |
4809 | 0 | { |
4810 | 0 | nFileSize = |
4811 | 0 | CPLScanUIntBig(pszStr, static_cast<int>(pszIter - pszStr)); |
4812 | 0 | } |
4813 | 0 | } |
4814 | 0 | return nFileSize; |
4815 | 0 | } |
4816 | | |
4817 | | /************************************************************************/ |
4818 | | /* VSICurlParseHTMLDateTimeFileSize() */ |
4819 | | /************************************************************************/ |
4820 | | |
// English month names, indexed from 0 (January) to 11 (December).
static const char *const apszMonths[] = {
    "January",    // 0
    "February",   // 1
    "March",      // 2
    "April",      // 3
    "May",        // 4
    "June",       // 5
    "July",       // 6
    "August",     // 7
    "September",  // 8
    "October",    // 9
    "November",   // 10
    "December",   // 11
};
4824 | | |
4825 | | static bool VSICurlParseHTMLDateTimeFileSize(const char *pszStr, |
4826 | | struct tm &brokendowntime, |
4827 | | GUIntBig &nFileSize, |
4828 | | GIntBig &mTime) |
4829 | 0 | { |
4830 | 0 | for (int iMonth = 0; iMonth < 12; iMonth++) |
4831 | 0 | { |
4832 | 0 | nFileSize = 0; |
4833 | |
|
4834 | 0 | char szMonth[32] = {}; |
4835 | 0 | szMonth[0] = '-'; |
4836 | 0 | memcpy(szMonth + 1, apszMonths[iMonth], 3); |
4837 | 0 | szMonth[4] = '-'; |
4838 | 0 | szMonth[5] = '\0'; |
4839 | 0 | const char *pszMonthFound = strstr(pszStr, szMonth); |
4840 | 0 | if (pszMonthFound) |
4841 | 0 | { |
4842 | | // Format of Apache, like in |
4843 | | // http://download.osgeo.org/gdal/data/gtiff/ |
4844 | | // "17-May-2010 12:26" |
4845 | 0 | const auto nMonthFoundLen = strlen(pszMonthFound); |
4846 | 0 | if (pszMonthFound - pszStr > 2 && nMonthFoundLen > 15 && |
4847 | 0 | pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':') |
4848 | 0 | { |
4849 | 0 | pszMonthFound -= 2; |
4850 | 0 | int nDay = atoi(pszMonthFound); |
4851 | 0 | int nYear = atoi(pszMonthFound + 7); |
4852 | 0 | int nHour = atoi(pszMonthFound + 12); |
4853 | 0 | int nMin = atoi(pszMonthFound + 15); |
4854 | 0 | if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 && |
4855 | 0 | nHour <= 24 && nMin >= 0 && nMin < 60) |
4856 | 0 | { |
4857 | 0 | brokendowntime.tm_year = nYear - 1900; |
4858 | 0 | brokendowntime.tm_mon = iMonth; |
4859 | 0 | brokendowntime.tm_mday = nDay; |
4860 | 0 | brokendowntime.tm_hour = nHour; |
4861 | 0 | brokendowntime.tm_min = nMin; |
4862 | 0 | mTime = CPLYMDHMSToUnixTime(&brokendowntime); |
4863 | |
|
4864 | 0 | if (nMonthFoundLen > 15 + 2) |
4865 | 0 | { |
4866 | 0 | const char *pszFilesize = pszMonthFound + 15 + 2; |
4867 | 0 | nFileSize = ParseFileSize(pszFilesize); |
4868 | 0 | } |
4869 | 0 | } |
4870 | 0 | } |
4871 | 0 | return nFileSize > 0; |
4872 | 0 | } |
4873 | | |
4874 | | /* Microsoft IIS */ |
4875 | 0 | snprintf(szMonth, sizeof(szMonth), " %s ", apszMonths[iMonth]); |
4876 | 0 | pszMonthFound = strstr(pszStr, szMonth); |
4877 | 0 | if (pszMonthFound) |
4878 | 0 | { |
4879 | 0 | int nLenMonth = static_cast<int>(strlen(apszMonths[iMonth])); |
4880 | 0 | if (pszMonthFound - pszStr > 2 && pszMonthFound[-1] != ',' && |
4881 | 0 | pszMonthFound[-2] != ' ' && |
4882 | 0 | static_cast<int>(strlen(pszMonthFound - 2)) > |
4883 | 0 | 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4) |
4884 | 0 | { |
4885 | | /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */ |
4886 | | /* " Friday, 21 April 2006 12:05 p.m. 48062343 |
4887 | | * m35a_fy_94_95.tif" */ |
4888 | 0 | pszMonthFound -= 2; |
4889 | 0 | int nDay = atoi(pszMonthFound); |
4890 | 0 | int nCurOffset = 2 + 1 + nLenMonth + 1; |
4891 | 0 | int nYear = atoi(pszMonthFound + nCurOffset); |
4892 | 0 | nCurOffset += 4 + 1; |
4893 | 0 | int nHour = atoi(pszMonthFound + nCurOffset); |
4894 | 0 | if (nHour < 10) |
4895 | 0 | nCurOffset += 1 + 1; |
4896 | 0 | else |
4897 | 0 | nCurOffset += 2 + 1; |
4898 | 0 | const int nMin = atoi(pszMonthFound + nCurOffset); |
4899 | 0 | nCurOffset += 2 + 1; |
4900 | 0 | if (STARTS_WITH(pszMonthFound + nCurOffset, "p.m.")) |
4901 | 0 | nHour += 12; |
4902 | 0 | else if (!STARTS_WITH(pszMonthFound + nCurOffset, "a.m.")) |
4903 | 0 | nHour = -1; |
4904 | 0 | nCurOffset += 4; |
4905 | |
|
4906 | 0 | if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 && |
4907 | 0 | nHour <= 24 && nMin >= 0 && nMin < 60) |
4908 | 0 | { |
4909 | 0 | brokendowntime.tm_year = nYear - 1900; |
4910 | 0 | brokendowntime.tm_mon = iMonth; |
4911 | 0 | brokendowntime.tm_mday = nDay; |
4912 | 0 | brokendowntime.tm_hour = nHour; |
4913 | 0 | brokendowntime.tm_min = nMin; |
4914 | 0 | mTime = CPLYMDHMSToUnixTime(&brokendowntime); |
4915 | |
|
4916 | 0 | const char *pszFilesize = pszMonthFound + nCurOffset; |
4917 | 0 | nFileSize = ParseFileSize(pszFilesize); |
4918 | 0 | } |
4919 | 0 | } |
4920 | 0 | else if (pszMonthFound - pszStr > 1 && pszMonthFound[-1] == ',' && |
4921 | 0 | static_cast<int>(strlen(pszMonthFound)) > |
4922 | 0 | 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2) |
4923 | 0 | { |
4924 | | // Format of |
4925 | | // http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ |
4926 | | // " Sunday, June 20, 2010 6:46 PM 233170905 |
4927 | | // NWF2007LULCForSDE.zip" |
4928 | 0 | pszMonthFound += 1; |
4929 | 0 | int nCurOffset = nLenMonth + 1; |
4930 | 0 | int nDay = atoi(pszMonthFound + nCurOffset); |
4931 | 0 | nCurOffset += 2 + 1 + 1; |
4932 | 0 | int nYear = atoi(pszMonthFound + nCurOffset); |
4933 | 0 | nCurOffset += 4 + 1; |
4934 | 0 | int nHour = atoi(pszMonthFound + nCurOffset); |
4935 | 0 | nCurOffset += 2 + 1; |
4936 | 0 | const int nMin = atoi(pszMonthFound + nCurOffset); |
4937 | 0 | nCurOffset += 2 + 1; |
4938 | 0 | if (STARTS_WITH(pszMonthFound + nCurOffset, "PM")) |
4939 | 0 | nHour += 12; |
4940 | 0 | else if (!STARTS_WITH(pszMonthFound + nCurOffset, "AM")) |
4941 | 0 | nHour = -1; |
4942 | 0 | nCurOffset += 2; |
4943 | |
|
4944 | 0 | if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 && |
4945 | 0 | nHour <= 24 && nMin >= 0 && nMin < 60) |
4946 | 0 | { |
4947 | 0 | brokendowntime.tm_year = nYear - 1900; |
4948 | 0 | brokendowntime.tm_mon = iMonth; |
4949 | 0 | brokendowntime.tm_mday = nDay; |
4950 | 0 | brokendowntime.tm_hour = nHour; |
4951 | 0 | brokendowntime.tm_min = nMin; |
4952 | 0 | mTime = CPLYMDHMSToUnixTime(&brokendowntime); |
4953 | |
|
4954 | 0 | const char *pszFilesize = pszMonthFound + nCurOffset; |
4955 | 0 | nFileSize = ParseFileSize(pszFilesize); |
4956 | 0 | } |
4957 | 0 | } |
4958 | |
|
4959 | 0 | return nFileSize > 0; |
4960 | 0 | } |
4961 | 0 | } |
4962 | | |
4963 | 0 | return false; |
4964 | 0 | } |
4965 | | |
4966 | | /************************************************************************/ |
4967 | | /* ParseHTMLFileList() */ |
4968 | | /* */ |
4969 | | /* Parse a file list document and return all the components. */ |
4970 | | /************************************************************************/ |
4971 | | |
4972 | | char **VSICurlFilesystemHandlerBase::ParseHTMLFileList(const char *pszFilename, |
4973 | | int nMaxFiles, |
4974 | | char *pszData, |
4975 | | bool *pbGotFileList) |
4976 | 0 | { |
4977 | 0 | *pbGotFileList = false; |
4978 | |
|
4979 | 0 | std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr, |
4980 | 0 | nullptr, nullptr, nullptr, |
4981 | 0 | nullptr, nullptr, nullptr)); |
4982 | 0 | const char *pszDir = nullptr; |
4983 | 0 | if (STARTS_WITH_CI(osURL.c_str(), "http://")) |
4984 | 0 | pszDir = strchr(osURL.c_str() + strlen("http://"), '/'); |
4985 | 0 | else if (STARTS_WITH_CI(osURL.c_str(), "https://")) |
4986 | 0 | pszDir = strchr(osURL.c_str() + strlen("https://"), '/'); |
4987 | 0 | else if (STARTS_WITH_CI(osURL.c_str(), "ftp://")) |
4988 | 0 | pszDir = strchr(osURL.c_str() + strlen("ftp://"), '/'); |
4989 | 0 | if (pszDir == nullptr) |
4990 | 0 | pszDir = ""; |
4991 | | |
4992 | | /* Apache / Nginx */ |
4993 | | /* Most of the time the format is <title>Index of {pszDir[/]}</title>, but |
4994 | | * there are special cases like https://cdn.star.nesdis.noaa.gov/GOES18/ABI/MESO/M1/GEOCOLOR/ |
4995 | | * where a CDN stuff makes that the title is <title>Index of /ma-cdn02/GOES/data/GOES18/ABI/MESO/M1/GEOCOLOR/</title> |
4996 | | */ |
4997 | 0 | const std::string osTitleIndexOfPrefix = "<title>Index of "; |
4998 | 0 | const std::string osExpectedSuffix = std::string(pszDir).append("</title>"); |
4999 | 0 | const std::string osExpectedSuffixWithSlash = |
5000 | 0 | std::string(pszDir).append("/</title>"); |
5001 | | /* FTP */ |
5002 | 0 | const std::string osExpectedStringFTP = |
5003 | 0 | std::string("FTP Listing of ").append(pszDir).append("/"); |
5004 | | /* Apache 1.3.33 */ |
5005 | 0 | const std::string osExpectedStringOldApache = |
5006 | 0 | std::string("<TITLE>Index of ").append(pszDir).append("</TITLE>"); |
5007 | | |
5008 | | // The listing of |
5009 | | // http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ |
5010 | | // has |
5011 | | // "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" |
5012 | | // so we must try unescaped %20 also. |
5013 | | // Similar with |
5014 | | // http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ |
5015 | 0 | std::string osExpectedString_unescaped; |
5016 | 0 | if (strchr(pszDir, '%')) |
5017 | 0 | { |
5018 | 0 | char *pszUnescapedDir = CPLUnescapeString(pszDir, nullptr, CPLES_URL); |
5019 | 0 | osExpectedString_unescaped = osTitleIndexOfPrefix; |
5020 | 0 | osExpectedString_unescaped += pszUnescapedDir; |
5021 | 0 | osExpectedString_unescaped += "</title>"; |
5022 | 0 | CPLFree(pszUnescapedDir); |
5023 | 0 | } |
5024 | |
|
5025 | 0 | char *c = nullptr; |
5026 | 0 | int nCount = 0; |
5027 | 0 | int nCountTable = 0; |
5028 | 0 | CPLStringList oFileList; |
5029 | 0 | char *pszLine = pszData; |
5030 | 0 | bool bIsHTMLDirList = false; |
5031 | |
|
5032 | 0 | while ((c = VSICurlParserFindEOL(pszLine)) != nullptr) |
5033 | 0 | { |
5034 | 0 | *c = '\0'; |
5035 | | |
5036 | | // To avoid false positive on pages such as |
5037 | | // http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA |
5038 | | // This is a heuristics, but normal HTML listing of files have not more |
5039 | | // than one table. |
5040 | 0 | if (strstr(pszLine, "<table")) |
5041 | 0 | { |
5042 | 0 | nCountTable++; |
5043 | 0 | if (nCountTable == 2) |
5044 | 0 | { |
5045 | 0 | *pbGotFileList = false; |
5046 | 0 | return nullptr; |
5047 | 0 | } |
5048 | 0 | } |
5049 | | |
5050 | 0 | if (!bIsHTMLDirList && |
5051 | 0 | ((strstr(pszLine, osTitleIndexOfPrefix.c_str()) && |
5052 | 0 | (strstr(pszLine, osExpectedSuffix.c_str()) || |
5053 | 0 | strstr(pszLine, osExpectedSuffixWithSlash.c_str()))) || |
5054 | 0 | strstr(pszLine, osExpectedStringFTP.c_str()) || |
5055 | 0 | strstr(pszLine, osExpectedStringOldApache.c_str()) || |
5056 | 0 | (!osExpectedString_unescaped.empty() && |
5057 | 0 | strstr(pszLine, osExpectedString_unescaped.c_str())))) |
5058 | 0 | { |
5059 | 0 | bIsHTMLDirList = true; |
5060 | 0 | *pbGotFileList = true; |
5061 | 0 | } |
5062 | | // Subversion HTTP listing |
5063 | | // or Microsoft-IIS/6.0 listing |
5064 | | // (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */ |
5065 | 0 | else if (!bIsHTMLDirList && strstr(pszLine, "<title>")) |
5066 | 0 | { |
5067 | | // Detect something like: |
5068 | | // <html><head><title>gdal - Revision 20739: |
5069 | | // /trunk/autotest/gcore/data</title></head> */ The annoying thing |
5070 | | // is that what is after ': ' is a subpart of what is after |
5071 | | // http://server/ |
5072 | 0 | char *pszSubDir = strstr(pszLine, ": "); |
5073 | 0 | if (pszSubDir == nullptr) |
5074 | | // or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> |
5075 | 0 | pszSubDir = strstr(pszLine, "- "); |
5076 | 0 | if (pszSubDir) |
5077 | 0 | { |
5078 | 0 | pszSubDir += 2; |
5079 | 0 | char *pszTmp = strstr(pszSubDir, "</title>"); |
5080 | 0 | if (pszTmp) |
5081 | 0 | { |
5082 | 0 | if (pszTmp[-1] == '/') |
5083 | 0 | pszTmp[-1] = 0; |
5084 | 0 | else |
5085 | 0 | *pszTmp = 0; |
5086 | 0 | if (strstr(pszDir, pszSubDir)) |
5087 | 0 | { |
5088 | 0 | bIsHTMLDirList = true; |
5089 | 0 | *pbGotFileList = true; |
5090 | 0 | } |
5091 | 0 | } |
5092 | 0 | } |
5093 | 0 | } |
5094 | 0 | else if (bIsHTMLDirList && |
5095 | 0 | (strstr(pszLine, "<a href=\"") != nullptr || |
5096 | 0 | strstr(pszLine, "<A HREF=\"") != nullptr) && |
5097 | | // Exclude absolute links, like to subversion home. |
5098 | 0 | strstr(pszLine, "<a href=\"http://") == nullptr && |
5099 | | // exclude parent directory. |
5100 | 0 | strstr(pszLine, "Parent Directory") == nullptr) |
5101 | 0 | { |
5102 | 0 | char *beginFilename = strstr(pszLine, "<a href=\""); |
5103 | 0 | if (beginFilename == nullptr) |
5104 | 0 | beginFilename = strstr(pszLine, "<A HREF=\""); |
5105 | 0 | beginFilename += strlen("<a href=\""); |
5106 | 0 | char *endQuote = strchr(beginFilename, '"'); |
5107 | 0 | if (endQuote && !STARTS_WITH(beginFilename, "?C=") && |
5108 | 0 | !STARTS_WITH(beginFilename, "?N=")) |
5109 | 0 | { |
5110 | 0 | struct tm brokendowntime; |
5111 | 0 | memset(&brokendowntime, 0, sizeof(brokendowntime)); |
5112 | 0 | GUIntBig nFileSize = 0; |
5113 | 0 | GIntBig mTime = 0; |
5114 | |
|
5115 | 0 | VSICurlParseHTMLDateTimeFileSize(pszLine, brokendowntime, |
5116 | 0 | nFileSize, mTime); |
5117 | |
|
5118 | 0 | *endQuote = '\0'; |
5119 | | |
5120 | | // Remove trailing slash, that are returned for directories by |
5121 | | // Apache. |
5122 | 0 | bool bIsDirectory = false; |
5123 | 0 | if (endQuote[-1] == '/') |
5124 | 0 | { |
5125 | 0 | bIsDirectory = true; |
5126 | 0 | endQuote[-1] = 0; |
5127 | 0 | } |
5128 | | |
5129 | | // shttpd links include slashes from the root directory. |
5130 | | // Skip them. |
5131 | 0 | while (strchr(beginFilename, '/')) |
5132 | 0 | beginFilename = strchr(beginFilename, '/') + 1; |
5133 | |
|
5134 | 0 | if (strcmp(beginFilename, ".") != 0 && |
5135 | 0 | strcmp(beginFilename, "..") != 0) |
5136 | 0 | { |
5137 | 0 | std::string osCachedFilename = |
5138 | 0 | CPLSPrintf("%s/%s", osURL.c_str(), beginFilename); |
5139 | |
|
5140 | 0 | FileProp cachedFileProp; |
5141 | 0 | GetCachedFileProp(osCachedFilename.c_str(), cachedFileProp); |
5142 | 0 | cachedFileProp.eExists = EXIST_YES; |
5143 | 0 | cachedFileProp.bIsDirectory = bIsDirectory; |
5144 | 0 | if (mTime > 0) |
5145 | 0 | { |
5146 | 0 | cachedFileProp.mTime = static_cast<time_t>(mTime); |
5147 | 0 | } |
5148 | 0 | if (!cachedFileProp.bHasComputedFileSize) |
5149 | 0 | { |
5150 | 0 | cachedFileProp.bHasComputedFileSize = nFileSize > 0; |
5151 | 0 | cachedFileProp.fileSize = nFileSize; |
5152 | 0 | } |
5153 | 0 | SetCachedFileProp(osCachedFilename.c_str(), cachedFileProp); |
5154 | |
|
5155 | 0 | oFileList.AddString(beginFilename); |
5156 | | if constexpr (ENABLE_DEBUG_VERBOSE) |
5157 | | { |
5158 | | CPLDebug( |
5159 | | GetDebugKey(), |
5160 | | "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB |
5161 | | ", time = %04d/%02d/%02d %02d:%02d:%02d", |
5162 | | nCount, osCachedFilename.c_str(), |
5163 | | bIsDirectory ? 1 : 0, nFileSize, |
5164 | | brokendowntime.tm_year + 1900, |
5165 | | brokendowntime.tm_mon + 1, brokendowntime.tm_mday, |
5166 | | brokendowntime.tm_hour, brokendowntime.tm_min, |
5167 | | brokendowntime.tm_sec); |
5168 | | } |
5169 | 0 | nCount++; |
5170 | |
|
5171 | 0 | if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles) |
5172 | 0 | break; |
5173 | 0 | } |
5174 | 0 | } |
5175 | 0 | } |
5176 | 0 | pszLine = c + 1; |
5177 | 0 | } |
5178 | | |
5179 | 0 | return oFileList.StealList(); |
5180 | 0 | } |
5181 | | |
5182 | | /************************************************************************/ |
5183 | | /* GetStreamingFilename() */ |
5184 | | /************************************************************************/ |
5185 | | |
5186 | | std::string VSICurlFilesystemHandler::GetStreamingFilename( |
5187 | | const std::string &osFilename) const |
5188 | 420k | { |
5189 | 420k | if (STARTS_WITH(osFilename.c_str(), GetFSPrefix().c_str())) |
5190 | 420k | return "/vsicurl_streaming/" + osFilename.substr(GetFSPrefix().size()); |
5191 | 0 | return osFilename; |
5192 | 420k | } |
5193 | | |
5194 | | /************************************************************************/ |
5195 | | /* GetHintForPotentiallyRecognizedPath() */ |
5196 | | /************************************************************************/ |
5197 | | |
5198 | | std::string VSICurlFilesystemHandler::GetHintForPotentiallyRecognizedPath( |
5199 | | const std::string &osPath) |
5200 | 423k | { |
5201 | 423k | if (!StartsWithVSICurlPrefix(osPath.c_str()) && |
5202 | 420k | !cpl::starts_with(osPath, GetStreamingFilename(GetFSPrefix()))) |
5203 | 420k | { |
5204 | 420k | for (const char *pszPrefix : {"http://", "https://"}) |
5205 | 841k | { |
5206 | 841k | if (cpl::starts_with(osPath, pszPrefix)) |
5207 | 373 | { |
5208 | 373 | return GetFSPrefix() + osPath; |
5209 | 373 | } |
5210 | 841k | } |
5211 | 420k | } |
5212 | 423k | return std::string(); |
5213 | 423k | } |
5214 | | |
5215 | | /************************************************************************/ |
5216 | | /* VSICurlGetToken() */ |
5217 | | /************************************************************************/ |
5218 | | |
// Extract the next space-delimited token of pszCurPtr, modifying the buffer
// in place.
//
// Returns nullptr, leaving *ppszNextToken untouched, if pszCurPtr is nullptr
// or holds nothing but spaces. Otherwise returns the start of the token,
// which is nul-terminated in place, and sets *ppszNextToken to the first
// non-space character following it, or to nullptr if the token extends up to
// the end of the string.
static char *VSICurlGetToken(char *pszCurPtr, char **ppszNextToken)
{
    if (pszCurPtr == nullptr)
        return nullptr;

    // Skip leading spaces.
    char *pszTokenStart = pszCurPtr + strspn(pszCurPtr, " ");
    if (*pszTokenStart == '\0')
        return nullptr;

    // The token runs up to the next space or the end of the string.
    char *pszTokenEnd = pszTokenStart + strcspn(pszTokenStart, " ");
    if (*pszTokenEnd == '\0')
    {
        *ppszNextToken = nullptr;
        return pszTokenStart;
    }

    // Terminate the token and position the caller on what follows.
    *pszTokenEnd = '\0';
    char *pszRemainder = pszTokenEnd + 1;
    *ppszNextToken = pszRemainder + strspn(pszRemainder, " ");

    return pszTokenStart;
}
5247 | | |
5248 | | /************************************************************************/ |
5249 | | /* VSICurlParseFullFTPLine() */ |
5250 | | /************************************************************************/ |
5251 | | |
5252 | | /* Parse lines like the following ones : |
5253 | | -rw-r--r-- 1 10003 100 430 Jul 04 2008 COPYING |
5254 | | lrwxrwxrwx 1 ftp ftp 28 Jun 14 14:13 MPlayer -> |
5255 | | mirrors/mplayerhq.hu/MPlayer -rw-r--r-- 1 ftp ftp 725614592 May 13 |
5256 | | 20:13 Fedora-15-x86_64-Live-KDE.iso drwxr-xr-x 280 1003 1003 6656 Aug 26 |
5257 | | 04:17 gnu |
5258 | | */ |
5259 | | |
5260 | | static bool VSICurlParseFullFTPLine(char *pszLine, char *&pszFilename, |
5261 | | bool &bSizeValid, GUIntBig &nSize, |
5262 | | bool &bIsDirectory, GIntBig &nUnixTime) |
5263 | 0 | { |
5264 | 0 | char *pszNextToken = pszLine; |
5265 | 0 | char *pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken); |
5266 | 0 | if (pszPermissions == nullptr || strlen(pszPermissions) != 10) |
5267 | 0 | return false; |
5268 | 0 | bIsDirectory = pszPermissions[0] == 'd'; |
5269 | |
|
5270 | 0 | for (int i = 0; i < 3; i++) |
5271 | 0 | { |
5272 | 0 | if (VSICurlGetToken(pszNextToken, &pszNextToken) == nullptr) |
5273 | 0 | return false; |
5274 | 0 | } |
5275 | | |
5276 | 0 | char *pszSize = VSICurlGetToken(pszNextToken, &pszNextToken); |
5277 | 0 | if (pszSize == nullptr) |
5278 | 0 | return false; |
5279 | | |
5280 | 0 | if (pszPermissions[0] == '-') |
5281 | 0 | { |
5282 | | // Regular file. |
5283 | 0 | bSizeValid = true; |
5284 | 0 | nSize = CPLScanUIntBig(pszSize, static_cast<int>(strlen(pszSize))); |
5285 | 0 | } |
5286 | |
|
5287 | 0 | struct tm brokendowntime; |
5288 | 0 | memset(&brokendowntime, 0, sizeof(brokendowntime)); |
5289 | 0 | bool bBrokenDownTimeValid = true; |
5290 | |
|
5291 | 0 | char *pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken); |
5292 | 0 | if (pszMonth == nullptr || strlen(pszMonth) != 3) |
5293 | 0 | return false; |
5294 | | |
5295 | 0 | int i = 0; // Used after for. |
5296 | 0 | for (; i < 12; i++) |
5297 | 0 | { |
5298 | 0 | if (EQUALN(pszMonth, apszMonths[i], 3)) |
5299 | 0 | break; |
5300 | 0 | } |
5301 | 0 | if (i < 12) |
5302 | 0 | brokendowntime.tm_mon = i; |
5303 | 0 | else |
5304 | 0 | bBrokenDownTimeValid = false; |
5305 | |
|
5306 | 0 | char *pszDay = VSICurlGetToken(pszNextToken, &pszNextToken); |
5307 | 0 | if (pszDay == nullptr || (strlen(pszDay) != 1 && strlen(pszDay) != 2)) |
5308 | 0 | return false; |
5309 | 0 | int nDay = atoi(pszDay); |
5310 | 0 | if (nDay >= 1 && nDay <= 31) |
5311 | 0 | brokendowntime.tm_mday = nDay; |
5312 | 0 | else |
5313 | 0 | bBrokenDownTimeValid = false; |
5314 | |
|
5315 | 0 | char *pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken); |
5316 | 0 | if (pszHourOrYear == nullptr || |
5317 | 0 | (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5)) |
5318 | 0 | return false; |
5319 | 0 | if (strlen(pszHourOrYear) == 4) |
5320 | 0 | { |
5321 | 0 | brokendowntime.tm_year = atoi(pszHourOrYear) - 1900; |
5322 | 0 | } |
5323 | 0 | else |
5324 | 0 | { |
5325 | 0 | time_t sTime; |
5326 | 0 | time(&sTime); |
5327 | 0 | struct tm currentBrokendowntime; |
5328 | 0 | CPLUnixTimeToYMDHMS(static_cast<GIntBig>(sTime), |
5329 | 0 | ¤tBrokendowntime); |
5330 | 0 | brokendowntime.tm_year = currentBrokendowntime.tm_year; |
5331 | 0 | brokendowntime.tm_hour = atoi(pszHourOrYear); |
5332 | 0 | brokendowntime.tm_min = atoi(pszHourOrYear + 3); |
5333 | 0 | } |
5334 | |
|
5335 | 0 | if (bBrokenDownTimeValid) |
5336 | 0 | nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime); |
5337 | 0 | else |
5338 | 0 | nUnixTime = 0; |
5339 | |
|
5340 | 0 | if (pszNextToken == nullptr) |
5341 | 0 | return false; |
5342 | | |
5343 | 0 | pszFilename = pszNextToken; |
5344 | |
|
5345 | 0 | char *pszCurPtr = pszFilename; |
5346 | 0 | while (*pszCurPtr != '\0') |
5347 | 0 | { |
5348 | | // In case of a link, stop before the pointed part of the link. |
5349 | 0 | if (pszPermissions[0] == 'l' && STARTS_WITH(pszCurPtr, " -> ")) |
5350 | 0 | { |
5351 | 0 | break; |
5352 | 0 | } |
5353 | 0 | pszCurPtr++; |
5354 | 0 | } |
5355 | 0 | *pszCurPtr = '\0'; |
5356 | |
|
5357 | 0 | return true; |
5358 | 0 | } |
5359 | | |
5360 | | /************************************************************************/ |
5361 | | /* GetURLFromFilename() */ |
5362 | | /************************************************************************/ |
5363 | | |
5364 | | std::string VSICurlFilesystemHandlerBase::GetURLFromFilename( |
5365 | | const std::string &osFilename) const |
5366 | 106k | { |
5367 | 106k | return VSICurlGetURLFromFilename(osFilename.c_str(), nullptr, nullptr, |
5368 | 106k | nullptr, nullptr, nullptr, nullptr, |
5369 | 106k | nullptr, nullptr); |
5370 | 106k | } |
5371 | | |
5372 | | /************************************************************************/ |
5373 | | /* RegisterEmptyDir() */ |
5374 | | /************************************************************************/ |
5375 | | |
5376 | | void VSICurlFilesystemHandlerBase::RegisterEmptyDir( |
5377 | | const std::string &osDirname) |
5378 | 0 | { |
5379 | 0 | CachedDirList cachedDirList; |
5380 | 0 | cachedDirList.bGotFileList = true; |
5381 | 0 | cachedDirList.oFileList.AddString("."); |
5382 | 0 | SetCachedDirList(osDirname.c_str(), cachedDirList); |
5383 | 0 | } |
5384 | | |
5385 | | /************************************************************************/ |
5386 | | /* GetFileList() */ |
5387 | | /************************************************************************/ |
5388 | | |
5389 | | char **VSICurlFilesystemHandlerBase::GetFileList(const char *pszDirname, |
5390 | | int nMaxFiles, |
5391 | | bool *pbGotFileList) |
5392 | 6.62k | { |
5393 | | if constexpr (ENABLE_DEBUG) |
5394 | 6.62k | { |
5395 | 6.62k | CPLDebug(GetDebugKey(), "GetFileList(%s)", pszDirname); |
5396 | 6.62k | } |
5397 | | |
5398 | 6.62k | *pbGotFileList = false; |
5399 | | |
5400 | 6.62k | bool bListDir = true; |
5401 | 6.62k | bool bEmptyDir = false; |
5402 | 6.62k | std::string osURL(VSICurlGetURLFromFilename(pszDirname, nullptr, nullptr, |
5403 | 6.62k | nullptr, &bListDir, &bEmptyDir, |
5404 | 6.62k | nullptr, nullptr, nullptr)); |
5405 | 6.62k | if (bEmptyDir) |
5406 | 15 | { |
5407 | 15 | *pbGotFileList = true; |
5408 | 15 | return CSLAddString(nullptr, "."); |
5409 | 15 | } |
5410 | 6.60k | if (!bListDir) |
5411 | 0 | return nullptr; |
5412 | | |
5413 | | // Deal with publicly visible Azure directories. |
5414 | 6.60k | if (STARTS_WITH(osURL.c_str(), "https://")) |
5415 | 104 | { |
5416 | 104 | const char *pszBlobCore = |
5417 | 104 | strstr(osURL.c_str(), ".blob.core.windows.net/"); |
5418 | 104 | if (pszBlobCore) |
5419 | 8 | { |
5420 | 8 | FileProp cachedFileProp; |
5421 | 8 | GetCachedFileProp(osURL.c_str(), cachedFileProp); |
5422 | 8 | if (cachedFileProp.bIsAzureFolder) |
5423 | 0 | { |
5424 | 0 | const char *pszURLWithoutHTTPS = |
5425 | 0 | osURL.c_str() + strlen("https://"); |
5426 | 0 | const std::string osStorageAccount( |
5427 | 0 | pszURLWithoutHTTPS, pszBlobCore - pszURLWithoutHTTPS); |
5428 | 0 | CPLConfigOptionSetter oSetter1("AZURE_NO_SIGN_REQUEST", "YES", |
5429 | 0 | false); |
5430 | 0 | CPLConfigOptionSetter oSetter2("AZURE_STORAGE_ACCOUNT", |
5431 | 0 | osStorageAccount.c_str(), false); |
5432 | 0 | const std::string osVSIAZ(std::string("/vsiaz/").append( |
5433 | 0 | pszBlobCore + strlen(".blob.core.windows.net/"))); |
5434 | 0 | char **papszFileList = VSIReadDirEx(osVSIAZ.c_str(), nMaxFiles); |
5435 | 0 | if (papszFileList) |
5436 | 0 | { |
5437 | 0 | *pbGotFileList = true; |
5438 | 0 | return papszFileList; |
5439 | 0 | } |
5440 | 0 | } |
5441 | 8 | } |
5442 | 104 | } |
5443 | | |
5444 | | // HACK (optimization in fact) for MBTiles driver. |
5445 | 6.60k | if (strstr(pszDirname, ".tiles.mapbox.com") != nullptr) |
5446 | 303 | return nullptr; |
5447 | | |
5448 | 6.30k | if (STARTS_WITH(osURL.c_str(), "ftp://")) |
5449 | 241 | { |
5450 | 241 | WriteFuncStruct sWriteFuncData; |
5451 | 241 | sWriteFuncData.pBuffer = nullptr; |
5452 | | |
5453 | 241 | std::string osDirname(osURL); |
5454 | 241 | osDirname += '/'; |
5455 | | |
5456 | 241 | char **papszFileList = nullptr; |
5457 | | |
5458 | 241 | CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname); |
5459 | 241 | CURL *hCurlHandle = curl_easy_init(); |
5460 | | |
5461 | 241 | for (int iTry = 0; iTry < 2; iTry++) |
5462 | 241 | { |
5463 | 241 | struct curl_slist *headers = |
5464 | 241 | VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr); |
5465 | | |
5466 | | // On the first pass, we want to try fetching all the possible |
5467 | | // information (filename, file/directory, size). If that does not |
5468 | | // work, then try again with CURLOPT_DIRLISTONLY set. |
5469 | 241 | if (iTry == 1) |
5470 | 0 | { |
5471 | 0 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1); |
5472 | 0 | } |
5473 | | |
5474 | 241 | VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, |
5475 | 241 | nullptr); |
5476 | 241 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, |
5477 | 241 | &sWriteFuncData); |
5478 | 241 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
5479 | 241 | VSICurlHandleWriteFunc); |
5480 | | |
5481 | 241 | char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {}; |
5482 | 241 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, |
5483 | 241 | szCurlErrBuf); |
5484 | | |
5485 | 241 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, |
5486 | 241 | headers); |
5487 | | |
5488 | 241 | VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle); |
5489 | | |
5490 | 241 | curl_slist_free_all(headers); |
5491 | | |
5492 | 241 | if (sWriteFuncData.pBuffer == nullptr) |
5493 | 241 | { |
5494 | 241 | curl_easy_cleanup(hCurlHandle); |
5495 | 241 | return nullptr; |
5496 | 241 | } |
5497 | | |
5498 | 0 | char *pszLine = sWriteFuncData.pBuffer; |
5499 | 0 | char *c = nullptr; |
5500 | 0 | int nCount = 0; |
5501 | |
|
5502 | 0 | if (STARTS_WITH_CI(pszLine, "<!DOCTYPE HTML") || |
5503 | 0 | STARTS_WITH_CI(pszLine, "<HTML>")) |
5504 | 0 | { |
5505 | 0 | papszFileList = |
5506 | 0 | ParseHTMLFileList(pszDirname, nMaxFiles, |
5507 | 0 | sWriteFuncData.pBuffer, pbGotFileList); |
5508 | 0 | break; |
5509 | 0 | } |
5510 | 0 | else if (iTry == 0) |
5511 | 0 | { |
5512 | 0 | CPLStringList oFileList; |
5513 | 0 | *pbGotFileList = true; |
5514 | |
|
5515 | 0 | while ((c = strchr(pszLine, '\n')) != nullptr) |
5516 | 0 | { |
5517 | 0 | *c = 0; |
5518 | 0 | if (c - pszLine > 0 && c[-1] == '\r') |
5519 | 0 | c[-1] = 0; |
5520 | |
|
5521 | 0 | char *pszFilename = nullptr; |
5522 | 0 | bool bSizeValid = false; |
5523 | 0 | GUIntBig nFileSize = 0; |
5524 | 0 | bool bIsDirectory = false; |
5525 | 0 | GIntBig mUnixTime = 0; |
5526 | 0 | if (!VSICurlParseFullFTPLine(pszLine, pszFilename, |
5527 | 0 | bSizeValid, nFileSize, |
5528 | 0 | bIsDirectory, mUnixTime)) |
5529 | 0 | break; |
5530 | | |
5531 | 0 | if (strcmp(pszFilename, ".") != 0 && |
5532 | 0 | strcmp(pszFilename, "..") != 0) |
5533 | 0 | { |
5534 | 0 | if (CPLHasUnbalancedPathTraversal(pszFilename)) |
5535 | 0 | { |
5536 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
5537 | 0 | "Ignoring '%s' that has a path traversal " |
5538 | 0 | "pattern", |
5539 | 0 | pszFilename); |
5540 | 0 | } |
5541 | 0 | else |
5542 | 0 | { |
5543 | 0 | std::string osCachedFilename = |
5544 | 0 | CPLSPrintf("%s/%s", osURL.c_str(), pszFilename); |
5545 | |
|
5546 | 0 | FileProp cachedFileProp; |
5547 | 0 | GetCachedFileProp(osCachedFilename.c_str(), |
5548 | 0 | cachedFileProp); |
5549 | 0 | cachedFileProp.eExists = EXIST_YES; |
5550 | 0 | cachedFileProp.bIsDirectory = bIsDirectory; |
5551 | 0 | cachedFileProp.mTime = |
5552 | 0 | static_cast<time_t>(mUnixTime); |
5553 | 0 | cachedFileProp.bHasComputedFileSize = bSizeValid; |
5554 | 0 | cachedFileProp.fileSize = nFileSize; |
5555 | 0 | SetCachedFileProp(osCachedFilename.c_str(), |
5556 | 0 | cachedFileProp); |
5557 | |
|
5558 | 0 | oFileList.AddString(pszFilename); |
5559 | | if constexpr (ENABLE_DEBUG_VERBOSE) |
5560 | | { |
5561 | | struct tm brokendowntime; |
5562 | | CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime); |
5563 | | CPLDebug( |
5564 | | GetDebugKey(), |
5565 | | "File[%d] = %s, is_dir = %d, size " |
5566 | | "= " CPL_FRMT_GUIB |
5567 | | ", time = %04d/%02d/%02d %02d:%02d:%02d", |
5568 | | nCount, pszFilename, bIsDirectory ? 1 : 0, |
5569 | | nFileSize, brokendowntime.tm_year + 1900, |
5570 | | brokendowntime.tm_mon + 1, |
5571 | | brokendowntime.tm_mday, |
5572 | | brokendowntime.tm_hour, |
5573 | | brokendowntime.tm_min, |
5574 | | brokendowntime.tm_sec); |
5575 | | } |
5576 | |
|
5577 | 0 | nCount++; |
5578 | |
|
5579 | 0 | if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles) |
5580 | 0 | break; |
5581 | 0 | } |
5582 | 0 | } |
5583 | | |
5584 | 0 | pszLine = c + 1; |
5585 | 0 | } |
5586 | |
|
5587 | 0 | if (c == nullptr) |
5588 | 0 | { |
5589 | 0 | papszFileList = oFileList.StealList(); |
5590 | 0 | break; |
5591 | 0 | } |
5592 | 0 | } |
5593 | 0 | else |
5594 | 0 | { |
5595 | 0 | CPLStringList oFileList; |
5596 | 0 | *pbGotFileList = true; |
5597 | |
|
5598 | 0 | while ((c = strchr(pszLine, '\n')) != nullptr) |
5599 | 0 | { |
5600 | 0 | *c = 0; |
5601 | 0 | if (c - pszLine > 0 && c[-1] == '\r') |
5602 | 0 | c[-1] = 0; |
5603 | |
|
5604 | 0 | if (strcmp(pszLine, ".") != 0 && strcmp(pszLine, "..") != 0) |
5605 | 0 | { |
5606 | 0 | oFileList.AddString(pszLine); |
5607 | | if constexpr (ENABLE_DEBUG_VERBOSE) |
5608 | | { |
5609 | | CPLDebug(GetDebugKey(), "File[%d] = %s", nCount, |
5610 | | pszLine); |
5611 | | } |
5612 | 0 | nCount++; |
5613 | 0 | } |
5614 | |
|
5615 | 0 | pszLine = c + 1; |
5616 | 0 | } |
5617 | |
|
5618 | 0 | papszFileList = oFileList.StealList(); |
5619 | 0 | } |
5620 | | |
5621 | 0 | CPLFree(sWriteFuncData.pBuffer); |
5622 | 0 | sWriteFuncData.pBuffer = nullptr; |
5623 | 0 | } |
5624 | | |
5625 | 0 | CPLFree(sWriteFuncData.pBuffer); |
5626 | 0 | curl_easy_cleanup(hCurlHandle); |
5627 | |
|
5628 | 0 | return papszFileList; |
5629 | 241 | } |
5630 | | |
5631 | | // Try to recognize HTML pages that list the content of a directory. |
5632 | | // Currently this supports what Apache and shttpd can return. |
5633 | 6.06k | else if (STARTS_WITH(osURL.c_str(), "http://") || |
5634 | 5.76k | STARTS_WITH(osURL.c_str(), "https://")) |
5635 | 396 | { |
5636 | 396 | std::string osDirname(std::move(osURL)); |
5637 | 396 | osDirname += '/'; |
5638 | | |
5639 | 396 | CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname); |
5640 | 396 | CURL *hCurlHandle = curl_easy_init(); |
5641 | | |
5642 | 396 | struct curl_slist *headers = |
5643 | 396 | VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr); |
5644 | | |
5645 | 396 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr); |
5646 | | |
5647 | 396 | WriteFuncStruct sWriteFuncData; |
5648 | 396 | VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr); |
5649 | 396 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, |
5650 | 396 | &sWriteFuncData); |
5651 | 396 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
5652 | 396 | VSICurlHandleWriteFunc); |
5653 | | |
5654 | 396 | char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {}; |
5655 | 396 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, |
5656 | 396 | szCurlErrBuf); |
5657 | | |
5658 | 396 | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
5659 | | |
5660 | 396 | VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle); |
5661 | | |
5662 | 396 | curl_slist_free_all(headers); |
5663 | | |
5664 | 396 | NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize); |
5665 | | |
5666 | 396 | if (sWriteFuncData.pBuffer == nullptr) |
5667 | 396 | { |
5668 | 396 | curl_easy_cleanup(hCurlHandle); |
5669 | 396 | return nullptr; |
5670 | 396 | } |
5671 | | |
5672 | 0 | char **papszFileList = nullptr; |
5673 | 0 | if (STARTS_WITH_CI(sWriteFuncData.pBuffer, "<?xml") && |
5674 | 0 | strstr(sWriteFuncData.pBuffer, "<ListBucketResult") != nullptr) |
5675 | 0 | { |
5676 | 0 | CPLStringList osFileList; |
5677 | 0 | std::string osBaseURL(pszDirname); |
5678 | 0 | osBaseURL += "/"; |
5679 | 0 | bool bIsTruncated = true; |
5680 | 0 | bool ret = AnalyseS3FileList( |
5681 | 0 | osBaseURL, sWriteFuncData.pBuffer, osFileList, nMaxFiles, |
5682 | 0 | GetS3IgnoredStorageClasses(), bIsTruncated); |
5683 | | // If the list is truncated, then don't report it. |
5684 | 0 | if (ret && !bIsTruncated) |
5685 | 0 | { |
5686 | 0 | if (osFileList.empty()) |
5687 | 0 | { |
5688 | | // To avoid an error to be reported |
5689 | 0 | osFileList.AddString("."); |
5690 | 0 | } |
5691 | 0 | papszFileList = osFileList.StealList(); |
5692 | 0 | *pbGotFileList = true; |
5693 | 0 | } |
5694 | 0 | } |
5695 | 0 | else |
5696 | 0 | { |
5697 | 0 | papszFileList = ParseHTMLFileList( |
5698 | 0 | pszDirname, nMaxFiles, sWriteFuncData.pBuffer, pbGotFileList); |
5699 | 0 | } |
5700 | |
|
5701 | 0 | CPLFree(sWriteFuncData.pBuffer); |
5702 | 0 | curl_easy_cleanup(hCurlHandle); |
5703 | 0 | return papszFileList; |
5704 | 396 | } |
5705 | | |
5706 | 5.66k | return nullptr; |
5707 | 6.30k | } |
5708 | | |
5709 | | /************************************************************************/ |
5710 | | /* GetS3IgnoredStorageClasses() */ |
5711 | | /************************************************************************/ |
5712 | | |
5713 | | std::set<std::string> VSICurlFilesystemHandlerBase::GetS3IgnoredStorageClasses() |
5714 | 0 | { |
5715 | 0 | std::set<std::string> oSetIgnoredStorageClasses; |
5716 | 0 | const char *pszIgnoredStorageClasses = |
5717 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_STORAGE_CLASSES", nullptr); |
5718 | 0 | const char *pszIgnoreGlacierStorage = |
5719 | 0 | CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE", nullptr); |
5720 | 0 | CPLStringList aosIgnoredStorageClasses( |
5721 | 0 | CSLTokenizeString2(pszIgnoredStorageClasses ? pszIgnoredStorageClasses |
5722 | 0 | : "GLACIER,DEEP_ARCHIVE", |
5723 | 0 | ",", 0)); |
5724 | 0 | for (int i = 0; i < aosIgnoredStorageClasses.size(); ++i) |
5725 | 0 | oSetIgnoredStorageClasses.insert(aosIgnoredStorageClasses[i]); |
5726 | 0 | if (pszIgnoredStorageClasses == nullptr && |
5727 | 0 | pszIgnoreGlacierStorage != nullptr && |
5728 | 0 | !CPLTestBool(pszIgnoreGlacierStorage)) |
5729 | 0 | { |
5730 | 0 | oSetIgnoredStorageClasses.clear(); |
5731 | 0 | } |
5732 | 0 | return oSetIgnoredStorageClasses; |
5733 | 0 | } |
5734 | | |
5735 | | /************************************************************************/ |
5736 | | /* Stat() */ |
5737 | | /************************************************************************/ |
5738 | | |
5739 | | int VSICurlFilesystemHandlerBase::Stat(const char *pszFilename, |
5740 | | VSIStatBufL *pStatBuf, int nFlags) |
5741 | 172k | { |
5742 | 172k | if (!cpl::starts_with(std::string_view(pszFilename), GetFSPrefix()) && |
5743 | 17.5k | !StartsWithVSICurlPrefix(pszFilename)) |
5744 | 282 | { |
5745 | 282 | return -1; |
5746 | 282 | } |
5747 | | |
5748 | 171k | memset(pStatBuf, 0, sizeof(VSIStatBufL)); |
5749 | | |
5750 | 171k | if ((nFlags & VSI_STAT_CACHE_ONLY) != 0) |
5751 | 0 | { |
5752 | 0 | cpl::FileProp oFileProp; |
5753 | 0 | if (!GetCachedFileProp(GetURLFromFilename(pszFilename).c_str(), |
5754 | 0 | oFileProp) || |
5755 | 0 | oFileProp.eExists != EXIST_YES) |
5756 | 0 | { |
5757 | 0 | return -1; |
5758 | 0 | } |
5759 | 0 | pStatBuf->st_mode = static_cast<unsigned short>(oFileProp.nMode); |
5760 | 0 | pStatBuf->st_mtime = oFileProp.mTime; |
5761 | 0 | pStatBuf->st_size = oFileProp.fileSize; |
5762 | 0 | return 0; |
5763 | 0 | } |
5764 | | |
5765 | 171k | NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str()); |
5766 | 171k | NetworkStatisticsAction oContextAction("Stat"); |
5767 | | |
5768 | 171k | const std::string osFilename(pszFilename); |
5769 | | |
5770 | 171k | if (!IsAllowedFilename(pszFilename)) |
5771 | 0 | return -1; |
5772 | | |
5773 | 171k | bool bListDir = true; |
5774 | 171k | bool bEmptyDir = false; |
5775 | 171k | std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr, |
5776 | 171k | nullptr, &bListDir, &bEmptyDir, |
5777 | 171k | nullptr, nullptr, nullptr)); |
5778 | | |
5779 | 171k | const char *pszOptionVal = VSIGetPathSpecificOption( |
5780 | 171k | pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO"); |
5781 | 171k | const bool bSkipReadDir = |
5782 | 171k | !bListDir || bEmptyDir || EQUAL(pszOptionVal, "EMPTY_DIR") || |
5783 | 171k | CPLTestBool(pszOptionVal) || !AllowCachedDataFor(pszFilename); |
5784 | | |
5785 | | // Does it look like a FTP directory? |
5786 | 171k | if (STARTS_WITH(osURL.c_str(), "ftp://") && osFilename.back() == '/' && |
5787 | 6 | !bSkipReadDir) |
5788 | 6 | { |
5789 | 6 | char **papszFileList = ReadDirEx(osFilename.c_str(), 0); |
5790 | 6 | if (papszFileList) |
5791 | 0 | { |
5792 | 0 | pStatBuf->st_mode = S_IFDIR; |
5793 | 0 | pStatBuf->st_size = 0; |
5794 | |
|
5795 | 0 | CSLDestroy(papszFileList); |
5796 | |
|
5797 | 0 | return 0; |
5798 | 0 | } |
5799 | 6 | return -1; |
5800 | 6 | } |
5801 | 171k | else if (strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr && |
5802 | 171k | !STARTS_WITH_CI(CPLGetExtensionSafe(osFilename.c_str()).c_str(), |
5803 | 97.6k | "zip") && |
5804 | 97.6k | strstr(osFilename.c_str(), ".zip.") != nullptr && |
5805 | 44.6k | strstr(osFilename.c_str(), ".ZIP.") != nullptr && !bSkipReadDir) |
5806 | 27.4k | { |
5807 | 27.4k | bool bGotFileList = false; |
5808 | 27.4k | char **papszFileList = ReadDirInternal( |
5809 | 27.4k | CPLGetDirnameSafe(osFilename.c_str()).c_str(), 0, &bGotFileList); |
5810 | 27.4k | const bool bFound = |
5811 | 27.4k | VSICurlIsFileInList(papszFileList, |
5812 | 27.4k | CPLGetFilename(osFilename.c_str())) != -1; |
5813 | 27.4k | CSLDestroy(papszFileList); |
5814 | 27.4k | if (bGotFileList && !bFound) |
5815 | 0 | { |
5816 | 0 | return -1; |
5817 | 0 | } |
5818 | 27.4k | } |
5819 | | |
5820 | 171k | VSICurlHandle *poHandle = CreateFileHandle(osFilename.c_str()); |
5821 | 171k | if (poHandle == nullptr) |
5822 | 32.1k | return -1; |
5823 | | |
5824 | 139k | if (poHandle->IsKnownFileSize() || |
5825 | 22.6k | ((nFlags & VSI_STAT_SIZE_FLAG) && !poHandle->IsDirectory() && |
5826 | 6.10k | CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES")))) |
5827 | 123k | { |
5828 | 123k | pStatBuf->st_size = poHandle->GetFileSize(true); |
5829 | 123k | } |
5830 | | |
5831 | 139k | const int nRet = |
5832 | 139k | poHandle->Exists((nFlags & VSI_STAT_SET_ERROR_FLAG) > 0) ? 0 : -1; |
5833 | 139k | pStatBuf->st_mtime = poHandle->GetMTime(); |
5834 | 139k | pStatBuf->st_mode = static_cast<unsigned short>(poHandle->GetMode()); |
5835 | 139k | if (pStatBuf->st_mode == 0) |
5836 | 139k | pStatBuf->st_mode = poHandle->IsDirectory() ? S_IFDIR : S_IFREG; |
5837 | 139k | delete poHandle; |
5838 | 139k | return nRet; |
5839 | 171k | } |
5840 | | |
5841 | | /************************************************************************/ |
5842 | | /* ReadDirInternal() */ |
5843 | | /************************************************************************/ |
5844 | | |
5845 | | char **VSICurlFilesystemHandlerBase::ReadDirInternal(const char *pszDirname, |
5846 | | int nMaxFiles, |
5847 | | bool *pbGotFileList) |
5848 | 268k | { |
5849 | 268k | std::string osDirname(pszDirname); |
5850 | | |
5851 | | // Replace a/b/../c by a/c |
5852 | 268k | const auto posSlashDotDot = osDirname.find("/.."); |
5853 | 268k | if (posSlashDotDot != std::string::npos && posSlashDotDot >= 1) |
5854 | 27.4k | { |
5855 | 27.4k | const auto posPrecedingSlash = |
5856 | 27.4k | osDirname.find_last_of('/', posSlashDotDot - 1); |
5857 | 27.4k | if (posPrecedingSlash != std::string::npos && posPrecedingSlash >= 1) |
5858 | 25.0k | { |
5859 | 25.0k | osDirname.erase(osDirname.begin() + posPrecedingSlash, |
5860 | 25.0k | osDirname.begin() + posSlashDotDot + strlen("/..")); |
5861 | 25.0k | } |
5862 | 27.4k | } |
5863 | | |
5864 | 268k | std::string osDirnameOri(osDirname); |
5865 | 268k | if (osDirname + "/" == GetFSPrefix()) |
5866 | 1.01k | { |
5867 | 1.01k | osDirname += "/"; |
5868 | 1.01k | } |
5869 | 267k | else if (osDirname != GetFSPrefix()) |
5870 | 260k | { |
5871 | 426k | while (!osDirname.empty() && osDirname.back() == '/') |
5872 | 165k | osDirname.erase(osDirname.size() - 1); |
5873 | 260k | } |
5874 | | |
5875 | 268k | if (osDirname.size() < GetFSPrefix().size()) |
5876 | 2.22k | { |
5877 | 2.22k | if (pbGotFileList) |
5878 | 1.98k | *pbGotFileList = true; |
5879 | 2.22k | return nullptr; |
5880 | 2.22k | } |
5881 | | |
5882 | 265k | NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str()); |
5883 | 265k | NetworkStatisticsAction oContextAction("ReadDir"); |
5884 | | |
5885 | 265k | CPLMutexHolder oHolder(&hMutex); |
5886 | | |
5887 | | // If we know the file exists and is not a directory, |
5888 | | // then don't try to list its content. |
5889 | 265k | FileProp cachedFileProp; |
5890 | 265k | if (GetCachedFileProp(GetURLFromFilename(osDirname.c_str()).c_str(), |
5891 | 265k | cachedFileProp) && |
5892 | 115k | cachedFileProp.eExists == EXIST_YES && !cachedFileProp.bIsDirectory) |
5893 | 0 | { |
5894 | 0 | if (osDirnameOri != osDirname) |
5895 | 0 | { |
5896 | 0 | if (GetCachedFileProp((GetURLFromFilename(osDirname) + "/").c_str(), |
5897 | 0 | cachedFileProp) && |
5898 | 0 | cachedFileProp.eExists == EXIST_YES && |
5899 | 0 | !cachedFileProp.bIsDirectory) |
5900 | 0 | { |
5901 | 0 | if (pbGotFileList) |
5902 | 0 | *pbGotFileList = true; |
5903 | 0 | return nullptr; |
5904 | 0 | } |
5905 | 0 | } |
5906 | 0 | else |
5907 | 0 | { |
5908 | 0 | if (pbGotFileList) |
5909 | 0 | *pbGotFileList = true; |
5910 | 0 | return nullptr; |
5911 | 0 | } |
5912 | 0 | } |
5913 | | |
5914 | 265k | CachedDirList cachedDirList; |
5915 | 265k | if (!GetCachedDirList(osDirname.c_str(), cachedDirList)) |
5916 | 13.1k | { |
5917 | 13.1k | cachedDirList.oFileList.Assign(GetFileList(osDirname.c_str(), nMaxFiles, |
5918 | 13.1k | &cachedDirList.bGotFileList), |
5919 | 13.1k | true); |
5920 | 13.1k | if (cachedDirList.bGotFileList && cachedDirList.oFileList.empty()) |
5921 | 0 | { |
5922 | | // To avoid an error to be reported |
5923 | 0 | cachedDirList.oFileList.AddString("."); |
5924 | 0 | } |
5925 | 13.1k | if (nMaxFiles <= 0 || cachedDirList.oFileList.size() < nMaxFiles) |
5926 | 13.1k | { |
5927 | | // Only cache content if we didn't hit the limitation |
5928 | 13.1k | SetCachedDirList(osDirname.c_str(), cachedDirList); |
5929 | 13.1k | } |
5930 | 13.1k | } |
5931 | | |
5932 | 265k | if (pbGotFileList) |
5933 | 150k | *pbGotFileList = cachedDirList.bGotFileList; |
5934 | | |
5935 | 265k | return CSLDuplicate(cachedDirList.oFileList.List()); |
5936 | 265k | } |
5937 | | |
5938 | | /************************************************************************/ |
5939 | | /* InvalidateDirContent() */ |
5940 | | /************************************************************************/ |
5941 | | |
5942 | | void VSICurlFilesystemHandlerBase::InvalidateDirContent( |
5943 | | const std::string &osDirname) |
5944 | 0 | { |
5945 | 0 | CPLMutexHolder oHolder(&hMutex); |
5946 | |
|
5947 | 0 | CachedDirList oCachedDirList; |
5948 | 0 | if (oCacheDirList.tryGet(osDirname, oCachedDirList)) |
5949 | 0 | { |
5950 | 0 | nCachedFilesInDirList -= oCachedDirList.oFileList.size(); |
5951 | 0 | oCacheDirList.remove(osDirname); |
5952 | 0 | } |
5953 | 0 | } |
5954 | | |
5955 | | /************************************************************************/ |
5956 | | /* ReadDirEx() */ |
5957 | | /************************************************************************/ |
5958 | | |
5959 | | char **VSICurlFilesystemHandlerBase::ReadDirEx(const char *pszDirname, |
5960 | | int nMaxFiles) |
5961 | 11.9k | { |
5962 | 11.9k | return ReadDirInternal(pszDirname, nMaxFiles, nullptr); |
5963 | 11.9k | } |
5964 | | |
5965 | | /************************************************************************/ |
5966 | | /* SiblingFiles() */ |
5967 | | /************************************************************************/ |
5968 | | |
5969 | | char **VSICurlFilesystemHandlerBase::SiblingFiles(const char *pszFilename) |
5970 | 12.2k | { |
5971 | | /* Small optimization to avoid unnecessary stat'ing from PAux or ENVI */ |
5972 | | /* drivers. The MBTiles driver needs no companion file. */ |
5973 | 12.2k | if (EQUAL(CPLGetExtensionSafe(pszFilename).c_str(), "mbtiles")) |
5974 | 0 | { |
5975 | 0 | return static_cast<char **>(CPLCalloc(1, sizeof(char *))); |
5976 | 0 | } |
5977 | 12.2k | return nullptr; |
5978 | 12.2k | } |
5979 | | |
5980 | | /************************************************************************/ |
5981 | | /* GetFileMetadata() */ |
5982 | | /************************************************************************/ |
5983 | | |
5984 | | char **VSICurlFilesystemHandlerBase::GetFileMetadata(const char *pszFilename, |
5985 | | const char *pszDomain, |
5986 | | CSLConstList) |
5987 | 0 | { |
5988 | 0 | if (pszDomain == nullptr || !EQUAL(pszDomain, "HEADERS")) |
5989 | 0 | return nullptr; |
5990 | 0 | std::unique_ptr<VSICurlHandle> poHandle(CreateFileHandle(pszFilename)); |
5991 | 0 | if (poHandle == nullptr) |
5992 | 0 | return nullptr; |
5993 | | |
5994 | 0 | NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str()); |
5995 | 0 | NetworkStatisticsAction oContextAction("GetFileMetadata"); |
5996 | |
|
5997 | 0 | poHandle->GetFileSizeOrHeaders(true, true); |
5998 | 0 | return CSLDuplicate(poHandle->GetHeaders().List()); |
5999 | 0 | } |
6000 | | |
6001 | | /************************************************************************/ |
6002 | | /* VSIAppendWriteHandle() */ |
6003 | | /************************************************************************/ |
6004 | | |
6005 | | VSIAppendWriteHandle::VSIAppendWriteHandle(VSICurlFilesystemHandlerBase *poFS, |
6006 | | const char *pszFSPrefix, |
6007 | | const char *pszFilename, |
6008 | | int nChunkSize) |
6009 | 0 | : m_poFS(poFS), m_osFSPrefix(pszFSPrefix), m_osFilename(pszFilename), |
6010 | 0 | m_oRetryParameters(CPLStringList(CPLHTTPGetOptionsFromEnv(pszFilename))), |
6011 | 0 | m_nBufferSize(nChunkSize) |
6012 | 0 | { |
6013 | 0 | m_pabyBuffer = static_cast<GByte *>(VSIMalloc(m_nBufferSize)); |
6014 | 0 | if (m_pabyBuffer == nullptr) |
6015 | 0 | { |
6016 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
6017 | 0 | "Cannot allocate working buffer for %s writing", |
6018 | 0 | m_osFSPrefix.c_str()); |
6019 | 0 | } |
6020 | 0 | } |
6021 | | |
6022 | | /************************************************************************/ |
6023 | | /* ~VSIAppendWriteHandle() */ |
6024 | | /************************************************************************/ |
6025 | | |
6026 | | VSIAppendWriteHandle::~VSIAppendWriteHandle() |
6027 | 0 | { |
6028 | | /* WARNING: implementation should call Close() themselves */ |
6029 | | /* cannot be done safely from here, since Send() can be called. */ |
6030 | 0 | CPLFree(m_pabyBuffer); |
6031 | 0 | } |
6032 | | |
6033 | | /************************************************************************/ |
6034 | | /* Seek() */ |
6035 | | /************************************************************************/ |
6036 | | |
6037 | | int VSIAppendWriteHandle::Seek(vsi_l_offset nOffset, int nWhence) |
6038 | 0 | { |
6039 | 0 | if (!((nWhence == SEEK_SET && nOffset == m_nCurOffset) || |
6040 | 0 | (nWhence == SEEK_CUR && nOffset == 0) || |
6041 | 0 | (nWhence == SEEK_END && nOffset == 0))) |
6042 | 0 | { |
6043 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
6044 | 0 | "Seek not supported on writable %s files", |
6045 | 0 | m_osFSPrefix.c_str()); |
6046 | 0 | m_bError = true; |
6047 | 0 | return -1; |
6048 | 0 | } |
6049 | 0 | return 0; |
6050 | 0 | } |
6051 | | |
6052 | | /************************************************************************/ |
6053 | | /* Tell() */ |
6054 | | /************************************************************************/ |
6055 | | |
6056 | | vsi_l_offset VSIAppendWriteHandle::Tell() |
6057 | 0 | { |
6058 | 0 | return m_nCurOffset; |
6059 | 0 | } |
6060 | | |
6061 | | /************************************************************************/ |
6062 | | /* Read() */ |
6063 | | /************************************************************************/ |
6064 | | |
6065 | | size_t VSIAppendWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */) |
6066 | 0 | { |
6067 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
6068 | 0 | "Read not supported on writable %s files", m_osFSPrefix.c_str()); |
6069 | 0 | m_bError = true; |
6070 | 0 | return 0; |
6071 | 0 | } |
6072 | | |
6073 | | /************************************************************************/ |
6074 | | /* ReadCallBackBuffer() */ |
6075 | | /************************************************************************/ |
6076 | | |
6077 | | size_t VSIAppendWriteHandle::ReadCallBackBuffer(char *buffer, size_t size, |
6078 | | size_t nitems, void *instream) |
6079 | 0 | { |
6080 | 0 | VSIAppendWriteHandle *poThis = |
6081 | 0 | static_cast<VSIAppendWriteHandle *>(instream); |
6082 | 0 | const int nSizeMax = static_cast<int>(size * nitems); |
6083 | 0 | const int nSizeToWrite = std::min( |
6084 | 0 | nSizeMax, poThis->m_nBufferOff - poThis->m_nBufferOffReadCallback); |
6085 | 0 | memcpy(buffer, poThis->m_pabyBuffer + poThis->m_nBufferOffReadCallback, |
6086 | 0 | nSizeToWrite); |
6087 | 0 | poThis->m_nBufferOffReadCallback += nSizeToWrite; |
6088 | 0 | return nSizeToWrite; |
6089 | 0 | } |
6090 | | |
6091 | | /************************************************************************/ |
6092 | | /* Write() */ |
6093 | | /************************************************************************/ |
6094 | | |
6095 | | size_t VSIAppendWriteHandle::Write(const void *pBuffer, size_t nBytes) |
6096 | 0 | { |
6097 | 0 | if (m_bError) |
6098 | 0 | return 0; |
6099 | | |
6100 | 0 | size_t nBytesToWrite = nBytes; |
6101 | 0 | if (nBytesToWrite == 0) |
6102 | 0 | return 0; |
6103 | | |
6104 | 0 | const GByte *pabySrcBuffer = reinterpret_cast<const GByte *>(pBuffer); |
6105 | 0 | while (nBytesToWrite > 0) |
6106 | 0 | { |
6107 | 0 | if (m_nBufferOff == m_nBufferSize) |
6108 | 0 | { |
6109 | 0 | if (!Send(false)) |
6110 | 0 | { |
6111 | 0 | m_bError = true; |
6112 | 0 | return 0; |
6113 | 0 | } |
6114 | 0 | m_nBufferOff = 0; |
6115 | 0 | } |
6116 | | |
6117 | 0 | const int nToWriteInBuffer = static_cast<int>(std::min( |
6118 | 0 | static_cast<size_t>(m_nBufferSize - m_nBufferOff), nBytesToWrite)); |
6119 | 0 | memcpy(m_pabyBuffer + m_nBufferOff, pabySrcBuffer, nToWriteInBuffer); |
6120 | 0 | pabySrcBuffer += nToWriteInBuffer; |
6121 | 0 | m_nBufferOff += nToWriteInBuffer; |
6122 | 0 | m_nCurOffset += nToWriteInBuffer; |
6123 | 0 | nBytesToWrite -= nToWriteInBuffer; |
6124 | 0 | } |
6125 | 0 | return nBytes; |
6126 | 0 | } |
6127 | | |
6128 | | /************************************************************************/ |
6129 | | /* Close() */ |
6130 | | /************************************************************************/ |
6131 | | |
6132 | | int VSIAppendWriteHandle::Close() |
6133 | 0 | { |
6134 | 0 | int nRet = 0; |
6135 | 0 | if (!m_bClosed) |
6136 | 0 | { |
6137 | 0 | m_bClosed = true; |
6138 | 0 | if (!m_bError && !Send(true)) |
6139 | 0 | nRet = -1; |
6140 | 0 | } |
6141 | 0 | return nRet; |
6142 | 0 | } |
6143 | | |
6144 | | /************************************************************************/ |
6145 | | /* CurlRequestHelper() */ |
6146 | | /************************************************************************/ |
6147 | | |
6148 | | CurlRequestHelper::CurlRequestHelper() |
6149 | 2.82k | { |
6150 | 2.82k | VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr); |
6151 | 2.82k | VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr, |
6152 | 2.82k | nullptr); |
6153 | 2.82k | } |
6154 | | |
6155 | | /************************************************************************/ |
6156 | | /* ~CurlRequestHelper() */ |
6157 | | /************************************************************************/ |
6158 | | |
6159 | | CurlRequestHelper::~CurlRequestHelper() |
6160 | 2.82k | { |
6161 | 2.82k | CPLFree(sWriteFuncData.pBuffer); |
6162 | 2.82k | CPLFree(sWriteFuncHeaderData.pBuffer); |
6163 | 2.82k | } |
6164 | | |
6165 | | /************************************************************************/ |
6166 | | /* perform() */ |
6167 | | /************************************************************************/ |
6168 | | |
6169 | | long CurlRequestHelper::perform(CURL *hCurlHandle, struct curl_slist *headers, |
6170 | | VSICurlFilesystemHandlerBase *poFS, |
6171 | | IVSIS3LikeHandleHelper *poS3HandleHelper) |
6172 | 2.82k | { |
6173 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers); |
6174 | | |
6175 | 2.82k | poS3HandleHelper->ResetQueryParameters(); |
6176 | | |
6177 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData); |
6178 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, |
6179 | 2.82k | VSICurlHandleWriteFunc); |
6180 | | |
6181 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, |
6182 | 2.82k | &sWriteFuncHeaderData); |
6183 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, |
6184 | 2.82k | VSICurlHandleWriteFunc); |
6185 | | |
6186 | 2.82k | szCurlErrBuf[0] = '\0'; |
6187 | 2.82k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf); |
6188 | | |
6189 | 2.82k | VSICURLMultiPerform(poFS->GetCurlMultiHandleFor(poS3HandleHelper->GetURL()), |
6190 | 2.82k | hCurlHandle); |
6191 | | |
6192 | 2.82k | VSICURLResetHeaderAndWriterFunctions(hCurlHandle); |
6193 | | |
6194 | 2.82k | curl_slist_free_all(headers); |
6195 | | |
6196 | 2.82k | long response_code = 0; |
6197 | 2.82k | curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code); |
6198 | 2.82k | return response_code; |
6199 | 2.82k | } |
6200 | | |
6201 | | /************************************************************************/ |
6202 | | /* NetworkStatisticsLogger */ |
6203 | | /************************************************************************/ |
6204 | | |
// Global variables: the single logger instance shared by all static methods,
// and the tri-state enabled flag (set to TRUE/FALSE by ReadEnabled(), put
// back to -1 by Reset()).
NetworkStatisticsLogger NetworkStatisticsLogger::gInstance{};
int NetworkStatisticsLogger::gnEnabled = -1;  // unknown state
6208 | | |
6209 | | static void ShowNetworkStats() |
6210 | 0 | { |
6211 | 0 | printf("Network statistics:\n%s\n", // ok |
6212 | 0 | NetworkStatisticsLogger::GetReportAsSerializedJSON().c_str()); |
6213 | 0 | } |
6214 | | |
6215 | | void NetworkStatisticsLogger::ReadEnabled() |
6216 | 17 | { |
6217 | 17 | const bool bShowNetworkStats = |
6218 | 17 | CPLTestBool(CPLGetConfigOption("CPL_VSIL_SHOW_NETWORK_STATS", "NO")); |
6219 | 17 | gnEnabled = |
6220 | 17 | (bShowNetworkStats || CPLTestBool(CPLGetConfigOption( |
6221 | 17 | "CPL_VSIL_NETWORK_STATS_ENABLED", "NO"))) |
6222 | 17 | ? TRUE |
6223 | 17 | : FALSE; |
6224 | 17 | if (bShowNetworkStats) |
6225 | 0 | { |
6226 | 0 | static bool bRegistered = false; |
6227 | 0 | if (!bRegistered) |
6228 | 0 | { |
6229 | 0 | bRegistered = true; |
6230 | 0 | atexit(ShowNetworkStats); |
6231 | 0 | } |
6232 | 0 | } |
6233 | 17 | } |
6234 | | |
6235 | | void NetworkStatisticsLogger::EnterFileSystem(const char *pszName) |
6236 | 730k | { |
6237 | 730k | if (!IsEnabled()) |
6238 | 730k | return; |
6239 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6240 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back( |
6241 | 0 | ContextPathItem(ContextPathType::FILESYSTEM, pszName)); |
6242 | 0 | } |
6243 | | |
6244 | | void NetworkStatisticsLogger::LeaveFileSystem() |
6245 | 730k | { |
6246 | 730k | if (!IsEnabled()) |
6247 | 730k | return; |
6248 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6249 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back(); |
6250 | 0 | } |
6251 | | |
6252 | | void NetworkStatisticsLogger::EnterFile(const char *pszName) |
6253 | 179k | { |
6254 | 179k | if (!IsEnabled()) |
6255 | 179k | return; |
6256 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6257 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back( |
6258 | 0 | ContextPathItem(ContextPathType::FILE, pszName)); |
6259 | 0 | } |
6260 | | |
6261 | | void NetworkStatisticsLogger::LeaveFile() |
6262 | 179k | { |
6263 | 179k | if (!IsEnabled()) |
6264 | 179k | return; |
6265 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6266 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back(); |
6267 | 0 | } |
6268 | | |
6269 | | void NetworkStatisticsLogger::EnterAction(const char *pszName) |
6270 | 730k | { |
6271 | 730k | if (!IsEnabled()) |
6272 | 730k | return; |
6273 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6274 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back( |
6275 | 0 | ContextPathItem(ContextPathType::ACTION, pszName)); |
6276 | 0 | } |
6277 | | |
6278 | | void NetworkStatisticsLogger::LeaveAction() |
6279 | 730k | { |
6280 | 730k | if (!IsEnabled()) |
6281 | 730k | return; |
6282 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6283 | 0 | gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back(); |
6284 | 0 | } |
6285 | | |
6286 | | std::vector<NetworkStatisticsLogger::Counters *> |
6287 | | NetworkStatisticsLogger::GetCountersForContext() |
6288 | 0 | { |
6289 | 0 | std::vector<Counters *> v; |
6290 | 0 | const auto &contextPath = gInstance.m_mapThreadIdToContextPath[CPLGetPID()]; |
6291 | |
|
6292 | 0 | Stats *curStats = &m_stats; |
6293 | 0 | v.push_back(&(curStats->counters)); |
6294 | |
|
6295 | 0 | bool inFileSystem = false; |
6296 | 0 | bool inFile = false; |
6297 | 0 | bool inAction = false; |
6298 | 0 | for (const auto &item : contextPath) |
6299 | 0 | { |
6300 | 0 | if (item.eType == ContextPathType::FILESYSTEM) |
6301 | 0 | { |
6302 | 0 | if (inFileSystem) |
6303 | 0 | continue; |
6304 | 0 | inFileSystem = true; |
6305 | 0 | } |
6306 | 0 | else if (item.eType == ContextPathType::FILE) |
6307 | 0 | { |
6308 | 0 | if (inFile) |
6309 | 0 | continue; |
6310 | 0 | inFile = true; |
6311 | 0 | } |
6312 | 0 | else if (item.eType == ContextPathType::ACTION) |
6313 | 0 | { |
6314 | 0 | if (inAction) |
6315 | 0 | continue; |
6316 | 0 | inAction = true; |
6317 | 0 | } |
6318 | | |
6319 | 0 | curStats = &(curStats->children[item]); |
6320 | 0 | v.push_back(&(curStats->counters)); |
6321 | 0 | } |
6322 | |
|
6323 | 0 | return v; |
6324 | 0 | } |
6325 | | |
6326 | | void NetworkStatisticsLogger::LogGET(size_t nDownloadedBytes) |
6327 | 32.3k | { |
6328 | 32.3k | if (!IsEnabled()) |
6329 | 32.3k | return; |
6330 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6331 | 0 | for (auto counters : gInstance.GetCountersForContext()) |
6332 | 0 | { |
6333 | 0 | counters->nGET++; |
6334 | 0 | counters->nGETDownloadedBytes += nDownloadedBytes; |
6335 | 0 | } |
6336 | 0 | } |
6337 | | |
6338 | | void NetworkStatisticsLogger::LogPUT(size_t nUploadedBytes) |
6339 | 0 | { |
6340 | 0 | if (!IsEnabled()) |
6341 | 0 | return; |
6342 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6343 | 0 | for (auto counters : gInstance.GetCountersForContext()) |
6344 | 0 | { |
6345 | 0 | counters->nPUT++; |
6346 | 0 | counters->nPUTUploadedBytes += nUploadedBytes; |
6347 | 0 | } |
6348 | 0 | } |
6349 | | |
6350 | | void NetworkStatisticsLogger::LogHEAD() |
6351 | 227k | { |
6352 | 227k | if (!IsEnabled()) |
6353 | 227k | return; |
6354 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6355 | 0 | for (auto counters : gInstance.GetCountersForContext()) |
6356 | 0 | { |
6357 | 0 | counters->nHEAD++; |
6358 | 0 | } |
6359 | 0 | } |
6360 | | |
6361 | | void NetworkStatisticsLogger::LogPOST(size_t nUploadedBytes, |
6362 | | size_t nDownloadedBytes) |
6363 | 0 | { |
6364 | 0 | if (!IsEnabled()) |
6365 | 0 | return; |
6366 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6367 | 0 | for (auto counters : gInstance.GetCountersForContext()) |
6368 | 0 | { |
6369 | 0 | counters->nPOST++; |
6370 | 0 | counters->nPOSTUploadedBytes += nUploadedBytes; |
6371 | 0 | counters->nPOSTDownloadedBytes += nDownloadedBytes; |
6372 | 0 | } |
6373 | 0 | } |
6374 | | |
6375 | | void NetworkStatisticsLogger::LogDELETE() |
6376 | 0 | { |
6377 | 0 | if (!IsEnabled()) |
6378 | 0 | return; |
6379 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6380 | 0 | for (auto counters : gInstance.GetCountersForContext()) |
6381 | 0 | { |
6382 | 0 | counters->nDELETE++; |
6383 | 0 | } |
6384 | 0 | } |
6385 | | |
6386 | | void NetworkStatisticsLogger::Reset() |
6387 | 0 | { |
6388 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6389 | 0 | gInstance.m_stats = Stats(); |
6390 | 0 | gnEnabled = -1; |
6391 | 0 | } |
6392 | | |
6393 | | void NetworkStatisticsLogger::Stats::AsJSON(CPLJSONObject &oJSON) const |
6394 | 0 | { |
6395 | 0 | CPLJSONObject oMethods; |
6396 | 0 | if (counters.nHEAD) |
6397 | 0 | oMethods.Add("HEAD/count", counters.nHEAD); |
6398 | 0 | if (counters.nGET) |
6399 | 0 | oMethods.Add("GET/count", counters.nGET); |
6400 | 0 | if (counters.nGETDownloadedBytes) |
6401 | 0 | oMethods.Add("GET/downloaded_bytes", counters.nGETDownloadedBytes); |
6402 | 0 | if (counters.nPUT) |
6403 | 0 | oMethods.Add("PUT/count", counters.nPUT); |
6404 | 0 | if (counters.nPUTUploadedBytes) |
6405 | 0 | oMethods.Add("PUT/uploaded_bytes", counters.nPUTUploadedBytes); |
6406 | 0 | if (counters.nPOST) |
6407 | 0 | oMethods.Add("POST/count", counters.nPOST); |
6408 | 0 | if (counters.nPOSTUploadedBytes) |
6409 | 0 | oMethods.Add("POST/uploaded_bytes", counters.nPOSTUploadedBytes); |
6410 | 0 | if (counters.nPOSTDownloadedBytes) |
6411 | 0 | oMethods.Add("POST/downloaded_bytes", counters.nPOSTDownloadedBytes); |
6412 | 0 | if (counters.nDELETE) |
6413 | 0 | oMethods.Add("DELETE/count", counters.nDELETE); |
6414 | 0 | oJSON.Add("methods", oMethods); |
6415 | 0 | CPLJSONObject oFiles; |
6416 | 0 | bool bFilesAdded = false; |
6417 | 0 | for (const auto &kv : children) |
6418 | 0 | { |
6419 | 0 | CPLJSONObject childJSON; |
6420 | 0 | kv.second.AsJSON(childJSON); |
6421 | 0 | if (kv.first.eType == ContextPathType::FILESYSTEM) |
6422 | 0 | { |
6423 | 0 | std::string osName(kv.first.osName); |
6424 | 0 | if (!osName.empty() && osName[0] == '/') |
6425 | 0 | osName = osName.substr(1); |
6426 | 0 | if (!osName.empty() && osName.back() == '/') |
6427 | 0 | osName.pop_back(); |
6428 | 0 | oJSON.Add(("handlers/" + osName).c_str(), childJSON); |
6429 | 0 | } |
6430 | 0 | else if (kv.first.eType == ContextPathType::FILE) |
6431 | 0 | { |
6432 | 0 | if (!bFilesAdded) |
6433 | 0 | { |
6434 | 0 | bFilesAdded = true; |
6435 | 0 | oJSON.Add("files", oFiles); |
6436 | 0 | } |
6437 | 0 | oFiles.AddNoSplitName(kv.first.osName.c_str(), childJSON); |
6438 | 0 | } |
6439 | 0 | else if (kv.first.eType == ContextPathType::ACTION) |
6440 | 0 | { |
6441 | 0 | oJSON.Add(("actions/" + kv.first.osName).c_str(), childJSON); |
6442 | 0 | } |
6443 | 0 | } |
6444 | 0 | } |
6445 | | |
6446 | | std::string NetworkStatisticsLogger::GetReportAsSerializedJSON() |
6447 | 0 | { |
6448 | 0 | std::lock_guard<std::mutex> oLock(gInstance.m_mutex); |
6449 | |
|
6450 | 0 | CPLJSONObject oJSON; |
6451 | 0 | gInstance.m_stats.AsJSON(oJSON); |
6452 | 0 | return oJSON.Format(CPLJSONObject::PrettyFormat::Pretty); |
6453 | 0 | } |
6454 | | |
6455 | | } /* end of namespace cpl */ |
6456 | | |
6457 | | /************************************************************************/ |
6458 | | /* VSICurlParseUnixPermissions() */ |
6459 | | /************************************************************************/ |
6460 | | |
// Converts a 9-character permission string such as "rwxr-xr-x" into the
// corresponding combination of S_I* mode bits.
//
// Returns 0 when the string is not exactly 9 characters long. A position
// whose character is not the expected letter contributes no bit.
int VSICurlParseUnixPermissions(const char *pszPermissions)
{
    // Expected letter and associated mode bit for each position.
    static const struct
    {
        char chFlag;
        int nBit;
    } asPositions[] = {
        {'r', S_IRUSR}, {'w', S_IWUSR}, {'x', S_IXUSR},
        {'r', S_IRGRP}, {'w', S_IWGRP}, {'x', S_IXGRP},
        {'r', S_IROTH}, {'w', S_IWOTH}, {'x', S_IXOTH},
    };
    constexpr size_t nPositions = sizeof(asPositions) / sizeof(asPositions[0]);

    if (strlen(pszPermissions) != nPositions)
        return 0;

    int nMode = 0;
    for (size_t i = 0; i < nPositions; ++i)
    {
        if (pszPermissions[i] == asPositions[i].chFlag)
            nMode |= asPositions[i].nBit;
    }
    return nMode;
}
6486 | | |
6487 | | /************************************************************************/ |
6488 | | /* Cache of file properties. */ |
6489 | | /************************************************************************/ |
6490 | | |
// Held by every VSICURL*CachedFileProp*() / VSICURLDestroyCacheFileProp()
// function while it reads or modifies poCacheFileProp.
static std::mutex oCacheFilePropMutex;
// LRU cache of file properties, keyed by the URL string passed to the
// accessors. Stays nullptr until VSICURLSetCachedFileProp() creates it, and
// is reset to nullptr by VSICURLDestroyCacheFileProp().
static lru11::Cache<std::string, cpl::FileProp> *poCacheFileProp = nullptr;
6493 | | |
6494 | | /************************************************************************/ |
6495 | | /* VSICURLGetCachedFileProp() */ |
6496 | | /************************************************************************/ |
6497 | | |
6498 | | bool VSICURLGetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp) |
6499 | 960k | { |
6500 | 960k | std::lock_guard<std::mutex> oLock(oCacheFilePropMutex); |
6501 | 960k | return poCacheFileProp != nullptr && |
6502 | 957k | poCacheFileProp->tryGet(std::string(pszURL), oFileProp) && |
6503 | | // Let a chance to use new auth parameters |
6504 | 352k | !(oFileProp.eExists == cpl::EXIST_NO && |
6505 | 345k | gnGenerationAuthParameters != oFileProp.nGenerationAuthParameters); |
6506 | 960k | } |
6507 | | |
6508 | | /************************************************************************/ |
6509 | | /* VSICURLSetCachedFileProp() */ |
6510 | | /************************************************************************/ |
6511 | | |
6512 | | void VSICURLSetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp) |
6513 | 33.8k | { |
6514 | 33.8k | std::lock_guard<std::mutex> oLock(oCacheFilePropMutex); |
6515 | 33.8k | if (poCacheFileProp == nullptr) |
6516 | 17 | poCacheFileProp = |
6517 | 17 | new lru11::Cache<std::string, cpl::FileProp>(100 * 1024); |
6518 | 33.8k | oFileProp.nGenerationAuthParameters = gnGenerationAuthParameters; |
6519 | 33.8k | poCacheFileProp->insert(std::string(pszURL), oFileProp); |
6520 | 33.8k | } |
6521 | | |
6522 | | /************************************************************************/ |
6523 | | /* VSICURLInvalidateCachedFileProp() */ |
6524 | | /************************************************************************/ |
6525 | | |
6526 | | void VSICURLInvalidateCachedFileProp(const char *pszURL) |
6527 | 0 | { |
6528 | 0 | std::lock_guard<std::mutex> oLock(oCacheFilePropMutex); |
6529 | 0 | if (poCacheFileProp != nullptr) |
6530 | 0 | poCacheFileProp->remove(std::string(pszURL)); |
6531 | 0 | } |
6532 | | |
6533 | | /************************************************************************/ |
6534 | | /* VSICURLInvalidateCachedFilePropPrefix() */ |
6535 | | /************************************************************************/ |
6536 | | |
6537 | | void VSICURLInvalidateCachedFilePropPrefix(const char *pszURL) |
6538 | 0 | { |
6539 | 0 | std::lock_guard<std::mutex> oLock(oCacheFilePropMutex); |
6540 | 0 | if (poCacheFileProp != nullptr) |
6541 | 0 | { |
6542 | 0 | std::list<std::string> keysToRemove; |
6543 | 0 | const size_t nURLSize = strlen(pszURL); |
6544 | 0 | auto lambda = |
6545 | 0 | [&keysToRemove, &pszURL, nURLSize]( |
6546 | 0 | const lru11::KeyValuePair<std::string, cpl::FileProp> &kv) |
6547 | 0 | { |
6548 | 0 | if (strncmp(kv.key.c_str(), pszURL, nURLSize) == 0) |
6549 | 0 | keysToRemove.push_back(kv.key); |
6550 | 0 | }; |
6551 | 0 | poCacheFileProp->cwalk(lambda); |
6552 | 0 | for (const auto &key : keysToRemove) |
6553 | 0 | poCacheFileProp->remove(key); |
6554 | 0 | } |
6555 | 0 | } |
6556 | | |
6557 | | /************************************************************************/ |
6558 | | /* VSICURLDestroyCacheFileProp() */ |
6559 | | /************************************************************************/ |
6560 | | |
6561 | | void VSICURLDestroyCacheFileProp() |
6562 | 0 | { |
6563 | 0 | std::lock_guard<std::mutex> oLock(oCacheFilePropMutex); |
6564 | 0 | delete poCacheFileProp; |
6565 | 0 | poCacheFileProp = nullptr; |
6566 | 0 | } |
6567 | | |
6568 | | /************************************************************************/ |
6569 | | /* VSICURLMultiCleanup() */ |
6570 | | /************************************************************************/ |
6571 | | |
6572 | | void VSICURLMultiCleanup(CURLM *hCurlMultiHandle) |
6573 | 45 | { |
6574 | | #if defined(CURL_AT_LEAST_VERSION) && defined(_WIN32) |
6575 | | // Since curl 8.20.0, auxiliary threads are used for DNS resolution |
6576 | | // Trying to join them when detaching the DLL results in a hang. |
6577 | | // See https://github.com/curl/curl/issues/21466#issuecomment-4372138595 |
6578 | | #if CURL_AT_LEAST_VERSION(8, 20, 0) |
6579 | | if (GDALIsInGlobalDestructorFromDLLMain()) |
6580 | | curl_multi_setopt(hCurlMultiHandle, CURLMOPT_QUICK_EXIT, 1L); |
6581 | | #endif |
6582 | | #endif |
6583 | | |
6584 | 45 | void *old_handler = CPLHTTPIgnoreSigPipe(); |
6585 | 45 | curl_multi_cleanup(hCurlMultiHandle); |
6586 | 45 | CPLHTTPRestoreSigPipeHandler(old_handler); |
6587 | 45 | } |
6588 | | |
6589 | | /************************************************************************/ |
6590 | | /* VSICurlInstallReadCbk() */ |
6591 | | /************************************************************************/ |
6592 | | |
6593 | | int VSICurlInstallReadCbk(VSILFILE *fp, VSICurlReadCbkFunc pfnReadCbk, |
6594 | | void *pfnUserData, int bStopOnInterruptUntilUninstall) |
6595 | 0 | { |
6596 | 0 | return reinterpret_cast<cpl::VSICurlHandle *>(fp)->InstallReadCbk( |
6597 | 0 | pfnReadCbk, pfnUserData, bStopOnInterruptUntilUninstall); |
6598 | 0 | } |
6599 | | |
6600 | | /************************************************************************/ |
6601 | | /* VSICurlUninstallReadCbk() */ |
6602 | | /************************************************************************/ |
6603 | | |
6604 | | int VSICurlUninstallReadCbk(VSILFILE *fp) |
6605 | 0 | { |
6606 | 0 | return reinterpret_cast<cpl::VSICurlHandle *>(fp)->UninstallReadCbk(); |
6607 | 0 | } |
6608 | | |
6609 | | /************************************************************************/ |
6610 | | /* VSICurlSetOptions() */ |
6611 | | /************************************************************************/ |
6612 | | |
6613 | | struct curl_slist *VSICurlSetOptions(CURL *hCurlHandle, const char *pszURL, |
6614 | | const char *const *papszOptions) |
6615 | 264k | { |
6616 | 264k | struct curl_slist *headers = static_cast<struct curl_slist *>( |
6617 | 264k | CPLHTTPSetOptions(hCurlHandle, pszURL, papszOptions)); |
6618 | | |
6619 | 264k | long option = CURLFTPMETHOD_SINGLECWD; |
6620 | 264k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option); |
6621 | | |
6622 | | // ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ |
6623 | | // doesn't like EPSV command, |
6624 | 264k | unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0); |
6625 | | |
6626 | 264k | return headers; |
6627 | 264k | } |
6628 | | |
6629 | | /************************************************************************/ |
6630 | | /* VSICurlSetContentTypeFromExt() */ |
6631 | | /************************************************************************/ |
6632 | | |
6633 | | struct curl_slist *VSICurlSetContentTypeFromExt(struct curl_slist *poList, |
6634 | | const char *pszPath) |
6635 | 0 | { |
6636 | 0 | struct curl_slist *iter = poList; |
6637 | 0 | while (iter != nullptr) |
6638 | 0 | { |
6639 | 0 | if (STARTS_WITH_CI(iter->data, "Content-Type")) |
6640 | 0 | { |
6641 | 0 | return poList; |
6642 | 0 | } |
6643 | 0 | iter = iter->next; |
6644 | 0 | } |
6645 | | |
6646 | 0 | static const struct |
6647 | 0 | { |
6648 | 0 | const char *ext; |
6649 | 0 | const char *mime; |
6650 | 0 | } aosExtMimePairs[] = { |
6651 | 0 | {"txt", "text/plain"}, {"json", "application/json"}, |
6652 | 0 | {"tif", "image/tiff"}, {"tiff", "image/tiff"}, |
6653 | 0 | {"jpg", "image/jpeg"}, {"jpeg", "image/jpeg"}, |
6654 | 0 | {"jp2", "image/jp2"}, {"jpx", "image/jp2"}, |
6655 | 0 | {"j2k", "image/jp2"}, {"jpc", "image/jp2"}, |
6656 | 0 | {"png", "image/png"}, |
6657 | 0 | }; |
6658 | |
|
6659 | 0 | const std::string osExt = CPLGetExtensionSafe(pszPath); |
6660 | 0 | if (!osExt.empty()) |
6661 | 0 | { |
6662 | 0 | for (const auto &pair : aosExtMimePairs) |
6663 | 0 | { |
6664 | 0 | if (EQUAL(osExt.c_str(), pair.ext)) |
6665 | 0 | { |
6666 | |
|
6667 | 0 | const std::string osContentType( |
6668 | 0 | CPLSPrintf("Content-Type: %s", pair.mime)); |
6669 | 0 | poList = curl_slist_append(poList, osContentType.c_str()); |
6670 | | #ifdef DEBUG_VERBOSE |
6671 | | CPLDebug("HTTP", "Setting %s, based on lookup table.", |
6672 | | osContentType.c_str()); |
6673 | | #endif |
6674 | 0 | break; |
6675 | 0 | } |
6676 | 0 | } |
6677 | 0 | } |
6678 | |
|
6679 | 0 | return poList; |
6680 | 0 | } |
6681 | | |
6682 | | /************************************************************************/ |
6683 | | /* VSICurlSetCreationHeadersFromOptions() */ |
6684 | | /************************************************************************/ |
6685 | | |
6686 | | struct curl_slist *VSICurlSetCreationHeadersFromOptions( |
6687 | | struct curl_slist *headers, CSLConstList papszOptions, const char *pszPath) |
6688 | 0 | { |
6689 | 0 | bool bContentTypeFound = false; |
6690 | 0 | for (CSLConstList papszIter = papszOptions; papszIter && *papszIter; |
6691 | 0 | ++papszIter) |
6692 | 0 | { |
6693 | 0 | char *pszKey = nullptr; |
6694 | 0 | const char *pszValue = CPLParseNameValue(*papszIter, &pszKey); |
6695 | 0 | if (pszKey && pszValue) |
6696 | 0 | { |
6697 | 0 | if (EQUAL(pszKey, "Content-Type")) |
6698 | 0 | { |
6699 | 0 | bContentTypeFound = true; |
6700 | 0 | } |
6701 | 0 | headers = curl_slist_append(headers, |
6702 | 0 | CPLSPrintf("%s: %s", pszKey, pszValue)); |
6703 | 0 | } |
6704 | 0 | CPLFree(pszKey); |
6705 | 0 | } |
6706 | | |
6707 | | // If Content-type not found in papszOptions, try to set it from the |
6708 | | // filename exstension. |
6709 | 0 | if (!bContentTypeFound) |
6710 | 0 | { |
6711 | 0 | headers = VSICurlSetContentTypeFromExt(headers, pszPath); |
6712 | 0 | } |
6713 | |
|
6714 | 0 | return headers; |
6715 | 0 | } |
6716 | | |
6717 | | #endif // DOXYGEN_SKIP |
6718 | | //! @endcond |
6719 | | |
6720 | | /************************************************************************/ |
6721 | | /* VSIInstallCurlFileHandler() */ |
6722 | | /************************************************************************/ |
6723 | | |
6724 | | /*! |
6725 | | \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl) |
6726 | | |
6727 | | \verbatim embed:rst |
6728 | | See :ref:`/vsicurl/ documentation <vsicurl>` |
6729 | | \endverbatim |
6730 | | |
6731 | | */ |
6732 | | void VSIInstallCurlFileHandler(void) |
6733 | 83 | { |
6734 | 83 | auto poHandler = std::make_shared<cpl::VSICurlFilesystemHandler>(); |
6735 | 83 | for (const char *pszPrefix : VSICURL_PREFIXES) |
6736 | 166 | { |
6737 | 166 | VSIFileManager::InstallHandler(pszPrefix, poHandler); |
6738 | 166 | } |
6739 | 83 | } |
6740 | | |
6741 | | /************************************************************************/ |
6742 | | /* VSICurlClearCache() */ |
6743 | | /************************************************************************/ |
6744 | | |
6745 | | /** |
6746 | | * \brief Clean local cache associated with /vsicurl/ (and related file systems) |
6747 | | * |
6748 | | * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/, |
6749 | | * /vsiswift/) cache a number of |
6750 | | * metadata and data for faster execution in read-only scenarios. But when the |
6751 | | * content on the server-side may change during the same process, those |
6752 | | * mechanisms can prevent opening new files, or give an outdated version of |
6753 | | * them. |
6754 | | * |
6755 | | */ |
6756 | | |
6757 | | void VSICurlClearCache(void) |
6758 | 0 | { |
6759 | | // FIXME ? Currently we have different filesystem instances for |
6760 | | // vsicurl/, /vsis3/, /vsigs/ . So each one has its own cache of regions. |
6761 | | // File properties cache are now shared |
6762 | 0 | char **papszPrefix = VSIFileManager::GetPrefixes(); |
6763 | 0 | for (size_t i = 0; papszPrefix && papszPrefix[i]; ++i) |
6764 | 0 | { |
6765 | 0 | auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>( |
6766 | 0 | VSIFileManager::GetHandler(papszPrefix[i])); |
6767 | |
|
6768 | 0 | if (poFSHandler) |
6769 | 0 | poFSHandler->ClearCache(); |
6770 | 0 | } |
6771 | 0 | CSLDestroy(papszPrefix); |
6772 | |
|
6773 | 0 | VSICurlStreamingClearCache(); |
6774 | 0 | } |
6775 | | |
6776 | | /************************************************************************/ |
6777 | | /* VSICurlPartialClearCache() */ |
6778 | | /************************************************************************/ |
6779 | | |
6780 | | /** |
6781 | | * \brief Clean local cache associated with /vsicurl/ (and related file systems) |
6782 | | * for a given filename (and its subfiles and subdirectories if it is a |
6783 | | * directory) |
6784 | | * |
6785 | | * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/, |
6786 | | * /vsiswift/) cache a number of |
6787 | | * metadata and data for faster execution in read-only scenarios. But when the |
6788 | | * content on the server-side may change during the same process, those |
6789 | | * mechanisms can prevent opening new files, or give an outdated version of |
6790 | | * them. |
6791 | | * |
6792 | | * The filename prefix must start with the name of a known virtual file system |
6793 | | * (such as "/vsicurl/", "/vsis3/") |
6794 | | * |
6795 | | * VSICurlPartialClearCache("/vsis3/b") will clear all cached state for any file |
6796 | | * or directory starting with that prefix, so potentially "/vsis3/bucket", |
6797 | | * "/vsis3/basket/" or "/vsis3/basket/object". |
6798 | | * |
6799 | | * @param pszFilenamePrefix Filename prefix |
6800 | | */ |
6801 | | |
6802 | | void VSICurlPartialClearCache(const char *pszFilenamePrefix) |
6803 | 0 | { |
6804 | 0 | auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>( |
6805 | 0 | VSIFileManager::GetHandler(pszFilenamePrefix)); |
6806 | |
|
6807 | 0 | if (poFSHandler) |
6808 | 0 | poFSHandler->PartialClearCache(pszFilenamePrefix); |
6809 | 0 | } |
6810 | | |
6811 | | /************************************************************************/ |
6812 | | /* VSINetworkStatsReset() */ |
6813 | | /************************************************************************/ |
6814 | | |
6815 | | /** |
6816 | | * \brief Clear network related statistics. |
6817 | | * |
 * The effect of the CPL_VSIL_NETWORK_STATS_ENABLED configuration option
 * will also be reset. That is, the next network access will check its
 * value again.
6821 | | * |
6822 | | * @since GDAL 3.2.0 |
6823 | | */ |
6824 | | |
6825 | | void VSINetworkStatsReset(void) |
6826 | 0 | { |
6827 | 0 | cpl::NetworkStatisticsLogger::Reset(); |
6828 | 0 | } |
6829 | | |
6830 | | /************************************************************************/ |
6831 | | /* VSINetworkStatsGetAsSerializedJSON() */ |
6832 | | /************************************************************************/ |
6833 | | |
6834 | | /** |
6835 | | * \brief Return network related statistics, as a JSON serialized object. |
6836 | | * |
 * Statistics collecting should be enabled with the
 * CPL_VSIL_NETWORK_STATS_ENABLED configuration option set to YES before any
 * network activity starts (for efficiency, reading it is cached on first
 * access, until VSINetworkStatsReset() is called).
6842 | | * |
6843 | | * Statistics can also be emitted on standard output at process termination if |
6844 | | * the CPL_VSIL_SHOW_NETWORK_STATS configuration option is set to YES. |
6845 | | * |
6846 | | * Example of output: |
6847 | | * \code{.js} |
6848 | | * { |
6849 | | * "methods":{ |
6850 | | * "GET":{ |
6851 | | * "count":6, |
6852 | | * "downloaded_bytes":40825 |
6853 | | * }, |
6854 | | * "PUT":{ |
6855 | | * "count":1, |
6856 | | * "uploaded_bytes":35472 |
6857 | | * } |
6858 | | * }, |
6859 | | * "handlers":{ |
6860 | | * "vsigs":{ |
6861 | | * "methods":{ |
6862 | | * "GET":{ |
6863 | | * "count":2, |
6864 | | * "downloaded_bytes":446 |
6865 | | * }, |
6866 | | * "PUT":{ |
6867 | | * "count":1, |
6868 | | * "uploaded_bytes":35472 |
6869 | | * } |
6870 | | * }, |
6871 | | * "files":{ |
6872 | | * "\/vsigs\/spatialys\/byte.tif":{ |
6873 | | * "methods":{ |
6874 | | * "PUT":{ |
6875 | | * "count":1, |
6876 | | * "uploaded_bytes":35472 |
6877 | | * } |
6878 | | * }, |
6879 | | * "actions":{ |
6880 | | * "Write":{ |
6881 | | * "methods":{ |
6882 | | * "PUT":{ |
6883 | | * "count":1, |
6884 | | * "uploaded_bytes":35472 |
6885 | | * } |
6886 | | * } |
6887 | | * } |
6888 | | * } |
6889 | | * } |
6890 | | * }, |
6891 | | * "actions":{ |
6892 | | * "Stat":{ |
6893 | | * "methods":{ |
6894 | | * "GET":{ |
6895 | | * "count":2, |
6896 | | * "downloaded_bytes":446 |
6897 | | * } |
6898 | | * }, |
6899 | | * "files":{ |
6900 | | * "\/vsigs\/spatialys\/byte.tif\/":{ |
6901 | | * "methods":{ |
6902 | | * "GET":{ |
6903 | | * "count":1, |
6904 | | * "downloaded_bytes":181 |
6905 | | * } |
6906 | | * } |
6907 | | * } |
6908 | | * } |
6909 | | * } |
6910 | | * } |
6911 | | * }, |
6912 | | * "vsis3":{ |
6913 | | * [...] |
6914 | | * } |
6915 | | * } |
6916 | | * } |
6917 | | * \endcode |
6918 | | * |
6919 | | * @param papszOptions Unused. |
6920 | | * @return a JSON serialized string to free with VSIFree(), or nullptr |
6921 | | * @since GDAL 3.2.0 |
6922 | | */ |
6923 | | |
6924 | | char *VSINetworkStatsGetAsSerializedJSON(CPL_UNUSED char **papszOptions) |
6925 | 0 | { |
6926 | 0 | return CPLStrdup( |
6927 | 0 | cpl::NetworkStatisticsLogger::GetReportAsSerializedJSON().c_str()); |
6928 | 0 | } |
6929 | | |
6930 | | #endif /* HAVE_CURL */ |
6931 | | |
6932 | | #undef ENABLE_DEBUG |