Coverage Report

Created: 2026-04-01 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_vsil_curl.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implement VSI large file api for HTTP/FTP files
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2010-2018, Even Rouault <even.rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "cpl_port.h"
14
#include "cpl_vsil_curl_priv.h"
15
#include "cpl_vsil_curl_class.h"
16
17
#include <algorithm>
18
#include <array>
19
#include <limits>
20
#include <map>
21
#include <memory>
22
#include <numeric>
23
#include <set>
24
#include <string_view>
25
26
#include "cpl_aws.h"
27
#include "cpl_json.h"
28
#include "cpl_json_header.h"
29
#include "cpl_minixml.h"
30
#include "cpl_multiproc.h"
31
#include "cpl_string.h"
32
#include "cpl_time.h"
33
#include "cpl_vsi.h"
34
#include "cpl_vsi_virtual.h"
35
#include "cpl_http.h"
36
#include "cpl_mem_cache.h"
37
38
#ifndef S_IRUSR
39
#define S_IRUSR 00400
40
#define S_IWUSR 00200
41
#define S_IXUSR 00100
42
#define S_IRGRP 00040
43
#define S_IWGRP 00020
44
#define S_IXGRP 00010
45
#define S_IROTH 00004
46
#define S_IWOTH 00002
47
#define S_IXOTH 00001
48
#endif
49
50
#ifndef HAVE_CURL
51
52
// Stub compiled when HAVE_CURL is not defined: nothing is installed.
void VSIInstallCurlFileHandler()
{
    /* Not supported. */
}
56
57
// Stub compiled when HAVE_CURL is not defined: there is no cache to clear.
void VSICurlClearCache()
{
    /* Not supported. */
}
61
62
// Stub compiled when HAVE_CURL is not defined: the argument is ignored.
void VSICurlPartialClearCache(const char * /* unused */)
{
    /* Not supported. */
}
66
67
// Stub compiled when HAVE_CURL is not defined: the notification is dropped.
void VSICurlAuthParametersChanged()
{
    /* Not supported. */
}
71
72
// Stub compiled when HAVE_CURL is not defined: there are no statistics to
// reset.
void VSINetworkStatsReset()
{
    /* Not supported */
}
76
77
// Stub compiled when HAVE_CURL is not defined: always returns a null pointer,
// whatever options are passed.
char *VSINetworkStatsGetAsSerializedJSON(char ** /* papszOptions */)
{
    /* Not supported */
    char *const pszNoStats = nullptr;
    return pszNoStats;
}
82
83
/************************************************************************/
84
/*                       VSICurlInstallReadCbk()                        */
85
/************************************************************************/
86
87
int VSICurlInstallReadCbk(VSILFILE * /* fp */,
88
                          VSICurlReadCbkFunc /* pfnReadCbk */,
89
                          void * /* pfnUserData */,
90
                          int /* bStopOnInterruptUntilUninstall */)
91
0
{
92
0
    return FALSE;
93
0
}
94
95
/************************************************************************/
96
/*                      VSICurlUninstallReadCbk()                       */
97
/************************************************************************/
98
99
// Stub compiled when HAVE_CURL is not defined: there is never a callback to
// remove, so FALSE is returned unconditionally.
int VSICurlUninstallReadCbk(VSILFILE * /* fp */)
{
    const int bUninstalled = FALSE;
    return bUninstalled;
}
103
104
#else
105
106
//! @cond Doxygen_Suppress
107
#ifndef DOXYGEN_SKIP
108
109
#define ENABLE_DEBUG 1
110
#define ENABLE_DEBUG_VERBOSE 0
111
112
#define unchecked_curl_easy_setopt(handle, opt, param)                         \
113
    CPL_IGNORE_RET_VAL(curl_easy_setopt(handle, opt, param))
114
115
constexpr const char *const VSICURL_PREFIXES[] = {"/vsicurl/", "/vsicurl?"};
116
117
/************************************************************************/
118
/*                    VSICurlAuthParametersChanged()                    */
119
/************************************************************************/
120
121
static unsigned int gnGenerationAuthParameters = 0;
122
123
void VSICurlAuthParametersChanged()
124
{
125
    gnGenerationAuthParameters++;
126
}
127
128
// Do not access those variables directly !
129
// Use VSICURLGetDownloadChunkSize() and GetMaxRegions()
130
static int N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = 0;
131
static int DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY = 0;
132
133
/************************************************************************/
134
/*                   VSICURLReadGlobalEnvVariables()                    */
135
/************************************************************************/
136
137
static void VSICURLReadGlobalEnvVariables()
138
{
139
    struct Initializer
140
    {
141
        Initializer()
142
        {
143
            constexpr int DOWNLOAD_CHUNK_SIZE_DEFAULT = 16384;
144
            const char *pszChunkSize =
145
                CPLGetConfigOption("CPL_VSIL_CURL_CHUNK_SIZE", nullptr);
146
            GIntBig nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT;
147
148
            if (pszChunkSize)
149
            {
150
                if (CPLParseMemorySize(pszChunkSize, &nChunkSize, nullptr) !=
151
                    CE_None)
152
                {
153
                    CPLError(
154
                        CE_Warning, CPLE_AppDefined,
155
                        "Could not parse value for CPL_VSIL_CURL_CHUNK_SIZE. "
156
                        "Using default value of %d instead.",
157
                        DOWNLOAD_CHUNK_SIZE_DEFAULT);
158
                }
159
            }
160
161
            constexpr int MIN_CHUNK_SIZE = 1024;
162
            constexpr int MAX_CHUNK_SIZE = 10 * 1024 * 1024;
163
            if (nChunkSize < MIN_CHUNK_SIZE || nChunkSize > MAX_CHUNK_SIZE)
164
            {
165
                nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT;
166
                CPLError(CE_Warning, CPLE_AppDefined,
167
                         "Invalid value for CPL_VSIL_CURL_CHUNK_SIZE. "
168
                         "Allowed range is [%d, %d]. "
169
                         "Using CPL_VSIL_CURL_CHUNK_SIZE=%d instead",
170
                         MIN_CHUNK_SIZE, MAX_CHUNK_SIZE,
171
                         DOWNLOAD_CHUNK_SIZE_DEFAULT);
172
            }
173
            DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY =
174
                static_cast<int>(nChunkSize);
175
176
            constexpr int N_MAX_REGIONS_DEFAULT = 1000;
177
            constexpr int CACHE_SIZE_DEFAULT =
178
                N_MAX_REGIONS_DEFAULT * DOWNLOAD_CHUNK_SIZE_DEFAULT;
179
180
            const char *pszCacheSize =
181
                CPLGetConfigOption("CPL_VSIL_CURL_CACHE_SIZE", nullptr);
182
            GIntBig nCacheSize = CACHE_SIZE_DEFAULT;
183
184
            if (pszCacheSize)
185
            {
186
                if (CPLParseMemorySize(pszCacheSize, &nCacheSize, nullptr) !=
187
                    CE_None)
188
                {
189
                    CPLError(
190
                        CE_Warning, CPLE_AppDefined,
191
                        "Could not parse value for CPL_VSIL_CURL_CACHE_SIZE. "
192
                        "Using default value of " CPL_FRMT_GIB " instead.",
193
                        nCacheSize);
194
                }
195
            }
196
197
            const auto nMaxRAM = CPLGetUsablePhysicalRAM();
198
            const auto nMinVal = DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
199
            auto nMaxVal = static_cast<GIntBig>(INT_MAX) *
200
                           DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
201
            if (nMaxRAM > 0 && nMaxVal > nMaxRAM)
202
                nMaxVal = nMaxRAM;
203
            if (nCacheSize < nMinVal || nCacheSize > nMaxVal)
204
            {
205
                nCacheSize = nCacheSize < nMinVal ? nMinVal : nMaxVal;
206
                CPLError(CE_Warning, CPLE_AppDefined,
207
                         "Invalid value for CPL_VSIL_CURL_CACHE_SIZE. "
208
                         "Allowed range is [%d, " CPL_FRMT_GIB "]. "
209
                         "Using CPL_VSIL_CURL_CACHE_SIZE=" CPL_FRMT_GIB
210
                         " instead",
211
                         nMinVal, nMaxVal, nCacheSize);
212
            }
213
            N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = std::max(
214
                1, static_cast<int>(nCacheSize /
215
                                    DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY));
216
        }
217
    };
218
219
    static Initializer initializer;
220
}
221
222
/************************************************************************/
223
/*                    VSICURLGetDownloadChunkSize()                     */
224
/************************************************************************/
225
226
int VSICURLGetDownloadChunkSize()
227
{
228
    VSICURLReadGlobalEnvVariables();
229
    return DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
230
}
231
232
/************************************************************************/
233
/*                           GetMaxRegions()                            */
234
/************************************************************************/
235
236
static int GetMaxRegions()
237
{
238
    VSICURLReadGlobalEnvVariables();
239
    return N_MAX_REGIONS_DO_NOT_USE_DIRECTLY;
240
}
241
242
/************************************************************************/
243
/*          VSICurlFindStringSensitiveExceptEscapeSequences()           */
244
/************************************************************************/
245
246
static int
247
VSICurlFindStringSensitiveExceptEscapeSequences(CSLConstList papszList,
248
                                                const char *pszTarget)
249
250
{
251
    if (papszList == nullptr)
252
        return -1;
253
254
    for (int i = 0; papszList[i] != nullptr; i++)
255
    {
256
        const char *pszIter1 = papszList[i];
257
        const char *pszIter2 = pszTarget;
258
        char ch1 = '\0';
259
        char ch2 = '\0';
260
        /* The comparison is case-sensitive, except for escaped */
261
        /* sequences where letters of the hexadecimal sequence */
262
        /* can be uppercase or lowercase depending on the quoting algorithm */
263
        while (true)
264
        {
265
            ch1 = *pszIter1;
266
            ch2 = *pszIter2;
267
            if (ch1 == '\0' || ch2 == '\0')
268
                break;
269
            if (ch1 == '%' && ch2 == '%' && pszIter1[1] != '\0' &&
270
                pszIter1[2] != '\0' && pszIter2[1] != '\0' &&
271
                pszIter2[2] != '\0')
272
            {
273
                if (!EQUALN(pszIter1 + 1, pszIter2 + 1, 2))
274
                    break;
275
                pszIter1 += 2;
276
                pszIter2 += 2;
277
            }
278
            if (ch1 != ch2)
279
                break;
280
            pszIter1++;
281
            pszIter2++;
282
        }
283
        if (ch1 == ch2 && ch1 == '\0')
284
            return i;
285
    }
286
287
    return -1;
288
}
289
290
/************************************************************************/
291
/*                        VSICurlIsFileInList()                         */
292
/************************************************************************/
293
294
// Looks up pszTarget in papszList, first as given and then, if that fails
// and URL-escaping changes the name, in its URL-escaped form.
// Returns the index found, or a negative value when there is no match.
static int VSICurlIsFileInList(CSLConstList papszList, const char *pszTarget)
{
    const int nVerbatimIdx =
        VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
    if (nVerbatimIdx >= 0)
        return nVerbatimIdx;

    // Second chance: the list may contain the URL-escaped spelling.
    int nEscapedIdx = nVerbatimIdx;
    char *pszURLEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
    const bool bEscapingChangedName = strcmp(pszTarget, pszURLEscaped) != 0;
    if (bEscapingChangedName)
    {
        nEscapedIdx = VSICurlFindStringSensitiveExceptEscapeSequences(
            papszList, pszURLEscaped);
    }
    CPLFree(pszURLEscaped);
    return nEscapedIdx;
}
311
312
/************************************************************************/
313
/*                      StartsWithVSICurlPrefix()                       */
314
/************************************************************************/
315
316
static bool
317
StartsWithVSICurlPrefix(const char *pszFilename,
318
                        std::string *posFilenameAfterPrefix = nullptr)
319
{
320
    for (const char *pszPrefix : VSICURL_PREFIXES)
321
    {
322
        if (STARTS_WITH(pszFilename, pszPrefix))
323
        {
324
            if (posFilenameAfterPrefix)
325
                *posFilenameAfterPrefix = pszFilename + strlen(pszPrefix);
326
            return true;
327
        }
328
    }
329
    return false;
330
}
331
332
/************************************************************************/
333
/*                     VSICurlGetURLFromFilename()                      */
334
/************************************************************************/
335
336
static std::string VSICurlGetURLFromFilename(
337
    const char *pszFilename, CPLHTTPRetryParameters *poRetryParameters,
338
    bool *pbUseHead, bool *pbUseRedirectURLIfNoQueryStringParams,
339
    bool *pbListDir, bool *pbEmptyDir, CPLStringList *paosHTTPOptions,
340
    bool *pbPlanetaryComputerURLSigning, char **ppszPlanetaryComputerCollection)
341
{
342
    if (ppszPlanetaryComputerCollection)
343
        *ppszPlanetaryComputerCollection = nullptr;
344
345
    if (!StartsWithVSICurlPrefix(pszFilename))
346
        return pszFilename;
347
348
    if (pbPlanetaryComputerURLSigning)
349
    {
350
        // It may be more convenient sometimes to store Planetary Computer URL
351
        // signing as a per-path specific option rather than capturing it in
352
        // the filename with the &pc_url_signing=yes option.
353
        if (CPLTestBool(VSIGetPathSpecificOption(
354
                pszFilename, "VSICURL_PC_URL_SIGNING", "FALSE")))
355
        {
356
            *pbPlanetaryComputerURLSigning = true;
357
        }
358
    }
359
360
    pszFilename += strlen("/vsicurl/");
361
    if (!STARTS_WITH(pszFilename, "http://") &&
362
        !STARTS_WITH(pszFilename, "https://") &&
363
        !STARTS_WITH(pszFilename, "ftp://") &&
364
        !STARTS_WITH(pszFilename, "file://"))
365
    {
366
        if (*pszFilename == '?')
367
            pszFilename++;
368
        char **papszTokens = CSLTokenizeString2(pszFilename, "&", 0);
369
        for (int i = 0; papszTokens[i] != nullptr; i++)
370
        {
371
            char *pszUnescaped =
372
                CPLUnescapeString(papszTokens[i], nullptr, CPLES_URL);
373
            CPLFree(papszTokens[i]);
374
            papszTokens[i] = pszUnescaped;
375
        }
376
377
        std::string osURL;
378
        std::string osHeaders;
379
        for (int i = 0; papszTokens[i]; i++)
380
        {
381
            char *pszKey = nullptr;
382
            const char *pszValue = CPLParseNameValue(papszTokens[i], &pszKey);
383
            if (pszKey && pszValue)
384
            {
385
                if (EQUAL(pszKey, "max_retry"))
386
                {
387
                    if (poRetryParameters)
388
                        poRetryParameters->nMaxRetry = atoi(pszValue);
389
                }
390
                else if (EQUAL(pszKey, "retry_delay"))
391
                {
392
                    if (poRetryParameters)
393
                        poRetryParameters->dfInitialDelay = CPLAtof(pszValue);
394
                }
395
                else if (EQUAL(pszKey, "retry_codes"))
396
                {
397
                    if (poRetryParameters)
398
                        poRetryParameters->osRetryCodes = pszValue;
399
                }
400
                else if (EQUAL(pszKey, "use_head"))
401
                {
402
                    if (pbUseHead)
403
                        *pbUseHead = CPLTestBool(pszValue);
404
                }
405
                else if (EQUAL(pszKey,
406
                               "use_redirect_url_if_no_query_string_params"))
407
                {
408
                    /* Undocumented. Used by PLScenes driver */
409
                    if (pbUseRedirectURLIfNoQueryStringParams)
410
                        *pbUseRedirectURLIfNoQueryStringParams =
411
                            CPLTestBool(pszValue);
412
                }
413
                else if (EQUAL(pszKey, "list_dir"))
414
                {
415
                    if (pbListDir)
416
                        *pbListDir = CPLTestBool(pszValue);
417
                }
418
                else if (EQUAL(pszKey, "empty_dir"))
419
                {
420
                    if (pbEmptyDir)
421
                        *pbEmptyDir = CPLTestBool(pszValue);
422
                }
423
                else if (EQUAL(pszKey, "useragent") ||
424
                         EQUAL(pszKey, "referer") || EQUAL(pszKey, "cookie") ||
425
                         EQUAL(pszKey, "header_file") ||
426
                         EQUAL(pszKey, "unsafessl") ||
427
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
428
                         EQUAL(pszKey, "timeout") ||
429
                         EQUAL(pszKey, "connecttimeout") ||
430
#endif
431
                         EQUAL(pszKey, "low_speed_time") ||
432
                         EQUAL(pszKey, "low_speed_limit") ||
433
                         EQUAL(pszKey, "proxy") || EQUAL(pszKey, "proxyauth") ||
434
                         EQUAL(pszKey, "proxyuserpwd"))
435
                {
436
                    // Above names are the ones supported by
437
                    // CPLHTTPSetOptions()
438
                    if (paosHTTPOptions)
439
                    {
440
                        paosHTTPOptions->SetNameValue(pszKey, pszValue);
441
                    }
442
                }
443
                else if (EQUAL(pszKey, "url"))
444
                {
445
                    osURL = pszValue;
446
                }
447
                else if (EQUAL(pszKey, "pc_url_signing"))
448
                {
449
                    if (pbPlanetaryComputerURLSigning)
450
                        *pbPlanetaryComputerURLSigning = CPLTestBool(pszValue);
451
                }
452
                else if (EQUAL(pszKey, "pc_collection"))
453
                {
454
                    if (ppszPlanetaryComputerCollection)
455
                    {
456
                        CPLFree(*ppszPlanetaryComputerCollection);
457
                        *ppszPlanetaryComputerCollection = CPLStrdup(pszValue);
458
                    }
459
                }
460
                else if (STARTS_WITH(pszKey, "header."))
461
                {
462
                    osHeaders += (pszKey + strlen("header."));
463
                    osHeaders += ':';
464
                    osHeaders += pszValue;
465
                    osHeaders += "\r\n";
466
                }
467
                else
468
                {
469
                    CPLError(CE_Warning, CPLE_NotSupported,
470
                             "Unsupported option: %s", pszKey);
471
                }
472
            }
473
            CPLFree(pszKey);
474
        }
475
476
        if (paosHTTPOptions && !osHeaders.empty())
477
            paosHTTPOptions->SetNameValue("HEADERS", osHeaders.c_str());
478
479
        CSLDestroy(papszTokens);
480
        if (osURL.empty())
481
        {
482
            CPLError(CE_Failure, CPLE_IllegalArg, "Missing url parameter");
483
            return pszFilename;
484
        }
485
486
        return osURL;
487
    }
488
489
    return pszFilename;
490
}
491
492
namespace cpl
493
{
494
495
/************************************************************************/
496
/*                           VSICurlHandle()                            */
497
/************************************************************************/
498
499
// Builds a handle for pszFilename on the poFSIn filesystem.
// If pszURLIn is provided it is used as the URL; otherwise the URL and the
// per-file options embedded in pszFilename are extracted with
// VSICurlGetURLFromFilename().
VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandlerBase *poFSIn,
                             const char *pszFilename, const char *pszURLIn)
    : poFS(poFSIn), m_osFilename(pszFilename),
      m_aosHTTPOptions(CPLHTTPGetOptionsFromEnv(pszFilename)),
      m_oRetryParameters(m_aosHTTPOptions),
      m_bUseHead(
          CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_USE_HEAD", "YES")))
{
    if (pszURLIn == nullptr)
    {
        char *pszCollection = nullptr;
        const std::string osExtractedURL = VSICurlGetURLFromFilename(
            pszFilename, &m_oRetryParameters, &m_bUseHead,
            &m_bUseRedirectURLIfNoQueryStringParams, nullptr, nullptr,
            &m_aosHTTPOptions, &m_bPlanetaryComputerURLSigning,
            &pszCollection);
        m_pszURL = CPLStrdup(osExtractedURL.c_str());

        // Take a copy of the collection name, then release the C string
        // handed back through the output parameter.
        if (pszCollection != nullptr)
            m_osPlanetaryComputerCollection = pszCollection;
        CPLFree(pszCollection);
    }
    else
    {
        m_pszURL = CPLStrdup(pszURLIn);
    }

    m_bCached = poFSIn->AllowCachedDataFor(pszFilename);
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
}
529
530
/************************************************************************/
531
/*                           ~VSICurlHandle()                           */
532
/************************************************************************/
533
534
// Joins the advise-read thread if it is joinable, releases the associated
// curl multi handle, drops cached data for handles that are not cached, and
// frees the URL.
VSICurlHandle::~VSICurlHandle()
{
    // The thread must be finished before the multi handle is cleaned up.
    if (m_oThreadAdviseRead.joinable())
        m_oThreadAdviseRead.join();

    if (m_hCurlMultiHandleForAdviseRead != nullptr)
        curl_multi_cleanup(m_hCurlMultiHandleForAdviseRead);

    if (!m_bCached)
    {
        poFS->InvalidateCachedData(m_pszURL);
        const auto osParentDir = CPLGetDirnameSafe(m_osFilename.c_str());
        poFS->InvalidateDirContent(osParentDir);
    }

    CPLFree(m_pszURL);
}
552
553
/************************************************************************/
554
/*                               SetURL()                               */
555
/************************************************************************/
556
557
void VSICurlHandle::SetURL(const char *pszURLIn)
558
{
559
    CPLFree(m_pszURL);
560
    m_pszURL = CPLStrdup(pszURLIn);
561
}
562
563
/************************************************************************/
564
/*                           InstallReadCbk()                           */
565
/************************************************************************/
566
567
int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
568
                                  void *pfnUserDataIn,
569
                                  int bStopOnInterruptUntilUninstallIn)
570
{
571
    if (pfnReadCbk != nullptr)
572
        return FALSE;
573
574
    pfnReadCbk = pfnReadCbkIn;
575
    pReadCbkUserData = pfnUserDataIn;
576
    bStopOnInterruptUntilUninstall =
577
        CPL_TO_BOOL(bStopOnInterruptUntilUninstallIn);
578
    bInterrupted = false;
579
    return TRUE;
580
}
581
582
/************************************************************************/
583
/*                          UninstallReadCbk()                          */
584
/************************************************************************/
585
586
int VSICurlHandle::UninstallReadCbk()
587
{
588
    if (pfnReadCbk == nullptr)
589
        return FALSE;
590
591
    pfnReadCbk = nullptr;
592
    pReadCbkUserData = nullptr;
593
    bStopOnInterruptUntilUninstall = false;
594
    bInterrupted = false;
595
    return TRUE;
596
}
597
598
/************************************************************************/
599
/*                                Seek()                                */
600
/************************************************************************/
601
602
// Moves the current offset according to nWhence and clears the EOF flag.
// SEEK_SET and SEEK_CUR are handled explicitly; any other nWhence value is
// interpreted relative to the file size. Always returns 0.
int VSICurlHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    switch (nWhence)
    {
        case SEEK_SET:
            curOffset = nOffset;
            break;

        case SEEK_CUR:
            curOffset = curOffset + nOffset;
            break;

        default:
            curOffset = GetFileSize(false) + nOffset;
            break;
    }

    bEOF = false;
    return 0;
}
619
620
}  // namespace cpl
621
622
/************************************************************************/
623
/*               VSICurlGetTimeStampFromRFC822DateTime()                */
624
/************************************************************************/
625
626
// Converts a date such as "Sun, 03 Apr 2016 12:07:27 GMT" into the value
// returned by CPLYMDHMSToUnixTime() for its fields. The leading
// "<weekday>, " part is optional.
// Returns 0 when the string does not match the expected layout or when the
// month name is not one of the 12 English three-letter abbreviations
// (compared case-insensitively).
static GIntBig VSICurlGetTimeStampFromRFC822DateTime(const char *pszDT)
{
    // Skip the optional "Sun, " prefix.
    if (strlen(pszDT) >= 5 && pszDT[3] == ',' && pszDT[4] == ' ')
        pszDT += 5;

    int nDay = 0;
    int nYear = 0;
    int nHour = 0;
    int nMinute = 0;
    int nSecond = 0;
    // Zero-filled, so it stays NUL-terminated after at most 3 characters
    // have been written by sscanf().
    char szMonth[4] = {};
    if (sscanf(pszDT, "%02d %03s %04d %02d:%02d:%02d GMT", &nDay, szMonth,
               &nYear, &nHour, &nMinute, &nSecond) != 6)
    {
        return 0;
    }

    static const char *const aszMonthStr[] = {"Jan", "Feb", "Mar", "Apr",
                                              "May", "Jun", "Jul", "Aug",
                                              "Sep", "Oct", "Nov", "Dec"};
    for (int iMonth = 0; iMonth < 12; iMonth++)
    {
        if (!EQUAL(szMonth, aszMonthStr[iMonth]))
            continue;

        // Value-initialize so that no field of the structure handed to
        // CPLYMDHMSToUnixTime() is left indeterminate.
        struct tm brokendowntime = {};
        brokendowntime.tm_year = nYear - 1900;
        brokendowntime.tm_mon = iMonth;
        brokendowntime.tm_mday = nDay;
        brokendowntime.tm_hour = nHour;
        brokendowntime.tm_min = nMinute;
        brokendowntime.tm_sec = nSecond;
        return CPLYMDHMSToUnixTime(&brokendowntime);
    }

    return 0;
}
668
669
/************************************************************************/
670
/*                     VSICURLInitWriteFuncStruct()                     */
671
/************************************************************************/
672
673
void VSICURLInitWriteFuncStruct(cpl::WriteFuncStruct *psStruct, VSILFILE *fp,
674
                                VSICurlReadCbkFunc pfnReadCbk,
675
                                void *pReadCbkUserData)
676
{
677
    psStruct->pBuffer = nullptr;
678
    psStruct->nSize = 0;
679
    psStruct->bIsHTTP = false;
680
    psStruct->bMultiRange = false;
681
    psStruct->nStartOffset = 0;
682
    psStruct->nEndOffset = 0;
683
    psStruct->nHTTPCode = 0;
684
    psStruct->nFirstHTTPCode = 0;
685
    psStruct->nContentLength = 0;
686
    psStruct->bFoundContentRange = false;
687
    psStruct->bError = false;
688
    psStruct->bDetectRangeDownloadingError = true;
689
    psStruct->nTimestampDate = 0;
690
691
    psStruct->fp = fp;
692
    psStruct->pfnReadCbk = pfnReadCbk;
693
    psStruct->pReadCbkUserData = pReadCbkUserData;
694
    psStruct->bInterrupted = false;
695
}
696
697
/************************************************************************/
698
/*                       VSICurlHandleWriteFunc()                       */
699
/************************************************************************/
700
701
size_t VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb,
702
                              void *req)
703
{
704
    cpl::WriteFuncStruct *psStruct = static_cast<cpl::WriteFuncStruct *>(req);
705
    const size_t nSize = count * nmemb;
706
707
    if (psStruct->bInterrupted)
708
    {
709
        return 0;
710
    }
711
712
    char *pNewBuffer = static_cast<char *>(
713
        VSIRealloc(psStruct->pBuffer, psStruct->nSize + nSize + 1));
714
    if (pNewBuffer)
715
    {
716
        psStruct->pBuffer = pNewBuffer;
717
        memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
718
        psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
719
        if (psStruct->bIsHTTP)
720
        {
721
            char *pszLine = psStruct->pBuffer + psStruct->nSize;
722
            if (STARTS_WITH_CI(pszLine, "HTTP/"))
723
            {
724
                char *pszSpace = strchr(pszLine, ' ');
725
                if (pszSpace)
726
                {
727
                    const int nHTTPCode = atoi(pszSpace + 1);
728
                    if (psStruct->nFirstHTTPCode == 0)
729
                        psStruct->nFirstHTTPCode = nHTTPCode;
730
                    psStruct->nHTTPCode = nHTTPCode;
731
                }
732
            }
733
            else if (STARTS_WITH_CI(pszLine, "Content-Length: "))
734
            {
735
                psStruct->nContentLength = CPLScanUIntBig(
736
                    pszLine + 16, static_cast<int>(strlen(pszLine + 16)));
737
            }
738
            else if (STARTS_WITH_CI(pszLine, "Content-Range: "))
739
            {
740
                psStruct->bFoundContentRange = true;
741
            }
742
            else if (STARTS_WITH_CI(pszLine, "Date: "))
743
            {
744
                CPLString osDate = pszLine + strlen("Date: ");
745
                size_t nSizeLine = osDate.size();
746
                while (nSizeLine && (osDate[nSizeLine - 1] == '\r' ||
747
                                     osDate[nSizeLine - 1] == '\n'))
748
                {
749
                    osDate.resize(nSizeLine - 1);
750
                    nSizeLine--;
751
                }
752
                osDate.Trim();
753
754
                GIntBig nTimestampDate =
755
                    VSICurlGetTimeStampFromRFC822DateTime(osDate.c_str());
756
#if DEBUG_VERBOSE
757
                CPLDebug("VSICURL", "Timestamp = " CPL_FRMT_GIB,
758
                         nTimestampDate);
759
#endif
760
                psStruct->nTimestampDate = nTimestampDate;
761
            }
762
            /*if( nSize > 2 && pszLine[nSize - 2] == '\r' &&
763
                  pszLine[nSize - 1] == '\n' )
764
            {
765
                pszLine[nSize - 2] = 0;
766
                CPLDebug("VSICURL", "%s", pszLine);
767
                pszLine[nSize - 2] = '\r';
768
            }*/
769
770
            if (pszLine[0] == '\r' && pszLine[1] == '\n')
771
            {
772
                // Detect servers that don't support range downloading.
773
                if (psStruct->nHTTPCode == 200 &&
774
                    psStruct->bDetectRangeDownloadingError &&
775
                    !psStruct->bMultiRange && !psStruct->bFoundContentRange &&
776
                    (psStruct->nStartOffset != 0 ||
777
                     psStruct->nContentLength >
778
                         10 * (psStruct->nEndOffset - psStruct->nStartOffset +
779
                               1)))
780
                {
781
                    CPLError(CE_Failure, CPLE_AppDefined,
782
                             "Range downloading not supported by this "
783
                             "server!");
784
                    psStruct->bError = true;
785
                    return 0;
786
                }
787
            }
788
        }
789
        else
790
        {
791
            if (psStruct->pfnReadCbk)
792
            {
793
                if (!psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
794
                                          psStruct->pReadCbkUserData))
795
                {
796
                    psStruct->bInterrupted = true;
797
                    return 0;
798
                }
799
            }
800
        }
801
        psStruct->nSize += nSize;
802
        return nmemb;
803
    }
804
    else
805
    {
806
        return 0;
807
    }
808
}
809
810
/************************************************************************/
811
/*                      VSICurlIsS3LikeSignedURL()                      */
812
/************************************************************************/
813
814
// Tells whether pszURL looks like a signed URL:
// - either it has a "X-Amz-Signature=" query parameter,
// - or it has a "Signature=" query parameter and contains a
//   ".s3.amazonaws.com", ".storage.googleapis.com" or ".cloudfront.net"
//   host suffix followed by '/' or ':'.
static bool VSICurlIsS3LikeSignedURL(const char *pszURL)
{
    const auto Contains = [pszURL](const char *pszNeedle)
    { return strstr(pszURL, pszNeedle) != nullptr; };

    if (Contains("&X-Amz-Signature=") || Contains("?X-Amz-Signature="))
        return true;

    const bool bKnownHost =
        Contains(".s3.amazonaws.com/") || Contains(".s3.amazonaws.com:") ||
        Contains(".storage.googleapis.com/") ||
        Contains(".storage.googleapis.com:") ||
        Contains(".cloudfront.net/") || Contains(".cloudfront.net:");
    if (!bKnownHost)
        return false;

    return Contains("&Signature=") || Contains("?Signature=");
}
827
828
/************************************************************************/
829
/*                VSICurlGetExpiresFromS3LikeSignedURL()                */
830
/************************************************************************/
831
832
// Extracts the expiration time, as a Unix timestamp, from the query
// parameters of a signed URL.
//
// Two conventions are handled:
//  - "Expires=<unix timestamp>": the value is returned as is.
//  - "X-Amz-Expires=<delay>" + "X-Amz-Date=YYYYMMDDTHHMMSSZ": the returned
//    value is the date converted to a Unix timestamp, plus the delay.
//
// Returns 0 when neither convention yields a value (missing parameter, date
// too short, or date whose 16th character is not 'Z').
static GIntBig VSICurlGetExpiresFromS3LikeSignedURL(const char *pszURL)
{
    // Looks for "&<key>=" and then "?<key>=" in the URL, and returns a
    // pointer to the character following the '=' of the first match, or
    // nullptr if there is none.
    const auto FindValue = [pszURL](const char *pszKey) -> const char *
    {
        const std::string osKeyEq = std::string(pszKey) + '=';
        for (const char chSep : {'&', '?'})
        {
            const std::string osNeedle = chSep + osKeyEq;
            if (const char *pszMatch = strstr(pszURL, osNeedle.c_str()))
                return pszMatch + osNeedle.size();
        }
        return nullptr;
    };

    // Expires= is a Unix timestamp
    if (const char *pszExpires = FindValue("Expires"))
        return CPLAtoGIntBig(pszExpires);

    // X-Amz-Expires= is a delay, to be combined with X-Amz-Date=
    const char *pszAmzExpires = FindValue("X-Amz-Expires");
    if (!pszAmzExpires)
        return 0;
    const int nDelay = atoi(pszAmzExpires);

    const char *pszAmzDate = FindValue("X-Amz-Date");
    if (!pszAmzDate)
        return 0;

    // pszAmzDate should be YYYYMMDDTHHMMSSZ
    constexpr size_t nDateLen = sizeof("YYYYMMDDTHHMMSSZ") - 1;
    if (strlen(pszAmzDate) < nDateLen)
        return 0;
    if (pszAmzDate[nDateLen - 1] != 'Z')
        return 0;

    // Converts nDigits characters starting at nOffset in the date to an int.
    // The length check above guarantees the range lies within the string.
    const auto Field = [pszAmzDate](size_t nOffset, size_t nDigits)
    { return atoi(std::string(pszAmzDate + nOffset, nDigits).c_str()); };

    struct tm brokendowntime;
    brokendowntime.tm_year = Field(0, 4) - 1900;
    brokendowntime.tm_mon = Field(4, 2) - 1;
    brokendowntime.tm_mday = Field(6, 2);
    // Offset 8 is the 'T' separator
    brokendowntime.tm_hour = Field(9, 2);
    brokendowntime.tm_min = Field(11, 2);
    brokendowntime.tm_sec = Field(13, 2);
    return CPLYMDHMSToUnixTime(&brokendowntime) + nDelay;
}
880
881
/************************************************************************/
882
/*                        VSICURLMultiPerform()                         */
883
/************************************************************************/
884
885
void VSICURLMultiPerform(CURLM *hCurlMultiHandle, CURL *hEasyHandle,
886
                         std::atomic<bool> *pbInterrupt)
887
{
888
    int repeats = 0;
889
890
    if (hEasyHandle)
891
        curl_multi_add_handle(hCurlMultiHandle, hEasyHandle);
892
893
    void *old_handler = CPLHTTPIgnoreSigPipe();
894
    while (true)
895
    {
896
        int still_running;
897
        while (curl_multi_perform(hCurlMultiHandle, &still_running) ==
898
               CURLM_CALL_MULTI_PERFORM)
899
        {
900
            // loop
901
        }
902
        if (!still_running)
903
        {
904
            break;
905
        }
906
907
#ifdef undef
908
        CURLMsg *msg;
909
        do
910
        {
911
            int msgq = 0;
912
            msg = curl_multi_info_read(hCurlMultiHandle, &msgq);
913
            if (msg && (msg->msg == CURLMSG_DONE))
914
            {
915
                CURL *e = msg->easy_handle;
916
            }
917
        } while (msg);
918
#endif
919
920
        CPLMultiPerformWait(hCurlMultiHandle, repeats);
921
922
        if (pbInterrupt && *pbInterrupt)
923
            break;
924
    }
925
    CPLHTTPRestoreSigPipeHandler(old_handler);
926
927
    if (hEasyHandle)
928
        curl_multi_remove_handle(hCurlMultiHandle, hEasyHandle);
929
}
930
931
/************************************************************************/
932
/*                       VSICurlDummyWriteFunc()                        */
933
/************************************************************************/
934
935
// Placeholder callback with the signature of a curl header/write function.
// It ignores all its arguments and always reports that zero bytes were
// handled.
static size_t VSICurlDummyWriteFunc(void * /* pBuffer */, size_t /* nSize */,
                                    size_t /* nMemb */, void * /* pUserData */)
{
    constexpr size_t nBytesHandled = 0;
    return nBytesHandled;
}
939
940
/************************************************************************/
941
/*                VSICURLResetHeaderAndWriterFunctions()                */
942
/************************************************************************/
943
944
// Replaces the header and write callbacks currently set on hCurlHandle with
// VSICurlDummyWriteFunc, so that the handle no longer references whatever
// callbacks were installed before.
void VSICURLResetHeaderAndWriterFunctions(CURL *hCurlHandle)
{
    // The same do-nothing callback serves for both options.
    auto pfnDiscard = &VSICurlDummyWriteFunc;

    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, pfnDiscard);
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, pfnDiscard);
}
951
952
/************************************************************************/
953
/*                         Iso8601ToUnixTime()                          */
954
/************************************************************************/
955
956
static bool Iso8601ToUnixTime(const char *pszDT, GIntBig *pnUnixTime)
957
{
958
    int nYear;
959
    int nMonth;
960
    int nDay;
961
    int nHour;
962
    int nMinute;
963
    int nSecond;
964
    if (sscanf(pszDT, "%04d-%02d-%02dT%02d:%02d:%02d", &nYear, &nMonth, &nDay,
965
               &nHour, &nMinute, &nSecond) == 6)
966
    {
967
        struct tm brokendowntime;
968
        brokendowntime.tm_year = nYear - 1900;
969
        brokendowntime.tm_mon = nMonth - 1;
970
        brokendowntime.tm_mday = nDay;
971
        brokendowntime.tm_hour = nHour;
972
        brokendowntime.tm_min = nMinute;
973
        brokendowntime.tm_sec = nSecond;
974
        *pnUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
975
        return true;
976
    }
977
    return false;
978
}
979
980
namespace cpl
981
{
982
983
/************************************************************************/
984
/*                   ManagePlanetaryComputerSigning()                   */
985
/************************************************************************/
986
987
void VSICurlHandle::ManagePlanetaryComputerSigning() const
988
{
989
    // Take global lock
990
    static std::mutex goMutex;
991
    std::lock_guard<std::mutex> oLock(goMutex);
992
993
    struct PCSigningInfo
994
    {
995
        std::string osQueryString{};
996
        GIntBig nExpireTimestamp = 0;
997
    };
998
999
    PCSigningInfo sSigningInfo;
1000
    constexpr int knExpirationDelayMargin = 60;
1001
1002
    if (!m_osPlanetaryComputerCollection.empty())
1003
    {
1004
        // key is the name of a collection
1005
        static lru11::Cache<std::string, PCSigningInfo> goCacheCollection{1024};
1006
1007
        if (goCacheCollection.tryGet(m_osPlanetaryComputerCollection,
1008
                                     sSigningInfo) &&
1009
            time(nullptr) + knExpirationDelayMargin <=
1010
                sSigningInfo.nExpireTimestamp)
1011
        {
1012
            m_osQueryString = sSigningInfo.osQueryString;
1013
        }
1014
        else
1015
        {
1016
            const auto psResult =
1017
                CPLHTTPFetch((std::string(CPLGetConfigOption(
1018
                                  "VSICURL_PC_SAS_TOKEN_URL",
1019
                                  "https://planetarycomputer.microsoft.com/api/"
1020
                                  "sas/v1/token/")) +
1021
                              m_osPlanetaryComputerCollection)
1022
                                 .c_str(),
1023
                             nullptr);
1024
            if (psResult)
1025
            {
1026
                const auto aosKeyVals = CPLParseKeyValueJson(
1027
                    reinterpret_cast<const char *>(psResult->pabyData));
1028
                const char *pszToken = aosKeyVals.FetchNameValue("token");
1029
                if (pszToken)
1030
                {
1031
                    m_osQueryString = '?';
1032
                    m_osQueryString += pszToken;
1033
1034
                    sSigningInfo.osQueryString = m_osQueryString;
1035
                    sSigningInfo.nExpireTimestamp = 0;
1036
                    const char *pszExpiry =
1037
                        aosKeyVals.FetchNameValue("msft:expiry");
1038
                    if (pszExpiry)
1039
                    {
1040
                        Iso8601ToUnixTime(pszExpiry,
1041
                                          &sSigningInfo.nExpireTimestamp);
1042
                    }
1043
                    goCacheCollection.insert(m_osPlanetaryComputerCollection,
1044
                                             sSigningInfo);
1045
1046
                    CPLDebug("VSICURL", "Got token from Planetary Computer: %s",
1047
                             m_osQueryString.c_str());
1048
                }
1049
                CPLHTTPDestroyResult(psResult);
1050
            }
1051
        }
1052
    }
1053
    else
1054
    {
1055
        // key is a URL
1056
        static lru11::Cache<std::string, PCSigningInfo> goCacheURL{1024};
1057
1058
        if (goCacheURL.tryGet(m_pszURL, sSigningInfo) &&
1059
            time(nullptr) + knExpirationDelayMargin <=
1060
                sSigningInfo.nExpireTimestamp)
1061
        {
1062
            m_osQueryString = sSigningInfo.osQueryString;
1063
        }
1064
        else
1065
        {
1066
            const auto psResult =
1067
                CPLHTTPFetch((std::string(CPLGetConfigOption(
1068
                                  "VSICURL_PC_SAS_SIGN_HREF_URL",
1069
                                  "https://planetarycomputer.microsoft.com/api/"
1070
                                  "sas/v1/sign?href=")) +
1071
                              m_pszURL)
1072
                                 .c_str(),
1073
                             nullptr);
1074
            if (psResult)
1075
            {
1076
                const auto aosKeyVals = CPLParseKeyValueJson(
1077
                    reinterpret_cast<const char *>(psResult->pabyData));
1078
                const char *pszHref = aosKeyVals.FetchNameValue("href");
1079
                if (pszHref && STARTS_WITH(pszHref, m_pszURL))
1080
                {
1081
                    m_osQueryString = pszHref + strlen(m_pszURL);
1082
1083
                    sSigningInfo.osQueryString = m_osQueryString;
1084
                    sSigningInfo.nExpireTimestamp = 0;
1085
                    const char *pszExpiry =
1086
                        aosKeyVals.FetchNameValue("msft:expiry");
1087
                    if (pszExpiry)
1088
                    {
1089
                        Iso8601ToUnixTime(pszExpiry,
1090
                                          &sSigningInfo.nExpireTimestamp);
1091
                    }
1092
                    goCacheURL.insert(m_pszURL, sSigningInfo);
1093
1094
                    CPLDebug("VSICURL",
1095
                             "Got signature from Planetary Computer: %s",
1096
                             m_osQueryString.c_str());
1097
                }
1098
                CPLHTTPDestroyResult(psResult);
1099
            }
1100
        }
1101
    }
1102
}
1103
1104
/************************************************************************/
1105
/*                         UpdateQueryString()                          */
1106
/************************************************************************/
1107
1108
void VSICurlHandle::UpdateQueryString() const
1109
{
1110
    if (m_bPlanetaryComputerURLSigning)
1111
    {
1112
        ManagePlanetaryComputerSigning();
1113
    }
1114
    else
1115
    {
1116
        const char *pszQueryString = VSIGetPathSpecificOption(
1117
            m_osFilename.c_str(), "VSICURL_QUERY_STRING", nullptr);
1118
        if (pszQueryString)
1119
        {
1120
            if (m_osFilename.back() == '?')
1121
            {
1122
                if (pszQueryString[0] == '?')
1123
                    m_osQueryString = pszQueryString + 1;
1124
                else
1125
                    m_osQueryString = pszQueryString;
1126
            }
1127
            else
1128
            {
1129
                if (pszQueryString[0] == '?')
1130
                    m_osQueryString = pszQueryString;
1131
                else
1132
                {
1133
                    m_osQueryString = "?";
1134
                    m_osQueryString.append(pszQueryString);
1135
                }
1136
            }
1137
        }
1138
    }
1139
}
1140
1141
/************************************************************************/
1142
/*                        GetFileSizeOrHeaders()                        */
1143
/************************************************************************/
1144
1145
vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError,
1146
                                                 bool bGetHeaders)
1147
{
1148
    if (oFileProp.bHasComputedFileSize && !bGetHeaders)
1149
        return oFileProp.fileSize;
1150
1151
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
1152
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
1153
    NetworkStatisticsAction oContextAction("GetFileSize");
1154
1155
    oFileProp.bHasComputedFileSize = true;
1156
1157
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
1158
1159
    UpdateQueryString();
1160
1161
    std::string osURL(m_pszURL + m_osQueryString);
1162
    int nTryCount = 0;
1163
    bool bRetryWithGet = false;
1164
    bool bS3LikeRedirect = false;
1165
    CPLHTTPRetryContext oRetryContext(m_oRetryParameters);
1166
1167
retry:
1168
    ++nTryCount;
1169
    CURL *hCurlHandle = curl_easy_init();
1170
1171
    struct curl_slist *headers = nullptr;
1172
    if (bS3LikeRedirect)
1173
    {
1174
        // Do not propagate authentication sent to the original URL to a S3-like
1175
        // redirect.
1176
        CPLStringList aosHTTPOptions{};
1177
        for (const auto &pszOption : m_aosHTTPOptions)
1178
        {
1179
            if (STARTS_WITH_CI(pszOption, "HTTPAUTH") ||
1180
                STARTS_WITH_CI(pszOption, "HTTP_BEARER"))
1181
                continue;
1182
            aosHTTPOptions.AddString(pszOption);
1183
        }
1184
        headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(),
1185
                                    aosHTTPOptions.List());
1186
    }
1187
    else
1188
    {
1189
        headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(),
1190
                                    m_aosHTTPOptions.List());
1191
    }
1192
1193
    WriteFuncStruct sWriteFuncHeaderData;
1194
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
1195
                               nullptr);
1196
    sWriteFuncHeaderData.bDetectRangeDownloadingError = false;
1197
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(osURL.c_str(), "http");
1198
1199
    WriteFuncStruct sWriteFuncData;
1200
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
1201
1202
    std::string osVerb;
1203
    std::string osRange;  // leave in this scope !
1204
    int nRoundedBufSize = 0;
1205
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
1206
    if (UseLimitRangeGetInsteadOfHead())
1207
    {
1208
        osVerb = "GET";
1209
        const int nBufSize = std::max(
1210
            1024, std::min(10 * 1024 * 1024,
1211
                           atoi(CPLGetConfigOption(
1212
                               "GDAL_INGESTED_BYTES_AT_OPEN", "1024"))));
1213
        nRoundedBufSize = cpl::div_round_up(nBufSize, knDOWNLOAD_CHUNK_SIZE) *
1214
                          knDOWNLOAD_CHUNK_SIZE;
1215
1216
        // so it gets included in Azure signature
1217
        osRange = CPLSPrintf("Range: bytes=0-%d", nRoundedBufSize - 1);
1218
        headers = curl_slist_append(headers, osRange.c_str());
1219
    }
1220
    // HACK for mbtiles driver: http://a.tiles.mapbox.com/v3/ doesn't accept
1221
    // HEAD, as it is a redirect to AWS S3 signed URL, but those are only valid
1222
    // for a given type of HTTP request, and thus GET. This is valid for any
1223
    // signed URL for AWS S3.
1224
    else if (bRetryWithGet ||
1225
             strstr(osURL.c_str(), ".tiles.mapbox.com/") != nullptr ||
1226
             VSICurlIsS3LikeSignedURL(osURL.c_str()) || !m_bUseHead)
1227
    {
1228
        sWriteFuncData.bInterrupted = true;
1229
        osVerb = "GET";
1230
    }
1231
    else
1232
    {
1233
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
1234
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
1235
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
1236
        osVerb = "HEAD";
1237
    }
1238
1239
    if (!AllowAutomaticRedirection())
1240
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
1241
1242
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
1243
                               &sWriteFuncHeaderData);
1244
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
1245
                               VSICurlHandleWriteFunc);
1246
1247
    // Bug with older curl versions (<=7.16.4) and FTP.
1248
    // See http://curl.haxx.se/mail/lib-2007-08/0312.html
1249
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1250
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
1251
                               VSICurlHandleWriteFunc);
1252
1253
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
1254
    szCurlErrBuf[0] = '\0';
1255
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
1256
1257
    headers = GetCurlHeaders(osVerb, headers);
1258
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
1259
1260
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1);
1261
1262
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt);
1263
1264
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
1265
1266
    curl_slist_free_all(headers);
1267
1268
    oFileProp.eExists = EXIST_UNKNOWN;
1269
1270
    long mtime = 0;
1271
    curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME, &mtime);
1272
1273
    if (osVerb == "GET")
1274
        NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
1275
    else
1276
        NetworkStatisticsLogger::LogHEAD();
1277
1278
    if (STARTS_WITH(osURL.c_str(), "ftp"))
1279
    {
1280
        if (sWriteFuncData.pBuffer != nullptr)
1281
        {
1282
            const char *pszContentLength =
1283
                strstr(const_cast<const char *>(sWriteFuncData.pBuffer),
1284
                       "Content-Length: ");
1285
            if (pszContentLength)
1286
            {
1287
                pszContentLength += strlen("Content-Length: ");
1288
                oFileProp.eExists = EXIST_YES;
1289
                oFileProp.fileSize =
1290
                    CPLScanUIntBig(pszContentLength,
1291
                                   static_cast<int>(strlen(pszContentLength)));
1292
                if constexpr (ENABLE_DEBUG)
1293
                {
1294
                    CPLDebug(poFS->GetDebugKey(),
1295
                             "GetFileSize(%s)=" CPL_FRMT_GUIB, osURL.c_str(),
1296
                             oFileProp.fileSize);
1297
                }
1298
            }
1299
        }
1300
    }
1301
1302
    double dfSize = 0;
1303
    long response_code = -1;
1304
    if (oFileProp.eExists != EXIST_YES)
1305
    {
1306
        curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1307
1308
        bool bAlreadyLogged = false;
1309
        if (response_code >= 400 && szCurlErrBuf[0] == '\0')
1310
        {
1311
            const bool bLogResponse =
1312
                CPLTestBool(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO"));
1313
            if (bLogResponse && sWriteFuncData.pBuffer)
1314
            {
1315
                const char *pszErrorMsg =
1316
                    static_cast<const char *>(sWriteFuncData.pBuffer);
1317
                bAlreadyLogged = true;
1318
                CPLDebug(
1319
                    poFS->GetDebugKey(),
1320
                    "GetFileSize(%s): response_code=%d, server error msg=%s",
1321
                    osURL.c_str(), static_cast<int>(response_code),
1322
                    pszErrorMsg[0] ? pszErrorMsg : "(no message provided)");
1323
            }
1324
        }
1325
        else if (szCurlErrBuf[0] != '\0')
1326
        {
1327
            bAlreadyLogged = true;
1328
            CPLDebug(poFS->GetDebugKey(),
1329
                     "GetFileSize(%s): response_code=%d, curl error msg=%s",
1330
                     osURL.c_str(), static_cast<int>(response_code),
1331
                     szCurlErrBuf);
1332
        }
1333
1334
        std::string osEffectiveURL;
1335
        {
1336
            char *pszEffectiveURL = nullptr;
1337
            curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL,
1338
                              &pszEffectiveURL);
1339
            if (pszEffectiveURL)
1340
                osEffectiveURL = pszEffectiveURL;
1341
        }
1342
1343
        if (!osEffectiveURL.empty() &&
1344
            strstr(osEffectiveURL.c_str(), osURL.c_str()) == nullptr)
1345
        {
1346
            // Moved permanently ?
1347
            if (sWriteFuncHeaderData.nFirstHTTPCode == 301 ||
1348
                (m_bUseRedirectURLIfNoQueryStringParams &&
1349
                 osEffectiveURL.find('?') == std::string::npos))
1350
            {
1351
                CPLDebug(poFS->GetDebugKey(),
1352
                         "Using effective URL %s permanently",
1353
                         osEffectiveURL.c_str());
1354
                oFileProp.osRedirectURL = osEffectiveURL;
1355
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
1356
            }
1357
            else
1358
            {
1359
                CPLDebug(poFS->GetDebugKey(),
1360
                         "Using effective URL %s temporarily",
1361
                         osEffectiveURL.c_str());
1362
            }
1363
1364
            // Is this is a redirect to a S3 URL?
1365
            if (VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) &&
1366
                !VSICurlIsS3LikeSignedURL(osURL.c_str()))
1367
            {
1368
                // Note that this is a redirect as we won't notice after the
1369
                // retry.
1370
                bS3LikeRedirect = true;
1371
1372
                if (!bRetryWithGet && osVerb == "HEAD" && response_code == 403)
1373
                {
1374
                    CPLDebug(poFS->GetDebugKey(),
1375
                             "Redirected to a AWS S3 signed URL. Retrying "
1376
                             "with GET request instead of HEAD since the URL "
1377
                             "might be valid only for GET");
1378
                    bRetryWithGet = true;
1379
                    osURL = std::move(osEffectiveURL);
1380
                    CPLFree(sWriteFuncData.pBuffer);
1381
                    CPLFree(sWriteFuncHeaderData.pBuffer);
1382
                    curl_easy_cleanup(hCurlHandle);
1383
                    goto retry;
1384
                }
1385
            }
1386
            else if (oFileProp.osRedirectURL.empty() && nTryCount == 1 &&
1387
                     ((response_code >= 300 && response_code < 400) ||
1388
                      (osVerb == "HEAD" && response_code == 403)))
1389
            {
1390
                if (response_code == 403)
1391
                {
1392
                    CPLDebug(
1393
                        poFS->GetDebugKey(),
1394
                        "Retrying redirected URL with GET instead of HEAD");
1395
                    bRetryWithGet = true;
1396
                }
1397
                osURL = std::move(osEffectiveURL);
1398
                CPLFree(sWriteFuncData.pBuffer);
1399
                CPLFree(sWriteFuncHeaderData.pBuffer);
1400
                curl_easy_cleanup(hCurlHandle);
1401
                goto retry;
1402
            }
1403
        }
1404
1405
        if (bS3LikeRedirect && response_code >= 200 && response_code < 300 &&
1406
            sWriteFuncHeaderData.nTimestampDate > 0 &&
1407
            !osEffectiveURL.empty() &&
1408
            CPLTestBool(
1409
                CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE")))
1410
        {
1411
            const GIntBig nExpireTimestamp =
1412
                VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str());
1413
            if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10)
1414
            {
1415
                const int nValidity = static_cast<int>(
1416
                    nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate);
1417
                CPLDebug(poFS->GetDebugKey(),
1418
                         "Will use redirect URL for the next %d seconds",
1419
                         nValidity);
1420
                // As our local clock might not be in sync with server clock,
1421
                // figure out the expiration timestamp in local time
1422
                oFileProp.bS3LikeRedirect = true;
1423
                oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity;
1424
                oFileProp.osRedirectURL = osEffectiveURL;
1425
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
1426
            }
1427
        }
1428
1429
        if (response_code < 400)
1430
        {
1431
            curl_off_t nSizeTmp = 0;
1432
            const CURLcode code = curl_easy_getinfo(
1433
                hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &nSizeTmp);
1434
            CPL_IGNORE_RET_VAL(dfSize);
1435
            dfSize = static_cast<double>(nSizeTmp);
1436
            if (code == 0)
1437
            {
1438
                oFileProp.eExists = EXIST_YES;
1439
                if (dfSize < 0)
1440
                {
1441
                    if (osVerb == "HEAD" && !bRetryWithGet &&
1442
                        response_code == 200)
1443
                    {
1444
                        CPLDebug(poFS->GetDebugKey(),
1445
                                 "HEAD did not provide file size. Retrying "
1446
                                 "with GET");
1447
                        bRetryWithGet = true;
1448
                        CPLFree(sWriteFuncData.pBuffer);
1449
                        CPLFree(sWriteFuncHeaderData.pBuffer);
1450
                        curl_easy_cleanup(hCurlHandle);
1451
                        goto retry;
1452
                    }
1453
                    oFileProp.fileSize = 0;
1454
                }
1455
                else
1456
                    oFileProp.fileSize = static_cast<GUIntBig>(dfSize);
1457
            }
1458
        }
1459
1460
        if (sWriteFuncHeaderData.pBuffer != nullptr &&
1461
            (response_code == 200 || response_code == 206))
1462
        {
1463
            {
1464
                char **papszHeaders =
1465
                    CSLTokenizeString2(sWriteFuncHeaderData.pBuffer, "\r\n", 0);
1466
                for (int i = 0; papszHeaders[i]; ++i)
1467
                {
1468
                    char *pszKey = nullptr;
1469
                    const char *pszValue =
1470
                        CPLParseNameValue(papszHeaders[i], &pszKey);
1471
                    if (pszKey && pszValue)
1472
                    {
1473
                        if (bGetHeaders)
1474
                        {
1475
                            m_aosHeaders.SetNameValue(pszKey, pszValue);
1476
                        }
1477
                        if (EQUAL(pszKey, "Cache-Control") &&
1478
                            EQUAL(pszValue, "no-cache") &&
1479
                            CPLTestBool(CPLGetConfigOption(
1480
                                "CPL_VSIL_CURL_HONOR_CACHE_CONTROL", "YES")))
1481
                        {
1482
                            m_bCached = false;
1483
                        }
1484
1485
                        else if (EQUAL(pszKey, "ETag"))
1486
                        {
1487
                            std::string osValue(pszValue);
1488
                            if (osValue.size() >= 2 && osValue.front() == '"' &&
1489
                                osValue.back() == '"')
1490
                                osValue = osValue.substr(1, osValue.size() - 2);
1491
                            oFileProp.ETag = std::move(osValue);
1492
                        }
1493
1494
                        // Azure Data Lake Storage
1495
                        else if (EQUAL(pszKey, "x-ms-resource-type"))
1496
                        {
1497
                            if (EQUAL(pszValue, "file"))
1498
                            {
1499
                                oFileProp.nMode |= S_IFREG;
1500
                            }
1501
                            else if (EQUAL(pszValue, "directory"))
1502
                            {
1503
                                oFileProp.bIsDirectory = true;
1504
                                oFileProp.nMode |= S_IFDIR;
1505
                            }
1506
                        }
1507
                        else if (EQUAL(pszKey, "x-ms-permissions"))
1508
                        {
1509
                            oFileProp.nMode |=
1510
                                VSICurlParseUnixPermissions(pszValue);
1511
                        }
1512
1513
                        // https://overturemapswestus2.blob.core.windows.net/release/2024-11-13.0/theme%3Ddivisions/type%3Ddivision_area
1514
                        // returns a x-ms-meta-hdi_isfolder: true header
1515
                        else if (EQUAL(pszKey, "x-ms-meta-hdi_isfolder") &&
1516
                                 EQUAL(pszValue, "true"))
1517
                        {
1518
                            oFileProp.bIsAzureFolder = true;
1519
                            oFileProp.bIsDirectory = true;
1520
                            oFileProp.nMode |= S_IFDIR;
1521
                        }
1522
                    }
1523
                    CPLFree(pszKey);
1524
                }
1525
                CSLDestroy(papszHeaders);
1526
            }
1527
        }
1528
1529
        if (UseLimitRangeGetInsteadOfHead() && response_code == 206)
1530
        {
1531
            oFileProp.eExists = EXIST_NO;
1532
            oFileProp.fileSize = 0;
1533
            if (sWriteFuncHeaderData.pBuffer != nullptr)
1534
            {
1535
                const char *pszContentRange = strstr(
1536
                    sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
1537
                if (pszContentRange == nullptr)
1538
                    pszContentRange = strstr(sWriteFuncHeaderData.pBuffer,
1539
                                             "content-range: bytes ");
1540
                if (pszContentRange)
1541
                    pszContentRange = strchr(pszContentRange, '/');
1542
                if (pszContentRange)
1543
                {
1544
                    oFileProp.eExists = EXIST_YES;
1545
                    oFileProp.fileSize = static_cast<GUIntBig>(
1546
                        CPLAtoGIntBig(pszContentRange + 1));
1547
                }
1548
1549
                // Add first bytes to cache
1550
                if (sWriteFuncData.pBuffer != nullptr)
1551
                {
1552
                    size_t nOffset = 0;
1553
                    while (nOffset < sWriteFuncData.nSize)
1554
                    {
1555
                        const size_t nToCache =
1556
                            std::min<size_t>(sWriteFuncData.nSize - nOffset,
1557
                                             knDOWNLOAD_CHUNK_SIZE);
1558
                        poFS->AddRegion(m_pszURL, nOffset, nToCache,
1559
                                        sWriteFuncData.pBuffer + nOffset);
1560
                        nOffset += nToCache;
1561
                    }
1562
                }
1563
            }
1564
        }
1565
        else if (IsDirectoryFromExists(osVerb.c_str(),
1566
                                       static_cast<int>(response_code)))
1567
        {
1568
            oFileProp.eExists = EXIST_YES;
1569
            oFileProp.fileSize = 0;
1570
            oFileProp.bIsDirectory = true;
1571
        }
1572
        // 405 = Method not allowed
1573
        else if (response_code == 405 && !bRetryWithGet && osVerb == "HEAD")
1574
        {
1575
            CPLDebug(poFS->GetDebugKey(),
1576
                     "HEAD not allowed. Retrying with GET");
1577
            bRetryWithGet = true;
1578
            CPLFree(sWriteFuncData.pBuffer);
1579
            CPLFree(sWriteFuncHeaderData.pBuffer);
1580
            curl_easy_cleanup(hCurlHandle);
1581
            goto retry;
1582
        }
1583
        else if (response_code == 416)
1584
        {
1585
            oFileProp.eExists = EXIST_YES;
1586
            oFileProp.fileSize = 0;
1587
        }
1588
        else if (response_code != 200)
1589
        {
1590
            // Look if we should attempt a retry
1591
            if (oRetryContext.CanRetry(static_cast<int>(response_code),
1592
                                       sWriteFuncHeaderData.pBuffer,
1593
                                       szCurlErrBuf))
1594
            {
1595
                CPLError(CE_Warning, CPLE_AppDefined,
1596
                         "HTTP error code: %d - %s. "
1597
                         "Retrying again in %.1f secs",
1598
                         static_cast<int>(response_code), m_pszURL,
1599
                         oRetryContext.GetCurrentDelay());
1600
                CPLSleep(oRetryContext.GetCurrentDelay());
1601
                CPLFree(sWriteFuncData.pBuffer);
1602
                CPLFree(sWriteFuncHeaderData.pBuffer);
1603
                curl_easy_cleanup(hCurlHandle);
1604
                goto retry;
1605
            }
1606
1607
            if (sWriteFuncData.pBuffer != nullptr)
1608
            {
1609
                if (UseLimitRangeGetInsteadOfHead() &&
1610
                    CanRestartOnError(sWriteFuncData.pBuffer,
1611
                                      sWriteFuncHeaderData.pBuffer, bSetError))
1612
                {
1613
                    oFileProp.bHasComputedFileSize = false;
1614
                    CPLFree(sWriteFuncData.pBuffer);
1615
                    CPLFree(sWriteFuncHeaderData.pBuffer);
1616
                    curl_easy_cleanup(hCurlHandle);
1617
                    return GetFileSizeOrHeaders(bSetError, bGetHeaders);
1618
                }
1619
                else
1620
                {
1621
                    CPL_IGNORE_RET_VAL(CanRestartOnError(
1622
                        sWriteFuncData.pBuffer, sWriteFuncHeaderData.pBuffer,
1623
                        bSetError));
1624
                }
1625
            }
1626
1627
            // If there was no VSI error thrown in the process,
1628
            // fail by reporting the HTTP response code.
1629
            if (bSetError && VSIGetLastErrorNo() == 0)
1630
            {
1631
                if (strlen(szCurlErrBuf) > 0)
1632
                {
1633
                    if (response_code == 0)
1634
                    {
1635
                        VSIError(VSIE_HttpError, "CURL error: %s",
1636
                                 szCurlErrBuf);
1637
                    }
1638
                    else
1639
                    {
1640
                        VSIError(VSIE_HttpError, "HTTP response code: %d - %s",
1641
                                 static_cast<int>(response_code), szCurlErrBuf);
1642
                    }
1643
                }
1644
                else
1645
                {
1646
                    VSIError(VSIE_HttpError, "HTTP response code: %d",
1647
                             static_cast<int>(response_code));
1648
                }
1649
            }
1650
            else
1651
            {
1652
                if (response_code != 400 && response_code != 404)
1653
                {
1654
                    CPLError(CE_Warning, CPLE_AppDefined,
1655
                             "HTTP response code on %s: %d", osURL.c_str(),
1656
                             static_cast<int>(response_code));
1657
                }
1658
                // else a CPLDebug() is emitted below
1659
            }
1660
1661
            oFileProp.eExists = EXIST_NO;
1662
            oFileProp.nHTTPCode = static_cast<int>(response_code);
1663
            oFileProp.fileSize = 0;
1664
        }
1665
        else if (sWriteFuncData.pBuffer != nullptr)
1666
        {
1667
            ProcessGetFileSizeResult(
1668
                reinterpret_cast<const char *>(sWriteFuncData.pBuffer));
1669
        }
1670
1671
        // Try to guess if this is a directory. Generally if this is a
1672
        // directory, curl will retry with an URL with slash added.
1673
        if (!osEffectiveURL.empty() &&
1674
            strncmp(osURL.c_str(), osEffectiveURL.c_str(), osURL.size()) == 0 &&
1675
            osEffectiveURL[osURL.size()] == '/' &&
1676
            oFileProp.eExists != EXIST_NO)
1677
        {
1678
            oFileProp.eExists = EXIST_YES;
1679
            oFileProp.fileSize = 0;
1680
            oFileProp.bIsDirectory = true;
1681
        }
1682
        else if (osURL.back() == '/')
1683
        {
1684
            oFileProp.bIsDirectory = true;
1685
        }
1686
1687
        if (!bAlreadyLogged)
1688
        {
1689
            CPLDebug(poFS->GetDebugKey(),
1690
                     "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
1691
                     osURL.c_str(), oFileProp.fileSize,
1692
                     static_cast<int>(response_code));
1693
        }
1694
    }
1695
1696
    CPLFree(sWriteFuncData.pBuffer);
1697
    CPLFree(sWriteFuncHeaderData.pBuffer);
1698
    curl_easy_cleanup(hCurlHandle);
1699
1700
    oFileProp.bHasComputedFileSize = true;
1701
    if (mtime > 0)
1702
        oFileProp.mTime = mtime;
1703
    // Do not update cached file properties if cURL returned a non-HTTP error
1704
    if (response_code != 0)
1705
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
1706
1707
    return oFileProp.fileSize;
1708
}
1709
1710
/************************************************************************/
1711
/*                               Exists()                               */
1712
/************************************************************************/
1713
1714
bool VSICurlHandle::Exists(bool bSetError)
1715
{
1716
    if (oFileProp.eExists == EXIST_UNKNOWN)
1717
    {
1718
        GetFileSize(bSetError);
1719
    }
1720
    else if (oFileProp.eExists == EXIST_NO)
1721
    {
1722
        // If there was no VSI error thrown in the process,
1723
        // and we know the HTTP error code of the first request where the
1724
        // file could not be retrieved, fail by reporting the HTTP code.
1725
        if (bSetError && VSIGetLastErrorNo() == 0 && oFileProp.nHTTPCode)
1726
        {
1727
            VSIError(VSIE_HttpError, "HTTP response code: %d",
1728
                     oFileProp.nHTTPCode);
1729
        }
1730
    }
1731
1732
    return oFileProp.eExists == EXIST_YES;
1733
}
1734
1735
/************************************************************************/
1736
/*                                Tell()                                */
1737
/************************************************************************/
1738
1739
/** Return the current value of the handle's offset (curOffset). */
vsi_l_offset VSICurlHandle::Tell()
{
    return this->curOffset;
}
1743
1744
/************************************************************************/
1745
/*                       GetRedirectURLIfValid()                        */
1746
/************************************************************************/
1747
1748
std::string
1749
VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired,
1750
                                     CPLStringList &aosHTTPOptions) const
1751
{
1752
    bHasExpired = false;
1753
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
1754
1755
    std::string osURL(m_pszURL + m_osQueryString);
1756
    if (oFileProp.bS3LikeRedirect)
1757
    {
1758
        if (time(nullptr) + 1 < oFileProp.nExpireTimestampLocal)
1759
        {
1760
            CPLDebug(poFS->GetDebugKey(),
1761
                     "Using redirect URL as it looks to be still valid "
1762
                     "(%d seconds left)",
1763
                     static_cast<int>(oFileProp.nExpireTimestampLocal -
1764
                                      time(nullptr)));
1765
            osURL = oFileProp.osRedirectURL;
1766
        }
1767
        else
1768
        {
1769
            CPLDebug(poFS->GetDebugKey(),
1770
                     "Redirect URL has expired. Using original URL");
1771
            oFileProp.bS3LikeRedirect = false;
1772
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
1773
            bHasExpired = true;
1774
        }
1775
    }
1776
    else if (!oFileProp.osRedirectURL.empty())
1777
    {
1778
        osURL = oFileProp.osRedirectURL;
1779
        bHasExpired = false;
1780
    }
1781
1782
    if (m_pszURL != osURL)
1783
    {
1784
        const char *pszAuthorizationHeaderAllowed = VSIGetPathSpecificOption(
1785
            m_osFilename.c_str(),
1786
            "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT",
1787
            "IF_SAME_HOST");
1788
        if (EQUAL(pszAuthorizationHeaderAllowed, "IF_SAME_HOST"))
1789
        {
1790
            const auto ExtractServer = [](const std::string &s)
1791
            {
1792
                size_t afterHTTPPos = 0;
1793
                if (STARTS_WITH(s.c_str(), "http://"))
1794
                    afterHTTPPos = strlen("http://");
1795
                else if (STARTS_WITH(s.c_str(), "https://"))
1796
                    afterHTTPPos = strlen("https://");
1797
                const auto posSlash = s.find('/', afterHTTPPos);
1798
                if (posSlash != std::string::npos)
1799
                    return s.substr(afterHTTPPos, posSlash - afterHTTPPos);
1800
                else
1801
                    return s.substr(afterHTTPPos);
1802
            };
1803
1804
            if (ExtractServer(osURL) != ExtractServer(m_pszURL))
1805
            {
1806
                aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED",
1807
                                            "NO");
1808
            }
1809
        }
1810
        else if (!CPLTestBool(pszAuthorizationHeaderAllowed))
1811
        {
1812
            aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", "NO");
1813
        }
1814
    }
1815
1816
    return osURL;
1817
}
1818
1819
/************************************************************************/
1820
/*                           CurrentDownload                            */
1821
/************************************************************************/
1822
1823
namespace
1824
{
1825
struct CurrentDownload
1826
{
1827
    VSICurlFilesystemHandlerBase *m_poFS = nullptr;
1828
    std::string m_osURL{};
1829
    vsi_l_offset m_nStartOffset = 0;
1830
    int m_nBlocks = 0;
1831
    std::string m_osAlreadyDownloadedData{};
1832
    bool m_bHasAlreadyDownloadedData = false;
1833
1834
    CurrentDownload(VSICurlFilesystemHandlerBase *poFS, const char *pszURL,
1835
                    vsi_l_offset startOffset, int nBlocks)
1836
        : m_poFS(poFS), m_osURL(pszURL), m_nStartOffset(startOffset),
1837
          m_nBlocks(nBlocks)
1838
    {
1839
        auto res = m_poFS->NotifyStartDownloadRegion(m_osURL, m_nStartOffset,
1840
                                                     m_nBlocks);
1841
        m_bHasAlreadyDownloadedData = res.first;
1842
        m_osAlreadyDownloadedData = std::move(res.second);
1843
    }
1844
1845
    bool HasAlreadyDownloadedData() const
1846
    {
1847
        return m_bHasAlreadyDownloadedData;
1848
    }
1849
1850
    const std::string &GetAlreadyDownloadedData() const
1851
    {
1852
        return m_osAlreadyDownloadedData;
1853
    }
1854
1855
    void SetData(const std::string &osData)
1856
    {
1857
        CPLAssert(!m_bHasAlreadyDownloadedData);
1858
        m_bHasAlreadyDownloadedData = true;
1859
        m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks,
1860
                                         osData);
1861
    }
1862
1863
    ~CurrentDownload()
1864
    {
1865
        if (!m_bHasAlreadyDownloadedData)
1866
            m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks,
1867
                                             std::string());
1868
    }
1869
1870
    CurrentDownload(const CurrentDownload &) = delete;
1871
    CurrentDownload &operator=(const CurrentDownload &) = delete;
1872
};
1873
}  // namespace
1874
1875
/************************************************************************/
1876
/*                     NotifyStartDownloadRegion()                      */
1877
/************************************************************************/
1878
1879
/** Indicate intent at downloading a new region.
1880
 *
1881
 * If the region is already in download in another thread, then wait for its
1882
 * completion.
1883
 *
1884
 * Returns:
1885
 * - (false, empty string) if a new download is needed
1886
 * - (true, region_content) if we have been waiting for a download of the same
1887
 *   region to be completed and got its result. Note that region_content will be
1888
 *   empty if the download of that region failed.
1889
 */
1890
std::pair<bool, std::string>
1891
VSICurlFilesystemHandlerBase::NotifyStartDownloadRegion(
1892
    const std::string &osURL, vsi_l_offset startOffset, int nBlocks)
1893
{
1894
    std::string osId(osURL);
1895
    osId += '_';
1896
    osId += std::to_string(startOffset);
1897
    osId += '_';
1898
    osId += std::to_string(nBlocks);
1899
1900
    m_oMutex.lock();
1901
    auto oIter = m_oMapRegionInDownload.find(osId);
1902
    if (oIter != m_oMapRegionInDownload.end())
1903
    {
1904
        auto &region = *(oIter->second);
1905
        std::unique_lock<std::mutex> oRegionLock(region.oMutex);
1906
        m_oMutex.unlock();
1907
        region.nWaiters++;
1908
        while (region.bDownloadInProgress)
1909
        {
1910
            region.oCond.wait(oRegionLock);
1911
        }
1912
        std::string osRet = region.osData;
1913
        region.nWaiters--;
1914
        region.oCond.notify_one();
1915
        return std::pair<bool, std::string>(true, osRet);
1916
    }
1917
    else
1918
    {
1919
        auto poRegionInDownload = std::make_unique<RegionInDownload>();
1920
        poRegionInDownload->bDownloadInProgress = true;
1921
        m_oMapRegionInDownload[osId] = std::move(poRegionInDownload);
1922
        m_oMutex.unlock();
1923
        return std::pair<bool, std::string>(false, std::string());
1924
    }
1925
}
1926
1927
/************************************************************************/
1928
/*                      NotifyStopDownloadRegion()                      */
1929
/************************************************************************/
1930
1931
void VSICurlFilesystemHandlerBase::NotifyStopDownloadRegion(
1932
    const std::string &osURL, vsi_l_offset startOffset, int nBlocks,
1933
    const std::string &osData)
1934
{
1935
    std::string osId(osURL);
1936
    osId += '_';
1937
    osId += std::to_string(startOffset);
1938
    osId += '_';
1939
    osId += std::to_string(nBlocks);
1940
1941
    m_oMutex.lock();
1942
    auto oIter = m_oMapRegionInDownload.find(osId);
1943
    CPLAssert(oIter != m_oMapRegionInDownload.end());
1944
    auto &region = *(oIter->second);
1945
    {
1946
        std::unique_lock<std::mutex> oRegionLock(region.oMutex);
1947
        if (region.nWaiters)
1948
        {
1949
            region.osData = osData;
1950
            region.bDownloadInProgress = false;
1951
            region.oCond.notify_all();
1952
1953
            while (region.nWaiters)
1954
            {
1955
                region.oCond.wait(oRegionLock);
1956
            }
1957
        }
1958
    }
1959
    m_oMapRegionInDownload.erase(oIter);
1960
    m_oMutex.unlock();
1961
}
1962
1963
/************************************************************************/
1964
/*                           DownloadRegion()                           */
1965
/************************************************************************/
1966
1967
/** Download a region of the resource with a ranged GET request.
 *
 * The requested byte range starts at startOffset and spans
 * nBlocks * VSICURLGetDownloadChunkSize() bytes, clamped to the file size
 * when it is already known. On success the received bytes are passed to
 * DownloadRegionPostProcess() and returned.
 *
 * If a download of the same region was already registered (see
 * CurrentDownload / NotifyStartDownloadRegion()), this call waits for it and
 * returns its result instead of issuing a request.
 *
 * @param startOffset offset of the first byte to download.
 * @param nBlocks     number of download chunks to request.
 * @return the downloaded bytes, or an empty string if the handle was
 *         interrupted, the file is known not to exist, or the request
 *         failed.
 */
std::string VSICurlHandle::DownloadRegion(const vsi_l_offset startOffset,
1968
                                          const int nBlocks)
1969
{
1970
    if (bInterrupted && bStopOnInterruptUntilUninstall)
1971
        return std::string();
1972
1973
    if (oFileProp.eExists == EXIST_NO)
1974
        return std::string();
1975
1976
    // Check if there is not a download of the same region in progress in
1977
    // another thread, and if so wait for it to be completed
1978
    // Note: on every return path below that does not call SetData(), the
    // destructor of currentDownload sends the stop notification with an
    // empty payload.
    CurrentDownload currentDownload(poFS, m_pszURL, startOffset, nBlocks);
1979
    if (currentDownload.HasAlreadyDownloadedData())
1980
    {
1981
        return currentDownload.GetAlreadyDownloadedData();
1982
    }
1983
1984
// Full restart point: the query string, the redirect URL, the HTTP options
// and the retry context are all recomputed.
begin:
1985
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
1986
1987
    UpdateQueryString();
1988
1989
    bool bHasExpired = false;
1990
1991
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
1992
    std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
1993
    bool bUsedRedirect = osURL != m_pszURL;
1994
1995
    WriteFuncStruct sWriteFuncData;
1996
    WriteFuncStruct sWriteFuncHeaderData;
1997
    CPLHTTPRetryContext oRetryContext(m_oRetryParameters);
1998
1999
// Retry point: a new curl easy handle is created for each attempt, while
// osURL, aosHTTPOptions and oRetryContext are kept from the previous one.
retry:
2000
    CURL *hCurlHandle = curl_easy_init();
2001
    struct curl_slist *headers =
2002
        VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
2003
2004
    if (!AllowAutomaticRedirection())
2005
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
2006
2007
    // Body of the response is accumulated in sWriteFuncData.
    VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk,
2008
                               pReadCbkUserData);
2009
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2010
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
2011
                               VSICurlHandleWriteFunc);
2012
2013
    // Response headers are accumulated in sWriteFuncHeaderData.
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
2014
                               nullptr);
2015
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
2016
                               &sWriteFuncHeaderData);
2017
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
2018
                               VSICurlHandleWriteFunc);
2019
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
2020
    sWriteFuncHeaderData.nStartOffset = startOffset;
2021
    sWriteFuncHeaderData.nEndOffset =
2022
        startOffset +
2023
        static_cast<vsi_l_offset>(nBlocks) * VSICURLGetDownloadChunkSize() - 1;
2024
    // Some servers don't like we try to read after end-of-file (#5786).
2025
    if (oFileProp.bHasComputedFileSize &&
2026
        sWriteFuncHeaderData.nEndOffset >= oFileProp.fileSize)
2027
    {
2028
        sWriteFuncHeaderData.nEndOffset = oFileProp.fileSize - 1;
2029
    }
2030
2031
    // Inclusive byte range, formatted as "<first>-<last>".
    char rangeStr[512] = {};
2032
    snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2033
             startOffset, sWriteFuncHeaderData.nEndOffset);
2034
2035
    if constexpr (ENABLE_DEBUG)
2036
    {
2037
        CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", rangeStr,
2038
                 osURL.c_str());
2039
    }
2040
2041
    std::string osHeaderRange;  // leave in this scope
2042
    if (sWriteFuncHeaderData.bIsHTTP)
2043
    {
2044
        osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr);
2045
        // So it gets included in Azure signature
2046
        headers = curl_slist_append(headers, osHeaderRange.c_str());
2047
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
2048
    }
2049
    else
2050
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
2051
2052
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
2053
    szCurlErrBuf[0] = '\0';
2054
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
2055
2056
    headers = GetCurlHeaders("GET", headers);
2057
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
2058
2059
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1);
2060
2061
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt);
2062
2063
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
2064
2065
    curl_slist_free_all(headers);
2066
2067
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
2068
2069
    if (sWriteFuncData.bInterrupted || m_bInterrupt)
2070
    {
2071
        bInterrupted = true;
2072
2073
        // Notify that the download of the current region is finished
2074
        currentDownload.SetData(std::string());
2075
2076
        CPLFree(sWriteFuncData.pBuffer);
2077
        CPLFree(sWriteFuncHeaderData.pBuffer);
2078
        curl_easy_cleanup(hCurlHandle);
2079
2080
        return std::string();
2081
    }
2082
2083
    long response_code = 0;
2084
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
2085
2086
    if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0')
2087
    {
2088
        CPLDebug(poFS->GetDebugKey(),
2089
                 "DownloadRegion(%s): response_code=%d, msg=%s", osURL.c_str(),
2090
                 static_cast<int>(response_code), szCurlErrBuf);
2091
    }
2092
2093
    // Record the modification time reported by curl, when there is one.
    long mtime = 0;
2094
    curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME, &mtime);
2095
    if (mtime > 0)
2096
    {
2097
        oFileProp.mTime = mtime;
2098
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
2099
    }
2100
2101
    if constexpr (ENABLE_DEBUG)
2102
    {
2103
        CPLDebug(poFS->GetDebugKey(), "Got response_code=%ld", response_code);
2104
    }
2105
2106
    if (bUsedRedirect &&
2107
        (response_code == 403 ||
2108
         // Below case is in particular for
2109
         // gdalinfo
2110
         // /vsicurl/https://lpdaac.earthdata.nasa.gov/lp-prod-protected/HLSS30.015/HLS.S30.T10TEK.2020273T190109.v1.5.B8A.tif
2111
         // --config GDAL_DISABLE_READDIR_ON_OPEN EMPTY_DIR --config
2112
         // GDAL_HTTP_COOKIEFILE /tmp/cookie.txt --config GDAL_HTTP_COOKIEJAR
2113
         // /tmp/cookie.txt We got the redirect URL from a HEAD request, but it
2114
         // is not valid for a GET. So retry with GET on original URL to get a
2115
         // redirect URL valid for it.
2116
         (response_code == 400 &&
2117
          osURL.find(".cloudfront.net") != std::string::npos)))
2118
    {
2119
        CPLDebug(poFS->GetDebugKey(),
2120
                 "Got an error with redirect URL. Retrying with original one");
2121
        oFileProp.bS3LikeRedirect = false;
2122
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
2123
        // Clearing bUsedRedirect guarantees this fallback is taken at most
        // once per pass.
        bUsedRedirect = false;
2124
        osURL = m_pszURL;
2125
        CPLFree(sWriteFuncData.pBuffer);
2126
        CPLFree(sWriteFuncHeaderData.pBuffer);
2127
        curl_easy_cleanup(hCurlHandle);
2128
        goto retry;
2129
    }
2130
2131
    if (response_code == 401 && oRetryContext.CanRetry())
2132
    {
2133
        CPLDebug(poFS->GetDebugKey(), "Unauthorized, trying to authenticate");
2134
        // Resources are released before Authenticate() since both outcomes
        // (retry or return) leave the current attempt.
        CPLFree(sWriteFuncData.pBuffer);
2135
        CPLFree(sWriteFuncHeaderData.pBuffer);
2136
        curl_easy_cleanup(hCurlHandle);
2137
        if (Authenticate(m_osFilename.c_str()))
2138
            goto retry;
2139
        return std::string();
2140
    }
2141
2142
    UpdateRedirectInfo(hCurlHandle, sWriteFuncHeaderData);
2143
2144
    // Any response code other than the ones listed here, or an error flagged
    // while processing the headers, is handled as a failure.
    if ((response_code != 200 && response_code != 206 && response_code != 225 &&
2145
         response_code != 226 && response_code != 426) ||
2146
        sWriteFuncHeaderData.bError)
2147
    {
2148
        if (sWriteFuncData.pBuffer != nullptr &&
2149
            CanRestartOnError(
2150
                reinterpret_cast<const char *>(sWriteFuncData.pBuffer),
2151
                reinterpret_cast<const char *>(sWriteFuncHeaderData.pBuffer),
2152
                true))
2153
        {
2154
            CPLFree(sWriteFuncData.pBuffer);
2155
            CPLFree(sWriteFuncHeaderData.pBuffer);
2156
            curl_easy_cleanup(hCurlHandle);
2157
            goto begin;
2158
        }
2159
2160
        // Look if we should attempt a retry
2161
        if (oRetryContext.CanRetry(static_cast<int>(response_code),
2162
                                   sWriteFuncHeaderData.pBuffer, szCurlErrBuf))
2163
        {
2164
            CPLError(CE_Warning, CPLE_AppDefined,
2165
                     "HTTP error code: %d - %s. "
2166
                     "Retrying again in %.1f secs",
2167
                     static_cast<int>(response_code), m_pszURL,
2168
                     oRetryContext.GetCurrentDelay());
2169
            CPLSleep(oRetryContext.GetCurrentDelay());
2170
            CPLFree(sWriteFuncData.pBuffer);
2171
            CPLFree(sWriteFuncHeaderData.pBuffer);
2172
            curl_easy_cleanup(hCurlHandle);
2173
            goto retry;
2174
        }
2175
2176
        if (response_code >= 400 && szCurlErrBuf[0] != '\0')
2177
        {
2178
            if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
2179
                CPLError(
2180
                    CE_Failure, CPLE_AppDefined,
2181
                    "%d: %s, Range downloading not supported by this server!",
2182
                    static_cast<int>(response_code), szCurlErrBuf);
2183
            else
2184
                CPLError(CE_Failure, CPLE_AppDefined, "%d: %s",
2185
                         static_cast<int>(response_code), szCurlErrBuf);
2186
        }
2187
        else if (response_code == 416) /* Range Not Satisfiable */
2188
        {
2189
            if (sWriteFuncData.pBuffer)
2190
            {
2191
                CPLError(
2192
                    CE_Failure, CPLE_AppDefined,
2193
                    "%d: Range downloading not supported by this server: %s",
2194
                    static_cast<int>(response_code), sWriteFuncData.pBuffer);
2195
            }
2196
            else
2197
            {
2198
                CPLError(CE_Failure, CPLE_AppDefined,
2199
                         "%d: Range downloading not supported by this server",
2200
                         static_cast<int>(response_code));
2201
            }
2202
        }
2203
        // A failed request for the very beginning of a file whose size is
        // still unknown is recorded as "file does not exist".
        if (!oFileProp.bHasComputedFileSize && startOffset == 0)
2204
        {
2205
            oFileProp.bHasComputedFileSize = true;
2206
            oFileProp.fileSize = 0;
2207
            oFileProp.eExists = EXIST_NO;
2208
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
2209
        }
2210
        CPLFree(sWriteFuncData.pBuffer);
2211
        CPLFree(sWriteFuncHeaderData.pBuffer);
2212
        curl_easy_cleanup(hCurlHandle);
2213
        return std::string();
2214
    }
2215
2216
    if (!oFileProp.bHasComputedFileSize && sWriteFuncHeaderData.pBuffer)
2217
    {
2218
        // Try to retrieve the filesize from the HTTP headers
2219
        // if in the form: "Content-Range: bytes x-y/filesize".
2220
        // Note: the parsing below NUL-terminates the matched line in place,
        // i.e. it modifies the header buffer.
        char *pszContentRange =
2221
            strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
2222
        if (pszContentRange == nullptr)
2223
            pszContentRange =
2224
                strstr(sWriteFuncHeaderData.pBuffer, "content-range: bytes ");
2225
        if (pszContentRange)
2226
        {
2227
            char *pszEOL = strchr(pszContentRange, '\n');
2228
            if (pszEOL)
2229
            {
2230
                *pszEOL = 0;
2231
                pszEOL = strchr(pszContentRange, '\r');
2232
                if (pszEOL)
2233
                    *pszEOL = 0;
2234
                char *pszSlash = strchr(pszContentRange, '/');
2235
                if (pszSlash)
2236
                {
2237
                    pszSlash++;
2238
                    oFileProp.fileSize = CPLScanUIntBig(
2239
                        pszSlash, static_cast<int>(strlen(pszSlash)));
2240
                }
2241
            }
2242
        }
2243
        else if (STARTS_WITH(m_pszURL, "ftp"))
2244
        {
2245
            // Parse 213 answer for FTP protocol.
2246
            char *pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
2247
            if (pszSize)
2248
            {
2249
                pszSize += 4;
2250
                char *pszEOL = strchr(pszSize, '\n');
2251
                if (pszEOL)
2252
                {
2253
                    *pszEOL = 0;
2254
                    pszEOL = strchr(pszSize, '\r');
2255
                    if (pszEOL)
2256
                        *pszEOL = 0;
2257
2258
                    oFileProp.fileSize = CPLScanUIntBig(
2259
                        pszSize, static_cast<int>(strlen(pszSize)));
2260
                }
2261
            }
2262
        }
2263
2264
        // Only a non-zero size is recorded as the computed file size.
        if (oFileProp.fileSize != 0)
2265
        {
2266
            oFileProp.eExists = EXIST_YES;
2267
2268
            if constexpr (ENABLE_DEBUG)
2269
            {
2270
                CPLDebug(poFS->GetDebugKey(),
2271
                         "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
2272
                         m_pszURL, oFileProp.fileSize,
2273
                         static_cast<int>(response_code));
2274
            }
2275
2276
            oFileProp.bHasComputedFileSize = true;
2277
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
2278
        }
2279
    }
2280
2281
    DownloadRegionPostProcess(startOffset, nBlocks, sWriteFuncData.pBuffer,
2282
                              sWriteFuncData.nSize);
2283
2284
    std::string osRet;
2285
    osRet.assign(sWriteFuncData.pBuffer, sWriteFuncData.nSize);
2286
2287
    // Notify that the download of the current region is finished
2288
    currentDownload.SetData(osRet);
2289
2290
    CPLFree(sWriteFuncData.pBuffer);
2291
    CPLFree(sWriteFuncHeaderData.pBuffer);
2292
    curl_easy_cleanup(hCurlHandle);
2293
2294
    return osRet;
2295
}
2296
2297
/************************************************************************/
2298
/*                         UpdateRedirectInfo()                         */
2299
/************************************************************************/
2300
2301
void VSICurlHandle::UpdateRedirectInfo(
2302
    CURL *hCurlHandle, const WriteFuncStruct &sWriteFuncHeaderData)
2303
{
2304
    std::string osEffectiveURL;
2305
    {
2306
        char *pszEffectiveURL = nullptr;
2307
        curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL,
2308
                          &pszEffectiveURL);
2309
        if (pszEffectiveURL)
2310
            osEffectiveURL = pszEffectiveURL;
2311
    }
2312
2313
    if (!oFileProp.bS3LikeRedirect && !osEffectiveURL.empty() &&
2314
        strstr(osEffectiveURL.c_str(), m_pszURL) == nullptr)
2315
    {
2316
        CPLDebug(poFS->GetDebugKey(), "Effective URL: %s",
2317
                 osEffectiveURL.c_str());
2318
2319
        long response_code = 0;
2320
        curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
2321
        if (response_code >= 200 && response_code < 300 &&
2322
            sWriteFuncHeaderData.nTimestampDate > 0 &&
2323
            VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) &&
2324
            !VSICurlIsS3LikeSignedURL(m_pszURL) &&
2325
            CPLTestBool(
2326
                CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE")))
2327
        {
2328
            GIntBig nExpireTimestamp =
2329
                VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str());
2330
            if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10)
2331
            {
2332
                const int nValidity = static_cast<int>(
2333
                    nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate);
2334
                CPLDebug(poFS->GetDebugKey(),
2335
                         "Will use redirect URL for the next %d seconds",
2336
                         nValidity);
2337
                // As our local clock might not be in sync with server clock,
2338
                // figure out the expiration timestamp in local time.
2339
                oFileProp.bS3LikeRedirect = true;
2340
                oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity;
2341
                oFileProp.osRedirectURL = std::move(osEffectiveURL);
2342
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
2343
            }
2344
        }
2345
    }
2346
}
2347
2348
/************************************************************************/
2349
/*                     DownloadRegionPostProcess()                      */
2350
/************************************************************************/
2351
2352
void VSICurlHandle::DownloadRegionPostProcess(const vsi_l_offset startOffset,
2353
                                              const int nBlocks,
2354
                                              const char *pBuffer, size_t nSize)
2355
{
2356
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
2357
    lastDownloadedOffset = startOffset + static_cast<vsi_l_offset>(nBlocks) *
2358
                                             knDOWNLOAD_CHUNK_SIZE;
2359
2360
    if (nSize > static_cast<size_t>(nBlocks) * knDOWNLOAD_CHUNK_SIZE)
2361
    {
2362
        if constexpr (ENABLE_DEBUG)
2363
        {
2364
            CPLDebug(
2365
                poFS->GetDebugKey(),
2366
                "Got more data than expected : %u instead of %u",
2367
                static_cast<unsigned int>(nSize),
2368
                static_cast<unsigned int>(nBlocks * knDOWNLOAD_CHUNK_SIZE));
2369
        }
2370
    }
2371
2372
    vsi_l_offset l_startOffset = startOffset;
2373
    while (nSize > 0)
2374
    {
2375
#if DEBUG_VERBOSE
2376
        if constexpr (ENABLE_DEBUG)
2377
        {
2378
            CPLDebug(poFS->GetDebugKey(), "Add region %u - %u",
2379
                     static_cast<unsigned int>(startOffset),
2380
                     static_cast<unsigned int>(std::min(
2381
                         static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize)));
2382
        }
2383
#endif
2384
        const size_t nChunkSize =
2385
            std::min(static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize);
2386
        poFS->AddRegion(m_pszURL, l_startOffset, nChunkSize, pBuffer);
2387
        l_startOffset += nChunkSize;
2388
        pBuffer += nChunkSize;
2389
        nSize -= nChunkSize;
2390
    }
2391
}
2392
2393
/************************************************************************/
2394
/*                                Read()                                */
2395
/************************************************************************/
2396
2397
size_t VSICurlHandle::Read(void *const pBufferIn, size_t const nBytes)
2398
{
2399
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
2400
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
2401
    NetworkStatisticsAction oContextAction("Read");
2402
2403
    size_t nBufferRequestSize = nBytes;
2404
    if (nBufferRequestSize == 0)
2405
        return 0;
2406
2407
    void *pBuffer = pBufferIn;
2408
2409
#if DEBUG_VERBOSE
2410
    CPLDebug(poFS->GetDebugKey(), "offset=%d, size=%d",
2411
             static_cast<int>(curOffset), static_cast<int>(nBufferRequestSize));
2412
#endif
2413
2414
    vsi_l_offset iterOffset = curOffset;
2415
    const int knMAX_REGIONS = GetMaxRegions();
2416
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
2417
    while (nBufferRequestSize)
2418
    {
2419
        // Don't try to read after end of file.
2420
        poFS->GetCachedFileProp(m_pszURL, oFileProp);
2421
        if (oFileProp.bHasComputedFileSize && iterOffset >= oFileProp.fileSize)
2422
        {
2423
            if (iterOffset == curOffset)
2424
            {
2425
                CPLDebug(poFS->GetDebugKey(),
2426
                         "Request at offset " CPL_FRMT_GUIB
2427
                         ", after end of file",
2428
                         iterOffset);
2429
            }
2430
            break;
2431
        }
2432
2433
        const vsi_l_offset nOffsetToDownload =
2434
            (iterOffset / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE;
2435
        std::string osRegion;
2436
        std::shared_ptr<std::string> psRegion =
2437
            poFS->GetRegion(m_pszURL, nOffsetToDownload);
2438
        if (psRegion != nullptr)
2439
        {
2440
            osRegion = *psRegion;
2441
        }
2442
        else
2443
        {
2444
            if (nOffsetToDownload == lastDownloadedOffset)
2445
            {
2446
                // In case of consecutive reads (of small size), we use a
2447
                // heuristic that we will read the file sequentially, so
2448
                // we double the requested size to decrease the number of
2449
                // client/server roundtrips.
2450
                constexpr int MAX_CHUNK_SIZE_INCREASE_FACTOR = 128;
2451
                if (nBlocksToDownload < MAX_CHUNK_SIZE_INCREASE_FACTOR)
2452
                    nBlocksToDownload *= 2;
2453
            }
2454
            else
2455
            {
2456
                // Random reads. Cancel the above heuristics.
2457
                nBlocksToDownload = 1;
2458
            }
2459
2460
            // Ensure that we will request at least the number of blocks
2461
            // to satisfy the remaining buffer size to read.
2462
            const vsi_l_offset nEndOffsetToDownload =
2463
                ((iterOffset + nBufferRequestSize + knDOWNLOAD_CHUNK_SIZE - 1) /
2464
                 knDOWNLOAD_CHUNK_SIZE) *
2465
                knDOWNLOAD_CHUNK_SIZE;
2466
            const int nMinBlocksToDownload =
2467
                static_cast<int>((nEndOffsetToDownload - nOffsetToDownload) /
2468
                                 knDOWNLOAD_CHUNK_SIZE);
2469
            if (nBlocksToDownload < nMinBlocksToDownload)
2470
                nBlocksToDownload = nMinBlocksToDownload;
2471
2472
            // Avoid reading already cached data.
2473
            // Note: this might get evicted if concurrent reads are done, but
2474
            // this should not cause bugs. Just missed optimization.
2475
            for (int i = 1; i < nBlocksToDownload; i++)
2476
            {
2477
                if (poFS->GetRegion(m_pszURL, nOffsetToDownload +
2478
                                                  static_cast<vsi_l_offset>(i) *
2479
                                                      knDOWNLOAD_CHUNK_SIZE) !=
2480
                    nullptr)
2481
                {
2482
                    nBlocksToDownload = i;
2483
                    break;
2484
                }
2485
            }
2486
2487
            // We can't download more than knMAX_REGIONS chunks at a time,
2488
            // otherwise the cache will not be big enough to store them and
2489
            // copy their content to the target buffer.
2490
            if (nBlocksToDownload > knMAX_REGIONS)
2491
                nBlocksToDownload = knMAX_REGIONS;
2492
2493
            osRegion = DownloadRegion(nOffsetToDownload, nBlocksToDownload);
2494
            if (osRegion.empty())
2495
            {
2496
                if (!bInterrupted)
2497
                    bError = true;
2498
                return 0;
2499
            }
2500
        }
2501
2502
        const vsi_l_offset nRegionOffset = iterOffset - nOffsetToDownload;
2503
        if (osRegion.size() < nRegionOffset)
2504
        {
2505
            if (iterOffset == curOffset)
2506
            {
2507
                CPLDebug(poFS->GetDebugKey(),
2508
                         "Request at offset " CPL_FRMT_GUIB
2509
                         ", after end of file",
2510
                         iterOffset);
2511
            }
2512
            break;
2513
        }
2514
2515
        const int nToCopy = static_cast<int>(
2516
            std::min(static_cast<vsi_l_offset>(nBufferRequestSize),
2517
                     osRegion.size() - nRegionOffset));
2518
        memcpy(pBuffer, osRegion.data() + nRegionOffset, nToCopy);
2519
        pBuffer = static_cast<char *>(pBuffer) + nToCopy;
2520
        iterOffset += nToCopy;
2521
        nBufferRequestSize -= nToCopy;
2522
        if (osRegion.size() < static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE) &&
2523
            nBufferRequestSize != 0)
2524
        {
2525
            break;
2526
        }
2527
    }
2528
2529
    const size_t ret = static_cast<size_t>(iterOffset - curOffset);
2530
    if (ret != nBytes)
2531
        bEOF = true;
2532
2533
    curOffset = iterOffset;
2534
2535
    return ret;
2536
}
2537
2538
/************************************************************************/
2539
/*                           ReadMultiRange()                           */
2540
/************************************************************************/
2541
2542
int VSICurlHandle::ReadMultiRange(int const nRanges, void **const ppData,
2543
                                  const vsi_l_offset *const panOffsets,
2544
                                  const size_t *const panSizes)
2545
{
2546
    if (bInterrupted && bStopOnInterruptUntilUninstall)
2547
        return FALSE;
2548
2549
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
2550
    if (oFileProp.eExists == EXIST_NO)
2551
        return -1;
2552
2553
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
2554
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
2555
    NetworkStatisticsAction oContextAction("ReadMultiRange");
2556
2557
    const char *pszMultiRangeStrategy =
2558
        CPLGetConfigOption("GDAL_HTTP_MULTIRANGE", "");
2559
    if (EQUAL(pszMultiRangeStrategy, "SINGLE_GET"))
2560
    {
2561
        // Just in case someone needs it, but the interest of this mode is
2562
        // rather dubious now. We could probably remove it
2563
        return ReadMultiRangeSingleGet(nRanges, ppData, panOffsets, panSizes);
2564
    }
2565
    else if (nRanges == 1 || EQUAL(pszMultiRangeStrategy, "SERIAL"))
2566
    {
2567
        return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets,
2568
                                                panSizes);
2569
    }
2570
2571
    UpdateQueryString();
2572
2573
    bool bHasExpired = false;
2574
2575
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
2576
    std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
2577
    if (bHasExpired)
2578
    {
2579
        return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets,
2580
                                                panSizes);
2581
    }
2582
2583
    CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL);
2584
#ifdef CURLPIPE_MULTIPLEX
2585
    // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is
2586
    // used)
2587
    // Not that this does not enable HTTP/1.1 pipeling, which is not
2588
    // recommended for example by Google Cloud Storage.
2589
    // For HTTP/1.1, parallel connections work better since you can get
2590
    // results out of order.
2591
    if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES")))
2592
    {
2593
        curl_multi_setopt(hMultiHandle, CURLMOPT_PIPELINING,
2594
                          CURLPIPE_MULTIPLEX);
2595
    }
2596
#endif
2597
2598
    struct CurlErrBuffer
2599
    {
2600
        std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
2601
    };
2602
2603
    // Sort ranges by file offset so the merge loop below can coalesce
2604
    // adjacent ranges regardless of the order the caller passed them.
2605
    // The ppData buffer pointers travel with their offsets, so the
2606
    // distribute logic fills the correct caller buffers after reading.
2607
    std::vector<int> anSortOrder(nRanges);
2608
    std::iota(anSortOrder.begin(), anSortOrder.end(), 0);
2609
    std::sort(anSortOrder.begin(), anSortOrder.end(), [panOffsets](int a, int b)
2610
              { return panOffsets[a] < panOffsets[b]; });
2611
2612
    std::vector<void *> apSortedData(nRanges);
2613
    std::vector<vsi_l_offset> anSortedOffsets(nRanges);
2614
    std::vector<size_t> anSortedSizes(nRanges);
2615
    for (int i = 0; i < nRanges; ++i)
2616
    {
2617
        apSortedData[i] = ppData[anSortOrder[i]];
2618
        anSortedOffsets[i] = panOffsets[anSortOrder[i]];
2619
        anSortedSizes[i] = panSizes[anSortOrder[i]];
2620
    }
2621
2622
    const bool bMergeConsecutiveRanges = CPLTestBool(
2623
        CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE"));
2624
2625
    // Build list of merged requests upfront, each with its own retry context
2626
    struct MergedRequest
2627
    {
2628
        int iFirstRange;
2629
        int iLastRange;
2630
        vsi_l_offset nStartOffset;
2631
        size_t nSize;
2632
        CPLHTTPRetryContext retryContext;
2633
        bool bToRetry = true;  // true initially to trigger first attempt
2634
2635
        MergedRequest(int first, int last, vsi_l_offset start, size_t size,
2636
                      const CPLHTTPRetryParameters &params)
2637
            : iFirstRange(first), iLastRange(last), nStartOffset(start),
2638
              nSize(size), retryContext(params)
2639
        {
2640
        }
2641
    };
2642
2643
    std::vector<MergedRequest> asMergedRequests;
2644
    for (int i = 0; i < nRanges;)
2645
    {
2646
        size_t nSize = 0;
2647
        int iNext = i;
2648
        // Identify consecutive ranges
2649
        while (bMergeConsecutiveRanges && iNext + 1 < nRanges &&
2650
               anSortedOffsets[iNext] + anSortedSizes[iNext] ==
2651
                   anSortedOffsets[iNext + 1])
2652
        {
2653
            nSize += anSortedSizes[iNext];
2654
            iNext++;
2655
        }
2656
        nSize += anSortedSizes[iNext];
2657
2658
        if (nSize == 0)
2659
        {
2660
            i = iNext + 1;
2661
            continue;
2662
        }
2663
2664
        asMergedRequests.emplace_back(i, iNext, anSortedOffsets[i], nSize,
2665
                                      m_oRetryParameters);
2666
        i = iNext + 1;
2667
    }
2668
2669
    if (asMergedRequests.empty())
2670
        return 0;
2671
2672
    int nRet = 0;
2673
    size_t nTotalDownloaded = 0;
2674
2675
    // Retry loop: re-issue only failed requests that are retryable
2676
    while (true)
2677
    {
2678
        const size_t nRequests = asMergedRequests.size();
2679
        std::vector<CURL *> aHandles(nRequests, nullptr);
2680
        std::vector<WriteFuncStruct> asWriteFuncData(nRequests);
2681
        std::vector<WriteFuncStruct> asWriteFuncHeaderData(nRequests);
2682
        std::vector<char *> apszRanges(nRequests, nullptr);
2683
        std::vector<struct curl_slist *> aHeaders(nRequests, nullptr);
2684
        std::vector<CurlErrBuffer> asCurlErrors(nRequests);
2685
2686
        bool bAnyHandle = false;
2687
        for (size_t iReq = 0; iReq < nRequests; iReq++)
2688
        {
2689
            if (!asMergedRequests[iReq].bToRetry)
2690
                continue;
2691
            asMergedRequests[iReq].bToRetry = false;
2692
2693
            CURL *hCurlHandle = curl_easy_init();
2694
            aHandles[iReq] = hCurlHandle;
2695
            bAnyHandle = true;
2696
2697
            struct curl_slist *headers = VSICurlSetOptions(
2698
                hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
2699
2700
            VSICURLInitWriteFuncStruct(&asWriteFuncData[iReq], this, pfnReadCbk,
2701
                                       pReadCbkUserData);
2702
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
2703
                                       &asWriteFuncData[iReq]);
2704
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
2705
                                       VSICurlHandleWriteFunc);
2706
2707
            VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[iReq], nullptr,
2708
                                       nullptr, nullptr);
2709
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
2710
                                       &asWriteFuncHeaderData[iReq]);
2711
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
2712
                                       VSICurlHandleWriteFunc);
2713
            asWriteFuncHeaderData[iReq].bIsHTTP = STARTS_WITH(m_pszURL, "http");
2714
            asWriteFuncHeaderData[iReq].nStartOffset =
2715
                asMergedRequests[iReq].nStartOffset;
2716
            asWriteFuncHeaderData[iReq].nEndOffset =
2717
                asMergedRequests[iReq].nStartOffset +
2718
                asMergedRequests[iReq].nSize - 1;
2719
2720
            char rangeStr[512] = {};
2721
            snprintf(rangeStr, sizeof(rangeStr),
2722
                     CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2723
                     asWriteFuncHeaderData[iReq].nStartOffset,
2724
                     asWriteFuncHeaderData[iReq].nEndOffset);
2725
2726
            if constexpr (ENABLE_DEBUG)
2727
            {
2728
                CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
2729
                         rangeStr, osURL.c_str());
2730
            }
2731
2732
            if (asWriteFuncHeaderData[iReq].bIsHTTP)
2733
            {
2734
                // So it gets included in Azure signature
2735
                char *pszRange =
2736
                    CPLStrdup(CPLSPrintf("Range: bytes=%s", rangeStr));
2737
                apszRanges[iReq] = pszRange;
2738
                headers = curl_slist_append(headers, pszRange);
2739
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
2740
            }
2741
            else
2742
            {
2743
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
2744
                                           rangeStr);
2745
            }
2746
2747
            asCurlErrors[iReq].szCurlErrBuf[0] = '\0';
2748
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
2749
                                       &asCurlErrors[iReq].szCurlErrBuf[0]);
2750
2751
            headers = GetCurlHeaders("GET", headers);
2752
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
2753
                                       headers);
2754
            aHeaders[iReq] = headers;
2755
            curl_multi_add_handle(hMultiHandle, hCurlHandle);
2756
        }
2757
2758
        if (bAnyHandle)
2759
        {
2760
            VSICURLMultiPerform(hMultiHandle);
2761
        }
2762
2763
        // Process results
2764
        bool bRetry = false;
2765
        double dfMaxDelay = 0.0;
2766
        for (size_t iReq = 0; iReq < nRequests; iReq++)
2767
        {
2768
            if (!aHandles[iReq])
2769
                continue;
2770
2771
            long response_code = 0;
2772
            curl_easy_getinfo(aHandles[iReq], CURLINFO_HTTP_CODE,
2773
                              &response_code);
2774
2775
            if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0')
2776
            {
2777
                char rangeStr[512] = {};
2778
                snprintf(rangeStr, sizeof(rangeStr),
2779
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2780
                         asWriteFuncHeaderData[iReq].nStartOffset,
2781
                         asWriteFuncHeaderData[iReq].nEndOffset);
2782
2783
                const char *pszErrorMsg = &asCurlErrors[iReq].szCurlErrBuf[0];
2784
                CPLDebug(poFS->GetDebugKey(),
2785
                         "ReadMultiRange(%s), %s: response_code=%d, msg=%s",
2786
                         osURL.c_str(), rangeStr,
2787
                         static_cast<int>(response_code), pszErrorMsg);
2788
            }
2789
2790
            if ((response_code != 206 && response_code != 225) ||
2791
                asWriteFuncHeaderData[iReq].nEndOffset + 1 !=
2792
                    asWriteFuncHeaderData[iReq].nStartOffset +
2793
                        asWriteFuncData[iReq].nSize)
2794
            {
2795
                char rangeStr[512] = {};
2796
                snprintf(rangeStr, sizeof(rangeStr),
2797
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2798
                         asWriteFuncHeaderData[iReq].nStartOffset,
2799
                         asWriteFuncHeaderData[iReq].nEndOffset);
2800
2801
                // Look if we should attempt a retry
2802
                if (asMergedRequests[iReq].retryContext.CanRetry(
2803
                        static_cast<int>(response_code),
2804
                        asWriteFuncData[iReq].pBuffer,
2805
                        &asCurlErrors[iReq].szCurlErrBuf[0]))
2806
                {
2807
                    CPLError(
2808
                        CE_Warning, CPLE_AppDefined,
2809
                        "HTTP error code for %s range %s: %d. "
2810
                        "Retrying again in %.1f secs",
2811
                        osURL.c_str(), rangeStr,
2812
                        static_cast<int>(response_code),
2813
                        asMergedRequests[iReq].retryContext.GetCurrentDelay());
2814
                    dfMaxDelay = std::max(
2815
                        dfMaxDelay,
2816
                        asMergedRequests[iReq].retryContext.GetCurrentDelay());
2817
                    asMergedRequests[iReq].bToRetry = true;
2818
                    bRetry = true;
2819
                }
2820
                else
2821
                {
2822
                    CPLError(CE_Failure, CPLE_AppDefined,
2823
                             "Request for %s failed with response_code=%ld",
2824
                             rangeStr, response_code);
2825
                    nRet = -1;
2826
                }
2827
            }
2828
            else if (nRet == 0)
2829
            {
2830
                size_t nOffset = 0;
2831
                size_t nRemainingSize = asWriteFuncData[iReq].nSize;
2832
                nTotalDownloaded += nRemainingSize;
2833
                for (int iRange = asMergedRequests[iReq].iFirstRange;
2834
                     iRange <= asMergedRequests[iReq].iLastRange; iRange++)
2835
                {
2836
                    if (nRemainingSize < anSortedSizes[iRange])
2837
                    {
2838
                        nRet = -1;
2839
                        break;
2840
                    }
2841
2842
                    if (anSortedSizes[iRange] > 0)
2843
                    {
2844
                        memcpy(apSortedData[iRange],
2845
                               asWriteFuncData[iReq].pBuffer + nOffset,
2846
                               anSortedSizes[iRange]);
2847
                    }
2848
                    nOffset += anSortedSizes[iRange];
2849
                    nRemainingSize -= anSortedSizes[iRange];
2850
                }
2851
            }
2852
2853
            curl_multi_remove_handle(hMultiHandle, aHandles[iReq]);
2854
            VSICURLResetHeaderAndWriterFunctions(aHandles[iReq]);
2855
            curl_easy_cleanup(aHandles[iReq]);
2856
            CPLFree(apszRanges[iReq]);
2857
            CPLFree(asWriteFuncData[iReq].pBuffer);
2858
            CPLFree(asWriteFuncHeaderData[iReq].pBuffer);
2859
            if (aHeaders[iReq])
2860
                curl_slist_free_all(aHeaders[iReq]);
2861
        }
2862
2863
        if (!bRetry || nRet != 0)
2864
            break;
2865
        CPLSleep(dfMaxDelay);
2866
    }
2867
2868
    NetworkStatisticsLogger::LogGET(nTotalDownloaded);
2869
2870
    if constexpr (ENABLE_DEBUG)
2871
    {
2872
        CPLDebug(poFS->GetDebugKey(), "Download completed");
2873
    }
2874
2875
    return nRet;
2876
}
2877
2878
/************************************************************************/
2879
/*                      ReadMultiRangeSingleGet()                       */
2880
/************************************************************************/
2881
2882
// TODO: the interest of this mode is rather dubious now. We could probably
2883
// remove it
2884
int VSICurlHandle::ReadMultiRangeSingleGet(int const nRanges,
2885
                                           void **const ppData,
2886
                                           const vsi_l_offset *const panOffsets,
2887
                                           const size_t *const panSizes)
2888
{
2889
    std::string osRanges;
2890
    std::string osFirstRange;
2891
    std::string osLastRange;
2892
    int nMergedRanges = 0;
2893
    vsi_l_offset nTotalReqSize = 0;
2894
    for (int i = 0; i < nRanges; i++)
2895
    {
2896
        std::string osCurRange;
2897
        if (i != 0)
2898
            osRanges.append(",");
2899
        osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
2900
        while (i + 1 < nRanges &&
2901
               panOffsets[i] + panSizes[i] == panOffsets[i + 1])
2902
        {
2903
            nTotalReqSize += panSizes[i];
2904
            i++;
2905
        }
2906
        nTotalReqSize += panSizes[i];
2907
        osCurRange.append(
2908
            CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i] - 1));
2909
        nMergedRanges++;
2910
2911
        osRanges += osCurRange;
2912
2913
        if (nMergedRanges == 1)
2914
            osFirstRange = osCurRange;
2915
        osLastRange = std::move(osCurRange);
2916
    }
2917
2918
    const char *pszMaxRanges =
2919
        CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
2920
    int nMaxRanges = atoi(pszMaxRanges);
2921
    if (nMaxRanges <= 0)
2922
        nMaxRanges = 250;
2923
    if (nMergedRanges > nMaxRanges)
2924
    {
2925
        const int nHalf = nRanges / 2;
2926
        const int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
2927
        if (nRet != 0)
2928
            return nRet;
2929
        return ReadMultiRange(nRanges - nHalf, ppData + nHalf,
2930
                              panOffsets + nHalf, panSizes + nHalf);
2931
    }
2932
2933
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
2934
    CURL *hCurlHandle = curl_easy_init();
2935
2936
    struct curl_slist *headers =
2937
        VSICurlSetOptions(hCurlHandle, m_pszURL, m_aosHTTPOptions.List());
2938
2939
    WriteFuncStruct sWriteFuncData;
2940
    WriteFuncStruct sWriteFuncHeaderData;
2941
2942
    VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk,
2943
                               pReadCbkUserData);
2944
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2945
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
2946
                               VSICurlHandleWriteFunc);
2947
2948
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
2949
                               nullptr);
2950
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
2951
                               &sWriteFuncHeaderData);
2952
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
2953
                               VSICurlHandleWriteFunc);
2954
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
2955
    sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
2956
    if (nMergedRanges == 1)
2957
    {
2958
        sWriteFuncHeaderData.nStartOffset = panOffsets[0];
2959
        sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize - 1;
2960
    }
2961
2962
    if constexpr (ENABLE_DEBUG)
2963
    {
2964
        if (nMergedRanges == 1)
2965
            CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
2966
                     osRanges.c_str(), m_pszURL);
2967
        else
2968
            CPLDebug(poFS->GetDebugKey(),
2969
                     "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
2970
                     osFirstRange.c_str(), osLastRange.c_str(),
2971
                     static_cast<GUIntBig>(nTotalReqSize), m_pszURL);
2972
    }
2973
2974
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
2975
2976
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
2977
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
2978
2979
    headers = GetCurlHeaders("GET", headers);
2980
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
2981
2982
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
2983
2984
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
2985
2986
    curl_slist_free_all(headers);
2987
2988
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
2989
2990
    if (sWriteFuncData.bInterrupted)
2991
    {
2992
        bInterrupted = true;
2993
2994
        CPLFree(sWriteFuncData.pBuffer);
2995
        CPLFree(sWriteFuncHeaderData.pBuffer);
2996
        curl_easy_cleanup(hCurlHandle);
2997
2998
        return -1;
2999
    }
3000
3001
    long response_code = 0;
3002
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
3003
3004
    if ((response_code != 200 && response_code != 206 && response_code != 225 &&
3005
         response_code != 226 && response_code != 426) ||
3006
        sWriteFuncHeaderData.bError)
3007
    {
3008
        if (response_code >= 400 && szCurlErrBuf[0] != '\0')
3009
        {
3010
            if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
3011
                CPLError(
3012
                    CE_Failure, CPLE_AppDefined,
3013
                    "%d: %s, Range downloading not supported by this server!",
3014
                    static_cast<int>(response_code), szCurlErrBuf);
3015
            else
3016
                CPLError(CE_Failure, CPLE_AppDefined, "%d: %s",
3017
                         static_cast<int>(response_code), szCurlErrBuf);
3018
        }
3019
        /*
3020
        if( !bHasComputedFileSize && startOffset == 0 )
3021
        {
3022
            cachedFileProp->bHasComputedFileSize = bHasComputedFileSize = true;
3023
            cachedFileProp->fileSize = fileSize = 0;
3024
            cachedFileProp->eExists = eExists = EXIST_NO;
3025
        }
3026
        */
3027
        CPLFree(sWriteFuncData.pBuffer);
3028
        CPLFree(sWriteFuncHeaderData.pBuffer);
3029
        curl_easy_cleanup(hCurlHandle);
3030
        return -1;
3031
    }
3032
3033
    char *pBuffer = sWriteFuncData.pBuffer;
3034
    size_t nSize = sWriteFuncData.nSize;
3035
3036
    // TODO(schwehr): Localize after removing gotos.
3037
    int nRet = -1;
3038
    char *pszBoundary;
3039
    std::string osBoundary;
3040
    char *pszNext = nullptr;
3041
    int iRange = 0;
3042
    int iPart = 0;
3043
    char *pszEOL = nullptr;
3044
3045
    /* -------------------------------------------------------------------- */
3046
    /*      No multipart if a single range has been requested               */
3047
    /* -------------------------------------------------------------------- */
3048
3049
    if (nMergedRanges == 1)
3050
    {
3051
        size_t nAccSize = 0;
3052
        if (static_cast<vsi_l_offset>(nSize) < nTotalReqSize)
3053
            goto end;
3054
3055
        for (int i = 0; i < nRanges; i++)
3056
        {
3057
            memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
3058
            nAccSize += panSizes[i];
3059
        }
3060
3061
        nRet = 0;
3062
        goto end;
3063
    }
3064
3065
    /* -------------------------------------------------------------------- */
3066
    /*      Extract boundary name                                           */
3067
    /* -------------------------------------------------------------------- */
3068
3069
    pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
3070
                         "Content-Type: multipart/byteranges; boundary=");
3071
    if (pszBoundary == nullptr)
3072
    {
3073
        CPLError(CE_Failure, CPLE_AppDefined, "Could not find '%s'",
3074
                 "Content-Type: multipart/byteranges; boundary=");
3075
        goto end;
3076
    }
3077
3078
    pszBoundary += strlen("Content-Type: multipart/byteranges; boundary=");
3079
3080
    pszEOL = strchr(pszBoundary, '\r');
3081
    if (pszEOL)
3082
        *pszEOL = 0;
3083
    pszEOL = strchr(pszBoundary, '\n');
3084
    if (pszEOL)
3085
        *pszEOL = 0;
3086
3087
    /* Remove optional double-quote character around boundary name */
3088
    if (pszBoundary[0] == '"')
3089
    {
3090
        pszBoundary++;
3091
        char *pszLastDoubleQuote = strrchr(pszBoundary, '"');
3092
        if (pszLastDoubleQuote)
3093
            *pszLastDoubleQuote = 0;
3094
    }
3095
3096
    osBoundary = "--";
3097
    osBoundary += pszBoundary;
3098
3099
    /* -------------------------------------------------------------------- */
3100
    /*      Find the start of the first chunk.                              */
3101
    /* -------------------------------------------------------------------- */
3102
    pszNext = strstr(pBuffer, osBoundary.c_str());
3103
    if (pszNext == nullptr)
3104
    {
3105
        CPLError(CE_Failure, CPLE_AppDefined, "No parts found.");
3106
        goto end;
3107
    }
3108
3109
    pszNext += osBoundary.size();
3110
    while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0')
3111
        pszNext++;
3112
    if (*pszNext == '\r')
3113
        pszNext++;
3114
    if (*pszNext == '\n')
3115
        pszNext++;
3116
3117
    /* -------------------------------------------------------------------- */
3118
    /*      Loop over parts...                                              */
3119
    /* -------------------------------------------------------------------- */
3120
    while (iPart < nRanges)
3121
    {
3122
        /* --------------------------------------------------------------------
3123
         */
3124
        /*      Collect headers. */
3125
        /* --------------------------------------------------------------------
3126
         */
3127
        bool bExpectedRange = false;
3128
3129
        while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0')
3130
        {
3131
            pszEOL = strstr(pszNext, "\n");
3132
3133
            if (pszEOL == nullptr)
3134
            {
3135
                CPLError(CE_Failure, CPLE_AppDefined,
3136
                         "Error while parsing multipart content (at line %d)",
3137
                         __LINE__);
3138
                goto end;
3139
            }
3140
3141
            *pszEOL = '\0';
3142
            bool bRestoreAntislashR = false;
3143
            if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
3144
            {
3145
                bRestoreAntislashR = true;
3146
                pszEOL[-1] = '\0';
3147
            }
3148
3149
            if (STARTS_WITH_CI(pszNext, "Content-Range: bytes "))
3150
            {
3151
                bExpectedRange = true; /* FIXME */
3152
            }
3153
3154
            if (bRestoreAntislashR)
3155
                pszEOL[-1] = '\r';
3156
            *pszEOL = '\n';
3157
3158
            pszNext = pszEOL + 1;
3159
        }
3160
3161
        if (!bExpectedRange)
3162
        {
3163
            CPLError(CE_Failure, CPLE_AppDefined,
3164
                     "Error while parsing multipart content (at line %d)",
3165
                     __LINE__);
3166
            goto end;
3167
        }
3168
3169
        if (*pszNext == '\r')
3170
            pszNext++;
3171
        if (*pszNext == '\n')
3172
            pszNext++;
3173
3174
        /* --------------------------------------------------------------------
3175
         */
3176
        /*      Work out the data block size. */
3177
        /* --------------------------------------------------------------------
3178
         */
3179
        size_t nBytesAvail = nSize - (pszNext - pBuffer);
3180
3181
        while (true)
3182
        {
3183
            if (nBytesAvail < panSizes[iRange])
3184
            {
3185
                CPLError(CE_Failure, CPLE_AppDefined,
3186
                         "Error while parsing multipart content (at line %d)",
3187
                         __LINE__);
3188
                goto end;
3189
            }
3190
3191
            memcpy(ppData[iRange], pszNext, panSizes[iRange]);
3192
            pszNext += panSizes[iRange];
3193
            nBytesAvail -= panSizes[iRange];
3194
            if (iRange + 1 < nRanges &&
3195
                panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1])
3196
            {
3197
                iRange++;
3198
            }
3199
            else
3200
            {
3201
                break;
3202
            }
3203
        }
3204
3205
        iPart++;
3206
        iRange++;
3207
3208
        while (nBytesAvail > 0 &&
3209
               (*pszNext != '-' ||
3210
                strncmp(pszNext, osBoundary.c_str(), osBoundary.size()) != 0))
3211
        {
3212
            pszNext++;
3213
            nBytesAvail--;
3214
        }
3215
3216
        if (nBytesAvail == 0)
3217
        {
3218
            CPLError(CE_Failure, CPLE_AppDefined,
3219
                     "Error while parsing multipart content (at line %d)",
3220
                     __LINE__);
3221
            goto end;
3222
        }
3223
3224
        pszNext += osBoundary.size();
3225
        if (STARTS_WITH(pszNext, "--"))
3226
        {
3227
            // End of multipart.
3228
            break;
3229
        }
3230
3231
        if (*pszNext == '\r')
3232
            pszNext++;
3233
        if (*pszNext == '\n')
3234
            pszNext++;
3235
        else
3236
        {
3237
            CPLError(CE_Failure, CPLE_AppDefined,
3238
                     "Error while parsing multipart content (at line %d)",
3239
                     __LINE__);
3240
            goto end;
3241
        }
3242
    }
3243
3244
    if (iPart == nMergedRanges)
3245
        nRet = 0;
3246
    else
3247
        CPLError(CE_Failure, CPLE_AppDefined,
3248
                 "Got only %d parts, where %d were expected", iPart,
3249
                 nMergedRanges);
3250
3251
end:
3252
    CPLFree(sWriteFuncData.pBuffer);
3253
    CPLFree(sWriteFuncHeaderData.pBuffer);
3254
    curl_easy_cleanup(hCurlHandle);
3255
3256
    return nRet;
3257
}
3258
3259
/************************************************************************/
3260
/*                               PRead()                                */
3261
/************************************************************************/
3262
3263
size_t VSICurlHandle::PRead(void *pBuffer, size_t nSize,
3264
                            vsi_l_offset nOffset) const
3265
{
3266
    // Try to use AdviseRead ranges fetched asynchronously
3267
    if (!m_aoAdviseReadRanges.empty())
3268
    {
3269
        for (auto &poRange : m_aoAdviseReadRanges)
3270
        {
3271
            if (nOffset >= poRange->nStartOffset &&
3272
                nOffset + nSize <= poRange->nStartOffset + poRange->nSize)
3273
            {
3274
                {
3275
                    std::unique_lock<std::mutex> oLock(poRange->oMutex);
3276
                    // coverity[missing_lock:FALSE]
3277
                    while (!poRange->bDone)
3278
                    {
3279
                        poRange->oCV.wait(oLock);
3280
                    }
3281
                }
3282
                if (poRange->abyData.empty())
3283
                    return 0;
3284
3285
                auto nEndOffset =
3286
                    poRange->nStartOffset + poRange->abyData.size();
3287
                if (nOffset >= nEndOffset)
3288
                    return 0;
3289
                const size_t nToCopy = static_cast<size_t>(
3290
                    std::min<vsi_l_offset>(nSize, nEndOffset - nOffset));
3291
                memcpy(pBuffer,
3292
                       poRange->abyData.data() +
3293
                           static_cast<size_t>(nOffset - poRange->nStartOffset),
3294
                       nToCopy);
3295
                return nToCopy;
3296
            }
3297
        }
3298
    }
3299
3300
    // poFS has a global mutex
3301
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
3302
    if (oFileProp.eExists == EXIST_NO)
3303
        return static_cast<size_t>(-1);
3304
3305
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
3306
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
3307
    NetworkStatisticsAction oContextAction("PRead");
3308
3309
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
3310
    std::string osURL;
3311
    {
3312
        std::lock_guard<std::mutex> oLock(m_oMutex);
3313
        UpdateQueryString();
3314
        bool bHasExpired;
3315
        osURL = GetRedirectURLIfValid(bHasExpired, aosHTTPOptions);
3316
    }
3317
3318
    CURL *hCurlHandle = curl_easy_init();
3319
3320
    struct curl_slist *headers =
3321
        VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
3322
3323
    WriteFuncStruct sWriteFuncData;
3324
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
3325
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
3326
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
3327
                               VSICurlHandleWriteFunc);
3328
3329
    WriteFuncStruct sWriteFuncHeaderData;
3330
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
3331
                               nullptr);
3332
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
3333
                               &sWriteFuncHeaderData);
3334
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
3335
                               VSICurlHandleWriteFunc);
3336
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
3337
    sWriteFuncHeaderData.nStartOffset = nOffset;
3338
3339
    sWriteFuncHeaderData.nEndOffset = nOffset + nSize - 1;
3340
3341
    char rangeStr[512] = {};
3342
    snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3343
             sWriteFuncHeaderData.nStartOffset,
3344
             sWriteFuncHeaderData.nEndOffset);
3345
3346
    if constexpr (ENABLE_DEBUG)
3347
    {
3348
        CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", rangeStr,
3349
                 osURL.c_str());
3350
    }
3351
3352
    std::string osHeaderRange;
3353
    if (sWriteFuncHeaderData.bIsHTTP)
3354
    {
3355
        osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr);
3356
        // So it gets included in Azure signature
3357
        headers = curl_slist_append(headers, osHeaderRange.data());
3358
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
3359
    }
3360
    else
3361
    {
3362
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
3363
    }
3364
3365
    std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
3366
    szCurlErrBuf[0] = '\0';
3367
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
3368
                               &szCurlErrBuf[0]);
3369
3370
    {
3371
        std::lock_guard<std::mutex> oLock(m_oMutex);
3372
        headers =
3373
            const_cast<VSICurlHandle *>(this)->GetCurlHeaders("GET", headers);
3374
    }
3375
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
3376
3377
    CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL);
3378
    VSICURLMultiPerform(hMultiHandle, hCurlHandle, &m_bInterrupt);
3379
3380
    {
3381
        std::lock_guard<std::mutex> oLock(m_oMutex);
3382
        const_cast<VSICurlHandle *>(this)->UpdateRedirectInfo(
3383
            hCurlHandle, sWriteFuncHeaderData);
3384
    }
3385
3386
    long response_code = 0;
3387
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
3388
3389
    if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0')
3390
    {
3391
        const char *pszErrorMsg = &szCurlErrBuf[0];
3392
        CPLDebug(poFS->GetDebugKey(), "PRead(%s), %s: response_code=%d, msg=%s",
3393
                 osURL.c_str(), rangeStr, static_cast<int>(response_code),
3394
                 pszErrorMsg);
3395
    }
3396
3397
    size_t nRet;
3398
    if ((response_code != 206 && response_code != 225) ||
3399
        sWriteFuncData.nSize == 0)
3400
    {
3401
        if (!m_bInterrupt)
3402
        {
3403
            CPLDebug(poFS->GetDebugKey(),
3404
                     "Request for %s failed with response_code=%ld", rangeStr,
3405
                     response_code);
3406
        }
3407
        nRet = static_cast<size_t>(-1);
3408
    }
3409
    else
3410
    {
3411
        nRet = std::min(sWriteFuncData.nSize, nSize);
3412
        if (nRet > 0)
3413
            memcpy(pBuffer, sWriteFuncData.pBuffer, nRet);
3414
    }
3415
3416
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
3417
    curl_easy_cleanup(hCurlHandle);
3418
    CPLFree(sWriteFuncData.pBuffer);
3419
    CPLFree(sWriteFuncHeaderData.pBuffer);
3420
    curl_slist_free_all(headers);
3421
3422
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
3423
3424
#if 0
3425
    if( ENABLE_DEBUG )
3426
        CPLDebug(poFS->GetDebugKey(), "Download completed");
3427
#endif
3428
3429
    return nRet;
3430
}
3431
3432
/************************************************************************/
3433
/*                    GetAdviseReadTotalBytesLimit()                    */
3434
/************************************************************************/
3435
3436
size_t VSICurlHandle::GetAdviseReadTotalBytesLimit() const
3437
{
3438
    return static_cast<size_t>(std::min<unsigned long long>(
3439
        std::numeric_limits<size_t>::max(),
3440
        // 100 MB
3441
        std::strtoull(
3442
            CPLGetConfigOption("CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT",
3443
                               "104857600"),
3444
            nullptr, 10)));
3445
}
3446
3447
/************************************************************************/
3448
/*                          VSICURLMultiInit()                          */
3449
/************************************************************************/
3450
3451
// Create a new curl multi handle and apply the optional
// GDAL_HTTP_MAX_CACHED_CONNECTIONS and GDAL_HTTP_MAX_TOTAL_CONNECTIONS
// configuration options to it. Returns the handle produced by
// curl_multi_init().
static CURLM *VSICURLMultiInit()
{
    CURLM *hCurlMultiHandle = curl_multi_init();

    // curl_multi_setopt() is a variadic function and both options below are
    // documented as taking a 'long'. Passing the 'int' returned by atoi()
    // directly would not match the type read on the callee side, so cast
    // explicitly.
    if (const char *pszMAXCONNECTS =
            CPLGetConfigOption("GDAL_HTTP_MAX_CACHED_CONNECTIONS", nullptr))
    {
        curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAXCONNECTS,
                          static_cast<long>(atoi(pszMAXCONNECTS)));
    }

    if (const char *pszMAX_TOTAL_CONNECTIONS =
            CPLGetConfigOption("GDAL_HTTP_MAX_TOTAL_CONNECTIONS", nullptr))
    {
        curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAX_TOTAL_CONNECTIONS,
                          static_cast<long>(atoi(pszMAX_TOTAL_CONNECTIONS)));
    }

    return hCurlMultiHandle;
}
3471
3472
/************************************************************************/
3473
/*                             AdviseRead()                             */
3474
/************************************************************************/
3475
3476
void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets,
3477
                               const size_t *panSizes)
3478
{
3479
    if (!CPLTestBool(
3480
            CPLGetConfigOption("GDAL_HTTP_ENABLE_ADVISE_READ", "TRUE")))
3481
        return;
3482
3483
    if (m_oThreadAdviseRead.joinable())
3484
    {
3485
        m_oThreadAdviseRead.join();
3486
    }
3487
3488
    // Give up if we need to allocate too much memory
3489
    vsi_l_offset nMaxSize = 0;
3490
    const size_t nLimit = GetAdviseReadTotalBytesLimit();
3491
    for (int i = 0; i < nRanges; ++i)
3492
    {
3493
        if (panSizes[i] > nLimit - nMaxSize)
3494
        {
3495
            CPLDebug(poFS->GetDebugKey(),
3496
                     "Trying to request too many bytes in AdviseRead()");
3497
            return;
3498
        }
3499
        nMaxSize += panSizes[i];
3500
    }
3501
3502
    UpdateQueryString();
3503
3504
    bool bHasExpired = false;
3505
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
3506
    const std::string l_osURL(
3507
        GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
3508
    if (bHasExpired)
3509
    {
3510
        return;
3511
    }
3512
3513
    const bool bMergeConsecutiveRanges = CPLTestBool(
3514
        CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE"));
3515
3516
    try
3517
    {
3518
        m_aoAdviseReadRanges.clear();
3519
        m_aoAdviseReadRanges.reserve(nRanges);
3520
        for (int i = 0; i < nRanges;)
3521
        {
3522
            int iNext = i;
3523
            // Identify consecutive ranges
3524
            constexpr size_t SIZE_COG_MARKERS = 2 * sizeof(uint32_t);
3525
            auto nEndOffset = panOffsets[iNext] + panSizes[iNext];
3526
            while (bMergeConsecutiveRanges && iNext + 1 < nRanges &&
3527
                   panOffsets[iNext + 1] > panOffsets[iNext] &&
3528
                   panOffsets[iNext] + panSizes[iNext] + SIZE_COG_MARKERS >=
3529
                       panOffsets[iNext + 1] &&
3530
                   panOffsets[iNext + 1] + panSizes[iNext + 1] > nEndOffset)
3531
            {
3532
                iNext++;
3533
                nEndOffset = panOffsets[iNext] + panSizes[iNext];
3534
            }
3535
            CPLAssert(panOffsets[i] <= nEndOffset);
3536
            const size_t nSize =
3537
                static_cast<size_t>(nEndOffset - panOffsets[i]);
3538
3539
            if (nSize == 0)
3540
            {
3541
                i = iNext + 1;
3542
                continue;
3543
            }
3544
3545
            auto newAdviseReadRange =
3546
                std::make_unique<AdviseReadRange>(m_oRetryParameters);
3547
            newAdviseReadRange->nStartOffset = panOffsets[i];
3548
            newAdviseReadRange->nSize = nSize;
3549
            newAdviseReadRange->abyData.resize(nSize);
3550
            m_aoAdviseReadRanges.push_back(std::move(newAdviseReadRange));
3551
3552
            i = iNext + 1;
3553
        }
3554
    }
3555
    catch (const std::exception &)
3556
    {
3557
        CPLError(CE_Failure, CPLE_OutOfMemory,
3558
                 "Out of memory in VSICurlHandle::AdviseRead()");
3559
        m_aoAdviseReadRanges.clear();
3560
    }
3561
3562
    if (m_aoAdviseReadRanges.empty())
3563
        return;
3564
3565
#ifdef DEBUG
3566
    CPLDebug(poFS->GetDebugKey(), "AdviseRead(): fetching %u ranges",
3567
             static_cast<unsigned>(m_aoAdviseReadRanges.size()));
3568
#endif
3569
3570
    const auto task = [this, aosHTTPOptions = std::move(aosHTTPOptions)](
3571
                          const std::string &osURL)
3572
    {
3573
        if (!m_hCurlMultiHandleForAdviseRead)
3574
            m_hCurlMultiHandleForAdviseRead = VSICURLMultiInit();
3575
3576
        NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
3577
        NetworkStatisticsFile oContextFile(m_osFilename.c_str());
3578
        NetworkStatisticsAction oContextAction("AdviseRead");
3579
3580
#ifdef CURLPIPE_MULTIPLEX
3581
        // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is
3582
        // used)
3583
        // Not that this does not enable HTTP/1.1 pipeling, which is not
3584
        // recommended for example by Google Cloud Storage.
3585
        // For HTTP/1.1, parallel connections work better since you can get
3586
        // results out of order.
3587
        if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES")))
3588
        {
3589
            curl_multi_setopt(m_hCurlMultiHandleForAdviseRead,
3590
                              CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX);
3591
        }
3592
#endif
3593
3594
        size_t nTotalDownloaded = 0;
3595
3596
        while (true)
3597
        {
3598
3599
            std::vector<CURL *> aHandles;
3600
            std::vector<WriteFuncStruct> asWriteFuncData(
3601
                m_aoAdviseReadRanges.size());
3602
            std::vector<WriteFuncStruct> asWriteFuncHeaderData(
3603
                m_aoAdviseReadRanges.size());
3604
            std::vector<char *> apszRanges;
3605
            std::vector<struct curl_slist *> aHeaders;
3606
3607
            struct CurlErrBuffer
3608
            {
3609
                std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
3610
            };
3611
            std::vector<CurlErrBuffer> asCurlErrors(
3612
                m_aoAdviseReadRanges.size());
3613
3614
            std::map<CURL *, size_t> oMapHandleToIdx;
3615
            for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i)
3616
            {
3617
                if (!m_aoAdviseReadRanges[i]->bToRetry)
3618
                {
3619
                    aHandles.push_back(nullptr);
3620
                    apszRanges.push_back(nullptr);
3621
                    aHeaders.push_back(nullptr);
3622
                    continue;
3623
                }
3624
                m_aoAdviseReadRanges[i]->bToRetry = false;
3625
3626
                CURL *hCurlHandle = curl_easy_init();
3627
                oMapHandleToIdx[hCurlHandle] = i;
3628
                aHandles.push_back(hCurlHandle);
3629
3630
                // As the multi-range request is likely not the first one, we don't
3631
                // need to wait as we already know if pipelining is possible
3632
                // unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_PIPEWAIT, 1);
3633
3634
                struct curl_slist *headers = VSICurlSetOptions(
3635
                    hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
3636
3637
                VSICURLInitWriteFuncStruct(&asWriteFuncData[i], this,
3638
                                           pfnReadCbk, pReadCbkUserData);
3639
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
3640
                                           &asWriteFuncData[i]);
3641
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
3642
                                           VSICurlHandleWriteFunc);
3643
3644
                VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[i], nullptr,
3645
                                           nullptr, nullptr);
3646
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
3647
                                           &asWriteFuncHeaderData[i]);
3648
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
3649
                                           VSICurlHandleWriteFunc);
3650
                asWriteFuncHeaderData[i].bIsHTTP =
3651
                    STARTS_WITH(m_pszURL, "http");
3652
                asWriteFuncHeaderData[i].nStartOffset =
3653
                    m_aoAdviseReadRanges[i]->nStartOffset;
3654
3655
                asWriteFuncHeaderData[i].nEndOffset =
3656
                    m_aoAdviseReadRanges[i]->nStartOffset +
3657
                    m_aoAdviseReadRanges[i]->nSize - 1;
3658
3659
                char rangeStr[512] = {};
3660
                snprintf(rangeStr, sizeof(rangeStr),
3661
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3662
                         asWriteFuncHeaderData[i].nStartOffset,
3663
                         asWriteFuncHeaderData[i].nEndOffset);
3664
3665
                if constexpr (ENABLE_DEBUG)
3666
                {
3667
                    CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
3668
                             rangeStr, osURL.c_str());
3669
                }
3670
3671
                if (asWriteFuncHeaderData[i].bIsHTTP)
3672
                {
3673
                    std::string osHeaderRange(
3674
                        CPLSPrintf("Range: bytes=%s", rangeStr));
3675
                    // So it gets included in Azure signature
3676
                    char *pszRange = CPLStrdup(osHeaderRange.c_str());
3677
                    apszRanges.push_back(pszRange);
3678
                    headers = curl_slist_append(headers, pszRange);
3679
                    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
3680
                                               nullptr);
3681
                }
3682
                else
3683
                {
3684
                    apszRanges.push_back(nullptr);
3685
                    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
3686
                                               rangeStr);
3687
                }
3688
3689
                asCurlErrors[i].szCurlErrBuf[0] = '\0';
3690
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
3691
                                           &asCurlErrors[i].szCurlErrBuf[0]);
3692
3693
                headers = GetCurlHeaders("GET", headers);
3694
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
3695
                                           headers);
3696
                aHeaders.push_back(headers);
3697
                curl_multi_add_handle(m_hCurlMultiHandleForAdviseRead,
3698
                                      hCurlHandle);
3699
            }
3700
3701
            const auto DealWithRequest = [this, &osURL, &nTotalDownloaded,
3702
                                          &oMapHandleToIdx, &asCurlErrors,
3703
                                          &asWriteFuncHeaderData,
3704
                                          &asWriteFuncData](CURL *hCurlHandle)
3705
            {
3706
                auto oIter = oMapHandleToIdx.find(hCurlHandle);
3707
                CPLAssert(oIter != oMapHandleToIdx.end());
3708
                const auto iReq = oIter->second;
3709
3710
                long response_code = 0;
3711
                curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE,
3712
                                  &response_code);
3713
3714
                if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0')
3715
                {
3716
                    char rangeStr[512] = {};
3717
                    snprintf(rangeStr, sizeof(rangeStr),
3718
                             CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3719
                             asWriteFuncHeaderData[iReq].nStartOffset,
3720
                             asWriteFuncHeaderData[iReq].nEndOffset);
3721
3722
                    const char *pszErrorMsg =
3723
                        &asCurlErrors[iReq].szCurlErrBuf[0];
3724
                    CPLDebug(poFS->GetDebugKey(),
3725
                             "ReadMultiRange(%s), %s: response_code=%d, msg=%s",
3726
                             osURL.c_str(), rangeStr,
3727
                             static_cast<int>(response_code), pszErrorMsg);
3728
                }
3729
3730
                bool bToRetry = false;
3731
                if ((response_code != 206 && response_code != 225) ||
3732
                    asWriteFuncHeaderData[iReq].nEndOffset + 1 !=
3733
                        asWriteFuncHeaderData[iReq].nStartOffset +
3734
                            asWriteFuncData[iReq].nSize)
3735
                {
3736
                    char rangeStr[512] = {};
3737
                    snprintf(rangeStr, sizeof(rangeStr),
3738
                             CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3739
                             asWriteFuncHeaderData[iReq].nStartOffset,
3740
                             asWriteFuncHeaderData[iReq].nEndOffset);
3741
3742
                    // Look if we should attempt a retry
3743
                    if (m_aoAdviseReadRanges[iReq]->retryContext.CanRetry(
3744
                            static_cast<int>(response_code),
3745
                            asWriteFuncData[iReq].pBuffer,
3746
                            &asCurlErrors[iReq].szCurlErrBuf[0]))
3747
                    {
3748
                        CPLError(CE_Warning, CPLE_AppDefined,
3749
                                 "HTTP error code for %s range %s: %d. "
3750
                                 "Retrying again in %.1f secs",
3751
                                 osURL.c_str(), rangeStr,
3752
                                 static_cast<int>(response_code),
3753
                                 m_aoAdviseReadRanges[iReq]
3754
                                     ->retryContext.GetCurrentDelay());
3755
                        m_aoAdviseReadRanges[iReq]->dfSleepDelay =
3756
                            m_aoAdviseReadRanges[iReq]
3757
                                ->retryContext.GetCurrentDelay();
3758
                        bToRetry = true;
3759
                    }
3760
                    else
3761
                    {
3762
                        CPLError(CE_Failure, CPLE_AppDefined,
3763
                                 "Request for %s range %s failed with "
3764
                                 "response_code=%ld",
3765
                                 osURL.c_str(), rangeStr, response_code);
3766
                    }
3767
                }
3768
                else
3769
                {
3770
                    const size_t nSize = asWriteFuncData[iReq].nSize;
3771
                    memcpy(&m_aoAdviseReadRanges[iReq]->abyData[0],
3772
                           asWriteFuncData[iReq].pBuffer, nSize);
3773
                    m_aoAdviseReadRanges[iReq]->abyData.resize(nSize);
3774
3775
                    nTotalDownloaded += nSize;
3776
                }
3777
3778
                m_aoAdviseReadRanges[iReq]->bToRetry = bToRetry;
3779
3780
                if (!bToRetry)
3781
                {
3782
                    std::lock_guard<std::mutex> oLock(
3783
                        m_aoAdviseReadRanges[iReq]->oMutex);
3784
                    m_aoAdviseReadRanges[iReq]->bDone = true;
3785
                    m_aoAdviseReadRanges[iReq]->oCV.notify_all();
3786
                }
3787
            };
3788
3789
            int repeats = 0;
3790
3791
            void *old_handler = CPLHTTPIgnoreSigPipe();
3792
            while (true)
3793
            {
3794
                int still_running;
3795
                while (curl_multi_perform(m_hCurlMultiHandleForAdviseRead,
3796
                                          &still_running) ==
3797
                       CURLM_CALL_MULTI_PERFORM)
3798
                {
3799
                    // loop
3800
                }
3801
                if (!still_running)
3802
                {
3803
                    break;
3804
                }
3805
3806
                CURLMsg *msg;
3807
                do
3808
                {
3809
                    int msgq = 0;
3810
                    msg = curl_multi_info_read(m_hCurlMultiHandleForAdviseRead,
3811
                                               &msgq);
3812
                    if (msg && (msg->msg == CURLMSG_DONE))
3813
                    {
3814
                        DealWithRequest(msg->easy_handle);
3815
                    }
3816
                } while (msg);
3817
3818
                CPLMultiPerformWait(m_hCurlMultiHandleForAdviseRead, repeats);
3819
            }
3820
            CPLHTTPRestoreSigPipeHandler(old_handler);
3821
3822
            bool bRetry = false;
3823
            double dfDelay = 0.0;
3824
            for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i)
3825
            {
3826
                bool bReqDone;
3827
                {
3828
                    // To please Coverity Scan
3829
                    std::lock_guard<std::mutex> oLock(
3830
                        m_aoAdviseReadRanges[i]->oMutex);
3831
                    bReqDone = m_aoAdviseReadRanges[i]->bDone;
3832
                }
3833
                if (!bReqDone && !m_aoAdviseReadRanges[i]->bToRetry)
3834
                {
3835
                    DealWithRequest(aHandles[i]);
3836
                }
3837
                if (m_aoAdviseReadRanges[i]->bToRetry)
3838
                    dfDelay = std::max(dfDelay,
3839
                                       m_aoAdviseReadRanges[i]->dfSleepDelay);
3840
                bRetry = bRetry || m_aoAdviseReadRanges[i]->bToRetry;
3841
                if (aHandles[i])
3842
                {
3843
                    curl_multi_remove_handle(m_hCurlMultiHandleForAdviseRead,
3844
                                             aHandles[i]);
3845
                    VSICURLResetHeaderAndWriterFunctions(aHandles[i]);
3846
                    curl_easy_cleanup(aHandles[i]);
3847
                }
3848
                CPLFree(apszRanges[i]);
3849
                CPLFree(asWriteFuncData[i].pBuffer);
3850
                CPLFree(asWriteFuncHeaderData[i].pBuffer);
3851
                if (aHeaders[i])
3852
                    curl_slist_free_all(aHeaders[i]);
3853
            }
3854
            if (!bRetry)
3855
                break;
3856
            CPLSleep(dfDelay);
3857
        }
3858
3859
        NetworkStatisticsLogger::LogGET(nTotalDownloaded);
3860
    };
3861
3862
    m_oThreadAdviseRead = std::thread(task, l_osURL);
3863
}
3864
3865
/************************************************************************/
3866
/*                               Write()                                */
3867
/************************************************************************/
3868
3869
// Ignores both arguments and always reports that 0 bytes were written.
size_t VSICurlHandle::Write(const void * /* pBuffer */, size_t /* nBytes */)
{
    constexpr size_t nBytesWritten = 0;
    return nBytesWritten;
}
3873
3874
/************************************************************************/
3875
/*                              ClearErr()                              */
3876
/************************************************************************/
3877
3878
void VSICurlHandle::ClearErr()
3879
3880
{
3881
    bEOF = false;
3882
    bError = false;
3883
}
3884
3885
/************************************************************************/
3886
/*                               Error()                                */
3887
/************************************************************************/
3888
3889
int VSICurlHandle::Error()
3890
3891
{
3892
    return bError ? TRUE : FALSE;
3893
}
3894
3895
/************************************************************************/
3896
/*                                Eof()                                 */
3897
/************************************************************************/
3898
3899
int VSICurlHandle::Eof()
3900
3901
{
3902
    return bEOF ? TRUE : FALSE;
3903
}
3904
3905
/************************************************************************/
3906
/*                               Flush()                                */
3907
/************************************************************************/
3908
3909
// Flush stub: does nothing and always returns 0.
int VSICurlHandle::Flush()
{
    return 0;
}
3913
3914
/************************************************************************/
3915
/*                               Close()                                */
3916
/************************************************************************/
3917
3918
// Close stub: does nothing and always returns 0.
int VSICurlHandle::Close()
{
    return 0;
}
3922
3923
/************************************************************************/
3924
/*                    VSICurlFilesystemHandlerBase()                    */
3925
/************************************************************************/
3926
3927
VSICurlFilesystemHandlerBase::VSICurlFilesystemHandlerBase()
3928
    : oCacheFileProp{100 * 1024}, oCacheDirList{1024, 0}
3929
{
3930
}
3931
3932
/************************************************************************/
3933
/*                           CachedConnection                           */
3934
/************************************************************************/
3935
3936
namespace
3937
{
3938
struct CachedConnection
3939
{
3940
    CURLM *hCurlMultiHandle = nullptr;
3941
    void clear();
3942
3943
    ~CachedConnection()
3944
    {
3945
        clear();
3946
    }
3947
};
3948
}  // namespace
3949
3950
#ifdef _WIN32
3951
// Currently thread_local and C++ objects don't work well with DLL on Windows
3952
static void FreeCachedConnection(void *pData)
3953
{
3954
    delete static_cast<
3955
        std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData);
3956
}
3957
3958
// Per-thread and per-filesystem Curl connection cache.
3959
static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> &
3960
GetConnectionCache()
3961
{
3962
    static std::map<VSICurlFilesystemHandlerBase *, CachedConnection>
3963
        dummyCache;
3964
    int bMemoryErrorOccurred = false;
3965
    void *pData =
3966
        CPLGetTLSEx(CTLS_VSICURL_CACHEDCONNECTION, &bMemoryErrorOccurred);
3967
    if (bMemoryErrorOccurred)
3968
    {
3969
        return dummyCache;
3970
    }
3971
    if (pData == nullptr)
3972
    {
3973
        auto cachedConnection =
3974
            new std::map<VSICurlFilesystemHandlerBase *, CachedConnection>();
3975
        CPLSetTLSWithFreeFuncEx(CTLS_VSICURL_CACHEDCONNECTION, cachedConnection,
3976
                                FreeCachedConnection, &bMemoryErrorOccurred);
3977
        if (bMemoryErrorOccurred)
3978
        {
3979
            delete cachedConnection;
3980
            return dummyCache;
3981
        }
3982
        return *cachedConnection;
3983
    }
3984
    return *static_cast<
3985
        std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData);
3986
}
3987
#else
3988
static thread_local std::map<VSICurlFilesystemHandlerBase *, CachedConnection>
3989
    g_tls_connectionCache;
3990
3991
static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> &
3992
GetConnectionCache()
3993
{
3994
    return g_tls_connectionCache;
3995
}
3996
#endif
3997
3998
/************************************************************************/
3999
/*                               clear()                                */
4000
/************************************************************************/
4001
4002
void CachedConnection::clear()
4003
{
4004
    if (hCurlMultiHandle)
4005
    {
4006
        VSICURLMultiCleanup(hCurlMultiHandle);
4007
        hCurlMultiHandle = nullptr;
4008
    }
4009
}
4010
4011
/************************************************************************/
4012
/*                   ~VSICurlFilesystemHandlerBase()                    */
4013
/************************************************************************/
4014
4015
// Empty all caches, drop this handler's entry from the connection cache of
// the calling thread, then destroy the mutex.
VSICurlFilesystemHandlerBase::~VSICurlFilesystemHandlerBase()
{
    // Explicitly qualified call to this class's ClearCache().
    VSICurlFilesystemHandlerBase::ClearCache();
    GetConnectionCache().erase(this);

    if (hMutex != nullptr)
    {
        CPLDestroyMutex(hMutex);
        hMutex = nullptr;
    }
}
4024
4025
/************************************************************************/
4026
/*                         AllowCachedDataFor()                         */
4027
/************************************************************************/
4028
4029
bool VSICurlFilesystemHandlerBase::AllowCachedDataFor(const char *pszFilename)
4030
{
4031
    bool bCachedAllowed = true;
4032
    char **papszTokens = CSLTokenizeString2(
4033
        CPLGetConfigOption("CPL_VSIL_CURL_NON_CACHED", ""), ":", 0);
4034
    for (int i = 0; papszTokens && papszTokens[i]; i++)
4035
    {
4036
        if (STARTS_WITH(pszFilename, papszTokens[i]))
4037
        {
4038
            bCachedAllowed = false;
4039
            break;
4040
        }
4041
    }
4042
    CSLDestroy(papszTokens);
4043
    return bCachedAllowed;
4044
}
4045
4046
/************************************************************************/
4047
/*                       GetCurlMultiHandleFor()                        */
4048
/************************************************************************/
4049
4050
// Return the curl multi handle cached for this handler in the connection
// cache of the calling thread, creating it with VSICURLMultiInit() on
// first use. The URL argument is unused.
CURLM *VSICurlFilesystemHandlerBase::GetCurlMultiHandleFor(
    const std::string & /*osURL*/)
{
    CachedConnection &oConnection = GetConnectionCache()[this];
    if (!oConnection.hCurlMultiHandle)
        oConnection.hCurlMultiHandle = VSICURLMultiInit();
    return oConnection.hCurlMultiHandle;
}
4060
4061
/************************************************************************/
4062
/*                           GetRegionCache()                           */
4063
/************************************************************************/
4064
4065
// Lazily create the region cache, sized from GetMaxRegions(), and return a
// non-owning pointer to it.
VSICurlFilesystemHandlerBase::RegionCacheType *
VSICurlFilesystemHandlerBase::GetRegionCache()
{
    // should be called under hMutex taken
    if (!m_poRegionCacheDoNotUseDirectly)
    {
        const auto nMaxRegions = static_cast<size_t>(GetMaxRegions());
        m_poRegionCacheDoNotUseDirectly.reset(
            new RegionCacheType(nMaxRegions));
    }
    return m_poRegionCacheDoNotUseDirectly.get();
}
4076
4077
/************************************************************************/
4078
/*                             GetRegion()                              */
4079
/************************************************************************/
4080
4081
std::shared_ptr<std::string>
4082
VSICurlFilesystemHandlerBase::GetRegion(const char *pszURL,
4083
                                        vsi_l_offset nFileOffsetStart)
4084
{
4085
    CPLMutexHolder oHolder(&hMutex);
4086
4087
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
4088
    nFileOffsetStart =
4089
        (nFileOffsetStart / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE;
4090
4091
    std::shared_ptr<std::string> out;
4092
    if (GetRegionCache()->tryGet(
4093
            FilenameOffsetPair(std::string(pszURL), nFileOffsetStart), out))
4094
    {
4095
        return out;
4096
    }
4097
4098
    return nullptr;
4099
}
4100
4101
/************************************************************************/
4102
/*                             AddRegion()                              */
4103
/************************************************************************/
4104
4105
void VSICurlFilesystemHandlerBase::AddRegion(const char *pszURL,
4106
                                             vsi_l_offset nFileOffsetStart,
4107
                                             size_t nSize, const char *pData)
4108
{
4109
    CPLMutexHolder oHolder(&hMutex);
4110
4111
    auto value = std::make_shared<std::string>();
4112
    value->assign(pData, nSize);
4113
    GetRegionCache()->insert(
4114
        FilenameOffsetPair(std::string(pszURL), nFileOffsetStart),
4115
        std::move(value));
4116
}
4117
4118
/************************************************************************/
4119
/*                         GetCachedFileProp()                          */
4120
/************************************************************************/
4121
4122
bool VSICurlFilesystemHandlerBase::GetCachedFileProp(const char *pszURL,
4123
                                                     FileProp &oFileProp)
4124
{
4125
    CPLMutexHolder oHolder(&hMutex);
4126
    bool inCache;
4127
    if (oCacheFileProp.tryGet(std::string(pszURL), inCache))
4128
    {
4129
        if (VSICURLGetCachedFileProp(pszURL, oFileProp))
4130
        {
4131
            return true;
4132
        }
4133
        oCacheFileProp.remove(std::string(pszURL));
4134
    }
4135
    return false;
4136
}
4137
4138
/************************************************************************/
4139
/*                         SetCachedFileProp()                          */
4140
/************************************************************************/
4141
4142
void VSICurlFilesystemHandlerBase::SetCachedFileProp(const char *pszURL,
4143
                                                     FileProp &oFileProp)
4144
{
4145
    CPLMutexHolder oHolder(&hMutex);
4146
    oCacheFileProp.insert(std::string(pszURL), true);
4147
    VSICURLSetCachedFileProp(pszURL, oFileProp);
4148
}
4149
4150
/************************************************************************/
4151
/*                          GetCachedDirList()                          */
4152
/************************************************************************/
4153
4154
bool VSICurlFilesystemHandlerBase::GetCachedDirList(
4155
    const char *pszURL, CachedDirList &oCachedDirList)
4156
{
4157
    CPLMutexHolder oHolder(&hMutex);
4158
4159
    return oCacheDirList.tryGet(std::string(pszURL), oCachedDirList) &&
4160
           // Let a chance to use new auth parameters
4161
           gnGenerationAuthParameters ==
4162
               oCachedDirList.nGenerationAuthParameters;
4163
}
4164
4165
/************************************************************************/
4166
/*                          SetCachedDirList()                          */
4167
/************************************************************************/
4168
4169
void VSICurlFilesystemHandlerBase::SetCachedDirList(
4170
    const char *pszURL, CachedDirList &oCachedDirList)
4171
{
4172
    CPLMutexHolder oHolder(&hMutex);
4173
4174
    std::string key(pszURL);
4175
    CachedDirList oldValue;
4176
    if (oCacheDirList.tryGet(key, oldValue))
4177
    {
4178
        nCachedFilesInDirList -= oldValue.oFileList.size();
4179
        oCacheDirList.remove(key);
4180
    }
4181
4182
    while ((!oCacheDirList.empty() &&
4183
            nCachedFilesInDirList + oCachedDirList.oFileList.size() >
4184
                1024 * 1024) ||
4185
           oCacheDirList.size() == oCacheDirList.getMaxAllowedSize())
4186
    {
4187
        std::string oldestKey;
4188
        oCacheDirList.getOldestEntry(oldestKey, oldValue);
4189
        nCachedFilesInDirList -= oldValue.oFileList.size();
4190
        oCacheDirList.remove(oldestKey);
4191
    }
4192
    oCachedDirList.nGenerationAuthParameters = gnGenerationAuthParameters;
4193
4194
    nCachedFilesInDirList += oCachedDirList.oFileList.size();
4195
    oCacheDirList.insert(key, oCachedDirList);
4196
}
4197
4198
/************************************************************************/
4199
/*                        ExistsInCacheDirList()                        */
4200
/************************************************************************/
4201
4202
bool VSICurlFilesystemHandlerBase::ExistsInCacheDirList(
4203
    const std::string &osDirname, bool *pbIsDir)
4204
{
4205
    CachedDirList cachedDirList;
4206
    if (GetCachedDirList(osDirname.c_str(), cachedDirList))
4207
    {
4208
        if (pbIsDir)
4209
            *pbIsDir = !cachedDirList.oFileList.empty();
4210
        return false;
4211
    }
4212
    else
4213
    {
4214
        if (pbIsDir)
4215
            *pbIsDir = false;
4216
        return false;
4217
    }
4218
}
4219
4220
/************************************************************************/
4221
/*                        InvalidateCachedData()                        */
4222
/************************************************************************/
4223
4224
void VSICurlFilesystemHandlerBase::InvalidateCachedData(const char *pszURL)
4225
{
4226
    CPLMutexHolder oHolder(&hMutex);
4227
4228
    oCacheFileProp.remove(std::string(pszURL));
4229
4230
    // Invalidate all cached regions for this URL
4231
    std::list<FilenameOffsetPair> keysToRemove;
4232
    std::string osURL(pszURL);
4233
    auto lambda =
4234
        [&keysToRemove,
4235
         &osURL](const lru11::KeyValuePair<FilenameOffsetPair,
4236
                                           std::shared_ptr<std::string>> &kv)
4237
    {
4238
        if (kv.key.filename_ == osURL)
4239
            keysToRemove.push_back(kv.key);
4240
    };
4241
    auto *poRegionCache = GetRegionCache();
4242
    poRegionCache->cwalk(lambda);
4243
    for (const auto &key : keysToRemove)
4244
        poRegionCache->remove(key);
4245
}
4246
4247
/************************************************************************/
4248
/*                             ClearCache()                             */
4249
/************************************************************************/
4250
4251
void VSICurlFilesystemHandlerBase::ClearCache()
4252
{
4253
    CPLMutexHolder oHolder(&hMutex);
4254
4255
    GetRegionCache()->clear();
4256
4257
    {
4258
        const auto lambda = [](const lru11::KeyValuePair<std::string, bool> &kv)
4259
        { VSICURLInvalidateCachedFileProp(kv.key.c_str()); };
4260
        oCacheFileProp.cwalk(lambda);
4261
        oCacheFileProp.clear();
4262
    }
4263
4264
    oCacheDirList.clear();
4265
    nCachedFilesInDirList = 0;
4266
4267
    GetConnectionCache()[this].clear();
4268
}
4269
4270
/************************************************************************/
4271
/*                         PartialClearCache()                          */
4272
/************************************************************************/
4273
4274
void VSICurlFilesystemHandlerBase::PartialClearCache(
4275
    const char *pszFilenamePrefix)
4276
{
4277
    CPLMutexHolder oHolder(&hMutex);
4278
4279
    std::string osURL = GetURLFromFilename(pszFilenamePrefix);
4280
    {
4281
        std::list<FilenameOffsetPair> keysToRemove;
4282
        auto lambda =
4283
            [&keysToRemove, &osURL](
4284
                const lru11::KeyValuePair<FilenameOffsetPair,
4285
                                          std::shared_ptr<std::string>> &kv)
4286
        {
4287
            if (strncmp(kv.key.filename_.c_str(), osURL.c_str(),
4288
                        osURL.size()) == 0)
4289
                keysToRemove.push_back(kv.key);
4290
        };
4291
        auto *poRegionCache = GetRegionCache();
4292
        poRegionCache->cwalk(lambda);
4293
        for (const auto &key : keysToRemove)
4294
            poRegionCache->remove(key);
4295
    }
4296
4297
    {
4298
        std::list<std::string> keysToRemove;
4299
        auto lambda = [&keysToRemove,
4300
                       &osURL](const lru11::KeyValuePair<std::string, bool> &kv)
4301
        {
4302
            if (strncmp(kv.key.c_str(), osURL.c_str(), osURL.size()) == 0)
4303
                keysToRemove.push_back(kv.key);
4304
        };
4305
        oCacheFileProp.cwalk(lambda);
4306
        for (const auto &key : keysToRemove)
4307
            oCacheFileProp.remove(key);
4308
    }
4309
    VSICURLInvalidateCachedFilePropPrefix(osURL.c_str());
4310
4311
    {
4312
        const size_t nLen = strlen(pszFilenamePrefix);
4313
        std::list<std::string> keysToRemove;
4314
        auto lambda =
4315
            [this, &keysToRemove, pszFilenamePrefix,
4316
             nLen](const lru11::KeyValuePair<std::string, CachedDirList> &kv)
4317
        {
4318
            if (strncmp(kv.key.c_str(), pszFilenamePrefix, nLen) == 0)
4319
            {
4320
                keysToRemove.push_back(kv.key);
4321
                nCachedFilesInDirList -= kv.value.oFileList.size();
4322
            }
4323
        };
4324
        oCacheDirList.cwalk(lambda);
4325
        for (const auto &key : keysToRemove)
4326
            oCacheDirList.remove(key);
4327
    }
4328
}
4329
4330
/************************************************************************/
4331
/*                          CreateFileHandle()                          */
4332
/************************************************************************/
4333
4334
// Allocate a new VSICurlHandle bound to this handler for pszFilename.
// The returned object is allocated with new; callers in this file either
// delete it or wrap it in a std::unique_ptr.
VSICurlHandle *
VSICurlFilesystemHandlerBase::CreateFileHandle(const char *pszFilename)
{
    VSICurlHandle *poNewHandle = new VSICurlHandle(this, pszFilename);
    return poNewHandle;
}
4339
4340
/************************************************************************/
4341
/*                            GetActualURL()                            */
4342
/************************************************************************/
4343
4344
const char *VSICurlFilesystemHandlerBase::GetActualURL(const char *pszFilename)
4345
{
4346
    VSICurlHandle *poHandle = CreateFileHandle(pszFilename);
4347
    if (poHandle == nullptr)
4348
        return pszFilename;
4349
    std::string osURL(poHandle->GetURL());
4350
    delete poHandle;
4351
    return CPLSPrintf("%s", osURL.c_str());
4352
}
4353
4354
/************************************************************************/
4355
/*                             GetOptions()                             */
4356
/************************************************************************/
4357
4358
// XML fragment listing the <Option> elements returned by GetOptionsStatic()
// and wrapped in <Options>...</Options> by GetOptions(). It is built from
// adjacent string literals, so every literal that ends in the middle of a
// sentence must end with a space.
#define VSICURL_OPTIONS                                                        \
    "  <Option name='GDAL_HTTP_MAX_RETRY' type='int' "                         \
    "description='Maximum number of retries' default='0'/>"                    \
    "  <Option name='GDAL_HTTP_RETRY_DELAY' type='double' "                    \
    "description='Retry delay in seconds' default='30'/>"                      \
    "  <Option name='GDAL_HTTP_HEADER_FILE' type='string' "                    \
    "description='Filename of a file that contains HTTP headers to "           \
    "forward to the server'/>"                                                 \
    "  <Option name='CPL_VSIL_CURL_USE_HEAD' type='boolean' "                  \
    "description='Whether to use HTTP HEAD verb to retrieve "                  \
    "file information' default='YES'/>"                                        \
    "  <Option name='GDAL_HTTP_MULTIRANGE' type='string-select' "              \
    "description='Strategy to apply to run multi-range requests' "             \
    "default='PARALLEL'>"                                                      \
    "       <Value>PARALLEL</Value>"                                           \
    "       <Value>SERIAL</Value>"                                             \
    "  </Option>"                                                              \
    "  <Option name='GDAL_HTTP_MULTIPLEX' type='boolean' "                     \
    "description='Whether to enable HTTP/2 multiplexing' default='YES'/>"      \
    "  <Option name='GDAL_HTTP_MERGE_CONSECUTIVE_RANGES' type='boolean' "      \
    "description='Whether to merge consecutive ranges in multirange "          \
    "requests' default='YES'/>"                                                \
    "  <Option name='CPL_VSIL_CURL_NON_CACHED' type='string' "                 \
    "description='Colon-separated list of filenames whose content "            \
    "must not be cached across open attempts'/>"                               \
    "  <Option name='CPL_VSIL_CURL_ALLOWED_FILENAME' type='string' "           \
    "description='Single filename that is allowed to be opened'/>"             \
    "  <Option name='CPL_VSIL_CURL_ALLOWED_EXTENSIONS' type='string' "         \
    "description='Comma or space separated list of allowed file "              \
    "extensions'/>"                                                            \
    "  <Option name='GDAL_DISABLE_READDIR_ON_OPEN' type='string-select' "      \
    "description='Whether to disable establishing the list of files in "       \
    "the directory of the current filename' default='NO'>"                     \
    "       <Value>NO</Value>"                                                 \
    "       <Value>YES</Value>"                                                \
    "       <Value>EMPTY_DIR</Value>"                                          \
    "  </Option>"                                                              \
    "  <Option name='VSI_CACHE' type='boolean' "                               \
    "description='Whether to cache in memory the contents of the opened "      \
    "file as soon as they are read' default='NO'/>"                            \
    "  <Option name='CPL_VSIL_CURL_CHUNK_SIZE' type='integer' "                \
    "description='Size in bytes of the minimum amount of data read in a "      \
    "file' default='16384' min='1024' max='10485760'/>"                        \
    "  <Option name='CPL_VSIL_CURL_CACHE_SIZE' type='integer' "                \
    "description='Size in bytes of the global /vsicurl/ cache' "               \
    "default='16384000'/>"                                                     \
    "  <Option name='CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE' type='boolean' "    \
    "description='Whether to skip files with Glacier storage class in "        \
    "directory listing.' default='YES'/>"                                      \
    "  <Option name='CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT' "            \
    "type='integer' description='Maximum number of bytes AdviseRead() is "     \
    "allowed to fetch at once' default='104857600'/>"                          \
    "  <Option name='GDAL_HTTP_MAX_CACHED_CONNECTIONS' type='integer' "        \
    "description='Maximum amount of connections that libcurl may keep alive "  \
    "in its connection cache after use'/>"                                     \
    "  <Option name='GDAL_HTTP_MAX_TOTAL_CONNECTIONS' type='integer' "         \
    "description='Maximum number of simultaneously open connections in "       \
    "total'/>"
4416
4417
const char *VSICurlFilesystemHandlerBase::GetOptionsStatic()
4418
{
4419
    return VSICURL_OPTIONS;
4420
}
4421
4422
const char *VSICurlFilesystemHandlerBase::GetOptions()
4423
{
4424
    static std::string osOptions(std::string("<Options>") + GetOptionsStatic() +
4425
                                 "</Options>");
4426
    return osOptions.c_str();
4427
}
4428
4429
/************************************************************************/
4430
/*                         IsAllowedFilename()                          */
4431
/************************************************************************/
4432
4433
bool VSICurlFilesystemHandlerBase::IsAllowedFilename(const char *pszFilename)
4434
{
4435
    const char *pszAllowedFilename =
4436
        CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_FILENAME", nullptr);
4437
    if (pszAllowedFilename != nullptr)
4438
    {
4439
        return strcmp(pszFilename, pszAllowedFilename) == 0;
4440
    }
4441
4442
    // Consider that only the files whose extension ends up with one that is
4443
    // listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server.  This can
4444
    // speeds up dramatically open experience, in case the server cannot return
4445
    // a file list.  {noext} can be used as a special token to mean file with no
4446
    // extension.
4447
    // For example:
4448
    // gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif"
4449
    // /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif
4450
    const char *pszAllowedExtensions =
4451
        CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", nullptr);
4452
    if (pszAllowedExtensions)
4453
    {
4454
        char **papszExtensions =
4455
            CSLTokenizeString2(pszAllowedExtensions, ", ", 0);
4456
        const char *queryStart = strchr(pszFilename, '?');
4457
        char *pszFilenameWithoutQuery = nullptr;
4458
        if (queryStart != nullptr)
4459
        {
4460
            pszFilenameWithoutQuery = CPLStrdup(pszFilename);
4461
            pszFilenameWithoutQuery[queryStart - pszFilename] = '\0';
4462
            pszFilename = pszFilenameWithoutQuery;
4463
        }
4464
        const size_t nURLLen = strlen(pszFilename);
4465
        bool bFound = false;
4466
        for (int i = 0; papszExtensions[i] != nullptr; i++)
4467
        {
4468
            const size_t nExtensionLen = strlen(papszExtensions[i]);
4469
            if (EQUAL(papszExtensions[i], "{noext}"))
4470
            {
4471
                const char *pszLastSlash = strrchr(pszFilename, '/');
4472
                if (pszLastSlash != nullptr &&
4473
                    strchr(pszLastSlash, '.') == nullptr)
4474
                {
4475
                    bFound = true;
4476
                    break;
4477
                }
4478
            }
4479
            else if (nURLLen > nExtensionLen &&
4480
                     EQUAL(pszFilename + nURLLen - nExtensionLen,
4481
                           papszExtensions[i]))
4482
            {
4483
                bFound = true;
4484
                break;
4485
            }
4486
        }
4487
4488
        CSLDestroy(papszExtensions);
4489
        if (pszFilenameWithoutQuery)
4490
        {
4491
            CPLFree(pszFilenameWithoutQuery);
4492
        }
4493
4494
        return bFound;
4495
    }
4496
    return TRUE;
4497
}
4498
4499
/************************************************************************/
4500
/*                                Open()                                */
4501
/************************************************************************/
4502
4503
// Open pszFilename for reading.
//
// Returns nullptr when: the filename does not start with a recognized
// prefix; the access mode contains 'w' or '+' (an error is emitted if
// bSetError); the filename is rejected by IsAllowedFilename() (unless the
// IGNORE_FILENAME_RESTRICTIONS open option is true); the file is found to
// be missing from the listing of its directory; or the existence check on
// the handle fails.
//
// Open options read from papszOptions: IGNORE_FILENAME_RESTRICTIONS,
// DISABLE_READDIR_ON_OPEN and CACHE. When the VSI_CACHE configuration
// option is true, the handle is wrapped with VSICreateCachedFile().
VSIVirtualHandleUniquePtr
VSICurlFilesystemHandlerBase::Open(const char *pszFilename,
                                   const char *pszAccess, bool bSetError,
                                   CSLConstList papszOptions)
{
    std::string osFilenameAfterPrefix;
    if (cpl::starts_with(std::string_view(pszFilename), GetFSPrefix()))
    {
        osFilenameAfterPrefix = pszFilename + GetFSPrefix().size();
    }
    else if (!StartsWithVSICurlPrefix(pszFilename, &osFilenameAfterPrefix))
    {
        return nullptr;
    }

    const bool bWriteRequested = strchr(pszAccess, 'w') != nullptr ||
                                 strchr(pszAccess, '+') != nullptr;
    if (bWriteRequested)
    {
        if (bSetError)
        {
            VSIError(VSIE_FileError,
                     "Only read-only mode is supported for /vsicurl");
        }
        return nullptr;
    }

    const bool bIgnoreFilenameRestrictions =
        papszOptions != nullptr &&
        CPLTestBool(CSLFetchNameValueDef(
            papszOptions, "IGNORE_FILENAME_RESTRICTIONS", "NO"));
    if (!bIgnoreFilenameRestrictions && !IsAllowedFilename(pszFilename))
        return nullptr;

    // Only the list_dir / empty_dir flags are of interest here.
    bool bListDir = true;
    bool bEmptyDir = false;
    CPL_IGNORE_RET_VAL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
                                                 nullptr, &bListDir, &bEmptyDir,
                                                 nullptr, nullptr, nullptr));

    const char *pszOptionVal = CSLFetchNameValueDef(
        papszOptions, "DISABLE_READDIR_ON_OPEN",
        VSIGetPathSpecificOption(pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN",
                                 "NO"));
    const bool bCache = CPLTestBool(CSLFetchNameValueDef(
        papszOptions, "CACHE", AllowCachedDataFor(pszFilename) ? "YES" : "NO"));
    const bool bSkipReadDir = !bListDir || bEmptyDir ||
                              EQUAL(pszOptionVal, "EMPTY_DIR") ||
                              CPLTestBool(pszOptionVal) || !bCache;

    const std::string osFilename(pszFilename);
    bool bGotFileList = !bSkipReadDir;
    bool bForceExistsCheck = false;
    FileProp cachedFileProp;

    // Decide whether the listing of the parent directory should be used to
    // check the existence of the file. The cache lookup is only done when
    // directory reading is not skipped.
    bool bCheckInDirListing = false;
    if (!bSkipReadDir)
    {
        const bool bKnownToExist =
            GetCachedFileProp(osFilenameAfterPrefix.c_str(), cachedFileProp) &&
            cachedFileProp.eExists == EXIST_YES;
        bCheckInDirListing =
            !bKnownToExist &&
            strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr &&
            !STARTS_WITH(CPLGetExtensionSafe(osFilename.c_str()).c_str(),
                         "zip") &&
            // Likely a Kerchunk JSON reference file: no need to list siblings
            !cpl::ends_with(osFilename, ".nc.zarr");
    }

    if (bCheckInDirListing)
    {
        // 1000 corresponds to the default page size of S3.
        constexpr int FILE_COUNT_LIMIT = 1000;
        const std::string osDirname =
            CPLGetDirnameSafe(osFilename.c_str()) + '/';
        const CPLStringList aosFileList(ReadDirInternal(
            osDirname.c_str(), FILE_COUNT_LIMIT, &bGotFileList));
        const bool bFound =
            VSICurlIsFileInList(aosFileList.List(),
                                CPLGetFilename(osFilename.c_str())) != -1;
        if (bGotFileList && !bFound && aosFileList.size() < FILE_COUNT_LIMIT)
        {
            // Some file servers are case insensitive, so in case there is a
            // match with case difference, do a full check just in case.
            // e.g.
            // http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/MEGA90N000CB.IMG
            // that is queried by
            // gdalinfo
            // /vsicurl/http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/mega90n000cb.lbl
            if (aosFileList.FindString(CPLGetFilename(osFilename.c_str())) ==
                -1)
            {
                return nullptr;
            }
            bForceExistsCheck = true;
        }
    }

    std::unique_ptr<VSICurlHandle> poHandle(
        CreateFileHandle(osFilename.c_str()));
    if (!poHandle)
        return nullptr;
    poHandle->SetCache(bCache);

    // If we didn't get a filelist, check that the file really exists.
    if ((!bGotFileList || bForceExistsCheck) && !poHandle->Exists(bSetError))
        return nullptr;

    if (CPLTestBool(CPLGetConfigOption("VSI_CACHE", "FALSE")))
    {
        return VSIVirtualHandleUniquePtr(
            VSICreateCachedFile(poHandle.release()));
    }
    return VSIVirtualHandleUniquePtr(poHandle.release());
}
4612
4613
/************************************************************************/
4614
/*                        VSICurlParserFindEOL()                        */
4615
/*                                                                      */
4616
/*      Small helper function for VSICurlParseHTMLFileList() to find    */
4617
/*      the end of a line in the directory listing.  Either a <br>      */
4618
/*      or newline.                                                     */
4619
/************************************************************************/
4620
4621
static char *VSICurlParserFindEOL(char *pszData)
4622
4623
{
4624
    while (*pszData != '\0' && *pszData != '\n' &&
4625
           !STARTS_WITH_CI(pszData, "<br>"))
4626
        pszData++;
4627
4628
    if (*pszData == '\0')
4629
        return nullptr;
4630
4631
    return pszData;
4632
}
4633
4634
/************************************************************************/
4635
/*                  VSICurlParseHTMLDateTimeFileSize()                  */
4636
/************************************************************************/
4637
4638
// English month names in calendar order; the array index is stored as-is in
// struct tm's tm_mon field by the parsers in this file. The abbreviated
// formats only compare the first three letters of each name.
static const char *const apszMonths[] = {
    "January",   "February", "March",    "April",
    "May",       "June",     "July",     "August",
    "September", "October",  "November", "December"};
4641
4642
static bool VSICurlParseHTMLDateTimeFileSize(const char *pszStr,
4643
                                             struct tm &brokendowntime,
4644
                                             GUIntBig &nFileSize,
4645
                                             GIntBig &mTime)
4646
{
4647
    for (int iMonth = 0; iMonth < 12; iMonth++)
4648
    {
4649
        char szMonth[32] = {};
4650
        szMonth[0] = '-';
4651
        memcpy(szMonth + 1, apszMonths[iMonth], 3);
4652
        szMonth[4] = '-';
4653
        szMonth[5] = '\0';
4654
        const char *pszMonthFound = strstr(pszStr, szMonth);
4655
        if (pszMonthFound)
4656
        {
4657
            // Format of Apache, like in
4658
            // http://download.osgeo.org/gdal/data/gtiff/
4659
            // "17-May-2010 12:26"
4660
            const auto nMonthFoundLen = strlen(pszMonthFound);
4661
            if (pszMonthFound - pszStr > 2 && nMonthFoundLen > 15 &&
4662
                pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
4663
            {
4664
                pszMonthFound -= 2;
4665
                int nDay = atoi(pszMonthFound);
4666
                int nYear = atoi(pszMonthFound + 7);
4667
                int nHour = atoi(pszMonthFound + 12);
4668
                int nMin = atoi(pszMonthFound + 15);
4669
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4670
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4671
                {
4672
                    brokendowntime.tm_year = nYear - 1900;
4673
                    brokendowntime.tm_mon = iMonth;
4674
                    brokendowntime.tm_mday = nDay;
4675
                    brokendowntime.tm_hour = nHour;
4676
                    brokendowntime.tm_min = nMin;
4677
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4678
4679
                    if (nMonthFoundLen > 15 + 2)
4680
                    {
4681
                        const char *pszFilesize = pszMonthFound + 15 + 2;
4682
                        while (*pszFilesize == ' ')
4683
                            pszFilesize++;
4684
                        if (*pszFilesize >= '1' && *pszFilesize <= '9')
4685
                            nFileSize = CPLScanUIntBig(
4686
                                pszFilesize,
4687
                                static_cast<int>(strlen(pszFilesize)));
4688
                    }
4689
4690
                    return true;
4691
                }
4692
            }
4693
            return false;
4694
        }
4695
4696
        /* Microsoft IIS */
4697
        snprintf(szMonth, sizeof(szMonth), " %s ", apszMonths[iMonth]);
4698
        pszMonthFound = strstr(pszStr, szMonth);
4699
        if (pszMonthFound)
4700
        {
4701
            int nLenMonth = static_cast<int>(strlen(apszMonths[iMonth]));
4702
            if (pszMonthFound - pszStr > 2 && pszMonthFound[-1] != ',' &&
4703
                pszMonthFound[-2] != ' ' &&
4704
                static_cast<int>(strlen(pszMonthFound - 2)) >
4705
                    2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
4706
            {
4707
                /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
4708
                /* "        Friday, 21 April 2006 12:05 p.m.     48062343
4709
                 * m35a_fy_94_95.tif" */
4710
                pszMonthFound -= 2;
4711
                int nDay = atoi(pszMonthFound);
4712
                int nCurOffset = 2 + 1 + nLenMonth + 1;
4713
                int nYear = atoi(pszMonthFound + nCurOffset);
4714
                nCurOffset += 4 + 1;
4715
                int nHour = atoi(pszMonthFound + nCurOffset);
4716
                if (nHour < 10)
4717
                    nCurOffset += 1 + 1;
4718
                else
4719
                    nCurOffset += 2 + 1;
4720
                const int nMin = atoi(pszMonthFound + nCurOffset);
4721
                nCurOffset += 2 + 1;
4722
                if (STARTS_WITH(pszMonthFound + nCurOffset, "p.m."))
4723
                    nHour += 12;
4724
                else if (!STARTS_WITH(pszMonthFound + nCurOffset, "a.m."))
4725
                    nHour = -1;
4726
                nCurOffset += 4;
4727
4728
                const char *pszFilesize = pszMonthFound + nCurOffset;
4729
                while (*pszFilesize == ' ')
4730
                    pszFilesize++;
4731
                if (*pszFilesize >= '1' && *pszFilesize <= '9')
4732
                    nFileSize = CPLScanUIntBig(
4733
                        pszFilesize, static_cast<int>(strlen(pszFilesize)));
4734
4735
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4736
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4737
                {
4738
                    brokendowntime.tm_year = nYear - 1900;
4739
                    brokendowntime.tm_mon = iMonth;
4740
                    brokendowntime.tm_mday = nDay;
4741
                    brokendowntime.tm_hour = nHour;
4742
                    brokendowntime.tm_min = nMin;
4743
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4744
4745
                    return true;
4746
                }
4747
                nFileSize = 0;
4748
            }
4749
            else if (pszMonthFound - pszStr > 1 && pszMonthFound[-1] == ',' &&
4750
                     static_cast<int>(strlen(pszMonthFound)) >
4751
                         1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
4752
            {
4753
                // Format of
4754
                // http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/
4755
                // "        Sunday, June 20, 2010  6:46 PM    233170905
4756
                // NWF2007LULCForSDE.zip"
4757
                pszMonthFound += 1;
4758
                int nCurOffset = nLenMonth + 1;
4759
                int nDay = atoi(pszMonthFound + nCurOffset);
4760
                nCurOffset += 2 + 1 + 1;
4761
                int nYear = atoi(pszMonthFound + nCurOffset);
4762
                nCurOffset += 4 + 1;
4763
                int nHour = atoi(pszMonthFound + nCurOffset);
4764
                nCurOffset += 2 + 1;
4765
                const int nMin = atoi(pszMonthFound + nCurOffset);
4766
                nCurOffset += 2 + 1;
4767
                if (STARTS_WITH(pszMonthFound + nCurOffset, "PM"))
4768
                    nHour += 12;
4769
                else if (!STARTS_WITH(pszMonthFound + nCurOffset, "AM"))
4770
                    nHour = -1;
4771
                nCurOffset += 2;
4772
4773
                const char *pszFilesize = pszMonthFound + nCurOffset;
4774
                while (*pszFilesize == ' ')
4775
                    pszFilesize++;
4776
                if (*pszFilesize >= '1' && *pszFilesize <= '9')
4777
                    nFileSize = CPLScanUIntBig(
4778
                        pszFilesize, static_cast<int>(strlen(pszFilesize)));
4779
4780
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4781
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4782
                {
4783
                    brokendowntime.tm_year = nYear - 1900;
4784
                    brokendowntime.tm_mon = iMonth;
4785
                    brokendowntime.tm_mday = nDay;
4786
                    brokendowntime.tm_hour = nHour;
4787
                    brokendowntime.tm_min = nMin;
4788
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4789
4790
                    return true;
4791
                }
4792
                nFileSize = 0;
4793
            }
4794
            return false;
4795
        }
4796
    }
4797
4798
    return false;
4799
}
4800
4801
/************************************************************************/
4802
/*                          ParseHTMLFileList()                         */
4803
/*                                                                      */
4804
/*      Parse a file list document and return all the components.       */
4805
/************************************************************************/
4806
4807
char **VSICurlFilesystemHandlerBase::ParseHTMLFileList(const char *pszFilename,
4808
                                                       int nMaxFiles,
4809
                                                       char *pszData,
4810
                                                       bool *pbGotFileList)
4811
{
4812
    *pbGotFileList = false;
4813
4814
    std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
4815
                                                nullptr, nullptr, nullptr,
4816
                                                nullptr, nullptr, nullptr));
4817
    const char *pszDir = nullptr;
4818
    if (STARTS_WITH_CI(osURL.c_str(), "http://"))
4819
        pszDir = strchr(osURL.c_str() + strlen("http://"), '/');
4820
    else if (STARTS_WITH_CI(osURL.c_str(), "https://"))
4821
        pszDir = strchr(osURL.c_str() + strlen("https://"), '/');
4822
    else if (STARTS_WITH_CI(osURL.c_str(), "ftp://"))
4823
        pszDir = strchr(osURL.c_str() + strlen("ftp://"), '/');
4824
    if (pszDir == nullptr)
4825
        pszDir = "";
4826
4827
    /* Apache / Nginx */
4828
    /* Most of the time the format is <title>Index of {pszDir[/]}</title>, but
4829
     * there are special cases like https://cdn.star.nesdis.noaa.gov/GOES18/ABI/MESO/M1/GEOCOLOR/
4830
     * where a CDN stuff makes that the title is <title>Index of /ma-cdn02/GOES/data/GOES18/ABI/MESO/M1/GEOCOLOR/</title>
4831
     */
4832
    const std::string osTitleIndexOfPrefix = "<title>Index of ";
4833
    const std::string osExpectedSuffix = std::string(pszDir).append("</title>");
4834
    const std::string osExpectedSuffixWithSlash =
4835
        std::string(pszDir).append("/</title>");
4836
    /* FTP */
4837
    const std::string osExpectedStringFTP =
4838
        std::string("FTP Listing of ").append(pszDir).append("/");
4839
    /* Apache 1.3.33 */
4840
    const std::string osExpectedStringOldApache =
4841
        std::string("<TITLE>Index of ").append(pszDir).append("</TITLE>");
4842
4843
    // The listing of
4844
    // http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/
4845
    // has
4846
    // "<title>Index of /srtm/SRTM_image_sample/picture examples</title>"
4847
    // so we must try unescaped %20 also.
4848
    // Similar with
4849
    // http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/
4850
    std::string osExpectedString_unescaped;
4851
    if (strchr(pszDir, '%'))
4852
    {
4853
        char *pszUnescapedDir = CPLUnescapeString(pszDir, nullptr, CPLES_URL);
4854
        osExpectedString_unescaped = osTitleIndexOfPrefix;
4855
        osExpectedString_unescaped += pszUnescapedDir;
4856
        osExpectedString_unescaped += "</title>";
4857
        CPLFree(pszUnescapedDir);
4858
    }
4859
4860
    char *c = nullptr;
4861
    int nCount = 0;
4862
    int nCountTable = 0;
4863
    CPLStringList oFileList;
4864
    char *pszLine = pszData;
4865
    bool bIsHTMLDirList = false;
4866
4867
    while ((c = VSICurlParserFindEOL(pszLine)) != nullptr)
4868
    {
4869
        *c = '\0';
4870
4871
        // To avoid false positive on pages such as
4872
        // http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA
4873
        // This is a heuristics, but normal HTML listing of files have not more
4874
        // than one table.
4875
        if (strstr(pszLine, "<table"))
4876
        {
4877
            nCountTable++;
4878
            if (nCountTable == 2)
4879
            {
4880
                *pbGotFileList = false;
4881
                return nullptr;
4882
            }
4883
        }
4884
4885
        if (!bIsHTMLDirList &&
4886
            ((strstr(pszLine, osTitleIndexOfPrefix.c_str()) &&
4887
              (strstr(pszLine, osExpectedSuffix.c_str()) ||
4888
               strstr(pszLine, osExpectedSuffixWithSlash.c_str()))) ||
4889
             strstr(pszLine, osExpectedStringFTP.c_str()) ||
4890
             strstr(pszLine, osExpectedStringOldApache.c_str()) ||
4891
             (!osExpectedString_unescaped.empty() &&
4892
              strstr(pszLine, osExpectedString_unescaped.c_str()))))
4893
        {
4894
            bIsHTMLDirList = true;
4895
            *pbGotFileList = true;
4896
        }
4897
        // Subversion HTTP listing
4898
        // or Microsoft-IIS/6.0 listing
4899
        // (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
4900
        else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
4901
        {
4902
            // Detect something like:
4903
            // <html><head><title>gdal - Revision 20739:
4904
            // /trunk/autotest/gcore/data</title></head> */ The annoying thing
4905
            // is that what is after ': ' is a subpart of what is after
4906
            // http://server/
4907
            char *pszSubDir = strstr(pszLine, ": ");
4908
            if (pszSubDir == nullptr)
4909
                // or <title>ortho.linz.govt.nz - /tifs/2005_06/</title>
4910
                pszSubDir = strstr(pszLine, "- ");
4911
            if (pszSubDir)
4912
            {
4913
                pszSubDir += 2;
4914
                char *pszTmp = strstr(pszSubDir, "</title>");
4915
                if (pszTmp)
4916
                {
4917
                    if (pszTmp[-1] == '/')
4918
                        pszTmp[-1] = 0;
4919
                    else
4920
                        *pszTmp = 0;
4921
                    if (strstr(pszDir, pszSubDir))
4922
                    {
4923
                        bIsHTMLDirList = true;
4924
                        *pbGotFileList = true;
4925
                    }
4926
                }
4927
            }
4928
        }
4929
        else if (bIsHTMLDirList &&
4930
                 (strstr(pszLine, "<a href=\"") != nullptr ||
4931
                  strstr(pszLine, "<A HREF=\"") != nullptr) &&
4932
                 // Exclude absolute links, like to subversion home.
4933
                 strstr(pszLine, "<a href=\"http://") == nullptr &&
4934
                 // exclude parent directory.
4935
                 strstr(pszLine, "Parent Directory") == nullptr)
4936
        {
4937
            char *beginFilename = strstr(pszLine, "<a href=\"");
4938
            if (beginFilename == nullptr)
4939
                beginFilename = strstr(pszLine, "<A HREF=\"");
4940
            beginFilename += strlen("<a href=\"");
4941
            char *endQuote = strchr(beginFilename, '"');
4942
            if (endQuote && !STARTS_WITH(beginFilename, "?C=") &&
4943
                !STARTS_WITH(beginFilename, "?N="))
4944
            {
4945
                struct tm brokendowntime;
4946
                memset(&brokendowntime, 0, sizeof(brokendowntime));
4947
                GUIntBig nFileSize = 0;
4948
                GIntBig mTime = 0;
4949
4950
                VSICurlParseHTMLDateTimeFileSize(pszLine, brokendowntime,
4951
                                                 nFileSize, mTime);
4952
4953
                *endQuote = '\0';
4954
4955
                // Remove trailing slash, that are returned for directories by
4956
                // Apache.
4957
                bool bIsDirectory = false;
4958
                if (endQuote[-1] == '/')
4959
                {
4960
                    bIsDirectory = true;
4961
                    endQuote[-1] = 0;
4962
                }
4963
4964
                // shttpd links include slashes from the root directory.
4965
                // Skip them.
4966
                while (strchr(beginFilename, '/'))
4967
                    beginFilename = strchr(beginFilename, '/') + 1;
4968
4969
                if (strcmp(beginFilename, ".") != 0 &&
4970
                    strcmp(beginFilename, "..") != 0)
4971
                {
4972
                    std::string osCachedFilename =
4973
                        CPLSPrintf("%s/%s", osURL.c_str(), beginFilename);
4974
4975
                    FileProp cachedFileProp;
4976
                    GetCachedFileProp(osCachedFilename.c_str(), cachedFileProp);
4977
                    cachedFileProp.eExists = EXIST_YES;
4978
                    cachedFileProp.bIsDirectory = bIsDirectory;
4979
                    cachedFileProp.mTime = static_cast<time_t>(mTime);
4980
                    cachedFileProp.bHasComputedFileSize = nFileSize > 0;
4981
                    cachedFileProp.fileSize = nFileSize;
4982
                    SetCachedFileProp(osCachedFilename.c_str(), cachedFileProp);
4983
4984
                    oFileList.AddString(beginFilename);
4985
                    if constexpr (ENABLE_DEBUG_VERBOSE)
4986
                    {
4987
                        CPLDebug(
4988
                            GetDebugKey(),
4989
                            "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB
4990
                            ", time = %04d/%02d/%02d %02d:%02d:%02d",
4991
                            nCount, osCachedFilename.c_str(),
4992
                            bIsDirectory ? 1 : 0, nFileSize,
4993
                            brokendowntime.tm_year + 1900,
4994
                            brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
4995
                            brokendowntime.tm_hour, brokendowntime.tm_min,
4996
                            brokendowntime.tm_sec);
4997
                    }
4998
                    nCount++;
4999
5000
                    if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles)
5001
                        break;
5002
                }
5003
            }
5004
        }
5005
        pszLine = c + 1;
5006
    }
5007
5008
    return oFileList.StealList();
5009
}
5010
5011
/************************************************************************/
5012
/*                        GetStreamingFilename()                        */
5013
/************************************************************************/
5014
5015
std::string VSICurlFilesystemHandler::GetStreamingFilename(
5016
    const std::string &osFilename) const
5017
{
5018
    if (STARTS_WITH(osFilename.c_str(), GetFSPrefix().c_str()))
5019
        return "/vsicurl_streaming/" + osFilename.substr(GetFSPrefix().size());
5020
    return osFilename;
5021
}
5022
5023
/************************************************************************/
5024
/*                GetHintForPotentiallyRecognizedPath()                 */
5025
/************************************************************************/
5026
5027
std::string VSICurlFilesystemHandler::GetHintForPotentiallyRecognizedPath(
5028
    const std::string &osPath)
5029
{
5030
    if (!StartsWithVSICurlPrefix(osPath.c_str()) &&
5031
        !cpl::starts_with(osPath, GetStreamingFilename(GetFSPrefix())))
5032
    {
5033
        for (const char *pszPrefix : {"http://", "https://"})
5034
        {
5035
            if (cpl::starts_with(osPath, pszPrefix))
5036
            {
5037
                return GetFSPrefix() + osPath;
5038
            }
5039
        }
5040
    }
5041
    return std::string();
5042
}
5043
5044
/************************************************************************/
5045
/*                          VSICurlGetToken()                           */
5046
/************************************************************************/
5047
5048
// Extracts the next space-delimited token starting at pszCurPtr.
//
// The token is NUL-terminated in place and returned. *ppszNextToken is set to
// the first non-space character after it, or to nullptr when the token ends
// the string. Returns nullptr, leaving *ppszNextToken untouched, when
// pszCurPtr is nullptr or holds nothing but spaces.
static char *VSICurlGetToken(char *pszCurPtr, char **ppszNextToken)
{
    if (pszCurPtr == nullptr)
        return nullptr;

    // Skip leading blanks.
    char *const pszToken = pszCurPtr + strspn(pszCurPtr, " ");
    if (*pszToken == '\0')
        return nullptr;

    // The token extends up to the next blank or the end of the string.
    char *pszAfter = pszToken + strcspn(pszToken, " ");
    if (*pszAfter == '\0')
    {
        *ppszNextToken = nullptr;
        return pszToken;
    }

    *pszAfter = '\0';
    ++pszAfter;
    *ppszNextToken = pszAfter + strspn(pszAfter, " ");
    return pszToken;
}
5076
5077
/************************************************************************/
5078
/*                      VSICurlParseFullFTPLine()                       */
5079
/************************************************************************/
5080
5081
/* Parse lines like the following ones (one listing entry per line):
-rw-r--r--    1 10003    100           430 Jul 04  2008 COPYING
lrwxrwxrwx    1 ftp      ftp            28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
-rw-r--r--    1 ftp      ftp      725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
drwxr-xr-x  280 1003  1003  6656 Aug 26 04:17 gnu
*/
5088
5089
static bool VSICurlParseFullFTPLine(char *pszLine, char *&pszFilename,
5090
                                    bool &bSizeValid, GUIntBig &nSize,
5091
                                    bool &bIsDirectory, GIntBig &nUnixTime)
5092
{
5093
    char *pszNextToken = pszLine;
5094
    char *pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
5095
    if (pszPermissions == nullptr || strlen(pszPermissions) != 10)
5096
        return false;
5097
    bIsDirectory = pszPermissions[0] == 'd';
5098
5099
    for (int i = 0; i < 3; i++)
5100
    {
5101
        if (VSICurlGetToken(pszNextToken, &pszNextToken) == nullptr)
5102
            return false;
5103
    }
5104
5105
    char *pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
5106
    if (pszSize == nullptr)
5107
        return false;
5108
5109
    if (pszPermissions[0] == '-')
5110
    {
5111
        // Regular file.
5112
        bSizeValid = true;
5113
        nSize = CPLScanUIntBig(pszSize, static_cast<int>(strlen(pszSize)));
5114
    }
5115
5116
    struct tm brokendowntime;
5117
    memset(&brokendowntime, 0, sizeof(brokendowntime));
5118
    bool bBrokenDownTimeValid = true;
5119
5120
    char *pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
5121
    if (pszMonth == nullptr || strlen(pszMonth) != 3)
5122
        return false;
5123
5124
    int i = 0;  // Used after for.
5125
    for (; i < 12; i++)
5126
    {
5127
        if (EQUALN(pszMonth, apszMonths[i], 3))
5128
            break;
5129
    }
5130
    if (i < 12)
5131
        brokendowntime.tm_mon = i;
5132
    else
5133
        bBrokenDownTimeValid = false;
5134
5135
    char *pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
5136
    if (pszDay == nullptr || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
5137
        return false;
5138
    int nDay = atoi(pszDay);
5139
    if (nDay >= 1 && nDay <= 31)
5140
        brokendowntime.tm_mday = nDay;
5141
    else
5142
        bBrokenDownTimeValid = false;
5143
5144
    char *pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
5145
    if (pszHourOrYear == nullptr ||
5146
        (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
5147
        return false;
5148
    if (strlen(pszHourOrYear) == 4)
5149
    {
5150
        brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
5151
    }
5152
    else
5153
    {
5154
        time_t sTime;
5155
        time(&sTime);
5156
        struct tm currentBrokendowntime;
5157
        CPLUnixTimeToYMDHMS(static_cast<GIntBig>(sTime),
5158
                            &currentBrokendowntime);
5159
        brokendowntime.tm_year = currentBrokendowntime.tm_year;
5160
        brokendowntime.tm_hour = atoi(pszHourOrYear);
5161
        brokendowntime.tm_min = atoi(pszHourOrYear + 3);
5162
    }
5163
5164
    if (bBrokenDownTimeValid)
5165
        nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
5166
    else
5167
        nUnixTime = 0;
5168
5169
    if (pszNextToken == nullptr)
5170
        return false;
5171
5172
    pszFilename = pszNextToken;
5173
5174
    char *pszCurPtr = pszFilename;
5175
    while (*pszCurPtr != '\0')
5176
    {
5177
        // In case of a link, stop before the pointed part of the link.
5178
        if (pszPermissions[0] == 'l' && STARTS_WITH(pszCurPtr, " -> "))
5179
        {
5180
            break;
5181
        }
5182
        pszCurPtr++;
5183
    }
5184
    *pszCurPtr = '\0';
5185
5186
    return true;
5187
}
5188
5189
/************************************************************************/
5190
/*                         GetURLFromFilename()                         */
5191
/************************************************************************/
5192
5193
std::string VSICurlFilesystemHandlerBase::GetURLFromFilename(
5194
    const std::string &osFilename) const
5195
{
5196
    return VSICurlGetURLFromFilename(osFilename.c_str(), nullptr, nullptr,
5197
                                     nullptr, nullptr, nullptr, nullptr,
5198
                                     nullptr, nullptr);
5199
}
5200
5201
/************************************************************************/
5202
/*                          RegisterEmptyDir()                          */
5203
/************************************************************************/
5204
5205
void VSICurlFilesystemHandlerBase::RegisterEmptyDir(
5206
    const std::string &osDirname)
5207
{
5208
    CachedDirList cachedDirList;
5209
    cachedDirList.bGotFileList = true;
5210
    cachedDirList.oFileList.AddString(".");
5211
    SetCachedDirList(osDirname.c_str(), cachedDirList);
5212
}
5213
5214
/************************************************************************/
5215
/*                            GetFileList()                             */
5216
/************************************************************************/
5217
5218
char **VSICurlFilesystemHandlerBase::GetFileList(const char *pszDirname,
5219
                                                 int nMaxFiles,
5220
                                                 bool *pbGotFileList)
5221
{
5222
    if constexpr (ENABLE_DEBUG)
5223
    {
5224
        CPLDebug(GetDebugKey(), "GetFileList(%s)", pszDirname);
5225
    }
5226
5227
    *pbGotFileList = false;
5228
5229
    bool bListDir = true;
5230
    bool bEmptyDir = false;
5231
    std::string osURL(VSICurlGetURLFromFilename(pszDirname, nullptr, nullptr,
5232
                                                nullptr, &bListDir, &bEmptyDir,
5233
                                                nullptr, nullptr, nullptr));
5234
    if (bEmptyDir)
5235
    {
5236
        *pbGotFileList = true;
5237
        return CSLAddString(nullptr, ".");
5238
    }
5239
    if (!bListDir)
5240
        return nullptr;
5241
5242
    // Deal with publicly visible Azure directories.
5243
    if (STARTS_WITH(osURL.c_str(), "https://"))
5244
    {
5245
        const char *pszBlobCore =
5246
            strstr(osURL.c_str(), ".blob.core.windows.net/");
5247
        if (pszBlobCore)
5248
        {
5249
            FileProp cachedFileProp;
5250
            GetCachedFileProp(osURL.c_str(), cachedFileProp);
5251
            if (cachedFileProp.bIsAzureFolder)
5252
            {
5253
                const char *pszURLWithoutHTTPS =
5254
                    osURL.c_str() + strlen("https://");
5255
                const std::string osStorageAccount(
5256
                    pszURLWithoutHTTPS, pszBlobCore - pszURLWithoutHTTPS);
5257
                CPLConfigOptionSetter oSetter1("AZURE_NO_SIGN_REQUEST", "YES",
5258
                                               false);
5259
                CPLConfigOptionSetter oSetter2("AZURE_STORAGE_ACCOUNT",
5260
                                               osStorageAccount.c_str(), false);
5261
                const std::string osVSIAZ(std::string("/vsiaz/").append(
5262
                    pszBlobCore + strlen(".blob.core.windows.net/")));
5263
                char **papszFileList = VSIReadDirEx(osVSIAZ.c_str(), nMaxFiles);
5264
                if (papszFileList)
5265
                {
5266
                    *pbGotFileList = true;
5267
                    return papszFileList;
5268
                }
5269
            }
5270
        }
5271
    }
5272
5273
    // HACK (optimization in fact) for MBTiles driver.
5274
    if (strstr(pszDirname, ".tiles.mapbox.com") != nullptr)
5275
        return nullptr;
5276
5277
    if (STARTS_WITH(osURL.c_str(), "ftp://"))
5278
    {
5279
        WriteFuncStruct sWriteFuncData;
5280
        sWriteFuncData.pBuffer = nullptr;
5281
5282
        std::string osDirname(osURL);
5283
        osDirname += '/';
5284
5285
        char **papszFileList = nullptr;
5286
5287
        CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname);
5288
        CURL *hCurlHandle = curl_easy_init();
5289
5290
        for (int iTry = 0; iTry < 2; iTry++)
5291
        {
5292
            struct curl_slist *headers =
5293
                VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr);
5294
5295
            // On the first pass, we want to try fetching all the possible
5296
            // information (filename, file/directory, size). If that does not
5297
            // work, then try again with CURLOPT_DIRLISTONLY set.
5298
            if (iTry == 1)
5299
            {
5300
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
5301
            }
5302
5303
            VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr,
5304
                                       nullptr);
5305
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
5306
                                       &sWriteFuncData);
5307
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
5308
                                       VSICurlHandleWriteFunc);
5309
5310
            char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
5311
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
5312
                                       szCurlErrBuf);
5313
5314
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
5315
                                       headers);
5316
5317
            VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
5318
5319
            curl_slist_free_all(headers);
5320
5321
            if (sWriteFuncData.pBuffer == nullptr)
5322
            {
5323
                curl_easy_cleanup(hCurlHandle);
5324
                return nullptr;
5325
            }
5326
5327
            char *pszLine = sWriteFuncData.pBuffer;
5328
            char *c = nullptr;
5329
            int nCount = 0;
5330
5331
            if (STARTS_WITH_CI(pszLine, "<!DOCTYPE HTML") ||
5332
                STARTS_WITH_CI(pszLine, "<HTML>"))
5333
            {
5334
                papszFileList =
5335
                    ParseHTMLFileList(pszDirname, nMaxFiles,
5336
                                      sWriteFuncData.pBuffer, pbGotFileList);
5337
                break;
5338
            }
5339
            else if (iTry == 0)
5340
            {
5341
                CPLStringList oFileList;
5342
                *pbGotFileList = true;
5343
5344
                while ((c = strchr(pszLine, '\n')) != nullptr)
5345
                {
5346
                    *c = 0;
5347
                    if (c - pszLine > 0 && c[-1] == '\r')
5348
                        c[-1] = 0;
5349
5350
                    char *pszFilename = nullptr;
5351
                    bool bSizeValid = false;
5352
                    GUIntBig nFileSize = 0;
5353
                    bool bIsDirectory = false;
5354
                    GIntBig mUnixTime = 0;
5355
                    if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
5356
                                                 bSizeValid, nFileSize,
5357
                                                 bIsDirectory, mUnixTime))
5358
                        break;
5359
5360
                    if (strcmp(pszFilename, ".") != 0 &&
5361
                        strcmp(pszFilename, "..") != 0)
5362
                    {
5363
                        if (CPLHasUnbalancedPathTraversal(pszFilename))
5364
                        {
5365
                            CPLError(CE_Warning, CPLE_AppDefined,
5366
                                     "Ignoring '%s' that has a path traversal "
5367
                                     "pattern",
5368
                                     pszFilename);
5369
                        }
5370
                        else
5371
                        {
5372
                            std::string osCachedFilename =
5373
                                CPLSPrintf("%s/%s", osURL.c_str(), pszFilename);
5374
5375
                            FileProp cachedFileProp;
5376
                            GetCachedFileProp(osCachedFilename.c_str(),
5377
                                              cachedFileProp);
5378
                            cachedFileProp.eExists = EXIST_YES;
5379
                            cachedFileProp.bIsDirectory = bIsDirectory;
5380
                            cachedFileProp.mTime =
5381
                                static_cast<time_t>(mUnixTime);
5382
                            cachedFileProp.bHasComputedFileSize = bSizeValid;
5383
                            cachedFileProp.fileSize = nFileSize;
5384
                            SetCachedFileProp(osCachedFilename.c_str(),
5385
                                              cachedFileProp);
5386
5387
                            oFileList.AddString(pszFilename);
5388
                            if constexpr (ENABLE_DEBUG_VERBOSE)
5389
                            {
5390
                                struct tm brokendowntime;
5391
                                CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
5392
                                CPLDebug(
5393
                                    GetDebugKey(),
5394
                                    "File[%d] = %s, is_dir = %d, size "
5395
                                    "= " CPL_FRMT_GUIB
5396
                                    ", time = %04d/%02d/%02d %02d:%02d:%02d",
5397
                                    nCount, pszFilename, bIsDirectory ? 1 : 0,
5398
                                    nFileSize, brokendowntime.tm_year + 1900,
5399
                                    brokendowntime.tm_mon + 1,
5400
                                    brokendowntime.tm_mday,
5401
                                    brokendowntime.tm_hour,
5402
                                    brokendowntime.tm_min,
5403
                                    brokendowntime.tm_sec);
5404
                            }
5405
5406
                            nCount++;
5407
5408
                            if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles)
5409
                                break;
5410
                        }
5411
                    }
5412
5413
                    pszLine = c + 1;
5414
                }
5415
5416
                if (c == nullptr)
5417
                {
5418
                    papszFileList = oFileList.StealList();
5419
                    break;
5420
                }
5421
            }
5422
            else
5423
            {
5424
                CPLStringList oFileList;
5425
                *pbGotFileList = true;
5426
5427
                while ((c = strchr(pszLine, '\n')) != nullptr)
5428
                {
5429
                    *c = 0;
5430
                    if (c - pszLine > 0 && c[-1] == '\r')
5431
                        c[-1] = 0;
5432
5433
                    if (strcmp(pszLine, ".") != 0 && strcmp(pszLine, "..") != 0)
5434
                    {
5435
                        oFileList.AddString(pszLine);
5436
                        if constexpr (ENABLE_DEBUG_VERBOSE)
5437
                        {
5438
                            CPLDebug(GetDebugKey(), "File[%d] = %s", nCount,
5439
                                     pszLine);
5440
                        }
5441
                        nCount++;
5442
                    }
5443
5444
                    pszLine = c + 1;
5445
                }
5446
5447
                papszFileList = oFileList.StealList();
5448
            }
5449
5450
            CPLFree(sWriteFuncData.pBuffer);
5451
            sWriteFuncData.pBuffer = nullptr;
5452
        }
5453
5454
        CPLFree(sWriteFuncData.pBuffer);
5455
        curl_easy_cleanup(hCurlHandle);
5456
5457
        return papszFileList;
5458
    }
5459
5460
    // Try to recognize HTML pages that list the content of a directory.
5461
    // Currently this supports what Apache and shttpd can return.
5462
    else if (STARTS_WITH(osURL.c_str(), "http://") ||
5463
             STARTS_WITH(osURL.c_str(), "https://"))
5464
    {
5465
        std::string osDirname(std::move(osURL));
5466
        osDirname += '/';
5467
5468
        CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname);
5469
        CURL *hCurlHandle = curl_easy_init();
5470
5471
        struct curl_slist *headers =
5472
            VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr);
5473
5474
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
5475
5476
        WriteFuncStruct sWriteFuncData;
5477
        VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
5478
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
5479
                                   &sWriteFuncData);
5480
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
5481
                                   VSICurlHandleWriteFunc);
5482
5483
        char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
5484
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
5485
                                   szCurlErrBuf);
5486
5487
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
5488
5489
        VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
5490
5491
        curl_slist_free_all(headers);
5492
5493
        NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
5494
5495
        if (sWriteFuncData.pBuffer == nullptr)
5496
        {
5497
            curl_easy_cleanup(hCurlHandle);
5498
            return nullptr;
5499
        }
5500
5501
        char **papszFileList = nullptr;
5502
        if (STARTS_WITH_CI(sWriteFuncData.pBuffer, "<?xml") &&
5503
            strstr(sWriteFuncData.pBuffer, "<ListBucketResult") != nullptr)
5504
        {
5505
            CPLStringList osFileList;
5506
            std::string osBaseURL(pszDirname);
5507
            osBaseURL += "/";
5508
            bool bIsTruncated = true;
5509
            bool ret = AnalyseS3FileList(
5510
                osBaseURL, sWriteFuncData.pBuffer, osFileList, nMaxFiles,
5511
                GetS3IgnoredStorageClasses(), bIsTruncated);
5512
            // If the list is truncated, then don't report it.
5513
            if (ret && !bIsTruncated)
5514
            {
5515
                if (osFileList.empty())
5516
                {
5517
                    // To avoid an error to be reported
5518
                    osFileList.AddString(".");
5519
                }
5520
                papszFileList = osFileList.StealList();
5521
                *pbGotFileList = true;
5522
            }
5523
        }
5524
        else
5525
        {
5526
            papszFileList = ParseHTMLFileList(
5527
                pszDirname, nMaxFiles, sWriteFuncData.pBuffer, pbGotFileList);
5528
        }
5529
5530
        CPLFree(sWriteFuncData.pBuffer);
5531
        curl_easy_cleanup(hCurlHandle);
5532
        return papszFileList;
5533
    }
5534
5535
    return nullptr;
5536
}
5537
5538
/************************************************************************/
5539
/*                     GetS3IgnoredStorageClasses()                     */
5540
/************************************************************************/
5541
5542
std::set<std::string> VSICurlFilesystemHandlerBase::GetS3IgnoredStorageClasses()
5543
{
5544
    std::set<std::string> oSetIgnoredStorageClasses;
5545
    const char *pszIgnoredStorageClasses =
5546
        CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_STORAGE_CLASSES", nullptr);
5547
    const char *pszIgnoreGlacierStorage =
5548
        CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE", nullptr);
5549
    CPLStringList aosIgnoredStorageClasses(
5550
        CSLTokenizeString2(pszIgnoredStorageClasses ? pszIgnoredStorageClasses
5551
                                                    : "GLACIER,DEEP_ARCHIVE",
5552
                           ",", 0));
5553
    for (int i = 0; i < aosIgnoredStorageClasses.size(); ++i)
5554
        oSetIgnoredStorageClasses.insert(aosIgnoredStorageClasses[i]);
5555
    if (pszIgnoredStorageClasses == nullptr &&
5556
        pszIgnoreGlacierStorage != nullptr &&
5557
        !CPLTestBool(pszIgnoreGlacierStorage))
5558
    {
5559
        oSetIgnoredStorageClasses.clear();
5560
    }
5561
    return oSetIgnoredStorageClasses;
5562
}
5563
5564
/************************************************************************/
5565
/*                                Stat()                                */
5566
/************************************************************************/
5567
5568
int VSICurlFilesystemHandlerBase::Stat(const char *pszFilename,
5569
                                       VSIStatBufL *pStatBuf, int nFlags)
5570
{
5571
    if (!cpl::starts_with(std::string_view(pszFilename), GetFSPrefix()) &&
5572
        !StartsWithVSICurlPrefix(pszFilename))
5573
    {
5574
        return -1;
5575
    }
5576
5577
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
5578
5579
    if ((nFlags & VSI_STAT_CACHE_ONLY) != 0)
5580
    {
5581
        cpl::FileProp oFileProp;
5582
        if (!GetCachedFileProp(GetURLFromFilename(pszFilename).c_str(),
5583
                               oFileProp) ||
5584
            oFileProp.eExists != EXIST_YES)
5585
        {
5586
            return -1;
5587
        }
5588
        pStatBuf->st_mode = static_cast<unsigned short>(oFileProp.nMode);
5589
        pStatBuf->st_mtime = oFileProp.mTime;
5590
        pStatBuf->st_size = oFileProp.fileSize;
5591
        return 0;
5592
    }
5593
5594
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5595
    NetworkStatisticsAction oContextAction("Stat");
5596
5597
    const std::string osFilename(pszFilename);
5598
5599
    if (!IsAllowedFilename(pszFilename))
5600
        return -1;
5601
5602
    bool bListDir = true;
5603
    bool bEmptyDir = false;
5604
    std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
5605
                                                nullptr, &bListDir, &bEmptyDir,
5606
                                                nullptr, nullptr, nullptr));
5607
5608
    const char *pszOptionVal = VSIGetPathSpecificOption(
5609
        pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
5610
    const bool bSkipReadDir =
5611
        !bListDir || bEmptyDir || EQUAL(pszOptionVal, "EMPTY_DIR") ||
5612
        CPLTestBool(pszOptionVal) || !AllowCachedDataFor(pszFilename);
5613
5614
    // Does it look like a FTP directory?
5615
    if (STARTS_WITH(osURL.c_str(), "ftp://") && osFilename.back() == '/' &&
5616
        !bSkipReadDir)
5617
    {
5618
        char **papszFileList = ReadDirEx(osFilename.c_str(), 0);
5619
        if (papszFileList)
5620
        {
5621
            pStatBuf->st_mode = S_IFDIR;
5622
            pStatBuf->st_size = 0;
5623
5624
            CSLDestroy(papszFileList);
5625
5626
            return 0;
5627
        }
5628
        return -1;
5629
    }
5630
    else if (strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr &&
5631
             !STARTS_WITH_CI(CPLGetExtensionSafe(osFilename.c_str()).c_str(),
5632
                             "zip") &&
5633
             strstr(osFilename.c_str(), ".zip.") != nullptr &&
5634
             strstr(osFilename.c_str(), ".ZIP.") != nullptr && !bSkipReadDir)
5635
    {
5636
        bool bGotFileList = false;
5637
        char **papszFileList = ReadDirInternal(
5638
            CPLGetDirnameSafe(osFilename.c_str()).c_str(), 0, &bGotFileList);
5639
        const bool bFound =
5640
            VSICurlIsFileInList(papszFileList,
5641
                                CPLGetFilename(osFilename.c_str())) != -1;
5642
        CSLDestroy(papszFileList);
5643
        if (bGotFileList && !bFound)
5644
        {
5645
            return -1;
5646
        }
5647
    }
5648
5649
    VSICurlHandle *poHandle = CreateFileHandle(osFilename.c_str());
5650
    if (poHandle == nullptr)
5651
        return -1;
5652
5653
    if (poHandle->IsKnownFileSize() ||
5654
        ((nFlags & VSI_STAT_SIZE_FLAG) && !poHandle->IsDirectory() &&
5655
         CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))))
5656
    {
5657
        pStatBuf->st_size = poHandle->GetFileSize(true);
5658
    }
5659
5660
    const int nRet =
5661
        poHandle->Exists((nFlags & VSI_STAT_SET_ERROR_FLAG) > 0) ? 0 : -1;
5662
    pStatBuf->st_mtime = poHandle->GetMTime();
5663
    pStatBuf->st_mode = static_cast<unsigned short>(poHandle->GetMode());
5664
    if (pStatBuf->st_mode == 0)
5665
        pStatBuf->st_mode = poHandle->IsDirectory() ? S_IFDIR : S_IFREG;
5666
    delete poHandle;
5667
    return nRet;
5668
}
5669
5670
/************************************************************************/
5671
/*                          ReadDirInternal()                           */
5672
/************************************************************************/
5673
5674
char **VSICurlFilesystemHandlerBase::ReadDirInternal(const char *pszDirname,
5675
                                                     int nMaxFiles,
5676
                                                     bool *pbGotFileList)
5677
{
5678
    std::string osDirname(pszDirname);
5679
5680
    // Replace a/b/../c by a/c
5681
    const auto posSlashDotDot = osDirname.find("/..");
5682
    if (posSlashDotDot != std::string::npos && posSlashDotDot >= 1)
5683
    {
5684
        const auto posPrecedingSlash =
5685
            osDirname.find_last_of('/', posSlashDotDot - 1);
5686
        if (posPrecedingSlash != std::string::npos && posPrecedingSlash >= 1)
5687
        {
5688
            osDirname.erase(osDirname.begin() + posPrecedingSlash,
5689
                            osDirname.begin() + posSlashDotDot + strlen("/.."));
5690
        }
5691
    }
5692
5693
    std::string osDirnameOri(osDirname);
5694
    if (osDirname + "/" == GetFSPrefix())
5695
    {
5696
        osDirname += "/";
5697
    }
5698
    else if (osDirname != GetFSPrefix())
5699
    {
5700
        while (!osDirname.empty() && osDirname.back() == '/')
5701
            osDirname.erase(osDirname.size() - 1);
5702
    }
5703
5704
    if (osDirname.size() < GetFSPrefix().size())
5705
    {
5706
        if (pbGotFileList)
5707
            *pbGotFileList = true;
5708
        return nullptr;
5709
    }
5710
5711
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5712
    NetworkStatisticsAction oContextAction("ReadDir");
5713
5714
    CPLMutexHolder oHolder(&hMutex);
5715
5716
    // If we know the file exists and is not a directory,
5717
    // then don't try to list its content.
5718
    FileProp cachedFileProp;
5719
    if (GetCachedFileProp(GetURLFromFilename(osDirname.c_str()).c_str(),
5720
                          cachedFileProp) &&
5721
        cachedFileProp.eExists == EXIST_YES && !cachedFileProp.bIsDirectory)
5722
    {
5723
        if (osDirnameOri != osDirname)
5724
        {
5725
            if (GetCachedFileProp((GetURLFromFilename(osDirname) + "/").c_str(),
5726
                                  cachedFileProp) &&
5727
                cachedFileProp.eExists == EXIST_YES &&
5728
                !cachedFileProp.bIsDirectory)
5729
            {
5730
                if (pbGotFileList)
5731
                    *pbGotFileList = true;
5732
                return nullptr;
5733
            }
5734
        }
5735
        else
5736
        {
5737
            if (pbGotFileList)
5738
                *pbGotFileList = true;
5739
            return nullptr;
5740
        }
5741
    }
5742
5743
    CachedDirList cachedDirList;
5744
    if (!GetCachedDirList(osDirname.c_str(), cachedDirList))
5745
    {
5746
        cachedDirList.oFileList.Assign(GetFileList(osDirname.c_str(), nMaxFiles,
5747
                                                   &cachedDirList.bGotFileList),
5748
                                       true);
5749
        if (cachedDirList.bGotFileList && cachedDirList.oFileList.empty())
5750
        {
5751
            // To avoid an error to be reported
5752
            cachedDirList.oFileList.AddString(".");
5753
        }
5754
        if (nMaxFiles <= 0 || cachedDirList.oFileList.size() < nMaxFiles)
5755
        {
5756
            // Only cache content if we didn't hit the limitation
5757
            SetCachedDirList(osDirname.c_str(), cachedDirList);
5758
        }
5759
    }
5760
5761
    if (pbGotFileList)
5762
        *pbGotFileList = cachedDirList.bGotFileList;
5763
5764
    return CSLDuplicate(cachedDirList.oFileList.List());
5765
}
5766
5767
/************************************************************************/
5768
/*                        InvalidateDirContent()                        */
5769
/************************************************************************/
5770
5771
void VSICurlFilesystemHandlerBase::InvalidateDirContent(
5772
    const std::string &osDirname)
5773
{
5774
    CPLMutexHolder oHolder(&hMutex);
5775
5776
    CachedDirList oCachedDirList;
5777
    if (oCacheDirList.tryGet(osDirname, oCachedDirList))
5778
    {
5779
        nCachedFilesInDirList -= oCachedDirList.oFileList.size();
5780
        oCacheDirList.remove(osDirname);
5781
    }
5782
}
5783
5784
/************************************************************************/
5785
/*                             ReadDirEx()                              */
5786
/************************************************************************/
5787
5788
char **VSICurlFilesystemHandlerBase::ReadDirEx(const char *pszDirname,
5789
                                               int nMaxFiles)
5790
{
5791
    return ReadDirInternal(pszDirname, nMaxFiles, nullptr);
5792
}
5793
5794
/************************************************************************/
5795
/*                            SiblingFiles()                            */
5796
/************************************************************************/
5797
5798
char **VSICurlFilesystemHandlerBase::SiblingFiles(const char *pszFilename)
5799
{
5800
    /* Small optimization to avoid unnecessary stat'ing from PAux or ENVI */
5801
    /* drivers. The MBTiles driver needs no companion file. */
5802
    if (EQUAL(CPLGetExtensionSafe(pszFilename).c_str(), "mbtiles"))
5803
    {
5804
        return static_cast<char **>(CPLCalloc(1, sizeof(char *)));
5805
    }
5806
    return nullptr;
5807
}
5808
5809
/************************************************************************/
5810
/*                          GetFileMetadata()                           */
5811
/************************************************************************/
5812
5813
char **VSICurlFilesystemHandlerBase::GetFileMetadata(const char *pszFilename,
5814
                                                     const char *pszDomain,
5815
                                                     CSLConstList)
5816
{
5817
    if (pszDomain == nullptr || !EQUAL(pszDomain, "HEADERS"))
5818
        return nullptr;
5819
    std::unique_ptr<VSICurlHandle> poHandle(CreateFileHandle(pszFilename));
5820
    if (poHandle == nullptr)
5821
        return nullptr;
5822
5823
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5824
    NetworkStatisticsAction oContextAction("GetFileMetadata");
5825
5826
    poHandle->GetFileSizeOrHeaders(true, true);
5827
    return CSLDuplicate(poHandle->GetHeaders().List());
5828
}
5829
5830
/************************************************************************/
5831
/*                        VSIAppendWriteHandle()                        */
5832
/************************************************************************/
5833
5834
/** Constructor.
 *
 * Allocates a working buffer of nChunkSize bytes. If the allocation
 * fails an error is emitted and m_pabyBuffer is left null.
 *
 * @param poFS        filesystem handler stored in m_poFS.
 * @param pszFSPrefix filesystem prefix, used in error messages.
 * @param pszFilename filename; also used to fetch the HTTP options from
 *                    which the retry parameters are built.
 * @param nChunkSize  size in bytes of the working buffer.
 */
VSIAppendWriteHandle::VSIAppendWriteHandle(VSICurlFilesystemHandlerBase *poFS,
                                           const char *pszFSPrefix,
                                           const char *pszFilename,
                                           int nChunkSize)
    : m_poFS(poFS), m_osFSPrefix(pszFSPrefix), m_osFilename(pszFilename),
      m_oRetryParameters(CPLStringList(CPLHTTPGetOptionsFromEnv(pszFilename))),
      m_nBufferSize(nChunkSize)
{
    void *pRawBuffer = VSIMalloc(m_nBufferSize);
    m_pabyBuffer = static_cast<GByte *>(pRawBuffer);
    if (m_pabyBuffer != nullptr)
        return;

    CPLError(CE_Failure, CPLE_AppDefined,
             "Cannot allocate working buffer for %s writing",
             m_osFSPrefix.c_str());
}
5850
5851
/************************************************************************/
5852
/*                       ~VSIAppendWriteHandle()                        */
5853
/************************************************************************/
5854
5855
/** Destructor: releases the working buffer only. */
VSIAppendWriteHandle::~VSIAppendWriteHandle()
{
    // WARNING: implementations must call Close() themselves. It cannot be
    // done safely from here, since Send() can be called.
    CPLFree(m_pabyBuffer);
}
5861
5862
/************************************************************************/
5863
/*                                Seek()                                */
5864
/************************************************************************/
5865
5866
/** Seek in the file.
 *
 * Only seeks that do not move the write position are accepted:
 * SEEK_SET to the current offset, or SEEK_CUR / SEEK_END with a zero
 * offset. Any other request emits an error and sets the error flag.
 *
 * @param nOffset offset.
 * @param nWhence SEEK_SET, SEEK_CUR or SEEK_END.
 * @return 0 on success, -1 otherwise.
 */
int VSIAppendWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    const bool bSeekToCurrentPos =
        nWhence == SEEK_SET && nOffset == m_nCurOffset;
    const bool bZeroRelativeSeek =
        (nWhence == SEEK_CUR || nWhence == SEEK_END) && nOffset == 0;
    if (bSeekToCurrentPos || bZeroRelativeSeek)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seek not supported on writable %s files", m_osFSPrefix.c_str());
    m_bError = true;
    return -1;
}
5880
5881
/************************************************************************/
5882
/*                                Tell()                                */
5883
/************************************************************************/
5884
5885
/** Return the current write offset (m_nCurOffset). */
vsi_l_offset VSIAppendWriteHandle::Tell()
{
    const vsi_l_offset nPos = m_nCurOffset;
    return nPos;
}
5889
5890
/************************************************************************/
5891
/*                                Read()                                */
5892
/************************************************************************/
5893
5894
size_t VSIAppendWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
5895
{
5896
    CPLError(CE_Failure, CPLE_NotSupported,
5897
             "Read not supported on writable %s files", m_osFSPrefix.c_str());
5898
    m_bError = true;
5899
    return 0;
5900
}
5901
5902
/************************************************************************/
5903
/*                         ReadCallBackBuffer()                         */
5904
/************************************************************************/
5905
5906
size_t VSIAppendWriteHandle::ReadCallBackBuffer(char *buffer, size_t size,
5907
                                                size_t nitems, void *instream)
5908
{
5909
    VSIAppendWriteHandle *poThis =
5910
        static_cast<VSIAppendWriteHandle *>(instream);
5911
    const int nSizeMax = static_cast<int>(size * nitems);
5912
    const int nSizeToWrite = std::min(
5913
        nSizeMax, poThis->m_nBufferOff - poThis->m_nBufferOffReadCallback);
5914
    memcpy(buffer, poThis->m_pabyBuffer + poThis->m_nBufferOffReadCallback,
5915
           nSizeToWrite);
5916
    poThis->m_nBufferOffReadCallback += nSizeToWrite;
5917
    return nSizeToWrite;
5918
}
5919
5920
/************************************************************************/
5921
/*                               Write()                                */
5922
/************************************************************************/
5923
5924
size_t VSIAppendWriteHandle::Write(const void *pBuffer, size_t nBytes)
5925
{
5926
    if (m_bError)
5927
        return 0;
5928
5929
    size_t nBytesToWrite = nBytes;
5930
    if (nBytesToWrite == 0)
5931
        return 0;
5932
5933
    const GByte *pabySrcBuffer = reinterpret_cast<const GByte *>(pBuffer);
5934
    while (nBytesToWrite > 0)
5935
    {
5936
        if (m_nBufferOff == m_nBufferSize)
5937
        {
5938
            if (!Send(false))
5939
            {
5940
                m_bError = true;
5941
                return 0;
5942
            }
5943
            m_nBufferOff = 0;
5944
        }
5945
5946
        const int nToWriteInBuffer = static_cast<int>(std::min(
5947
            static_cast<size_t>(m_nBufferSize - m_nBufferOff), nBytesToWrite));
5948
        memcpy(m_pabyBuffer + m_nBufferOff, pabySrcBuffer, nToWriteInBuffer);
5949
        pabySrcBuffer += nToWriteInBuffer;
5950
        m_nBufferOff += nToWriteInBuffer;
5951
        m_nCurOffset += nToWriteInBuffer;
5952
        nBytesToWrite -= nToWriteInBuffer;
5953
    }
5954
    return nBytes;
5955
}
5956
5957
/************************************************************************/
5958
/*                               Close()                                */
5959
/************************************************************************/
5960
5961
int VSIAppendWriteHandle::Close()
5962
{
5963
    int nRet = 0;
5964
    if (!m_bClosed)
5965
    {
5966
        m_bClosed = true;
5967
        if (!m_bError && !Send(true))
5968
            nRet = -1;
5969
    }
5970
    return nRet;
5971
}
5972
5973
/************************************************************************/
5974
/*                         CurlRequestHelper()                          */
5975
/************************************************************************/
5976
5977
// Initializes the two write structures used by perform(): one that receives
// the response body and one that receives the response headers. The three
// trailing arguments of VSICURLInitWriteFuncStruct() are all passed as null.
CurlRequestHelper::CurlRequestHelper()
{
    // Response body accumulator.
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);

    // Response headers accumulator.
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
                               nullptr);
}
5983
5984
/************************************************************************/
5985
/*                         ~CurlRequestHelper()                         */
5986
/************************************************************************/
5987
5988
// Releases the buffers held by the body and header write structures.
CurlRequestHelper::~CurlRequestHelper()
{
    // Response body buffer.
    CPLFree(sWriteFuncData.pBuffer);

    // Response headers buffer.
    CPLFree(sWriteFuncHeaderData.pBuffer);
}
5993
5994
/************************************************************************/
5995
/*                              perform()                               */
5996
/************************************************************************/
5997
5998
long CurlRequestHelper::perform(CURL *hCurlHandle, struct curl_slist *headers,
5999
                                VSICurlFilesystemHandlerBase *poFS,
6000
                                IVSIS3LikeHandleHelper *poS3HandleHelper)
6001
{
6002
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
6003
6004
    poS3HandleHelper->ResetQueryParameters();
6005
6006
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
6007
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
6008
                               VSICurlHandleWriteFunc);
6009
6010
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
6011
                               &sWriteFuncHeaderData);
6012
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
6013
                               VSICurlHandleWriteFunc);
6014
6015
    szCurlErrBuf[0] = '\0';
6016
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
6017
6018
    VSICURLMultiPerform(poFS->GetCurlMultiHandleFor(poS3HandleHelper->GetURL()),
6019
                        hCurlHandle);
6020
6021
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
6022
6023
    curl_slist_free_all(headers);
6024
6025
    long response_code = 0;
6026
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
6027
    return response_code;
6028
}
6029
6030
/************************************************************************/
6031
/*                       NetworkStatisticsLogger                        */
6032
/************************************************************************/
6033
6034
// Global variable
6035
// Single shared instance: the static methods of this class reach its mutex,
// its statistics tree and its per-caller context paths through it.
NetworkStatisticsLogger NetworkStatisticsLogger::gInstance{};
6036
// Cached enablement state: -1 until ReadEnabled() stores TRUE or FALSE;
// Reset() puts it back to -1.
int NetworkStatisticsLogger::gnEnabled = -1;  // unknown state
6037
6038
static void ShowNetworkStats()
6039
{
6040
    printf("Network statistics:\n%s\n",  // ok
6041
           NetworkStatisticsLogger::GetReportAsSerializedJSON().c_str());
6042
}
6043
6044
void NetworkStatisticsLogger::ReadEnabled()
6045
{
6046
    const bool bShowNetworkStats =
6047
        CPLTestBool(CPLGetConfigOption("CPL_VSIL_SHOW_NETWORK_STATS", "NO"));
6048
    gnEnabled =
6049
        (bShowNetworkStats || CPLTestBool(CPLGetConfigOption(
6050
                                  "CPL_VSIL_NETWORK_STATS_ENABLED", "NO")))
6051
            ? TRUE
6052
            : FALSE;
6053
    if (bShowNetworkStats)
6054
    {
6055
        static bool bRegistered = false;
6056
        if (!bRegistered)
6057
        {
6058
            bRegistered = true;
6059
            atexit(ShowNetworkStats);
6060
        }
6061
    }
6062
}
6063
6064
void NetworkStatisticsLogger::EnterFileSystem(const char *pszName)
6065
{
6066
    if (!IsEnabled())
6067
        return;
6068
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6069
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
6070
        ContextPathItem(ContextPathType::FILESYSTEM, pszName));
6071
}
6072
6073
void NetworkStatisticsLogger::LeaveFileSystem()
6074
{
6075
    if (!IsEnabled())
6076
        return;
6077
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6078
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
6079
}
6080
6081
void NetworkStatisticsLogger::EnterFile(const char *pszName)
6082
{
6083
    if (!IsEnabled())
6084
        return;
6085
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6086
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
6087
        ContextPathItem(ContextPathType::FILE, pszName));
6088
}
6089
6090
void NetworkStatisticsLogger::LeaveFile()
6091
{
6092
    if (!IsEnabled())
6093
        return;
6094
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6095
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
6096
}
6097
6098
void NetworkStatisticsLogger::EnterAction(const char *pszName)
6099
{
6100
    if (!IsEnabled())
6101
        return;
6102
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6103
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
6104
        ContextPathItem(ContextPathType::ACTION, pszName));
6105
}
6106
6107
void NetworkStatisticsLogger::LeaveAction()
6108
{
6109
    if (!IsEnabled())
6110
        return;
6111
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6112
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
6113
}
6114
6115
std::vector<NetworkStatisticsLogger::Counters *>
6116
NetworkStatisticsLogger::GetCountersForContext()
6117
{
6118
    std::vector<Counters *> v;
6119
    const auto &contextPath = gInstance.m_mapThreadIdToContextPath[CPLGetPID()];
6120
6121
    Stats *curStats = &m_stats;
6122
    v.push_back(&(curStats->counters));
6123
6124
    bool inFileSystem = false;
6125
    bool inFile = false;
6126
    bool inAction = false;
6127
    for (const auto &item : contextPath)
6128
    {
6129
        if (item.eType == ContextPathType::FILESYSTEM)
6130
        {
6131
            if (inFileSystem)
6132
                continue;
6133
            inFileSystem = true;
6134
        }
6135
        else if (item.eType == ContextPathType::FILE)
6136
        {
6137
            if (inFile)
6138
                continue;
6139
            inFile = true;
6140
        }
6141
        else if (item.eType == ContextPathType::ACTION)
6142
        {
6143
            if (inAction)
6144
                continue;
6145
            inAction = true;
6146
        }
6147
6148
        curStats = &(curStats->children[item]);
6149
        v.push_back(&(curStats->counters));
6150
    }
6151
6152
    return v;
6153
}
6154
6155
void NetworkStatisticsLogger::LogGET(size_t nDownloadedBytes)
6156
{
6157
    if (!IsEnabled())
6158
        return;
6159
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6160
    for (auto counters : gInstance.GetCountersForContext())
6161
    {
6162
        counters->nGET++;
6163
        counters->nGETDownloadedBytes += nDownloadedBytes;
6164
    }
6165
}
6166
6167
void NetworkStatisticsLogger::LogPUT(size_t nUploadedBytes)
6168
{
6169
    if (!IsEnabled())
6170
        return;
6171
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6172
    for (auto counters : gInstance.GetCountersForContext())
6173
    {
6174
        counters->nPUT++;
6175
        counters->nPUTUploadedBytes += nUploadedBytes;
6176
    }
6177
}
6178
6179
void NetworkStatisticsLogger::LogHEAD()
6180
{
6181
    if (!IsEnabled())
6182
        return;
6183
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6184
    for (auto counters : gInstance.GetCountersForContext())
6185
    {
6186
        counters->nHEAD++;
6187
    }
6188
}
6189
6190
void NetworkStatisticsLogger::LogPOST(size_t nUploadedBytes,
6191
                                      size_t nDownloadedBytes)
6192
{
6193
    if (!IsEnabled())
6194
        return;
6195
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6196
    for (auto counters : gInstance.GetCountersForContext())
6197
    {
6198
        counters->nPOST++;
6199
        counters->nPOSTUploadedBytes += nUploadedBytes;
6200
        counters->nPOSTDownloadedBytes += nDownloadedBytes;
6201
    }
6202
}
6203
6204
void NetworkStatisticsLogger::LogDELETE()
6205
{
6206
    if (!IsEnabled())
6207
        return;
6208
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6209
    for (auto counters : gInstance.GetCountersForContext())
6210
    {
6211
        counters->nDELETE++;
6212
    }
6213
}
6214
6215
void NetworkStatisticsLogger::Reset()
6216
{
6217
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6218
    gInstance.m_stats = Stats();
6219
    gnEnabled = -1;
6220
}
6221
6222
// Serializes this statistics node, and recursively its children, into oJSON.
//
// Non-zero counters are written under "methods" (e.g. "GET/count"). Children
// are written under "handlers/<name>" for FILESYSTEM items (one leading and
// one trailing '/' stripped from the name), under "files" for FILE items
// (name used verbatim as a single key) and under "actions/<name>" for ACTION
// items. The "files" object is only attached when a FILE child exists.
void NetworkStatisticsLogger::Stats::AsJSON(CPLJSONObject &oJSON) const
{
    CPLJSONObject oMethods;
    // Zero counters are left out of the report.
    const auto addIfNonZero = [&oMethods](const char *pszKey, auto nValue)
    {
        if (nValue)
            oMethods.Add(pszKey, nValue);
    };
    addIfNonZero("HEAD/count", counters.nHEAD);
    addIfNonZero("GET/count", counters.nGET);
    addIfNonZero("GET/downloaded_bytes", counters.nGETDownloadedBytes);
    addIfNonZero("PUT/count", counters.nPUT);
    addIfNonZero("PUT/uploaded_bytes", counters.nPUTUploadedBytes);
    addIfNonZero("POST/count", counters.nPOST);
    addIfNonZero("POST/uploaded_bytes", counters.nPOSTUploadedBytes);
    addIfNonZero("POST/downloaded_bytes", counters.nPOSTDownloadedBytes);
    addIfNonZero("DELETE/count", counters.nDELETE);
    oJSON.Add("methods", oMethods);

    CPLJSONObject oFiles;
    bool bFilesAttached = false;
    for (const auto &kv : children)
    {
        const auto &oItem = kv.first;

        CPLJSONObject oChild;
        kv.second.AsJSON(oChild);

        if (oItem.eType == ContextPathType::FILESYSTEM)
        {
            // Turn e.g. "/vsigs/" into "vsigs".
            std::string osHandler(oItem.osName);
            if (!osHandler.empty() && osHandler.front() == '/')
                osHandler.erase(0, 1);
            if (!osHandler.empty() && osHandler.back() == '/')
                osHandler.pop_back();
            oJSON.Add(("handlers/" + osHandler).c_str(), oChild);
        }
        else if (oItem.eType == ContextPathType::FILE)
        {
            // Attach "files" to the parent before filling it, on first use.
            if (!bFilesAttached)
            {
                bFilesAttached = true;
                oJSON.Add("files", oFiles);
            }
            // File names contain '/', so they must not be split into a path.
            oFiles.AddNoSplitName(oItem.osName.c_str(), oChild);
        }
        else if (oItem.eType == ContextPathType::ACTION)
        {
            oJSON.Add(("actions/" + oItem.osName).c_str(), oChild);
        }
    }
}
6274
6275
std::string NetworkStatisticsLogger::GetReportAsSerializedJSON()
6276
{
6277
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6278
6279
    CPLJSONObject oJSON;
6280
    gInstance.m_stats.AsJSON(oJSON);
6281
    return oJSON.Format(CPLJSONObject::PrettyFormat::Pretty);
6282
}
6283
6284
} /* end of namespace cpl */
6285
6286
/************************************************************************/
6287
/*                    VSICurlParseUnixPermissions()                     */
6288
/************************************************************************/
6289
6290
// Converts a 9-character Unix permission string such as "rwxr-xr--" into
// the corresponding combination of S_I* mode bits.
//
// Each position only contributes its bit when it holds the letter expected
// at that position ('r', 'w', 'x' for user, then group, then other); any
// other character leaves the bit cleared.
//
// Returns 0 when pszPermissions is null or is not exactly 9 characters long.
int VSICurlParseUnixPermissions(const char *pszPermissions)
{
    static const char achSlotLetters[] = "rwxrwxrwx";
    static const int anSlotBits[] = {
        S_IRUSR, S_IWUSR, S_IXUSR,  // user
        S_IRGRP, S_IWGRP, S_IXGRP,  // group
        S_IROTH, S_IWOTH, S_IXOTH,  // other
    };
    const size_t nSlots = sizeof(anSlotBits) / sizeof(anSlotBits[0]);

    // strlen() on a null pointer is undefined behavior: reject it up front.
    if (!pszPermissions || strlen(pszPermissions) != nSlots)
        return 0;

    int nMode = 0;
    for (size_t i = 0; i < nSlots; ++i)
    {
        if (pszPermissions[i] == achSlotLetters[i])
            nMode |= anSlotBits[i];
    }
    return nMode;
}
6315
6316
/************************************************************************/
6317
/*                      Cache of file properties.                       */
6318
/************************************************************************/
6319
6320
// Taken by every VSICURL*CachedFileProp*() / VSICURLDestroyCacheFileProp()
// function of this file before touching poCacheFileProp.
static std::mutex oCacheFilePropMutex;
6321
// Cache of file properties keyed by the URL string passed by callers.
// Allocated on the first VSICURLSetCachedFileProp() call and released by
// VSICURLDestroyCacheFileProp(); nullptr otherwise.
static lru11::Cache<std::string, cpl::FileProp> *poCacheFileProp = nullptr;
6322
6323
/************************************************************************/
6324
/*                      VSICURLGetCachedFileProp()                      */
6325
/************************************************************************/
6326
6327
bool VSICURLGetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp)
6328
{
6329
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6330
    return poCacheFileProp != nullptr &&
6331
           poCacheFileProp->tryGet(std::string(pszURL), oFileProp) &&
6332
           // Let a chance to use new auth parameters
6333
           !(oFileProp.eExists == cpl::EXIST_NO &&
6334
             gnGenerationAuthParameters != oFileProp.nGenerationAuthParameters);
6335
}
6336
6337
/************************************************************************/
6338
/*                      VSICURLSetCachedFileProp()                      */
6339
/************************************************************************/
6340
6341
void VSICURLSetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp)
6342
{
6343
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6344
    if (poCacheFileProp == nullptr)
6345
        poCacheFileProp =
6346
            new lru11::Cache<std::string, cpl::FileProp>(100 * 1024);
6347
    oFileProp.nGenerationAuthParameters = gnGenerationAuthParameters;
6348
    poCacheFileProp->insert(std::string(pszURL), oFileProp);
6349
}
6350
6351
/************************************************************************/
6352
/*                  VSICURLInvalidateCachedFileProp()                   */
6353
/************************************************************************/
6354
6355
void VSICURLInvalidateCachedFileProp(const char *pszURL)
6356
{
6357
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6358
    if (poCacheFileProp != nullptr)
6359
        poCacheFileProp->remove(std::string(pszURL));
6360
}
6361
6362
/************************************************************************/
6363
/*               VSICURLInvalidateCachedFilePropPrefix()                */
6364
/************************************************************************/
6365
6366
void VSICURLInvalidateCachedFilePropPrefix(const char *pszURL)
6367
{
6368
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6369
    if (poCacheFileProp != nullptr)
6370
    {
6371
        std::list<std::string> keysToRemove;
6372
        const size_t nURLSize = strlen(pszURL);
6373
        auto lambda =
6374
            [&keysToRemove, &pszURL, nURLSize](
6375
                const lru11::KeyValuePair<std::string, cpl::FileProp> &kv)
6376
        {
6377
            if (strncmp(kv.key.c_str(), pszURL, nURLSize) == 0)
6378
                keysToRemove.push_back(kv.key);
6379
        };
6380
        poCacheFileProp->cwalk(lambda);
6381
        for (const auto &key : keysToRemove)
6382
            poCacheFileProp->remove(key);
6383
    }
6384
}
6385
6386
/************************************************************************/
6387
/*                    VSICURLDestroyCacheFileProp()                     */
6388
/************************************************************************/
6389
6390
void VSICURLDestroyCacheFileProp()
6391
{
6392
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6393
    delete poCacheFileProp;
6394
    poCacheFileProp = nullptr;
6395
}
6396
6397
/************************************************************************/
6398
/*                        VSICURLMultiCleanup()                         */
6399
/************************************************************************/
6400
6401
// Calls curl_multi_cleanup() on hCurlMultiHandle, bracketed by
// CPLHTTPIgnoreSigPipe() and CPLHTTPRestoreSigPipeHandler() so that the
// handler returned by the former is put back afterwards.
void VSICURLMultiCleanup(CURLM *hCurlMultiHandle)
{
    void *pPreviousHandler = CPLHTTPIgnoreSigPipe();

    curl_multi_cleanup(hCurlMultiHandle);

    CPLHTTPRestoreSigPipeHandler(pPreviousHandler);
}
6407
6408
/************************************************************************/
6409
/*                       VSICurlInstallReadCbk()                        */
6410
/************************************************************************/
6411
6412
int VSICurlInstallReadCbk(VSILFILE *fp, VSICurlReadCbkFunc pfnReadCbk,
6413
                          void *pfnUserData, int bStopOnInterruptUntilUninstall)
6414
{
6415
    return reinterpret_cast<cpl::VSICurlHandle *>(fp)->InstallReadCbk(
6416
        pfnReadCbk, pfnUserData, bStopOnInterruptUntilUninstall);
6417
}
6418
6419
/************************************************************************/
6420
/*                      VSICurlUninstallReadCbk()                       */
6421
/************************************************************************/
6422
6423
// Forwards to cpl::VSICurlHandle::UninstallReadCbk() and returns its result.
// fp is reinterpreted as a cpl::VSICurlHandle without any check, so it must
// actually be one.
int VSICurlUninstallReadCbk(VSILFILE *fp)
{
    auto *poCurlHandle = reinterpret_cast<cpl::VSICurlHandle *>(fp);
    return poCurlHandle->UninstallReadCbk();
}
6427
6428
/************************************************************************/
6429
/*                         VSICurlSetOptions()                          */
6430
/************************************************************************/
6431
6432
// Applies the generic HTTP options to hCurlHandle through
// CPLHTTPSetOptions(), then two FTP-specific settings, and returns the
// header list produced by CPLHTTPSetOptions().
struct curl_slist *VSICurlSetOptions(CURL *hCurlHandle, const char *pszURL,
                                     const char *const *papszOptions)
{
    void *pHeaders = CPLHTTPSetOptions(hCurlHandle, pszURL, papszOptions);

    const long nFTPFileMethod = CURLFTPMETHOD_SINGLECWD;
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD,
                               nFTPFileMethod);

    // ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/
    // doesn't like EPSV command,
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);

    return static_cast<struct curl_slist *>(pHeaders);
}
6447
6448
/************************************************************************/
6449
/*                    VSICurlSetContentTypeFromExt()                    */
6450
/************************************************************************/
6451
6452
struct curl_slist *VSICurlSetContentTypeFromExt(struct curl_slist *poList,
6453
                                                const char *pszPath)
6454
{
6455
    struct curl_slist *iter = poList;
6456
    while (iter != nullptr)
6457
    {
6458
        if (STARTS_WITH_CI(iter->data, "Content-Type"))
6459
        {
6460
            return poList;
6461
        }
6462
        iter = iter->next;
6463
    }
6464
6465
    static const struct
6466
    {
6467
        const char *ext;
6468
        const char *mime;
6469
    } aosExtMimePairs[] = {
6470
        {"txt", "text/plain"}, {"json", "application/json"},
6471
        {"tif", "image/tiff"}, {"tiff", "image/tiff"},
6472
        {"jpg", "image/jpeg"}, {"jpeg", "image/jpeg"},
6473
        {"jp2", "image/jp2"},  {"jpx", "image/jp2"},
6474
        {"j2k", "image/jp2"},  {"jpc", "image/jp2"},
6475
        {"png", "image/png"},
6476
    };
6477
6478
    const std::string osExt = CPLGetExtensionSafe(pszPath);
6479
    if (!osExt.empty())
6480
    {
6481
        for (const auto &pair : aosExtMimePairs)
6482
        {
6483
            if (EQUAL(osExt.c_str(), pair.ext))
6484
            {
6485
6486
                const std::string osContentType(
6487
                    CPLSPrintf("Content-Type: %s", pair.mime));
6488
                poList = curl_slist_append(poList, osContentType.c_str());
6489
#ifdef DEBUG_VERBOSE
6490
                CPLDebug("HTTP", "Setting %s, based on lookup table.",
6491
                         osContentType.c_str());
6492
#endif
6493
                break;
6494
            }
6495
        }
6496
    }
6497
6498
    return poList;
6499
}
6500
6501
/************************************************************************/
6502
/*                VSICurlSetCreationHeadersFromOptions()                */
6503
/************************************************************************/
6504
6505
struct curl_slist *VSICurlSetCreationHeadersFromOptions(
6506
    struct curl_slist *headers, CSLConstList papszOptions, const char *pszPath)
6507
{
6508
    bool bContentTypeFound = false;
6509
    for (CSLConstList papszIter = papszOptions; papszIter && *papszIter;
6510
         ++papszIter)
6511
    {
6512
        char *pszKey = nullptr;
6513
        const char *pszValue = CPLParseNameValue(*papszIter, &pszKey);
6514
        if (pszKey && pszValue)
6515
        {
6516
            if (EQUAL(pszKey, "Content-Type"))
6517
            {
6518
                bContentTypeFound = true;
6519
            }
6520
            headers = curl_slist_append(headers,
6521
                                        CPLSPrintf("%s: %s", pszKey, pszValue));
6522
        }
6523
        CPLFree(pszKey);
6524
    }
6525
6526
    // If Content-type not found in papszOptions, try to set it from the
6527
    // filename exstension.
6528
    if (!bContentTypeFound)
6529
    {
6530
        headers = VSICurlSetContentTypeFromExt(headers, pszPath);
6531
    }
6532
6533
    return headers;
6534
}
6535
6536
#endif  // DOXYGEN_SKIP
6537
//! @endcond
6538
6539
/************************************************************************/
6540
/*                     VSIInstallCurlFileHandler()                      */
6541
/************************************************************************/
6542
6543
/*!
6544
 \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
6545
6546
 \verbatim embed:rst
6547
 See :ref:`/vsicurl/ documentation <vsicurl>`
6548
 \endverbatim
6549
6550
 */
6551
void VSIInstallCurlFileHandler(void)
6552
{
6553
    auto poHandler = std::make_shared<cpl::VSICurlFilesystemHandler>();
6554
    for (const char *pszPrefix : VSICURL_PREFIXES)
6555
    {
6556
        VSIFileManager::InstallHandler(pszPrefix, poHandler);
6557
    }
6558
}
6559
6560
/************************************************************************/
6561
/*                         VSICurlClearCache()                          */
6562
/************************************************************************/
6563
6564
/**
6565
 * \brief Clean local cache associated with /vsicurl/ (and related file systems)
6566
 *
6567
 * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/,
6568
 * /vsiswift/) cache a number of
6569
 * metadata and data for faster execution in read-only scenarios. But when the
6570
 * content on the server-side may change during the same process, those
6571
 * mechanisms can prevent opening new files, or give an outdated version of
6572
 * them.
6573
 *
6574
 */
6575
6576
void VSICurlClearCache(void)
6577
{
6578
    // FIXME ? Currently we have different filesystem instances for
6579
    // vsicurl/, /vsis3/, /vsigs/ . So each one has its own cache of regions.
6580
    // File properties cache are now shared
6581
    char **papszPrefix = VSIFileManager::GetPrefixes();
6582
    for (size_t i = 0; papszPrefix && papszPrefix[i]; ++i)
6583
    {
6584
        auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>(
6585
            VSIFileManager::GetHandler(papszPrefix[i]));
6586
6587
        if (poFSHandler)
6588
            poFSHandler->ClearCache();
6589
    }
6590
    CSLDestroy(papszPrefix);
6591
6592
    VSICurlStreamingClearCache();
6593
}
6594
6595
/************************************************************************/
6596
/*                      VSICurlPartialClearCache()                      */
6597
/************************************************************************/
6598
6599
/**
6600
 * \brief Clean local cache associated with /vsicurl/ (and related file systems)
6601
 * for a given filename (and its subfiles and subdirectories if it is a
6602
 * directory)
6603
 *
6604
 * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/,
6605
 * /vsiswift/) cache a number of
6606
 * metadata and data for faster execution in read-only scenarios. But when the
6607
 * content on the server-side may change during the same process, those
6608
 * mechanisms can prevent opening new files, or give an outdated version of
6609
 * them.
6610
 *
6611
 * The filename prefix must start with the name of a known virtual file system
6612
 * (such as "/vsicurl/", "/vsis3/")
6613
 *
6614
 * VSICurlPartialClearCache("/vsis3/b") will clear all cached state for any file
6615
 * or directory starting with that prefix, so potentially "/vsis3/bucket",
6616
 * "/vsis3/basket/" or "/vsis3/basket/object".
6617
 *
6618
 * @param pszFilenamePrefix Filename prefix
6619
 */
6620
6621
void VSICurlPartialClearCache(const char *pszFilenamePrefix)
6622
{
6623
    auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>(
6624
        VSIFileManager::GetHandler(pszFilenamePrefix));
6625
6626
    if (poFSHandler)
6627
        poFSHandler->PartialClearCache(pszFilenamePrefix);
6628
}
6629
6630
/************************************************************************/
6631
/*                        VSINetworkStatsReset()                        */
6632
/************************************************************************/
6633
6634
/**
6635
 * \brief Clear network related statistics.
6636
 *
6637
 * The effect of the CPL_VSIL_NETWORK_STATS_ENABLED configuration option
6638
 * will also be reset. That is, that the next network access will check its
6639
 * value again.
6640
 *
6641
 * @since GDAL 3.2.0
6642
 */
6643
6644
void VSINetworkStatsReset(void)
6645
{
6646
    cpl::NetworkStatisticsLogger::Reset();
6647
}
6648
6649
/************************************************************************/
6650
/*                 VSINetworkStatsGetAsSerializedJSON()                 */
6651
/************************************************************************/
6652
6653
/**
6654
 * \brief Return network related statistics, as a JSON serialized object.
6655
 *
6656
 * Statistics collecting should be enabled with the
6657
 * CPL_VSIL_NETWORK_STATS_ENABLED
6658
 * configuration option set to YES before any network activity starts
6659
 * (for efficiency, reading it is cached on first access, until
6660
 * VSINetworkStatsReset() is called)
6661
 *
6662
 * Statistics can also be emitted on standard output at process termination if
6663
 * the CPL_VSIL_SHOW_NETWORK_STATS configuration option is set to YES.
6664
 *
6665
 * Example of output:
6666
 * \code{.js}
6667
 * {
6668
 *   "methods":{
6669
 *     "GET":{
6670
 *       "count":6,
6671
 *       "downloaded_bytes":40825
6672
 *     },
6673
 *     "PUT":{
6674
 *       "count":1,
6675
 *       "uploaded_bytes":35472
6676
 *     }
6677
 *   },
6678
 *   "handlers":{
6679
 *     "vsigs":{
6680
 *       "methods":{
6681
 *         "GET":{
6682
 *           "count":2,
6683
 *           "downloaded_bytes":446
6684
 *         },
6685
 *         "PUT":{
6686
 *           "count":1,
6687
 *           "uploaded_bytes":35472
6688
 *         }
6689
 *       },
6690
 *       "files":{
6691
 *         "\/vsigs\/spatialys\/byte.tif":{
6692
 *           "methods":{
6693
 *             "PUT":{
6694
 *               "count":1,
6695
 *               "uploaded_bytes":35472
6696
 *             }
6697
 *           },
6698
 *           "actions":{
6699
 *             "Write":{
6700
 *               "methods":{
6701
 *                 "PUT":{
6702
 *                   "count":1,
6703
 *                   "uploaded_bytes":35472
6704
 *                 }
6705
 *               }
6706
 *             }
6707
 *           }
6708
 *         }
6709
 *       },
6710
 *       "actions":{
6711
 *         "Stat":{
6712
 *           "methods":{
6713
 *             "GET":{
6714
 *               "count":2,
6715
 *               "downloaded_bytes":446
6716
 *             }
6717
 *           },
6718
 *           "files":{
6719
 *             "\/vsigs\/spatialys\/byte.tif\/":{
6720
 *               "methods":{
6721
 *                 "GET":{
6722
 *                   "count":1,
6723
 *                   "downloaded_bytes":181
6724
 *                 }
6725
 *               }
6726
 *             }
6727
 *           }
6728
 *         }
6729
 *       }
6730
 *     },
6731
 *     "vsis3":{
6732
 *          [...]
6733
 *     }
6734
 *   }
6735
 * }
6736
 * \endcode
6737
 *
6738
 * @param papszOptions Unused.
6739
 * @return a JSON serialized string to free with VSIFree(), or nullptr
6740
 * @since GDAL 3.2.0
6741
 */
6742
6743
char *VSINetworkStatsGetAsSerializedJSON(CPL_UNUSED char **papszOptions)
{
    // Hand the caller its own copy of the report, made with CPLStrdup().
    const std::string osReport =
        cpl::NetworkStatisticsLogger::GetReportAsSerializedJSON();
    return CPLStrdup(osReport.c_str());
}
6748
6749
#endif /* HAVE_CURL */
6750
6751
#undef ENABLE_DEBUG