Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/port/cpl_vsil_curl.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  Implement VSI large file api for HTTP/FTP files
5
 * Author:   Even Rouault, even.rouault at spatialys.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2010-2018, Even Rouault <even.rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "cpl_port.h"
14
#include "cpl_vsil_curl_priv.h"
15
#include "cpl_vsil_curl_class.h"
16
17
#include <algorithm>
18
#include <array>
19
#include <limits>
20
#include <map>
21
#include <memory>
22
#include <set>
23
24
#include "cpl_aws.h"
25
#include "cpl_json.h"
26
#include "cpl_json_header.h"
27
#include "cpl_minixml.h"
28
#include "cpl_multiproc.h"
29
#include "cpl_string.h"
30
#include "cpl_time.h"
31
#include "cpl_vsi.h"
32
#include "cpl_vsi_virtual.h"
33
#include "cpl_http.h"
34
#include "cpl_mem_cache.h"
35
36
#ifndef S_IRUSR
37
#define S_IRUSR 00400
38
#define S_IWUSR 00200
39
#define S_IXUSR 00100
40
#define S_IRGRP 00040
41
#define S_IWGRP 00020
42
#define S_IXGRP 00010
43
#define S_IROTH 00004
44
#define S_IWOTH 00002
45
#define S_IXOTH 00001
46
#endif
47
48
#ifndef HAVE_CURL
49
50
// Stub compiled when HAVE_CURL is not defined: intentionally does nothing.
void VSIInstallCurlFileHandler(void)
{
    // Not supported.
}
54
55
// Stub compiled when HAVE_CURL is not defined: intentionally does nothing.
void VSICurlClearCache(void)
{
    // Not supported.
}
59
60
// Stub compiled when HAVE_CURL is not defined: the (unnamed) argument is
// ignored and nothing is done.
void VSICurlPartialClearCache(const char *)
{
    // Not supported.
}
64
65
// Stub compiled when HAVE_CURL is not defined: intentionally does nothing.
void VSICurlAuthParametersChanged()
{
    // Not supported.
}
69
70
// Stub compiled when HAVE_CURL is not defined: intentionally does nothing.
void VSINetworkStatsReset(void)
{
    // Not supported
}
74
75
// Stub compiled when HAVE_CURL is not defined: the options argument is
// ignored and a null pointer is always returned.
char *VSINetworkStatsGetAsSerializedJSON(char ** /* papszOptions */)
{
    // Not supported
    return nullptr;
}
80
81
/************************************************************************/
82
/*                       VSICurlInstallReadCbk()                        */
83
/************************************************************************/
84
85
int VSICurlInstallReadCbk(VSILFILE * /* fp */,
86
                          VSICurlReadCbkFunc /* pfnReadCbk */,
87
                          void * /* pfnUserData */,
88
                          int /* bStopOnInterruptUntilUninstall */)
89
0
{
90
0
    return FALSE;
91
0
}
92
93
/************************************************************************/
94
/*                      VSICurlUninstallReadCbk()                       */
95
/************************************************************************/
96
97
// Stub compiled when HAVE_CURL is not defined: the argument is ignored and
// FALSE is always returned.
int VSICurlUninstallReadCbk(VSILFILE * /* fp */)
{
    // Not supported.
    return FALSE;
}
101
102
#else
103
104
//! @cond Doxygen_Suppress
105
#ifndef DOXYGEN_SKIP
106
107
#define ENABLE_DEBUG 1
108
#define ENABLE_DEBUG_VERBOSE 0
109
110
#define unchecked_curl_easy_setopt(handle, opt, param)                         \
111
    CPL_IGNORE_RET_VAL(curl_easy_setopt(handle, opt, param))
112
113
/************************************************************************/
114
/*                    VSICurlAuthParametersChanged()                    */
115
/************************************************************************/
116
117
static unsigned int gnGenerationAuthParameters = 0;
118
119
void VSICurlAuthParametersChanged()
120
{
121
    gnGenerationAuthParameters++;
122
}
123
124
// Do not access those variables directly !
125
// Use VSICURLGetDownloadChunkSize() and GetMaxRegions()
126
static int N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = 0;
127
static int DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY = 0;
128
129
/************************************************************************/
130
/*                   VSICURLReadGlobalEnvVariables()                    */
131
/************************************************************************/
132
133
static void VSICURLReadGlobalEnvVariables()
134
{
135
    struct Initializer
136
    {
137
        Initializer()
138
        {
139
            constexpr int DOWNLOAD_CHUNK_SIZE_DEFAULT = 16384;
140
            const char *pszChunkSize =
141
                CPLGetConfigOption("CPL_VSIL_CURL_CHUNK_SIZE", nullptr);
142
            GIntBig nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT;
143
144
            if (pszChunkSize)
145
            {
146
                if (CPLParseMemorySize(pszChunkSize, &nChunkSize, nullptr) !=
147
                    CE_None)
148
                {
149
                    CPLError(
150
                        CE_Warning, CPLE_AppDefined,
151
                        "Could not parse value for CPL_VSIL_CURL_CHUNK_SIZE. "
152
                        "Using default value of %d instead.",
153
                        DOWNLOAD_CHUNK_SIZE_DEFAULT);
154
                }
155
            }
156
157
            constexpr int MIN_CHUNK_SIZE = 1024;
158
            constexpr int MAX_CHUNK_SIZE = 10 * 1024 * 1024;
159
            if (nChunkSize < MIN_CHUNK_SIZE || nChunkSize > MAX_CHUNK_SIZE)
160
            {
161
                nChunkSize = DOWNLOAD_CHUNK_SIZE_DEFAULT;
162
                CPLError(CE_Warning, CPLE_AppDefined,
163
                         "Invalid value for CPL_VSIL_CURL_CHUNK_SIZE. "
164
                         "Allowed range is [%d, %d]. "
165
                         "Using CPL_VSIL_CURL_CHUNK_SIZE=%d instead",
166
                         MIN_CHUNK_SIZE, MAX_CHUNK_SIZE,
167
                         DOWNLOAD_CHUNK_SIZE_DEFAULT);
168
            }
169
            DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY =
170
                static_cast<int>(nChunkSize);
171
172
            constexpr int N_MAX_REGIONS_DEFAULT = 1000;
173
            constexpr int CACHE_SIZE_DEFAULT =
174
                N_MAX_REGIONS_DEFAULT * DOWNLOAD_CHUNK_SIZE_DEFAULT;
175
176
            const char *pszCacheSize =
177
                CPLGetConfigOption("CPL_VSIL_CURL_CACHE_SIZE", nullptr);
178
            GIntBig nCacheSize = CACHE_SIZE_DEFAULT;
179
180
            if (pszCacheSize)
181
            {
182
                if (CPLParseMemorySize(pszCacheSize, &nCacheSize, nullptr) !=
183
                    CE_None)
184
                {
185
                    CPLError(
186
                        CE_Warning, CPLE_AppDefined,
187
                        "Could not parse value for CPL_VSIL_CURL_CACHE_SIZE. "
188
                        "Using default value of " CPL_FRMT_GIB " instead.",
189
                        nCacheSize);
190
                }
191
            }
192
193
            const auto nMaxRAM = CPLGetUsablePhysicalRAM();
194
            const auto nMinVal = DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
195
            auto nMaxVal = static_cast<GIntBig>(INT_MAX) *
196
                           DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
197
            if (nMaxRAM > 0 && nMaxVal > nMaxRAM)
198
                nMaxVal = nMaxRAM;
199
            if (nCacheSize < nMinVal || nCacheSize > nMaxVal)
200
            {
201
                nCacheSize = nCacheSize < nMinVal ? nMinVal : nMaxVal;
202
                CPLError(CE_Warning, CPLE_AppDefined,
203
                         "Invalid value for CPL_VSIL_CURL_CACHE_SIZE. "
204
                         "Allowed range is [%d, " CPL_FRMT_GIB "]. "
205
                         "Using CPL_VSIL_CURL_CACHE_SIZE=" CPL_FRMT_GIB
206
                         " instead",
207
                         nMinVal, nMaxVal, nCacheSize);
208
            }
209
            N_MAX_REGIONS_DO_NOT_USE_DIRECTLY = std::max(
210
                1, static_cast<int>(nCacheSize /
211
                                    DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY));
212
        }
213
    };
214
215
    static Initializer initializer;
216
}
217
218
/************************************************************************/
219
/*                    VSICURLGetDownloadChunkSize()                     */
220
/************************************************************************/
221
222
int VSICURLGetDownloadChunkSize()
223
{
224
    VSICURLReadGlobalEnvVariables();
225
    return DOWNLOAD_CHUNK_SIZE_DO_NOT_USE_DIRECTLY;
226
}
227
228
/************************************************************************/
229
/*                           GetMaxRegions()                            */
230
/************************************************************************/
231
232
static int GetMaxRegions()
233
{
234
    VSICURLReadGlobalEnvVariables();
235
    return N_MAX_REGIONS_DO_NOT_USE_DIRECTLY;
236
}
237
238
/************************************************************************/
239
/*          VSICurlFindStringSensitiveExceptEscapeSequences()           */
240
/************************************************************************/
241
242
static int
243
VSICurlFindStringSensitiveExceptEscapeSequences(CSLConstList papszList,
244
                                                const char *pszTarget)
245
246
{
247
    if (papszList == nullptr)
248
        return -1;
249
250
    for (int i = 0; papszList[i] != nullptr; i++)
251
    {
252
        const char *pszIter1 = papszList[i];
253
        const char *pszIter2 = pszTarget;
254
        char ch1 = '\0';
255
        char ch2 = '\0';
256
        /* The comparison is case-sensitive, except for escaped */
257
        /* sequences where letters of the hexadecimal sequence */
258
        /* can be uppercase or lowercase depending on the quoting algorithm */
259
        while (true)
260
        {
261
            ch1 = *pszIter1;
262
            ch2 = *pszIter2;
263
            if (ch1 == '\0' || ch2 == '\0')
264
                break;
265
            if (ch1 == '%' && ch2 == '%' && pszIter1[1] != '\0' &&
266
                pszIter1[2] != '\0' && pszIter2[1] != '\0' &&
267
                pszIter2[2] != '\0')
268
            {
269
                if (!EQUALN(pszIter1 + 1, pszIter2 + 1, 2))
270
                    break;
271
                pszIter1 += 2;
272
                pszIter2 += 2;
273
            }
274
            if (ch1 != ch2)
275
                break;
276
            pszIter1++;
277
            pszIter2++;
278
        }
279
        if (ch1 == ch2 && ch1 == '\0')
280
            return i;
281
    }
282
283
    return -1;
284
}
285
286
/************************************************************************/
287
/*                        VSICurlIsFileInList()                         */
288
/************************************************************************/
289
290
// Looks up pszTarget in papszList with
// VSICurlFindStringSensitiveExceptEscapeSequences(), first verbatim and then,
// if that fails and URL-escaping actually changes the name, in its
// URL-escaped form. Returns the index found, or a negative value otherwise.
static int VSICurlIsFileInList(CSLConstList papszList, const char *pszTarget)
{
    const int nIdxVerbatim =
        VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
    if (nIdxVerbatim >= 0)
        return nIdxVerbatim;

    // Not found as-is: retry with the URL-escaped form of the name, unless
    // escaping leaves it unchanged (the result would be the same).
    char *pszURLEncoded = CPLEscapeString(pszTarget, -1, CPLES_URL);
    const bool bEscapingChangedName = strcmp(pszTarget, pszURLEncoded) != 0;
    const int nIdxEscaped =
        bEscapingChangedName
            ? VSICurlFindStringSensitiveExceptEscapeSequences(papszList,
                                                              pszURLEncoded)
            : nIdxVerbatim;
    CPLFree(pszURLEncoded);
    return nIdxEscaped;
}
307
308
/************************************************************************/
309
/*                     VSICurlGetURLFromFilename()                      */
310
/************************************************************************/
311
312
static std::string VSICurlGetURLFromFilename(
313
    const char *pszFilename, CPLHTTPRetryParameters *poRetryParameters,
314
    bool *pbUseHead, bool *pbUseRedirectURLIfNoQueryStringParams,
315
    bool *pbListDir, bool *pbEmptyDir, CPLStringList *paosHTTPOptions,
316
    bool *pbPlanetaryComputerURLSigning, char **ppszPlanetaryComputerCollection)
317
{
318
    if (ppszPlanetaryComputerCollection)
319
        *ppszPlanetaryComputerCollection = nullptr;
320
321
    if (!STARTS_WITH(pszFilename, "/vsicurl/") &&
322
        !STARTS_WITH(pszFilename, "/vsicurl?"))
323
        return pszFilename;
324
325
    if (pbPlanetaryComputerURLSigning)
326
    {
327
        // It may be more convenient sometimes to store Planetary Computer URL
328
        // signing as a per-path specific option rather than capturing it in
329
        // the filename with the &pc_url_signing=yes option.
330
        if (CPLTestBool(VSIGetPathSpecificOption(
331
                pszFilename, "VSICURL_PC_URL_SIGNING", "FALSE")))
332
        {
333
            *pbPlanetaryComputerURLSigning = true;
334
        }
335
    }
336
337
    pszFilename += strlen("/vsicurl/");
338
    if (!STARTS_WITH(pszFilename, "http://") &&
339
        !STARTS_WITH(pszFilename, "https://") &&
340
        !STARTS_WITH(pszFilename, "ftp://") &&
341
        !STARTS_WITH(pszFilename, "file://"))
342
    {
343
        if (*pszFilename == '?')
344
            pszFilename++;
345
        char **papszTokens = CSLTokenizeString2(pszFilename, "&", 0);
346
        for (int i = 0; papszTokens[i] != nullptr; i++)
347
        {
348
            char *pszUnescaped =
349
                CPLUnescapeString(papszTokens[i], nullptr, CPLES_URL);
350
            CPLFree(papszTokens[i]);
351
            papszTokens[i] = pszUnescaped;
352
        }
353
354
        std::string osURL;
355
        std::string osHeaders;
356
        for (int i = 0; papszTokens[i]; i++)
357
        {
358
            char *pszKey = nullptr;
359
            const char *pszValue = CPLParseNameValue(papszTokens[i], &pszKey);
360
            if (pszKey && pszValue)
361
            {
362
                if (EQUAL(pszKey, "max_retry"))
363
                {
364
                    if (poRetryParameters)
365
                        poRetryParameters->nMaxRetry = atoi(pszValue);
366
                }
367
                else if (EQUAL(pszKey, "retry_delay"))
368
                {
369
                    if (poRetryParameters)
370
                        poRetryParameters->dfInitialDelay = CPLAtof(pszValue);
371
                }
372
                else if (EQUAL(pszKey, "retry_codes"))
373
                {
374
                    if (poRetryParameters)
375
                        poRetryParameters->osRetryCodes = pszValue;
376
                }
377
                else if (EQUAL(pszKey, "use_head"))
378
                {
379
                    if (pbUseHead)
380
                        *pbUseHead = CPLTestBool(pszValue);
381
                }
382
                else if (EQUAL(pszKey,
383
                               "use_redirect_url_if_no_query_string_params"))
384
                {
385
                    /* Undocumented. Used by PLScenes driver */
386
                    if (pbUseRedirectURLIfNoQueryStringParams)
387
                        *pbUseRedirectURLIfNoQueryStringParams =
388
                            CPLTestBool(pszValue);
389
                }
390
                else if (EQUAL(pszKey, "list_dir"))
391
                {
392
                    if (pbListDir)
393
                        *pbListDir = CPLTestBool(pszValue);
394
                }
395
                else if (EQUAL(pszKey, "empty_dir"))
396
                {
397
                    if (pbEmptyDir)
398
                        *pbEmptyDir = CPLTestBool(pszValue);
399
                }
400
                else if (EQUAL(pszKey, "useragent") ||
401
                         EQUAL(pszKey, "referer") || EQUAL(pszKey, "cookie") ||
402
                         EQUAL(pszKey, "header_file") ||
403
                         EQUAL(pszKey, "unsafessl") ||
404
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
405
                         EQUAL(pszKey, "timeout") ||
406
                         EQUAL(pszKey, "connecttimeout") ||
407
#endif
408
                         EQUAL(pszKey, "low_speed_time") ||
409
                         EQUAL(pszKey, "low_speed_limit") ||
410
                         EQUAL(pszKey, "proxy") || EQUAL(pszKey, "proxyauth") ||
411
                         EQUAL(pszKey, "proxyuserpwd"))
412
                {
413
                    // Above names are the ones supported by
414
                    // CPLHTTPSetOptions()
415
                    if (paosHTTPOptions)
416
                    {
417
                        paosHTTPOptions->SetNameValue(pszKey, pszValue);
418
                    }
419
                }
420
                else if (EQUAL(pszKey, "url"))
421
                {
422
                    osURL = pszValue;
423
                }
424
                else if (EQUAL(pszKey, "pc_url_signing"))
425
                {
426
                    if (pbPlanetaryComputerURLSigning)
427
                        *pbPlanetaryComputerURLSigning = CPLTestBool(pszValue);
428
                }
429
                else if (EQUAL(pszKey, "pc_collection"))
430
                {
431
                    if (ppszPlanetaryComputerCollection)
432
                    {
433
                        CPLFree(*ppszPlanetaryComputerCollection);
434
                        *ppszPlanetaryComputerCollection = CPLStrdup(pszValue);
435
                    }
436
                }
437
                else if (STARTS_WITH(pszKey, "header."))
438
                {
439
                    osHeaders += (pszKey + strlen("header."));
440
                    osHeaders += ':';
441
                    osHeaders += pszValue;
442
                    osHeaders += "\r\n";
443
                }
444
                else
445
                {
446
                    CPLError(CE_Warning, CPLE_NotSupported,
447
                             "Unsupported option: %s", pszKey);
448
                }
449
            }
450
            CPLFree(pszKey);
451
        }
452
453
        if (paosHTTPOptions && !osHeaders.empty())
454
            paosHTTPOptions->SetNameValue("HEADERS", osHeaders.c_str());
455
456
        CSLDestroy(papszTokens);
457
        if (osURL.empty())
458
        {
459
            CPLError(CE_Failure, CPLE_IllegalArg, "Missing url parameter");
460
            return pszFilename;
461
        }
462
463
        return osURL;
464
    }
465
466
    return pszFilename;
467
}
468
469
namespace cpl
470
{
471
472
/************************************************************************/
473
/*                           VSICurlHandle()                            */
474
/************************************************************************/
475
476
// Builds a handle for pszFilename on filesystem poFSIn.
//
// If pszURLIn is given it is used verbatim as the URL; otherwise the URL and
// the per-file options are decoded from pszFilename with
// VSICurlGetURLFromFilename(). The handle keeps its own copy of the URL in
// m_pszURL.
VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandlerBase *poFSIn,
                             const char *pszFilename, const char *pszURLIn)
    : poFS(poFSIn), m_osFilename(pszFilename),
      m_aosHTTPOptions(CPLHTTPGetOptionsFromEnv(pszFilename)),
      m_oRetryParameters(m_aosHTTPOptions),
      m_bUseHead(
          CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_USE_HEAD", "YES")))
{
    if (pszURLIn == nullptr)
    {
        // Directory-listing related outputs are not needed here, hence the
        // two null pointers.
        char *pszPCCollection = nullptr;
        const std::string osURL = VSICurlGetURLFromFilename(
            pszFilename, &m_oRetryParameters, &m_bUseHead,
            &m_bUseRedirectURLIfNoQueryStringParams, nullptr, nullptr,
            &m_aosHTTPOptions, &m_bPlanetaryComputerURLSigning,
            &pszPCCollection);
        m_pszURL = CPLStrdup(osURL.c_str());

        if (pszPCCollection)
        {
            m_osPlanetaryComputerCollection = pszPCCollection;
            CPLFree(pszPCCollection);
        }
    }
    else
    {
        m_pszURL = CPLStrdup(pszURLIn);
    }

    m_bCached = poFSIn->AllowCachedDataFor(pszFilename);
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
}
506
507
/************************************************************************/
508
/*                           ~VSICurlHandle()                           */
509
/************************************************************************/
510
511
// Waits for the AdviseRead thread if one is running, releases the curl multi
// handle used by it, drops cached data for non-cached handles, and frees the
// URL copy owned by the handle.
VSICurlHandle::~VSICurlHandle()
{
    // The thread must be joined before the multi handle is cleaned up.
    if (m_oThreadAdviseRead.joinable())
        m_oThreadAdviseRead.join();

    if (m_hCurlMultiHandleForAdviseRead != nullptr)
        curl_multi_cleanup(m_hCurlMultiHandleForAdviseRead);

    if (!m_bCached)
    {
        // Caching was not allowed for this file: drop what the filesystem
        // handler recorded for the URL and for its parent directory.
        poFS->InvalidateCachedData(m_pszURL);
        poFS->InvalidateDirContent(CPLGetDirnameSafe(m_osFilename.c_str()));
    }

    CPLFree(m_pszURL);
}
529
530
/************************************************************************/
531
/*                               SetURL()                               */
532
/************************************************************************/
533
534
void VSICurlHandle::SetURL(const char *pszURLIn)
535
{
536
    CPLFree(m_pszURL);
537
    m_pszURL = CPLStrdup(pszURLIn);
538
}
539
540
/************************************************************************/
541
/*                           InstallReadCbk()                           */
542
/************************************************************************/
543
544
int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
545
                                  void *pfnUserDataIn,
546
                                  int bStopOnInterruptUntilUninstallIn)
547
{
548
    if (pfnReadCbk != nullptr)
549
        return FALSE;
550
551
    pfnReadCbk = pfnReadCbkIn;
552
    pReadCbkUserData = pfnUserDataIn;
553
    bStopOnInterruptUntilUninstall =
554
        CPL_TO_BOOL(bStopOnInterruptUntilUninstallIn);
555
    bInterrupted = false;
556
    return TRUE;
557
}
558
559
/************************************************************************/
560
/*                          UninstallReadCbk()                          */
561
/************************************************************************/
562
563
int VSICurlHandle::UninstallReadCbk()
564
{
565
    if (pfnReadCbk == nullptr)
566
        return FALSE;
567
568
    pfnReadCbk = nullptr;
569
    pReadCbkUserData = nullptr;
570
    bStopOnInterruptUntilUninstall = false;
571
    bInterrupted = false;
572
    return TRUE;
573
}
574
575
/************************************************************************/
576
/*                                Seek()                                */
577
/************************************************************************/
578
579
// Moves the current offset of the handle and clears the end-of-file flag.
// SEEK_SET and SEEK_CUR are handled as usual; any other nWhence value is
// treated as relative to the file size returned by GetFileSize(false).
// Always returns 0.
int VSICurlHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    switch (nWhence)
    {
        case SEEK_SET:
            curOffset = nOffset;
            break;

        case SEEK_CUR:
            curOffset += nOffset;
            break;

        default:
            curOffset = GetFileSize(false) + nOffset;
            break;
    }

    bEOF = false;
    return 0;
}
596
597
}  // namespace cpl
598
599
/************************************************************************/
600
/*               VSICurlGetTimeStampFromRFC822DateTime()                */
601
/************************************************************************/
602
603
// Converts a date such as "Sun, 03 Apr 2016 12:07:27 GMT" to the value
// returned by CPLYMDHMSToUnixTime() for its broken-down fields.
// The leading "Ddd, " part is optional. Returns 0 if the string does not
// match the expected layout or the month name is not recognised.
static GIntBig VSICurlGetTimeStampFromRFC822DateTime(const char *pszDT)
{
    // Skip the optional day-of-week prefix ("Sun, ").
    if (strlen(pszDT) >= 5 && pszDT[3] == ',' && pszDT[4] == ' ')
        pszDT += 5;

    int nDay = 0;
    int nYear = 0;
    int nHour = 0;
    int nMinute = 0;
    int nSecond = 0;
    char szMonth[4] = {};  // 3 letters + NUL (zero-initialised)

    const int nFieldsRead =
        sscanf(pszDT, "%02d %03s %04d %02d:%02d:%02d GMT", &nDay, szMonth,
               &nYear, &nHour, &nMinute, &nSecond);
    if (nFieldsRead != 6)
        return 0;

    static const char *const aszMonthStr[] = {"Jan", "Feb", "Mar", "Apr",
                                              "May", "Jun", "Jul", "Aug",
                                              "Sep", "Oct", "Nov", "Dec"};

    // Case-insensitive lookup of the month abbreviation.
    int nMonthIdx0 = 0;
    while (nMonthIdx0 < 12 && !EQUAL(szMonth, aszMonthStr[nMonthIdx0]))
        ++nMonthIdx0;
    if (nMonthIdx0 == 12)
        return 0;

    struct tm brokendowntime;
    brokendowntime.tm_sec = nSecond;
    brokendowntime.tm_min = nMinute;
    brokendowntime.tm_hour = nHour;
    brokendowntime.tm_mday = nDay;
    brokendowntime.tm_mon = nMonthIdx0;
    brokendowntime.tm_year = nYear - 1900;
    return CPLYMDHMSToUnixTime(&brokendowntime);
}
645
646
/************************************************************************/
647
/*                     VSICURLInitWriteFuncStruct()                     */
648
/************************************************************************/
649
650
void VSICURLInitWriteFuncStruct(cpl::WriteFuncStruct *psStruct, VSILFILE *fp,
651
                                VSICurlReadCbkFunc pfnReadCbk,
652
                                void *pReadCbkUserData)
653
{
654
    psStruct->pBuffer = nullptr;
655
    psStruct->nSize = 0;
656
    psStruct->bIsHTTP = false;
657
    psStruct->bMultiRange = false;
658
    psStruct->nStartOffset = 0;
659
    psStruct->nEndOffset = 0;
660
    psStruct->nHTTPCode = 0;
661
    psStruct->nFirstHTTPCode = 0;
662
    psStruct->nContentLength = 0;
663
    psStruct->bFoundContentRange = false;
664
    psStruct->bError = false;
665
    psStruct->bDetectRangeDownloadingError = true;
666
    psStruct->nTimestampDate = 0;
667
668
    psStruct->fp = fp;
669
    psStruct->pfnReadCbk = pfnReadCbk;
670
    psStruct->pReadCbkUserData = pReadCbkUserData;
671
    psStruct->bInterrupted = false;
672
}
673
674
/************************************************************************/
675
/*                       VSICurlHandleWriteFunc()                       */
676
/************************************************************************/
677
678
size_t VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb,
679
                              void *req)
680
{
681
    cpl::WriteFuncStruct *psStruct = static_cast<cpl::WriteFuncStruct *>(req);
682
    const size_t nSize = count * nmemb;
683
684
    if (psStruct->bInterrupted)
685
    {
686
        return 0;
687
    }
688
689
    char *pNewBuffer = static_cast<char *>(
690
        VSIRealloc(psStruct->pBuffer, psStruct->nSize + nSize + 1));
691
    if (pNewBuffer)
692
    {
693
        psStruct->pBuffer = pNewBuffer;
694
        memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
695
        psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
696
        if (psStruct->bIsHTTP)
697
        {
698
            char *pszLine = psStruct->pBuffer + psStruct->nSize;
699
            if (STARTS_WITH_CI(pszLine, "HTTP/"))
700
            {
701
                char *pszSpace = strchr(pszLine, ' ');
702
                if (pszSpace)
703
                {
704
                    const int nHTTPCode = atoi(pszSpace + 1);
705
                    if (psStruct->nFirstHTTPCode == 0)
706
                        psStruct->nFirstHTTPCode = nHTTPCode;
707
                    psStruct->nHTTPCode = nHTTPCode;
708
                }
709
            }
710
            else if (STARTS_WITH_CI(pszLine, "Content-Length: "))
711
            {
712
                psStruct->nContentLength = CPLScanUIntBig(
713
                    pszLine + 16, static_cast<int>(strlen(pszLine + 16)));
714
            }
715
            else if (STARTS_WITH_CI(pszLine, "Content-Range: "))
716
            {
717
                psStruct->bFoundContentRange = true;
718
            }
719
            else if (STARTS_WITH_CI(pszLine, "Date: "))
720
            {
721
                CPLString osDate = pszLine + strlen("Date: ");
722
                size_t nSizeLine = osDate.size();
723
                while (nSizeLine && (osDate[nSizeLine - 1] == '\r' ||
724
                                     osDate[nSizeLine - 1] == '\n'))
725
                {
726
                    osDate.resize(nSizeLine - 1);
727
                    nSizeLine--;
728
                }
729
                osDate.Trim();
730
731
                GIntBig nTimestampDate =
732
                    VSICurlGetTimeStampFromRFC822DateTime(osDate.c_str());
733
#if DEBUG_VERBOSE
734
                CPLDebug("VSICURL", "Timestamp = " CPL_FRMT_GIB,
735
                         nTimestampDate);
736
#endif
737
                psStruct->nTimestampDate = nTimestampDate;
738
            }
739
            /*if( nSize > 2 && pszLine[nSize - 2] == '\r' &&
740
                  pszLine[nSize - 1] == '\n' )
741
            {
742
                pszLine[nSize - 2] = 0;
743
                CPLDebug("VSICURL", "%s", pszLine);
744
                pszLine[nSize - 2] = '\r';
745
            }*/
746
747
            if (pszLine[0] == '\r' && pszLine[1] == '\n')
748
            {
749
                // Detect servers that don't support range downloading.
750
                if (psStruct->nHTTPCode == 200 &&
751
                    psStruct->bDetectRangeDownloadingError &&
752
                    !psStruct->bMultiRange && !psStruct->bFoundContentRange &&
753
                    (psStruct->nStartOffset != 0 ||
754
                     psStruct->nContentLength >
755
                         10 * (psStruct->nEndOffset - psStruct->nStartOffset +
756
                               1)))
757
                {
758
                    CPLError(CE_Failure, CPLE_AppDefined,
759
                             "Range downloading not supported by this "
760
                             "server!");
761
                    psStruct->bError = true;
762
                    return 0;
763
                }
764
            }
765
        }
766
        else
767
        {
768
            if (psStruct->pfnReadCbk)
769
            {
770
                if (!psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
771
                                          psStruct->pReadCbkUserData))
772
                {
773
                    psStruct->bInterrupted = true;
774
                    return 0;
775
                }
776
            }
777
        }
778
        psStruct->nSize += nSize;
779
        return nmemb;
780
    }
781
    else
782
    {
783
        return 0;
784
    }
785
}
786
787
/************************************************************************/
788
/*                      VSICurlIsS3LikeSignedURL()                      */
789
/************************************************************************/
790
791
// Returns true if pszURL looks like a signed URL, that is if either:
// - it contains an "X-Amz-Signature=" query parameter (preceded by '&' or
//   '?'), whatever the host, or
// - it contains ".s3.amazonaws.com", ".storage.googleapis.com" or
//   ".cloudfront.net" followed by '/' or ':', together with a "Signature="
//   query parameter (preceded by '&' or '?').
static bool VSICurlIsS3LikeSignedURL(const char *pszURL)
{
    const auto Contains = [pszURL](const char *pszNeedle)
    { return strstr(pszURL, pszNeedle) != nullptr; };

    if (Contains("&X-Amz-Signature=") || Contains("?X-Amz-Signature="))
        return true;

    const bool bKnownHost =
        Contains(".s3.amazonaws.com/") || Contains(".s3.amazonaws.com:") ||
        Contains(".storage.googleapis.com/") ||
        Contains(".storage.googleapis.com:") ||
        Contains(".cloudfront.net/") || Contains(".cloudfront.net:");
    if (!bKnownHost)
        return false;

    return Contains("&Signature=") || Contains("?Signature=");
}
804
805
/************************************************************************/
806
/*                VSICurlGetExpiresFromS3LikeSignedURL()                */
807
/************************************************************************/
808
809
// Extract the expiration time, as a Unix timestamp, from the query
// parameters of a signed URL.
//
// Two forms are recognized:
//  - "Expires=<unix timestamp>", returned as is;
//  - "X-Amz-Expires=<delay in seconds>" combined with
//    "X-Amz-Date=YYYYMMDDTHHMMSSZ", returned as date + delay.
// Returns 0 when neither form is present or when X-Amz-Date is too short
// or does not have 'Z' at the expected position.
static GIntBig VSICurlGetExpiresFromS3LikeSignedURL(const char *pszURL)
{
    // Return a pointer just past "<sep><key>=" in the URL, trying the '&'
    // separator over the whole URL first and then '?'; nullptr if absent.
    const auto FindValue = [pszURL](const char *pszKey) -> const char *
    {
        const std::string osKeyEq = std::string(pszKey) + '=';
        for (const char chSep : {'&', '?'})
        {
            const std::string osNeedle = chSep + osKeyEq;
            const char *pszHit = strstr(pszURL, osNeedle.c_str());
            if (pszHit)
                return pszHit + osNeedle.size();
        }
        return nullptr;
    };

    // Expires= is a Unix timestamp
    if (const char *pszExpires = FindValue("Expires"))
        return CPLAtoGIntBig(pszExpires);

    // X-Amz-Expires= is a delay, to be combined with X-Amz-Date=
    const char *pszAmzExpires = FindValue("X-Amz-Expires");
    if (!pszAmzExpires)
        return 0;
    const int nDelay = atoi(pszAmzExpires);

    const char *pszAmzDate = FindValue("X-Amz-Date");
    if (!pszAmzDate)
        return 0;

    // pszAmzDate should be YYYYMMDDTHHMMSSZ
    constexpr size_t nDateLen = sizeof("YYYYMMDDTHHMMSSZ") - 1;
    if (strlen(pszAmzDate) < nDateLen || pszAmzDate[nDateLen - 1] != 'Z')
        return 0;

    // Work on a copy limited to the timestamp itself; the value pointer
    // extends to the end of the URL.
    const std::string osDate(pszAmzDate, nDateLen);
    const auto Field = [&osDate](size_t nPos, size_t nLen)
    { return atoi(osDate.substr(nPos, nLen).c_str()); };

    struct tm brokendowntime;
    brokendowntime.tm_year = Field(0, 4) - 1900;
    brokendowntime.tm_mon = Field(4, 2) - 1;
    brokendowntime.tm_mday = Field(6, 2);
    // Offset 8 is the 'T' separator.
    brokendowntime.tm_hour = Field(9, 2);
    brokendowntime.tm_min = Field(11, 2);
    brokendowntime.tm_sec = Field(13, 2);
    return CPLYMDHMSToUnixTime(&brokendowntime) + nDelay;
}
857
858
/************************************************************************/
859
/*                        VSICURLMultiPerform()                         */
860
/************************************************************************/
861
862
void VSICURLMultiPerform(CURLM *hCurlMultiHandle, CURL *hEasyHandle,
863
                         std::atomic<bool> *pbInterrupt)
864
{
865
    int repeats = 0;
866
867
    if (hEasyHandle)
868
        curl_multi_add_handle(hCurlMultiHandle, hEasyHandle);
869
870
    void *old_handler = CPLHTTPIgnoreSigPipe();
871
    while (true)
872
    {
873
        int still_running;
874
        while (curl_multi_perform(hCurlMultiHandle, &still_running) ==
875
               CURLM_CALL_MULTI_PERFORM)
876
        {
877
            // loop
878
        }
879
        if (!still_running)
880
        {
881
            break;
882
        }
883
884
#ifdef undef
885
        CURLMsg *msg;
886
        do
887
        {
888
            int msgq = 0;
889
            msg = curl_multi_info_read(hCurlMultiHandle, &msgq);
890
            if (msg && (msg->msg == CURLMSG_DONE))
891
            {
892
                CURL *e = msg->easy_handle;
893
            }
894
        } while (msg);
895
#endif
896
897
        CPLMultiPerformWait(hCurlMultiHandle, repeats);
898
899
        if (pbInterrupt && *pbInterrupt)
900
            break;
901
    }
902
    CPLHTTPRestoreSigPipeHandler(old_handler);
903
904
    if (hEasyHandle)
905
        curl_multi_remove_handle(hCurlMultiHandle, hEasyHandle);
906
}
907
908
/************************************************************************/
909
/*                       VSICurlDummyWriteFunc()                        */
910
/************************************************************************/
911
912
// No-op callback: ignores all its arguments and always reports 0.
// Installed as both header and write function by
// VSICURLResetHeaderAndWriterFunctions().
static size_t VSICurlDummyWriteFunc(void * /* buffer */, size_t /* size */,
                                    size_t /* nmemb */, void * /* userdata */)
{
    constexpr size_t nHandled = 0;
    return nHandled;
}
916
917
/************************************************************************/
918
/*                VSICURLResetHeaderAndWriterFunctions()                */
919
/************************************************************************/
920
921
// Replace the header and write callbacks of an easy handle with
// VSICurlDummyWriteFunc, so the handle no longer refers to whichever
// callbacks were previously installed on it.
void VSICURLResetHeaderAndWriterFunctions(CURL *hCurlHandle)
{
    const auto pfnNoOp = &VSICurlDummyWriteFunc;

    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, pfnNoOp);
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, pfnNoOp);
}
928
929
/************************************************************************/
930
/*                         Iso8601ToUnixTime()                          */
931
/************************************************************************/
932
933
static bool Iso8601ToUnixTime(const char *pszDT, GIntBig *pnUnixTime)
934
{
935
    int nYear;
936
    int nMonth;
937
    int nDay;
938
    int nHour;
939
    int nMinute;
940
    int nSecond;
941
    if (sscanf(pszDT, "%04d-%02d-%02dT%02d:%02d:%02d", &nYear, &nMonth, &nDay,
942
               &nHour, &nMinute, &nSecond) == 6)
943
    {
944
        struct tm brokendowntime;
945
        brokendowntime.tm_year = nYear - 1900;
946
        brokendowntime.tm_mon = nMonth - 1;
947
        brokendowntime.tm_mday = nDay;
948
        brokendowntime.tm_hour = nHour;
949
        brokendowntime.tm_min = nMinute;
950
        brokendowntime.tm_sec = nSecond;
951
        *pnUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
952
        return true;
953
    }
954
    return false;
955
}
956
957
namespace cpl
958
{
959
960
/************************************************************************/
961
/*                   ManagePlanetaryComputerSigning()                   */
962
/************************************************************************/
963
964
void VSICurlHandle::ManagePlanetaryComputerSigning() const
965
{
966
    // Take global lock
967
    static std::mutex goMutex;
968
    std::lock_guard<std::mutex> oLock(goMutex);
969
970
    struct PCSigningInfo
971
    {
972
        std::string osQueryString{};
973
        GIntBig nExpireTimestamp = 0;
974
    };
975
976
    PCSigningInfo sSigningInfo;
977
    constexpr int knExpirationDelayMargin = 60;
978
979
    if (!m_osPlanetaryComputerCollection.empty())
980
    {
981
        // key is the name of a collection
982
        static lru11::Cache<std::string, PCSigningInfo> goCacheCollection{1024};
983
984
        if (goCacheCollection.tryGet(m_osPlanetaryComputerCollection,
985
                                     sSigningInfo) &&
986
            time(nullptr) + knExpirationDelayMargin <=
987
                sSigningInfo.nExpireTimestamp)
988
        {
989
            m_osQueryString = sSigningInfo.osQueryString;
990
        }
991
        else
992
        {
993
            const auto psResult =
994
                CPLHTTPFetch((std::string(CPLGetConfigOption(
995
                                  "VSICURL_PC_SAS_TOKEN_URL",
996
                                  "https://planetarycomputer.microsoft.com/api/"
997
                                  "sas/v1/token/")) +
998
                              m_osPlanetaryComputerCollection)
999
                                 .c_str(),
1000
                             nullptr);
1001
            if (psResult)
1002
            {
1003
                const auto aosKeyVals = CPLParseKeyValueJson(
1004
                    reinterpret_cast<const char *>(psResult->pabyData));
1005
                const char *pszToken = aosKeyVals.FetchNameValue("token");
1006
                if (pszToken)
1007
                {
1008
                    m_osQueryString = '?';
1009
                    m_osQueryString += pszToken;
1010
1011
                    sSigningInfo.osQueryString = m_osQueryString;
1012
                    sSigningInfo.nExpireTimestamp = 0;
1013
                    const char *pszExpiry =
1014
                        aosKeyVals.FetchNameValue("msft:expiry");
1015
                    if (pszExpiry)
1016
                    {
1017
                        Iso8601ToUnixTime(pszExpiry,
1018
                                          &sSigningInfo.nExpireTimestamp);
1019
                    }
1020
                    goCacheCollection.insert(m_osPlanetaryComputerCollection,
1021
                                             sSigningInfo);
1022
1023
                    CPLDebug("VSICURL", "Got token from Planetary Computer: %s",
1024
                             m_osQueryString.c_str());
1025
                }
1026
                CPLHTTPDestroyResult(psResult);
1027
            }
1028
        }
1029
    }
1030
    else
1031
    {
1032
        // key is a URL
1033
        static lru11::Cache<std::string, PCSigningInfo> goCacheURL{1024};
1034
1035
        if (goCacheURL.tryGet(m_pszURL, sSigningInfo) &&
1036
            time(nullptr) + knExpirationDelayMargin <=
1037
                sSigningInfo.nExpireTimestamp)
1038
        {
1039
            m_osQueryString = sSigningInfo.osQueryString;
1040
        }
1041
        else
1042
        {
1043
            const auto psResult =
1044
                CPLHTTPFetch((std::string(CPLGetConfigOption(
1045
                                  "VSICURL_PC_SAS_SIGN_HREF_URL",
1046
                                  "https://planetarycomputer.microsoft.com/api/"
1047
                                  "sas/v1/sign?href=")) +
1048
                              m_pszURL)
1049
                                 .c_str(),
1050
                             nullptr);
1051
            if (psResult)
1052
            {
1053
                const auto aosKeyVals = CPLParseKeyValueJson(
1054
                    reinterpret_cast<const char *>(psResult->pabyData));
1055
                const char *pszHref = aosKeyVals.FetchNameValue("href");
1056
                if (pszHref && STARTS_WITH(pszHref, m_pszURL))
1057
                {
1058
                    m_osQueryString = pszHref + strlen(m_pszURL);
1059
1060
                    sSigningInfo.osQueryString = m_osQueryString;
1061
                    sSigningInfo.nExpireTimestamp = 0;
1062
                    const char *pszExpiry =
1063
                        aosKeyVals.FetchNameValue("msft:expiry");
1064
                    if (pszExpiry)
1065
                    {
1066
                        Iso8601ToUnixTime(pszExpiry,
1067
                                          &sSigningInfo.nExpireTimestamp);
1068
                    }
1069
                    goCacheURL.insert(m_pszURL, sSigningInfo);
1070
1071
                    CPLDebug("VSICURL",
1072
                             "Got signature from Planetary Computer: %s",
1073
                             m_osQueryString.c_str());
1074
                }
1075
                CPLHTTPDestroyResult(psResult);
1076
            }
1077
        }
1078
    }
1079
}
1080
1081
/************************************************************************/
1082
/*                         UpdateQueryString()                          */
1083
/************************************************************************/
1084
1085
void VSICurlHandle::UpdateQueryString() const
1086
{
1087
    if (m_bPlanetaryComputerURLSigning)
1088
    {
1089
        ManagePlanetaryComputerSigning();
1090
    }
1091
    else
1092
    {
1093
        const char *pszQueryString = VSIGetPathSpecificOption(
1094
            m_osFilename.c_str(), "VSICURL_QUERY_STRING", nullptr);
1095
        if (pszQueryString)
1096
        {
1097
            if (m_osFilename.back() == '?')
1098
            {
1099
                if (pszQueryString[0] == '?')
1100
                    m_osQueryString = pszQueryString + 1;
1101
                else
1102
                    m_osQueryString = pszQueryString;
1103
            }
1104
            else
1105
            {
1106
                if (pszQueryString[0] == '?')
1107
                    m_osQueryString = pszQueryString;
1108
                else
1109
                {
1110
                    m_osQueryString = "?";
1111
                    m_osQueryString.append(pszQueryString);
1112
                }
1113
            }
1114
        }
1115
    }
1116
}
1117
1118
/************************************************************************/
1119
/*                        GetFileSizeOrHeaders()                        */
1120
/************************************************************************/
1121
1122
vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError,
1123
                                                 bool bGetHeaders)
1124
{
1125
    if (oFileProp.bHasComputedFileSize && !bGetHeaders)
1126
        return oFileProp.fileSize;
1127
1128
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
1129
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
1130
    NetworkStatisticsAction oContextAction("GetFileSize");
1131
1132
    oFileProp.bHasComputedFileSize = true;
1133
1134
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
1135
1136
    UpdateQueryString();
1137
1138
    std::string osURL(m_pszURL + m_osQueryString);
1139
    int nTryCount = 0;
1140
    bool bRetryWithGet = false;
1141
    bool bS3LikeRedirect = false;
1142
    CPLHTTPRetryContext oRetryContext(m_oRetryParameters);
1143
1144
retry:
1145
    ++nTryCount;
1146
    CURL *hCurlHandle = curl_easy_init();
1147
1148
    struct curl_slist *headers = nullptr;
1149
    if (bS3LikeRedirect)
1150
    {
1151
        // Do not propagate authentication sent to the original URL to a S3-like
1152
        // redirect.
1153
        CPLStringList aosHTTPOptions{};
1154
        for (const auto &pszOption : m_aosHTTPOptions)
1155
        {
1156
            if (STARTS_WITH_CI(pszOption, "HTTPAUTH") ||
1157
                STARTS_WITH_CI(pszOption, "HTTP_BEARER"))
1158
                continue;
1159
            aosHTTPOptions.AddString(pszOption);
1160
        }
1161
        headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(),
1162
                                    aosHTTPOptions.List());
1163
    }
1164
    else
1165
    {
1166
        headers = VSICurlSetOptions(hCurlHandle, osURL.c_str(),
1167
                                    m_aosHTTPOptions.List());
1168
    }
1169
1170
    WriteFuncStruct sWriteFuncHeaderData;
1171
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
1172
                               nullptr);
1173
    sWriteFuncHeaderData.bDetectRangeDownloadingError = false;
1174
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(osURL.c_str(), "http");
1175
1176
    WriteFuncStruct sWriteFuncData;
1177
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
1178
1179
    std::string osVerb;
1180
    std::string osRange;  // leave in this scope !
1181
    int nRoundedBufSize = 0;
1182
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
1183
    if (UseLimitRangeGetInsteadOfHead())
1184
    {
1185
        osVerb = "GET";
1186
        const int nBufSize = std::max(
1187
            1024, std::min(10 * 1024 * 1024,
1188
                           atoi(CPLGetConfigOption(
1189
                               "GDAL_INGESTED_BYTES_AT_OPEN", "1024"))));
1190
        nRoundedBufSize = cpl::div_round_up(nBufSize, knDOWNLOAD_CHUNK_SIZE) *
1191
                          knDOWNLOAD_CHUNK_SIZE;
1192
1193
        // so it gets included in Azure signature
1194
        osRange = CPLSPrintf("Range: bytes=0-%d", nRoundedBufSize - 1);
1195
        headers = curl_slist_append(headers, osRange.c_str());
1196
    }
1197
    // HACK for mbtiles driver: http://a.tiles.mapbox.com/v3/ doesn't accept
1198
    // HEAD, as it is a redirect to AWS S3 signed URL, but those are only valid
1199
    // for a given type of HTTP request, and thus GET. This is valid for any
1200
    // signed URL for AWS S3.
1201
    else if (bRetryWithGet ||
1202
             strstr(osURL.c_str(), ".tiles.mapbox.com/") != nullptr ||
1203
             VSICurlIsS3LikeSignedURL(osURL.c_str()) || !m_bUseHead)
1204
    {
1205
        sWriteFuncData.bInterrupted = true;
1206
        osVerb = "GET";
1207
    }
1208
    else
1209
    {
1210
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
1211
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
1212
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
1213
        osVerb = "HEAD";
1214
    }
1215
1216
    if (!AllowAutomaticRedirection())
1217
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
1218
1219
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
1220
                               &sWriteFuncHeaderData);
1221
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
1222
                               VSICurlHandleWriteFunc);
1223
1224
    // Bug with older curl versions (<=7.16.4) and FTP.
1225
    // See http://curl.haxx.se/mail/lib-2007-08/0312.html
1226
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1227
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
1228
                               VSICurlHandleWriteFunc);
1229
1230
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
1231
    szCurlErrBuf[0] = '\0';
1232
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
1233
1234
    headers = GetCurlHeaders(osVerb, headers);
1235
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
1236
1237
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1);
1238
1239
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt);
1240
1241
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
1242
1243
    curl_slist_free_all(headers);
1244
1245
    oFileProp.eExists = EXIST_UNKNOWN;
1246
1247
    long mtime = 0;
1248
    curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME, &mtime);
1249
1250
    if (osVerb == "GET")
1251
        NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
1252
    else
1253
        NetworkStatisticsLogger::LogHEAD();
1254
1255
    if (STARTS_WITH(osURL.c_str(), "ftp"))
1256
    {
1257
        if (sWriteFuncData.pBuffer != nullptr)
1258
        {
1259
            const char *pszContentLength =
1260
                strstr(const_cast<const char *>(sWriteFuncData.pBuffer),
1261
                       "Content-Length: ");
1262
            if (pszContentLength)
1263
            {
1264
                pszContentLength += strlen("Content-Length: ");
1265
                oFileProp.eExists = EXIST_YES;
1266
                oFileProp.fileSize =
1267
                    CPLScanUIntBig(pszContentLength,
1268
                                   static_cast<int>(strlen(pszContentLength)));
1269
                if (ENABLE_DEBUG)
1270
                    CPLDebug(poFS->GetDebugKey(),
1271
                             "GetFileSize(%s)=" CPL_FRMT_GUIB, osURL.c_str(),
1272
                             oFileProp.fileSize);
1273
            }
1274
        }
1275
    }
1276
1277
    double dfSize = 0;
1278
    long response_code = -1;
1279
    if (oFileProp.eExists != EXIST_YES)
1280
    {
1281
        curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1282
1283
        bool bAlreadyLogged = false;
1284
        if (response_code >= 400 && szCurlErrBuf[0] == '\0')
1285
        {
1286
            const bool bLogResponse =
1287
                CPLTestBool(CPLGetConfigOption("CPL_CURL_VERBOSE", "NO"));
1288
            if (bLogResponse && sWriteFuncData.pBuffer)
1289
            {
1290
                const char *pszErrorMsg =
1291
                    static_cast<const char *>(sWriteFuncData.pBuffer);
1292
                bAlreadyLogged = true;
1293
                CPLDebug(
1294
                    poFS->GetDebugKey(),
1295
                    "GetFileSize(%s): response_code=%d, server error msg=%s",
1296
                    osURL.c_str(), static_cast<int>(response_code),
1297
                    pszErrorMsg[0] ? pszErrorMsg : "(no message provided)");
1298
            }
1299
        }
1300
        else if (szCurlErrBuf[0] != '\0')
1301
        {
1302
            bAlreadyLogged = true;
1303
            CPLDebug(poFS->GetDebugKey(),
1304
                     "GetFileSize(%s): response_code=%d, curl error msg=%s",
1305
                     osURL.c_str(), static_cast<int>(response_code),
1306
                     szCurlErrBuf);
1307
        }
1308
1309
        std::string osEffectiveURL;
1310
        {
1311
            char *pszEffectiveURL = nullptr;
1312
            curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL,
1313
                              &pszEffectiveURL);
1314
            if (pszEffectiveURL)
1315
                osEffectiveURL = pszEffectiveURL;
1316
        }
1317
1318
        if (!osEffectiveURL.empty() &&
1319
            strstr(osEffectiveURL.c_str(), osURL.c_str()) == nullptr)
1320
        {
1321
            // Moved permanently ?
1322
            if (sWriteFuncHeaderData.nFirstHTTPCode == 301 ||
1323
                (m_bUseRedirectURLIfNoQueryStringParams &&
1324
                 osEffectiveURL.find('?') == std::string::npos))
1325
            {
1326
                CPLDebug(poFS->GetDebugKey(),
1327
                         "Using effective URL %s permanently",
1328
                         osEffectiveURL.c_str());
1329
                oFileProp.osRedirectURL = osEffectiveURL;
1330
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
1331
            }
1332
            else
1333
            {
1334
                CPLDebug(poFS->GetDebugKey(),
1335
                         "Using effective URL %s temporarily",
1336
                         osEffectiveURL.c_str());
1337
            }
1338
1339
            // Is this a redirect to an S3 URL?
1340
            if (VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) &&
1341
                !VSICurlIsS3LikeSignedURL(osURL.c_str()))
1342
            {
1343
                // Note that this is a redirect as we won't notice after the
1344
                // retry.
1345
                bS3LikeRedirect = true;
1346
1347
                if (!bRetryWithGet && osVerb == "HEAD" && response_code == 403)
1348
                {
1349
                    CPLDebug(poFS->GetDebugKey(),
1350
                             "Redirected to a AWS S3 signed URL. Retrying "
1351
                             "with GET request instead of HEAD since the URL "
1352
                             "might be valid only for GET");
1353
                    bRetryWithGet = true;
1354
                    osURL = std::move(osEffectiveURL);
1355
                    CPLFree(sWriteFuncData.pBuffer);
1356
                    CPLFree(sWriteFuncHeaderData.pBuffer);
1357
                    curl_easy_cleanup(hCurlHandle);
1358
                    goto retry;
1359
                }
1360
            }
1361
            else if (oFileProp.osRedirectURL.empty() && nTryCount == 1 &&
1362
                     ((response_code >= 300 && response_code < 400) ||
1363
                      (osVerb == "HEAD" && response_code == 403)))
1364
            {
1365
                if (response_code == 403)
1366
                {
1367
                    CPLDebug(
1368
                        poFS->GetDebugKey(),
1369
                        "Retrying redirected URL with GET instead of HEAD");
1370
                    bRetryWithGet = true;
1371
                }
1372
                osURL = std::move(osEffectiveURL);
1373
                CPLFree(sWriteFuncData.pBuffer);
1374
                CPLFree(sWriteFuncHeaderData.pBuffer);
1375
                curl_easy_cleanup(hCurlHandle);
1376
                goto retry;
1377
            }
1378
        }
1379
1380
        if (bS3LikeRedirect && response_code >= 200 && response_code < 300 &&
1381
            sWriteFuncHeaderData.nTimestampDate > 0 &&
1382
            !osEffectiveURL.empty() &&
1383
            CPLTestBool(
1384
                CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE")))
1385
        {
1386
            const GIntBig nExpireTimestamp =
1387
                VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str());
1388
            if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10)
1389
            {
1390
                const int nValidity = static_cast<int>(
1391
                    nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate);
1392
                CPLDebug(poFS->GetDebugKey(),
1393
                         "Will use redirect URL for the next %d seconds",
1394
                         nValidity);
1395
                // As our local clock might not be in sync with server clock,
1396
                // figure out the expiration timestamp in local time
1397
                oFileProp.bS3LikeRedirect = true;
1398
                oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity;
1399
                oFileProp.osRedirectURL = osEffectiveURL;
1400
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
1401
            }
1402
        }
1403
1404
        if (response_code < 400)
1405
        {
1406
            curl_off_t nSizeTmp = 0;
1407
            const CURLcode code = curl_easy_getinfo(
1408
                hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &nSizeTmp);
1409
            CPL_IGNORE_RET_VAL(dfSize);
1410
            dfSize = static_cast<double>(nSizeTmp);
1411
            if (code == 0)
1412
            {
1413
                oFileProp.eExists = EXIST_YES;
1414
                if (dfSize < 0)
1415
                {
1416
                    if (osVerb == "HEAD" && !bRetryWithGet &&
1417
                        response_code == 200)
1418
                    {
1419
                        CPLDebug(poFS->GetDebugKey(),
1420
                                 "HEAD did not provide file size. Retrying "
1421
                                 "with GET");
1422
                        bRetryWithGet = true;
1423
                        CPLFree(sWriteFuncData.pBuffer);
1424
                        CPLFree(sWriteFuncHeaderData.pBuffer);
1425
                        curl_easy_cleanup(hCurlHandle);
1426
                        goto retry;
1427
                    }
1428
                    oFileProp.fileSize = 0;
1429
                }
1430
                else
1431
                    oFileProp.fileSize = static_cast<GUIntBig>(dfSize);
1432
            }
1433
        }
1434
1435
        if (sWriteFuncHeaderData.pBuffer != nullptr &&
1436
            (response_code == 200 || response_code == 206))
1437
        {
1438
            {
1439
                char **papszHeaders =
1440
                    CSLTokenizeString2(sWriteFuncHeaderData.pBuffer, "\r\n", 0);
1441
                for (int i = 0; papszHeaders[i]; ++i)
1442
                {
1443
                    char *pszKey = nullptr;
1444
                    const char *pszValue =
1445
                        CPLParseNameValue(papszHeaders[i], &pszKey);
1446
                    if (pszKey && pszValue)
1447
                    {
1448
                        if (bGetHeaders)
1449
                        {
1450
                            m_aosHeaders.SetNameValue(pszKey, pszValue);
1451
                        }
1452
                        if (EQUAL(pszKey, "Cache-Control") &&
1453
                            EQUAL(pszValue, "no-cache") &&
1454
                            CPLTestBool(CPLGetConfigOption(
1455
                                "CPL_VSIL_CURL_HONOR_CACHE_CONTROL", "YES")))
1456
                        {
1457
                            m_bCached = false;
1458
                        }
1459
1460
                        else if (EQUAL(pszKey, "ETag"))
1461
                        {
1462
                            std::string osValue(pszValue);
1463
                            if (osValue.size() >= 2 && osValue.front() == '"' &&
1464
                                osValue.back() == '"')
1465
                                osValue = osValue.substr(1, osValue.size() - 2);
1466
                            oFileProp.ETag = std::move(osValue);
1467
                        }
1468
1469
                        // Azure Data Lake Storage
1470
                        else if (EQUAL(pszKey, "x-ms-resource-type"))
1471
                        {
1472
                            if (EQUAL(pszValue, "file"))
1473
                            {
1474
                                oFileProp.nMode |= S_IFREG;
1475
                            }
1476
                            else if (EQUAL(pszValue, "directory"))
1477
                            {
1478
                                oFileProp.bIsDirectory = true;
1479
                                oFileProp.nMode |= S_IFDIR;
1480
                            }
1481
                        }
1482
                        else if (EQUAL(pszKey, "x-ms-permissions"))
1483
                        {
1484
                            oFileProp.nMode |=
1485
                                VSICurlParseUnixPermissions(pszValue);
1486
                        }
1487
1488
                        // https://overturemapswestus2.blob.core.windows.net/release/2024-11-13.0/theme%3Ddivisions/type%3Ddivision_area
1489
                        // returns a x-ms-meta-hdi_isfolder: true header
1490
                        else if (EQUAL(pszKey, "x-ms-meta-hdi_isfolder") &&
1491
                                 EQUAL(pszValue, "true"))
1492
                        {
1493
                            oFileProp.bIsAzureFolder = true;
1494
                            oFileProp.bIsDirectory = true;
1495
                            oFileProp.nMode |= S_IFDIR;
1496
                        }
1497
                    }
1498
                    CPLFree(pszKey);
1499
                }
1500
                CSLDestroy(papszHeaders);
1501
            }
1502
        }
1503
1504
        if (UseLimitRangeGetInsteadOfHead() && response_code == 206)
1505
        {
1506
            oFileProp.eExists = EXIST_NO;
1507
            oFileProp.fileSize = 0;
1508
            if (sWriteFuncHeaderData.pBuffer != nullptr)
1509
            {
1510
                const char *pszContentRange = strstr(
1511
                    sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
1512
                if (pszContentRange == nullptr)
1513
                    pszContentRange = strstr(sWriteFuncHeaderData.pBuffer,
1514
                                             "content-range: bytes ");
1515
                if (pszContentRange)
1516
                    pszContentRange = strchr(pszContentRange, '/');
1517
                if (pszContentRange)
1518
                {
1519
                    oFileProp.eExists = EXIST_YES;
1520
                    oFileProp.fileSize = static_cast<GUIntBig>(
1521
                        CPLAtoGIntBig(pszContentRange + 1));
1522
                }
1523
1524
                // Add first bytes to cache
1525
                if (sWriteFuncData.pBuffer != nullptr)
1526
                {
1527
                    size_t nOffset = 0;
1528
                    while (nOffset < sWriteFuncData.nSize)
1529
                    {
1530
                        const size_t nToCache =
1531
                            std::min<size_t>(sWriteFuncData.nSize - nOffset,
1532
                                             knDOWNLOAD_CHUNK_SIZE);
1533
                        poFS->AddRegion(m_pszURL, nOffset, nToCache,
1534
                                        sWriteFuncData.pBuffer + nOffset);
1535
                        nOffset += nToCache;
1536
                    }
1537
                }
1538
            }
1539
        }
1540
        else if (IsDirectoryFromExists(osVerb.c_str(),
1541
                                       static_cast<int>(response_code)))
1542
        {
1543
            oFileProp.eExists = EXIST_YES;
1544
            oFileProp.fileSize = 0;
1545
            oFileProp.bIsDirectory = true;
1546
        }
1547
        // 405 = Method not allowed
1548
        else if (response_code == 405 && !bRetryWithGet && osVerb == "HEAD")
1549
        {
1550
            CPLDebug(poFS->GetDebugKey(),
1551
                     "HEAD not allowed. Retrying with GET");
1552
            bRetryWithGet = true;
1553
            CPLFree(sWriteFuncData.pBuffer);
1554
            CPLFree(sWriteFuncHeaderData.pBuffer);
1555
            curl_easy_cleanup(hCurlHandle);
1556
            goto retry;
1557
        }
1558
        else if (response_code == 416)
1559
        {
1560
            oFileProp.eExists = EXIST_YES;
1561
            oFileProp.fileSize = 0;
1562
        }
1563
        else if (response_code != 200)
1564
        {
1565
            // Look if we should attempt a retry
1566
            if (oRetryContext.CanRetry(static_cast<int>(response_code),
1567
                                       sWriteFuncHeaderData.pBuffer,
1568
                                       szCurlErrBuf))
1569
            {
1570
                CPLError(CE_Warning, CPLE_AppDefined,
1571
                         "HTTP error code: %d - %s. "
1572
                         "Retrying again in %.1f secs",
1573
                         static_cast<int>(response_code), m_pszURL,
1574
                         oRetryContext.GetCurrentDelay());
1575
                CPLSleep(oRetryContext.GetCurrentDelay());
1576
                CPLFree(sWriteFuncData.pBuffer);
1577
                CPLFree(sWriteFuncHeaderData.pBuffer);
1578
                curl_easy_cleanup(hCurlHandle);
1579
                goto retry;
1580
            }
1581
1582
            if (sWriteFuncData.pBuffer != nullptr)
1583
            {
1584
                if (UseLimitRangeGetInsteadOfHead() &&
1585
                    CanRestartOnError(sWriteFuncData.pBuffer,
1586
                                      sWriteFuncHeaderData.pBuffer, bSetError))
1587
                {
1588
                    oFileProp.bHasComputedFileSize = false;
1589
                    CPLFree(sWriteFuncData.pBuffer);
1590
                    CPLFree(sWriteFuncHeaderData.pBuffer);
1591
                    curl_easy_cleanup(hCurlHandle);
1592
                    return GetFileSizeOrHeaders(bSetError, bGetHeaders);
1593
                }
1594
                else
1595
                {
1596
                    CPL_IGNORE_RET_VAL(CanRestartOnError(
1597
                        sWriteFuncData.pBuffer, sWriteFuncHeaderData.pBuffer,
1598
                        bSetError));
1599
                }
1600
            }
1601
1602
            // If there was no VSI error thrown in the process,
1603
            // fail by reporting the HTTP response code.
1604
            if (bSetError && VSIGetLastErrorNo() == 0)
1605
            {
1606
                if (strlen(szCurlErrBuf) > 0)
1607
                {
1608
                    if (response_code == 0)
1609
                    {
1610
                        VSIError(VSIE_HttpError, "CURL error: %s",
1611
                                 szCurlErrBuf);
1612
                    }
1613
                    else
1614
                    {
1615
                        VSIError(VSIE_HttpError, "HTTP response code: %d - %s",
1616
                                 static_cast<int>(response_code), szCurlErrBuf);
1617
                    }
1618
                }
1619
                else
1620
                {
1621
                    VSIError(VSIE_HttpError, "HTTP response code: %d",
1622
                             static_cast<int>(response_code));
1623
                }
1624
            }
1625
            else
1626
            {
1627
                if (response_code != 400 && response_code != 404)
1628
                {
1629
                    CPLError(CE_Warning, CPLE_AppDefined,
1630
                             "HTTP response code on %s: %d", osURL.c_str(),
1631
                             static_cast<int>(response_code));
1632
                }
1633
                // else a CPLDebug() is emitted below
1634
            }
1635
1636
            oFileProp.eExists = EXIST_NO;
1637
            oFileProp.nHTTPCode = static_cast<int>(response_code);
1638
            oFileProp.fileSize = 0;
1639
        }
1640
        else if (sWriteFuncData.pBuffer != nullptr)
1641
        {
1642
            ProcessGetFileSizeResult(
1643
                reinterpret_cast<const char *>(sWriteFuncData.pBuffer));
1644
        }
1645
1646
        // Try to guess if this is a directory. Generally if this is a
1647
        // directory, curl will retry with an URL with slash added.
1648
        if (!osEffectiveURL.empty() &&
1649
            strncmp(osURL.c_str(), osEffectiveURL.c_str(), osURL.size()) == 0 &&
1650
            osEffectiveURL[osURL.size()] == '/' &&
1651
            oFileProp.eExists != EXIST_NO)
1652
        {
1653
            oFileProp.eExists = EXIST_YES;
1654
            oFileProp.fileSize = 0;
1655
            oFileProp.bIsDirectory = true;
1656
        }
1657
        else if (osURL.back() == '/')
1658
        {
1659
            oFileProp.bIsDirectory = true;
1660
        }
1661
1662
        if (!bAlreadyLogged)
1663
        {
1664
            CPLDebug(poFS->GetDebugKey(),
1665
                     "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
1666
                     osURL.c_str(), oFileProp.fileSize,
1667
                     static_cast<int>(response_code));
1668
        }
1669
    }
1670
1671
    CPLFree(sWriteFuncData.pBuffer);
1672
    CPLFree(sWriteFuncHeaderData.pBuffer);
1673
    curl_easy_cleanup(hCurlHandle);
1674
1675
    oFileProp.bHasComputedFileSize = true;
1676
    if (mtime > 0)
1677
        oFileProp.mTime = mtime;
1678
    // Do not update cached file properties if cURL returned a non-HTTP error
1679
    if (response_code != 0)
1680
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
1681
1682
    return oFileProp.fileSize;
1683
}
1684
1685
/************************************************************************/
1686
/*                               Exists()                               */
1687
/************************************************************************/
1688
1689
bool VSICurlHandle::Exists(bool bSetError)
1690
{
1691
    if (oFileProp.eExists == EXIST_UNKNOWN)
1692
    {
1693
        GetFileSize(bSetError);
1694
    }
1695
    else if (oFileProp.eExists == EXIST_NO)
1696
    {
1697
        // If there was no VSI error thrown in the process,
1698
        // and we know the HTTP error code of the first request where the
1699
        // file could not be retrieved, fail by reporting the HTTP code.
1700
        if (bSetError && VSIGetLastErrorNo() == 0 && oFileProp.nHTTPCode)
1701
        {
1702
            VSIError(VSIE_HttpError, "HTTP response code: %d",
1703
                     oFileProp.nHTTPCode);
1704
        }
1705
    }
1706
1707
    return oFileProp.eExists == EXIST_YES;
1708
}
1709
1710
/************************************************************************/
1711
/*                                Tell()                                */
1712
/************************************************************************/
1713
1714
/** Return the current value of curOffset, without any side effect. */
vsi_l_offset VSICurlHandle::Tell()
{
    const vsi_l_offset nCurrentOffset = curOffset;
    return nCurrentOffset;
}
1718
1719
/************************************************************************/
1720
/*                       GetRedirectURLIfValid()                        */
1721
/************************************************************************/
1722
1723
std::string
1724
VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired,
1725
                                     CPLStringList &aosHTTPOptions) const
1726
{
1727
    bHasExpired = false;
1728
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
1729
1730
    std::string osURL(m_pszURL + m_osQueryString);
1731
    if (oFileProp.bS3LikeRedirect)
1732
    {
1733
        if (time(nullptr) + 1 < oFileProp.nExpireTimestampLocal)
1734
        {
1735
            CPLDebug(poFS->GetDebugKey(),
1736
                     "Using redirect URL as it looks to be still valid "
1737
                     "(%d seconds left)",
1738
                     static_cast<int>(oFileProp.nExpireTimestampLocal -
1739
                                      time(nullptr)));
1740
            osURL = oFileProp.osRedirectURL;
1741
        }
1742
        else
1743
        {
1744
            CPLDebug(poFS->GetDebugKey(),
1745
                     "Redirect URL has expired. Using original URL");
1746
            oFileProp.bS3LikeRedirect = false;
1747
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
1748
            bHasExpired = true;
1749
        }
1750
    }
1751
    else if (!oFileProp.osRedirectURL.empty())
1752
    {
1753
        osURL = oFileProp.osRedirectURL;
1754
        bHasExpired = false;
1755
    }
1756
1757
    if (m_pszURL != osURL)
1758
    {
1759
        const char *pszAuthorizationHeaderAllowed = VSIGetPathSpecificOption(
1760
            m_osFilename.c_str(),
1761
            "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT",
1762
            "IF_SAME_HOST");
1763
        if (EQUAL(pszAuthorizationHeaderAllowed, "IF_SAME_HOST"))
1764
        {
1765
            const auto ExtractServer = [](const std::string &s)
1766
            {
1767
                size_t afterHTTPPos = 0;
1768
                if (STARTS_WITH(s.c_str(), "http://"))
1769
                    afterHTTPPos = strlen("http://");
1770
                else if (STARTS_WITH(s.c_str(), "https://"))
1771
                    afterHTTPPos = strlen("https://");
1772
                const auto posSlash = s.find('/', afterHTTPPos);
1773
                if (posSlash != std::string::npos)
1774
                    return s.substr(afterHTTPPos, posSlash - afterHTTPPos);
1775
                else
1776
                    return s.substr(afterHTTPPos);
1777
            };
1778
1779
            if (ExtractServer(osURL) != ExtractServer(m_pszURL))
1780
            {
1781
                aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED",
1782
                                            "NO");
1783
            }
1784
        }
1785
        else if (!CPLTestBool(pszAuthorizationHeaderAllowed))
1786
        {
1787
            aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", "NO");
1788
        }
1789
    }
1790
1791
    return osURL;
1792
}
1793
1794
/************************************************************************/
1795
/*                           CurrentDownload                            */
1796
/************************************************************************/
1797
1798
namespace
1799
{
1800
struct CurrentDownload
1801
{
1802
    VSICurlFilesystemHandlerBase *m_poFS = nullptr;
1803
    std::string m_osURL{};
1804
    vsi_l_offset m_nStartOffset = 0;
1805
    int m_nBlocks = 0;
1806
    std::string m_osAlreadyDownloadedData{};
1807
    bool m_bHasAlreadyDownloadedData = false;
1808
1809
    CurrentDownload(VSICurlFilesystemHandlerBase *poFS, const char *pszURL,
1810
                    vsi_l_offset startOffset, int nBlocks)
1811
        : m_poFS(poFS), m_osURL(pszURL), m_nStartOffset(startOffset),
1812
          m_nBlocks(nBlocks)
1813
    {
1814
        auto res = m_poFS->NotifyStartDownloadRegion(m_osURL, m_nStartOffset,
1815
                                                     m_nBlocks);
1816
        m_bHasAlreadyDownloadedData = res.first;
1817
        m_osAlreadyDownloadedData = std::move(res.second);
1818
    }
1819
1820
    bool HasAlreadyDownloadedData() const
1821
    {
1822
        return m_bHasAlreadyDownloadedData;
1823
    }
1824
1825
    const std::string &GetAlreadyDownloadedData() const
1826
    {
1827
        return m_osAlreadyDownloadedData;
1828
    }
1829
1830
    void SetData(const std::string &osData)
1831
    {
1832
        CPLAssert(!m_bHasAlreadyDownloadedData);
1833
        m_bHasAlreadyDownloadedData = true;
1834
        m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks,
1835
                                         osData);
1836
    }
1837
1838
    ~CurrentDownload()
1839
    {
1840
        if (!m_bHasAlreadyDownloadedData)
1841
            m_poFS->NotifyStopDownloadRegion(m_osURL, m_nStartOffset, m_nBlocks,
1842
                                             std::string());
1843
    }
1844
1845
    CurrentDownload(const CurrentDownload &) = delete;
1846
    CurrentDownload &operator=(const CurrentDownload &) = delete;
1847
};
1848
}  // namespace
1849
1850
/************************************************************************/
1851
/*                     NotifyStartDownloadRegion()                      */
1852
/************************************************************************/
1853
1854
/** Indicate intent at downloading a new region.
1855
 *
1856
 * If the region is already in download in another thread, then wait for its
1857
 * completion.
1858
 *
1859
 * Returns:
1860
 * - (false, empty string) if a new download is needed
1861
 * - (true, region_content) if we have been waiting for a download of the same
1862
 *   region to be completed and got its result. Note that region_content will be
1863
 *   empty if the download of that region failed.
1864
 */
1865
std::pair<bool, std::string>
1866
VSICurlFilesystemHandlerBase::NotifyStartDownloadRegion(
1867
    const std::string &osURL, vsi_l_offset startOffset, int nBlocks)
1868
{
1869
    std::string osId(osURL);
1870
    osId += '_';
1871
    osId += std::to_string(startOffset);
1872
    osId += '_';
1873
    osId += std::to_string(nBlocks);
1874
1875
    m_oMutex.lock();
1876
    auto oIter = m_oMapRegionInDownload.find(osId);
1877
    if (oIter != m_oMapRegionInDownload.end())
1878
    {
1879
        auto &region = *(oIter->second);
1880
        std::unique_lock<std::mutex> oRegionLock(region.oMutex);
1881
        m_oMutex.unlock();
1882
        region.nWaiters++;
1883
        while (region.bDownloadInProgress)
1884
        {
1885
            region.oCond.wait(oRegionLock);
1886
        }
1887
        std::string osRet = region.osData;
1888
        region.nWaiters--;
1889
        region.oCond.notify_one();
1890
        return std::pair<bool, std::string>(true, osRet);
1891
    }
1892
    else
1893
    {
1894
        auto poRegionInDownload = std::make_unique<RegionInDownload>();
1895
        poRegionInDownload->bDownloadInProgress = true;
1896
        m_oMapRegionInDownload[osId] = std::move(poRegionInDownload);
1897
        m_oMutex.unlock();
1898
        return std::pair<bool, std::string>(false, std::string());
1899
    }
1900
}
1901
1902
/************************************************************************/
1903
/*                      NotifyStopDownloadRegion()                      */
1904
/************************************************************************/
1905
1906
void VSICurlFilesystemHandlerBase::NotifyStopDownloadRegion(
1907
    const std::string &osURL, vsi_l_offset startOffset, int nBlocks,
1908
    const std::string &osData)
1909
{
1910
    std::string osId(osURL);
1911
    osId += '_';
1912
    osId += std::to_string(startOffset);
1913
    osId += '_';
1914
    osId += std::to_string(nBlocks);
1915
1916
    m_oMutex.lock();
1917
    auto oIter = m_oMapRegionInDownload.find(osId);
1918
    CPLAssert(oIter != m_oMapRegionInDownload.end());
1919
    auto &region = *(oIter->second);
1920
    {
1921
        std::unique_lock<std::mutex> oRegionLock(region.oMutex);
1922
        if (region.nWaiters)
1923
        {
1924
            region.osData = osData;
1925
            region.bDownloadInProgress = false;
1926
            region.oCond.notify_all();
1927
1928
            while (region.nWaiters)
1929
            {
1930
                region.oCond.wait(oRegionLock);
1931
            }
1932
        }
1933
    }
1934
    m_oMapRegionInDownload.erase(oIter);
1935
    m_oMutex.unlock();
1936
}
1937
1938
/************************************************************************/
1939
/*                           DownloadRegion()                           */
1940
/************************************************************************/
1941
1942
/** Download nBlocks chunks of the resource, starting at startOffset.
 *
 * The requested byte range is
 * [startOffset, startOffset + nBlocks * VSICURLGetDownloadChunkSize() - 1],
 * with the end clamped to fileSize - 1 when the file size is already known.
 *
 * If another thread is already downloading the same region, its result is
 * awaited and returned instead of issuing a new request.
 *
 * On success the data is passed to DownloadRegionPostProcess(), published
 * with currentDownload.SetData(), and returned.
 *
 * Returns an empty string when the handle was interrupted, when the file is
 * known not to exist, when authentication fails after a 401, or when the
 * request ends with an error that is not retried.
 */
std::string VSICurlHandle::DownloadRegion(const vsi_l_offset startOffset,
1943
                                          const int nBlocks)
1944
{
1945
    if (bInterrupted && bStopOnInterruptUntilUninstall)
1946
        return std::string();
1947
1948
    if (oFileProp.eExists == EXIST_NO)
1949
        return std::string();
1950
1951
    // Check if there is not a download of the same region in progress in
1952
    // another thread, and if so wait for it to be completed
1953
    CurrentDownload currentDownload(poFS, m_pszURL, startOffset, nBlocks);
1954
    if (currentDownload.HasAlreadyDownloadedData())
1955
    {
1956
        return currentDownload.GetAlreadyDownloadedData();
1957
    }
1958
// Restart point used when CanRestartOnError() returns true: the multi
// handle, query string, redirect URL and retry context are all rebuilt.
1959
begin:
1960
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
1961
1962
    UpdateQueryString();
1963
1964
    bool bHasExpired = false;
1965
1966
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
1967
    std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
1968
    bool bUsedRedirect = osURL != m_pszURL;
1969
1970
    WriteFuncStruct sWriteFuncData;
1971
    WriteFuncStruct sWriteFuncHeaderData;
1972
    CPLHTTPRetryContext oRetryContext(m_oRetryParameters);
1973
// Retry point: every attempt uses a fresh easy handle and re-initialized
// write structures, but keeps osURL, aosHTTPOptions and oRetryContext.
1974
retry:
1975
    CURL *hCurlHandle = curl_easy_init();
1976
    struct curl_slist *headers =
1977
        VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
1978
1979
    if (!AllowAutomaticRedirection())
1980
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
1981
1982
    VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk,
1983
                               pReadCbkUserData);
1984
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1985
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
1986
                               VSICurlHandleWriteFunc);
1987
1988
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
1989
                               nullptr);
1990
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
1991
                               &sWriteFuncHeaderData);
1992
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
1993
                               VSICurlHandleWriteFunc);
1994
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
1995
    sWriteFuncHeaderData.nStartOffset = startOffset;
1996
    sWriteFuncHeaderData.nEndOffset =
1997
        startOffset +
1998
        static_cast<vsi_l_offset>(nBlocks) * VSICURLGetDownloadChunkSize() - 1;
1999
    // Some servers don't like we try to read after end-of-file (#5786).
2000
    if (oFileProp.bHasComputedFileSize &&
2001
        sWriteFuncHeaderData.nEndOffset >= oFileProp.fileSize)
2002
    {
2003
        sWriteFuncHeaderData.nEndOffset = oFileProp.fileSize - 1;
2004
    }
2005
2006
    char rangeStr[512] = {};
2007
    snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2008
             startOffset, sWriteFuncHeaderData.nEndOffset);
2009
2010
    if (ENABLE_DEBUG)
2011
        CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...", rangeStr,
2012
                 osURL.c_str());
2013
2014
    std::string osHeaderRange;  // leave in this scope
2015
    if (sWriteFuncHeaderData.bIsHTTP)
2016
    {
2017
        osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr);
2018
        // So it gets included in Azure signature
2019
        headers = curl_slist_append(headers, osHeaderRange.c_str());
2020
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
2021
    }
2022
    else
2023
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
2024
2025
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
2026
    szCurlErrBuf[0] = '\0';
2027
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
2028
2029
    headers = GetCurlHeaders("GET", headers);
2030
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
2031
2032
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FILETIME, 1);
2033
2034
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle, &m_bInterrupt);
2035
2036
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
2037
2038
    curl_slist_free_all(headers);
2039
2040
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
2041
2042
    if (sWriteFuncData.bInterrupted || m_bInterrupt)
2043
    {
2044
        bInterrupted = true;
2045
2046
        // Notify that the download of the current region is finished
2047
        currentDownload.SetData(std::string());
2048
2049
        CPLFree(sWriteFuncData.pBuffer);
2050
        CPLFree(sWriteFuncHeaderData.pBuffer);
2051
        curl_easy_cleanup(hCurlHandle);
2052
2053
        return std::string();
2054
    }
2055
2056
    long response_code = 0;
2057
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
2058
2059
    if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0')
2060
    {
2061
        CPLDebug(poFS->GetDebugKey(),
2062
                 "DownloadRegion(%s): response_code=%d, msg=%s", osURL.c_str(),
2063
                 static_cast<int>(response_code), szCurlErrBuf);
2064
    }
2065
2066
    long mtime = 0;
2067
    curl_easy_getinfo(hCurlHandle, CURLINFO_FILETIME, &mtime);
2068
    if (mtime > 0)
2069
    {
2070
        oFileProp.mTime = mtime;
2071
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
2072
    }
2073
2074
    if (ENABLE_DEBUG)
2075
        CPLDebug(poFS->GetDebugKey(), "Got response_code=%ld", response_code);
2076
    // NOTE(review): bUsedRedirect is computed as osURL != m_pszURL, while
    // GetRedirectURLIfValid() starts from m_pszURL + m_osQueryString, so a
    // non-empty query string alone would presumably make it true -- confirm.
    // NOTE(review): the fallback below resets osURL to m_pszURL (without
    // m_osQueryString) and leaves aosHTTPOptions untouched -- confirm this
    // is intended.
2077
    if (bUsedRedirect &&
2078
        (response_code == 403 ||
2079
         // Below case is in particular for
2080
         // gdalinfo
2081
         // /vsicurl/https://lpdaac.earthdata.nasa.gov/lp-prod-protected/HLSS30.015/HLS.S30.T10TEK.2020273T190109.v1.5.B8A.tif
2082
         // --config GDAL_DISABLE_READDIR_ON_OPEN EMPTY_DIR --config
2083
         // GDAL_HTTP_COOKIEFILE /tmp/cookie.txt --config GDAL_HTTP_COOKIEJAR
2084
         // /tmp/cookie.txt We got the redirect URL from a HEAD request, but it
2085
         // is not valid for a GET. So retry with GET on original URL to get a
2086
         // redirect URL valid for it.
2087
         (response_code == 400 &&
2088
          osURL.find(".cloudfront.net") != std::string::npos)))
2089
    {
2090
        CPLDebug(poFS->GetDebugKey(),
2091
                 "Got an error with redirect URL. Retrying with original one");
2092
        oFileProp.bS3LikeRedirect = false;
2093
        poFS->SetCachedFileProp(m_pszURL, oFileProp);
2094
        bUsedRedirect = false;
2095
        osURL = m_pszURL;
2096
        CPLFree(sWriteFuncData.pBuffer);
2097
        CPLFree(sWriteFuncHeaderData.pBuffer);
2098
        curl_easy_cleanup(hCurlHandle);
2099
        goto retry;
2100
    }
2101
2102
    if (response_code == 401 && oRetryContext.CanRetry())
2103
    {
2104
        CPLDebug(poFS->GetDebugKey(), "Unauthorized, trying to authenticate");
        // Buffers and handle are released before Authenticate() because
        // both outcomes (retry or give up) leave this attempt behind.
2105
        CPLFree(sWriteFuncData.pBuffer);
2106
        CPLFree(sWriteFuncHeaderData.pBuffer);
2107
        curl_easy_cleanup(hCurlHandle);
2108
        if (Authenticate(m_osFilename.c_str()))
2109
            goto retry;
2110
        return std::string();
2111
    }
2112
2113
    UpdateRedirectInfo(hCurlHandle, sWriteFuncHeaderData);
2114
    // Anything other than the response codes listed below, or an error
    // flagged while receiving the headers, is handled as a failure.
2115
    if ((response_code != 200 && response_code != 206 && response_code != 225 &&
2116
         response_code != 226 && response_code != 426) ||
2117
        sWriteFuncHeaderData.bError)
2118
    {
2119
        if (sWriteFuncData.pBuffer != nullptr &&
2120
            CanRestartOnError(
2121
                reinterpret_cast<const char *>(sWriteFuncData.pBuffer),
2122
                reinterpret_cast<const char *>(sWriteFuncHeaderData.pBuffer),
2123
                true))
2124
        {
2125
            CPLFree(sWriteFuncData.pBuffer);
2126
            CPLFree(sWriteFuncHeaderData.pBuffer);
2127
            curl_easy_cleanup(hCurlHandle);
2128
            goto begin;
2129
        }
2130
2131
        // Look if we should attempt a retry
2132
        if (oRetryContext.CanRetry(static_cast<int>(response_code),
2133
                                   sWriteFuncHeaderData.pBuffer, szCurlErrBuf))
2134
        {
2135
            CPLError(CE_Warning, CPLE_AppDefined,
2136
                     "HTTP error code: %d - %s. "
2137
                     "Retrying again in %.1f secs",
2138
                     static_cast<int>(response_code), m_pszURL,
2139
                     oRetryContext.GetCurrentDelay());
2140
            CPLSleep(oRetryContext.GetCurrentDelay());
2141
            CPLFree(sWriteFuncData.pBuffer);
2142
            CPLFree(sWriteFuncHeaderData.pBuffer);
2143
            curl_easy_cleanup(hCurlHandle);
2144
            goto retry;
2145
        }
2146
2147
        if (response_code >= 400 && szCurlErrBuf[0] != '\0')
2148
        {
2149
            if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
2150
                CPLError(
2151
                    CE_Failure, CPLE_AppDefined,
2152
                    "%d: %s, Range downloading not supported by this server!",
2153
                    static_cast<int>(response_code), szCurlErrBuf);
2154
            else
2155
                CPLError(CE_Failure, CPLE_AppDefined, "%d: %s",
2156
                         static_cast<int>(response_code), szCurlErrBuf);
2157
        }
2158
        else if (response_code == 416) /* Range Not Satisfiable */
2159
        {
2160
            if (sWriteFuncData.pBuffer)
2161
            {
2162
                CPLError(
2163
                    CE_Failure, CPLE_AppDefined,
2164
                    "%d: Range downloading not supported by this server: %s",
2165
                    static_cast<int>(response_code), sWriteFuncData.pBuffer);
2166
            }
2167
            else
2168
            {
2169
                CPLError(CE_Failure, CPLE_AppDefined,
2170
                         "%d: Range downloading not supported by this server",
2171
                         static_cast<int>(response_code));
2172
            }
2173
        }
        // A failed request for the very first region, while the file size
        // is still unknown, is recorded as the file not existing.
2174
        if (!oFileProp.bHasComputedFileSize && startOffset == 0)
2175
        {
2176
            oFileProp.bHasComputedFileSize = true;
2177
            oFileProp.fileSize = 0;
2178
            oFileProp.eExists = EXIST_NO;
2179
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
2180
        }
2181
        CPLFree(sWriteFuncData.pBuffer);
2182
        CPLFree(sWriteFuncHeaderData.pBuffer);
2183
        curl_easy_cleanup(hCurlHandle);
2184
        return std::string();
2185
    }
2186
2187
    if (!oFileProp.bHasComputedFileSize && sWriteFuncHeaderData.pBuffer)
2188
    {
2189
        // Try to retrieve the filesize from the HTTP headers
2190
        // if in the form: "Content-Range: bytes x-y/filesize".
2191
        char *pszContentRange =
2192
            strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
2193
        if (pszContentRange == nullptr)
2194
            pszContentRange =
2195
                strstr(sWriteFuncHeaderData.pBuffer, "content-range: bytes ");
2196
        if (pszContentRange)
2197
        {
            // The header buffer is modified in place: the line is
            // NUL-terminated at its end-of-line characters before parsing.
2198
            char *pszEOL = strchr(pszContentRange, '\n');
2199
            if (pszEOL)
2200
            {
2201
                *pszEOL = 0;
2202
                pszEOL = strchr(pszContentRange, '\r');
2203
                if (pszEOL)
2204
                    *pszEOL = 0;
2205
                char *pszSlash = strchr(pszContentRange, '/');
2206
                if (pszSlash)
2207
                {
2208
                    pszSlash++;
2209
                    oFileProp.fileSize = CPLScanUIntBig(
2210
                        pszSlash, static_cast<int>(strlen(pszSlash)));
2211
                }
2212
            }
2213
        }
2214
        else if (STARTS_WITH(m_pszURL, "ftp"))
2215
        {
2216
            // Parse 213 answer for FTP protocol.
2217
            char *pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
2218
            if (pszSize)
2219
            {
2220
                pszSize += 4;
2221
                char *pszEOL = strchr(pszSize, '\n');
2222
                if (pszEOL)
2223
                {
2224
                    *pszEOL = 0;
2225
                    pszEOL = strchr(pszSize, '\r');
2226
                    if (pszEOL)
2227
                        *pszEOL = 0;
2228
2229
                    oFileProp.fileSize = CPLScanUIntBig(
2230
                        pszSize, static_cast<int>(strlen(pszSize)));
2231
                }
2232
            }
2233
        }
2234
        // Only a non-zero size is recorded as the computed file size.
2235
        if (oFileProp.fileSize != 0)
2236
        {
2237
            oFileProp.eExists = EXIST_YES;
2238
2239
            if (ENABLE_DEBUG)
2240
                CPLDebug(poFS->GetDebugKey(),
2241
                         "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
2242
                         m_pszURL, oFileProp.fileSize,
2243
                         static_cast<int>(response_code));
2244
2245
            oFileProp.bHasComputedFileSize = true;
2246
            poFS->SetCachedFileProp(m_pszURL, oFileProp);
2247
        }
2248
    }
2249
2250
    DownloadRegionPostProcess(startOffset, nBlocks, sWriteFuncData.pBuffer,
2251
                              sWriteFuncData.nSize);
2252
2253
    std::string osRet;
2254
    osRet.assign(sWriteFuncData.pBuffer, sWriteFuncData.nSize);
2255
2256
    // Notify that the download of the current region is finished
2257
    currentDownload.SetData(osRet);
2258
2259
    CPLFree(sWriteFuncData.pBuffer);
2260
    CPLFree(sWriteFuncHeaderData.pBuffer);
2261
    curl_easy_cleanup(hCurlHandle);
2262
2263
    return osRet;
2264
}
2265
2266
/************************************************************************/
2267
/*                         UpdateRedirectInfo()                         */
2268
/************************************************************************/
2269
2270
void VSICurlHandle::UpdateRedirectInfo(
2271
    CURL *hCurlHandle, const WriteFuncStruct &sWriteFuncHeaderData)
2272
{
2273
    std::string osEffectiveURL;
2274
    {
2275
        char *pszEffectiveURL = nullptr;
2276
        curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL,
2277
                          &pszEffectiveURL);
2278
        if (pszEffectiveURL)
2279
            osEffectiveURL = pszEffectiveURL;
2280
    }
2281
2282
    if (!oFileProp.bS3LikeRedirect && !osEffectiveURL.empty() &&
2283
        strstr(osEffectiveURL.c_str(), m_pszURL) == nullptr)
2284
    {
2285
        CPLDebug(poFS->GetDebugKey(), "Effective URL: %s",
2286
                 osEffectiveURL.c_str());
2287
2288
        long response_code = 0;
2289
        curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
2290
        if (response_code >= 200 && response_code < 300 &&
2291
            sWriteFuncHeaderData.nTimestampDate > 0 &&
2292
            VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) &&
2293
            !VSICurlIsS3LikeSignedURL(m_pszURL) &&
2294
            CPLTestBool(
2295
                CPLGetConfigOption("CPL_VSIL_CURL_USE_S3_REDIRECT", "TRUE")))
2296
        {
2297
            GIntBig nExpireTimestamp =
2298
                VSICurlGetExpiresFromS3LikeSignedURL(osEffectiveURL.c_str());
2299
            if (nExpireTimestamp > sWriteFuncHeaderData.nTimestampDate + 10)
2300
            {
2301
                const int nValidity = static_cast<int>(
2302
                    nExpireTimestamp - sWriteFuncHeaderData.nTimestampDate);
2303
                CPLDebug(poFS->GetDebugKey(),
2304
                         "Will use redirect URL for the next %d seconds",
2305
                         nValidity);
2306
                // As our local clock might not be in sync with server clock,
2307
                // figure out the expiration timestamp in local time.
2308
                oFileProp.bS3LikeRedirect = true;
2309
                oFileProp.nExpireTimestampLocal = time(nullptr) + nValidity;
2310
                oFileProp.osRedirectURL = std::move(osEffectiveURL);
2311
                poFS->SetCachedFileProp(m_pszURL, oFileProp);
2312
            }
2313
        }
2314
    }
2315
}
2316
2317
/************************************************************************/
2318
/*                     DownloadRegionPostProcess()                      */
2319
/************************************************************************/
2320
2321
void VSICurlHandle::DownloadRegionPostProcess(const vsi_l_offset startOffset,
2322
                                              const int nBlocks,
2323
                                              const char *pBuffer, size_t nSize)
2324
{
2325
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
2326
    lastDownloadedOffset = startOffset + static_cast<vsi_l_offset>(nBlocks) *
2327
                                             knDOWNLOAD_CHUNK_SIZE;
2328
2329
    if (nSize > static_cast<size_t>(nBlocks) * knDOWNLOAD_CHUNK_SIZE)
2330
    {
2331
        if (ENABLE_DEBUG)
2332
            CPLDebug(
2333
                poFS->GetDebugKey(),
2334
                "Got more data than expected : %u instead of %u",
2335
                static_cast<unsigned int>(nSize),
2336
                static_cast<unsigned int>(nBlocks * knDOWNLOAD_CHUNK_SIZE));
2337
    }
2338
2339
    vsi_l_offset l_startOffset = startOffset;
2340
    while (nSize > 0)
2341
    {
2342
#if DEBUG_VERBOSE
2343
        if (ENABLE_DEBUG)
2344
            CPLDebug(poFS->GetDebugKey(), "Add region %u - %u",
2345
                     static_cast<unsigned int>(startOffset),
2346
                     static_cast<unsigned int>(std::min(
2347
                         static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize)));
2348
#endif
2349
        const size_t nChunkSize =
2350
            std::min(static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE), nSize);
2351
        poFS->AddRegion(m_pszURL, l_startOffset, nChunkSize, pBuffer);
2352
        l_startOffset += nChunkSize;
2353
        pBuffer += nChunkSize;
2354
        nSize -= nChunkSize;
2355
    }
2356
}
2357
2358
/************************************************************************/
2359
/*                                Read()                                */
2360
/************************************************************************/
2361
2362
size_t VSICurlHandle::Read(void *const pBufferIn, size_t const nBytes)
2363
{
2364
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
2365
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
2366
    NetworkStatisticsAction oContextAction("Read");
2367
2368
    size_t nBufferRequestSize = nBytes;
2369
    if (nBufferRequestSize == 0)
2370
        return 0;
2371
2372
    void *pBuffer = pBufferIn;
2373
2374
#if DEBUG_VERBOSE
2375
    CPLDebug(poFS->GetDebugKey(), "offset=%d, size=%d",
2376
             static_cast<int>(curOffset), static_cast<int>(nBufferRequestSize));
2377
#endif
2378
2379
    vsi_l_offset iterOffset = curOffset;
2380
    const int knMAX_REGIONS = GetMaxRegions();
2381
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
2382
    while (nBufferRequestSize)
2383
    {
2384
        // Don't try to read after end of file.
2385
        poFS->GetCachedFileProp(m_pszURL, oFileProp);
2386
        if (oFileProp.bHasComputedFileSize && iterOffset >= oFileProp.fileSize)
2387
        {
2388
            if (iterOffset == curOffset)
2389
            {
2390
                CPLDebug(poFS->GetDebugKey(),
2391
                         "Request at offset " CPL_FRMT_GUIB
2392
                         ", after end of file",
2393
                         iterOffset);
2394
            }
2395
            break;
2396
        }
2397
2398
        const vsi_l_offset nOffsetToDownload =
2399
            (iterOffset / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE;
2400
        std::string osRegion;
2401
        std::shared_ptr<std::string> psRegion =
2402
            poFS->GetRegion(m_pszURL, nOffsetToDownload);
2403
        if (psRegion != nullptr)
2404
        {
2405
            osRegion = *psRegion;
2406
        }
2407
        else
2408
        {
2409
            if (nOffsetToDownload == lastDownloadedOffset)
2410
            {
2411
                // In case of consecutive reads (of small size), we use a
2412
                // heuristic that we will read the file sequentially, so
2413
                // we double the requested size to decrease the number of
2414
                // client/server roundtrips.
2415
                constexpr int MAX_CHUNK_SIZE_INCREASE_FACTOR = 128;
2416
                if (nBlocksToDownload < MAX_CHUNK_SIZE_INCREASE_FACTOR)
2417
                    nBlocksToDownload *= 2;
2418
            }
2419
            else
2420
            {
2421
                // Random reads. Cancel the above heuristics.
2422
                nBlocksToDownload = 1;
2423
            }
2424
2425
            // Ensure that we will request at least the number of blocks
2426
            // to satisfy the remaining buffer size to read.
2427
            const vsi_l_offset nEndOffsetToDownload =
2428
                ((iterOffset + nBufferRequestSize + knDOWNLOAD_CHUNK_SIZE - 1) /
2429
                 knDOWNLOAD_CHUNK_SIZE) *
2430
                knDOWNLOAD_CHUNK_SIZE;
2431
            const int nMinBlocksToDownload =
2432
                static_cast<int>((nEndOffsetToDownload - nOffsetToDownload) /
2433
                                 knDOWNLOAD_CHUNK_SIZE);
2434
            if (nBlocksToDownload < nMinBlocksToDownload)
2435
                nBlocksToDownload = nMinBlocksToDownload;
2436
2437
            // Avoid reading already cached data.
2438
            // Note: this might get evicted if concurrent reads are done, but
2439
            // this should not cause bugs. Just missed optimization.
2440
            for (int i = 1; i < nBlocksToDownload; i++)
2441
            {
2442
                if (poFS->GetRegion(m_pszURL, nOffsetToDownload +
2443
                                                  static_cast<vsi_l_offset>(i) *
2444
                                                      knDOWNLOAD_CHUNK_SIZE) !=
2445
                    nullptr)
2446
                {
2447
                    nBlocksToDownload = i;
2448
                    break;
2449
                }
2450
            }
2451
2452
            // We can't download more than knMAX_REGIONS chunks at a time,
2453
            // otherwise the cache will not be big enough to store them and
2454
            // copy their content to the target buffer.
2455
            if (nBlocksToDownload > knMAX_REGIONS)
2456
                nBlocksToDownload = knMAX_REGIONS;
2457
2458
            osRegion = DownloadRegion(nOffsetToDownload, nBlocksToDownload);
2459
            if (osRegion.empty())
2460
            {
2461
                if (!bInterrupted)
2462
                    bError = true;
2463
                return 0;
2464
            }
2465
        }
2466
2467
        const vsi_l_offset nRegionOffset = iterOffset - nOffsetToDownload;
2468
        if (osRegion.size() < nRegionOffset)
2469
        {
2470
            if (iterOffset == curOffset)
2471
            {
2472
                CPLDebug(poFS->GetDebugKey(),
2473
                         "Request at offset " CPL_FRMT_GUIB
2474
                         ", after end of file",
2475
                         iterOffset);
2476
            }
2477
            break;
2478
        }
2479
2480
        const int nToCopy = static_cast<int>(
2481
            std::min(static_cast<vsi_l_offset>(nBufferRequestSize),
2482
                     osRegion.size() - nRegionOffset));
2483
        memcpy(pBuffer, osRegion.data() + nRegionOffset, nToCopy);
2484
        pBuffer = static_cast<char *>(pBuffer) + nToCopy;
2485
        iterOffset += nToCopy;
2486
        nBufferRequestSize -= nToCopy;
2487
        if (osRegion.size() < static_cast<size_t>(knDOWNLOAD_CHUNK_SIZE) &&
2488
            nBufferRequestSize != 0)
2489
        {
2490
            break;
2491
        }
2492
    }
2493
2494
    const size_t ret = static_cast<size_t>(iterOffset - curOffset);
2495
    if (ret != nBytes)
2496
        bEOF = true;
2497
2498
    curOffset = iterOffset;
2499
2500
    return ret;
2501
}
2502
2503
/************************************************************************/
2504
/*                           ReadMultiRange()                           */
2505
/************************************************************************/
2506
2507
int VSICurlHandle::ReadMultiRange(int const nRanges, void **const ppData,
2508
                                  const vsi_l_offset *const panOffsets,
2509
                                  const size_t *const panSizes)
2510
{
2511
    if (bInterrupted && bStopOnInterruptUntilUninstall)
2512
        return FALSE;
2513
2514
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
2515
    if (oFileProp.eExists == EXIST_NO)
2516
        return -1;
2517
2518
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
2519
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
2520
    NetworkStatisticsAction oContextAction("ReadMultiRange");
2521
2522
    const char *pszMultiRangeStrategy =
2523
        CPLGetConfigOption("GDAL_HTTP_MULTIRANGE", "");
2524
    if (EQUAL(pszMultiRangeStrategy, "SINGLE_GET"))
2525
    {
2526
        // Just in case someone needs it, but the interest of this mode is
2527
        // rather dubious now. We could probably remove it
2528
        return ReadMultiRangeSingleGet(nRanges, ppData, panOffsets, panSizes);
2529
    }
2530
    else if (nRanges == 1 || EQUAL(pszMultiRangeStrategy, "SERIAL"))
2531
    {
2532
        return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets,
2533
                                                panSizes);
2534
    }
2535
2536
    UpdateQueryString();
2537
2538
    bool bHasExpired = false;
2539
2540
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
2541
    std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
2542
    if (bHasExpired)
2543
    {
2544
        return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets,
2545
                                                panSizes);
2546
    }
2547
2548
    CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL);
2549
#ifdef CURLPIPE_MULTIPLEX
2550
    // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is
2551
    // used)
2552
    // Not that this does not enable HTTP/1.1 pipeling, which is not
2553
    // recommended for example by Google Cloud Storage.
2554
    // For HTTP/1.1, parallel connections work better since you can get
2555
    // results out of order.
2556
    if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES")))
2557
    {
2558
        curl_multi_setopt(hMultiHandle, CURLMOPT_PIPELINING,
2559
                          CURLPIPE_MULTIPLEX);
2560
    }
2561
#endif
2562
2563
    struct CurlErrBuffer
2564
    {
2565
        std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
2566
    };
2567
2568
    const bool bMergeConsecutiveRanges = CPLTestBool(
2569
        CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE"));
2570
2571
    // Build list of merged requests upfront, each with its own retry context
2572
    struct MergedRequest
2573
    {
2574
        int iFirstRange;
2575
        int iLastRange;
2576
        vsi_l_offset nStartOffset;
2577
        size_t nSize;
2578
        CPLHTTPRetryContext retryContext;
2579
        bool bToRetry = true;  // true initially to trigger first attempt
2580
2581
        MergedRequest(int first, int last, vsi_l_offset start, size_t size,
2582
                      const CPLHTTPRetryParameters &params)
2583
            : iFirstRange(first), iLastRange(last), nStartOffset(start),
2584
              nSize(size), retryContext(params)
2585
        {
2586
        }
2587
    };
2588
2589
    std::vector<MergedRequest> asMergedRequests;
2590
    for (int i = 0; i < nRanges;)
2591
    {
2592
        size_t nSize = 0;
2593
        int iNext = i;
2594
        // Identify consecutive ranges
2595
        while (bMergeConsecutiveRanges && iNext + 1 < nRanges &&
2596
               panOffsets[iNext] + panSizes[iNext] == panOffsets[iNext + 1])
2597
        {
2598
            nSize += panSizes[iNext];
2599
            iNext++;
2600
        }
2601
        nSize += panSizes[iNext];
2602
2603
        if (nSize == 0)
2604
        {
2605
            i = iNext + 1;
2606
            continue;
2607
        }
2608
2609
        asMergedRequests.emplace_back(i, iNext, panOffsets[i], nSize,
2610
                                      m_oRetryParameters);
2611
        i = iNext + 1;
2612
    }
2613
2614
    if (asMergedRequests.empty())
2615
        return 0;
2616
2617
    int nRet = 0;
2618
    size_t nTotalDownloaded = 0;
2619
2620
    // Retry loop: re-issue only failed requests that are retryable
2621
    while (true)
2622
    {
2623
        const size_t nRequests = asMergedRequests.size();
2624
        std::vector<CURL *> aHandles(nRequests, nullptr);
2625
        std::vector<WriteFuncStruct> asWriteFuncData(nRequests);
2626
        std::vector<WriteFuncStruct> asWriteFuncHeaderData(nRequests);
2627
        std::vector<char *> apszRanges(nRequests, nullptr);
2628
        std::vector<struct curl_slist *> aHeaders(nRequests, nullptr);
2629
        std::vector<CurlErrBuffer> asCurlErrors(nRequests);
2630
2631
        bool bAnyHandle = false;
2632
        for (size_t iReq = 0; iReq < nRequests; iReq++)
2633
        {
2634
            if (!asMergedRequests[iReq].bToRetry)
2635
                continue;
2636
            asMergedRequests[iReq].bToRetry = false;
2637
2638
            CURL *hCurlHandle = curl_easy_init();
2639
            aHandles[iReq] = hCurlHandle;
2640
            bAnyHandle = true;
2641
2642
            struct curl_slist *headers = VSICurlSetOptions(
2643
                hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
2644
2645
            VSICURLInitWriteFuncStruct(&asWriteFuncData[iReq], this, pfnReadCbk,
2646
                                       pReadCbkUserData);
2647
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
2648
                                       &asWriteFuncData[iReq]);
2649
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
2650
                                       VSICurlHandleWriteFunc);
2651
2652
            VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[iReq], nullptr,
2653
                                       nullptr, nullptr);
2654
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
2655
                                       &asWriteFuncHeaderData[iReq]);
2656
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
2657
                                       VSICurlHandleWriteFunc);
2658
            asWriteFuncHeaderData[iReq].bIsHTTP = STARTS_WITH(m_pszURL, "http");
2659
            asWriteFuncHeaderData[iReq].nStartOffset =
2660
                asMergedRequests[iReq].nStartOffset;
2661
            asWriteFuncHeaderData[iReq].nEndOffset =
2662
                asMergedRequests[iReq].nStartOffset +
2663
                asMergedRequests[iReq].nSize - 1;
2664
2665
            char rangeStr[512] = {};
2666
            snprintf(rangeStr, sizeof(rangeStr),
2667
                     CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2668
                     asWriteFuncHeaderData[iReq].nStartOffset,
2669
                     asWriteFuncHeaderData[iReq].nEndOffset);
2670
2671
            if (ENABLE_DEBUG)
2672
                CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
2673
                         rangeStr, osURL.c_str());
2674
2675
            if (asWriteFuncHeaderData[iReq].bIsHTTP)
2676
            {
2677
                // So it gets included in Azure signature
2678
                char *pszRange =
2679
                    CPLStrdup(CPLSPrintf("Range: bytes=%s", rangeStr));
2680
                apszRanges[iReq] = pszRange;
2681
                headers = curl_slist_append(headers, pszRange);
2682
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
2683
            }
2684
            else
2685
            {
2686
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
2687
                                           rangeStr);
2688
            }
2689
2690
            asCurlErrors[iReq].szCurlErrBuf[0] = '\0';
2691
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
2692
                                       &asCurlErrors[iReq].szCurlErrBuf[0]);
2693
2694
            headers = GetCurlHeaders("GET", headers);
2695
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
2696
                                       headers);
2697
            aHeaders[iReq] = headers;
2698
            curl_multi_add_handle(hMultiHandle, hCurlHandle);
2699
        }
2700
2701
        if (bAnyHandle)
2702
        {
2703
            VSICURLMultiPerform(hMultiHandle);
2704
        }
2705
2706
        // Process results
2707
        bool bRetry = false;
2708
        double dfMaxDelay = 0.0;
2709
        for (size_t iReq = 0; iReq < nRequests; iReq++)
2710
        {
2711
            if (!aHandles[iReq])
2712
                continue;
2713
2714
            long response_code = 0;
2715
            curl_easy_getinfo(aHandles[iReq], CURLINFO_HTTP_CODE,
2716
                              &response_code);
2717
2718
            if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0')
2719
            {
2720
                char rangeStr[512] = {};
2721
                snprintf(rangeStr, sizeof(rangeStr),
2722
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2723
                         asWriteFuncHeaderData[iReq].nStartOffset,
2724
                         asWriteFuncHeaderData[iReq].nEndOffset);
2725
2726
                const char *pszErrorMsg = &asCurlErrors[iReq].szCurlErrBuf[0];
2727
                CPLDebug(poFS->GetDebugKey(),
2728
                         "ReadMultiRange(%s), %s: response_code=%d, msg=%s",
2729
                         osURL.c_str(), rangeStr,
2730
                         static_cast<int>(response_code), pszErrorMsg);
2731
            }
2732
2733
            if ((response_code != 206 && response_code != 225) ||
2734
                asWriteFuncHeaderData[iReq].nEndOffset + 1 !=
2735
                    asWriteFuncHeaderData[iReq].nStartOffset +
2736
                        asWriteFuncData[iReq].nSize)
2737
            {
2738
                char rangeStr[512] = {};
2739
                snprintf(rangeStr, sizeof(rangeStr),
2740
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
2741
                         asWriteFuncHeaderData[iReq].nStartOffset,
2742
                         asWriteFuncHeaderData[iReq].nEndOffset);
2743
2744
                // Look if we should attempt a retry
2745
                if (asMergedRequests[iReq].retryContext.CanRetry(
2746
                        static_cast<int>(response_code),
2747
                        asWriteFuncData[iReq].pBuffer,
2748
                        &asCurlErrors[iReq].szCurlErrBuf[0]))
2749
                {
2750
                    CPLError(
2751
                        CE_Warning, CPLE_AppDefined,
2752
                        "HTTP error code for %s range %s: %d. "
2753
                        "Retrying again in %.1f secs",
2754
                        osURL.c_str(), rangeStr,
2755
                        static_cast<int>(response_code),
2756
                        asMergedRequests[iReq].retryContext.GetCurrentDelay());
2757
                    dfMaxDelay = std::max(
2758
                        dfMaxDelay,
2759
                        asMergedRequests[iReq].retryContext.GetCurrentDelay());
2760
                    asMergedRequests[iReq].bToRetry = true;
2761
                    bRetry = true;
2762
                }
2763
                else
2764
                {
2765
                    CPLError(CE_Failure, CPLE_AppDefined,
2766
                             "Request for %s failed with response_code=%ld",
2767
                             rangeStr, response_code);
2768
                    nRet = -1;
2769
                }
2770
            }
2771
            else if (nRet == 0)
2772
            {
2773
                size_t nOffset = 0;
2774
                size_t nRemainingSize = asWriteFuncData[iReq].nSize;
2775
                nTotalDownloaded += nRemainingSize;
2776
                for (int iRange = asMergedRequests[iReq].iFirstRange;
2777
                     iRange <= asMergedRequests[iReq].iLastRange; iRange++)
2778
                {
2779
                    if (nRemainingSize < panSizes[iRange])
2780
                    {
2781
                        nRet = -1;
2782
                        break;
2783
                    }
2784
2785
                    if (panSizes[iRange] > 0)
2786
                    {
2787
                        memcpy(ppData[iRange],
2788
                               asWriteFuncData[iReq].pBuffer + nOffset,
2789
                               panSizes[iRange]);
2790
                    }
2791
                    nOffset += panSizes[iRange];
2792
                    nRemainingSize -= panSizes[iRange];
2793
                }
2794
            }
2795
2796
            curl_multi_remove_handle(hMultiHandle, aHandles[iReq]);
2797
            VSICURLResetHeaderAndWriterFunctions(aHandles[iReq]);
2798
            curl_easy_cleanup(aHandles[iReq]);
2799
            CPLFree(apszRanges[iReq]);
2800
            CPLFree(asWriteFuncData[iReq].pBuffer);
2801
            CPLFree(asWriteFuncHeaderData[iReq].pBuffer);
2802
            if (aHeaders[iReq])
2803
                curl_slist_free_all(aHeaders[iReq]);
2804
        }
2805
2806
        if (!bRetry || nRet != 0)
2807
            break;
2808
        CPLSleep(dfMaxDelay);
2809
    }
2810
2811
    NetworkStatisticsLogger::LogGET(nTotalDownloaded);
2812
2813
    if (ENABLE_DEBUG)
2814
        CPLDebug(poFS->GetDebugKey(), "Download completed");
2815
2816
    return nRet;
2817
}
2818
2819
/************************************************************************/
2820
/*                      ReadMultiRangeSingleGet()                       */
2821
/************************************************************************/
2822
2823
// TODO: the interest of this mode is rather dubious now. We could probably
2824
// remove it
2825
int VSICurlHandle::ReadMultiRangeSingleGet(int const nRanges,
2826
                                           void **const ppData,
2827
                                           const vsi_l_offset *const panOffsets,
2828
                                           const size_t *const panSizes)
2829
{
2830
    std::string osRanges;
2831
    std::string osFirstRange;
2832
    std::string osLastRange;
2833
    int nMergedRanges = 0;
2834
    vsi_l_offset nTotalReqSize = 0;
2835
    for (int i = 0; i < nRanges; i++)
2836
    {
2837
        std::string osCurRange;
2838
        if (i != 0)
2839
            osRanges.append(",");
2840
        osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
2841
        while (i + 1 < nRanges &&
2842
               panOffsets[i] + panSizes[i] == panOffsets[i + 1])
2843
        {
2844
            nTotalReqSize += panSizes[i];
2845
            i++;
2846
        }
2847
        nTotalReqSize += panSizes[i];
2848
        osCurRange.append(
2849
            CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i] - 1));
2850
        nMergedRanges++;
2851
2852
        osRanges += osCurRange;
2853
2854
        if (nMergedRanges == 1)
2855
            osFirstRange = osCurRange;
2856
        osLastRange = std::move(osCurRange);
2857
    }
2858
2859
    const char *pszMaxRanges =
2860
        CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
2861
    int nMaxRanges = atoi(pszMaxRanges);
2862
    if (nMaxRanges <= 0)
2863
        nMaxRanges = 250;
2864
    if (nMergedRanges > nMaxRanges)
2865
    {
2866
        const int nHalf = nRanges / 2;
2867
        const int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
2868
        if (nRet != 0)
2869
            return nRet;
2870
        return ReadMultiRange(nRanges - nHalf, ppData + nHalf,
2871
                              panOffsets + nHalf, panSizes + nHalf);
2872
    }
2873
2874
    CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
2875
    CURL *hCurlHandle = curl_easy_init();
2876
2877
    struct curl_slist *headers =
2878
        VSICurlSetOptions(hCurlHandle, m_pszURL, m_aosHTTPOptions.List());
2879
2880
    WriteFuncStruct sWriteFuncData;
2881
    WriteFuncStruct sWriteFuncHeaderData;
2882
2883
    VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk,
2884
                               pReadCbkUserData);
2885
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2886
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
2887
                               VSICurlHandleWriteFunc);
2888
2889
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
2890
                               nullptr);
2891
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
2892
                               &sWriteFuncHeaderData);
2893
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
2894
                               VSICurlHandleWriteFunc);
2895
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
2896
    sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
2897
    if (nMergedRanges == 1)
2898
    {
2899
        sWriteFuncHeaderData.nStartOffset = panOffsets[0];
2900
        sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize - 1;
2901
    }
2902
2903
    if (ENABLE_DEBUG)
2904
    {
2905
        if (nMergedRanges == 1)
2906
            CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
2907
                     osRanges.c_str(), m_pszURL);
2908
        else
2909
            CPLDebug(poFS->GetDebugKey(),
2910
                     "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
2911
                     osFirstRange.c_str(), osLastRange.c_str(),
2912
                     static_cast<GUIntBig>(nTotalReqSize), m_pszURL);
2913
    }
2914
2915
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
2916
2917
    char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
2918
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
2919
2920
    headers = GetCurlHeaders("GET", headers);
2921
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
2922
2923
    VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
2924
2925
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
2926
2927
    curl_slist_free_all(headers);
2928
2929
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
2930
2931
    if (sWriteFuncData.bInterrupted)
2932
    {
2933
        bInterrupted = true;
2934
2935
        CPLFree(sWriteFuncData.pBuffer);
2936
        CPLFree(sWriteFuncHeaderData.pBuffer);
2937
        curl_easy_cleanup(hCurlHandle);
2938
2939
        return -1;
2940
    }
2941
2942
    long response_code = 0;
2943
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
2944
2945
    if ((response_code != 200 && response_code != 206 && response_code != 225 &&
2946
         response_code != 226 && response_code != 426) ||
2947
        sWriteFuncHeaderData.bError)
2948
    {
2949
        if (response_code >= 400 && szCurlErrBuf[0] != '\0')
2950
        {
2951
            if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
2952
                CPLError(
2953
                    CE_Failure, CPLE_AppDefined,
2954
                    "%d: %s, Range downloading not supported by this server!",
2955
                    static_cast<int>(response_code), szCurlErrBuf);
2956
            else
2957
                CPLError(CE_Failure, CPLE_AppDefined, "%d: %s",
2958
                         static_cast<int>(response_code), szCurlErrBuf);
2959
        }
2960
        /*
2961
        if( !bHasComputedFileSize && startOffset == 0 )
2962
        {
2963
            cachedFileProp->bHasComputedFileSize = bHasComputedFileSize = true;
2964
            cachedFileProp->fileSize = fileSize = 0;
2965
            cachedFileProp->eExists = eExists = EXIST_NO;
2966
        }
2967
        */
2968
        CPLFree(sWriteFuncData.pBuffer);
2969
        CPLFree(sWriteFuncHeaderData.pBuffer);
2970
        curl_easy_cleanup(hCurlHandle);
2971
        return -1;
2972
    }
2973
2974
    char *pBuffer = sWriteFuncData.pBuffer;
2975
    size_t nSize = sWriteFuncData.nSize;
2976
2977
    // TODO(schwehr): Localize after removing gotos.
2978
    int nRet = -1;
2979
    char *pszBoundary;
2980
    std::string osBoundary;
2981
    char *pszNext = nullptr;
2982
    int iRange = 0;
2983
    int iPart = 0;
2984
    char *pszEOL = nullptr;
2985
2986
    /* -------------------------------------------------------------------- */
2987
    /*      No multipart if a single range has been requested               */
2988
    /* -------------------------------------------------------------------- */
2989
2990
    if (nMergedRanges == 1)
2991
    {
2992
        size_t nAccSize = 0;
2993
        if (static_cast<vsi_l_offset>(nSize) < nTotalReqSize)
2994
            goto end;
2995
2996
        for (int i = 0; i < nRanges; i++)
2997
        {
2998
            memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
2999
            nAccSize += panSizes[i];
3000
        }
3001
3002
        nRet = 0;
3003
        goto end;
3004
    }
3005
3006
    /* -------------------------------------------------------------------- */
3007
    /*      Extract boundary name                                           */
3008
    /* -------------------------------------------------------------------- */
3009
3010
    pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
3011
                         "Content-Type: multipart/byteranges; boundary=");
3012
    if (pszBoundary == nullptr)
3013
    {
3014
        CPLError(CE_Failure, CPLE_AppDefined, "Could not find '%s'",
3015
                 "Content-Type: multipart/byteranges; boundary=");
3016
        goto end;
3017
    }
3018
3019
    pszBoundary += strlen("Content-Type: multipart/byteranges; boundary=");
3020
3021
    pszEOL = strchr(pszBoundary, '\r');
3022
    if (pszEOL)
3023
        *pszEOL = 0;
3024
    pszEOL = strchr(pszBoundary, '\n');
3025
    if (pszEOL)
3026
        *pszEOL = 0;
3027
3028
    /* Remove optional double-quote character around boundary name */
3029
    if (pszBoundary[0] == '"')
3030
    {
3031
        pszBoundary++;
3032
        char *pszLastDoubleQuote = strrchr(pszBoundary, '"');
3033
        if (pszLastDoubleQuote)
3034
            *pszLastDoubleQuote = 0;
3035
    }
3036
3037
    osBoundary = "--";
3038
    osBoundary += pszBoundary;
3039
3040
    /* -------------------------------------------------------------------- */
3041
    /*      Find the start of the first chunk.                              */
3042
    /* -------------------------------------------------------------------- */
3043
    pszNext = strstr(pBuffer, osBoundary.c_str());
3044
    if (pszNext == nullptr)
3045
    {
3046
        CPLError(CE_Failure, CPLE_AppDefined, "No parts found.");
3047
        goto end;
3048
    }
3049
3050
    pszNext += osBoundary.size();
3051
    while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0')
3052
        pszNext++;
3053
    if (*pszNext == '\r')
3054
        pszNext++;
3055
    if (*pszNext == '\n')
3056
        pszNext++;
3057
3058
    /* -------------------------------------------------------------------- */
3059
    /*      Loop over parts...                                              */
3060
    /* -------------------------------------------------------------------- */
3061
    while (iPart < nRanges)
3062
    {
3063
        /* --------------------------------------------------------------------
3064
         */
3065
        /*      Collect headers. */
3066
        /* --------------------------------------------------------------------
3067
         */
3068
        bool bExpectedRange = false;
3069
3070
        while (*pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0')
3071
        {
3072
            pszEOL = strstr(pszNext, "\n");
3073
3074
            if (pszEOL == nullptr)
3075
            {
3076
                CPLError(CE_Failure, CPLE_AppDefined,
3077
                         "Error while parsing multipart content (at line %d)",
3078
                         __LINE__);
3079
                goto end;
3080
            }
3081
3082
            *pszEOL = '\0';
3083
            bool bRestoreAntislashR = false;
3084
            if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
3085
            {
3086
                bRestoreAntislashR = true;
3087
                pszEOL[-1] = '\0';
3088
            }
3089
3090
            if (STARTS_WITH_CI(pszNext, "Content-Range: bytes "))
3091
            {
3092
                bExpectedRange = true; /* FIXME */
3093
            }
3094
3095
            if (bRestoreAntislashR)
3096
                pszEOL[-1] = '\r';
3097
            *pszEOL = '\n';
3098
3099
            pszNext = pszEOL + 1;
3100
        }
3101
3102
        if (!bExpectedRange)
3103
        {
3104
            CPLError(CE_Failure, CPLE_AppDefined,
3105
                     "Error while parsing multipart content (at line %d)",
3106
                     __LINE__);
3107
            goto end;
3108
        }
3109
3110
        if (*pszNext == '\r')
3111
            pszNext++;
3112
        if (*pszNext == '\n')
3113
            pszNext++;
3114
3115
        /* --------------------------------------------------------------------
3116
         */
3117
        /*      Work out the data block size. */
3118
        /* --------------------------------------------------------------------
3119
         */
3120
        size_t nBytesAvail = nSize - (pszNext - pBuffer);
3121
3122
        while (true)
3123
        {
3124
            if (nBytesAvail < panSizes[iRange])
3125
            {
3126
                CPLError(CE_Failure, CPLE_AppDefined,
3127
                         "Error while parsing multipart content (at line %d)",
3128
                         __LINE__);
3129
                goto end;
3130
            }
3131
3132
            memcpy(ppData[iRange], pszNext, panSizes[iRange]);
3133
            pszNext += panSizes[iRange];
3134
            nBytesAvail -= panSizes[iRange];
3135
            if (iRange + 1 < nRanges &&
3136
                panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1])
3137
            {
3138
                iRange++;
3139
            }
3140
            else
3141
            {
3142
                break;
3143
            }
3144
        }
3145
3146
        iPart++;
3147
        iRange++;
3148
3149
        while (nBytesAvail > 0 &&
3150
               (*pszNext != '-' ||
3151
                strncmp(pszNext, osBoundary.c_str(), osBoundary.size()) != 0))
3152
        {
3153
            pszNext++;
3154
            nBytesAvail--;
3155
        }
3156
3157
        if (nBytesAvail == 0)
3158
        {
3159
            CPLError(CE_Failure, CPLE_AppDefined,
3160
                     "Error while parsing multipart content (at line %d)",
3161
                     __LINE__);
3162
            goto end;
3163
        }
3164
3165
        pszNext += osBoundary.size();
3166
        if (STARTS_WITH(pszNext, "--"))
3167
        {
3168
            // End of multipart.
3169
            break;
3170
        }
3171
3172
        if (*pszNext == '\r')
3173
            pszNext++;
3174
        if (*pszNext == '\n')
3175
            pszNext++;
3176
        else
3177
        {
3178
            CPLError(CE_Failure, CPLE_AppDefined,
3179
                     "Error while parsing multipart content (at line %d)",
3180
                     __LINE__);
3181
            goto end;
3182
        }
3183
    }
3184
3185
    if (iPart == nMergedRanges)
3186
        nRet = 0;
3187
    else
3188
        CPLError(CE_Failure, CPLE_AppDefined,
3189
                 "Got only %d parts, where %d were expected", iPart,
3190
                 nMergedRanges);
3191
3192
end:
3193
    CPLFree(sWriteFuncData.pBuffer);
3194
    CPLFree(sWriteFuncHeaderData.pBuffer);
3195
    curl_easy_cleanup(hCurlHandle);
3196
3197
    return nRet;
3198
}
3199
3200
/************************************************************************/
3201
/*                               PRead()                                */
3202
/************************************************************************/
3203
3204
size_t VSICurlHandle::PRead(void *pBuffer, size_t nSize,
3205
                            vsi_l_offset nOffset) const
3206
{
3207
    // Try to use AdviseRead ranges fetched asynchronously
3208
    if (!m_aoAdviseReadRanges.empty())
3209
    {
3210
        for (auto &poRange : m_aoAdviseReadRanges)
3211
        {
3212
            if (nOffset >= poRange->nStartOffset &&
3213
                nOffset + nSize <= poRange->nStartOffset + poRange->nSize)
3214
            {
3215
                {
3216
                    std::unique_lock<std::mutex> oLock(poRange->oMutex);
3217
                    // coverity[missing_lock:FALSE]
3218
                    while (!poRange->bDone)
3219
                    {
3220
                        poRange->oCV.wait(oLock);
3221
                    }
3222
                }
3223
                if (poRange->abyData.empty())
3224
                    return 0;
3225
3226
                auto nEndOffset =
3227
                    poRange->nStartOffset + poRange->abyData.size();
3228
                if (nOffset >= nEndOffset)
3229
                    return 0;
3230
                const size_t nToCopy = static_cast<size_t>(
3231
                    std::min<vsi_l_offset>(nSize, nEndOffset - nOffset));
3232
                memcpy(pBuffer,
3233
                       poRange->abyData.data() +
3234
                           static_cast<size_t>(nOffset - poRange->nStartOffset),
3235
                       nToCopy);
3236
                return nToCopy;
3237
            }
3238
        }
3239
    }
3240
3241
    // poFS has a global mutex
3242
    poFS->GetCachedFileProp(m_pszURL, oFileProp);
3243
    if (oFileProp.eExists == EXIST_NO)
3244
        return static_cast<size_t>(-1);
3245
3246
    NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
3247
    NetworkStatisticsFile oContextFile(m_osFilename.c_str());
3248
    NetworkStatisticsAction oContextAction("PRead");
3249
3250
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
3251
    std::string osURL;
3252
    {
3253
        std::lock_guard<std::mutex> oLock(m_oMutex);
3254
        UpdateQueryString();
3255
        bool bHasExpired;
3256
        osURL = GetRedirectURLIfValid(bHasExpired, aosHTTPOptions);
3257
    }
3258
3259
    CURL *hCurlHandle = curl_easy_init();
3260
3261
    struct curl_slist *headers =
3262
        VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
3263
3264
    WriteFuncStruct sWriteFuncData;
3265
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
3266
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
3267
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
3268
                               VSICurlHandleWriteFunc);
3269
3270
    WriteFuncStruct sWriteFuncHeaderData;
3271
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
3272
                               nullptr);
3273
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
3274
                               &sWriteFuncHeaderData);
3275
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
3276
                               VSICurlHandleWriteFunc);
3277
    sWriteFuncHeaderData.bIsHTTP = STARTS_WITH(m_pszURL, "http");
3278
    sWriteFuncHeaderData.nStartOffset = nOffset;
3279
3280
    sWriteFuncHeaderData.nEndOffset = nOffset + nSize - 1;
3281
3282
    char rangeStr[512] = {};
3283
    snprintf(rangeStr, sizeof(rangeStr), CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3284
             sWriteFuncHeaderData.nStartOffset,
3285
             sWriteFuncHeaderData.nEndOffset);
3286
3287
#if 0
3288
    if( ENABLE_DEBUG )
3289
        CPLDebug(poFS->GetDebugKey(),
3290
                 "Downloading %s (%s)...", rangeStr, osURL.c_str());
3291
#endif
3292
3293
    std::string osHeaderRange;
3294
    if (sWriteFuncHeaderData.bIsHTTP)
3295
    {
3296
        osHeaderRange = CPLSPrintf("Range: bytes=%s", rangeStr);
3297
        // So it gets included in Azure signature
3298
        headers = curl_slist_append(headers, osHeaderRange.data());
3299
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
3300
    }
3301
    else
3302
    {
3303
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
3304
    }
3305
3306
    std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
3307
    szCurlErrBuf[0] = '\0';
3308
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
3309
                               &szCurlErrBuf[0]);
3310
3311
    {
3312
        std::lock_guard<std::mutex> oLock(m_oMutex);
3313
        headers =
3314
            const_cast<VSICurlHandle *>(this)->GetCurlHeaders("GET", headers);
3315
    }
3316
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
3317
3318
    CURLM *hMultiHandle = poFS->GetCurlMultiHandleFor(osURL);
3319
    VSICURLMultiPerform(hMultiHandle, hCurlHandle, &m_bInterrupt);
3320
3321
    {
3322
        std::lock_guard<std::mutex> oLock(m_oMutex);
3323
        const_cast<VSICurlHandle *>(this)->UpdateRedirectInfo(
3324
            hCurlHandle, sWriteFuncHeaderData);
3325
    }
3326
3327
    long response_code = 0;
3328
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
3329
3330
    if (ENABLE_DEBUG && szCurlErrBuf[0] != '\0')
3331
    {
3332
        const char *pszErrorMsg = &szCurlErrBuf[0];
3333
        CPLDebug(poFS->GetDebugKey(), "PRead(%s), %s: response_code=%d, msg=%s",
3334
                 osURL.c_str(), rangeStr, static_cast<int>(response_code),
3335
                 pszErrorMsg);
3336
    }
3337
3338
    size_t nRet;
3339
    if ((response_code != 206 && response_code != 225) ||
3340
        sWriteFuncData.nSize == 0)
3341
    {
3342
        if (!m_bInterrupt)
3343
        {
3344
            CPLDebug(poFS->GetDebugKey(),
3345
                     "Request for %s failed with response_code=%ld", rangeStr,
3346
                     response_code);
3347
        }
3348
        nRet = static_cast<size_t>(-1);
3349
    }
3350
    else
3351
    {
3352
        nRet = std::min(sWriteFuncData.nSize, nSize);
3353
        if (nRet > 0)
3354
            memcpy(pBuffer, sWriteFuncData.pBuffer, nRet);
3355
    }
3356
3357
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
3358
    curl_easy_cleanup(hCurlHandle);
3359
    CPLFree(sWriteFuncData.pBuffer);
3360
    CPLFree(sWriteFuncHeaderData.pBuffer);
3361
    curl_slist_free_all(headers);
3362
3363
    NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
3364
3365
#if 0
3366
    if( ENABLE_DEBUG )
3367
        CPLDebug(poFS->GetDebugKey(), "Download completed");
3368
#endif
3369
3370
    return nRet;
3371
}
3372
3373
/************************************************************************/
3374
/*                    GetAdviseReadTotalBytesLimit()                    */
3375
/************************************************************************/
3376
3377
size_t VSICurlHandle::GetAdviseReadTotalBytesLimit() const
3378
{
3379
    return static_cast<size_t>(std::min<unsigned long long>(
3380
        std::numeric_limits<size_t>::max(),
3381
        // 100 MB
3382
        std::strtoull(
3383
            CPLGetConfigOption("CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT",
3384
                               "104857600"),
3385
            nullptr, 10)));
3386
}
3387
3388
/************************************************************************/
3389
/*                          VSICURLMultiInit()                          */
3390
/************************************************************************/
3391
3392
/**
 * Create a new curl multi handle and apply the optional connection limits
 * taken from the GDAL_HTTP_MAX_CACHED_CONNECTIONS and
 * GDAL_HTTP_MAX_TOTAL_CONNECTIONS configuration options.
 *
 * @return the handle returned by curl_multi_init().
 */
static CURLM *VSICURLMultiInit()
{
    CURLM *hCurlMultiHandle = curl_multi_init();

    // curl_multi_setopt() is variadic and reads both options below as a
    // `long`. Passing the `int` returned by atoi() directly does not match
    // that type, so convert explicitly.
    if (const char *pszMAXCONNECTS =
            CPLGetConfigOption("GDAL_HTTP_MAX_CACHED_CONNECTIONS", nullptr))
    {
        curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAXCONNECTS,
                          static_cast<long>(atoi(pszMAXCONNECTS)));
    }

    if (const char *pszMAX_TOTAL_CONNECTIONS =
            CPLGetConfigOption("GDAL_HTTP_MAX_TOTAL_CONNECTIONS", nullptr))
    {
        curl_multi_setopt(hCurlMultiHandle, CURLMOPT_MAX_TOTAL_CONNECTIONS,
                          static_cast<long>(atoi(pszMAX_TOTAL_CONNECTIONS)));
    }

    return hCurlMultiHandle;
}
3412
3413
/************************************************************************/
3414
/*                             AdviseRead()                             */
3415
/************************************************************************/
3416
3417
void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets,
3418
                               const size_t *panSizes)
3419
{
3420
    if (!CPLTestBool(
3421
            CPLGetConfigOption("GDAL_HTTP_ENABLE_ADVISE_READ", "TRUE")))
3422
        return;
3423
3424
    if (m_oThreadAdviseRead.joinable())
3425
    {
3426
        m_oThreadAdviseRead.join();
3427
    }
3428
3429
    // Give up if we need to allocate too much memory
3430
    vsi_l_offset nMaxSize = 0;
3431
    const size_t nLimit = GetAdviseReadTotalBytesLimit();
3432
    for (int i = 0; i < nRanges; ++i)
3433
    {
3434
        if (panSizes[i] > nLimit - nMaxSize)
3435
        {
3436
            CPLDebug(poFS->GetDebugKey(),
3437
                     "Trying to request too many bytes in AdviseRead()");
3438
            return;
3439
        }
3440
        nMaxSize += panSizes[i];
3441
    }
3442
3443
    UpdateQueryString();
3444
3445
    bool bHasExpired = false;
3446
    CPLStringList aosHTTPOptions(m_aosHTTPOptions);
3447
    const std::string l_osURL(
3448
        GetRedirectURLIfValid(bHasExpired, aosHTTPOptions));
3449
    if (bHasExpired)
3450
    {
3451
        return;
3452
    }
3453
3454
    const bool bMergeConsecutiveRanges = CPLTestBool(
3455
        CPLGetConfigOption("GDAL_HTTP_MERGE_CONSECUTIVE_RANGES", "TRUE"));
3456
3457
    try
3458
    {
3459
        m_aoAdviseReadRanges.clear();
3460
        m_aoAdviseReadRanges.reserve(nRanges);
3461
        for (int i = 0; i < nRanges;)
3462
        {
3463
            int iNext = i;
3464
            // Identify consecutive ranges
3465
            constexpr size_t SIZE_COG_MARKERS = 2 * sizeof(uint32_t);
3466
            auto nEndOffset = panOffsets[iNext] + panSizes[iNext];
3467
            while (bMergeConsecutiveRanges && iNext + 1 < nRanges &&
3468
                   panOffsets[iNext + 1] > panOffsets[iNext] &&
3469
                   panOffsets[iNext] + panSizes[iNext] + SIZE_COG_MARKERS >=
3470
                       panOffsets[iNext + 1] &&
3471
                   panOffsets[iNext + 1] + panSizes[iNext + 1] > nEndOffset)
3472
            {
3473
                iNext++;
3474
                nEndOffset = panOffsets[iNext] + panSizes[iNext];
3475
            }
3476
            CPLAssert(panOffsets[i] <= nEndOffset);
3477
            const size_t nSize =
3478
                static_cast<size_t>(nEndOffset - panOffsets[i]);
3479
3480
            if (nSize == 0)
3481
            {
3482
                i = iNext + 1;
3483
                continue;
3484
            }
3485
3486
            auto newAdviseReadRange =
3487
                std::make_unique<AdviseReadRange>(m_oRetryParameters);
3488
            newAdviseReadRange->nStartOffset = panOffsets[i];
3489
            newAdviseReadRange->nSize = nSize;
3490
            newAdviseReadRange->abyData.resize(nSize);
3491
            m_aoAdviseReadRanges.push_back(std::move(newAdviseReadRange));
3492
3493
            i = iNext + 1;
3494
        }
3495
    }
3496
    catch (const std::exception &)
3497
    {
3498
        CPLError(CE_Failure, CPLE_OutOfMemory,
3499
                 "Out of memory in VSICurlHandle::AdviseRead()");
3500
        m_aoAdviseReadRanges.clear();
3501
    }
3502
3503
    if (m_aoAdviseReadRanges.empty())
3504
        return;
3505
3506
#ifdef DEBUG
3507
    CPLDebug(poFS->GetDebugKey(), "AdviseRead(): fetching %u ranges",
3508
             static_cast<unsigned>(m_aoAdviseReadRanges.size()));
3509
#endif
3510
3511
    const auto task = [this, aosHTTPOptions = std::move(aosHTTPOptions)](
3512
                          const std::string &osURL)
3513
    {
3514
        if (!m_hCurlMultiHandleForAdviseRead)
3515
            m_hCurlMultiHandleForAdviseRead = VSICURLMultiInit();
3516
3517
        NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
3518
        NetworkStatisticsFile oContextFile(m_osFilename.c_str());
3519
        NetworkStatisticsAction oContextAction("AdviseRead");
3520
3521
#ifdef CURLPIPE_MULTIPLEX
3522
        // Enable HTTP/2 multiplexing (ignored if an older version of HTTP is
3523
        // used)
3524
        // Not that this does not enable HTTP/1.1 pipeling, which is not
3525
        // recommended for example by Google Cloud Storage.
3526
        // For HTTP/1.1, parallel connections work better since you can get
3527
        // results out of order.
3528
        if (CPLTestBool(CPLGetConfigOption("GDAL_HTTP_MULTIPLEX", "YES")))
3529
        {
3530
            curl_multi_setopt(m_hCurlMultiHandleForAdviseRead,
3531
                              CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX);
3532
        }
3533
#endif
3534
3535
        size_t nTotalDownloaded = 0;
3536
3537
        while (true)
3538
        {
3539
3540
            std::vector<CURL *> aHandles;
3541
            std::vector<WriteFuncStruct> asWriteFuncData(
3542
                m_aoAdviseReadRanges.size());
3543
            std::vector<WriteFuncStruct> asWriteFuncHeaderData(
3544
                m_aoAdviseReadRanges.size());
3545
            std::vector<char *> apszRanges;
3546
            std::vector<struct curl_slist *> aHeaders;
3547
3548
            struct CurlErrBuffer
3549
            {
3550
                std::array<char, CURL_ERROR_SIZE + 1> szCurlErrBuf;
3551
            };
3552
            std::vector<CurlErrBuffer> asCurlErrors(
3553
                m_aoAdviseReadRanges.size());
3554
3555
            std::map<CURL *, size_t> oMapHandleToIdx;
3556
            for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i)
3557
            {
3558
                if (!m_aoAdviseReadRanges[i]->bToRetry)
3559
                {
3560
                    aHandles.push_back(nullptr);
3561
                    apszRanges.push_back(nullptr);
3562
                    aHeaders.push_back(nullptr);
3563
                    continue;
3564
                }
3565
                m_aoAdviseReadRanges[i]->bToRetry = false;
3566
3567
                CURL *hCurlHandle = curl_easy_init();
3568
                oMapHandleToIdx[hCurlHandle] = i;
3569
                aHandles.push_back(hCurlHandle);
3570
3571
                // As the multi-range request is likely not the first one, we don't
3572
                // need to wait as we already know if pipelining is possible
3573
                // unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_PIPEWAIT, 1);
3574
3575
                struct curl_slist *headers = VSICurlSetOptions(
3576
                    hCurlHandle, osURL.c_str(), aosHTTPOptions.List());
3577
3578
                VSICURLInitWriteFuncStruct(&asWriteFuncData[i], this,
3579
                                           pfnReadCbk, pReadCbkUserData);
3580
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
3581
                                           &asWriteFuncData[i]);
3582
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
3583
                                           VSICurlHandleWriteFunc);
3584
3585
                VSICURLInitWriteFuncStruct(&asWriteFuncHeaderData[i], nullptr,
3586
                                           nullptr, nullptr);
3587
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
3588
                                           &asWriteFuncHeaderData[i]);
3589
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
3590
                                           VSICurlHandleWriteFunc);
3591
                asWriteFuncHeaderData[i].bIsHTTP =
3592
                    STARTS_WITH(m_pszURL, "http");
3593
                asWriteFuncHeaderData[i].nStartOffset =
3594
                    m_aoAdviseReadRanges[i]->nStartOffset;
3595
3596
                asWriteFuncHeaderData[i].nEndOffset =
3597
                    m_aoAdviseReadRanges[i]->nStartOffset +
3598
                    m_aoAdviseReadRanges[i]->nSize - 1;
3599
3600
                char rangeStr[512] = {};
3601
                snprintf(rangeStr, sizeof(rangeStr),
3602
                         CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3603
                         asWriteFuncHeaderData[i].nStartOffset,
3604
                         asWriteFuncHeaderData[i].nEndOffset);
3605
3606
                if (ENABLE_DEBUG)
3607
                    CPLDebug(poFS->GetDebugKey(), "Downloading %s (%s)...",
3608
                             rangeStr, osURL.c_str());
3609
3610
                if (asWriteFuncHeaderData[i].bIsHTTP)
3611
                {
3612
                    std::string osHeaderRange(
3613
                        CPLSPrintf("Range: bytes=%s", rangeStr));
3614
                    // So it gets included in Azure signature
3615
                    char *pszRange = CPLStrdup(osHeaderRange.c_str());
3616
                    apszRanges.push_back(pszRange);
3617
                    headers = curl_slist_append(headers, pszRange);
3618
                    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
3619
                                               nullptr);
3620
                }
3621
                else
3622
                {
3623
                    apszRanges.push_back(nullptr);
3624
                    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE,
3625
                                               rangeStr);
3626
                }
3627
3628
                asCurlErrors[i].szCurlErrBuf[0] = '\0';
3629
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
3630
                                           &asCurlErrors[i].szCurlErrBuf[0]);
3631
3632
                headers = GetCurlHeaders("GET", headers);
3633
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
3634
                                           headers);
3635
                aHeaders.push_back(headers);
3636
                curl_multi_add_handle(m_hCurlMultiHandleForAdviseRead,
3637
                                      hCurlHandle);
3638
            }
3639
3640
            const auto DealWithRequest = [this, &osURL, &nTotalDownloaded,
3641
                                          &oMapHandleToIdx, &asCurlErrors,
3642
                                          &asWriteFuncHeaderData,
3643
                                          &asWriteFuncData](CURL *hCurlHandle)
3644
            {
3645
                auto oIter = oMapHandleToIdx.find(hCurlHandle);
3646
                CPLAssert(oIter != oMapHandleToIdx.end());
3647
                const auto iReq = oIter->second;
3648
3649
                long response_code = 0;
3650
                curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE,
3651
                                  &response_code);
3652
3653
                if (ENABLE_DEBUG && asCurlErrors[iReq].szCurlErrBuf[0] != '\0')
3654
                {
3655
                    char rangeStr[512] = {};
3656
                    snprintf(rangeStr, sizeof(rangeStr),
3657
                             CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3658
                             asWriteFuncHeaderData[iReq].nStartOffset,
3659
                             asWriteFuncHeaderData[iReq].nEndOffset);
3660
3661
                    const char *pszErrorMsg =
3662
                        &asCurlErrors[iReq].szCurlErrBuf[0];
3663
                    CPLDebug(poFS->GetDebugKey(),
3664
                             "ReadMultiRange(%s), %s: response_code=%d, msg=%s",
3665
                             osURL.c_str(), rangeStr,
3666
                             static_cast<int>(response_code), pszErrorMsg);
3667
                }
3668
3669
                bool bToRetry = false;
3670
                if ((response_code != 206 && response_code != 225) ||
3671
                    asWriteFuncHeaderData[iReq].nEndOffset + 1 !=
3672
                        asWriteFuncHeaderData[iReq].nStartOffset +
3673
                            asWriteFuncData[iReq].nSize)
3674
                {
3675
                    char rangeStr[512] = {};
3676
                    snprintf(rangeStr, sizeof(rangeStr),
3677
                             CPL_FRMT_GUIB "-" CPL_FRMT_GUIB,
3678
                             asWriteFuncHeaderData[iReq].nStartOffset,
3679
                             asWriteFuncHeaderData[iReq].nEndOffset);
3680
3681
                    // Look if we should attempt a retry
3682
                    if (m_aoAdviseReadRanges[iReq]->retryContext.CanRetry(
3683
                            static_cast<int>(response_code),
3684
                            asWriteFuncData[iReq].pBuffer,
3685
                            &asCurlErrors[iReq].szCurlErrBuf[0]))
3686
                    {
3687
                        CPLError(CE_Warning, CPLE_AppDefined,
3688
                                 "HTTP error code for %s range %s: %d. "
3689
                                 "Retrying again in %.1f secs",
3690
                                 osURL.c_str(), rangeStr,
3691
                                 static_cast<int>(response_code),
3692
                                 m_aoAdviseReadRanges[iReq]
3693
                                     ->retryContext.GetCurrentDelay());
3694
                        m_aoAdviseReadRanges[iReq]->dfSleepDelay =
3695
                            m_aoAdviseReadRanges[iReq]
3696
                                ->retryContext.GetCurrentDelay();
3697
                        bToRetry = true;
3698
                    }
3699
                    else
3700
                    {
3701
                        CPLError(CE_Failure, CPLE_AppDefined,
3702
                                 "Request for %s range %s failed with "
3703
                                 "response_code=%ld",
3704
                                 osURL.c_str(), rangeStr, response_code);
3705
                    }
3706
                }
3707
                else
3708
                {
3709
                    const size_t nSize = asWriteFuncData[iReq].nSize;
3710
                    memcpy(&m_aoAdviseReadRanges[iReq]->abyData[0],
3711
                           asWriteFuncData[iReq].pBuffer, nSize);
3712
                    m_aoAdviseReadRanges[iReq]->abyData.resize(nSize);
3713
3714
                    nTotalDownloaded += nSize;
3715
                }
3716
3717
                m_aoAdviseReadRanges[iReq]->bToRetry = bToRetry;
3718
3719
                if (!bToRetry)
3720
                {
3721
                    std::lock_guard<std::mutex> oLock(
3722
                        m_aoAdviseReadRanges[iReq]->oMutex);
3723
                    m_aoAdviseReadRanges[iReq]->bDone = true;
3724
                    m_aoAdviseReadRanges[iReq]->oCV.notify_all();
3725
                }
3726
            };
3727
3728
            int repeats = 0;
3729
3730
            void *old_handler = CPLHTTPIgnoreSigPipe();
3731
            while (true)
3732
            {
3733
                int still_running;
3734
                while (curl_multi_perform(m_hCurlMultiHandleForAdviseRead,
3735
                                          &still_running) ==
3736
                       CURLM_CALL_MULTI_PERFORM)
3737
                {
3738
                    // loop
3739
                }
3740
                if (!still_running)
3741
                {
3742
                    break;
3743
                }
3744
3745
                CURLMsg *msg;
3746
                do
3747
                {
3748
                    int msgq = 0;
3749
                    msg = curl_multi_info_read(m_hCurlMultiHandleForAdviseRead,
3750
                                               &msgq);
3751
                    if (msg && (msg->msg == CURLMSG_DONE))
3752
                    {
3753
                        DealWithRequest(msg->easy_handle);
3754
                    }
3755
                } while (msg);
3756
3757
                CPLMultiPerformWait(m_hCurlMultiHandleForAdviseRead, repeats);
3758
            }
3759
            CPLHTTPRestoreSigPipeHandler(old_handler);
3760
3761
            bool bRetry = false;
3762
            double dfDelay = 0.0;
3763
            for (size_t i = 0; i < m_aoAdviseReadRanges.size(); ++i)
3764
            {
3765
                bool bReqDone;
3766
                {
3767
                    // To please Coverity Scan
3768
                    std::lock_guard<std::mutex> oLock(
3769
                        m_aoAdviseReadRanges[i]->oMutex);
3770
                    bReqDone = m_aoAdviseReadRanges[i]->bDone;
3771
                }
3772
                if (!bReqDone && !m_aoAdviseReadRanges[i]->bToRetry)
3773
                {
3774
                    DealWithRequest(aHandles[i]);
3775
                }
3776
                if (m_aoAdviseReadRanges[i]->bToRetry)
3777
                    dfDelay = std::max(dfDelay,
3778
                                       m_aoAdviseReadRanges[i]->dfSleepDelay);
3779
                bRetry = bRetry || m_aoAdviseReadRanges[i]->bToRetry;
3780
                if (aHandles[i])
3781
                {
3782
                    curl_multi_remove_handle(m_hCurlMultiHandleForAdviseRead,
3783
                                             aHandles[i]);
3784
                    VSICURLResetHeaderAndWriterFunctions(aHandles[i]);
3785
                    curl_easy_cleanup(aHandles[i]);
3786
                }
3787
                CPLFree(apszRanges[i]);
3788
                CPLFree(asWriteFuncData[i].pBuffer);
3789
                CPLFree(asWriteFuncHeaderData[i].pBuffer);
3790
                if (aHeaders[i])
3791
                    curl_slist_free_all(aHeaders[i]);
3792
            }
3793
            if (!bRetry)
3794
                break;
3795
            CPLSleep(dfDelay);
3796
        }
3797
3798
        NetworkStatisticsLogger::LogGET(nTotalDownloaded);
3799
    };
3800
3801
    m_oThreadAdviseRead = std::thread(task, l_osURL);
3802
}
3803
3804
/************************************************************************/
3805
/*                               Write()                                */
3806
/************************************************************************/
3807
3808
size_t VSICurlHandle::Write(const void * /* pBuffer */, size_t /* nBytes */)
{
    // Both arguments are ignored: this implementation never writes anything
    // and always reports zero bytes written.
    constexpr size_t nBytesWritten = 0;
    return nBytesWritten;
}
3812
3813
/************************************************************************/
3814
/*                              ClearErr()                              */
3815
/************************************************************************/
3816
3817
void VSICurlHandle::ClearErr()
3818
3819
{
3820
    bEOF = false;
3821
    bError = false;
3822
}
3823
3824
/************************************************************************/
3825
/*                               Error()                                */
3826
/************************************************************************/
3827
3828
int VSICurlHandle::Error()
3829
3830
{
3831
    return bError ? TRUE : FALSE;
3832
}
3833
3834
/************************************************************************/
3835
/*                                Eof()                                 */
3836
/************************************************************************/
3837
3838
int VSICurlHandle::Eof()
3839
3840
{
3841
    return bEOF ? TRUE : FALSE;
3842
}
3843
3844
/************************************************************************/
3845
/*                               Flush()                                */
3846
/************************************************************************/
3847
3848
/** Flush stub: does nothing and always returns 0. */
int VSICurlHandle::Flush()
{
    constexpr int nRet = 0;
    return nRet;
}
3852
3853
/************************************************************************/
3854
/*                               Close()                                */
3855
/************************************************************************/
3856
3857
/** Close stub: does nothing here and always returns 0. */
int VSICurlHandle::Close()
{
    constexpr int nRet = 0;
    return nRet;
}
3861
3862
/************************************************************************/
3863
/*                    VSICurlFilesystemHandlerBase()                    */
3864
/************************************************************************/
3865
3866
VSICurlFilesystemHandlerBase::VSICurlFilesystemHandlerBase()
3867
    : oCacheFileProp{100 * 1024}, oCacheDirList{1024, 0}
3868
{
3869
}
3870
3871
/************************************************************************/
3872
/*                           CachedConnection                           */
3873
/************************************************************************/
3874
3875
namespace
3876
{
3877
struct CachedConnection
3878
{
3879
    CURLM *hCurlMultiHandle = nullptr;
3880
    void clear();
3881
3882
    ~CachedConnection()
3883
    {
3884
        clear();
3885
    }
3886
};
3887
}  // namespace
3888
3889
#ifdef _WIN32
3890
// Currently thread_local and C++ objects don't work well with DLL on Windows
3891
static void FreeCachedConnection(void *pData)
3892
{
3893
    delete static_cast<
3894
        std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData);
3895
}
3896
3897
// Per-thread and per-filesystem Curl connection cache.
3898
static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> &
3899
GetConnectionCache()
3900
{
3901
    static std::map<VSICurlFilesystemHandlerBase *, CachedConnection>
3902
        dummyCache;
3903
    int bMemoryErrorOccurred = false;
3904
    void *pData =
3905
        CPLGetTLSEx(CTLS_VSICURL_CACHEDCONNECTION, &bMemoryErrorOccurred);
3906
    if (bMemoryErrorOccurred)
3907
    {
3908
        return dummyCache;
3909
    }
3910
    if (pData == nullptr)
3911
    {
3912
        auto cachedConnection =
3913
            new std::map<VSICurlFilesystemHandlerBase *, CachedConnection>();
3914
        CPLSetTLSWithFreeFuncEx(CTLS_VSICURL_CACHEDCONNECTION, cachedConnection,
3915
                                FreeCachedConnection, &bMemoryErrorOccurred);
3916
        if (bMemoryErrorOccurred)
3917
        {
3918
            delete cachedConnection;
3919
            return dummyCache;
3920
        }
3921
        return *cachedConnection;
3922
    }
3923
    return *static_cast<
3924
        std::map<VSICurlFilesystemHandlerBase *, CachedConnection> *>(pData);
3925
}
3926
#else
3927
// One connection map per thread, keyed by filesystem handler.
static thread_local std::map<VSICurlFilesystemHandlerBase *, CachedConnection>
    g_tls_connectionCache;

/** Return the calling thread's connection map. */
static std::map<VSICurlFilesystemHandlerBase *, CachedConnection> &
GetConnectionCache()
{
    auto &oThreadCache = g_tls_connectionCache;
    return oThreadCache;
}
3935
#endif
3936
3937
/************************************************************************/
3938
/*                               clear()                                */
3939
/************************************************************************/
3940
3941
void CachedConnection::clear()
3942
{
3943
    if (hCurlMultiHandle)
3944
    {
3945
        VSICURLMultiCleanup(hCurlMultiHandle);
3946
        hCurlMultiHandle = nullptr;
3947
    }
3948
}
3949
3950
/************************************************************************/
3951
/*                   ~VSICurlFilesystemHandlerBase()                    */
3952
/************************************************************************/
3953
3954
/**
 * Empty the caches, drop this handler's entry from the calling thread's
 * connection map and destroy the mutex.
 */
VSICurlFilesystemHandlerBase::~VSICurlFilesystemHandlerBase()
{
    // Explicitly qualified so that this class's ClearCache() is the one
    // invoked.
    VSICurlFilesystemHandlerBase::ClearCache();
    GetConnectionCache().erase(this);

    if (hMutex != nullptr)
    {
        CPLDestroyMutex(hMutex);
        hMutex = nullptr;
    }
}
3963
3964
/************************************************************************/
3965
/*                         AllowCachedDataFor()                         */
3966
/************************************************************************/
3967
3968
bool VSICurlFilesystemHandlerBase::AllowCachedDataFor(const char *pszFilename)
3969
{
3970
    bool bCachedAllowed = true;
3971
    char **papszTokens = CSLTokenizeString2(
3972
        CPLGetConfigOption("CPL_VSIL_CURL_NON_CACHED", ""), ":", 0);
3973
    for (int i = 0; papszTokens && papszTokens[i]; i++)
3974
    {
3975
        if (STARTS_WITH(pszFilename, papszTokens[i]))
3976
        {
3977
            bCachedAllowed = false;
3978
            break;
3979
        }
3980
    }
3981
    CSLDestroy(papszTokens);
3982
    return bCachedAllowed;
3983
}
3984
3985
/************************************************************************/
3986
/*                       GetCurlMultiHandleFor()                        */
3987
/************************************************************************/
3988
3989
/**
 * Return the curl multi handle cached for this handler on the calling
 * thread, creating it on first use. The URL argument is not used.
 */
CURLM *VSICurlFilesystemHandlerBase::GetCurlMultiHandleFor(
    const std::string & /*osURL*/)
{
    CachedConnection &oConnection = GetConnectionCache()[this];
    if (!oConnection.hCurlMultiHandle)
        oConnection.hCurlMultiHandle = VSICURLMultiInit();
    return oConnection.hCurlMultiHandle;
}
3999
4000
/************************************************************************/
4001
/*                           GetRegionCache()                           */
4002
/************************************************************************/
4003
4004
/**
 * Return the region cache, allocating it on first call with a capacity of
 * GetMaxRegions() entries.
 *
 * Must be called with hMutex held.
 */
VSICurlFilesystemHandlerBase::RegionCacheType *
VSICurlFilesystemHandlerBase::GetRegionCache()
{
    if (m_poRegionCacheDoNotUseDirectly != nullptr)
        return m_poRegionCacheDoNotUseDirectly.get();

    const auto nMaxRegions = static_cast<size_t>(GetMaxRegions());
    m_poRegionCacheDoNotUseDirectly.reset(new RegionCacheType(nMaxRegions));
    return m_poRegionCacheDoNotUseDirectly.get();
}
4015
4016
/************************************************************************/
4017
/*                             GetRegion()                              */
4018
/************************************************************************/
4019
4020
std::shared_ptr<std::string>
4021
VSICurlFilesystemHandlerBase::GetRegion(const char *pszURL,
4022
                                        vsi_l_offset nFileOffsetStart)
4023
{
4024
    CPLMutexHolder oHolder(&hMutex);
4025
4026
    const int knDOWNLOAD_CHUNK_SIZE = VSICURLGetDownloadChunkSize();
4027
    nFileOffsetStart =
4028
        (nFileOffsetStart / knDOWNLOAD_CHUNK_SIZE) * knDOWNLOAD_CHUNK_SIZE;
4029
4030
    std::shared_ptr<std::string> out;
4031
    if (GetRegionCache()->tryGet(
4032
            FilenameOffsetPair(std::string(pszURL), nFileOffsetStart), out))
4033
    {
4034
        return out;
4035
    }
4036
4037
    return nullptr;
4038
}
4039
4040
/************************************************************************/
4041
/*                             AddRegion()                              */
4042
/************************************************************************/
4043
4044
void VSICurlFilesystemHandlerBase::AddRegion(const char *pszURL,
4045
                                             vsi_l_offset nFileOffsetStart,
4046
                                             size_t nSize, const char *pData)
4047
{
4048
    CPLMutexHolder oHolder(&hMutex);
4049
4050
    auto value = std::make_shared<std::string>();
4051
    value->assign(pData, nSize);
4052
    GetRegionCache()->insert(
4053
        FilenameOffsetPair(std::string(pszURL), nFileOffsetStart),
4054
        std::move(value));
4055
}
4056
4057
/************************************************************************/
4058
/*                         GetCachedFileProp()                          */
4059
/************************************************************************/
4060
4061
bool VSICurlFilesystemHandlerBase::GetCachedFileProp(const char *pszURL,
4062
                                                     FileProp &oFileProp)
4063
{
4064
    CPLMutexHolder oHolder(&hMutex);
4065
    bool inCache;
4066
    if (oCacheFileProp.tryGet(std::string(pszURL), inCache))
4067
    {
4068
        if (VSICURLGetCachedFileProp(pszURL, oFileProp))
4069
        {
4070
            return true;
4071
        }
4072
        oCacheFileProp.remove(std::string(pszURL));
4073
    }
4074
    return false;
4075
}
4076
4077
/************************************************************************/
4078
/*                         SetCachedFileProp()                          */
4079
/************************************************************************/
4080
4081
void VSICurlFilesystemHandlerBase::SetCachedFileProp(const char *pszURL,
4082
                                                     FileProp &oFileProp)
4083
{
4084
    CPLMutexHolder oHolder(&hMutex);
4085
    oCacheFileProp.insert(std::string(pszURL), true);
4086
    VSICURLSetCachedFileProp(pszURL, oFileProp);
4087
}
4088
4089
/************************************************************************/
4090
/*                          GetCachedDirList()                          */
4091
/************************************************************************/
4092
4093
bool VSICurlFilesystemHandlerBase::GetCachedDirList(
4094
    const char *pszURL, CachedDirList &oCachedDirList)
4095
{
4096
    CPLMutexHolder oHolder(&hMutex);
4097
4098
    return oCacheDirList.tryGet(std::string(pszURL), oCachedDirList) &&
4099
           // Let a chance to use new auth parameters
4100
           gnGenerationAuthParameters ==
4101
               oCachedDirList.nGenerationAuthParameters;
4102
}
4103
4104
/************************************************************************/
4105
/*                          SetCachedDirList()                          */
4106
/************************************************************************/
4107
4108
void VSICurlFilesystemHandlerBase::SetCachedDirList(
4109
    const char *pszURL, CachedDirList &oCachedDirList)
4110
{
4111
    CPLMutexHolder oHolder(&hMutex);
4112
4113
    std::string key(pszURL);
4114
    CachedDirList oldValue;
4115
    if (oCacheDirList.tryGet(key, oldValue))
4116
    {
4117
        nCachedFilesInDirList -= oldValue.oFileList.size();
4118
        oCacheDirList.remove(key);
4119
    }
4120
4121
    while ((!oCacheDirList.empty() &&
4122
            nCachedFilesInDirList + oCachedDirList.oFileList.size() >
4123
                1024 * 1024) ||
4124
           oCacheDirList.size() == oCacheDirList.getMaxAllowedSize())
4125
    {
4126
        std::string oldestKey;
4127
        oCacheDirList.getOldestEntry(oldestKey, oldValue);
4128
        nCachedFilesInDirList -= oldValue.oFileList.size();
4129
        oCacheDirList.remove(oldestKey);
4130
    }
4131
    oCachedDirList.nGenerationAuthParameters = gnGenerationAuthParameters;
4132
4133
    nCachedFilesInDirList += oCachedDirList.oFileList.size();
4134
    oCacheDirList.insert(key, oCachedDirList);
4135
}
4136
4137
/************************************************************************/
4138
/*                        ExistsInCacheDirList()                        */
4139
/************************************************************************/
4140
4141
bool VSICurlFilesystemHandlerBase::ExistsInCacheDirList(
4142
    const std::string &osDirname, bool *pbIsDir)
4143
{
4144
    CachedDirList cachedDirList;
4145
    if (GetCachedDirList(osDirname.c_str(), cachedDirList))
4146
    {
4147
        if (pbIsDir)
4148
            *pbIsDir = !cachedDirList.oFileList.empty();
4149
        return false;
4150
    }
4151
    else
4152
    {
4153
        if (pbIsDir)
4154
            *pbIsDir = false;
4155
        return false;
4156
    }
4157
}
4158
4159
/************************************************************************/
4160
/*                        InvalidateCachedData()                        */
4161
/************************************************************************/
4162
4163
void VSICurlFilesystemHandlerBase::InvalidateCachedData(const char *pszURL)
4164
{
4165
    CPLMutexHolder oHolder(&hMutex);
4166
4167
    oCacheFileProp.remove(std::string(pszURL));
4168
4169
    // Invalidate all cached regions for this URL
4170
    std::list<FilenameOffsetPair> keysToRemove;
4171
    std::string osURL(pszURL);
4172
    auto lambda =
4173
        [&keysToRemove,
4174
         &osURL](const lru11::KeyValuePair<FilenameOffsetPair,
4175
                                           std::shared_ptr<std::string>> &kv)
4176
    {
4177
        if (kv.key.filename_ == osURL)
4178
            keysToRemove.push_back(kv.key);
4179
    };
4180
    auto *poRegionCache = GetRegionCache();
4181
    poRegionCache->cwalk(lambda);
4182
    for (const auto &key : keysToRemove)
4183
        poRegionCache->remove(key);
4184
}
4185
4186
/************************************************************************/
4187
/*                             ClearCache()                             */
4188
/************************************************************************/
4189
4190
void VSICurlFilesystemHandlerBase::ClearCache()
4191
{
4192
    CPLMutexHolder oHolder(&hMutex);
4193
4194
    GetRegionCache()->clear();
4195
4196
    {
4197
        const auto lambda = [](const lru11::KeyValuePair<std::string, bool> &kv)
4198
        { VSICURLInvalidateCachedFileProp(kv.key.c_str()); };
4199
        oCacheFileProp.cwalk(lambda);
4200
        oCacheFileProp.clear();
4201
    }
4202
4203
    oCacheDirList.clear();
4204
    nCachedFilesInDirList = 0;
4205
4206
    GetConnectionCache()[this].clear();
4207
}
4208
4209
/************************************************************************/
4210
/*                         PartialClearCache()                          */
4211
/************************************************************************/
4212
4213
void VSICurlFilesystemHandlerBase::PartialClearCache(
4214
    const char *pszFilenamePrefix)
4215
{
4216
    CPLMutexHolder oHolder(&hMutex);
4217
4218
    std::string osURL = GetURLFromFilename(pszFilenamePrefix);
4219
    {
4220
        std::list<FilenameOffsetPair> keysToRemove;
4221
        auto lambda =
4222
            [&keysToRemove, &osURL](
4223
                const lru11::KeyValuePair<FilenameOffsetPair,
4224
                                          std::shared_ptr<std::string>> &kv)
4225
        {
4226
            if (strncmp(kv.key.filename_.c_str(), osURL.c_str(),
4227
                        osURL.size()) == 0)
4228
                keysToRemove.push_back(kv.key);
4229
        };
4230
        auto *poRegionCache = GetRegionCache();
4231
        poRegionCache->cwalk(lambda);
4232
        for (const auto &key : keysToRemove)
4233
            poRegionCache->remove(key);
4234
    }
4235
4236
    {
4237
        std::list<std::string> keysToRemove;
4238
        auto lambda = [&keysToRemove,
4239
                       &osURL](const lru11::KeyValuePair<std::string, bool> &kv)
4240
        {
4241
            if (strncmp(kv.key.c_str(), osURL.c_str(), osURL.size()) == 0)
4242
                keysToRemove.push_back(kv.key);
4243
        };
4244
        oCacheFileProp.cwalk(lambda);
4245
        for (const auto &key : keysToRemove)
4246
            oCacheFileProp.remove(key);
4247
    }
4248
    VSICURLInvalidateCachedFilePropPrefix(osURL.c_str());
4249
4250
    {
4251
        const size_t nLen = strlen(pszFilenamePrefix);
4252
        std::list<std::string> keysToRemove;
4253
        auto lambda =
4254
            [this, &keysToRemove, pszFilenamePrefix,
4255
             nLen](const lru11::KeyValuePair<std::string, CachedDirList> &kv)
4256
        {
4257
            if (strncmp(kv.key.c_str(), pszFilenamePrefix, nLen) == 0)
4258
            {
4259
                keysToRemove.push_back(kv.key);
4260
                nCachedFilesInDirList -= kv.value.oFileList.size();
4261
            }
4262
        };
4263
        oCacheDirList.cwalk(lambda);
4264
        for (const auto &key : keysToRemove)
4265
            oCacheDirList.remove(key);
4266
    }
4267
}
4268
4269
/************************************************************************/
4270
/*                          CreateFileHandle()                          */
4271
/************************************************************************/
4272
4273
/**
 * Allocate a new VSICurlHandle bound to this handler for pszFilename.
 * The returned object is allocated with new.
 */
VSICurlHandle *
VSICurlFilesystemHandlerBase::CreateFileHandle(const char *pszFilename)
{
    VSICurlHandle *poNewHandle = new VSICurlHandle(this, pszFilename);
    return poNewHandle;
}
4278
4279
/************************************************************************/
4280
/*                            GetActualURL()                            */
4281
/************************************************************************/
4282
4283
const char *VSICurlFilesystemHandlerBase::GetActualURL(const char *pszFilename)
4284
{
4285
    VSICurlHandle *poHandle = CreateFileHandle(pszFilename);
4286
    if (poHandle == nullptr)
4287
        return pszFilename;
4288
    std::string osURL(poHandle->GetURL());
4289
    delete poHandle;
4290
    return CPLSPrintf("%s", osURL.c_str());
4291
}
4292
4293
/************************************************************************/
4294
/*                             GetOptions()                             */
4295
/************************************************************************/
4296
4297
/* XML <Option> elements returned by GetOptionsStatic(); GetOptions() wraps
 * them in an <Options> element. Adjacent string literals are concatenated,
 * so a literal that ends mid-sentence must end with a space. */
#define VSICURL_OPTIONS                                                        \
    "  <Option name='GDAL_HTTP_MAX_RETRY' type='int' "                         \
    "description='Maximum number of retries' default='0'/>"                    \
    "  <Option name='GDAL_HTTP_RETRY_DELAY' type='double' "                    \
    "description='Retry delay in seconds' default='30'/>"                      \
    "  <Option name='GDAL_HTTP_HEADER_FILE' type='string' "                    \
    "description='Filename of a file that contains HTTP headers to "           \
    "forward to the server'/>"                                                 \
    "  <Option name='CPL_VSIL_CURL_USE_HEAD' type='boolean' "                  \
    "description='Whether to use HTTP HEAD verb to retrieve "                  \
    "file information' default='YES'/>"                                        \
    "  <Option name='GDAL_HTTP_MULTIRANGE' type='string-select' "              \
    "description='Strategy to apply to run multi-range requests' "             \
    "default='PARALLEL'>"                                                      \
    "       <Value>PARALLEL</Value>"                                           \
    "       <Value>SERIAL</Value>"                                             \
    "  </Option>"                                                              \
    "  <Option name='GDAL_HTTP_MULTIPLEX' type='boolean' "                     \
    "description='Whether to enable HTTP/2 multiplexing' default='YES'/>"      \
    "  <Option name='GDAL_HTTP_MERGE_CONSECUTIVE_RANGES' type='boolean' "      \
    "description='Whether to merge consecutive ranges in multirange "          \
    "requests' default='YES'/>"                                                \
    "  <Option name='CPL_VSIL_CURL_NON_CACHED' type='string' "                 \
    "description='Colon-separated list of filenames whose content "            \
    "must not be cached across open attempts'/>"                               \
    "  <Option name='CPL_VSIL_CURL_ALLOWED_FILENAME' type='string' "           \
    "description='Single filename that is allowed to be opened'/>"             \
    "  <Option name='CPL_VSIL_CURL_ALLOWED_EXTENSIONS' type='string' "         \
    "description='Comma or space separated list of allowed file "              \
    "extensions'/>"                                                            \
    "  <Option name='GDAL_DISABLE_READDIR_ON_OPEN' type='string-select' "      \
    "description='Whether to disable establishing the list of files in "       \
    "the directory of the current filename' default='NO'>"                     \
    "       <Value>NO</Value>"                                                 \
    "       <Value>YES</Value>"                                                \
    "       <Value>EMPTY_DIR</Value>"                                          \
    "  </Option>"                                                              \
    "  <Option name='VSI_CACHE' type='boolean' "                               \
    "description='Whether to cache in memory the contents of the opened "      \
    "file as soon as they are read' default='NO'/>"                            \
    "  <Option name='CPL_VSIL_CURL_CHUNK_SIZE' type='integer' "                \
    "description='Size in bytes of the minimum amount of data read in a "      \
    "file' default='16384' min='1024' max='10485760'/>"                        \
    "  <Option name='CPL_VSIL_CURL_CACHE_SIZE' type='integer' "                \
    "description='Size in bytes of the global /vsicurl/ cache' "               \
    "default='16384000'/>"                                                     \
    "  <Option name='CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE' type='boolean' "    \
    "description='Whether to skip files with Glacier storage class in "        \
    "directory listing.' default='YES'/>"                                      \
    "  <Option name='CPL_VSIL_CURL_ADVISE_READ_TOTAL_BYTES_LIMIT' "            \
    "type='integer' description='Maximum number of bytes AdviseRead() is "     \
    "allowed to fetch at once' default='104857600'/>"                          \
    "  <Option name='GDAL_HTTP_MAX_CACHED_CONNECTIONS' type='integer' "        \
    "description='Maximum amount of connections that libcurl may keep alive "  \
    "in its connection cache after use'/>"                                     \
    "  <Option name='GDAL_HTTP_MAX_TOTAL_CONNECTIONS' type='integer' "         \
    "description='Maximum number of simultaneously open connections in "       \
    "total'/>"
4355
4356
const char *VSICurlFilesystemHandlerBase::GetOptionsStatic()
4357
{
4358
    return VSICURL_OPTIONS;
4359
}
4360
4361
const char *VSICurlFilesystemHandlerBase::GetOptions()
4362
{
4363
    static std::string osOptions(std::string("<Options>") + GetOptionsStatic() +
4364
                                 "</Options>");
4365
    return osOptions.c_str();
4366
}
4367
4368
/************************************************************************/
4369
/*                         IsAllowedFilename()                          */
4370
/************************************************************************/
4371
4372
bool VSICurlFilesystemHandlerBase::IsAllowedFilename(const char *pszFilename)
4373
{
4374
    const char *pszAllowedFilename =
4375
        CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_FILENAME", nullptr);
4376
    if (pszAllowedFilename != nullptr)
4377
    {
4378
        return strcmp(pszFilename, pszAllowedFilename) == 0;
4379
    }
4380
4381
    // Consider that only the files whose extension ends up with one that is
4382
    // listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server.  This can
4383
    // speeds up dramatically open experience, in case the server cannot return
4384
    // a file list.  {noext} can be used as a special token to mean file with no
4385
    // extension.
4386
    // For example:
4387
    // gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif"
4388
    // /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif
4389
    const char *pszAllowedExtensions =
4390
        CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", nullptr);
4391
    if (pszAllowedExtensions)
4392
    {
4393
        char **papszExtensions =
4394
            CSLTokenizeString2(pszAllowedExtensions, ", ", 0);
4395
        const char *queryStart = strchr(pszFilename, '?');
4396
        char *pszFilenameWithoutQuery = nullptr;
4397
        if (queryStart != nullptr)
4398
        {
4399
            pszFilenameWithoutQuery = CPLStrdup(pszFilename);
4400
            pszFilenameWithoutQuery[queryStart - pszFilename] = '\0';
4401
            pszFilename = pszFilenameWithoutQuery;
4402
        }
4403
        const size_t nURLLen = strlen(pszFilename);
4404
        bool bFound = false;
4405
        for (int i = 0; papszExtensions[i] != nullptr; i++)
4406
        {
4407
            const size_t nExtensionLen = strlen(papszExtensions[i]);
4408
            if (EQUAL(papszExtensions[i], "{noext}"))
4409
            {
4410
                const char *pszLastSlash = strrchr(pszFilename, '/');
4411
                if (pszLastSlash != nullptr &&
4412
                    strchr(pszLastSlash, '.') == nullptr)
4413
                {
4414
                    bFound = true;
4415
                    break;
4416
                }
4417
            }
4418
            else if (nURLLen > nExtensionLen &&
4419
                     EQUAL(pszFilename + nURLLen - nExtensionLen,
4420
                           papszExtensions[i]))
4421
            {
4422
                bFound = true;
4423
                break;
4424
            }
4425
        }
4426
4427
        CSLDestroy(papszExtensions);
4428
        if (pszFilenameWithoutQuery)
4429
        {
4430
            CPLFree(pszFilenameWithoutQuery);
4431
        }
4432
4433
        return bFound;
4434
    }
4435
    return TRUE;
4436
}
4437
4438
/************************************************************************/
4439
/*                                Open()                                */
4440
/************************************************************************/
4441
4442
/**
 * Open a file in read-only mode.
 *
 * @param pszFilename filename; must start with the filesystem prefix or
 *        with "/vsicurl?".
 * @param pszAccess access mode; modes containing 'w' or '+' are rejected.
 * @param bSetError whether to report failures through VSIError().
 * @param papszOptions open options. IGNORE_FILENAME_RESTRICTIONS,
 *        DISABLE_READDIR_ON_OPEN and CACHE are read here.
 * @return a handle, or a null pointer if the file cannot be opened.
 */
VSIVirtualHandleUniquePtr
VSICurlFilesystemHandlerBase::Open(const char *pszFilename,
                                   const char *pszAccess, bool bSetError,
                                   CSLConstList papszOptions)
{
    if (!(STARTS_WITH_CI(pszFilename, GetFSPrefix().c_str()) ||
          STARTS_WITH_CI(pszFilename, "/vsicurl?")))
    {
        return nullptr;
    }

    const bool bWriteRequested = strchr(pszAccess, 'w') != nullptr ||
                                 strchr(pszAccess, '+') != nullptr;
    if (bWriteRequested)
    {
        if (bSetError)
        {
            VSIError(VSIE_FileError,
                     "Only read-only mode is supported for /vsicurl");
        }
        return nullptr;
    }

    const bool bIgnoreFilenameRestrictions =
        papszOptions != nullptr &&
        CPLTestBool(CSLFetchNameValueDef(
            papszOptions, "IGNORE_FILENAME_RESTRICTIONS", "NO"));
    if (!bIgnoreFilenameRestrictions && !IsAllowedFilename(pszFilename))
        return nullptr;

    // Only the list_dir / empty_dir flags are of interest here; the
    // returned URL is ignored.
    bool bListDir = true;
    bool bEmptyDir = false;
    CPL_IGNORE_RET_VAL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
                                                 nullptr, &bListDir, &bEmptyDir,
                                                 nullptr, nullptr, nullptr));

    const char *pszOptionVal = CSLFetchNameValueDef(
        papszOptions, "DISABLE_READDIR_ON_OPEN",
        VSIGetPathSpecificOption(pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN",
                                 "NO"));
    const bool bCache = CPLTestBool(CSLFetchNameValueDef(
        papszOptions, "CACHE", AllowCachedDataFor(pszFilename) ? "YES" : "NO"));
    const bool bSkipReadDir = !bListDir || bEmptyDir ||
                              EQUAL(pszOptionVal, "EMPTY_DIR") ||
                              CPLTestBool(pszOptionVal) || !bCache;

    std::string osFilename(pszFilename);
    bool bGotFileList = !bSkipReadDir;
    bool bForceExistsCheck = false;

    // Listing the parent directory is only attempted when it is enabled,
    // the file is not already known to exist, and the name has an extension
    // that is neither zip-like nor ".nc.zarr".
    bool bListSiblings = !bSkipReadDir;
    if (bListSiblings)
    {
        FileProp cachedFileProp;
        const bool bKnownToExist =
            GetCachedFileProp(
                osFilename.c_str() + strlen(GetFSPrefix().c_str()),
                cachedFileProp) &&
            cachedFileProp.eExists == EXIST_YES;
        bListSiblings =
            !bKnownToExist &&
            strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr &&
            !STARTS_WITH(CPLGetExtensionSafe(osFilename.c_str()).c_str(),
                         "zip") &&
            // Likely a Kerchunk JSON reference file: no need to list siblings
            !cpl::ends_with(osFilename, ".nc.zarr");
    }

    if (bListSiblings)
    {
        // 1000 corresponds to the default page size of S3.
        constexpr int FILE_COUNT_LIMIT = 1000;
        const std::string osParentDir =
            CPLGetDirnameSafe(osFilename.c_str()) + '/';
        const CPLStringList aosFileList(ReadDirInternal(
            osParentDir.c_str(), FILE_COUNT_LIMIT, &bGotFileList));
        const char *pszBasename = CPLGetFilename(osFilename.c_str());
        const bool bFound =
            VSICurlIsFileInList(aosFileList.List(), pszBasename) != -1;
        if (bGotFileList && !bFound && aosFileList.size() < FILE_COUNT_LIMIT)
        {
            // Some file servers are case insensitive, so in case there is a
            // match with case difference, do a full check just in case.
            // e.g.
            // http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/MEGA90N000CB.IMG
            // that is queried by
            // gdalinfo
            // /vsicurl/http://pds-geosciences.wustl.edu/mgs/mgs-m-mola-5-megdr-l3-v1/mgsl_300x/meg004/mega90n000cb.lbl
            if (aosFileList.FindString(pszBasename) == -1)
                return nullptr;
            bForceExistsCheck = true;
        }
    }

    auto poHandle =
        std::unique_ptr<VSICurlHandle>(CreateFileHandle(osFilename.c_str()));
    if (poHandle == nullptr)
        return nullptr;
    poHandle->SetCache(bCache);

    // If we didn't get a file list (or the match is uncertain), check that
    // the file really exists.
    const bool bMustCheckExistence = !bGotFileList || bForceExistsCheck;
    if (bMustCheckExistence && !poHandle->Exists(bSetError))
        return nullptr;

    if (CPLTestBool(CPLGetConfigOption("VSI_CACHE", "FALSE")))
    {
        return VSIVirtualHandleUniquePtr(
            VSICreateCachedFile(poHandle.release()));
    }
    return VSIVirtualHandleUniquePtr(poHandle.release());
}
4546
4547
/************************************************************************/
4548
/*                        VSICurlParserFindEOL()                        */
4549
/*                                                                      */
4550
/*      Small helper function for VSICurlParseHTMLFileList() to find    */
4551
/*      the end of a line in the directory listing.  Either a <br>      */
4552
/*      or newline.                                                     */
4553
/************************************************************************/
4554
4555
static char *VSICurlParserFindEOL(char *pszData)
4556
4557
{
4558
    while (*pszData != '\0' && *pszData != '\n' &&
4559
           !STARTS_WITH_CI(pszData, "<br>"))
4560
        pszData++;
4561
4562
    if (*pszData == '\0')
4563
        return nullptr;
4564
4565
    return pszData;
4566
}
4567
4568
/************************************************************************/
4569
/*                  VSICurlParseHTMLDateTimeFileSize()                  */
4570
/************************************************************************/
4571
4572
/* English month names, indexed 0 (January) to 11 (December). */
static const char *const apszMonths[] = {
    "January",   /*  0 */
    "February",  /*  1 */
    "March",     /*  2 */
    "April",     /*  3 */
    "May",       /*  4 */
    "June",      /*  5 */
    "July",      /*  6 */
    "August",    /*  7 */
    "September", /*  8 */
    "October",   /*  9 */
    "November",  /* 10 */
    "December"   /* 11 */
};
4575
4576
static bool VSICurlParseHTMLDateTimeFileSize(const char *pszStr,
4577
                                             struct tm &brokendowntime,
4578
                                             GUIntBig &nFileSize,
4579
                                             GIntBig &mTime)
4580
{
4581
    for (int iMonth = 0; iMonth < 12; iMonth++)
4582
    {
4583
        char szMonth[32] = {};
4584
        szMonth[0] = '-';
4585
        memcpy(szMonth + 1, apszMonths[iMonth], 3);
4586
        szMonth[4] = '-';
4587
        szMonth[5] = '\0';
4588
        const char *pszMonthFound = strstr(pszStr, szMonth);
4589
        if (pszMonthFound)
4590
        {
4591
            // Format of Apache, like in
4592
            // http://download.osgeo.org/gdal/data/gtiff/
4593
            // "17-May-2010 12:26"
4594
            const auto nMonthFoundLen = strlen(pszMonthFound);
4595
            if (pszMonthFound - pszStr > 2 && nMonthFoundLen > 15 &&
4596
                pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
4597
            {
4598
                pszMonthFound -= 2;
4599
                int nDay = atoi(pszMonthFound);
4600
                int nYear = atoi(pszMonthFound + 7);
4601
                int nHour = atoi(pszMonthFound + 12);
4602
                int nMin = atoi(pszMonthFound + 15);
4603
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4604
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4605
                {
4606
                    brokendowntime.tm_year = nYear - 1900;
4607
                    brokendowntime.tm_mon = iMonth;
4608
                    brokendowntime.tm_mday = nDay;
4609
                    brokendowntime.tm_hour = nHour;
4610
                    brokendowntime.tm_min = nMin;
4611
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4612
4613
                    if (nMonthFoundLen > 15 + 2)
4614
                    {
4615
                        const char *pszFilesize = pszMonthFound + 15 + 2;
4616
                        while (*pszFilesize == ' ')
4617
                            pszFilesize++;
4618
                        if (*pszFilesize >= '1' && *pszFilesize <= '9')
4619
                            nFileSize = CPLScanUIntBig(
4620
                                pszFilesize,
4621
                                static_cast<int>(strlen(pszFilesize)));
4622
                    }
4623
4624
                    return true;
4625
                }
4626
            }
4627
            return false;
4628
        }
4629
4630
        /* Microsoft IIS */
4631
        snprintf(szMonth, sizeof(szMonth), " %s ", apszMonths[iMonth]);
4632
        pszMonthFound = strstr(pszStr, szMonth);
4633
        if (pszMonthFound)
4634
        {
4635
            int nLenMonth = static_cast<int>(strlen(apszMonths[iMonth]));
4636
            if (pszMonthFound - pszStr > 2 && pszMonthFound[-1] != ',' &&
4637
                pszMonthFound[-2] != ' ' &&
4638
                static_cast<int>(strlen(pszMonthFound - 2)) >
4639
                    2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
4640
            {
4641
                /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
4642
                /* "        Friday, 21 April 2006 12:05 p.m.     48062343
4643
                 * m35a_fy_94_95.tif" */
4644
                pszMonthFound -= 2;
4645
                int nDay = atoi(pszMonthFound);
4646
                int nCurOffset = 2 + 1 + nLenMonth + 1;
4647
                int nYear = atoi(pszMonthFound + nCurOffset);
4648
                nCurOffset += 4 + 1;
4649
                int nHour = atoi(pszMonthFound + nCurOffset);
4650
                if (nHour < 10)
4651
                    nCurOffset += 1 + 1;
4652
                else
4653
                    nCurOffset += 2 + 1;
4654
                const int nMin = atoi(pszMonthFound + nCurOffset);
4655
                nCurOffset += 2 + 1;
4656
                if (STARTS_WITH(pszMonthFound + nCurOffset, "p.m."))
4657
                    nHour += 12;
4658
                else if (!STARTS_WITH(pszMonthFound + nCurOffset, "a.m."))
4659
                    nHour = -1;
4660
                nCurOffset += 4;
4661
4662
                const char *pszFilesize = pszMonthFound + nCurOffset;
4663
                while (*pszFilesize == ' ')
4664
                    pszFilesize++;
4665
                if (*pszFilesize >= '1' && *pszFilesize <= '9')
4666
                    nFileSize = CPLScanUIntBig(
4667
                        pszFilesize, static_cast<int>(strlen(pszFilesize)));
4668
4669
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4670
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4671
                {
4672
                    brokendowntime.tm_year = nYear - 1900;
4673
                    brokendowntime.tm_mon = iMonth;
4674
                    brokendowntime.tm_mday = nDay;
4675
                    brokendowntime.tm_hour = nHour;
4676
                    brokendowntime.tm_min = nMin;
4677
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4678
4679
                    return true;
4680
                }
4681
                nFileSize = 0;
4682
            }
4683
            else if (pszMonthFound - pszStr > 1 && pszMonthFound[-1] == ',' &&
4684
                     static_cast<int>(strlen(pszMonthFound)) >
4685
                         1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
4686
            {
4687
                // Format of
4688
                // http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/
4689
                // "        Sunday, June 20, 2010  6:46 PM    233170905
4690
                // NWF2007LULCForSDE.zip"
4691
                pszMonthFound += 1;
4692
                int nCurOffset = nLenMonth + 1;
4693
                int nDay = atoi(pszMonthFound + nCurOffset);
4694
                nCurOffset += 2 + 1 + 1;
4695
                int nYear = atoi(pszMonthFound + nCurOffset);
4696
                nCurOffset += 4 + 1;
4697
                int nHour = atoi(pszMonthFound + nCurOffset);
4698
                nCurOffset += 2 + 1;
4699
                const int nMin = atoi(pszMonthFound + nCurOffset);
4700
                nCurOffset += 2 + 1;
4701
                if (STARTS_WITH(pszMonthFound + nCurOffset, "PM"))
4702
                    nHour += 12;
4703
                else if (!STARTS_WITH(pszMonthFound + nCurOffset, "AM"))
4704
                    nHour = -1;
4705
                nCurOffset += 2;
4706
4707
                const char *pszFilesize = pszMonthFound + nCurOffset;
4708
                while (*pszFilesize == ' ')
4709
                    pszFilesize++;
4710
                if (*pszFilesize >= '1' && *pszFilesize <= '9')
4711
                    nFileSize = CPLScanUIntBig(
4712
                        pszFilesize, static_cast<int>(strlen(pszFilesize)));
4713
4714
                if (nDay >= 1 && nDay <= 31 && nYear >= 1900 && nHour >= 0 &&
4715
                    nHour <= 24 && nMin >= 0 && nMin < 60)
4716
                {
4717
                    brokendowntime.tm_year = nYear - 1900;
4718
                    brokendowntime.tm_mon = iMonth;
4719
                    brokendowntime.tm_mday = nDay;
4720
                    brokendowntime.tm_hour = nHour;
4721
                    brokendowntime.tm_min = nMin;
4722
                    mTime = CPLYMDHMSToUnixTime(&brokendowntime);
4723
4724
                    return true;
4725
                }
4726
                nFileSize = 0;
4727
            }
4728
            return false;
4729
        }
4730
    }
4731
4732
    return false;
4733
}
4734
4735
/************************************************************************/
4736
/*                          ParseHTMLFileList()                         */
4737
/*                                                                      */
4738
/*      Parse a file list document and return all the components.       */
4739
/************************************************************************/
4740
4741
char **VSICurlFilesystemHandlerBase::ParseHTMLFileList(const char *pszFilename,
4742
                                                       int nMaxFiles,
4743
                                                       char *pszData,
4744
                                                       bool *pbGotFileList)
4745
{
4746
    *pbGotFileList = false;
4747
4748
    std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
4749
                                                nullptr, nullptr, nullptr,
4750
                                                nullptr, nullptr, nullptr));
4751
    const char *pszDir = nullptr;
4752
    if (STARTS_WITH_CI(osURL.c_str(), "http://"))
4753
        pszDir = strchr(osURL.c_str() + strlen("http://"), '/');
4754
    else if (STARTS_WITH_CI(osURL.c_str(), "https://"))
4755
        pszDir = strchr(osURL.c_str() + strlen("https://"), '/');
4756
    else if (STARTS_WITH_CI(osURL.c_str(), "ftp://"))
4757
        pszDir = strchr(osURL.c_str() + strlen("ftp://"), '/');
4758
    if (pszDir == nullptr)
4759
        pszDir = "";
4760
4761
    /* Apache / Nginx */
4762
    /* Most of the time the format is <title>Index of {pszDir[/]}</title>, but
4763
     * there are special cases like https://cdn.star.nesdis.noaa.gov/GOES18/ABI/MESO/M1/GEOCOLOR/
4764
     * where a CDN stuff makes that the title is <title>Index of /ma-cdn02/GOES/data/GOES18/ABI/MESO/M1/GEOCOLOR/</title>
4765
     */
4766
    const std::string osTitleIndexOfPrefix = "<title>Index of ";
4767
    const std::string osExpectedSuffix = std::string(pszDir).append("</title>");
4768
    const std::string osExpectedSuffixWithSlash =
4769
        std::string(pszDir).append("/</title>");
4770
    /* FTP */
4771
    const std::string osExpectedStringFTP =
4772
        std::string("FTP Listing of ").append(pszDir).append("/");
4773
    /* Apache 1.3.33 */
4774
    const std::string osExpectedStringOldApache =
4775
        std::string("<TITLE>Index of ").append(pszDir).append("</TITLE>");
4776
4777
    // The listing of
4778
    // http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/
4779
    // has
4780
    // "<title>Index of /srtm/SRTM_image_sample/picture examples</title>"
4781
    // so we must try unescaped %20 also.
4782
    // Similar with
4783
    // http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/
4784
    std::string osExpectedString_unescaped;
4785
    if (strchr(pszDir, '%'))
4786
    {
4787
        char *pszUnescapedDir = CPLUnescapeString(pszDir, nullptr, CPLES_URL);
4788
        osExpectedString_unescaped = osTitleIndexOfPrefix;
4789
        osExpectedString_unescaped += pszUnescapedDir;
4790
        osExpectedString_unescaped += "</title>";
4791
        CPLFree(pszUnescapedDir);
4792
    }
4793
4794
    char *c = nullptr;
4795
    int nCount = 0;
4796
    int nCountTable = 0;
4797
    CPLStringList oFileList;
4798
    char *pszLine = pszData;
4799
    bool bIsHTMLDirList = false;
4800
4801
    while ((c = VSICurlParserFindEOL(pszLine)) != nullptr)
4802
    {
4803
        *c = '\0';
4804
4805
        // To avoid false positive on pages such as
4806
        // http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA
4807
        // This is a heuristics, but normal HTML listing of files have not more
4808
        // than one table.
4809
        if (strstr(pszLine, "<table"))
4810
        {
4811
            nCountTable++;
4812
            if (nCountTable == 2)
4813
            {
4814
                *pbGotFileList = false;
4815
                return nullptr;
4816
            }
4817
        }
4818
4819
        if (!bIsHTMLDirList &&
4820
            ((strstr(pszLine, osTitleIndexOfPrefix.c_str()) &&
4821
              (strstr(pszLine, osExpectedSuffix.c_str()) ||
4822
               strstr(pszLine, osExpectedSuffixWithSlash.c_str()))) ||
4823
             strstr(pszLine, osExpectedStringFTP.c_str()) ||
4824
             strstr(pszLine, osExpectedStringOldApache.c_str()) ||
4825
             (!osExpectedString_unescaped.empty() &&
4826
              strstr(pszLine, osExpectedString_unescaped.c_str()))))
4827
        {
4828
            bIsHTMLDirList = true;
4829
            *pbGotFileList = true;
4830
        }
4831
        // Subversion HTTP listing
4832
        // or Microsoft-IIS/6.0 listing
4833
        // (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
4834
        else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
4835
        {
4836
            // Detect something like:
4837
            // <html><head><title>gdal - Revision 20739:
4838
            // /trunk/autotest/gcore/data</title></head> */ The annoying thing
4839
            // is that what is after ': ' is a subpart of what is after
4840
            // http://server/
4841
            char *pszSubDir = strstr(pszLine, ": ");
4842
            if (pszSubDir == nullptr)
4843
                // or <title>ortho.linz.govt.nz - /tifs/2005_06/</title>
4844
                pszSubDir = strstr(pszLine, "- ");
4845
            if (pszSubDir)
4846
            {
4847
                pszSubDir += 2;
4848
                char *pszTmp = strstr(pszSubDir, "</title>");
4849
                if (pszTmp)
4850
                {
4851
                    if (pszTmp[-1] == '/')
4852
                        pszTmp[-1] = 0;
4853
                    else
4854
                        *pszTmp = 0;
4855
                    if (strstr(pszDir, pszSubDir))
4856
                    {
4857
                        bIsHTMLDirList = true;
4858
                        *pbGotFileList = true;
4859
                    }
4860
                }
4861
            }
4862
        }
4863
        else if (bIsHTMLDirList &&
4864
                 (strstr(pszLine, "<a href=\"") != nullptr ||
4865
                  strstr(pszLine, "<A HREF=\"") != nullptr) &&
4866
                 // Exclude absolute links, like to subversion home.
4867
                 strstr(pszLine, "<a href=\"http://") == nullptr &&
4868
                 // exclude parent directory.
4869
                 strstr(pszLine, "Parent Directory") == nullptr)
4870
        {
4871
            char *beginFilename = strstr(pszLine, "<a href=\"");
4872
            if (beginFilename == nullptr)
4873
                beginFilename = strstr(pszLine, "<A HREF=\"");
4874
            beginFilename += strlen("<a href=\"");
4875
            char *endQuote = strchr(beginFilename, '"');
4876
            if (endQuote && !STARTS_WITH(beginFilename, "?C=") &&
4877
                !STARTS_WITH(beginFilename, "?N="))
4878
            {
4879
                struct tm brokendowntime;
4880
                memset(&brokendowntime, 0, sizeof(brokendowntime));
4881
                GUIntBig nFileSize = 0;
4882
                GIntBig mTime = 0;
4883
4884
                VSICurlParseHTMLDateTimeFileSize(pszLine, brokendowntime,
4885
                                                 nFileSize, mTime);
4886
4887
                *endQuote = '\0';
4888
4889
                // Remove trailing slash, that are returned for directories by
4890
                // Apache.
4891
                bool bIsDirectory = false;
4892
                if (endQuote[-1] == '/')
4893
                {
4894
                    bIsDirectory = true;
4895
                    endQuote[-1] = 0;
4896
                }
4897
4898
                // shttpd links include slashes from the root directory.
4899
                // Skip them.
4900
                while (strchr(beginFilename, '/'))
4901
                    beginFilename = strchr(beginFilename, '/') + 1;
4902
4903
                if (strcmp(beginFilename, ".") != 0 &&
4904
                    strcmp(beginFilename, "..") != 0)
4905
                {
4906
                    std::string osCachedFilename =
4907
                        CPLSPrintf("%s/%s", osURL.c_str(), beginFilename);
4908
4909
                    FileProp cachedFileProp;
4910
                    GetCachedFileProp(osCachedFilename.c_str(), cachedFileProp);
4911
                    cachedFileProp.eExists = EXIST_YES;
4912
                    cachedFileProp.bIsDirectory = bIsDirectory;
4913
                    cachedFileProp.mTime = static_cast<time_t>(mTime);
4914
                    cachedFileProp.bHasComputedFileSize = nFileSize > 0;
4915
                    cachedFileProp.fileSize = nFileSize;
4916
                    SetCachedFileProp(osCachedFilename.c_str(), cachedFileProp);
4917
4918
                    oFileList.AddString(beginFilename);
4919
                    if (ENABLE_DEBUG_VERBOSE)
4920
                    {
4921
                        CPLDebug(
4922
                            GetDebugKey(),
4923
                            "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB
4924
                            ", time = %04d/%02d/%02d %02d:%02d:%02d",
4925
                            nCount, osCachedFilename.c_str(),
4926
                            bIsDirectory ? 1 : 0, nFileSize,
4927
                            brokendowntime.tm_year + 1900,
4928
                            brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
4929
                            brokendowntime.tm_hour, brokendowntime.tm_min,
4930
                            brokendowntime.tm_sec);
4931
                    }
4932
                    nCount++;
4933
4934
                    if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles)
4935
                        break;
4936
                }
4937
            }
4938
        }
4939
        pszLine = c + 1;
4940
    }
4941
4942
    return oFileList.StealList();
4943
}
4944
4945
/************************************************************************/
4946
/*                        GetStreamingFilename()                        */
4947
/************************************************************************/
4948
4949
std::string VSICurlFilesystemHandler::GetStreamingFilename(
4950
    const std::string &osFilename) const
4951
{
4952
    if (STARTS_WITH(osFilename.c_str(), GetFSPrefix().c_str()))
4953
        return "/vsicurl_streaming/" + osFilename.substr(GetFSPrefix().size());
4954
    return osFilename;
4955
}
4956
4957
/************************************************************************/
4958
/*                          VSICurlGetToken()                           */
4959
/************************************************************************/
4960
4961
// Extract the next space-delimited token starting at pszCurPtr.
//
// The token is NUL-terminated in place and returned. *ppszNextToken is set
// to the first non-space character following the token, or to nullptr when
// the token runs up to the end of the string.
//
// Returns nullptr, without writing to *ppszNextToken, when pszCurPtr is
// nullptr or contains nothing but spaces.
static char *VSICurlGetToken(char *pszCurPtr, char **ppszNextToken)
{
    if (pszCurPtr == nullptr)
        return nullptr;

    // Skip the spaces preceding the token.
    char *pszToken = pszCurPtr + strspn(pszCurPtr, " ");
    if (*pszToken == '\0')
        return nullptr;

    // Locate the end of the token: next space or end of string.
    char *pszTokenEnd = pszToken + strcspn(pszToken, " ");
    if (*pszTokenEnd == '\0')
    {
        *ppszNextToken = nullptr;
        return pszToken;
    }

    *pszTokenEnd = '\0';
    char *pszRest = pszTokenEnd + 1;
    *ppszNextToken = pszRest + strspn(pszRest, " ");
    return pszToken;
}
4989
4990
/************************************************************************/
4991
/*                      VSICurlParseFullFTPLine()                       */
4992
/************************************************************************/
4993
4994
/* Parse lines like the following ones :
-rw-r--r--    1 10003    100           430 Jul 04  2008 COPYING
lrwxrwxrwx    1 ftp      ftp            28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
-rw-r--r--    1 ftp      ftp      725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
drwxr-xr-x  280 1003  1003  6656 Aug 26 04:17 gnu
*/
5001
5002
static bool VSICurlParseFullFTPLine(char *pszLine, char *&pszFilename,
5003
                                    bool &bSizeValid, GUIntBig &nSize,
5004
                                    bool &bIsDirectory, GIntBig &nUnixTime)
5005
{
5006
    char *pszNextToken = pszLine;
5007
    char *pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
5008
    if (pszPermissions == nullptr || strlen(pszPermissions) != 10)
5009
        return false;
5010
    bIsDirectory = pszPermissions[0] == 'd';
5011
5012
    for (int i = 0; i < 3; i++)
5013
    {
5014
        if (VSICurlGetToken(pszNextToken, &pszNextToken) == nullptr)
5015
            return false;
5016
    }
5017
5018
    char *pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
5019
    if (pszSize == nullptr)
5020
        return false;
5021
5022
    if (pszPermissions[0] == '-')
5023
    {
5024
        // Regular file.
5025
        bSizeValid = true;
5026
        nSize = CPLScanUIntBig(pszSize, static_cast<int>(strlen(pszSize)));
5027
    }
5028
5029
    struct tm brokendowntime;
5030
    memset(&brokendowntime, 0, sizeof(brokendowntime));
5031
    bool bBrokenDownTimeValid = true;
5032
5033
    char *pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
5034
    if (pszMonth == nullptr || strlen(pszMonth) != 3)
5035
        return false;
5036
5037
    int i = 0;  // Used after for.
5038
    for (; i < 12; i++)
5039
    {
5040
        if (EQUALN(pszMonth, apszMonths[i], 3))
5041
            break;
5042
    }
5043
    if (i < 12)
5044
        brokendowntime.tm_mon = i;
5045
    else
5046
        bBrokenDownTimeValid = false;
5047
5048
    char *pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
5049
    if (pszDay == nullptr || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
5050
        return false;
5051
    int nDay = atoi(pszDay);
5052
    if (nDay >= 1 && nDay <= 31)
5053
        brokendowntime.tm_mday = nDay;
5054
    else
5055
        bBrokenDownTimeValid = false;
5056
5057
    char *pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
5058
    if (pszHourOrYear == nullptr ||
5059
        (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
5060
        return false;
5061
    if (strlen(pszHourOrYear) == 4)
5062
    {
5063
        brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
5064
    }
5065
    else
5066
    {
5067
        time_t sTime;
5068
        time(&sTime);
5069
        struct tm currentBrokendowntime;
5070
        CPLUnixTimeToYMDHMS(static_cast<GIntBig>(sTime),
5071
                            &currentBrokendowntime);
5072
        brokendowntime.tm_year = currentBrokendowntime.tm_year;
5073
        brokendowntime.tm_hour = atoi(pszHourOrYear);
5074
        brokendowntime.tm_min = atoi(pszHourOrYear + 3);
5075
    }
5076
5077
    if (bBrokenDownTimeValid)
5078
        nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
5079
    else
5080
        nUnixTime = 0;
5081
5082
    if (pszNextToken == nullptr)
5083
        return false;
5084
5085
    pszFilename = pszNextToken;
5086
5087
    char *pszCurPtr = pszFilename;
5088
    while (*pszCurPtr != '\0')
5089
    {
5090
        // In case of a link, stop before the pointed part of the link.
5091
        if (pszPermissions[0] == 'l' && STARTS_WITH(pszCurPtr, " -> "))
5092
        {
5093
            break;
5094
        }
5095
        pszCurPtr++;
5096
    }
5097
    *pszCurPtr = '\0';
5098
5099
    return true;
5100
}
5101
5102
/************************************************************************/
5103
/*                         GetURLFromFilename()                         */
5104
/************************************************************************/
5105
5106
std::string VSICurlFilesystemHandlerBase::GetURLFromFilename(
5107
    const std::string &osFilename) const
5108
{
5109
    return VSICurlGetURLFromFilename(osFilename.c_str(), nullptr, nullptr,
5110
                                     nullptr, nullptr, nullptr, nullptr,
5111
                                     nullptr, nullptr);
5112
}
5113
5114
/************************************************************************/
5115
/*                          RegisterEmptyDir()                          */
5116
/************************************************************************/
5117
5118
void VSICurlFilesystemHandlerBase::RegisterEmptyDir(
5119
    const std::string &osDirname)
5120
{
5121
    CachedDirList cachedDirList;
5122
    cachedDirList.bGotFileList = true;
5123
    cachedDirList.oFileList.AddString(".");
5124
    SetCachedDirList(osDirname.c_str(), cachedDirList);
5125
}
5126
5127
/************************************************************************/
5128
/*                            GetFileList()                             */
5129
/************************************************************************/
5130
5131
char **VSICurlFilesystemHandlerBase::GetFileList(const char *pszDirname,
5132
                                                 int nMaxFiles,
5133
                                                 bool *pbGotFileList)
5134
{
5135
    if (ENABLE_DEBUG)
5136
        CPLDebug(GetDebugKey(), "GetFileList(%s)", pszDirname);
5137
5138
    *pbGotFileList = false;
5139
5140
    bool bListDir = true;
5141
    bool bEmptyDir = false;
5142
    std::string osURL(VSICurlGetURLFromFilename(pszDirname, nullptr, nullptr,
5143
                                                nullptr, &bListDir, &bEmptyDir,
5144
                                                nullptr, nullptr, nullptr));
5145
    if (bEmptyDir)
5146
    {
5147
        *pbGotFileList = true;
5148
        return CSLAddString(nullptr, ".");
5149
    }
5150
    if (!bListDir)
5151
        return nullptr;
5152
5153
    // Deal with publicly visible Azure directories.
5154
    if (STARTS_WITH(osURL.c_str(), "https://"))
5155
    {
5156
        const char *pszBlobCore =
5157
            strstr(osURL.c_str(), ".blob.core.windows.net/");
5158
        if (pszBlobCore)
5159
        {
5160
            FileProp cachedFileProp;
5161
            GetCachedFileProp(osURL.c_str(), cachedFileProp);
5162
            if (cachedFileProp.bIsAzureFolder)
5163
            {
5164
                const char *pszURLWithoutHTTPS =
5165
                    osURL.c_str() + strlen("https://");
5166
                const std::string osStorageAccount(
5167
                    pszURLWithoutHTTPS, pszBlobCore - pszURLWithoutHTTPS);
5168
                CPLConfigOptionSetter oSetter1("AZURE_NO_SIGN_REQUEST", "YES",
5169
                                               false);
5170
                CPLConfigOptionSetter oSetter2("AZURE_STORAGE_ACCOUNT",
5171
                                               osStorageAccount.c_str(), false);
5172
                const std::string osVSIAZ(std::string("/vsiaz/").append(
5173
                    pszBlobCore + strlen(".blob.core.windows.net/")));
5174
                char **papszFileList = VSIReadDirEx(osVSIAZ.c_str(), nMaxFiles);
5175
                if (papszFileList)
5176
                {
5177
                    *pbGotFileList = true;
5178
                    return papszFileList;
5179
                }
5180
            }
5181
        }
5182
    }
5183
5184
    // HACK (optimization in fact) for MBTiles driver.
5185
    if (strstr(pszDirname, ".tiles.mapbox.com") != nullptr)
5186
        return nullptr;
5187
5188
    if (STARTS_WITH(osURL.c_str(), "ftp://"))
5189
    {
5190
        WriteFuncStruct sWriteFuncData;
5191
        sWriteFuncData.pBuffer = nullptr;
5192
5193
        std::string osDirname(osURL);
5194
        osDirname += '/';
5195
5196
        char **papszFileList = nullptr;
5197
5198
        CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname);
5199
        CURL *hCurlHandle = curl_easy_init();
5200
5201
        for (int iTry = 0; iTry < 2; iTry++)
5202
        {
5203
            struct curl_slist *headers =
5204
                VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr);
5205
5206
            // On the first pass, we want to try fetching all the possible
5207
            // information (filename, file/directory, size). If that does not
5208
            // work, then try again with CURLOPT_DIRLISTONLY set.
5209
            if (iTry == 1)
5210
            {
5211
                unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
5212
            }
5213
5214
            VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr,
5215
                                       nullptr);
5216
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
5217
                                       &sWriteFuncData);
5218
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
5219
                                       VSICurlHandleWriteFunc);
5220
5221
            char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
5222
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
5223
                                       szCurlErrBuf);
5224
5225
            unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER,
5226
                                       headers);
5227
5228
            VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
5229
5230
            curl_slist_free_all(headers);
5231
5232
            if (sWriteFuncData.pBuffer == nullptr)
5233
            {
5234
                curl_easy_cleanup(hCurlHandle);
5235
                return nullptr;
5236
            }
5237
5238
            char *pszLine = sWriteFuncData.pBuffer;
5239
            char *c = nullptr;
5240
            int nCount = 0;
5241
5242
            if (STARTS_WITH_CI(pszLine, "<!DOCTYPE HTML") ||
5243
                STARTS_WITH_CI(pszLine, "<HTML>"))
5244
            {
5245
                papszFileList =
5246
                    ParseHTMLFileList(pszDirname, nMaxFiles,
5247
                                      sWriteFuncData.pBuffer, pbGotFileList);
5248
                break;
5249
            }
5250
            else if (iTry == 0)
5251
            {
5252
                CPLStringList oFileList;
5253
                *pbGotFileList = true;
5254
5255
                while ((c = strchr(pszLine, '\n')) != nullptr)
5256
                {
5257
                    *c = 0;
5258
                    if (c - pszLine > 0 && c[-1] == '\r')
5259
                        c[-1] = 0;
5260
5261
                    char *pszFilename = nullptr;
5262
                    bool bSizeValid = false;
5263
                    GUIntBig nFileSize = 0;
5264
                    bool bIsDirectory = false;
5265
                    GIntBig mUnixTime = 0;
5266
                    if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
5267
                                                 bSizeValid, nFileSize,
5268
                                                 bIsDirectory, mUnixTime))
5269
                        break;
5270
5271
                    if (strcmp(pszFilename, ".") != 0 &&
5272
                        strcmp(pszFilename, "..") != 0)
5273
                    {
5274
                        if (CPLHasUnbalancedPathTraversal(pszFilename))
5275
                        {
5276
                            CPLError(CE_Warning, CPLE_AppDefined,
5277
                                     "Ignoring '%s' that has a path traversal "
5278
                                     "pattern",
5279
                                     pszFilename);
5280
                        }
5281
                        else
5282
                        {
5283
                            std::string osCachedFilename =
5284
                                CPLSPrintf("%s/%s", osURL.c_str(), pszFilename);
5285
5286
                            FileProp cachedFileProp;
5287
                            GetCachedFileProp(osCachedFilename.c_str(),
5288
                                              cachedFileProp);
5289
                            cachedFileProp.eExists = EXIST_YES;
5290
                            cachedFileProp.bIsDirectory = bIsDirectory;
5291
                            cachedFileProp.mTime =
5292
                                static_cast<time_t>(mUnixTime);
5293
                            cachedFileProp.bHasComputedFileSize = bSizeValid;
5294
                            cachedFileProp.fileSize = nFileSize;
5295
                            SetCachedFileProp(osCachedFilename.c_str(),
5296
                                              cachedFileProp);
5297
5298
                            oFileList.AddString(pszFilename);
5299
                            if (ENABLE_DEBUG_VERBOSE)
5300
                            {
5301
                                struct tm brokendowntime;
5302
                                CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
5303
                                CPLDebug(
5304
                                    GetDebugKey(),
5305
                                    "File[%d] = %s, is_dir = %d, size "
5306
                                    "= " CPL_FRMT_GUIB
5307
                                    ", time = %04d/%02d/%02d %02d:%02d:%02d",
5308
                                    nCount, pszFilename, bIsDirectory ? 1 : 0,
5309
                                    nFileSize, brokendowntime.tm_year + 1900,
5310
                                    brokendowntime.tm_mon + 1,
5311
                                    brokendowntime.tm_mday,
5312
                                    brokendowntime.tm_hour,
5313
                                    brokendowntime.tm_min,
5314
                                    brokendowntime.tm_sec);
5315
                            }
5316
5317
                            nCount++;
5318
5319
                            if (nMaxFiles > 0 && oFileList.Count() > nMaxFiles)
5320
                                break;
5321
                        }
5322
                    }
5323
5324
                    pszLine = c + 1;
5325
                }
5326
5327
                if (c == nullptr)
5328
                {
5329
                    papszFileList = oFileList.StealList();
5330
                    break;
5331
                }
5332
            }
5333
            else
5334
            {
5335
                CPLStringList oFileList;
5336
                *pbGotFileList = true;
5337
5338
                while ((c = strchr(pszLine, '\n')) != nullptr)
5339
                {
5340
                    *c = 0;
5341
                    if (c - pszLine > 0 && c[-1] == '\r')
5342
                        c[-1] = 0;
5343
5344
                    if (strcmp(pszLine, ".") != 0 && strcmp(pszLine, "..") != 0)
5345
                    {
5346
                        oFileList.AddString(pszLine);
5347
                        if (ENABLE_DEBUG_VERBOSE)
5348
                        {
5349
                            CPLDebug(GetDebugKey(), "File[%d] = %s", nCount,
5350
                                     pszLine);
5351
                        }
5352
                        nCount++;
5353
                    }
5354
5355
                    pszLine = c + 1;
5356
                }
5357
5358
                papszFileList = oFileList.StealList();
5359
            }
5360
5361
            CPLFree(sWriteFuncData.pBuffer);
5362
            sWriteFuncData.pBuffer = nullptr;
5363
        }
5364
5365
        CPLFree(sWriteFuncData.pBuffer);
5366
        curl_easy_cleanup(hCurlHandle);
5367
5368
        return papszFileList;
5369
    }
5370
5371
    // Try to recognize HTML pages that list the content of a directory.
5372
    // Currently this supports what Apache and shttpd can return.
5373
    else if (STARTS_WITH(osURL.c_str(), "http://") ||
5374
             STARTS_WITH(osURL.c_str(), "https://"))
5375
    {
5376
        std::string osDirname(std::move(osURL));
5377
        osDirname += '/';
5378
5379
        CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osDirname);
5380
        CURL *hCurlHandle = curl_easy_init();
5381
5382
        struct curl_slist *headers =
5383
            VSICurlSetOptions(hCurlHandle, osDirname.c_str(), nullptr);
5384
5385
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, nullptr);
5386
5387
        WriteFuncStruct sWriteFuncData;
5388
        VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
5389
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA,
5390
                                   &sWriteFuncData);
5391
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
5392
                                   VSICurlHandleWriteFunc);
5393
5394
        char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
5395
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER,
5396
                                   szCurlErrBuf);
5397
5398
        unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
5399
5400
        VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
5401
5402
        curl_slist_free_all(headers);
5403
5404
        NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
5405
5406
        if (sWriteFuncData.pBuffer == nullptr)
5407
        {
5408
            curl_easy_cleanup(hCurlHandle);
5409
            return nullptr;
5410
        }
5411
5412
        char **papszFileList = nullptr;
5413
        if (STARTS_WITH_CI(sWriteFuncData.pBuffer, "<?xml") &&
5414
            strstr(sWriteFuncData.pBuffer, "<ListBucketResult") != nullptr)
5415
        {
5416
            CPLStringList osFileList;
5417
            std::string osBaseURL(pszDirname);
5418
            osBaseURL += "/";
5419
            bool bIsTruncated = true;
5420
            bool ret = AnalyseS3FileList(
5421
                osBaseURL, sWriteFuncData.pBuffer, osFileList, nMaxFiles,
5422
                GetS3IgnoredStorageClasses(), bIsTruncated);
5423
            // If the list is truncated, then don't report it.
5424
            if (ret && !bIsTruncated)
5425
            {
5426
                if (osFileList.empty())
5427
                {
5428
                    // To avoid an error to be reported
5429
                    osFileList.AddString(".");
5430
                }
5431
                papszFileList = osFileList.StealList();
5432
                *pbGotFileList = true;
5433
            }
5434
        }
5435
        else
5436
        {
5437
            papszFileList = ParseHTMLFileList(
5438
                pszDirname, nMaxFiles, sWriteFuncData.pBuffer, pbGotFileList);
5439
        }
5440
5441
        CPLFree(sWriteFuncData.pBuffer);
5442
        curl_easy_cleanup(hCurlHandle);
5443
        return papszFileList;
5444
    }
5445
5446
    return nullptr;
5447
}
5448
5449
/************************************************************************/
5450
/*                     GetS3IgnoredStorageClasses()                     */
5451
/************************************************************************/
5452
5453
std::set<std::string> VSICurlFilesystemHandlerBase::GetS3IgnoredStorageClasses()
5454
{
5455
    std::set<std::string> oSetIgnoredStorageClasses;
5456
    const char *pszIgnoredStorageClasses =
5457
        CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_STORAGE_CLASSES", nullptr);
5458
    const char *pszIgnoreGlacierStorage =
5459
        CPLGetConfigOption("CPL_VSIL_CURL_IGNORE_GLACIER_STORAGE", nullptr);
5460
    CPLStringList aosIgnoredStorageClasses(
5461
        CSLTokenizeString2(pszIgnoredStorageClasses ? pszIgnoredStorageClasses
5462
                                                    : "GLACIER,DEEP_ARCHIVE",
5463
                           ",", 0));
5464
    for (int i = 0; i < aosIgnoredStorageClasses.size(); ++i)
5465
        oSetIgnoredStorageClasses.insert(aosIgnoredStorageClasses[i]);
5466
    if (pszIgnoredStorageClasses == nullptr &&
5467
        pszIgnoreGlacierStorage != nullptr &&
5468
        !CPLTestBool(pszIgnoreGlacierStorage))
5469
    {
5470
        oSetIgnoredStorageClasses.clear();
5471
    }
5472
    return oSetIgnoredStorageClasses;
5473
}
5474
5475
/************************************************************************/
5476
/*                                Stat()                                */
5477
/************************************************************************/
5478
5479
int VSICurlFilesystemHandlerBase::Stat(const char *pszFilename,
5480
                                       VSIStatBufL *pStatBuf, int nFlags)
5481
{
5482
    if (!STARTS_WITH_CI(pszFilename, GetFSPrefix().c_str()) &&
5483
        !STARTS_WITH_CI(pszFilename, "/vsicurl?"))
5484
        return -1;
5485
5486
    memset(pStatBuf, 0, sizeof(VSIStatBufL));
5487
5488
    if ((nFlags & VSI_STAT_CACHE_ONLY) != 0)
5489
    {
5490
        cpl::FileProp oFileProp;
5491
        if (!GetCachedFileProp(GetURLFromFilename(pszFilename).c_str(),
5492
                               oFileProp) ||
5493
            oFileProp.eExists != EXIST_YES)
5494
        {
5495
            return -1;
5496
        }
5497
        pStatBuf->st_mode = static_cast<unsigned short>(oFileProp.nMode);
5498
        pStatBuf->st_mtime = oFileProp.mTime;
5499
        pStatBuf->st_size = oFileProp.fileSize;
5500
        return 0;
5501
    }
5502
5503
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5504
    NetworkStatisticsAction oContextAction("Stat");
5505
5506
    const std::string osFilename(pszFilename);
5507
5508
    if (!IsAllowedFilename(pszFilename))
5509
        return -1;
5510
5511
    bool bListDir = true;
5512
    bool bEmptyDir = false;
5513
    std::string osURL(VSICurlGetURLFromFilename(pszFilename, nullptr, nullptr,
5514
                                                nullptr, &bListDir, &bEmptyDir,
5515
                                                nullptr, nullptr, nullptr));
5516
5517
    const char *pszOptionVal = VSIGetPathSpecificOption(
5518
        pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
5519
    const bool bSkipReadDir =
5520
        !bListDir || bEmptyDir || EQUAL(pszOptionVal, "EMPTY_DIR") ||
5521
        CPLTestBool(pszOptionVal) || !AllowCachedDataFor(pszFilename);
5522
5523
    // Does it look like a FTP directory?
5524
    if (STARTS_WITH(osURL.c_str(), "ftp://") && osFilename.back() == '/' &&
5525
        !bSkipReadDir)
5526
    {
5527
        char **papszFileList = ReadDirEx(osFilename.c_str(), 0);
5528
        if (papszFileList)
5529
        {
5530
            pStatBuf->st_mode = S_IFDIR;
5531
            pStatBuf->st_size = 0;
5532
5533
            CSLDestroy(papszFileList);
5534
5535
            return 0;
5536
        }
5537
        return -1;
5538
    }
5539
    else if (strchr(CPLGetFilename(osFilename.c_str()), '.') != nullptr &&
5540
             !STARTS_WITH_CI(CPLGetExtensionSafe(osFilename.c_str()).c_str(),
5541
                             "zip") &&
5542
             strstr(osFilename.c_str(), ".zip.") != nullptr &&
5543
             strstr(osFilename.c_str(), ".ZIP.") != nullptr && !bSkipReadDir)
5544
    {
5545
        bool bGotFileList = false;
5546
        char **papszFileList = ReadDirInternal(
5547
            CPLGetDirnameSafe(osFilename.c_str()).c_str(), 0, &bGotFileList);
5548
        const bool bFound =
5549
            VSICurlIsFileInList(papszFileList,
5550
                                CPLGetFilename(osFilename.c_str())) != -1;
5551
        CSLDestroy(papszFileList);
5552
        if (bGotFileList && !bFound)
5553
        {
5554
            return -1;
5555
        }
5556
    }
5557
5558
    VSICurlHandle *poHandle = CreateFileHandle(osFilename.c_str());
5559
    if (poHandle == nullptr)
5560
        return -1;
5561
5562
    if (poHandle->IsKnownFileSize() ||
5563
        ((nFlags & VSI_STAT_SIZE_FLAG) && !poHandle->IsDirectory() &&
5564
         CPLTestBool(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))))
5565
    {
5566
        pStatBuf->st_size = poHandle->GetFileSize(true);
5567
    }
5568
5569
    const int nRet =
5570
        poHandle->Exists((nFlags & VSI_STAT_SET_ERROR_FLAG) > 0) ? 0 : -1;
5571
    pStatBuf->st_mtime = poHandle->GetMTime();
5572
    pStatBuf->st_mode = static_cast<unsigned short>(poHandle->GetMode());
5573
    if (pStatBuf->st_mode == 0)
5574
        pStatBuf->st_mode = poHandle->IsDirectory() ? S_IFDIR : S_IFREG;
5575
    delete poHandle;
5576
    return nRet;
5577
}
5578
5579
/************************************************************************/
5580
/*                          ReadDirInternal()                           */
5581
/************************************************************************/
5582
5583
char **VSICurlFilesystemHandlerBase::ReadDirInternal(const char *pszDirname,
5584
                                                     int nMaxFiles,
5585
                                                     bool *pbGotFileList)
5586
{
5587
    std::string osDirname(pszDirname);
5588
5589
    // Replace a/b/../c by a/c
5590
    const auto posSlashDotDot = osDirname.find("/..");
5591
    if (posSlashDotDot != std::string::npos && posSlashDotDot >= 1)
5592
    {
5593
        const auto posPrecedingSlash =
5594
            osDirname.find_last_of('/', posSlashDotDot - 1);
5595
        if (posPrecedingSlash != std::string::npos && posPrecedingSlash >= 1)
5596
        {
5597
            osDirname.erase(osDirname.begin() + posPrecedingSlash,
5598
                            osDirname.begin() + posSlashDotDot + strlen("/.."));
5599
        }
5600
    }
5601
5602
    std::string osDirnameOri(osDirname);
5603
    if (osDirname + "/" == GetFSPrefix())
5604
    {
5605
        osDirname += "/";
5606
    }
5607
    else if (osDirname != GetFSPrefix())
5608
    {
5609
        while (!osDirname.empty() && osDirname.back() == '/')
5610
            osDirname.erase(osDirname.size() - 1);
5611
    }
5612
5613
    if (osDirname.size() < GetFSPrefix().size())
5614
    {
5615
        if (pbGotFileList)
5616
            *pbGotFileList = true;
5617
        return nullptr;
5618
    }
5619
5620
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5621
    NetworkStatisticsAction oContextAction("ReadDir");
5622
5623
    CPLMutexHolder oHolder(&hMutex);
5624
5625
    // If we know the file exists and is not a directory,
5626
    // then don't try to list its content.
5627
    FileProp cachedFileProp;
5628
    if (GetCachedFileProp(GetURLFromFilename(osDirname.c_str()).c_str(),
5629
                          cachedFileProp) &&
5630
        cachedFileProp.eExists == EXIST_YES && !cachedFileProp.bIsDirectory)
5631
    {
5632
        if (osDirnameOri != osDirname)
5633
        {
5634
            if (GetCachedFileProp((GetURLFromFilename(osDirname) + "/").c_str(),
5635
                                  cachedFileProp) &&
5636
                cachedFileProp.eExists == EXIST_YES &&
5637
                !cachedFileProp.bIsDirectory)
5638
            {
5639
                if (pbGotFileList)
5640
                    *pbGotFileList = true;
5641
                return nullptr;
5642
            }
5643
        }
5644
        else
5645
        {
5646
            if (pbGotFileList)
5647
                *pbGotFileList = true;
5648
            return nullptr;
5649
        }
5650
    }
5651
5652
    CachedDirList cachedDirList;
5653
    if (!GetCachedDirList(osDirname.c_str(), cachedDirList))
5654
    {
5655
        cachedDirList.oFileList.Assign(GetFileList(osDirname.c_str(), nMaxFiles,
5656
                                                   &cachedDirList.bGotFileList),
5657
                                       true);
5658
        if (cachedDirList.bGotFileList && cachedDirList.oFileList.empty())
5659
        {
5660
            // To avoid an error to be reported
5661
            cachedDirList.oFileList.AddString(".");
5662
        }
5663
        if (nMaxFiles <= 0 || cachedDirList.oFileList.size() < nMaxFiles)
5664
        {
5665
            // Only cache content if we didn't hit the limitation
5666
            SetCachedDirList(osDirname.c_str(), cachedDirList);
5667
        }
5668
    }
5669
5670
    if (pbGotFileList)
5671
        *pbGotFileList = cachedDirList.bGotFileList;
5672
5673
    return CSLDuplicate(cachedDirList.oFileList.List());
5674
}
5675
5676
/************************************************************************/
5677
/*                        InvalidateDirContent()                        */
5678
/************************************************************************/
5679
5680
void VSICurlFilesystemHandlerBase::InvalidateDirContent(
5681
    const std::string &osDirname)
5682
{
5683
    CPLMutexHolder oHolder(&hMutex);
5684
5685
    CachedDirList oCachedDirList;
5686
    if (oCacheDirList.tryGet(osDirname, oCachedDirList))
5687
    {
5688
        nCachedFilesInDirList -= oCachedDirList.oFileList.size();
5689
        oCacheDirList.remove(osDirname);
5690
    }
5691
}
5692
5693
/************************************************************************/
5694
/*                             ReadDirEx()                              */
5695
/************************************************************************/
5696
5697
char **VSICurlFilesystemHandlerBase::ReadDirEx(const char *pszDirname,
5698
                                               int nMaxFiles)
5699
{
5700
    return ReadDirInternal(pszDirname, nMaxFiles, nullptr);
5701
}
5702
5703
/************************************************************************/
5704
/*                            SiblingFiles()                            */
5705
/************************************************************************/
5706
5707
char **VSICurlFilesystemHandlerBase::SiblingFiles(const char *pszFilename)
5708
{
5709
    /* Small optimization to avoid unnecessary stat'ing from PAux or ENVI */
5710
    /* drivers. The MBTiles driver needs no companion file. */
5711
    if (EQUAL(CPLGetExtensionSafe(pszFilename).c_str(), "mbtiles"))
5712
    {
5713
        return static_cast<char **>(CPLCalloc(1, sizeof(char *)));
5714
    }
5715
    return nullptr;
5716
}
5717
5718
/************************************************************************/
5719
/*                          GetFileMetadata()                           */
5720
/************************************************************************/
5721
5722
char **VSICurlFilesystemHandlerBase::GetFileMetadata(const char *pszFilename,
5723
                                                     const char *pszDomain,
5724
                                                     CSLConstList)
5725
{
5726
    if (pszDomain == nullptr || !EQUAL(pszDomain, "HEADERS"))
5727
        return nullptr;
5728
    std::unique_ptr<VSICurlHandle> poHandle(CreateFileHandle(pszFilename));
5729
    if (poHandle == nullptr)
5730
        return nullptr;
5731
5732
    NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
5733
    NetworkStatisticsAction oContextAction("GetFileMetadata");
5734
5735
    poHandle->GetFileSizeOrHeaders(true, true);
5736
    return CSLDuplicate(poHandle->GetHeaders().List());
5737
}
5738
5739
/************************************************************************/
5740
/*                        VSIAppendWriteHandle()                        */
5741
/************************************************************************/
5742
5743
// Set up a write handle that accumulates data in a buffer of nChunkSize
// bytes. If the buffer cannot be allocated, an error is emitted and
// m_pabyBuffer is left null.
VSIAppendWriteHandle::VSIAppendWriteHandle(VSICurlFilesystemHandlerBase *poFS,
                                           const char *pszFSPrefix,
                                           const char *pszFilename,
                                           int nChunkSize)
    : m_poFS(poFS), m_osFSPrefix(pszFSPrefix), m_osFilename(pszFilename),
      m_oRetryParameters(CPLStringList(CPLHTTPGetOptionsFromEnv(pszFilename))),
      m_nBufferSize(nChunkSize)
{
    void *pWorkingBuffer = VSIMalloc(m_nBufferSize);
    m_pabyBuffer = static_cast<GByte *>(pWorkingBuffer);
    if (!m_pabyBuffer)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Cannot allocate working buffer for %s writing",
                 m_osFSPrefix.c_str());
    }
}
5759
5760
/************************************************************************/
5761
/*                       ~VSIAppendWriteHandle()                        */
5762
/************************************************************************/
5763
5764
// Release the working buffer.
VSIAppendWriteHandle::~VSIAppendWriteHandle()
{
    /* WARNING: derived classes must call Close() on their own: it cannot */
    /* be done safely from this destructor, because Close() may end up */
    /* calling Send(). */
    CPLFree(m_pabyBuffer);
}
5770
5771
/************************************************************************/
5772
/*                                Seek()                                */
5773
/************************************************************************/
5774
5775
// Accept only seeks that leave the write position unchanged: SEEK_SET to the
// current offset, or SEEK_CUR / SEEK_END with a zero offset. Any other
// request emits an error, puts the handle in error state and returns -1.
int VSIAppendWriteHandle::Seek(vsi_l_offset nOffset, int nWhence)
{
    bool bIsNoOp = false;
    switch (nWhence)
    {
        case SEEK_SET:
            bIsNoOp = (nOffset == m_nCurOffset);
            break;
        case SEEK_CUR:
        case SEEK_END:
            bIsNoOp = (nOffset == 0);
            break;
        default:
            break;
    }
    if (bIsNoOp)
        return 0;

    CPLError(CE_Failure, CPLE_NotSupported,
             "Seek not supported on writable %s files", m_osFSPrefix.c_str());
    m_bError = true;
    return -1;
}
5789
5790
/************************************************************************/
5791
/*                                Tell()                                */
5792
/************************************************************************/
5793
5794
// Return the current write offset, i.e. the number of bytes accepted by
// Write() so far.
vsi_l_offset VSIAppendWriteHandle::Tell()
{
    const vsi_l_offset nPosition = m_nCurOffset;
    return nPosition;
}
5798
5799
/************************************************************************/
5800
/*                                Read()                                */
5801
/************************************************************************/
5802
5803
size_t VSIAppendWriteHandle::Read(void * /* pBuffer */, size_t /* nBytes */)
5804
{
5805
    CPLError(CE_Failure, CPLE_NotSupported,
5806
             "Read not supported on writable %s files", m_osFSPrefix.c_str());
5807
    m_bError = true;
5808
    return 0;
5809
}
5810
5811
/************************************************************************/
5812
/*                         ReadCallBackBuffer()                         */
5813
/************************************************************************/
5814
5815
size_t VSIAppendWriteHandle::ReadCallBackBuffer(char *buffer, size_t size,
5816
                                                size_t nitems, void *instream)
5817
{
5818
    VSIAppendWriteHandle *poThis =
5819
        static_cast<VSIAppendWriteHandle *>(instream);
5820
    const int nSizeMax = static_cast<int>(size * nitems);
5821
    const int nSizeToWrite = std::min(
5822
        nSizeMax, poThis->m_nBufferOff - poThis->m_nBufferOffReadCallback);
5823
    memcpy(buffer, poThis->m_pabyBuffer + poThis->m_nBufferOffReadCallback,
5824
           nSizeToWrite);
5825
    poThis->m_nBufferOffReadCallback += nSizeToWrite;
5826
    return nSizeToWrite;
5827
}
5828
5829
/************************************************************************/
5830
/*                               Write()                                */
5831
/************************************************************************/
5832
5833
size_t VSIAppendWriteHandle::Write(const void *pBuffer, size_t nBytes)
5834
{
5835
    if (m_bError)
5836
        return 0;
5837
5838
    size_t nBytesToWrite = nBytes;
5839
    if (nBytesToWrite == 0)
5840
        return 0;
5841
5842
    const GByte *pabySrcBuffer = reinterpret_cast<const GByte *>(pBuffer);
5843
    while (nBytesToWrite > 0)
5844
    {
5845
        if (m_nBufferOff == m_nBufferSize)
5846
        {
5847
            if (!Send(false))
5848
            {
5849
                m_bError = true;
5850
                return 0;
5851
            }
5852
            m_nBufferOff = 0;
5853
        }
5854
5855
        const int nToWriteInBuffer = static_cast<int>(std::min(
5856
            static_cast<size_t>(m_nBufferSize - m_nBufferOff), nBytesToWrite));
5857
        memcpy(m_pabyBuffer + m_nBufferOff, pabySrcBuffer, nToWriteInBuffer);
5858
        pabySrcBuffer += nToWriteInBuffer;
5859
        m_nBufferOff += nToWriteInBuffer;
5860
        m_nCurOffset += nToWriteInBuffer;
5861
        nBytesToWrite -= nToWriteInBuffer;
5862
    }
5863
    return nBytes;
5864
}
5865
5866
/************************************************************************/
5867
/*                               Close()                                */
5868
/************************************************************************/
5869
5870
int VSIAppendWriteHandle::Close()
5871
{
5872
    int nRet = 0;
5873
    if (!m_bClosed)
5874
    {
5875
        m_bClosed = true;
5876
        if (!m_bError && !Send(true))
5877
            nRet = -1;
5878
    }
5879
    return nRet;
5880
}
5881
5882
/************************************************************************/
5883
/*                         CurlRequestHelper()                          */
5884
/************************************************************************/
5885
5886
// Initialize the two structures that receive the response headers and the
// response body.
CurlRequestHelper::CurlRequestHelper()
{
    // Response body
    VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);

    // Response headers
    VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, nullptr, nullptr,
                               nullptr);
}
5892
5893
/************************************************************************/
5894
/*                         ~CurlRequestHelper()                         */
5895
/************************************************************************/
5896
5897
// Free the buffers holding the response body and the response headers.
CurlRequestHelper::~CurlRequestHelper()
{
    // Response body
    CPLFree(sWriteFuncData.pBuffer);

    // Response headers
    CPLFree(sWriteFuncHeaderData.pBuffer);
}
5902
5903
/************************************************************************/
5904
/*                              perform()                               */
5905
/************************************************************************/
5906
5907
long CurlRequestHelper::perform(CURL *hCurlHandle, struct curl_slist *headers,
5908
                                VSICurlFilesystemHandlerBase *poFS,
5909
                                IVSIS3LikeHandleHelper *poS3HandleHelper)
5910
{
5911
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
5912
5913
    poS3HandleHelper->ResetQueryParameters();
5914
5915
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
5916
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
5917
                               VSICurlHandleWriteFunc);
5918
5919
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA,
5920
                               &sWriteFuncHeaderData);
5921
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION,
5922
                               VSICurlHandleWriteFunc);
5923
5924
    szCurlErrBuf[0] = '\0';
5925
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
5926
5927
    VSICURLMultiPerform(poFS->GetCurlMultiHandleFor(poS3HandleHelper->GetURL()),
5928
                        hCurlHandle);
5929
5930
    VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
5931
5932
    curl_slist_free_all(headers);
5933
5934
    long response_code = 0;
5935
    curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
5936
    return response_code;
5937
}
5938
5939
/************************************************************************/
5940
/*                       NetworkStatisticsLogger                        */
5941
/************************************************************************/
5942
5943
// Global variables
5944
// Single logger instance used by the static methods of
// NetworkStatisticsLogger: it holds the mutex, the per-thread context paths
// and the collected statistics.
NetworkStatisticsLogger NetworkStatisticsLogger::gInstance{};
5945
// Enabled state: -1 until ReadEnabled() sets it to TRUE or FALSE from the
// configuration options; Reset() puts it back to -1.
int NetworkStatisticsLogger::gnEnabled = -1;  // unknown state
5946
5947
static void ShowNetworkStats()
5948
{
5949
    printf("Network statistics:\n%s\n",  // ok
5950
           NetworkStatisticsLogger::GetReportAsSerializedJSON().c_str());
5951
}
5952
5953
void NetworkStatisticsLogger::ReadEnabled()
5954
{
5955
    const bool bShowNetworkStats =
5956
        CPLTestBool(CPLGetConfigOption("CPL_VSIL_SHOW_NETWORK_STATS", "NO"));
5957
    gnEnabled =
5958
        (bShowNetworkStats || CPLTestBool(CPLGetConfigOption(
5959
                                  "CPL_VSIL_NETWORK_STATS_ENABLED", "NO")))
5960
            ? TRUE
5961
            : FALSE;
5962
    if (bShowNetworkStats)
5963
    {
5964
        static bool bRegistered = false;
5965
        if (!bRegistered)
5966
        {
5967
            bRegistered = true;
5968
            atexit(ShowNetworkStats);
5969
        }
5970
    }
5971
}
5972
5973
void NetworkStatisticsLogger::EnterFileSystem(const char *pszName)
5974
{
5975
    if (!IsEnabled())
5976
        return;
5977
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
5978
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
5979
        ContextPathItem(ContextPathType::FILESYSTEM, pszName));
5980
}
5981
5982
void NetworkStatisticsLogger::LeaveFileSystem()
5983
{
5984
    if (!IsEnabled())
5985
        return;
5986
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
5987
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
5988
}
5989
5990
void NetworkStatisticsLogger::EnterFile(const char *pszName)
5991
{
5992
    if (!IsEnabled())
5993
        return;
5994
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
5995
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
5996
        ContextPathItem(ContextPathType::FILE, pszName));
5997
}
5998
5999
void NetworkStatisticsLogger::LeaveFile()
6000
{
6001
    if (!IsEnabled())
6002
        return;
6003
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6004
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
6005
}
6006
6007
void NetworkStatisticsLogger::EnterAction(const char *pszName)
6008
{
6009
    if (!IsEnabled())
6010
        return;
6011
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6012
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].push_back(
6013
        ContextPathItem(ContextPathType::ACTION, pszName));
6014
}
6015
6016
void NetworkStatisticsLogger::LeaveAction()
6017
{
6018
    if (!IsEnabled())
6019
        return;
6020
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6021
    gInstance.m_mapThreadIdToContextPath[CPLGetPID()].pop_back();
6022
}
6023
6024
std::vector<NetworkStatisticsLogger::Counters *>
6025
NetworkStatisticsLogger::GetCountersForContext()
6026
{
6027
    std::vector<Counters *> v;
6028
    const auto &contextPath = gInstance.m_mapThreadIdToContextPath[CPLGetPID()];
6029
6030
    Stats *curStats = &m_stats;
6031
    v.push_back(&(curStats->counters));
6032
6033
    bool inFileSystem = false;
6034
    bool inFile = false;
6035
    bool inAction = false;
6036
    for (const auto &item : contextPath)
6037
    {
6038
        if (item.eType == ContextPathType::FILESYSTEM)
6039
        {
6040
            if (inFileSystem)
6041
                continue;
6042
            inFileSystem = true;
6043
        }
6044
        else if (item.eType == ContextPathType::FILE)
6045
        {
6046
            if (inFile)
6047
                continue;
6048
            inFile = true;
6049
        }
6050
        else if (item.eType == ContextPathType::ACTION)
6051
        {
6052
            if (inAction)
6053
                continue;
6054
            inAction = true;
6055
        }
6056
6057
        curStats = &(curStats->children[item]);
6058
        v.push_back(&(curStats->counters));
6059
    }
6060
6061
    return v;
6062
}
6063
6064
void NetworkStatisticsLogger::LogGET(size_t nDownloadedBytes)
6065
{
6066
    if (!IsEnabled())
6067
        return;
6068
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6069
    for (auto counters : gInstance.GetCountersForContext())
6070
    {
6071
        counters->nGET++;
6072
        counters->nGETDownloadedBytes += nDownloadedBytes;
6073
    }
6074
}
6075
6076
void NetworkStatisticsLogger::LogPUT(size_t nUploadedBytes)
6077
{
6078
    if (!IsEnabled())
6079
        return;
6080
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6081
    for (auto counters : gInstance.GetCountersForContext())
6082
    {
6083
        counters->nPUT++;
6084
        counters->nPUTUploadedBytes += nUploadedBytes;
6085
    }
6086
}
6087
6088
void NetworkStatisticsLogger::LogHEAD()
6089
{
6090
    if (!IsEnabled())
6091
        return;
6092
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6093
    for (auto counters : gInstance.GetCountersForContext())
6094
    {
6095
        counters->nHEAD++;
6096
    }
6097
}
6098
6099
void NetworkStatisticsLogger::LogPOST(size_t nUploadedBytes,
6100
                                      size_t nDownloadedBytes)
6101
{
6102
    if (!IsEnabled())
6103
        return;
6104
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6105
    for (auto counters : gInstance.GetCountersForContext())
6106
    {
6107
        counters->nPOST++;
6108
        counters->nPOSTUploadedBytes += nUploadedBytes;
6109
        counters->nPOSTDownloadedBytes += nDownloadedBytes;
6110
    }
6111
}
6112
6113
void NetworkStatisticsLogger::LogDELETE()
6114
{
6115
    if (!IsEnabled())
6116
        return;
6117
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6118
    for (auto counters : gInstance.GetCountersForContext())
6119
    {
6120
        counters->nDELETE++;
6121
    }
6122
}
6123
6124
void NetworkStatisticsLogger::Reset()
6125
{
6126
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6127
    gInstance.m_stats = Stats();
6128
    gnEnabled = -1;
6129
}
6130
6131
// Serialize this statistics node, and recursively its children, into oJSON.
// Non-zero counters are written under "methods"; children are written under
// "handlers/<name>", "files" or "actions/<name>" depending on their type.
void NetworkStatisticsLogger::Stats::AsJSON(CPLJSONObject &oJSON) const
{
    // Counters of this node: zero values are omitted.
    CPLJSONObject oMethods;
    if (counters.nHEAD)
    {
        oMethods.Add("HEAD/count", counters.nHEAD);
    }
    if (counters.nGET)
    {
        oMethods.Add("GET/count", counters.nGET);
    }
    if (counters.nGETDownloadedBytes)
    {
        oMethods.Add("GET/downloaded_bytes", counters.nGETDownloadedBytes);
    }
    if (counters.nPUT)
    {
        oMethods.Add("PUT/count", counters.nPUT);
    }
    if (counters.nPUTUploadedBytes)
    {
        oMethods.Add("PUT/uploaded_bytes", counters.nPUTUploadedBytes);
    }
    if (counters.nPOST)
    {
        oMethods.Add("POST/count", counters.nPOST);
    }
    if (counters.nPOSTUploadedBytes)
    {
        oMethods.Add("POST/uploaded_bytes", counters.nPOSTUploadedBytes);
    }
    if (counters.nPOSTDownloadedBytes)
    {
        oMethods.Add("POST/downloaded_bytes", counters.nPOSTDownloadedBytes);
    }
    if (counters.nDELETE)
    {
        oMethods.Add("DELETE/count", counters.nDELETE);
    }
    oJSON.Add("methods", oMethods);

    // Children of this node.
    CPLJSONObject oFiles;
    bool bFilesAttached = false;
    for (const auto &oEntry : children)
    {
        const auto &oKey = oEntry.first;

        CPLJSONObject oChildJSON;
        oEntry.second.AsJSON(oChildJSON);

        if (oKey.eType == ContextPathType::FILESYSTEM)
        {
            // Strip one leading and one trailing '/' from the handler name.
            std::string osHandlerName(oKey.osName);
            if (!osHandlerName.empty() && osHandlerName[0] == '/')
                osHandlerName.erase(0, 1);
            if (!osHandlerName.empty() && osHandlerName.back() == '/')
                osHandlerName.pop_back();
            const std::string osPath = "handlers/" + osHandlerName;
            oJSON.Add(osPath.c_str(), oChildJSON);
        }
        else if (oKey.eType == ContextPathType::FILE)
        {
            // "files" is attached to oJSON when the first file is met, and
            // filled afterwards (presumably oFiles and the added member share
            // the same underlying object -- confirm against CPLJSONObject).
            if (!bFilesAttached)
            {
                bFilesAttached = true;
                oJSON.Add("files", oFiles);
            }
            // File names may contain '/': add them without path splitting.
            oFiles.AddNoSplitName(oKey.osName.c_str(), oChildJSON);
        }
        else if (oKey.eType == ContextPathType::ACTION)
        {
            const std::string osPath = "actions/" + oKey.osName;
            oJSON.Add(osPath.c_str(), oChildJSON);
        }
    }
}
6183
6184
std::string NetworkStatisticsLogger::GetReportAsSerializedJSON()
6185
{
6186
    std::lock_guard<std::mutex> oLock(gInstance.m_mutex);
6187
6188
    CPLJSONObject oJSON;
6189
    gInstance.m_stats.AsJSON(oJSON);
6190
    return oJSON.Format(CPLJSONObject::PrettyFormat::Pretty);
6191
}
6192
6193
} /* end of namespace cpl */
6194
6195
/************************************************************************/
6196
/*                    VSICurlParseUnixPermissions()                     */
6197
/************************************************************************/
6198
6199
/** Convert a 9-character "rwxrwxrwx"-style permission string to mode bits.
 *
 * Position i of the string is compared with the letter expected at that
 * position ('r', 'w' or 'x' for user, group, then others); the matching
 * S_I* bit is set when they are equal. Any other character (such as '-')
 * leaves the bit cleared.
 *
 * @param pszPermissions permission string. May be NULL.
 * @return the OR-ed S_I* mode bits, or 0 if pszPermissions is NULL or is not
 *         exactly 9 characters long.
 */
int VSICurlParseUnixPermissions(const char *pszPermissions)
{
    static const struct
    {
        char chExpected;
        int nModeBit;
    } asPermBits[] = {
        {'r', S_IRUSR}, {'w', S_IWUSR}, {'x', S_IXUSR},
        {'r', S_IRGRP}, {'w', S_IWGRP}, {'x', S_IXGRP},
        {'r', S_IROTH}, {'w', S_IWOTH}, {'x', S_IXOTH},
    };
    const size_t nExpectedLen = sizeof(asPermBits) / sizeof(asPermBits[0]);

    // strlen() on a NULL pointer is undefined behaviour: reject it first.
    if (!pszPermissions || strlen(pszPermissions) != nExpectedLen)
        return 0;

    int nMode = 0;
    for (size_t i = 0; i < nExpectedLen; ++i)
    {
        if (pszPermissions[i] == asPermBits[i].chExpected)
            nMode |= asPermBits[i].nModeBit;
    }
    return nMode;
}
6224
6225
/************************************************************************/
6226
/*                      Cache of file properties.                       */
6227
/************************************************************************/
6228
6229
// Mutex locked by each VSICURL*CachedFileProp*() / VSICURLDestroyCacheFileProp()
// function of this file before it accesses poCacheFileProp.
static std::mutex oCacheFilePropMutex;
6230
// LRU cache of file properties keyed by URL. Starts as nullptr, is allocated
// by VSICURLSetCachedFileProp() on first insertion and deleted by
// VSICURLDestroyCacheFileProp().
static lru11::Cache<std::string, cpl::FileProp> *poCacheFileProp = nullptr;
6231
6232
/************************************************************************/
6233
/*                      VSICURLGetCachedFileProp()                      */
6234
/************************************************************************/
6235
6236
bool VSICURLGetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp)
6237
{
6238
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6239
    return poCacheFileProp != nullptr &&
6240
           poCacheFileProp->tryGet(std::string(pszURL), oFileProp) &&
6241
           // Let a chance to use new auth parameters
6242
           !(oFileProp.eExists == cpl::EXIST_NO &&
6243
             gnGenerationAuthParameters != oFileProp.nGenerationAuthParameters);
6244
}
6245
6246
/************************************************************************/
6247
/*                      VSICURLSetCachedFileProp()                      */
6248
/************************************************************************/
6249
6250
void VSICURLSetCachedFileProp(const char *pszURL, cpl::FileProp &oFileProp)
6251
{
6252
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6253
    if (poCacheFileProp == nullptr)
6254
        poCacheFileProp =
6255
            new lru11::Cache<std::string, cpl::FileProp>(100 * 1024);
6256
    oFileProp.nGenerationAuthParameters = gnGenerationAuthParameters;
6257
    poCacheFileProp->insert(std::string(pszURL), oFileProp);
6258
}
6259
6260
/************************************************************************/
6261
/*                  VSICURLInvalidateCachedFileProp()                   */
6262
/************************************************************************/
6263
6264
void VSICURLInvalidateCachedFileProp(const char *pszURL)
6265
{
6266
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6267
    if (poCacheFileProp != nullptr)
6268
        poCacheFileProp->remove(std::string(pszURL));
6269
}
6270
6271
/************************************************************************/
6272
/*               VSICURLInvalidateCachedFilePropPrefix()                */
6273
/************************************************************************/
6274
6275
void VSICURLInvalidateCachedFilePropPrefix(const char *pszURL)
6276
{
6277
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6278
    if (poCacheFileProp != nullptr)
6279
    {
6280
        std::list<std::string> keysToRemove;
6281
        const size_t nURLSize = strlen(pszURL);
6282
        auto lambda =
6283
            [&keysToRemove, &pszURL, nURLSize](
6284
                const lru11::KeyValuePair<std::string, cpl::FileProp> &kv)
6285
        {
6286
            if (strncmp(kv.key.c_str(), pszURL, nURLSize) == 0)
6287
                keysToRemove.push_back(kv.key);
6288
        };
6289
        poCacheFileProp->cwalk(lambda);
6290
        for (const auto &key : keysToRemove)
6291
            poCacheFileProp->remove(key);
6292
    }
6293
}
6294
6295
/************************************************************************/
6296
/*                    VSICURLDestroyCacheFileProp()                     */
6297
/************************************************************************/
6298
6299
void VSICURLDestroyCacheFileProp()
6300
{
6301
    std::lock_guard<std::mutex> oLock(oCacheFilePropMutex);
6302
    delete poCacheFileProp;
6303
    poCacheFileProp = nullptr;
6304
}
6305
6306
/************************************************************************/
6307
/*                        VSICURLMultiCleanup()                         */
6308
/************************************************************************/
6309
6310
// Call curl_multi_cleanup() on hCurlMultiHandle, bracketed by
// CPLHTTPIgnoreSigPipe() / CPLHTTPRestoreSigPipeHandler().
void VSICURLMultiCleanup(CURLM *hCurlMultiHandle)
{
    void *const pPreviousSigPipeHandler = CPLHTTPIgnoreSigPipe();

    curl_multi_cleanup(hCurlMultiHandle);

    // Put back whatever CPLHTTPIgnoreSigPipe() returned.
    CPLHTTPRestoreSigPipeHandler(pPreviousSigPipeHandler);
}
6316
6317
/************************************************************************/
6318
/*                       VSICurlInstallReadCbk()                        */
6319
/************************************************************************/
6320
6321
int VSICurlInstallReadCbk(VSILFILE *fp, VSICurlReadCbkFunc pfnReadCbk,
6322
                          void *pfnUserData, int bStopOnInterruptUntilUninstall)
6323
{
6324
    return reinterpret_cast<cpl::VSICurlHandle *>(fp)->InstallReadCbk(
6325
        pfnReadCbk, pfnUserData, bStopOnInterruptUntilUninstall);
6326
}
6327
6328
/************************************************************************/
6329
/*                      VSICurlUninstallReadCbk()                       */
6330
/************************************************************************/
6331
6332
// Forward to cpl::VSICurlHandle::UninstallReadCbk() on the handle behind fp.
// fp is reinterpreted as a cpl::VSICurlHandle without any type check.
int VSICurlUninstallReadCbk(VSILFILE *fp)
{
    auto *poCurlHandle = reinterpret_cast<cpl::VSICurlHandle *>(fp);
    return poCurlHandle->UninstallReadCbk();
}
6336
6337
/************************************************************************/
6338
/*                         VSICurlSetOptions()                          */
6339
/************************************************************************/
6340
6341
// Apply CPLHTTPSetOptions() to hCurlHandle, then two FTP-specific settings.
// Returns the value returned by CPLHTTPSetOptions(), cast to a curl_slist.
struct curl_slist *VSICurlSetOptions(CURL *hCurlHandle, const char *pszURL,
                                     const char *const *papszOptions)
{
    void *pHeaders = CPLHTTPSetOptions(hCurlHandle, pszURL, papszOptions);
    struct curl_slist *psHeaders = static_cast<struct curl_slist *>(pHeaders);

    long nFTPFileMethod = CURLFTPMETHOD_SINGLECWD;
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD,
                               nFTPFileMethod);

    // EPSV is disabled because the server at
    // ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ does not like the
    // EPSV command.
    unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);

    return psHeaders;
}
6356
6357
/************************************************************************/
6358
/*                    VSICurlSetContentTypeFromExt()                    */
6359
/************************************************************************/
6360
6361
struct curl_slist *VSICurlSetContentTypeFromExt(struct curl_slist *poList,
6362
                                                const char *pszPath)
6363
{
6364
    struct curl_slist *iter = poList;
6365
    while (iter != nullptr)
6366
    {
6367
        if (STARTS_WITH_CI(iter->data, "Content-Type"))
6368
        {
6369
            return poList;
6370
        }
6371
        iter = iter->next;
6372
    }
6373
6374
    static const struct
6375
    {
6376
        const char *ext;
6377
        const char *mime;
6378
    } aosExtMimePairs[] = {
6379
        {"txt", "text/plain"}, {"json", "application/json"},
6380
        {"tif", "image/tiff"}, {"tiff", "image/tiff"},
6381
        {"jpg", "image/jpeg"}, {"jpeg", "image/jpeg"},
6382
        {"jp2", "image/jp2"},  {"jpx", "image/jp2"},
6383
        {"j2k", "image/jp2"},  {"jpc", "image/jp2"},
6384
        {"png", "image/png"},
6385
    };
6386
6387
    const std::string osExt = CPLGetExtensionSafe(pszPath);
6388
    if (!osExt.empty())
6389
    {
6390
        for (const auto &pair : aosExtMimePairs)
6391
        {
6392
            if (EQUAL(osExt.c_str(), pair.ext))
6393
            {
6394
6395
                const std::string osContentType(
6396
                    CPLSPrintf("Content-Type: %s", pair.mime));
6397
                poList = curl_slist_append(poList, osContentType.c_str());
6398
#ifdef DEBUG_VERBOSE
6399
                CPLDebug("HTTP", "Setting %s, based on lookup table.",
6400
                         osContentType.c_str());
6401
#endif
6402
                break;
6403
            }
6404
        }
6405
    }
6406
6407
    return poList;
6408
}
6409
6410
/************************************************************************/
6411
/*                VSICurlSetCreationHeadersFromOptions()                */
6412
/************************************************************************/
6413
6414
struct curl_slist *VSICurlSetCreationHeadersFromOptions(
6415
    struct curl_slist *headers, CSLConstList papszOptions, const char *pszPath)
6416
{
6417
    bool bContentTypeFound = false;
6418
    for (CSLConstList papszIter = papszOptions; papszIter && *papszIter;
6419
         ++papszIter)
6420
    {
6421
        char *pszKey = nullptr;
6422
        const char *pszValue = CPLParseNameValue(*papszIter, &pszKey);
6423
        if (pszKey && pszValue)
6424
        {
6425
            if (EQUAL(pszKey, "Content-Type"))
6426
            {
6427
                bContentTypeFound = true;
6428
            }
6429
            headers = curl_slist_append(headers,
6430
                                        CPLSPrintf("%s: %s", pszKey, pszValue));
6431
        }
6432
        CPLFree(pszKey);
6433
    }
6434
6435
    // If Content-type not found in papszOptions, try to set it from the
6436
    // filename exstension.
6437
    if (!bContentTypeFound)
6438
    {
6439
        headers = VSICurlSetContentTypeFromExt(headers, pszPath);
6440
    }
6441
6442
    return headers;
6443
}
6444
6445
#endif  // DOXYGEN_SKIP
6446
//! @endcond
6447
6448
/************************************************************************/
6449
/*                     VSIInstallCurlFileHandler()                      */
6450
/************************************************************************/
6451
6452
/*!
6453
 \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
6454
6455
 \verbatim embed:rst
6456
 See :ref:`/vsicurl/ documentation <vsicurl>`
6457
 \endverbatim
6458
6459
 */
6460
void VSIInstallCurlFileHandler(void)
6461
{
6462
    auto poHandler = std::make_shared<cpl::VSICurlFilesystemHandler>();
6463
    VSIFileManager::InstallHandler("/vsicurl/", poHandler);
6464
    VSIFileManager::InstallHandler("/vsicurl?", poHandler);
6465
}
6466
6467
/************************************************************************/
6468
/*                         VSICurlClearCache()                          */
6469
/************************************************************************/
6470
6471
/**
6472
 * \brief Clean local cache associated with /vsicurl/ (and related file systems)
6473
 *
6474
 * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/,
6475
 * /vsiswift/) cache a number of
6476
 * metadata and data for faster execution in read-only scenarios. But when the
6477
 * content on the server-side may change during the same process, those
6478
 * mechanisms can prevent opening new files, or give an outdated version of
6479
 * them.
6480
 *
6481
 */
6482
6483
void VSICurlClearCache(void)
6484
{
6485
    // FIXME ? Currently we have different filesystem instances for
6486
    // vsicurl/, /vsis3/, /vsigs/ . So each one has its own cache of regions.
6487
    // File properties cache are now shared
6488
    char **papszPrefix = VSIFileManager::GetPrefixes();
6489
    for (size_t i = 0; papszPrefix && papszPrefix[i]; ++i)
6490
    {
6491
        auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>(
6492
            VSIFileManager::GetHandler(papszPrefix[i]));
6493
6494
        if (poFSHandler)
6495
            poFSHandler->ClearCache();
6496
    }
6497
    CSLDestroy(papszPrefix);
6498
6499
    VSICurlStreamingClearCache();
6500
}
6501
6502
/************************************************************************/
6503
/*                      VSICurlPartialClearCache()                      */
6504
/************************************************************************/
6505
6506
/**
6507
 * \brief Clean local cache associated with /vsicurl/ (and related file systems)
6508
 * for a given filename (and its subfiles and subdirectories if it is a
6509
 * directory)
6510
 *
6511
 * /vsicurl (and related file systems like /vsis3/, /vsigs/, /vsiaz/, /vsioss/,
6512
 * /vsiswift/) cache a number of
6513
 * metadata and data for faster execution in read-only scenarios. But when the
6514
 * content on the server-side may change during the same process, those
6515
 * mechanisms can prevent opening new files, or give an outdated version of
6516
 * them.
6517
 *
6518
 * The filename prefix must start with the name of a known virtual file system
6519
 * (such as "/vsicurl/", "/vsis3/")
6520
 *
6521
 * VSICurlPartialClearCache("/vsis3/b") will clear all cached state for any file
6522
 * or directory starting with that prefix, so potentially "/vsis3/bucket",
6523
 * "/vsis3/basket/" or "/vsis3/basket/object".
6524
 *
6525
 * @param pszFilenamePrefix Filename prefix
6526
 */
6527
6528
void VSICurlPartialClearCache(const char *pszFilenamePrefix)
6529
{
6530
    auto poFSHandler = dynamic_cast<cpl::VSICurlFilesystemHandlerBase *>(
6531
        VSIFileManager::GetHandler(pszFilenamePrefix));
6532
6533
    if (poFSHandler)
6534
        poFSHandler->PartialClearCache(pszFilenamePrefix);
6535
}
6536
6537
/************************************************************************/
6538
/*                        VSINetworkStatsReset()                        */
6539
/************************************************************************/
6540
6541
/**
6542
 * \brief Clear network related statistics.
6543
 *
6544
 * The effect of the CPL_VSIL_NETWORK_STATS_ENABLED configuration option
6545
 * will also be reset. That is, the next network access will check its
6546
 * value again.
6547
 *
6548
 * @since GDAL 3.2.0
6549
 */
6550
6551
void VSINetworkStatsReset(void)
6552
{
6553
    cpl::NetworkStatisticsLogger::Reset();
6554
}
6555
6556
/************************************************************************/
6557
/*                 VSINetworkStatsGetAsSerializedJSON()                 */
6558
/************************************************************************/
6559
6560
/**
6561
 * \brief Return network related statistics, as a JSON serialized object.
6562
 *
6563
 * Statistics collecting should be enabled with the
6564
 CPL_VSIL_NETWORK_STATS_ENABLED
6565
 * configuration option set to YES before any network activity starts
6566
 * (for efficiency, reading it is cached on first access, until
6567
 VSINetworkStatsReset() is called)
6568
 *
6569
 * Statistics can also be emitted on standard output at process termination if
6570
 * the CPL_VSIL_SHOW_NETWORK_STATS configuration option is set to YES.
6571
 *
6572
 * Example of output:
6573
 * \code{.js}
6574
 * {
6575
 *   "methods":{
6576
 *     "GET":{
6577
 *       "count":6,
6578
 *       "downloaded_bytes":40825
6579
 *     },
6580
 *     "PUT":{
6581
 *       "count":1,
6582
 *       "uploaded_bytes":35472
6583
 *     }
6584
 *   },
6585
 *   "handlers":{
6586
 *     "vsigs":{
6587
 *       "methods":{
6588
 *         "GET":{
6589
 *           "count":2,
6590
 *           "downloaded_bytes":446
6591
 *         },
6592
 *         "PUT":{
6593
 *           "count":1,
6594
 *           "uploaded_bytes":35472
6595
 *         }
6596
 *       },
6597
 *       "files":{
6598
 *         "\/vsigs\/spatialys\/byte.tif":{
6599
 *           "methods":{
6600
 *             "PUT":{
6601
 *               "count":1,
6602
 *               "uploaded_bytes":35472
6603
 *             }
6604
 *           },
6605
 *           "actions":{
6606
 *             "Write":{
6607
 *               "methods":{
6608
 *                 "PUT":{
6609
 *                   "count":1,
6610
 *                   "uploaded_bytes":35472
6611
 *                 }
6612
 *               }
6613
 *             }
6614
 *           }
6615
 *         }
6616
 *       },
6617
 *       "actions":{
6618
 *         "Stat":{
6619
 *           "methods":{
6620
 *             "GET":{
6621
 *               "count":2,
6622
 *               "downloaded_bytes":446
6623
 *             }
6624
 *           },
6625
 *           "files":{
6626
 *             "\/vsigs\/spatialys\/byte.tif\/":{
6627
 *               "methods":{
6628
 *                 "GET":{
6629
 *                   "count":1,
6630
 *                   "downloaded_bytes":181
6631
 *                 }
6632
 *               }
6633
 *             }
6634
 *           }
6635
 *         }
6636
 *       }
6637
 *     },
6638
 *     "vsis3":{
6639
 *          [...]
6640
 *     }
6641
 *   }
6642
 * }
6643
 * \endcode
6644
 *
6645
 * @param papszOptions Unused.
6646
 * @return a JSON serialized string to free with VSIFree(), or nullptr
6647
 * @since GDAL 3.2.0
6648
 */
6649
6650
char *VSINetworkStatsGetAsSerializedJSON(CPL_UNUSED char **papszOptions)
{
    // Build the report, then hand a CPLStrdup()'ed copy to the caller.
    const std::string osReport =
        cpl::NetworkStatisticsLogger::GetReportAsSerializedJSON();
    return CPLStrdup(osReport.c_str());
}
6655
6656
#endif /* HAVE_CURL */
6657
6658
#undef ENABLE_DEBUG