Coverage Report

Created: 2025-06-13 06:29

/src/gdal/gcore/gdalopeninfo.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Implementation of GDALOpenInfo class.
5
 * Author:   Frank Warmerdam, warmerdam@pobox.com
6
 *
7
 **********************************************************************
8
 * Copyright (c) 2002, Frank Warmerdam
9
 * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 ****************************************************************************/
13
14
#include "gdal_priv.h"  // Must be included first for mingw VSIStatBufL.
15
#include "cpl_port.h"
16
17
#include <cstdlib>
18
#include <cstring>
19
#ifdef HAVE_UNISTD_H
20
#include <unistd.h>
21
#endif
22
23
#include <algorithm>
24
#include <map>
25
#include <mutex>
26
#include <vector>
27
28
#include "cpl_config.h"
29
#include "cpl_conv.h"
30
#include "cpl_error.h"
31
#include "cpl_string.h"
32
#include "cpl_vsi.h"
33
#include "gdal.h"
34
35
// Keep the prototypes of these 2 functions in sync between gdalopeninfo.cpp,
36
// ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp
37
void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
38
                                      const GByte *pabyHeader,
39
                                      int nHeaderBytes);
40
void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
41
42
/************************************************************************/
43
44
/* This whole section helps for SQLite/GPKG, especially with write-ahead
45
 * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
46
 * properly work and decide when to create/delete the wal related files.
47
 * One issue with POSIX advisory locks is that if within the same process
48
 * you do
49
 * f1 = open('somefile')
50
 * set locks on f1
51
 * f2 = open('somefile')
52
 * close(f2)
53
 * The close(f2) will cancel the locks set on f1. The work on f1 is done by
54
 * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
55
 * So as soon as sqlite3 has opened a file we should make sure not to re-open
56
 * it (actually close it) ourselves.
57
 */
58
59
namespace
60
{
61
struct FileNotToOpen
62
{
63
    CPLString osFilename{};
64
    int nRefCount{};
65
    GByte *pabyHeader{nullptr};
66
    int nHeaderBytes{0};
67
};
68
}  // namespace
69
70
static std::mutex sFNTOMutex;
71
static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
72
73
void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
74
                                      const GByte *pabyHeader, int nHeaderBytes)
75
0
{
76
0
    std::lock_guard<std::mutex> oLock(sFNTOMutex);
77
0
    if (pMapFNTO == nullptr)
78
0
        pMapFNTO = new std::map<CPLString, FileNotToOpen>();
79
0
    auto oIter = pMapFNTO->find(pszFilename);
80
0
    if (oIter != pMapFNTO->end())
81
0
    {
82
0
        oIter->second.nRefCount++;
83
0
    }
84
0
    else
85
0
    {
86
0
        FileNotToOpen fnto;
87
0
        fnto.osFilename = pszFilename;
88
0
        fnto.nRefCount = 1;
89
0
        fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
90
0
        memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
91
0
        fnto.pabyHeader[nHeaderBytes] = 0;
92
0
        fnto.nHeaderBytes = nHeaderBytes;
93
0
        (*pMapFNTO)[pszFilename] = std::move(fnto);
94
0
    }
95
0
}
96
97
void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
98
0
{
99
0
    std::lock_guard<std::mutex> oLock(sFNTOMutex);
100
0
    CPLAssert(pMapFNTO);
101
0
    auto oIter = pMapFNTO->find(pszFilename);
102
0
    CPLAssert(oIter != pMapFNTO->end());
103
0
    oIter->second.nRefCount--;
104
0
    if (oIter->second.nRefCount == 0)
105
0
    {
106
0
        CPLFree(oIter->second.pabyHeader);
107
0
        pMapFNTO->erase(oIter);
108
0
    }
109
0
    if (pMapFNTO->empty())
110
0
    {
111
0
        delete pMapFNTO;
112
0
        pMapFNTO = nullptr;
113
0
    }
114
0
}
115
116
/** Look up a file in the "not to open" registry and copy its stored header.
 *
 * @param pszFilename   filename to look up.
 * @param pnHeaderBytes output: number of header bytes in the returned buffer,
 *                      or 0 when the file is not registered.
 * @return a newly allocated, nul-terminated copy of the stored header (to be
 *         released by the caller), or nullptr when the file is not
 *         registered.
 */
static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
                                           int *pnHeaderBytes)
{
    std::lock_guard<std::mutex> oLock(sFNTOMutex);

    *pnHeaderBytes = 0;
    GByte *pabyCopy = nullptr;

    if (pMapFNTO != nullptr)
    {
        const auto oEntry = pMapFNTO->find(pszFilename);
        if (oEntry != pMapFNTO->end())
        {
            const FileNotToOpen &oRegistered = oEntry->second;
            *pnHeaderBytes = oRegistered.nHeaderBytes;
            // Hand out a private copy (with its terminating nul byte) so the
            // caller's buffer is independent from the registry entry.
            pabyCopy = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
            memcpy(pabyCopy, oRegistered.pabyHeader, *pnHeaderBytes);
            pabyCopy[*pnHeaderBytes] = 0;
        }
    }

    return pabyCopy;
}
136
137
/************************************************************************/
138
/* ==================================================================== */
139
/*                             GDALOpenInfo                             */
140
/* ==================================================================== */
141
/************************************************************************/
142
143
/************************************************************************/
144
/*                            GDALOpenInfo()                            */
145
/************************************************************************/
146
147
/** Constructor/
148
 * @param pszFilenameIn filename
149
 * @param nOpenFlagsIn open flags
150
 * @param papszSiblingsIn list of sibling files, or NULL.
151
 */
152
GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
153
                           const char *const *papszSiblingsIn)
154
0
    : pszFilename(CPLStrdup(pszFilenameIn)),
155
0
      osExtension(CPLGetExtensionSafe(pszFilenameIn)),
156
0
      eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
157
0
      nOpenFlags(nOpenFlagsIn)
158
0
{
159
0
    if (STARTS_WITH(pszFilename, "MVT:/vsi"))
160
0
        return;
161
162
/* -------------------------------------------------------------------- */
163
/*      Ensure that C: is treated as C:\ so we can stat it on           */
164
/*      Windows.  Similar to what is done in CPLStat().                 */
165
/* -------------------------------------------------------------------- */
166
#ifdef _WIN32
167
    if (strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':')
168
    {
169
        char szAltPath[10];
170
171
        strcpy(szAltPath, pszFilenameIn);
172
        strcat(szAltPath, "\\");
173
        CPLFree(pszFilename);
174
        pszFilename = CPLStrdup(szAltPath);
175
    }
176
#endif  // WIN32
177
178
    /* -------------------------------------------------------------------- */
179
    /*      Collect information about the file.                             */
180
    /* -------------------------------------------------------------------- */
181
182
0
#ifdef HAVE_READLINK
183
0
    bool bHasRetried = false;
184
185
0
retry:  // TODO(schwehr): Stop using goto.
186
187
0
#endif  // HAVE_READLINK
188
189
#if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) ||         \
190
      (defined(__MACH__) && defined(__APPLE__)))
191
    /* On BSDs, fread() on a directory returns non zero, so we have to */
192
    /* do a stat() before to check the nature of pszFilename. */
193
    bool bPotentialDirectory = (eAccess == GA_ReadOnly);
194
#else
195
0
    bool bPotentialDirectory = false;
196
0
#endif
197
198
    /* Check if the filename might be a directory of a special virtual file
199
     * system */
200
0
    if (STARTS_WITH(pszFilename, "/vsizip/") ||
201
0
        STARTS_WITH(pszFilename, "/vsitar/") ||
202
0
        STARTS_WITH(pszFilename, "/vsi7z/") ||
203
0
        STARTS_WITH(pszFilename, "/vsirar/"))
204
0
    {
205
0
        const char *pszExt = osExtension.c_str();
206
0
        if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
207
0
            EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
208
0
            EQUAL(pszExt, "rar") ||
209
0
            pszFilename[strlen(pszFilename) - 1] == '}'
210
0
#ifdef DEBUG
211
            // For AFL, so that .cur_input is detected as the archive filename.
212
0
            || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
213
0
#endif  // DEBUG
214
0
        )
215
0
        {
216
0
            bPotentialDirectory = true;
217
0
        }
218
0
    }
219
0
    else if (STARTS_WITH(pszFilename, "/vsicurl/"))
220
0
    {
221
0
        bPotentialDirectory = true;
222
0
    }
223
224
0
    if (bPotentialDirectory)
225
0
    {
226
0
        int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
227
0
        if (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
228
0
            nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
229
230
        // For those special files, opening them with VSIFOpenL() might result
231
        // in content, even if they should be considered as directories, so
232
        // use stat.
233
0
        VSIStatBufL sStat;
234
235
0
        if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
236
0
        {
237
0
            bStatOK = TRUE;
238
0
            if (VSI_ISDIR(sStat.st_mode))
239
0
                bIsDirectory = TRUE;
240
0
        }
241
0
    }
242
243
0
    pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
244
245
0
    if (!bIsDirectory && pabyHeader == nullptr)
246
0
    {
247
0
        fpL = VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
248
0
                          (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
249
0
    }
250
0
    if (pabyHeader)
251
0
    {
252
0
        bStatOK = TRUE;
253
0
        nHeaderBytesTried = nHeaderBytes;
254
0
    }
255
0
    else if (fpL != nullptr)
256
0
    {
257
0
        bStatOK = TRUE;
258
0
        int nBufSize =
259
0
            atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
260
0
        if (nBufSize < 1024)
261
0
            nBufSize = 1024;
262
0
        else if (nBufSize > 10 * 1024 * 1024)
263
0
            nBufSize = 10 * 1024 * 1024;
264
0
        pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
265
0
        nHeaderBytesTried = nBufSize;
266
0
        nHeaderBytes =
267
0
            static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
268
0
        VSIRewindL(fpL);
269
270
        /* If we cannot read anything, check if it is not a directory instead */
271
0
        VSIStatBufL sStat;
272
0
        if (nHeaderBytes == 0 &&
273
0
            VSIStatExL(pszFilename, &sStat,
274
0
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
275
0
            VSI_ISDIR(sStat.st_mode))
276
0
        {
277
0
            CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
278
0
            fpL = nullptr;
279
0
            CPLFree(pabyHeader);
280
0
            pabyHeader = nullptr;
281
0
            bIsDirectory = TRUE;
282
0
        }
283
0
    }
284
0
    else if (!bStatOK)
285
0
    {
286
0
        VSIStatBufL sStat;
287
0
        if (!bPotentialDirectory &&
288
0
            VSIStatExL(pszFilename, &sStat,
289
0
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
290
0
        {
291
0
            bStatOK = TRUE;
292
0
            if (VSI_ISDIR(sStat.st_mode))
293
0
                bIsDirectory = TRUE;
294
0
        }
295
0
#ifdef HAVE_READLINK
296
0
        else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
297
0
        {
298
            // If someone creates a file with "ln -sf
299
            // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
300
            // my_remote_utm.tif" we will be able to open it by passing
301
            // my_remote_utm.tif.  This helps a lot for GDAL based readers that
302
            // only provide file explorers to open datasets.
303
0
            const int nBufSize = 2048;
304
0
            std::vector<char> oFilename(nBufSize);
305
0
            char *szPointerFilename = &oFilename[0];
306
0
            int nBytes = static_cast<int>(
307
0
                readlink(pszFilename, szPointerFilename, nBufSize));
308
0
            if (nBytes != -1)
309
0
            {
310
0
                szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
311
0
                CPLFree(pszFilename);
312
0
                pszFilename = CPLStrdup(szPointerFilename);
313
0
                osExtension = CPLGetExtensionSafe(pszFilename);
314
0
                papszSiblingsIn = nullptr;
315
0
                bHasRetried = true;
316
0
                goto retry;
317
0
            }
318
0
        }
319
0
#endif  // HAVE_READLINK
320
0
    }
321
322
    /* -------------------------------------------------------------------- */
323
    /*      Capture sibling list either from passed in values, or by        */
324
    /*      scanning for them only if requested through GetSiblingFiles().  */
325
    /* -------------------------------------------------------------------- */
326
0
    if (papszSiblingsIn != nullptr)
327
0
    {
328
0
        papszSiblingFiles = CSLDuplicate(papszSiblingsIn);
329
0
        bHasGotSiblingFiles = true;
330
0
    }
331
0
    else if (bStatOK && !bIsDirectory)
332
0
    {
333
0
        papszSiblingFiles = VSISiblingFiles(pszFilename);
334
0
        if (papszSiblingFiles != nullptr)
335
0
        {
336
0
            bHasGotSiblingFiles = true;
337
0
        }
338
0
        else
339
0
        {
340
0
            const char *pszOptionVal = VSIGetPathSpecificOption(
341
0
                pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
342
0
            if (EQUAL(pszOptionVal, "EMPTY_DIR"))
343
0
            {
344
0
                papszSiblingFiles =
345
0
                    CSLAddString(nullptr, CPLGetFilename(pszFilename));
346
0
                bHasGotSiblingFiles = true;
347
0
            }
348
0
            else if (CPLTestBool(pszOptionVal))
349
0
            {
350
                /* skip reading the directory */
351
0
                papszSiblingFiles = nullptr;
352
0
                bHasGotSiblingFiles = true;
353
0
            }
354
0
            else
355
0
            {
356
                /* will be lazy loaded */
357
0
                papszSiblingFiles = nullptr;
358
0
                bHasGotSiblingFiles = false;
359
0
            }
360
0
        }
361
0
    }
362
0
    else
363
0
    {
364
0
        papszSiblingFiles = nullptr;
365
0
        bHasGotSiblingFiles = true;
366
0
    }
367
0
}
368
369
/************************************************************************/
370
/*                           ~GDALOpenInfo()                            */
371
/************************************************************************/
372
373
/** Destructor.
 *
 * Frees the header buffer, the filename and the sibling file list, and
 * closes the file handle if one is still open.
 */
GDALOpenInfo::~GDALOpenInfo()
{
    VSIFree(pabyHeader);
    CPLFree(pszFilename);

    if (fpL)
    {
        // A failure to close cannot be reported from a destructor.
        CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
    }

    CSLDestroy(papszSiblingFiles);
}
383
384
/************************************************************************/
385
/*                         GetSiblingFiles()                            */
386
/************************************************************************/
387
388
/** Return sibling files.
389
 *
390
 * If the list of sibling files has not already been established, it will be,
391
 * unless the GDAL_DISABLE_READDIR_ON_OPEN configuration option has been set to
392
 * YES or EMPTY_DIR when this instance was constructed.
393
 *
394
 * @return sibling files. Ownership belongs to "this".
395
 */
396
char **GDALOpenInfo::GetSiblingFiles()
397
0
{
398
0
    if (bHasGotSiblingFiles)
399
0
        return papszSiblingFiles;
400
0
    bHasGotSiblingFiles = true;
401
402
0
    papszSiblingFiles = VSISiblingFiles(pszFilename);
403
0
    if (papszSiblingFiles != nullptr)
404
0
    {
405
0
        return papszSiblingFiles;
406
0
    }
407
408
0
    const CPLString osDir = CPLGetDirnameSafe(pszFilename);
409
0
    const int nMaxFiles = atoi(VSIGetPathSpecificOption(
410
0
        pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
411
0
    papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
412
0
    if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
413
0
    {
414
0
        CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
415
0
                 osDir.c_str());
416
0
        CSLDestroy(papszSiblingFiles);
417
0
        papszSiblingFiles = nullptr;
418
0
    }
419
420
0
    return papszSiblingFiles;
421
0
}
422
423
/************************************************************************/
424
/*                         StealSiblingFiles()                          */
425
/*                                                                      */
426
/*      Same as GetSiblingFiles() except that the list is stolen        */
427
/*      (ie ownership transferred to the caller) and the associated     */
428
/*      member variable is set to NULL.                                 */
429
/************************************************************************/
430
431
/** Return sibling files and steal reference
432
 * @return sibling files. Ownership below to the caller (must be freed with
433
 * CSLDestroy)
434
 */
435
char **GDALOpenInfo::StealSiblingFiles()
436
0
{
437
0
    char **papszRet = GetSiblingFiles();
438
0
    papszSiblingFiles = nullptr;
439
0
    return papszRet;
440
0
}
441
442
/************************************************************************/
443
/*                        AreSiblingFilesLoaded()                       */
444
/************************************************************************/
445
446
/** Return whether sibling files have been loaded.
447
 * @return true or false.
448
 */
449
bool GDALOpenInfo::AreSiblingFilesLoaded() const
450
0
{
451
0
    return bHasGotSiblingFiles;
452
0
}
453
454
/************************************************************************/
455
/*                           TryToIngest()                              */
456
/************************************************************************/
457
458
/** Ingest bytes from the file.
459
 * @param nBytes number of bytes to ingest.
460
 * @return TRUE if successful
461
 */
462
int GDALOpenInfo::TryToIngest(int nBytes)
463
0
{
464
0
    if (fpL == nullptr)
465
0
        return FALSE;
466
0
    if (nHeaderBytes < nHeaderBytesTried)
467
0
        return TRUE;
468
0
    pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
469
0
    memset(pabyHeader, 0, nBytes + 1);
470
0
    VSIRewindL(fpL);
471
0
    nHeaderBytesTried = nBytes;
472
0
    nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
473
0
    VSIRewindL(fpL);
474
475
0
    return TRUE;
476
0
}
477
478
/************************************************************************/
479
/*                       IsSingleAllowedDriver()                        */
480
/************************************************************************/
481
482
/** Returns true if the driver name is the single in the list of allowed
483
 * drivers.
484
 *
485
 * @param pszDriverName Driver name to test.
486
 * @return true if the driver name is the single in the list of allowed
487
 * drivers.
488
 * @since GDAL 3.10
489
 */
490
bool GDALOpenInfo::IsSingleAllowedDriver(const char *pszDriverName) const
491
0
{
492
0
    return papszAllowedDrivers && papszAllowedDrivers[0] &&
493
0
           !papszAllowedDrivers[1] &&
494
0
           EQUAL(papszAllowedDrivers[0], pszDriverName);
495
0
}