Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/apps/gdalalg_dataset_check.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  GDAL
4
 * Purpose:  gdal "dataset check" subcommand
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2026, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
//! @cond Doxygen_Suppress
14
15
#include "gdalalg_dataset_check.h"
16
17
#include "cpl_progress.h"
18
#include "cpl_string.h"
19
#include "gdal_dataset.h"
20
#include "gdal_multidim.h"
21
#include "gdal_rasterband.h"
22
#include "ogrsf_frmts.h"
23
#include "ogr_recordbatch.h"
24
25
#include <algorithm>
26
#include <limits>
27
28
#ifndef _
29
0
#define _(x) (x)
30
#endif
31
32
/************************************************************************/
33
/*                     GDALDatasetCheckAlgorithm()                      */
34
/************************************************************************/
35
36
GDALDatasetCheckAlgorithm::GDALDatasetCheckAlgorithm()
    : GDALAlgorithm(NAME, DESCRIPTION, HELP_URL)
{
    // Let the user request progress reporting.
    AddProgressArg();

    // The input dataset may be a classic raster, a vector or a
    // multidimensional raster dataset.
    const int nAcceptedInputTypes =
        GDAL_OF_RASTER | GDAL_OF_VECTOR | GDAL_OF_MULTIDIM_RASTER;
    AddInputDatasetArg(&m_input, nAcceptedInputTypes);
    AddOpenOptionsArg(&m_openOptions);
    AddInputFormatsArg(&m_inputFormats);

    // Output-only argument, hidden from the command line, bound to
    // m_retCode (set to 1 by the checking methods when they detect an error).
    AddArg("return-code", 0, _("Return code"), &m_retCode)
        .SetHiddenForCLI()
        .SetIsInput(false)
        .SetIsOutput(true);
}
51
52
/************************************************************************/
53
/*                 GDALDatasetCheckAlgorithm::RunImpl()                 */
54
/************************************************************************/
55
56
bool GDALDatasetCheckAlgorithm::RunImpl(GDALProgressFunc pfnProgress,
57
                                        void *pProgressData)
58
0
{
59
0
    auto poDS = m_input.GetDatasetRef();
60
0
    CPLAssert(poDS);
61
62
0
    const CPLStringList aosOpenOptions(m_openOptions);
63
0
    const CPLStringList aosAllowedDrivers(m_inputFormats);
64
65
0
    const CPLStringList aosSubdatasets(
66
0
        CSLDuplicate(poDS->GetMetadata("SUBDATASETS")));
67
0
    const int nSubdatasets = aosSubdatasets.size() / 2;
68
69
0
    bool bRet = true;
70
0
    if (nSubdatasets)
71
0
    {
72
0
        int i = 0;
73
0
        for (auto [pszKey, pszValue] : cpl::IterateNameValue(aosSubdatasets))
74
0
        {
75
0
            if (cpl::ends_with(std::string_view(pszKey), "_NAME"))
76
0
            {
77
0
                auto poSubDS = std::unique_ptr<GDALDataset>(
78
0
                    GDALDataset::Open(pszValue, 0, aosAllowedDrivers.List(),
79
0
                                      aosOpenOptions.List()));
80
0
                if (poSubDS)
81
0
                {
82
0
                    std::unique_ptr<void, decltype(&GDALDestroyScaledProgress)>
83
0
                        pScaled(GDALCreateScaledProgress(
84
0
                                    static_cast<double>(i) / nSubdatasets,
85
0
                                    static_cast<double>(i + 1) / nSubdatasets,
86
0
                                    pfnProgress, pProgressData),
87
0
                                GDALDestroyScaledProgress);
88
0
                    ++i;
89
0
                    bRet = CheckDataset(poSubDS.get(), false,
90
0
                                        GDALScaledProgress, pScaled.get());
91
0
                    if (!bRet)
92
0
                        break;
93
0
                }
94
0
                else
95
0
                {
96
0
                    m_retCode = 1;
97
0
                }
98
0
            }
99
0
        }
100
0
    }
101
0
    else
102
0
    {
103
0
        bRet = CheckDataset(poDS, /* bRasterOnly=*/false, pfnProgress,
104
0
                            pProgressData);
105
0
    }
106
107
0
    return bRet;
108
0
}
109
110
/************************************************************************/
111
/*                         GetGroupPixelCount()                         */
112
/************************************************************************/
113
114
/** Return the number of elements of all the arrays of a group and,
 * recursively, of its subgroups.
 *
 * The element count of an array is the product of the sizes of its
 * dimensions (1 for an array without dimension). Arrays and subgroups that
 * cannot be opened are ignored.
 *
 * All computations saturate at the maximum value of GIntBig instead of
 * overflowing. Without that, a huge array (e.g. 2^32 x 2^32 elements) could
 * wrap to 0 or to a negative value, and a total of 0 makes CheckDataset()
 * skip the multidimensional check altogether.
 *
 * @param poGroup Group to inspect. Must not be null.
 * @return the total element count, in the [0, max(GIntBig)] range.
 */
static GIntBig GetGroupPixelCount(const GDALGroup *poGroup)
{
    constexpr GUInt64 nMaxCount =
        static_cast<GUInt64>(std::numeric_limits<GIntBig>::max());

    // Invariant: nPixelCount <= nMaxCount
    GUInt64 nPixelCount = 0;

    for (const std::string &osArrayName : poGroup->GetMDArrayNames())
    {
        auto poArray = poGroup->OpenMDArray(osArrayName);
        if (!poArray)
            continue;

        // Saturating product of the dimension sizes.
        GUInt64 nPixels = 1;
        for (const auto &poDim : poArray->GetDimensions())
        {
            const GUInt64 nDimSize = poDim->GetSize();
            if (nDimSize == 0)
            {
                // An empty dimension makes the whole array empty, whatever
                // the size of the other dimensions.
                nPixels = 0;
                break;
            }
            nPixels = (nPixels > nMaxCount / nDimSize) ? nMaxCount
                                                       : nPixels * nDimSize;
        }

        // Saturating addition (nMaxCount - nPixelCount cannot underflow).
        nPixelCount = (nPixels > nMaxCount - nPixelCount)
                          ? nMaxCount
                          : nPixelCount + nPixels;
    }

    for (const std::string &osGroupName : poGroup->GetGroupNames())
    {
        auto poSubGroup = poGroup->OpenGroup(osGroupName);
        if (!poSubGroup)
            continue;

        // The recursive call returns a value in [0, nMaxCount].
        const GUInt64 nSubGroupPixels =
            static_cast<GUInt64>(GetGroupPixelCount(poSubGroup.get()));
        nPixelCount = (nSubGroupPixels > nMaxCount - nPixelCount)
                          ? nMaxCount
                          : nPixelCount + nSubGroupPixels;
    }

    return static_cast<GIntBig>(nPixelCount);
}
136
137
/************************************************************************/
138
/*                            ProgressStruct                            */
139
/************************************************************************/
140
141
namespace
142
{
143
struct ProgressStruct
144
{
145
    GIntBig nTotalContent = 0;
146
    GDALProgressFunc pfnProgress = nullptr;
147
    void *pProgressData = nullptr;
148
149
    // In-out variable
150
    GIntBig nProgress = 0;
151
152
    // Work variable
153
    std::vector<GByte> *pabyData = nullptr;
154
155
    // Output variables
156
    bool bError = false;
157
    bool bInterrupted = false;
158
};
159
}  // namespace
160
161
/************************************************************************/
162
/*                         MDArrayProcessFunc()                         */
163
/************************************************************************/
164
165
/** Read a chunk of a multidimensional array */
166
static bool MDArrayProcessFunc(GDALAbstractMDArray *array,
167
                               const GUInt64 *startIdx,
168
                               const size_t *chunkCount,
169
                               GUInt64 /* iCurChunk */,
170
                               GUInt64 /* nChunkCount */, void *pUserData)
171
0
{
172
0
    ProgressStruct *psProgress = static_cast<ProgressStruct *>(pUserData);
173
0
    size_t nPixels = 1;
174
0
    const auto nDimCount = array->GetDimensionCount();
175
0
    for (size_t i = 0; i < nDimCount; ++i)
176
0
        nPixels *= chunkCount[i];
177
0
    auto &dt = array->GetDataType();
178
0
    const size_t nDTSize = dt.GetSize();
179
0
    const size_t nReqSize = nPixels * nDTSize;
180
0
    if (psProgress->pabyData->size() < nReqSize)
181
0
    {
182
0
        try
183
0
        {
184
0
            psProgress->pabyData->resize(nReqSize);
185
0
        }
186
0
        catch (const std::exception &)
187
0
        {
188
0
            CPLError(CE_Failure, CPLE_AppDefined,
189
0
                     "Out of memory while allocating memory chunk");
190
0
            psProgress->bError = true;
191
0
            return false;
192
0
        }
193
0
    }
194
0
    if (!array->Read(startIdx, chunkCount, /* arrayStep = */ nullptr,
195
0
                     /* bufferStride = */ nullptr, dt,
196
0
                     psProgress->pabyData->data()))
197
0
    {
198
0
        psProgress->bError = true;
199
0
        return false;
200
0
    }
201
0
    if (dt.NeedsFreeDynamicMemory())
202
0
    {
203
0
        for (size_t i = 0; i < nPixels; ++i)
204
0
        {
205
0
            dt.FreeDynamicMemory(psProgress->pabyData->data() + i * nDTSize);
206
0
        }
207
0
    }
208
0
    psProgress->nProgress += nPixels;
209
0
    if (psProgress->pfnProgress &&
210
0
        !psProgress->pfnProgress(
211
0
            static_cast<double>(psProgress->nProgress) /
212
0
                static_cast<double>(psProgress->nTotalContent),
213
0
            "", psProgress->pProgressData))
214
0
    {
215
0
        psProgress->bInterrupted = true;
216
0
        return false;
217
0
    }
218
0
    return true;
219
0
}
220
221
/************************************************************************/
222
/*               GDALDatasetCheckAlgorithm::CheckGroup()                */
223
/************************************************************************/
224
225
bool GDALDatasetCheckAlgorithm::CheckGroup(GDALGroup *poGroup,
226
                                           GIntBig &nProgress,
227
                                           GIntBig nTotalContent,
228
                                           GDALProgressFunc pfnProgress,
229
                                           void *pProgressData)
230
0
{
231
0
    CPLDebug("GDALDatasetCheckAlgorithm", "Checking group %s",
232
0
             poGroup->GetFullName().c_str());
233
0
    for (const std::string &osArrayName : poGroup->GetMDArrayNames())
234
0
    {
235
0
        auto poArray = poGroup->OpenMDArray(osArrayName);
236
0
        if (poArray)
237
0
        {
238
0
            CPLDebug("GDALDatasetCheckAlgorithm", "Checking array %s",
239
0
                     poArray->GetFullName().c_str());
240
0
            std::vector<GUInt64> anStartIdx(poArray->GetDimensionCount());
241
0
            std::vector<GUInt64> anCount;
242
0
            for (auto &poDim : poArray->GetDimensions())
243
0
                anCount.push_back(poDim->GetSize());
244
0
            constexpr size_t BUFFER_SIZE = 10 * 1024 * 1024;
245
246
0
            std::vector<GByte> abyData;
247
248
0
            ProgressStruct sProgress;
249
0
            sProgress.pabyData = &abyData;
250
0
            sProgress.nProgress = nProgress;
251
0
            sProgress.nTotalContent = nTotalContent;
252
0
            sProgress.pfnProgress = pfnProgress;
253
0
            sProgress.pProgressData = pProgressData;
254
0
            if (!poArray->ProcessPerChunk(
255
0
                    anStartIdx.data(), anCount.data(),
256
0
                    poArray->GetProcessingChunkSize(BUFFER_SIZE).data(),
257
0
                    MDArrayProcessFunc, &sProgress) ||
258
0
                sProgress.bError)
259
0
            {
260
0
                if (sProgress.bInterrupted)
261
0
                {
262
0
                    ReportError(CE_Failure, CPLE_UserInterrupt,
263
0
                                "Interrupted by user");
264
0
                }
265
0
                m_retCode = 1;
266
0
                return false;
267
0
            }
268
0
            nProgress = sProgress.nProgress;
269
0
        }
270
0
    }
271
0
    for (const std::string &osGroupName : poGroup->GetGroupNames())
272
0
    {
273
0
        auto poSubGroup = poGroup->OpenGroup(osGroupName);
274
0
        if (poSubGroup &&
275
0
            !CheckGroup(poSubGroup.get(), nProgress, nTotalContent, pfnProgress,
276
0
                        pProgressData))
277
0
        {
278
0
            return false;
279
0
        }
280
0
    }
281
0
    return true;
282
0
}
283
284
/************************************************************************/
285
/*              GDALDatasetCheckAlgorithm::CheckDataset()               */
286
/************************************************************************/
287
288
bool GDALDatasetCheckAlgorithm::CheckDataset(GDALDataset *poDS,
289
                                             bool bRasterOnly,
290
                                             GDALProgressFunc pfnProgress,
291
                                             void *pProgressData)
292
0
{
293
0
    const int nBands = poDS->GetRasterCount();
294
0
    auto poRootGroup = poDS->GetRootGroup();
295
0
    const GIntBig nTotalPixelsMD =
296
0
        poRootGroup ? GetGroupPixelCount(poRootGroup.get()) : 0;
297
0
    const GIntBig nTotalPixelsRegularRaster =
298
0
        nTotalPixelsMD ? 0
299
0
                       : static_cast<GIntBig>(nBands) * poDS->GetRasterXSize() *
300
0
                             poDS->GetRasterYSize();
301
0
    GIntBig nTotalFeatures = 0;
302
0
    bool bFastArrow = true;
303
0
    if (!bRasterOnly)
304
0
    {
305
0
        for (auto *poLayer : poDS->GetLayers())
306
0
        {
307
0
            bFastArrow =
308
0
                bFastArrow && poLayer->TestCapability(OLCFastGetArrowStream);
309
0
            const auto nFeatures = poLayer->GetFeatureCount(false);
310
0
            if (nFeatures >= 0)
311
0
                nTotalFeatures += nFeatures;
312
0
        }
313
0
    }
314
315
    // Totally arbitrary "equivalence" between a vector feature and a pixel
316
    // in terms of computation / I/O effort.
317
0
    constexpr int RATIO_FEATURE_TO_PIXEL = 100;
318
0
    const GIntBig nTotalContent = nTotalPixelsMD + nTotalPixelsRegularRaster +
319
0
                                  nTotalFeatures * RATIO_FEATURE_TO_PIXEL;
320
321
0
    if (!bRasterOnly)
322
0
    {
323
0
        const double dfRatioFeatures =
324
0
            (nTotalFeatures == nTotalContent)
325
0
                ? 1.0
326
0
                : static_cast<double>(nTotalFeatures * RATIO_FEATURE_TO_PIXEL) /
327
0
                      nTotalContent;
328
329
0
        if (bFastArrow)
330
0
        {
331
0
            GIntBig nCountFeatures = 0;
332
0
            for (auto *poLayer : poDS->GetLayers())
333
0
            {
334
0
                struct ArrowArrayStream stream;
335
0
                if (!poLayer->GetArrowStream(&stream))
336
0
                {
337
0
                    ReportError(CE_Failure, CPLE_AppDefined,
338
0
                                "GetArrowStream() failed");
339
0
                    m_retCode = 1;
340
0
                    return false;
341
0
                }
342
0
                while (true)
343
0
                {
344
0
                    struct ArrowArray array;
345
0
                    int ret = stream.get_next(&stream, &array);
346
0
                    if (ret != 0 || CPLGetLastErrorType() == CE_Failure)
347
0
                    {
348
0
                        if (array.release)
349
0
                            array.release(&array);
350
0
                        ReportError(CE_Failure, CPLE_AppDefined,
351
0
                                    "ArrowArrayStream::get_next() failed");
352
0
                        m_retCode = 1;
353
0
                        stream.release(&stream);
354
0
                        return false;
355
0
                    }
356
0
                    if (array.release == nullptr)
357
0
                        break;
358
0
                    nCountFeatures += array.length;
359
0
                    array.release(&array);
360
0
                    const double dfPct = static_cast<double>(nCountFeatures) /
361
0
                                         (static_cast<double>(nTotalFeatures) +
362
0
                                          std::numeric_limits<double>::min()) *
363
0
                                         dfRatioFeatures;
364
0
                    if (pfnProgress && !pfnProgress(dfPct, "", pProgressData))
365
0
                    {
366
0
                        ReportError(CE_Failure, CPLE_UserInterrupt,
367
0
                                    "Interrupted by user");
368
0
                        m_retCode = 1;
369
0
                        stream.release(&stream);
370
0
                        return false;
371
0
                    }
372
0
                }
373
0
                stream.release(&stream);
374
0
            }
375
0
        }
376
0
        else
377
0
        {
378
0
            std::unique_ptr<void, decltype(&GDALDestroyScaledProgress)> pScaled(
379
0
                GDALCreateScaledProgress(0, dfRatioFeatures, pfnProgress,
380
0
                                         pProgressData),
381
0
                GDALDestroyScaledProgress);
382
0
            GIntBig nCurFeatures = 0;
383
0
            while (true)
384
0
            {
385
0
                const bool bGotFeature =
386
0
                    std::unique_ptr<OGRFeature>(poDS->GetNextFeature(
387
0
                        nullptr, nullptr, GDALScaledProgress, pScaled.get())) !=
388
0
                    nullptr;
389
0
                if (CPLGetLastErrorType() == CE_Failure)
390
0
                {
391
0
                    m_retCode = 1;
392
0
                    return false;
393
0
                }
394
0
                if (!bGotFeature)
395
0
                    break;
396
0
                ++nCurFeatures;
397
0
                if (pfnProgress && nTotalFeatures > 0 &&
398
0
                    !pfnProgress(
399
0
                        std::min(1.0, static_cast<double>(nCurFeatures) /
400
0
                                          static_cast<double>(nTotalFeatures)) *
401
0
                            dfRatioFeatures,
402
0
                        "", pProgressData))
403
0
                {
404
0
                    ReportError(CE_Failure, CPLE_UserInterrupt,
405
0
                                "Interrupted by user");
406
0
                    m_retCode = 1;
407
0
                    return false;
408
0
                }
409
0
            }
410
0
            if (pfnProgress && nTotalContent == 0)
411
0
                pfnProgress(1.0, "", pProgressData);
412
0
        }
413
0
    }
414
415
0
    GIntBig nProgress = nTotalFeatures * RATIO_FEATURE_TO_PIXEL;
416
0
    if (poRootGroup && nTotalPixelsMD)
417
0
    {
418
0
        return CheckGroup(poRootGroup.get(), nProgress, nTotalContent,
419
0
                          pfnProgress, pProgressData);
420
0
    }
421
0
    else if (nBands)
422
0
    {
423
0
        std::vector<GByte> abyBuffer;
424
0
        const auto eDT = poDS->GetRasterBand(1)->GetRasterDataType();
425
0
        const auto nDTSize = GDALGetDataTypeSizeBytes(eDT);
426
0
        constexpr size_t BUFFER_SIZE = 10 * 1024 * 1024;
427
0
        const char *pszInterleaving =
428
0
            poDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
429
0
        if (pszInterleaving && EQUAL(pszInterleaving, "PIXEL"))
430
0
        {
431
0
            for (const auto &oWindow :
432
0
                 poDS->GetRasterBand(1)->IterateWindows(BUFFER_SIZE))
433
0
            {
434
0
                const size_t nPixels = static_cast<size_t>(oWindow.nXSize) *
435
0
                                       oWindow.nYSize * nBands;
436
0
                const size_t nReqSize = nPixels * nDTSize;
437
0
                if (abyBuffer.size() < nReqSize)
438
0
                {
439
0
                    try
440
0
                    {
441
0
                        abyBuffer.resize(nReqSize);
442
0
                    }
443
0
                    catch (const std::exception &)
444
0
                    {
445
0
                        ReportError(
446
0
                            CE_Failure, CPLE_OutOfMemory,
447
0
                            "Out of memory while allocating memory chunk");
448
0
                        m_retCode = 1;
449
0
                        return false;
450
0
                    }
451
0
                }
452
0
                if (poDS->RasterIO(GF_Read, oWindow.nXOff, oWindow.nYOff,
453
0
                                   oWindow.nXSize, oWindow.nYSize,
454
0
                                   abyBuffer.data(), oWindow.nXSize,
455
0
                                   oWindow.nYSize, eDT, nBands, nullptr, 0, 0,
456
0
                                   0, nullptr) != CE_None ||
457
0
                    CPLGetLastErrorType() == CE_Failure)
458
0
                {
459
0
                    m_retCode = 1;
460
0
                    return false;
461
0
                }
462
0
                nProgress += nPixels;
463
0
                if (pfnProgress &&
464
0
                    !pfnProgress(static_cast<double>(nProgress) /
465
0
                                     static_cast<double>(
466
0
                                         std::max<GIntBig>(1, nTotalContent)),
467
0
                                 "", pProgressData))
468
0
                {
469
0
                    ReportError(CE_Failure, CPLE_UserInterrupt,
470
0
                                "Interrupted by user");
471
0
                    m_retCode = 1;
472
0
                    return false;
473
0
                }
474
0
            }
475
0
        }
476
0
        else
477
0
        {
478
0
            for (int iBand = 1; iBand <= nBands; ++iBand)
479
0
            {
480
0
                auto poBand = poDS->GetRasterBand(iBand);
481
0
                for (const auto &oWindow : poBand->IterateWindows(BUFFER_SIZE))
482
0
                {
483
0
                    const size_t nPixels =
484
0
                        static_cast<size_t>(oWindow.nXSize) * oWindow.nYSize;
485
0
                    const size_t nReqSize = nPixels * nDTSize;
486
0
                    if (abyBuffer.size() < nReqSize)
487
0
                    {
488
0
                        try
489
0
                        {
490
0
                            abyBuffer.resize(nReqSize);
491
0
                        }
492
0
                        catch (const std::exception &)
493
0
                        {
494
0
                            ReportError(
495
0
                                CE_Failure, CPLE_OutOfMemory,
496
0
                                "Out of memory while allocating memory chunk");
497
0
                            m_retCode = 1;
498
0
                            return false;
499
0
                        }
500
0
                    }
501
0
                    if (poBand->RasterIO(GF_Read, oWindow.nXOff, oWindow.nYOff,
502
0
                                         oWindow.nXSize, oWindow.nYSize,
503
0
                                         abyBuffer.data(), oWindow.nXSize,
504
0
                                         oWindow.nYSize, eDT, 0, 0,
505
0
                                         nullptr) != CE_None ||
506
0
                        CPLGetLastErrorType() == CE_Failure)
507
0
                    {
508
0
                        m_retCode = 1;
509
0
                        return false;
510
0
                    }
511
0
                    nProgress +=
512
0
                        static_cast<GIntBig>(oWindow.nXSize) * oWindow.nYSize;
513
0
                    if (pfnProgress &&
514
0
                        !pfnProgress(static_cast<double>(nProgress) /
515
0
                                         static_cast<double>(std::max<GIntBig>(
516
0
                                             1, nTotalContent)),
517
0
                                     "", pProgressData))
518
0
                    {
519
0
                        ReportError(CE_Failure, CPLE_UserInterrupt,
520
0
                                    "Interrupted by user");
521
0
                        m_retCode = 1;
522
0
                        return false;
523
0
                    }
524
0
                }
525
0
            }
526
0
        }
527
0
    }
528
0
    return true;
529
0
}
530
531
//! @endcond