Coverage Report

Created: 2025-06-09 08:44

/src/gdal/frmts/zarr/zarr_v3_array.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  GDAL
4
 * Purpose:  Zarr driver
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2021, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "cpl_float.h"
14
#include "cpl_vsi_virtual.h"
15
#include "gdal_thread_pool.h"
16
#include "zarr.h"
17
18
#include <algorithm>
19
#include <cassert>
20
#include <cmath>
21
#include <cstdlib>
22
#include <limits>
23
#include <map>
24
#include <set>
25
26
/************************************************************************/
27
/*                       ZarrV3Array::ZarrV3Array()                     */
28
/************************************************************************/
29
30
/** Build a Zarr V3 array object.
 *
 * The constructor has no logic of its own: the parent name and array name
 * initialise the GDALAbstractMDArray base, and the complete argument list is
 * handed over to the ZarrArray base.
 */
ZarrV3Array::ZarrV3Array(
    const std::shared_ptr<ZarrSharedResource> &poSharedResource,
    const std::string &osParentName, const std::string &osName,
    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
    const GDALExtendedDataType &oType, const std::vector<DtypeElt> &aoDtypeElts,
    const std::vector<GUInt64> &anBlockSize)
    : GDALAbstractMDArray(osParentName, osName),
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
                aoDtypeElts, anBlockSize)
{
    // Nothing to do: all state is set up by the base class constructors.
}
Unexecuted instantiation: ZarrV3Array::ZarrV3Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&)
ZarrV3Array::ZarrV3Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&)
Line
Count
Source
36
268
    : GDALAbstractMDArray(osParentName, osName),
37
268
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
38
268
                aoDtypeElts, anBlockSize)
39
268
{
40
268
}
41
42
/************************************************************************/
43
/*                         ZarrV3Array::Create()                        */
44
/************************************************************************/
45
46
std::shared_ptr<ZarrV3Array>
47
ZarrV3Array::Create(const std::shared_ptr<ZarrSharedResource> &poSharedResource,
48
                    const std::string &osParentName, const std::string &osName,
49
                    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
50
                    const GDALExtendedDataType &oType,
51
                    const std::vector<DtypeElt> &aoDtypeElts,
52
                    const std::vector<GUInt64> &anBlockSize)
53
268
{
54
268
    auto arr = std::shared_ptr<ZarrV3Array>(
55
268
        new ZarrV3Array(poSharedResource, osParentName, osName, aoDims, oType,
56
268
                        aoDtypeElts, anBlockSize));
57
268
    if (arr->m_nTotalTileCount == 0)
58
42
        return nullptr;
59
226
    arr->SetSelf(arr);
60
61
226
    return arr;
62
268
}
63
64
/************************************************************************/
65
/*                             ~ZarrV3Array()                           */
66
/************************************************************************/
67
68
/** Destructor: writes out any pending state before the object goes away. */
ZarrV3Array::~ZarrV3Array()
{
    // Explicitly qualified so that this class' own Flush() is the one called.
    this->ZarrV3Array::Flush();
}
72
73
/************************************************************************/
74
/*                                Flush()                               */
75
/************************************************************************/
76
77
void ZarrV3Array::Flush()
78
494
{
79
494
    if (!m_bValid)
80
0
        return;
81
82
494
    ZarrV3Array::FlushDirtyTile();
83
84
494
    if (!m_aoDims.empty())
85
494
    {
86
494
        for (const auto &poDim : m_aoDims)
87
494
        {
88
494
            const auto poZarrDim =
89
494
                dynamic_cast<const ZarrDimension *>(poDim.get());
90
494
            if (poZarrDim && poZarrDim->IsXArrayDimension())
91
0
            {
92
0
                if (poZarrDim->IsModified())
93
0
                    m_bDefinitionModified = true;
94
0
            }
95
494
            else
96
494
            {
97
494
                break;
98
494
            }
99
494
        }
100
494
    }
101
102
494
    CPLJSONObject oAttrs;
103
494
    if (m_oAttrGroup.IsModified() || m_bUnitModified || m_bOffsetModified ||
104
494
        m_bScaleModified || m_bSRSModified)
105
0
    {
106
0
        m_bNew = false;
107
108
0
        oAttrs = SerializeSpecialAttributes();
109
110
0
        m_bDefinitionModified = true;
111
0
    }
112
113
494
    if (m_bDefinitionModified)
114
0
    {
115
0
        Serialize(oAttrs);
116
0
        m_bDefinitionModified = false;
117
0
    }
118
494
}
119
120
/************************************************************************/
121
/*                    ZarrV3Array::Serialize()                          */
122
/************************************************************************/
123
124
void ZarrV3Array::Serialize(const CPLJSONObject &oAttrs)
125
0
{
126
0
    CPLJSONDocument oDoc;
127
0
    CPLJSONObject oRoot = oDoc.GetRoot();
128
129
0
    oRoot.Add("zarr_format", 3);
130
0
    oRoot.Add("node_type", "array");
131
132
0
    CPLJSONArray oShape;
133
0
    for (const auto &poDim : m_aoDims)
134
0
    {
135
0
        oShape.Add(static_cast<GInt64>(poDim->GetSize()));
136
0
    }
137
0
    oRoot.Add("shape", oShape);
138
139
0
    oRoot.Add("data_type", m_dtype.ToString());
140
141
0
    {
142
0
        CPLJSONObject oChunkGrid;
143
0
        oRoot.Add("chunk_grid", oChunkGrid);
144
0
        oChunkGrid.Add("name", "regular");
145
0
        CPLJSONObject oConfiguration;
146
0
        oChunkGrid.Add("configuration", oConfiguration);
147
0
        CPLJSONArray oChunks;
148
0
        for (const auto nBlockSize : m_anBlockSize)
149
0
        {
150
0
            oChunks.Add(static_cast<GInt64>(nBlockSize));
151
0
        }
152
0
        oConfiguration.Add("chunk_shape", oChunks);
153
0
    }
154
155
0
    {
156
0
        CPLJSONObject oChunkKeyEncoding;
157
0
        oRoot.Add("chunk_key_encoding", oChunkKeyEncoding);
158
0
        oChunkKeyEncoding.Add("name", m_bV2ChunkKeyEncoding ? "v2" : "default");
159
0
        CPLJSONObject oConfiguration;
160
0
        oChunkKeyEncoding.Add("configuration", oConfiguration);
161
0
        oConfiguration.Add("separator", m_osDimSeparator);
162
0
    }
163
164
0
    if (m_pabyNoData == nullptr)
165
0
    {
166
0
        if (m_oType.GetNumericDataType() == GDT_Float16 ||
167
0
            m_oType.GetNumericDataType() == GDT_Float32 ||
168
0
            m_oType.GetNumericDataType() == GDT_Float64)
169
0
        {
170
0
            oRoot.Add("fill_value", "NaN");
171
0
        }
172
0
        else
173
0
        {
174
0
            oRoot.AddNull("fill_value");
175
0
        }
176
0
    }
177
0
    else
178
0
    {
179
0
        if (m_oType.GetNumericDataType() == GDT_CFloat16 ||
180
0
            m_oType.GetNumericDataType() == GDT_CFloat32 ||
181
0
            m_oType.GetNumericDataType() == GDT_CFloat64)
182
0
        {
183
0
            double adfNoDataValue[2];
184
0
            GDALCopyWords(m_pabyNoData, m_oType.GetNumericDataType(), 0,
185
0
                          adfNoDataValue, GDT_CFloat64, 0, 1);
186
0
            CPLJSONArray oArray;
187
0
            for (int i = 0; i < 2; ++i)
188
0
            {
189
0
                if (std::isnan(adfNoDataValue[i]))
190
0
                    oArray.Add("NaN");
191
0
                else if (adfNoDataValue[i] ==
192
0
                         std::numeric_limits<double>::infinity())
193
0
                    oArray.Add("Infinity");
194
0
                else if (adfNoDataValue[i] ==
195
0
                         -std::numeric_limits<double>::infinity())
196
0
                    oArray.Add("-Infinity");
197
0
                else
198
0
                    oArray.Add(adfNoDataValue[i]);
199
0
            }
200
0
            oRoot.Add("fill_value", oArray);
201
0
        }
202
0
        else
203
0
        {
204
0
            SerializeNumericNoData(oRoot);
205
0
        }
206
0
    }
207
208
0
    if (m_poCodecs)
209
0
    {
210
0
        oRoot.Add("codecs", m_poCodecs->GetJSon());
211
0
    }
212
213
0
    oRoot.Add("attributes", oAttrs);
214
215
    // Set dimension_names
216
0
    if (!m_aoDims.empty())
217
0
    {
218
0
        CPLJSONArray oDimensions;
219
0
        for (const auto &poDim : m_aoDims)
220
0
        {
221
0
            const auto poZarrDim =
222
0
                dynamic_cast<const ZarrDimension *>(poDim.get());
223
0
            if (poZarrDim && poZarrDim->IsXArrayDimension())
224
0
            {
225
0
                oDimensions.Add(poDim->GetName());
226
0
            }
227
0
            else
228
0
            {
229
0
                oDimensions = CPLJSONArray();
230
0
                break;
231
0
            }
232
0
        }
233
0
        if (oDimensions.Size() > 0)
234
0
        {
235
0
            oRoot.Add("dimension_names", oDimensions);
236
0
        }
237
0
    }
238
239
    // TODO: codecs
240
241
0
    oDoc.Save(m_osFilename);
242
0
}
243
244
/************************************************************************/
245
/*                  ZarrV3Array::NeedDecodedBuffer()                    */
246
/************************************************************************/
247
248
bool ZarrV3Array::NeedDecodedBuffer() const
249
280
{
250
280
    for (const auto &elt : m_aoDtypeElts)
251
280
    {
252
280
        if (elt.needByteSwapping || elt.gdalTypeIsApproxOfNative)
253
0
        {
254
0
            return true;
255
0
        }
256
280
    }
257
280
    return false;
258
280
}
259
260
/************************************************************************/
261
/*               ZarrV3Array::AllocateWorkingBuffers()                  */
262
/************************************************************************/
263
264
bool ZarrV3Array::AllocateWorkingBuffers() const
265
688
{
266
688
    if (m_bAllocateWorkingBuffersDone)
267
548
        return m_bWorkingBuffersOK;
268
269
140
    m_bAllocateWorkingBuffersDone = true;
270
271
140
    size_t nSizeNeeded = m_nTileSize;
272
140
    if (NeedDecodedBuffer())
273
0
    {
274
0
        size_t nDecodedBufferSize = m_oType.GetSize();
275
0
        for (const auto &nBlockSize : m_anBlockSize)
276
0
        {
277
0
            if (nDecodedBufferSize > std::numeric_limits<size_t>::max() /
278
0
                                         static_cast<size_t>(nBlockSize))
279
0
            {
280
0
                CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
281
0
                return false;
282
0
            }
283
0
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
284
0
        }
285
0
        if (nSizeNeeded >
286
0
            std::numeric_limits<size_t>::max() - nDecodedBufferSize)
287
0
        {
288
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
289
0
            return false;
290
0
        }
291
0
        nSizeNeeded += nDecodedBufferSize;
292
0
    }
293
294
    // Reserve a buffer for tile content
295
140
    if (nSizeNeeded > 1024 * 1024 * 1024 &&
296
140
        !CPLTestBool(CPLGetConfigOption("ZARR_ALLOW_BIG_TILE_SIZE", "NO")))
297
0
    {
298
0
        CPLError(CE_Failure, CPLE_AppDefined,
299
0
                 "Zarr tile allocation would require " CPL_FRMT_GUIB " bytes. "
300
0
                 "By default the driver limits to 1 GB. To allow that memory "
301
0
                 "allocation, set the ZARR_ALLOW_BIG_TILE_SIZE configuration "
302
0
                 "option to YES.",
303
0
                 static_cast<GUIntBig>(nSizeNeeded));
304
0
        return false;
305
0
    }
306
307
140
    m_bWorkingBuffersOK =
308
140
        AllocateWorkingBuffers(m_abyRawTileData, m_abyDecodedTileData);
309
140
    return m_bWorkingBuffersOK;
310
140
}
311
312
bool ZarrV3Array::AllocateWorkingBuffers(
313
    ZarrByteVectorQuickResize &abyRawTileData,
314
    ZarrByteVectorQuickResize &abyDecodedTileData) const
315
140
{
316
    // This method should NOT modify any ZarrArray member, as it is going to
317
    // be called concurrently from several threads.
318
319
    // Set those #define to avoid accidental use of some global variables
320
140
#define m_abyRawTileData cannot_use_here
321
140
#define m_abyDecodedTileData cannot_use_here
322
323
140
    try
324
140
    {
325
140
        abyRawTileData.resize(m_nTileSize);
326
140
    }
327
140
    catch (const std::bad_alloc &e)
328
140
    {
329
0
        CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
330
0
        return false;
331
0
    }
332
333
140
    if (NeedDecodedBuffer())
334
0
    {
335
0
        size_t nDecodedBufferSize = m_oType.GetSize();
336
0
        for (const auto &nBlockSize : m_anBlockSize)
337
0
        {
338
0
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
339
0
        }
340
0
        try
341
0
        {
342
0
            abyDecodedTileData.resize(nDecodedBufferSize);
343
0
        }
344
0
        catch (const std::bad_alloc &e)
345
0
        {
346
0
            CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
347
0
            return false;
348
0
        }
349
0
    }
350
351
140
    return true;
352
140
#undef m_abyRawTileData
353
140
#undef m_abyDecodedTileData
354
140
}
355
356
/************************************************************************/
357
/*                      ZarrV3Array::LoadTileData()                     */
358
/************************************************************************/
359
360
bool ZarrV3Array::LoadTileData(const uint64_t *tileIndices,
361
                               bool &bMissingTileOut) const
362
7.97k
{
363
7.97k
    return LoadTileData(tileIndices,
364
7.97k
                        false,  // use mutex
365
7.97k
                        m_poCodecs.get(), m_abyRawTileData,
366
7.97k
                        m_abyDecodedTileData, bMissingTileOut);
367
7.97k
}
368
369
bool ZarrV3Array::LoadTileData(const uint64_t *tileIndices, bool bUseMutex,
370
                               ZarrV3CodecSequence *poCodecs,
371
                               ZarrByteVectorQuickResize &abyRawTileData,
372
                               ZarrByteVectorQuickResize &abyDecodedTileData,
373
                               bool &bMissingTileOut) const
374
7.97k
{
375
    // This method should NOT modify any ZarrArray member, as it is going to
376
    // be called concurrently from several threads.
377
378
    // Set those #define to avoid accidental use of some global variables
379
7.97k
#define m_abyRawTileData cannot_use_here
380
7.97k
#define m_abyDecodedTileData cannot_use_here
381
7.97k
#define m_poCodecs cannot_use_here
382
383
7.97k
    bMissingTileOut = false;
384
385
7.97k
    std::string osFilename = BuildTileFilename(tileIndices);
386
387
    // For network file systems, get the streaming version of the filename,
388
    // as we don't need arbitrary seeking in the file
389
7.97k
    osFilename = VSIFileManager::GetHandler(osFilename.c_str())
390
7.97k
                     ->GetStreamingFilename(osFilename);
391
392
    // First if we have a tile presence cache, check tile presence from it
393
7.97k
    if (bUseMutex)
394
0
        m_oMutex.lock();
395
7.97k
    auto poTilePresenceArray = OpenTilePresenceCache(false);
396
7.97k
    if (poTilePresenceArray)
397
0
    {
398
0
        std::vector<GUInt64> anTileIdx(m_aoDims.size());
399
0
        const std::vector<size_t> anCount(m_aoDims.size(), 1);
400
0
        const std::vector<GInt64> anArrayStep(m_aoDims.size(), 0);
401
0
        const std::vector<GPtrDiff_t> anBufferStride(m_aoDims.size(), 0);
402
0
        const auto eByteDT = GDALExtendedDataType::Create(GDT_Byte);
403
0
        for (size_t i = 0; i < m_aoDims.size(); ++i)
404
0
        {
405
0
            anTileIdx[i] = static_cast<GUInt64>(tileIndices[i]);
406
0
        }
407
0
        GByte byValue = 0;
408
0
        if (poTilePresenceArray->Read(anTileIdx.data(), anCount.data(),
409
0
                                      anArrayStep.data(), anBufferStride.data(),
410
0
                                      eByteDT, &byValue) &&
411
0
            byValue == 0)
412
0
        {
413
0
            if (bUseMutex)
414
0
                m_oMutex.unlock();
415
0
            CPLDebugOnly(ZARR_DEBUG_KEY, "Tile %s missing (=nodata)",
416
0
                         osFilename.c_str());
417
0
            bMissingTileOut = true;
418
0
            return true;
419
0
        }
420
0
    }
421
7.97k
    if (bUseMutex)
422
0
        m_oMutex.unlock();
423
424
7.97k
    VSILFILE *fp = nullptr;
425
    // This is the number of files returned in a S3 directory listing operation
426
7.97k
    constexpr uint64_t MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING = 1000;
427
7.97k
    const char *const apszOpenOptions[] = {"IGNORE_FILENAME_RESTRICTIONS=YES",
428
7.97k
                                           nullptr};
429
7.97k
    const auto nErrorBefore = CPLGetErrorCounter();
430
7.97k
    if ((m_osDimSeparator == "/" && !m_anBlockSize.empty() &&
431
7.97k
         m_anBlockSize.back() > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING) ||
432
7.97k
        (m_osDimSeparator != "/" &&
433
7.97k
         m_nTotalTileCount > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING))
434
5.83k
    {
435
        // Avoid issuing ReadDir() when a lot of files are expected
436
5.83k
        CPLConfigOptionSetter optionSetter("GDAL_DISABLE_READDIR_ON_OPEN",
437
5.83k
                                           "YES", true);
438
5.83k
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
439
5.83k
    }
440
2.14k
    else
441
2.14k
    {
442
2.14k
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
443
2.14k
    }
444
7.97k
    if (fp == nullptr)
445
7.97k
    {
446
7.97k
        if (nErrorBefore != CPLGetErrorCounter())
447
0
        {
448
0
            return false;
449
0
        }
450
7.97k
        else
451
7.97k
        {
452
            // Missing files are OK and indicate nodata_value
453
7.97k
            CPLDebugOnly(ZARR_DEBUG_KEY, "Tile %s missing (=nodata)",
454
7.97k
                         osFilename.c_str());
455
7.97k
            bMissingTileOut = true;
456
7.97k
            return true;
457
7.97k
        }
458
7.97k
    }
459
460
6
    bMissingTileOut = false;
461
462
6
    CPLAssert(abyRawTileData.capacity() >= m_nTileSize);
463
    // should not fail
464
6
    abyRawTileData.resize(m_nTileSize);
465
466
6
    bool bRet = true;
467
6
    size_t nRawDataSize = abyRawTileData.size();
468
6
    if (poCodecs == nullptr)
469
0
    {
470
0
        nRawDataSize = VSIFReadL(&abyRawTileData[0], 1, nRawDataSize, fp);
471
0
    }
472
6
    else
473
6
    {
474
6
        VSIFSeekL(fp, 0, SEEK_END);
475
6
        const auto nSize = VSIFTellL(fp);
476
6
        VSIFSeekL(fp, 0, SEEK_SET);
477
6
        if (nSize > static_cast<vsi_l_offset>(std::numeric_limits<int>::max()))
478
0
        {
479
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large tile %s",
480
0
                     osFilename.c_str());
481
0
            bRet = false;
482
0
        }
483
6
        else
484
6
        {
485
6
            try
486
6
            {
487
6
                abyRawTileData.resize(static_cast<size_t>(nSize));
488
6
            }
489
6
            catch (const std::exception &)
490
6
            {
491
0
                CPLError(CE_Failure, CPLE_OutOfMemory,
492
0
                         "Cannot allocate memory for tile %s",
493
0
                         osFilename.c_str());
494
0
                bRet = false;
495
0
            }
496
497
6
            if (bRet && (abyRawTileData.empty() ||
498
6
                         VSIFReadL(&abyRawTileData[0], 1, abyRawTileData.size(),
499
6
                                   fp) != abyRawTileData.size()))
500
0
            {
501
0
                CPLError(CE_Failure, CPLE_AppDefined,
502
0
                         "Could not read tile %s correctly",
503
0
                         osFilename.c_str());
504
0
                bRet = false;
505
0
            }
506
6
            else
507
6
            {
508
6
                if (!poCodecs->Decode(abyRawTileData))
509
0
                {
510
0
                    CPLError(CE_Failure, CPLE_AppDefined,
511
0
                             "Decompression of tile %s failed",
512
0
                             osFilename.c_str());
513
0
                    bRet = false;
514
0
                }
515
6
            }
516
6
        }
517
6
    }
518
6
    VSIFCloseL(fp);
519
6
    if (!bRet)
520
0
        return false;
521
522
6
    if (nRawDataSize != abyRawTileData.size())
523
6
    {
524
6
        CPLError(CE_Failure, CPLE_AppDefined,
525
6
                 "Decompressed tile %s has not expected size. "
526
6
                 "Got %u instead of %u",
527
6
                 osFilename.c_str(),
528
6
                 static_cast<unsigned>(abyRawTileData.size()),
529
6
                 static_cast<unsigned>(nRawDataSize));
530
6
        return false;
531
6
    }
532
533
0
    if (!abyDecodedTileData.empty())
534
0
    {
535
0
        const size_t nSourceSize =
536
0
            m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
537
0
        const auto nDTSize = m_oType.GetSize();
538
0
        const size_t nValues = abyDecodedTileData.size() / nDTSize;
539
0
        CPLAssert(nValues == m_nTileSize / nSourceSize);
540
0
        const GByte *pSrc = abyRawTileData.data();
541
0
        GByte *pDst = &abyDecodedTileData[0];
542
0
        for (size_t i = 0; i < nValues;
543
0
             i++, pSrc += nSourceSize, pDst += nDTSize)
544
0
        {
545
0
            DecodeSourceElt(m_aoDtypeElts, pSrc, pDst);
546
0
        }
547
0
    }
548
549
0
    return true;
550
551
6
#undef m_abyRawTileData
552
6
#undef m_abyDecodedTileData
553
6
#undef m_poCodecs
554
6
}
555
556
/************************************************************************/
557
/*                      ZarrV3Array::IAdviseRead()                      */
558
/************************************************************************/
559
560
bool ZarrV3Array::IAdviseRead(const GUInt64 *arrayStartIdx, const size_t *count,
561
                              CSLConstList papszOptions) const
562
0
{
563
0
    std::vector<uint64_t> anIndicesCur;
564
0
    int nThreadsMax = 0;
565
0
    std::vector<uint64_t> anReqTilesIndices;
566
0
    size_t nReqTiles = 0;
567
0
    if (!IAdviseReadCommon(arrayStartIdx, count, papszOptions, anIndicesCur,
568
0
                           nThreadsMax, anReqTilesIndices, nReqTiles))
569
0
    {
570
0
        return false;
571
0
    }
572
0
    if (nThreadsMax <= 1)
573
0
    {
574
0
        return true;
575
0
    }
576
577
0
    const int nThreads =
578
0
        static_cast<int>(std::min(static_cast<size_t>(nThreadsMax), nReqTiles));
579
580
0
    CPLWorkerThreadPool *wtp = GDALGetGlobalThreadPool(nThreadsMax);
581
0
    if (wtp == nullptr)
582
0
        return false;
583
584
0
    struct JobStruct
585
0
    {
586
0
        JobStruct() = default;
587
588
0
        JobStruct(const JobStruct &) = delete;
589
0
        JobStruct &operator=(const JobStruct &) = delete;
590
591
0
        JobStruct(JobStruct &&) = default;
592
0
        JobStruct &operator=(JobStruct &&) = default;
593
594
0
        const ZarrV3Array *poArray = nullptr;
595
0
        bool *pbGlobalStatus = nullptr;
596
0
        int *pnRemainingThreads = nullptr;
597
0
        const std::vector<uint64_t> *panReqTilesIndices = nullptr;
598
0
        size_t nFirstIdx = 0;
599
0
        size_t nLastIdxNotIncluded = 0;
600
0
    };
601
602
0
    std::vector<JobStruct> asJobStructs;
603
604
0
    bool bGlobalStatus = true;
605
0
    int nRemainingThreads = nThreads;
606
    // Check for very highly overflow in below loop
607
0
    assert(static_cast<size_t>(nThreads) <
608
0
           std::numeric_limits<size_t>::max() / nReqTiles);
609
610
    // Setup jobs
611
0
    for (int i = 0; i < nThreads; i++)
612
0
    {
613
0
        JobStruct jobStruct;
614
0
        jobStruct.poArray = this;
615
0
        jobStruct.pbGlobalStatus = &bGlobalStatus;
616
0
        jobStruct.pnRemainingThreads = &nRemainingThreads;
617
0
        jobStruct.panReqTilesIndices = &anReqTilesIndices;
618
0
        jobStruct.nFirstIdx = static_cast<size_t>(i * nReqTiles / nThreads);
619
0
        jobStruct.nLastIdxNotIncluded = std::min(
620
0
            static_cast<size_t>((i + 1) * nReqTiles / nThreads), nReqTiles);
621
0
        asJobStructs.emplace_back(std::move(jobStruct));
622
0
    }
623
624
0
    const auto JobFunc = [](void *pThreadData)
625
0
    {
626
0
        const JobStruct *jobStruct =
627
0
            static_cast<const JobStruct *>(pThreadData);
628
629
0
        const auto poArray = jobStruct->poArray;
630
0
        const auto &aoDims = poArray->GetDimensions();
631
0
        const size_t l_nDims = poArray->GetDimensionCount();
632
0
        ZarrByteVectorQuickResize abyRawTileData;
633
0
        ZarrByteVectorQuickResize abyDecodedTileData;
634
0
        std::unique_ptr<ZarrV3CodecSequence> poCodecs;
635
0
        if (poArray->m_poCodecs)
636
0
        {
637
0
            std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
638
0
            poCodecs = poArray->m_poCodecs->Clone();
639
0
        }
640
641
0
        for (size_t iReq = jobStruct->nFirstIdx;
642
0
             iReq < jobStruct->nLastIdxNotIncluded; ++iReq)
643
0
        {
644
            // Check if we must early exit
645
0
            {
646
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
647
0
                if (!(*jobStruct->pbGlobalStatus))
648
0
                    return;
649
0
            }
650
651
0
            const uint64_t *tileIndices =
652
0
                jobStruct->panReqTilesIndices->data() + iReq * l_nDims;
653
654
0
            uint64_t nTileIdx = 0;
655
0
            for (size_t j = 0; j < l_nDims; ++j)
656
0
            {
657
0
                if (j > 0)
658
0
                    nTileIdx *= aoDims[j - 1]->GetSize();
659
0
                nTileIdx += tileIndices[j];
660
0
            }
661
662
0
            if (!poArray->AllocateWorkingBuffers(abyRawTileData,
663
0
                                                 abyDecodedTileData))
664
0
            {
665
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
666
0
                *jobStruct->pbGlobalStatus = false;
667
0
                break;
668
0
            }
669
670
0
            bool bIsEmpty = false;
671
0
            bool success = poArray->LoadTileData(tileIndices,
672
0
                                                 true,  // use mutex
673
0
                                                 poCodecs.get(), abyRawTileData,
674
0
                                                 abyDecodedTileData, bIsEmpty);
675
676
0
            std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
677
0
            if (!success)
678
0
            {
679
0
                *jobStruct->pbGlobalStatus = false;
680
0
                break;
681
0
            }
682
683
0
            CachedTile cachedTile;
684
0
            if (!bIsEmpty)
685
0
            {
686
0
                if (!abyDecodedTileData.empty())
687
0
                    std::swap(cachedTile.abyDecoded, abyDecodedTileData);
688
0
                else
689
0
                    std::swap(cachedTile.abyDecoded, abyRawTileData);
690
0
            }
691
0
            poArray->m_oMapTileIndexToCachedTile[nTileIdx] =
692
0
                std::move(cachedTile);
693
0
        }
694
695
0
        std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
696
0
        (*jobStruct->pnRemainingThreads)--;
697
0
    };
698
699
    // Start jobs
700
0
    for (int i = 0; i < nThreads; i++)
701
0
    {
702
0
        if (!wtp->SubmitJob(JobFunc, &asJobStructs[i]))
703
0
        {
704
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
705
0
            bGlobalStatus = false;
706
0
            nRemainingThreads = i;
707
0
            break;
708
0
        }
709
0
    }
710
711
    // Wait for all jobs to be finished
712
0
    while (true)
713
0
    {
714
0
        {
715
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
716
0
            if (nRemainingThreads == 0)
717
0
                break;
718
0
        }
719
0
        wtp->WaitEvent();
720
0
    }
721
722
0
    return bGlobalStatus;
723
0
}
724
725
/************************************************************************/
726
/*                    ZarrV3Array::FlushDirtyTile()                     */
727
/************************************************************************/
728
729
bool ZarrV3Array::FlushDirtyTile() const
730
8.47k
{
731
8.47k
    if (!m_bDirtyTile)
732
8.47k
        return true;
733
0
    m_bDirtyTile = false;
734
735
0
    std::string osFilename = BuildTileFilename(m_anCachedTiledIndices.data());
736
737
0
    const size_t nSourceSize =
738
0
        m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
739
0
    const auto &abyTile =
740
0
        m_abyDecodedTileData.empty() ? m_abyRawTileData : m_abyDecodedTileData;
741
742
0
    if (IsEmptyTile(abyTile))
743
0
    {
744
0
        m_bCachedTiledEmpty = true;
745
746
0
        VSIStatBufL sStat;
747
0
        if (VSIStatL(osFilename.c_str(), &sStat) == 0)
748
0
        {
749
0
            CPLDebugOnly(ZARR_DEBUG_KEY,
750
0
                         "Deleting tile %s that has now empty content",
751
0
                         osFilename.c_str());
752
0
            return VSIUnlink(osFilename.c_str()) == 0;
753
0
        }
754
0
        return true;
755
0
    }
756
757
0
    if (!m_abyDecodedTileData.empty())
758
0
    {
759
0
        const size_t nDTSize = m_oType.GetSize();
760
0
        const size_t nValues = m_abyDecodedTileData.size() / nDTSize;
761
0
        GByte *pDst = &m_abyRawTileData[0];
762
0
        const GByte *pSrc = m_abyDecodedTileData.data();
763
0
        for (size_t i = 0; i < nValues;
764
0
             i++, pDst += nSourceSize, pSrc += nDTSize)
765
0
        {
766
0
            EncodeElt(m_aoDtypeElts, pSrc, pDst);
767
0
        }
768
0
    }
769
770
0
    const size_t nSizeBefore = m_abyRawTileData.size();
771
0
    if (m_poCodecs)
772
0
    {
773
0
        if (!m_poCodecs->Encode(m_abyRawTileData))
774
0
        {
775
0
            m_abyRawTileData.resize(nSizeBefore);
776
0
            return false;
777
0
        }
778
0
    }
779
780
0
    if (m_osDimSeparator == "/")
781
0
    {
782
0
        std::string osDir = CPLGetDirnameSafe(osFilename.c_str());
783
0
        VSIStatBufL sStat;
784
0
        if (VSIStatL(osDir.c_str(), &sStat) != 0)
785
0
        {
786
0
            if (VSIMkdirRecursive(osDir.c_str(), 0755) != 0)
787
0
            {
788
0
                CPLError(CE_Failure, CPLE_AppDefined,
789
0
                         "Cannot create directory %s", osDir.c_str());
790
0
                m_abyRawTileData.resize(nSizeBefore);
791
0
                return false;
792
0
            }
793
0
        }
794
0
    }
795
796
0
    VSILFILE *fp = VSIFOpenL(osFilename.c_str(), "wb");
797
0
    if (fp == nullptr)
798
0
    {
799
0
        CPLError(CE_Failure, CPLE_AppDefined, "Cannot create tile %s",
800
0
                 osFilename.c_str());
801
0
        m_abyRawTileData.resize(nSizeBefore);
802
0
        return false;
803
0
    }
804
805
0
    bool bRet = true;
806
0
    const size_t nRawDataSize = m_abyRawTileData.size();
807
0
    if (VSIFWriteL(m_abyRawTileData.data(), 1, nRawDataSize, fp) !=
808
0
        nRawDataSize)
809
0
    {
810
0
        CPLError(CE_Failure, CPLE_AppDefined,
811
0
                 "Could not write tile %s correctly", osFilename.c_str());
812
0
        bRet = false;
813
0
    }
814
0
    VSIFCloseL(fp);
815
816
0
    m_abyRawTileData.resize(nSizeBefore);
817
818
0
    return bRet;
819
0
}
820
821
/************************************************************************/
822
/*                          BuildTileFilename()                         */
823
/************************************************************************/
824
825
std::string ZarrV3Array::BuildTileFilename(const uint64_t *tileIndices) const
826
7.97k
{
827
7.97k
    if (m_aoDims.empty())
828
0
    {
829
0
        return CPLFormFilenameSafe(
830
0
            CPLGetDirnameSafe(m_osFilename.c_str()).c_str(),
831
0
            m_bV2ChunkKeyEncoding ? "0" : "c", nullptr);
832
0
    }
833
7.97k
    else
834
7.97k
    {
835
7.97k
        std::string osFilename(CPLGetDirnameSafe(m_osFilename.c_str()));
836
7.97k
        osFilename += '/';
837
7.97k
        if (!m_bV2ChunkKeyEncoding)
838
2.10k
        {
839
2.10k
            osFilename += 'c';
840
2.10k
        }
841
23.5k
        for (size_t i = 0; i < m_aoDims.size(); ++i)
842
15.5k
        {
843
15.5k
            if (i > 0 || !m_bV2ChunkKeyEncoding)
844
9.66k
                osFilename += m_osDimSeparator;
845
15.5k
            osFilename += std::to_string(tileIndices[i]);
846
15.5k
        }
847
7.97k
        return osFilename;
848
7.97k
    }
849
7.97k
}
850
851
/************************************************************************/
852
/*                          GetDataDirectory()                          */
853
/************************************************************************/
854
855
std::string ZarrV3Array::GetDataDirectory() const
856
0
{
857
0
    return std::string(CPLGetDirnameSafe(m_osFilename.c_str()));
858
0
}
859
860
/************************************************************************/
861
/*                        GetTileIndicesFromFilename()                  */
862
/************************************************************************/
863
864
/** Split a tile key into its per-dimension index tokens.
 *
 * Unless the V2 chunk key encoding is used, the key must start with 'c'
 * followed by the dimension separator (a backslash is also accepted when
 * the separator is "/"); otherwise an empty list is returned.
 *
 * @param pszFilename Tile key to analyze.
 * @return the tokens found after the optional prefix, split on
 *         m_osDimSeparator, or an empty list if the prefix does not match.
 */
CPLStringList
ZarrV3Array::GetTileIndicesFromFilename(const char *pszFilename) const
{
    const char *pszIndices = pszFilename;

    if (!m_bV2ChunkKeyEncoding)
    {
        if (pszFilename[0] != 'c')
            return CPLStringList();

        const char chAfterPrefix = pszFilename[1];
        const bool bSeparatorMatches =
            (m_osDimSeparator == "/")
                ? (chAfterPrefix == '/' || chAfterPrefix == '\\')
                : (chAfterPrefix == m_osDimSeparator[0]);
        if (!bSeparatorMatches)
            return CPLStringList();

        // Skip the 'c' prefix and the separator that follows it.
        pszIndices += 2;
    }

    return CPLStringList(
        CSLTokenizeString2(pszIndices, m_osDimSeparator.c_str(), 0));
}
885
886
/************************************************************************/
887
/*                           ParseDtypeV3()                             */
888
/************************************************************************/
889
890
/** Convert a Zarr V3 "data_type" JSON value into a GDAL data type.
 *
 * Only string values naming one of the types listed in the table below
 * are recognized.
 *
 * @param obj  JSON value of data_type.
 * @param elts Vector to which the description of the parsed type is
 *             appended on success.
 * @return the corresponding GDAL type, or a GDT_Unknown numeric type (after
 *         emitting a CE_Failure error) if obj is not a supported type name.
 */
static GDALExtendedDataType ParseDtypeV3(const CPLJSONObject &obj,
                                         std::vector<DtypeElt> &elts)
{
    struct KnownType
    {
        const char *pszName;
        DtypeElt::NativeType eNativeType;
        GDALDataType eGDALType;
    };

    static const KnownType asKnownTypes[] = {
        {"bool", DtypeElt::NativeType::BOOLEAN, GDT_Byte},
        {"int8", DtypeElt::NativeType::SIGNED_INT, GDT_Int8},
        {"uint8", DtypeElt::NativeType::UNSIGNED_INT, GDT_Byte},
        {"int16", DtypeElt::NativeType::SIGNED_INT, GDT_Int16},
        {"uint16", DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt16},
        {"int32", DtypeElt::NativeType::SIGNED_INT, GDT_Int32},
        {"uint32", DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt32},
        {"int64", DtypeElt::NativeType::SIGNED_INT, GDT_Int64},
        {"uint64", DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt64},
        {"float16", DtypeElt::NativeType::IEEEFP, GDT_Float16},
        {"float32", DtypeElt::NativeType::IEEEFP, GDT_Float32},
        {"float64", DtypeElt::NativeType::IEEEFP, GDT_Float64},
        {"complex64", DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat32},
        {"complex128", DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat64},
    };

    const KnownType *psMatch = nullptr;
    if (obj.GetType() == CPLJSONObject::Type::String)
    {
        const auto osTypeName = obj.ToString();
        for (const auto &sCandidate : asKnownTypes)
        {
            if (osTypeName == sCandidate.pszName)
            {
                psMatch = &sCandidate;
                break;
            }
        }
    }

    if (psMatch == nullptr)
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Invalid or unsupported format for data_type: %s",
                 obj.ToString().c_str());
        return GDALExtendedDataType::Create(GDT_Unknown);
    }

    const GDALDataType eDT = psMatch->eGDALType;

    DtypeElt elt;
    elt.nativeType = psMatch->eNativeType;
    // float16 is exposed with its native GDAL type (2 bytes).
    if (eDT == GDT_Float16)
        elt.nativeSize = 2;

    elt.gdalType = GDALExtendedDataType::Create(eDT);
    elt.gdalSize = elt.gdalType.GetSize();
    if (!elt.gdalTypeIsApproxOfNative)
        elt.nativeSize = elt.gdalSize;

    // Multi-byte values need swapping on hosts that are not little-endian.
    if (elt.nativeSize > 1)
    {
        elt.needByteSwapping = (CPL_IS_LSB == 0);
    }

    elts.emplace_back(elt);
    return GDALExtendedDataType::Create(eDT);
}
998
999
/************************************************************************/
1000
/*                    ParseNoDataStringAsDouble()                       */
1001
/************************************************************************/
1002
1003
/** Interpret the textual spelling of a non-finite floating point value.
 *
 * Recognized spellings are "NaN", "Infinity", "+Infinity" and "-Infinity".
 *
 * @param osVal String to interpret.
 * @param bOK   Set to false when osVal is not recognized. It is left
 *              untouched otherwise (in particular it is never set to true).
 * @return the corresponding value, or NaN when osVal is not recognized.
 */
static double ParseNoDataStringAsDouble(const std::string &osVal, bool &bOK)
{
    constexpr double dfNaN = std::numeric_limits<double>::quiet_NaN();
    constexpr double dfPositiveInf = std::numeric_limits<double>::infinity();

    if (osVal == "NaN")
        return dfNaN;

    if (osVal == "Infinity" || osVal == "+Infinity")
        return dfPositiveInf;

    if (osVal == "-Infinity")
        return -dfPositiveInf;

    bOK = false;
    return dfNaN;
}
1024
1025
/************************************************************************/
1026
/*                     ParseNoDataComponent()                           */
1027
/************************************************************************/
1028
1029
template <typename T, typename Tint>
1030
static T ParseNoDataComponent(const CPLJSONObject &oObj, bool &bOK)
1031
0
{
1032
0
    if (oObj.GetType() == CPLJSONObject::Type::Integer ||
1033
0
        oObj.GetType() == CPLJSONObject::Type::Long ||
1034
0
        oObj.GetType() == CPLJSONObject::Type::Double)
1035
0
    {
1036
0
        return static_cast<T>(oObj.ToDouble());
1037
0
    }
1038
0
    else if (oObj.GetType() == CPLJSONObject::Type::String)
1039
0
    {
1040
0
        const auto osVal = oObj.ToString();
1041
0
        if (STARTS_WITH(osVal.c_str(), "0x"))
1042
0
        {
1043
0
            if (osVal.size() > 2 + 2 * sizeof(T))
1044
0
            {
1045
0
                bOK = false;
1046
0
                return 0;
1047
0
            }
1048
0
            Tint nVal = static_cast<Tint>(
1049
0
                std::strtoull(osVal.c_str() + 2, nullptr, 16));
1050
0
            T fVal;
1051
0
            static_assert(sizeof(nVal) == sizeof(fVal),
1052
0
                          "sizeof(nVal) == sizeof(dfVal)");
1053
0
            memcpy(&fVal, &nVal, sizeof(nVal));
1054
0
            return fVal;
1055
0
        }
1056
0
        else
1057
0
        {
1058
0
            return static_cast<T>(ParseNoDataStringAsDouble(osVal, bOK));
1059
0
        }
1060
0
    }
1061
0
    else
1062
0
    {
1063
0
        bOK = false;
1064
0
        return 0;
1065
0
    }
1066
0
}
Unexecuted instantiation: zarr_v3_array.cpp:double ParseNoDataComponent<double, unsigned long>(CPLJSONObject const&, bool&)
Unexecuted instantiation: zarr_v3_array.cpp:float ParseNoDataComponent<float, unsigned int>(CPLJSONObject const&, bool&)
1067
1068
/************************************************************************/
1069
/*                     ZarrV3Group::LoadArray()                         */
1070
/************************************************************************/
1071
1072
std::shared_ptr<ZarrArray>
1073
ZarrV3Group::LoadArray(const std::string &osArrayName,
1074
                       const std::string &osZarrayFilename,
1075
                       const CPLJSONObject &oRoot) const
1076
2.46k
{
1077
    // Add osZarrayFilename to m_poSharedResource during the scope
1078
    // of this function call.
1079
2.46k
    ZarrSharedResource::SetFilenameAdder filenameAdder(m_poSharedResource,
1080
2.46k
                                                       osZarrayFilename);
1081
2.46k
    if (!filenameAdder.ok())
1082
0
        return nullptr;
1083
1084
    // Warn about unknown members (the spec suggests to error out, but let be
1085
    // a bit more lenient)
1086
2.46k
    for (const auto &oNode : oRoot.GetChildren())
1087
17.9k
    {
1088
17.9k
        const auto osName = oNode.GetName();
1089
17.9k
        if (osName != "zarr_format" && osName != "node_type" &&
1090
17.9k
            osName != "shape" && osName != "chunk_grid" &&
1091
17.9k
            osName != "data_type" && osName != "chunk_key_encoding" &&
1092
17.9k
            osName != "fill_value" &&
1093
            // Below are optional
1094
17.9k
            osName != "dimension_names" && osName != "codecs" &&
1095
17.9k
            osName != "storage_transformers" && osName != "attributes")
1096
6.03k
        {
1097
6.03k
            CPLError(CE_Warning, CPLE_AppDefined,
1098
6.03k
                     "%s array definition contains a unknown member (%s). "
1099
6.03k
                     "Interpretation of the array might be wrong.",
1100
6.03k
                     osZarrayFilename.c_str(), osName.c_str());
1101
6.03k
        }
1102
17.9k
    }
1103
1104
2.46k
    const auto oStorageTransformers = oRoot["storage_transformers"].ToArray();
1105
2.46k
    if (oStorageTransformers.Size() > 0)
1106
0
    {
1107
0
        CPLError(CE_Failure, CPLE_AppDefined,
1108
0
                 "storage_transformers are not supported.");
1109
0
        return nullptr;
1110
0
    }
1111
1112
2.46k
    const auto oShape = oRoot["shape"].ToArray();
1113
2.46k
    if (!oShape.IsValid())
1114
1.04k
    {
1115
1.04k
        CPLError(CE_Failure, CPLE_AppDefined, "shape missing or not an array");
1116
1.04k
        return nullptr;
1117
1.04k
    }
1118
1119
    // Parse chunk_grid
1120
1.41k
    const auto oChunkGrid = oRoot["chunk_grid"];
1121
1.41k
    if (oChunkGrid.GetType() != CPLJSONObject::Type::Object)
1122
83
    {
1123
83
        CPLError(CE_Failure, CPLE_AppDefined,
1124
83
                 "chunk_grid missing or not an object");
1125
83
        return nullptr;
1126
83
    }
1127
1128
1.33k
    const auto oChunkGridName = oChunkGrid["name"];
1129
1.33k
    if (oChunkGridName.ToString() != "regular")
1130
111
    {
1131
111
        CPLError(CE_Failure, CPLE_AppDefined,
1132
111
                 "Only chunk_grid.name = regular supported");
1133
111
        return nullptr;
1134
111
    }
1135
1136
1.22k
    const auto oChunks = oChunkGrid["configuration"]["chunk_shape"].ToArray();
1137
1.22k
    if (!oChunks.IsValid())
1138
44
    {
1139
44
        CPLError(
1140
44
            CE_Failure, CPLE_AppDefined,
1141
44
            "chunk_grid.configuration.chunk_shape missing or not an array");
1142
44
        return nullptr;
1143
44
    }
1144
1145
1.18k
    if (oShape.Size() != oChunks.Size())
1146
4
    {
1147
4
        CPLError(CE_Failure, CPLE_AppDefined,
1148
4
                 "shape and chunks arrays are of different size");
1149
4
        return nullptr;
1150
4
    }
1151
1152
    // Parse chunk_key_encoding
1153
1.17k
    const auto oChunkKeyEncoding = oRoot["chunk_key_encoding"];
1154
1.17k
    if (oChunkKeyEncoding.GetType() != CPLJSONObject::Type::Object)
1155
42
    {
1156
42
        CPLError(CE_Failure, CPLE_AppDefined,
1157
42
                 "chunk_key_encoding missing or not an object");
1158
42
        return nullptr;
1159
42
    }
1160
1161
1.13k
    std::string osDimSeparator;
1162
1.13k
    bool bV2ChunkKeyEncoding = false;
1163
1.13k
    const auto oChunkKeyEncodingName = oChunkKeyEncoding["name"];
1164
1.13k
    if (oChunkKeyEncodingName.ToString() == "default")
1165
823
    {
1166
823
        osDimSeparator = "/";
1167
823
    }
1168
312
    else if (oChunkKeyEncodingName.ToString() == "v2")
1169
141
    {
1170
141
        osDimSeparator = ".";
1171
141
        bV2ChunkKeyEncoding = true;
1172
141
    }
1173
171
    else
1174
171
    {
1175
171
        CPLError(CE_Failure, CPLE_AppDefined,
1176
171
                 "Unsupported chunk_key_encoding.name");
1177
171
        return nullptr;
1178
171
    }
1179
1180
964
    {
1181
964
        auto oConfiguration = oChunkKeyEncoding["configuration"];
1182
964
        if (oConfiguration.GetType() == CPLJSONObject::Type::Object)
1183
766
        {
1184
766
            auto oSeparator = oConfiguration["separator"];
1185
766
            if (oSeparator.IsValid())
1186
533
            {
1187
533
                osDimSeparator = oSeparator.ToString();
1188
533
                if (osDimSeparator != "/" && osDimSeparator != ".")
1189
14
                {
1190
14
                    CPLError(CE_Failure, CPLE_AppDefined,
1191
14
                             "Separator can only be '/' or '.'");
1192
14
                    return nullptr;
1193
14
                }
1194
533
            }
1195
766
        }
1196
964
    }
1197
1198
950
    CPLJSONObject oAttributes = oRoot["attributes"];
1199
1200
    // Deep-clone of oAttributes
1201
950
    if (oAttributes.IsValid())
1202
36
    {
1203
36
        oAttributes = oAttributes.Clone();
1204
36
    }
1205
1206
950
    std::vector<std::shared_ptr<GDALDimension>> aoDims;
1207
2.52k
    for (int i = 0; i < oShape.Size(); ++i)
1208
1.60k
    {
1209
1.60k
        const auto nSize = static_cast<GUInt64>(oShape[i].ToLong());
1210
1.60k
        if (nSize == 0)
1211
39
        {
1212
39
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid content for shape");
1213
39
            return nullptr;
1214
39
        }
1215
1.57k
        aoDims.emplace_back(std::make_shared<ZarrDimension>(
1216
1.57k
            m_poSharedResource,
1217
1.57k
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1218
1.57k
            std::string(), CPLSPrintf("dim%d", i), std::string(), std::string(),
1219
1.57k
            nSize));
1220
1.57k
    }
1221
1222
    // Deal with dimension_names
1223
911
    const auto dimensionNames = oRoot["dimension_names"];
1224
1225
911
    const auto FindDimension = [this, &aoDims, &osArrayName, &oAttributes](
1226
911
                                   const std::string &osDimName,
1227
911
                                   std::shared_ptr<GDALDimension> &poDim, int i)
1228
911
    {
1229
0
        auto oIter = m_oMapDimensions.find(osDimName);
1230
0
        if (oIter != m_oMapDimensions.end())
1231
0
        {
1232
0
            if (m_bDimSizeInUpdate ||
1233
0
                oIter->second->GetSize() == poDim->GetSize())
1234
0
            {
1235
0
                poDim = oIter->second;
1236
0
                return true;
1237
0
            }
1238
0
            else
1239
0
            {
1240
0
                CPLError(CE_Warning, CPLE_AppDefined,
1241
0
                         "Size of _ARRAY_DIMENSIONS[%d] different "
1242
0
                         "from the one of shape",
1243
0
                         i);
1244
0
                return false;
1245
0
            }
1246
0
        }
1247
1248
        // Try to load the indexing variable.
1249
        // Not in m_oMapMDArrays,
1250
        // then stat() the indexing variable.
1251
0
        else if (osArrayName != osDimName &&
1252
0
                 m_oMapMDArrays.find(osDimName) == m_oMapMDArrays.end())
1253
0
        {
1254
0
            std::string osDirName = m_osDirectoryName;
1255
0
            while (true)
1256
0
            {
1257
0
                const std::string osArrayFilenameDim = CPLFormFilenameSafe(
1258
0
                    CPLFormFilenameSafe(osDirName.c_str(), osDimName.c_str(),
1259
0
                                        nullptr)
1260
0
                        .c_str(),
1261
0
                    "zarr.json", nullptr);
1262
0
                VSIStatBufL sStat;
1263
0
                if (VSIStatL(osArrayFilenameDim.c_str(), &sStat) == 0)
1264
0
                {
1265
0
                    CPLJSONDocument oDoc;
1266
0
                    if (oDoc.Load(osArrayFilenameDim))
1267
0
                    {
1268
0
                        LoadArray(osDimName, osArrayFilenameDim,
1269
0
                                  oDoc.GetRoot());
1270
0
                    }
1271
0
                }
1272
0
                else
1273
0
                {
1274
                    // Recurse to upper level for datasets such as
1275
                    // /vsis3/hrrrzarr/sfc/20210809/20210809_00z_anl.zarr/0.1_sigma_level/HAIL_max_fcst/0.1_sigma_level/HAIL_max_fcst
1276
0
                    std::string osDirNameNew =
1277
0
                        CPLGetPathSafe(osDirName.c_str());
1278
0
                    if (!osDirNameNew.empty() && osDirNameNew != osDirName)
1279
0
                    {
1280
0
                        osDirName = std::move(osDirNameNew);
1281
0
                        continue;
1282
0
                    }
1283
0
                }
1284
0
                break;
1285
0
            }
1286
0
        }
1287
1288
0
        oIter = m_oMapDimensions.find(osDimName);
1289
        // cppcheck-suppress knownConditionTrueFalse
1290
0
        if (oIter != m_oMapDimensions.end() &&
1291
0
            oIter->second->GetSize() == poDim->GetSize())
1292
0
        {
1293
0
            poDim = oIter->second;
1294
0
            return true;
1295
0
        }
1296
1297
0
        std::string osType;
1298
0
        std::string osDirection;
1299
0
        if (aoDims.size() == 1 && osArrayName == osDimName)
1300
0
        {
1301
0
            ZarrArray::GetDimensionTypeDirection(oAttributes, osType,
1302
0
                                                 osDirection);
1303
0
        }
1304
1305
0
        auto poDimLocal = std::make_shared<ZarrDimension>(
1306
0
            m_poSharedResource,
1307
0
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1308
0
            GetFullName(), osDimName, osType, osDirection, poDim->GetSize());
1309
0
        poDimLocal->SetXArrayDimension();
1310
0
        m_oMapDimensions[osDimName] = poDimLocal;
1311
0
        poDim = poDimLocal;
1312
0
        return true;
1313
0
    };
1314
1315
911
    if (dimensionNames.GetType() == CPLJSONObject::Type::Array)
1316
0
    {
1317
0
        const auto arrayDims = dimensionNames.ToArray();
1318
0
        if (arrayDims.Size() == oShape.Size())
1319
0
        {
1320
0
            for (int i = 0; i < oShape.Size(); ++i)
1321
0
            {
1322
0
                if (arrayDims[i].GetType() == CPLJSONObject::Type::String)
1323
0
                {
1324
0
                    const auto osDimName = arrayDims[i].ToString();
1325
0
                    FindDimension(osDimName, aoDims[i], i);
1326
0
                }
1327
0
            }
1328
0
        }
1329
0
        else
1330
0
        {
1331
0
            CPLError(
1332
0
                CE_Failure, CPLE_AppDefined,
1333
0
                "Size of dimension_names[] different from the one of shape");
1334
0
            return nullptr;
1335
0
        }
1336
0
    }
1337
911
    else if (dimensionNames.IsValid())
1338
0
    {
1339
0
        CPLError(CE_Failure, CPLE_AppDefined,
1340
0
                 "dimension_names should be an array");
1341
0
        return nullptr;
1342
0
    }
1343
1344
911
    auto oDtype = oRoot["data_type"];
1345
911
    if (!oDtype.IsValid())
1346
88
    {
1347
88
        CPLError(CE_Failure, CPLE_NotSupported, "data_type missing");
1348
88
        return nullptr;
1349
88
    }
1350
823
    if (oDtype["fallback"].IsValid())
1351
0
        oDtype = oDtype["fallback"];
1352
823
    std::vector<DtypeElt> aoDtypeElts;
1353
823
    const auto oType = ParseDtypeV3(oDtype, aoDtypeElts);
1354
823
    if (oType.GetClass() == GEDTC_NUMERIC &&
1355
823
        oType.GetNumericDataType() == GDT_Unknown)
1356
512
        return nullptr;
1357
1358
311
    std::vector<GUInt64> anBlockSize;
1359
311
    if (!ZarrArray::ParseChunkSize(oChunks, oType, anBlockSize))
1360
2
        return nullptr;
1361
1362
309
    std::vector<GByte> abyNoData;
1363
1364
309
    auto oFillValue = oRoot["fill_value"];
1365
309
    auto eFillValueType = oFillValue.GetType();
1366
1367
309
    if (!oFillValue.IsValid())
1368
143
    {
1369
143
        CPLError(CE_Warning, CPLE_AppDefined, "Missing fill_value is invalid");
1370
143
    }
1371
166
    else if (eFillValueType == CPLJSONObject::Type::Null)
1372
0
    {
1373
0
        CPLError(CE_Warning, CPLE_AppDefined, "fill_value = null is invalid");
1374
0
    }
1375
166
    else if (GDALDataTypeIsComplex(oType.GetNumericDataType()) &&
1376
166
             eFillValueType != CPLJSONObject::Type::Array)
1377
0
    {
1378
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1379
0
        return nullptr;
1380
0
    }
1381
166
    else if (eFillValueType == CPLJSONObject::Type::String)
1382
5
    {
1383
5
        const auto osFillValue = oFillValue.ToString();
1384
5
        if (STARTS_WITH(osFillValue.c_str(), "0x"))
1385
0
        {
1386
0
            if (osFillValue.size() > 2 + 2 * oType.GetSize())
1387
0
            {
1388
0
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1389
0
                return nullptr;
1390
0
            }
1391
0
            uint64_t nVal = static_cast<uint64_t>(
1392
0
                std::strtoull(osFillValue.c_str() + 2, nullptr, 16));
1393
0
            if (oType.GetSize() == 4)
1394
0
            {
1395
0
                abyNoData.resize(oType.GetSize());
1396
0
                uint32_t nTmp = static_cast<uint32_t>(nVal);
1397
0
                memcpy(&abyNoData[0], &nTmp, sizeof(nTmp));
1398
0
            }
1399
0
            else if (oType.GetSize() == 8)
1400
0
            {
1401
0
                abyNoData.resize(oType.GetSize());
1402
0
                memcpy(&abyNoData[0], &nVal, sizeof(nVal));
1403
0
            }
1404
0
            else
1405
0
            {
1406
0
                CPLError(CE_Failure, CPLE_AppDefined,
1407
0
                         "Hexadecimal representation of fill_value no "
1408
0
                         "supported for this data type");
1409
0
                return nullptr;
1410
0
            }
1411
0
        }
1412
5
        else if (STARTS_WITH(osFillValue.c_str(), "0b"))
1413
0
        {
1414
0
            if (osFillValue.size() > 2 + 8 * oType.GetSize())
1415
0
            {
1416
0
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1417
0
                return nullptr;
1418
0
            }
1419
0
            uint64_t nVal = static_cast<uint64_t>(
1420
0
                std::strtoull(osFillValue.c_str() + 2, nullptr, 2));
1421
0
            if (oType.GetSize() == 4)
1422
0
            {
1423
0
                abyNoData.resize(oType.GetSize());
1424
0
                uint32_t nTmp = static_cast<uint32_t>(nVal);
1425
0
                memcpy(&abyNoData[0], &nTmp, sizeof(nTmp));
1426
0
            }
1427
0
            else if (oType.GetSize() == 8)
1428
0
            {
1429
0
                abyNoData.resize(oType.GetSize());
1430
0
                memcpy(&abyNoData[0], &nVal, sizeof(nVal));
1431
0
            }
1432
0
            else
1433
0
            {
1434
0
                CPLError(CE_Failure, CPLE_AppDefined,
1435
0
                         "Binary representation of fill_value no supported for "
1436
0
                         "this data type");
1437
0
                return nullptr;
1438
0
            }
1439
0
        }
1440
5
        else
1441
5
        {
1442
5
            bool bOK = true;
1443
5
            double dfNoDataValue = ParseNoDataStringAsDouble(osFillValue, bOK);
1444
5
            if (!bOK)
1445
5
            {
1446
5
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1447
5
                return nullptr;
1448
5
            }
1449
0
            else if (oType.GetNumericDataType() == GDT_Float16)
1450
0
            {
1451
0
                const GFloat16 hfNoDataValue =
1452
0
                    static_cast<GFloat16>(dfNoDataValue);
1453
0
                abyNoData.resize(sizeof(hfNoDataValue));
1454
0
                memcpy(&abyNoData[0], &hfNoDataValue, sizeof(hfNoDataValue));
1455
0
            }
1456
0
            else if (oType.GetNumericDataType() == GDT_Float32)
1457
0
            {
1458
0
                const float fNoDataValue = static_cast<float>(dfNoDataValue);
1459
0
                abyNoData.resize(sizeof(fNoDataValue));
1460
0
                memcpy(&abyNoData[0], &fNoDataValue, sizeof(fNoDataValue));
1461
0
            }
1462
0
            else if (oType.GetNumericDataType() == GDT_Float64)
1463
0
            {
1464
0
                abyNoData.resize(sizeof(dfNoDataValue));
1465
0
                memcpy(&abyNoData[0], &dfNoDataValue, sizeof(dfNoDataValue));
1466
0
            }
1467
0
            else
1468
0
            {
1469
0
                CPLError(CE_Failure, CPLE_AppDefined,
1470
0
                         "Invalid fill_value for this data type");
1471
0
                return nullptr;
1472
0
            }
1473
5
        }
1474
5
    }
1475
161
    else if (eFillValueType == CPLJSONObject::Type::Boolean ||
1476
161
             eFillValueType == CPLJSONObject::Type::Integer ||
1477
161
             eFillValueType == CPLJSONObject::Type::Long ||
1478
161
             eFillValueType == CPLJSONObject::Type::Double)
1479
155
    {
1480
155
        const double dfNoDataValue = oFillValue.ToDouble();
1481
155
        if (oType.GetNumericDataType() == GDT_Int64)
1482
0
        {
1483
0
            const int64_t nNoDataValue =
1484
0
                static_cast<int64_t>(oFillValue.ToLong());
1485
0
            abyNoData.resize(oType.GetSize());
1486
0
            GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1487
0
                          oType.GetNumericDataType(), 0, 1);
1488
0
        }
1489
155
        else if (oType.GetNumericDataType() == GDT_UInt64 &&
1490
                 /* we can't really deal with nodata value between */
1491
                 /* int64::max and uint64::max due to json-c limitations */
1492
155
                 dfNoDataValue >= 0)
1493
0
        {
1494
0
            const int64_t nNoDataValue =
1495
0
                static_cast<int64_t>(oFillValue.ToLong());
1496
0
            abyNoData.resize(oType.GetSize());
1497
0
            GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1498
0
                          oType.GetNumericDataType(), 0, 1);
1499
0
        }
1500
155
        else
1501
155
        {
1502
155
            abyNoData.resize(oType.GetSize());
1503
155
            GDALCopyWords(&dfNoDataValue, GDT_Float64, 0, &abyNoData[0],
1504
155
                          oType.GetNumericDataType(), 0, 1);
1505
155
        }
1506
155
    }
1507
6
    else if (eFillValueType == CPLJSONObject::Type::Array)
1508
6
    {
1509
6
        const auto oFillValueArray = oFillValue.ToArray();
1510
6
        if (oFillValueArray.Size() == 2 &&
1511
6
            GDALDataTypeIsComplex(oType.GetNumericDataType()))
1512
0
        {
1513
0
            if (oType.GetNumericDataType() == GDT_CFloat64)
1514
0
            {
1515
0
                bool bOK = true;
1516
0
                const double adfNoDataValue[2] = {
1517
0
                    ParseNoDataComponent<double, uint64_t>(oFillValueArray[0],
1518
0
                                                           bOK),
1519
0
                    ParseNoDataComponent<double, uint64_t>(oFillValueArray[1],
1520
0
                                                           bOK),
1521
0
                };
1522
0
                if (!bOK)
1523
0
                {
1524
0
                    CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1525
0
                    return nullptr;
1526
0
                }
1527
0
                abyNoData.resize(oType.GetSize());
1528
0
                CPLAssert(sizeof(adfNoDataValue) == oType.GetSize());
1529
0
                memcpy(abyNoData.data(), adfNoDataValue,
1530
0
                       sizeof(adfNoDataValue));
1531
0
            }
1532
0
            else
1533
0
            {
1534
0
                CPLAssert(oType.GetNumericDataType() == GDT_CFloat32);
1535
0
                bool bOK = true;
1536
0
                const float afNoDataValue[2] = {
1537
0
                    ParseNoDataComponent<float, uint32_t>(oFillValueArray[0],
1538
0
                                                          bOK),
1539
0
                    ParseNoDataComponent<float, uint32_t>(oFillValueArray[1],
1540
0
                                                          bOK),
1541
0
                };
1542
0
                if (!bOK)
1543
0
                {
1544
0
                    CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1545
0
                    return nullptr;
1546
0
                }
1547
0
                abyNoData.resize(oType.GetSize());
1548
0
                CPLAssert(sizeof(afNoDataValue) == oType.GetSize());
1549
0
                memcpy(abyNoData.data(), afNoDataValue, sizeof(afNoDataValue));
1550
0
            }
1551
0
        }
1552
6
        else
1553
6
        {
1554
6
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1555
6
            return nullptr;
1556
6
        }
1557
6
    }
1558
0
    else
1559
0
    {
1560
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1561
0
        return nullptr;
1562
0
    }
1563
1564
298
    const auto oCodecs = oRoot["codecs"].ToArray();
1565
298
    std::unique_ptr<ZarrV3CodecSequence> poCodecs;
1566
298
    if (oCodecs.Size() > 0)
1567
56
    {
1568
        // Byte swapping will be done by the codec chain
1569
56
        aoDtypeElts.back().needByteSwapping = false;
1570
1571
56
        ZarrArrayMetadata oInputArrayMetadata;
1572
56
        for (auto &nSize : anBlockSize)
1573
76
            oInputArrayMetadata.anBlockSizes.push_back(
1574
76
                static_cast<size_t>(nSize));
1575
56
        oInputArrayMetadata.oElt = aoDtypeElts.back();
1576
56
        poCodecs = std::make_unique<ZarrV3CodecSequence>(oInputArrayMetadata);
1577
56
        if (!poCodecs->InitFromJson(oCodecs))
1578
30
            return nullptr;
1579
56
    }
1580
1581
268
    auto poArray =
1582
268
        ZarrV3Array::Create(m_poSharedResource, GetFullName(), osArrayName,
1583
268
                            aoDims, oType, aoDtypeElts, anBlockSize);
1584
268
    if (!poArray)
1585
42
        return nullptr;
1586
226
    poArray->SetUpdatable(m_bUpdatable);  // must be set before SetAttributes()
1587
226
    poArray->SetFilename(osZarrayFilename);
1588
226
    poArray->SetIsV2ChunkKeyEncoding(bV2ChunkKeyEncoding);
1589
226
    poArray->SetDimSeparator(osDimSeparator);
1590
226
    if (!abyNoData.empty())
1591
146
    {
1592
146
        poArray->RegisterNoDataValue(abyNoData.data());
1593
146
    }
1594
226
    poArray->ParseSpecialAttributes(m_pSelf.lock(), oAttributes);
1595
226
    poArray->SetAttributes(oAttributes);
1596
226
    poArray->SetDtype(oDtype);
1597
226
    if (oCodecs.Size() > 0 &&
1598
226
        oCodecs[oCodecs.Size() - 1].GetString("name") != "bytes")
1599
0
    {
1600
0
        poArray->SetStructuralInfo(
1601
0
            "COMPRESSOR", oCodecs[oCodecs.Size() - 1].ToString().c_str());
1602
0
    }
1603
226
    if (poCodecs)
1604
26
        poArray->SetCodecs(std::move(poCodecs));
1605
226
    RegisterArray(poArray);
1606
1607
    // If this is an indexing variable, attach it to the dimension.
1608
226
    if (aoDims.size() == 1 && aoDims[0]->GetName() == poArray->GetName())
1609
0
    {
1610
0
        auto oIter = m_oMapDimensions.find(poArray->GetName());
1611
0
        if (oIter != m_oMapDimensions.end())
1612
0
        {
1613
0
            oIter->second->SetIndexingVariable(poArray);
1614
0
        }
1615
0
    }
1616
1617
226
    if (CPLTestBool(m_poSharedResource->GetOpenOptions().FetchNameValueDef(
1618
226
            "CACHE_TILE_PRESENCE", "NO")))
1619
0
    {
1620
0
        poArray->CacheTilePresence();
1621
0
    }
1622
1623
226
    return poArray;
1624
268
}