Coverage Report

Created: 2025-06-09 08:44

/src/gdal/frmts/zarr/zarr_v2_array.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  GDAL
4
 * Purpose:  Zarr driver
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2021, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "cpl_float.h"
14
#include "cpl_vsi_virtual.h"
15
#include "gdal_thread_pool.h"
16
#include "zarr.h"
17
18
#include "netcdf_cf_constants.h"  // for CF_UNITS, etc
19
20
#include <algorithm>
21
#include <cassert>
22
#include <cstdlib>
23
#include <limits>
24
#include <map>
25
#include <set>
26
27
/************************************************************************/
28
/*                       ZarrV2Array::ZarrV2Array()                     */
29
/************************************************************************/
30
31
/** Construct a Zarr V2 array.
 *
 * Every argument except bFortranOrder is forwarded to the base classes.
 *
 * @param poSharedResource Resource object shared with the ZarrArray base.
 * @param osParentName     Name of the parent, forwarded to the base classes.
 * @param osName           Name of the array, forwarded to the base classes.
 * @param aoDims           Dimensions of the array.
 * @param oType            Data type of the array.
 * @param aoDtypeElts      Description of the native element layout.
 * @param anBlockSize      Chunk size along each dimension.
 * @param bFortranOrder    Stored in m_bFortranOrder; serialized as "F" (true)
 *                         or "C" (false) in the "order" metadata item.
 */
ZarrV2Array::ZarrV2Array(
    const std::shared_ptr<ZarrSharedResource> &poSharedResource,
    const std::string &osParentName, const std::string &osName,
    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
    const GDALExtendedDataType &oType, const std::vector<DtypeElt> &aoDtypeElts,
    const std::vector<GUInt64> &anBlockSize, bool bFortranOrder)
    : GDALAbstractMDArray(osParentName, osName),
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
                aoDtypeElts, anBlockSize),
      m_bFortranOrder(bFortranOrder)
{
    // Start without compressor configuration: Serialize() tests
    // m_oCompressorJSon.IsValid() to decide between writing the compressor
    // object and writing a null "compressor" item.
    m_oCompressorJSon.Deinit();
}
Unexecuted instantiation: ZarrV2Array::ZarrV2Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&, bool)
ZarrV2Array::ZarrV2Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&, bool)
Line
Count
Source
37
188k
    : GDALAbstractMDArray(osParentName, osName),
38
188k
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
39
188k
                aoDtypeElts, anBlockSize),
40
188k
      m_bFortranOrder(bFortranOrder)
41
188k
{
42
188k
    m_oCompressorJSon.Deinit();
43
188k
}
44
45
/************************************************************************/
46
/*                         ZarrV2Array::Create()                        */
47
/************************************************************************/
48
49
std::shared_ptr<ZarrV2Array>
50
ZarrV2Array::Create(const std::shared_ptr<ZarrSharedResource> &poSharedResource,
51
                    const std::string &osParentName, const std::string &osName,
52
                    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
53
                    const GDALExtendedDataType &oType,
54
                    const std::vector<DtypeElt> &aoDtypeElts,
55
                    const std::vector<GUInt64> &anBlockSize, bool bFortranOrder)
56
188k
{
57
188k
    auto arr = std::shared_ptr<ZarrV2Array>(
58
188k
        new ZarrV2Array(poSharedResource, osParentName, osName, aoDims, oType,
59
188k
                        aoDtypeElts, anBlockSize, bFortranOrder));
60
188k
    if (arr->m_nTotalTileCount == 0)
61
2.71k
        return nullptr;
62
186k
    arr->SetSelf(arr);
63
64
186k
    return arr;
65
188k
}
66
67
/************************************************************************/
68
/*                             ~ZarrV2Array()                           */
69
/************************************************************************/
70
71
/** Destructor: writes pending tile content and metadata before the object
 * goes away.
 */
ZarrV2Array::~ZarrV2Array()
{
    // Explicitly qualified so that this class's Flush() is the one invoked.
    this->ZarrV2Array::Flush();
}
75
76
/************************************************************************/
77
/*                                Flush()                               */
78
/************************************************************************/
79
80
void ZarrV2Array::Flush()
81
376k
{
82
376k
    if (!m_bValid)
83
0
        return;
84
85
376k
    ZarrV2Array::FlushDirtyTile();
86
87
376k
    if (m_bDefinitionModified)
88
1.65k
    {
89
1.65k
        Serialize();
90
1.65k
        m_bDefinitionModified = false;
91
1.65k
    }
92
93
376k
    CPLJSONArray j_ARRAY_DIMENSIONS;
94
376k
    bool bDimensionsModified = false;
95
376k
    if (!m_aoDims.empty())
96
342k
    {
97
342k
        for (const auto &poDim : m_aoDims)
98
398k
        {
99
398k
            const auto poZarrDim =
100
398k
                dynamic_cast<const ZarrDimension *>(poDim.get());
101
398k
            if (poZarrDim && poZarrDim->IsXArrayDimension())
102
198k
            {
103
198k
                if (poZarrDim->IsModified())
104
0
                    bDimensionsModified = true;
105
198k
                j_ARRAY_DIMENSIONS.Add(poDim->GetName());
106
198k
            }
107
200k
            else
108
200k
            {
109
200k
                j_ARRAY_DIMENSIONS = CPLJSONArray();
110
200k
                break;
111
200k
            }
112
398k
        }
113
342k
    }
114
115
376k
    if (m_oAttrGroup.IsModified() || bDimensionsModified ||
116
376k
        (m_bNew && j_ARRAY_DIMENSIONS.Size() != 0) || m_bUnitModified ||
117
376k
        m_bOffsetModified || m_bScaleModified || m_bSRSModified)
118
2.26k
    {
119
2.26k
        m_bNew = false;
120
121
2.26k
        auto oAttrs = SerializeSpecialAttributes();
122
123
2.26k
        if (j_ARRAY_DIMENSIONS.Size() != 0)
124
2.26k
        {
125
2.26k
            oAttrs.Delete("_ARRAY_DIMENSIONS");
126
2.26k
            oAttrs.Add("_ARRAY_DIMENSIONS", j_ARRAY_DIMENSIONS);
127
2.26k
        }
128
129
2.26k
        CPLJSONDocument oDoc;
130
2.26k
        oDoc.SetRoot(oAttrs);
131
2.26k
        const std::string osAttrFilename =
132
2.26k
            CPLFormFilenameSafe(CPLGetDirnameSafe(m_osFilename.c_str()).c_str(),
133
2.26k
                                ".zattrs", nullptr);
134
2.26k
        oDoc.Save(osAttrFilename);
135
2.26k
        m_poSharedResource->SetZMetadataItem(osAttrFilename, oAttrs);
136
2.26k
    }
137
376k
}
138
139
/************************************************************************/
140
/*           StripUselessItemsFromCompressorConfiguration()             */
141
/************************************************************************/
142
143
/** Remove from a compressor configuration object the items that are not
 * meant to be serialized. Does nothing if o is not a JSON object.
 */
static void StripUselessItemsFromCompressorConfiguration(CPLJSONObject &o)
{
    if (o.GetType() != CPLJSONObject::Type::Object)
        return;

    static const char *const apszUselessKeys[] = {
        "num_threads",  // Blosc
        "typesize",     // Blosc
        "header",       // LZ4
    };
    for (const char *pszKey : apszUselessKeys)
    {
        o.Delete(pszKey);
    }
}
152
153
/************************************************************************/
154
/*                    ZarrV2Array::Serialize()                          */
155
/************************************************************************/
156
157
void ZarrV2Array::Serialize()
158
1.65k
{
159
1.65k
    CPLJSONDocument oDoc;
160
1.65k
    CPLJSONObject oRoot = oDoc.GetRoot();
161
162
1.65k
    CPLJSONArray oChunks;
163
1.65k
    for (const auto nBlockSize : m_anBlockSize)
164
3.01k
    {
165
3.01k
        oChunks.Add(static_cast<GInt64>(nBlockSize));
166
3.01k
    }
167
1.65k
    oRoot.Add("chunks", oChunks);
168
169
1.65k
    if (m_oCompressorJSon.IsValid())
170
0
    {
171
0
        oRoot.Add("compressor", m_oCompressorJSon);
172
0
        CPLJSONObject compressor = oRoot["compressor"];
173
0
        StripUselessItemsFromCompressorConfiguration(compressor);
174
0
    }
175
1.65k
    else
176
1.65k
    {
177
1.65k
        oRoot.AddNull("compressor");
178
1.65k
    }
179
180
1.65k
    if (m_dtype.GetType() == CPLJSONObject::Type::Object)
181
1.65k
        oRoot.Add("dtype", m_dtype["dummy"]);
182
0
    else
183
0
        oRoot.Add("dtype", m_dtype);
184
185
1.65k
    if (m_pabyNoData == nullptr)
186
1.64k
    {
187
1.64k
        oRoot.AddNull("fill_value");
188
1.64k
    }
189
18
    else
190
18
    {
191
18
        switch (m_oType.GetClass())
192
18
        {
193
18
            case GEDTC_NUMERIC:
194
18
            {
195
18
                SerializeNumericNoData(oRoot);
196
18
                break;
197
0
            }
198
199
0
            case GEDTC_STRING:
200
0
            {
201
0
                char *pszStr;
202
0
                char **ppszStr = reinterpret_cast<char **>(m_pabyNoData);
203
0
                memcpy(&pszStr, ppszStr, sizeof(pszStr));
204
0
                if (pszStr)
205
0
                {
206
0
                    const size_t nNativeSize =
207
0
                        m_aoDtypeElts.back().nativeOffset +
208
0
                        m_aoDtypeElts.back().nativeSize;
209
0
                    char *base64 = CPLBase64Encode(
210
0
                        static_cast<int>(std::min(nNativeSize, strlen(pszStr))),
211
0
                        reinterpret_cast<const GByte *>(pszStr));
212
0
                    oRoot.Add("fill_value", base64);
213
0
                    CPLFree(base64);
214
0
                }
215
0
                else
216
0
                {
217
0
                    oRoot.AddNull("fill_value");
218
0
                }
219
0
                break;
220
0
            }
221
222
0
            case GEDTC_COMPOUND:
223
0
            {
224
0
                const size_t nNativeSize = m_aoDtypeElts.back().nativeOffset +
225
0
                                           m_aoDtypeElts.back().nativeSize;
226
0
                std::vector<GByte> nativeNoData(nNativeSize);
227
0
                EncodeElt(m_aoDtypeElts, m_pabyNoData, &nativeNoData[0]);
228
0
                char *base64 = CPLBase64Encode(static_cast<int>(nNativeSize),
229
0
                                               nativeNoData.data());
230
0
                oRoot.Add("fill_value", base64);
231
0
                CPLFree(base64);
232
0
            }
233
18
        }
234
18
    }
235
236
1.65k
    if (m_oFiltersArray.Size() == 0)
237
1.65k
        oRoot.AddNull("filters");
238
0
    else
239
0
        oRoot.Add("filters", m_oFiltersArray);
240
241
1.65k
    oRoot.Add("order", m_bFortranOrder ? "F" : "C");
242
243
1.65k
    CPLJSONArray oShape;
244
1.65k
    for (const auto &poDim : m_aoDims)
245
3.01k
    {
246
3.01k
        oShape.Add(static_cast<GInt64>(poDim->GetSize()));
247
3.01k
    }
248
1.65k
    oRoot.Add("shape", oShape);
249
250
1.65k
    oRoot.Add("zarr_format", 2);
251
252
1.65k
    if (m_osDimSeparator != ".")
253
0
    {
254
0
        oRoot.Add("dimension_separator", m_osDimSeparator);
255
0
    }
256
257
1.65k
    oDoc.Save(m_osFilename);
258
259
1.65k
    m_poSharedResource->SetZMetadataItem(m_osFilename, oRoot);
260
1.65k
}
261
262
/************************************************************************/
263
/*                  ZarrV2Array::NeedDecodedBuffer()                    */
264
/************************************************************************/
265
266
bool ZarrV2Array::NeedDecodedBuffer() const
267
31.0k
{
268
31.0k
    const size_t nSourceSize =
269
31.0k
        m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
270
31.0k
    if (m_oType.GetClass() == GEDTC_COMPOUND &&
271
31.0k
        nSourceSize != m_oType.GetSize())
272
0
    {
273
0
        return true;
274
0
    }
275
31.0k
    else if (m_oType.GetClass() != GEDTC_STRING)
276
31.0k
    {
277
31.0k
        for (const auto &elt : m_aoDtypeElts)
278
31.0k
        {
279
31.0k
            if (elt.needByteSwapping || elt.gdalTypeIsApproxOfNative ||
280
31.0k
                elt.nativeType == DtypeElt::NativeType::STRING_ASCII ||
281
31.0k
                elt.nativeType == DtypeElt::NativeType::STRING_UNICODE)
282
520
            {
283
520
                return true;
284
520
            }
285
31.0k
        }
286
31.0k
    }
287
30.5k
    return false;
288
31.0k
}
289
290
/************************************************************************/
291
/*               ZarrV2Array::AllocateWorkingBuffers()                  */
292
/************************************************************************/
293
294
bool ZarrV2Array::AllocateWorkingBuffers() const
295
58.0k
{
296
58.0k
    if (m_bAllocateWorkingBuffersDone)
297
42.5k
        return m_bWorkingBuffersOK;
298
299
15.5k
    m_bAllocateWorkingBuffersDone = true;
300
301
15.5k
    size_t nSizeNeeded = m_nTileSize;
302
15.5k
    if (m_bFortranOrder || m_oFiltersArray.Size() != 0)
303
1.45k
    {
304
1.45k
        if (nSizeNeeded > std::numeric_limits<size_t>::max() / 2)
305
0
        {
306
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
307
0
            return false;
308
0
        }
309
1.45k
        nSizeNeeded *= 2;
310
1.45k
    }
311
15.5k
    if (NeedDecodedBuffer())
312
260
    {
313
260
        size_t nDecodedBufferSize = m_oType.GetSize();
314
260
        for (const auto &nBlockSize : m_anBlockSize)
315
264
        {
316
264
            if (nDecodedBufferSize > std::numeric_limits<size_t>::max() /
317
264
                                         static_cast<size_t>(nBlockSize))
318
0
            {
319
0
                CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
320
0
                return false;
321
0
            }
322
264
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
323
264
        }
324
260
        if (nSizeNeeded >
325
260
            std::numeric_limits<size_t>::max() - nDecodedBufferSize)
326
0
        {
327
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
328
0
            return false;
329
0
        }
330
260
        nSizeNeeded += nDecodedBufferSize;
331
260
    }
332
333
    // Reserve a buffer for tile content
334
15.5k
    if (nSizeNeeded > 1024 * 1024 * 1024 &&
335
15.5k
        !CPLTestBool(CPLGetConfigOption("ZARR_ALLOW_BIG_TILE_SIZE", "NO")))
336
28
    {
337
28
        CPLError(CE_Failure, CPLE_AppDefined,
338
28
                 "Zarr tile allocation would require " CPL_FRMT_GUIB " bytes. "
339
28
                 "By default the driver limits to 1 GB. To allow that memory "
340
28
                 "allocation, set the ZARR_ALLOW_BIG_TILE_SIZE configuration "
341
28
                 "option to YES.",
342
28
                 static_cast<GUIntBig>(nSizeNeeded));
343
28
        return false;
344
28
    }
345
346
15.4k
    m_bWorkingBuffersOK = AllocateWorkingBuffers(
347
15.4k
        m_abyRawTileData, m_abyTmpRawTileData, m_abyDecodedTileData);
348
15.4k
    return m_bWorkingBuffersOK;
349
15.5k
}
350
351
bool ZarrV2Array::AllocateWorkingBuffers(
352
    ZarrByteVectorQuickResize &abyRawTileData,
353
    ZarrByteVectorQuickResize &abyTmpRawTileData,
354
    ZarrByteVectorQuickResize &abyDecodedTileData) const
355
15.4k
{
356
    // This method should NOT modify any ZarrArray member, as it is going to
357
    // be called concurrently from several threads.
358
359
    // Set those #define to avoid accidental use of some global variables
360
15.4k
#define m_abyTmpRawTileData cannot_use_here
361
15.4k
#define m_abyRawTileData cannot_use_here
362
15.4k
#define m_abyDecodedTileData cannot_use_here
363
364
15.4k
    try
365
15.4k
    {
366
15.4k
        abyRawTileData.resize(m_nTileSize);
367
15.4k
        if (m_bFortranOrder || m_oFiltersArray.Size() != 0)
368
1.42k
            abyTmpRawTileData.resize(m_nTileSize);
369
15.4k
    }
370
15.4k
    catch (const std::bad_alloc &e)
371
15.4k
    {
372
0
        CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
373
0
        return false;
374
0
    }
375
376
15.4k
    if (NeedDecodedBuffer())
377
260
    {
378
260
        size_t nDecodedBufferSize = m_oType.GetSize();
379
260
        for (const auto &nBlockSize : m_anBlockSize)
380
264
        {
381
264
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
382
264
        }
383
260
        try
384
260
        {
385
260
            abyDecodedTileData.resize(nDecodedBufferSize);
386
260
        }
387
260
        catch (const std::bad_alloc &e)
388
260
        {
389
0
            CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
390
0
            return false;
391
0
        }
392
260
    }
393
394
15.4k
    return true;
395
15.4k
#undef m_abyTmpRawTileData
396
15.4k
#undef m_abyRawTileData
397
15.4k
#undef m_abyDecodedTileData
398
15.4k
}
399
400
/************************************************************************/
401
/*                      ZarrV2Array::LoadTileData()                     */
402
/************************************************************************/
403
404
bool ZarrV2Array::LoadTileData(const uint64_t *tileIndices,
405
                               bool &bMissingTileOut) const
406
1.26M
{
407
1.26M
    return LoadTileData(tileIndices,
408
1.26M
                        false,  // use mutex
409
1.26M
                        m_psDecompressor, m_abyRawTileData, m_abyTmpRawTileData,
410
1.26M
                        m_abyDecodedTileData, bMissingTileOut);
411
1.26M
}
412
413
bool ZarrV2Array::LoadTileData(const uint64_t *tileIndices, bool bUseMutex,
414
                               const CPLCompressor *psDecompressor,
415
                               ZarrByteVectorQuickResize &abyRawTileData,
416
                               ZarrByteVectorQuickResize &abyTmpRawTileData,
417
                               ZarrByteVectorQuickResize &abyDecodedTileData,
418
                               bool &bMissingTileOut) const
419
1.26M
{
420
    // This method should NOT modify any ZarrArray member, as it is going to
421
    // be called concurrently from several threads.
422
423
    // Set those #define to avoid accidental use of some global variables
424
1.26M
#define m_abyTmpRawTileData cannot_use_here
425
1.26M
#define m_abyRawTileData cannot_use_here
426
1.26M
#define m_abyDecodedTileData cannot_use_here
427
1.26M
#define m_psDecompressor cannot_use_here
428
429
1.26M
    bMissingTileOut = false;
430
431
1.26M
    std::string osFilename = BuildTileFilename(tileIndices);
432
433
    // For network file systems, get the streaming version of the filename,
434
    // as we don't need arbitrary seeking in the file
435
1.26M
    osFilename = VSIFileManager::GetHandler(osFilename.c_str())
436
1.26M
                     ->GetStreamingFilename(osFilename);
437
438
    // First if we have a tile presence cache, check tile presence from it
439
1.26M
    if (bUseMutex)
440
0
        m_oMutex.lock();
441
1.26M
    auto poTilePresenceArray = OpenTilePresenceCache(false);
442
1.26M
    if (poTilePresenceArray)
443
0
    {
444
0
        std::vector<GUInt64> anTileIdx(m_aoDims.size());
445
0
        const std::vector<size_t> anCount(m_aoDims.size(), 1);
446
0
        const std::vector<GInt64> anArrayStep(m_aoDims.size(), 0);
447
0
        const std::vector<GPtrDiff_t> anBufferStride(m_aoDims.size(), 0);
448
0
        const auto eByteDT = GDALExtendedDataType::Create(GDT_Byte);
449
0
        for (size_t i = 0; i < m_aoDims.size(); ++i)
450
0
        {
451
0
            anTileIdx[i] = static_cast<GUInt64>(tileIndices[i]);
452
0
        }
453
0
        GByte byValue = 0;
454
0
        if (poTilePresenceArray->Read(anTileIdx.data(), anCount.data(),
455
0
                                      anArrayStep.data(), anBufferStride.data(),
456
0
                                      eByteDT, &byValue) &&
457
0
            byValue == 0)
458
0
        {
459
0
            if (bUseMutex)
460
0
                m_oMutex.unlock();
461
0
            CPLDebugOnly(ZARR_DEBUG_KEY, "Tile %s missing (=nodata)",
462
0
                         osFilename.c_str());
463
0
            bMissingTileOut = true;
464
0
            return true;
465
0
        }
466
0
    }
467
1.26M
    if (bUseMutex)
468
0
        m_oMutex.unlock();
469
470
1.26M
    VSILFILE *fp = nullptr;
471
    // This is the number of files returned in a S3 directory listing operation
472
1.26M
    constexpr uint64_t MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING = 1000;
473
1.26M
    const char *const apszOpenOptions[] = {"IGNORE_FILENAME_RESTRICTIONS=YES",
474
1.26M
                                           nullptr};
475
1.26M
    const auto nErrorBefore = CPLGetErrorCounter();
476
1.26M
    if ((m_osDimSeparator == "/" && !m_anBlockSize.empty() &&
477
1.26M
         m_anBlockSize.back() > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING) ||
478
1.26M
        (m_osDimSeparator != "/" &&
479
1.26M
         m_nTotalTileCount > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING))
480
172k
    {
481
        // Avoid issuing ReadDir() when a lot of files are expected
482
172k
        CPLConfigOptionSetter optionSetter("GDAL_DISABLE_READDIR_ON_OPEN",
483
172k
                                           "YES", true);
484
172k
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
485
172k
    }
486
1.09M
    else
487
1.09M
    {
488
1.09M
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
489
1.09M
    }
490
1.26M
    if (fp == nullptr)
491
782k
    {
492
782k
        if (nErrorBefore != CPLGetErrorCounter())
493
0
        {
494
0
            return false;
495
0
        }
496
782k
        else
497
782k
        {
498
            // Missing files are OK and indicate nodata_value
499
782k
            CPLDebugOnly(ZARR_DEBUG_KEY, "Tile %s missing (=nodata)",
500
782k
                         osFilename.c_str());
501
782k
            bMissingTileOut = true;
502
782k
            return true;
503
782k
        }
504
782k
    }
505
506
480k
    bMissingTileOut = false;
507
480k
    bool bRet = true;
508
480k
    size_t nRawDataSize = abyRawTileData.size();
509
480k
    if (psDecompressor == nullptr)
510
478k
    {
511
478k
        nRawDataSize = VSIFReadL(&abyRawTileData[0], 1, nRawDataSize, fp);
512
478k
    }
513
2.21k
    else
514
2.21k
    {
515
2.21k
        VSIFSeekL(fp, 0, SEEK_END);
516
2.21k
        const auto nSize = VSIFTellL(fp);
517
2.21k
        VSIFSeekL(fp, 0, SEEK_SET);
518
2.21k
        if (nSize > static_cast<vsi_l_offset>(std::numeric_limits<int>::max()))
519
0
        {
520
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large tile %s",
521
0
                     osFilename.c_str());
522
0
            bRet = false;
523
0
        }
524
2.21k
        else
525
2.21k
        {
526
2.21k
            ZarrByteVectorQuickResize abyCompressedData;
527
2.21k
            try
528
2.21k
            {
529
2.21k
                abyCompressedData.resize(static_cast<size_t>(nSize));
530
2.21k
            }
531
2.21k
            catch (const std::exception &)
532
2.21k
            {
533
0
                CPLError(CE_Failure, CPLE_OutOfMemory,
534
0
                         "Cannot allocate memory for tile %s",
535
0
                         osFilename.c_str());
536
0
                bRet = false;
537
0
            }
538
539
2.21k
            if (bRet &&
540
2.21k
                (abyCompressedData.empty() ||
541
2.21k
                 VSIFReadL(&abyCompressedData[0], 1, abyCompressedData.size(),
542
2.21k
                           fp) != abyCompressedData.size()))
543
0
            {
544
0
                CPLError(CE_Failure, CPLE_AppDefined,
545
0
                         "Could not read tile %s correctly",
546
0
                         osFilename.c_str());
547
0
                bRet = false;
548
0
            }
549
2.21k
            else
550
2.21k
            {
551
2.21k
                void *out_buffer = &abyRawTileData[0];
552
2.21k
                if (!psDecompressor->pfnFunc(
553
2.21k
                        abyCompressedData.data(), abyCompressedData.size(),
554
2.21k
                        &out_buffer, &nRawDataSize, nullptr,
555
2.21k
                        psDecompressor->user_data))
556
2.21k
                {
557
2.21k
                    CPLError(CE_Failure, CPLE_AppDefined,
558
2.21k
                             "Decompression of tile %s failed",
559
2.21k
                             osFilename.c_str());
560
2.21k
                    bRet = false;
561
2.21k
                }
562
2.21k
            }
563
2.21k
        }
564
2.21k
    }
565
480k
    VSIFCloseL(fp);
566
480k
    if (!bRet)
567
2.21k
        return false;
568
569
478k
    for (int i = m_oFiltersArray.Size(); i > 0;)
570
174
    {
571
174
        --i;
572
174
        const auto &oFilter = m_oFiltersArray[i];
573
174
        const auto osFilterId = oFilter["id"].ToString();
574
174
        const auto psFilterDecompressor =
575
174
            EQUAL(osFilterId.c_str(), "shuffle") ? ZarrGetShuffleDecompressor()
576
174
            : EQUAL(osFilterId.c_str(), "quantize")
577
174
                ? ZarrGetQuantizeDecompressor()
578
174
            : EQUAL(osFilterId.c_str(), "fixedscaleoffset")
579
174
                ? ZarrGetFixedScaleOffsetDecompressor()
580
174
                : CPLGetDecompressor(osFilterId.c_str());
581
174
        CPLAssert(psFilterDecompressor);
582
583
174
        CPLStringList aosOptions;
584
174
        for (const auto &obj : oFilter.GetChildren())
585
1.99k
        {
586
1.99k
            aosOptions.SetNameValue(obj.GetName().c_str(),
587
1.99k
                                    obj.ToString().c_str());
588
1.99k
        }
589
174
        void *out_buffer = &abyTmpRawTileData[0];
590
174
        size_t nOutSize = abyTmpRawTileData.size();
591
174
        if (!psFilterDecompressor->pfnFunc(
592
174
                abyRawTileData.data(), nRawDataSize, &out_buffer, &nOutSize,
593
174
                aosOptions.List(), psFilterDecompressor->user_data))
594
35
        {
595
35
            CPLError(CE_Failure, CPLE_AppDefined,
596
35
                     "Filter %s for tile %s failed", osFilterId.c_str(),
597
35
                     osFilename.c_str());
598
35
            return false;
599
35
        }
600
601
139
        nRawDataSize = nOutSize;
602
139
        std::swap(abyRawTileData, abyTmpRawTileData);
603
139
    }
604
478k
    if (nRawDataSize != abyRawTileData.size())
605
345
    {
606
345
        CPLError(CE_Failure, CPLE_AppDefined,
607
345
                 "Decompressed tile %s has not expected size after filters",
608
345
                 osFilename.c_str());
609
345
        return false;
610
345
    }
611
612
477k
    if (m_bFortranOrder && !m_aoDims.empty())
613
125
    {
614
125
        BlockTranspose(abyRawTileData, abyTmpRawTileData, true);
615
125
        std::swap(abyRawTileData, abyTmpRawTileData);
616
125
    }
617
618
477k
    if (!abyDecodedTileData.empty())
619
61
    {
620
61
        const size_t nSourceSize =
621
61
            m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
622
61
        const auto nDTSize = m_oType.GetSize();
623
61
        const size_t nValues = abyDecodedTileData.size() / nDTSize;
624
61
        const GByte *pSrc = abyRawTileData.data();
625
61
        GByte *pDst = &abyDecodedTileData[0];
626
6.36k
        for (size_t i = 0; i < nValues;
627
6.30k
             i++, pSrc += nSourceSize, pDst += nDTSize)
628
6.30k
        {
629
6.30k
            DecodeSourceElt(m_aoDtypeElts, pSrc, pDst);
630
6.30k
        }
631
61
    }
632
633
477k
    return true;
634
635
478k
#undef m_abyTmpRawTileData
636
478k
#undef m_abyRawTileData
637
478k
#undef m_abyDecodedTileData
638
478k
#undef m_psDecompressor
639
478k
}
640
641
/************************************************************************/
642
/*                      ZarrV2Array::IAdviseRead()                      */
643
/************************************************************************/
644
645
bool ZarrV2Array::IAdviseRead(const GUInt64 *arrayStartIdx, const size_t *count,
646
                              CSLConstList papszOptions) const
647
0
{
648
0
    std::vector<uint64_t> anIndicesCur;
649
0
    int nThreadsMax = 0;
650
0
    std::vector<uint64_t> anReqTilesIndices;
651
0
    size_t nReqTiles = 0;
652
0
    if (!IAdviseReadCommon(arrayStartIdx, count, papszOptions, anIndicesCur,
653
0
                           nThreadsMax, anReqTilesIndices, nReqTiles))
654
0
    {
655
0
        return false;
656
0
    }
657
0
    if (nThreadsMax <= 1)
658
0
    {
659
0
        return true;
660
0
    }
661
662
0
    const int nThreads =
663
0
        static_cast<int>(std::min(static_cast<size_t>(nThreadsMax), nReqTiles));
664
665
0
    CPLWorkerThreadPool *wtp = GDALGetGlobalThreadPool(nThreadsMax);
666
0
    if (wtp == nullptr)
667
0
        return false;
668
669
0
    struct JobStruct
670
0
    {
671
0
        JobStruct() = default;
672
673
0
        JobStruct(const JobStruct &) = delete;
674
0
        JobStruct &operator=(const JobStruct &) = delete;
675
676
0
        JobStruct(JobStruct &&) = default;
677
0
        JobStruct &operator=(JobStruct &&) = default;
678
679
0
        const ZarrV2Array *poArray = nullptr;
680
0
        bool *pbGlobalStatus = nullptr;
681
0
        int *pnRemainingThreads = nullptr;
682
0
        const std::vector<uint64_t> *panReqTilesIndices = nullptr;
683
0
        size_t nFirstIdx = 0;
684
0
        size_t nLastIdxNotIncluded = 0;
685
0
    };
686
687
0
    std::vector<JobStruct> asJobStructs;
688
689
0
    bool bGlobalStatus = true;
690
0
    int nRemainingThreads = nThreads;
691
    // Check for very highly overflow in below loop
692
0
    assert(static_cast<size_t>(nThreads) <
693
0
           std::numeric_limits<size_t>::max() / nReqTiles);
694
695
    // Setup jobs
696
0
    for (int i = 0; i < nThreads; i++)
697
0
    {
698
0
        JobStruct jobStruct;
699
0
        jobStruct.poArray = this;
700
0
        jobStruct.pbGlobalStatus = &bGlobalStatus;
701
0
        jobStruct.pnRemainingThreads = &nRemainingThreads;
702
0
        jobStruct.panReqTilesIndices = &anReqTilesIndices;
703
0
        jobStruct.nFirstIdx = static_cast<size_t>(i * nReqTiles / nThreads);
704
0
        jobStruct.nLastIdxNotIncluded = std::min(
705
0
            static_cast<size_t>((i + 1) * nReqTiles / nThreads), nReqTiles);
706
0
        asJobStructs.emplace_back(std::move(jobStruct));
707
0
    }
708
709
0
    const auto JobFunc = [](void *pThreadData)
710
0
    {
711
0
        const JobStruct *jobStruct =
712
0
            static_cast<const JobStruct *>(pThreadData);
713
714
0
        const auto poArray = jobStruct->poArray;
715
0
        const auto &aoDims = poArray->GetDimensions();
716
0
        const size_t l_nDims = poArray->GetDimensionCount();
717
0
        ZarrByteVectorQuickResize abyRawTileData;
718
0
        ZarrByteVectorQuickResize abyDecodedTileData;
719
0
        ZarrByteVectorQuickResize abyTmpRawTileData;
720
0
        const CPLCompressor *psDecompressor =
721
0
            CPLGetDecompressor(poArray->m_osDecompressorId.c_str());
722
723
0
        for (size_t iReq = jobStruct->nFirstIdx;
724
0
             iReq < jobStruct->nLastIdxNotIncluded; ++iReq)
725
0
        {
726
            // Check if we must early exit
727
0
            {
728
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
729
0
                if (!(*jobStruct->pbGlobalStatus))
730
0
                    return;
731
0
            }
732
733
0
            const uint64_t *tileIndices =
734
0
                jobStruct->panReqTilesIndices->data() + iReq * l_nDims;
735
736
0
            uint64_t nTileIdx = 0;
737
0
            for (size_t j = 0; j < l_nDims; ++j)
738
0
            {
739
0
                if (j > 0)
740
0
                    nTileIdx *= aoDims[j - 1]->GetSize();
741
0
                nTileIdx += tileIndices[j];
742
0
            }
743
744
0
            if (!poArray->AllocateWorkingBuffers(
745
0
                    abyRawTileData, abyTmpRawTileData, abyDecodedTileData))
746
0
            {
747
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
748
0
                *jobStruct->pbGlobalStatus = false;
749
0
                break;
750
0
            }
751
752
0
            bool bIsEmpty = false;
753
0
            bool success = poArray->LoadTileData(tileIndices,
754
0
                                                 true,  // use mutex
755
0
                                                 psDecompressor, abyRawTileData,
756
0
                                                 abyTmpRawTileData,
757
0
                                                 abyDecodedTileData, bIsEmpty);
758
759
0
            std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
760
0
            if (!success)
761
0
            {
762
0
                *jobStruct->pbGlobalStatus = false;
763
0
                break;
764
0
            }
765
766
0
            CachedTile cachedTile;
767
0
            if (!bIsEmpty)
768
0
            {
769
0
                if (!abyDecodedTileData.empty())
770
0
                    std::swap(cachedTile.abyDecoded, abyDecodedTileData);
771
0
                else
772
0
                    std::swap(cachedTile.abyDecoded, abyRawTileData);
773
0
            }
774
0
            poArray->m_oMapTileIndexToCachedTile[nTileIdx] =
775
0
                std::move(cachedTile);
776
0
        }
777
778
0
        std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
779
0
        (*jobStruct->pnRemainingThreads)--;
780
0
    };
781
782
    // Start jobs
783
0
    for (int i = 0; i < nThreads; i++)
784
0
    {
785
0
        if (!wtp->SubmitJob(JobFunc, &asJobStructs[i]))
786
0
        {
787
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
788
0
            bGlobalStatus = false;
789
0
            nRemainingThreads = i;
790
0
            break;
791
0
        }
792
0
    }
793
794
    // Wait for all jobs to be finished
795
0
    while (true)
796
0
    {
797
0
        {
798
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
799
0
            if (nRemainingThreads == 0)
800
0
                break;
801
0
        }
802
0
        wtp->WaitEvent();
803
0
    }
804
805
0
    return bGlobalStatus;
806
0
}
807
808
/************************************************************************/
809
/*                    ZarrV2Array::FlushDirtyTile()                     */
810
/************************************************************************/
811
812
bool ZarrV2Array::FlushDirtyTile() const
813
1.67M
{
814
1.67M
    if (!m_bDirtyTile)
815
519k
        return true;
816
1.15M
    m_bDirtyTile = false;
817
818
1.15M
    std::string osFilename = BuildTileFilename(m_anCachedTiledIndices.data());
819
820
1.15M
    const size_t nSourceSize =
821
1.15M
        m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
822
1.15M
    const auto &abyTile =
823
1.15M
        m_abyDecodedTileData.empty() ? m_abyRawTileData : m_abyDecodedTileData;
824
825
1.15M
    if (IsEmptyTile(abyTile))
826
616k
    {
827
616k
        m_bCachedTiledEmpty = true;
828
829
616k
        VSIStatBufL sStat;
830
616k
        if (VSIStatL(osFilename.c_str(), &sStat) == 0)
831
0
        {
832
0
            CPLDebugOnly(ZARR_DEBUG_KEY,
833
0
                         "Deleting tile %s that has now empty content",
834
0
                         osFilename.c_str());
835
0
            return VSIUnlink(osFilename.c_str()) == 0;
836
0
        }
837
616k
        return true;
838
616k
    }
839
840
534k
    if (!m_abyDecodedTileData.empty())
841
0
    {
842
0
        const size_t nDTSize = m_oType.GetSize();
843
0
        const size_t nValues = m_abyDecodedTileData.size() / nDTSize;
844
0
        GByte *pDst = &m_abyRawTileData[0];
845
0
        const GByte *pSrc = m_abyDecodedTileData.data();
846
0
        for (size_t i = 0; i < nValues;
847
0
             i++, pDst += nSourceSize, pSrc += nDTSize)
848
0
        {
849
0
            EncodeElt(m_aoDtypeElts, pSrc, pDst);
850
0
        }
851
0
    }
852
853
534k
    if (m_bFortranOrder && !m_aoDims.empty())
854
0
    {
855
0
        BlockTranspose(m_abyRawTileData, m_abyTmpRawTileData, false);
856
0
        std::swap(m_abyRawTileData, m_abyTmpRawTileData);
857
0
    }
858
859
534k
    size_t nRawDataSize = m_abyRawTileData.size();
860
534k
    for (const auto &oFilter : m_oFiltersArray)
861
0
    {
862
0
        const auto osFilterId = oFilter["id"].ToString();
863
0
        if (osFilterId == "quantize" || osFilterId == "fixedscaleoffset")
864
0
        {
865
0
            CPLError(CE_Failure, CPLE_NotSupported,
866
0
                     "%s filter not supported for writing", osFilterId.c_str());
867
0
            return false;
868
0
        }
869
0
        const auto psFilterCompressor =
870
0
            EQUAL(osFilterId.c_str(), "shuffle")
871
0
                ? ZarrGetShuffleCompressor()
872
0
                : CPLGetCompressor(osFilterId.c_str());
873
0
        CPLAssert(psFilterCompressor);
874
875
0
        CPLStringList aosOptions;
876
0
        for (const auto &obj : oFilter.GetChildren())
877
0
        {
878
0
            aosOptions.SetNameValue(obj.GetName().c_str(),
879
0
                                    obj.ToString().c_str());
880
0
        }
881
0
        void *out_buffer = &m_abyTmpRawTileData[0];
882
0
        size_t nOutSize = m_abyTmpRawTileData.size();
883
0
        if (!psFilterCompressor->pfnFunc(
884
0
                m_abyRawTileData.data(), nRawDataSize, &out_buffer, &nOutSize,
885
0
                aosOptions.List(), psFilterCompressor->user_data))
886
0
        {
887
0
            CPLError(CE_Failure, CPLE_AppDefined,
888
0
                     "Filter %s for tile %s failed", osFilterId.c_str(),
889
0
                     osFilename.c_str());
890
0
            return false;
891
0
        }
892
893
0
        nRawDataSize = nOutSize;
894
0
        std::swap(m_abyRawTileData, m_abyTmpRawTileData);
895
0
    }
896
897
534k
    if (m_osDimSeparator == "/")
898
0
    {
899
0
        std::string osDir = CPLGetDirnameSafe(osFilename.c_str());
900
0
        VSIStatBufL sStat;
901
0
        if (VSIStatL(osDir.c_str(), &sStat) != 0)
902
0
        {
903
0
            if (VSIMkdirRecursive(osDir.c_str(), 0755) != 0)
904
0
            {
905
0
                CPLError(CE_Failure, CPLE_AppDefined,
906
0
                         "Cannot create directory %s", osDir.c_str());
907
0
                return false;
908
0
            }
909
0
        }
910
0
    }
911
912
534k
    if (m_psCompressor == nullptr && m_psDecompressor != nullptr)
913
0
    {
914
        // Case of imagecodecs_tiff
915
916
0
        CPLError(CE_Failure, CPLE_NotSupported,
917
0
                 "Only decompression supported for '%s' compression method",
918
0
                 m_osDecompressorId.c_str());
919
0
        return false;
920
0
    }
921
922
534k
    VSILFILE *fp = VSIFOpenL(osFilename.c_str(), "wb");
923
534k
    if (fp == nullptr)
924
0
    {
925
0
        CPLError(CE_Failure, CPLE_AppDefined, "Cannot create tile %s",
926
0
                 osFilename.c_str());
927
0
        return false;
928
0
    }
929
930
534k
    bool bRet = true;
931
534k
    if (m_psCompressor == nullptr)
932
534k
    {
933
534k
        if (VSIFWriteL(m_abyRawTileData.data(), 1, nRawDataSize, fp) !=
934
534k
            nRawDataSize)
935
0
        {
936
0
            CPLError(CE_Failure, CPLE_AppDefined,
937
0
                     "Could not write tile %s correctly", osFilename.c_str());
938
0
            bRet = false;
939
0
        }
940
534k
    }
941
0
    else
942
0
    {
943
0
        std::vector<GByte> abyCompressedData;
944
0
        try
945
0
        {
946
0
            constexpr size_t MIN_BUF_SIZE = 64;  // somewhat arbitrary
947
0
            abyCompressedData.resize(static_cast<size_t>(
948
0
                MIN_BUF_SIZE + nRawDataSize + nRawDataSize / 3));
949
0
        }
950
0
        catch (const std::exception &)
951
0
        {
952
0
            CPLError(CE_Failure, CPLE_OutOfMemory,
953
0
                     "Cannot allocate memory for tile %s", osFilename.c_str());
954
0
            bRet = false;
955
0
        }
956
957
0
        if (bRet)
958
0
        {
959
0
            void *out_buffer = &abyCompressedData[0];
960
0
            size_t out_size = abyCompressedData.size();
961
0
            CPLStringList aosOptions;
962
0
            const auto &compressorConfig = m_oCompressorJSon;
963
0
            for (const auto &obj : compressorConfig.GetChildren())
964
0
            {
965
0
                aosOptions.SetNameValue(obj.GetName().c_str(),
966
0
                                        obj.ToString().c_str());
967
0
            }
968
0
            if (EQUAL(m_psCompressor->pszId, "blosc") &&
969
0
                m_oType.GetClass() == GEDTC_NUMERIC)
970
0
            {
971
0
                aosOptions.SetNameValue(
972
0
                    "TYPESIZE",
973
0
                    CPLSPrintf("%d", GDALGetDataTypeSizeBytes(
974
0
                                         GDALGetNonComplexDataType(
975
0
                                             m_oType.GetNumericDataType()))));
976
0
            }
977
978
0
            if (!m_psCompressor->pfnFunc(
979
0
                    m_abyRawTileData.data(), nRawDataSize, &out_buffer,
980
0
                    &out_size, aosOptions.List(), m_psCompressor->user_data))
981
0
            {
982
0
                CPLError(CE_Failure, CPLE_AppDefined,
983
0
                         "Compression of tile %s failed", osFilename.c_str());
984
0
                bRet = false;
985
0
            }
986
0
            abyCompressedData.resize(out_size);
987
0
        }
988
989
0
        if (bRet &&
990
0
            VSIFWriteL(abyCompressedData.data(), 1, abyCompressedData.size(),
991
0
                       fp) != abyCompressedData.size())
992
0
        {
993
0
            CPLError(CE_Failure, CPLE_AppDefined,
994
0
                     "Could not write tile %s correctly", osFilename.c_str());
995
0
            bRet = false;
996
0
        }
997
0
    }
998
534k
    VSIFCloseL(fp);
999
1000
534k
    return bRet;
1001
534k
}
1002
1003
/************************************************************************/
1004
/*                          BuildTileFilename()                         */
1005
/************************************************************************/
1006
1007
std::string ZarrV2Array::BuildTileFilename(const uint64_t *tileIndices) const
1008
2.41M
{
1009
2.41M
    std::string osFilename;
1010
2.41M
    if (m_aoDims.empty())
1011
1.26k
    {
1012
1.26k
        osFilename = "0";
1013
1.26k
    }
1014
2.41M
    else
1015
2.41M
    {
1016
9.42M
        for (size_t i = 0; i < m_aoDims.size(); ++i)
1017
7.01M
        {
1018
7.01M
            if (!osFilename.empty())
1019
4.60M
                osFilename += m_osDimSeparator;
1020
7.01M
            osFilename += std::to_string(tileIndices[i]);
1021
7.01M
        }
1022
2.41M
    }
1023
1024
2.41M
    return CPLFormFilenameSafe(CPLGetDirnameSafe(m_osFilename.c_str()).c_str(),
1025
2.41M
                               osFilename.c_str(), nullptr);
1026
2.41M
}
1027
1028
/************************************************************************/
1029
/*                          GetDataDirectory()                          */
1030
/************************************************************************/
1031
1032
std::string ZarrV2Array::GetDataDirectory() const
1033
0
{
1034
0
    return CPLGetDirnameSafe(m_osFilename.c_str());
1035
0
}
1036
1037
/************************************************************************/
1038
/*                        GetTileIndicesFromFilename()                  */
1039
/************************************************************************/
1040
1041
// Tokenize pszFilename on the dimension separator (m_osDimSeparator) and
// return the resulting tokens as a string list.
CPLStringList
ZarrV2Array::GetTileIndicesFromFilename(const char *pszFilename) const
{
    constexpr int nTokenizeFlags = 0;
    char **papszTokens = CSLTokenizeString2(
        pszFilename, m_osDimSeparator.c_str(), nTokenizeFlags);
    return CPLStringList(papszTokens);
}
1047
1048
/************************************************************************/
1049
/*                             ParseDtype()                             */
1050
/************************************************************************/
1051
1052
static size_t GetAlignment(const CPLJSONObject &obj)
1053
183k
{
1054
183k
    if (obj.GetType() == CPLJSONObject::Type::String)
1055
174k
    {
1056
174k
        const auto str = obj.ToString();
1057
174k
        if (str.size() < 3)
1058
0
            return 1;
1059
174k
        const char chType = str[1];
1060
174k
        const int nBytes = atoi(str.c_str() + 2);
1061
174k
        if (chType == 'S')
1062
16.8k
            return sizeof(char *);
1063
157k
        if (chType == 'c' && nBytes == 8)
1064
11.3k
            return sizeof(float);
1065
146k
        if (chType == 'c' && nBytes == 16)
1066
0
            return sizeof(double);
1067
146k
        return nBytes;
1068
146k
    }
1069
8.68k
    else if (obj.GetType() == CPLJSONObject::Type::Array)
1070
8.68k
    {
1071
8.68k
        const auto oArray = obj.ToArray();
1072
8.68k
        size_t nAlignment = 1;
1073
8.68k
        for (const auto &oElt : oArray)
1074
22.7k
        {
1075
22.7k
            const auto oEltArray = oElt.ToArray();
1076
22.7k
            if (!oEltArray.IsValid() || oEltArray.Size() != 2 ||
1077
22.7k
                oEltArray[0].GetType() != CPLJSONObject::Type::String)
1078
0
            {
1079
0
                return 1;
1080
0
            }
1081
22.7k
            nAlignment = std::max(nAlignment, GetAlignment(oEltArray[1]));
1082
22.7k
            if (nAlignment == sizeof(void *))
1083
2.85k
                break;
1084
22.7k
        }
1085
8.68k
        return nAlignment;
1086
8.68k
    }
1087
0
    return 1;
1088
183k
}
1089
1090
// Convert a Zarr V2 "dtype" JSON value into a GDAL extended data type.
//
// obj is either a string such as "<f4" (byte order character, type code,
// size) or an array of [name, dtype] pairs describing a compound type, which
// is parsed recursively. One DtypeElt is appended to elts for each elementary
// type encountered, with its native offset chained after the previous entry.
//
// On invalid or unsupported input, a CPLError is emitted and a numeric type
// of GDT_Unknown is returned.
static GDALExtendedDataType ParseDtype(const CPLJSONObject &obj,
                                       std::vector<DtypeElt> &elts)
{
    // Common exit for every invalid / unsupported case.
    const auto Unsupported = [&obj]()
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Invalid or unsupported format for dtype: %s",
                 obj.ToString().c_str());
        return GDALExtendedDataType::Create(GDT_Unknown);
    };

    // Round nOffset up to the next multiple of nAlignment.
    const auto RoundUp = [](size_t nOffset, size_t nAlignment)
    { return nOffset + (nAlignment - (nOffset % nAlignment)) % nAlignment; };

    const auto eJSONType = obj.GetType();

    if (eJSONType == CPLJSONObject::Type::String)
    {
        const auto osDtype = obj.ToString();
        if (osDtype.size() < 3)
            return Unsupported();

        const char chByteOrder = osDtype[0];
        const char chKind = osDtype[1];
        const int nItemSize = atoi(osDtype.c_str() + 2);
        if (nItemSize <= 0 || nItemSize >= 1000)
            return Unsupported();

        DtypeElt elt;

        // Byte order only matters for multi-byte non-'S' types and for 'U'.
        elt.needByteSwapping = false;
        const bool bByteOrderRelevant =
            (nItemSize > 1 && chKind != 'S') || chKind == 'U';
        if (bByteOrderRelevant)
        {
            if (chByteOrder == '<')
                elt.needByteSwapping = (CPL_IS_LSB == 0);
            else if (chByteOrder == '>')
                elt.needByteSwapping = (CPL_IS_LSB != 0);
        }

        // Native elements are packed one right after the other.
        if (!elts.empty())
        {
            const auto &oPrevElt = elts.back();
            elt.nativeOffset = oPrevElt.nativeOffset + oPrevElt.nativeSize;
        }
        elt.nativeSize = nItemSize;

        // (type code, size) pairs that map to a GDAL numeric data type.
        struct NumericDtype
        {
            char chKind;
            int nItemSize;
            DtypeElt::NativeType eNativeType;
            GDALDataType eGDALType;
        };

        static constexpr NumericDtype asNumericDtypes[] = {
            {'b', 1, DtypeElt::NativeType::BOOLEAN, GDT_Byte},
            {'u', 1, DtypeElt::NativeType::UNSIGNED_INT, GDT_Byte},
            {'i', 1, DtypeElt::NativeType::SIGNED_INT, GDT_Int8},
            {'i', 2, DtypeElt::NativeType::SIGNED_INT, GDT_Int16},
            {'i', 4, DtypeElt::NativeType::SIGNED_INT, GDT_Int32},
            {'i', 8, DtypeElt::NativeType::SIGNED_INT, GDT_Int64},
            {'u', 2, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt16},
            {'u', 4, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt32},
            {'u', 8, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt64},
            {'f', 2, DtypeElt::NativeType::IEEEFP, GDT_Float16},
            {'f', 4, DtypeElt::NativeType::IEEEFP, GDT_Float32},
            {'f', 8, DtypeElt::NativeType::IEEEFP, GDT_Float64},
            {'c', 8, DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat32},
            {'c', 16, DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat64},
        };

        const NumericDtype *psNumeric = nullptr;
        for (const auto &sCandidate : asNumericDtypes)
        {
            if (sCandidate.chKind == chKind &&
                sCandidate.nItemSize == nItemSize)
            {
                psNumeric = &sCandidate;
                break;
            }
        }

        if (psNumeric != nullptr)
        {
            elt.nativeType = psNumeric->eNativeType;
            elt.gdalType = GDALExtendedDataType::Create(psNumeric->eGDALType);
            elt.gdalSize = elt.gdalType.GetSize();
            elts.emplace_back(elt);
            return GDALExtendedDataType::Create(psNumeric->eGDALType);
        }

        if (chKind == 'S')
        {
            elt.nativeType = DtypeElt::NativeType::STRING_ASCII;
            elt.gdalType = GDALExtendedDataType::CreateString(nItemSize);
            elt.gdalSize = elt.gdalType.GetSize();
            elts.emplace_back(elt);
            return GDALExtendedDataType::CreateString(nItemSize);
        }

        if (chKind == 'U')
        {
            elt.nativeType = DtypeElt::NativeType::STRING_UNICODE;
            // The dtype declaration is a number of UCS4 characters: store it
            // as a number of bytes.
            elt.nativeSize *= 4;
            // A UCS4 size cannot really be mapped to a UTF-8 one, hence no
            // maximum length on the GDAL side.
            elt.gdalType = GDALExtendedDataType::CreateString();
            elt.gdalSize = elt.gdalType.GetSize();
            elts.emplace_back(elt);
            return GDALExtendedDataType::CreateString();
        }

        return Unsupported();
    }

    if (eJSONType == CPLJSONObject::Type::Array)
    {
        const auto oFields = obj.ToArray();
        std::vector<std::unique_ptr<GDALEDTComponent>> apoComponents;
        size_t nCurOffset = 0;
        size_t nLargestAlignment = 1;

        for (const auto &oField : oFields)
        {
            // Each member must be a [name, dtype] pair.
            const auto oPair = oField.ToArray();
            const bool bWellFormed =
                oPair.IsValid() && oPair.Size() == 2 &&
                oPair[0].GetType() == CPLJSONObject::Type::String;
            if (!bWellFormed)
                return Unsupported();

            GDALExtendedDataType oFieldType = ParseDtype(oPair[1], elts);
            if (oFieldType.GetClass() == GEDTC_NUMERIC &&
                oFieldType.GetNumericDataType() == GDT_Unknown)
            {
                return Unsupported();
            }

            const std::string osFieldName = oPair[0].ToString();

            // Add padding so that the member starts on its own alignment.
            const size_t nFieldAlignment = GetAlignment(oPair[1]);
            assert(nFieldAlignment);
            nLargestAlignment = std::max(nLargestAlignment, nFieldAlignment);
            nCurOffset = RoundUp(nCurOffset, nFieldAlignment);

            apoComponents.emplace_back(std::make_unique<GDALEDTComponent>(
                osFieldName, nCurOffset, oFieldType));
            nCurOffset += oFieldType.GetSize();
        }

        // The total size is padded to the largest member alignment.
        const size_t nTotalSize = RoundUp(nCurOffset, nLargestAlignment);
        return GDALExtendedDataType::Create(obj.ToString(), nTotalSize,
                                            std::move(apoComponents));
    }

    return Unsupported();
}
1276
1277
static void SetGDALOffset(const GDALExtendedDataType &dt,
1278
                          const size_t nBaseOffset, std::vector<DtypeElt> &elts,
1279
                          size_t &iCurElt)
1280
524k
{
1281
524k
    if (dt.GetClass() == GEDTC_COMPOUND)
1282
47.2k
    {
1283
47.2k
        const auto &comps = dt.GetComponents();
1284
47.2k
        for (const auto &comp : comps)
1285
157k
        {
1286
157k
            const size_t nBaseOffsetSub = nBaseOffset + comp->GetOffset();
1287
157k
            SetGDALOffset(comp->GetType(), nBaseOffsetSub, elts, iCurElt);
1288
157k
        }
1289
47.2k
    }
1290
477k
    else
1291
477k
    {
1292
477k
        elts[iCurElt].gdalOffset = nBaseOffset;
1293
477k
        iCurElt++;
1294
477k
    }
1295
524k
}
1296
1297
/************************************************************************/
1298
/*                     ZarrV2Group::LoadArray()                         */
1299
/************************************************************************/
1300
1301
std::shared_ptr<ZarrArray>
1302
ZarrV2Group::LoadArray(const std::string &osArrayName,
1303
                       const std::string &osZarrayFilename,
1304
                       const CPLJSONObject &oRoot, bool bLoadedFromZMetadata,
1305
                       const CPLJSONObject &oAttributesIn) const
1306
607k
{
1307
    // Add osZarrayFilename to m_poSharedResource during the scope
1308
    // of this function call.
1309
607k
    ZarrSharedResource::SetFilenameAdder filenameAdder(m_poSharedResource,
1310
607k
                                                       osZarrayFilename);
1311
607k
    if (!filenameAdder.ok())
1312
104k
        return nullptr;
1313
1314
503k
    const auto osFormat = oRoot["zarr_format"].ToString();
1315
503k
    if (osFormat != "2")
1316
19.9k
    {
1317
19.9k
        CPLError(CE_Failure, CPLE_NotSupported,
1318
19.9k
                 "Invalid value for zarr_format: %s", osFormat.c_str());
1319
19.9k
        return nullptr;
1320
19.9k
    }
1321
1322
483k
    bool bFortranOrder = false;
1323
483k
    const char *orderKey = "order";
1324
483k
    const auto osOrder = oRoot[orderKey].ToString();
1325
483k
    if (osOrder == "C")
1326
453k
    {
1327
        // ok
1328
453k
    }
1329
30.3k
    else if (osOrder == "F")
1330
21.1k
    {
1331
21.1k
        bFortranOrder = true;
1332
21.1k
    }
1333
9.21k
    else
1334
9.21k
    {
1335
9.21k
        CPLError(CE_Failure, CPLE_NotSupported, "Invalid value for %s",
1336
9.21k
                 orderKey);
1337
9.21k
        return nullptr;
1338
9.21k
    }
1339
1340
474k
    const auto oShape = oRoot["shape"].ToArray();
1341
474k
    if (!oShape.IsValid())
1342
2.23k
    {
1343
2.23k
        CPLError(CE_Failure, CPLE_AppDefined, "shape missing or not an array");
1344
2.23k
        return nullptr;
1345
2.23k
    }
1346
1347
472k
    const char *chunksKey = "chunks";
1348
472k
    const auto oChunks = oRoot[chunksKey].ToArray();
1349
472k
    if (!oChunks.IsValid())
1350
1.82k
    {
1351
1.82k
        CPLError(CE_Failure, CPLE_AppDefined, "%s missing or not an array",
1352
1.82k
                 chunksKey);
1353
1.82k
        return nullptr;
1354
1.82k
    }
1355
1356
470k
    if (oShape.Size() != oChunks.Size())
1357
2.87k
    {
1358
2.87k
        CPLError(CE_Failure, CPLE_AppDefined,
1359
2.87k
                 "shape and chunks arrays are of different size");
1360
2.87k
        return nullptr;
1361
2.87k
    }
1362
1363
467k
    CPLJSONObject oAttributes(oAttributesIn);
1364
467k
    if (!bLoadedFromZMetadata)
1365
466k
    {
1366
466k
        CPLJSONDocument oDoc;
1367
466k
        const std::string osZattrsFilename(CPLFormFilenameSafe(
1368
466k
            CPLGetDirnameSafe(osZarrayFilename.c_str()).c_str(), ".zattrs",
1369
466k
            nullptr));
1370
466k
        CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
1371
466k
        if (oDoc.Load(osZattrsFilename))
1372
217k
        {
1373
217k
            oAttributes = oDoc.GetRoot();
1374
217k
        }
1375
466k
    }
1376
1377
    // Deep-clone of oAttributes
1378
467k
    {
1379
467k
        CPLJSONDocument oTmpDoc;
1380
467k
        oTmpDoc.SetRoot(oAttributes);
1381
467k
        CPL_IGNORE_RET_VAL(oTmpDoc.LoadMemory(oTmpDoc.SaveAsString()));
1382
467k
        oAttributes = oTmpDoc.GetRoot();
1383
467k
    }
1384
1385
467k
    std::vector<std::shared_ptr<GDALDimension>> aoDims;
1386
1.15M
    for (int i = 0; i < oShape.Size(); ++i)
1387
684k
    {
1388
684k
        const auto nSize = static_cast<GUInt64>(oShape[i].ToLong());
1389
684k
        if (nSize == 0)
1390
345
        {
1391
345
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid content for shape");
1392
345
            return nullptr;
1393
345
        }
1394
683k
        aoDims.emplace_back(std::make_shared<ZarrDimension>(
1395
683k
            m_poSharedResource,
1396
683k
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1397
683k
            std::string(), CPLSPrintf("dim%d", i), std::string(), std::string(),
1398
683k
            nSize));
1399
683k
    }
1400
1401
    // XArray extension
1402
467k
    const auto arrayDimensionsObj = oAttributes["_ARRAY_DIMENSIONS"];
1403
1404
467k
    const auto FindDimension =
1405
467k
        [this, &aoDims, bLoadedFromZMetadata, &osArrayName,
1406
467k
         &oAttributes](const std::string &osDimName,
1407
467k
                       std::shared_ptr<GDALDimension> &poDim, int i)
1408
467k
    {
1409
200k
        auto oIter = m_oMapDimensions.find(osDimName);
1410
200k
        if (oIter != m_oMapDimensions.end())
1411
11.1k
        {
1412
11.1k
            if (m_bDimSizeInUpdate ||
1413
11.1k
                oIter->second->GetSize() == poDim->GetSize())
1414
9.24k
            {
1415
9.24k
                poDim = oIter->second;
1416
9.24k
                return true;
1417
9.24k
            }
1418
1.90k
            else
1419
1.90k
            {
1420
1.90k
                CPLError(CE_Warning, CPLE_AppDefined,
1421
1.90k
                         "Size of _ARRAY_DIMENSIONS[%d] different "
1422
1.90k
                         "from the one of shape",
1423
1.90k
                         i);
1424
1.90k
                return false;
1425
1.90k
            }
1426
11.1k
        }
1427
1428
        // Try to load the indexing variable.
1429
1430
        // If loading from zmetadata, we should have normally
1431
        // already loaded the dimension variables, unless they
1432
        // are in a upper level.
1433
189k
        if (bLoadedFromZMetadata && osArrayName != osDimName &&
1434
189k
            m_oMapMDArrays.find(osDimName) == m_oMapMDArrays.end())
1435
48
        {
1436
48
            auto poParent = m_poParent.lock();
1437
50
            while (poParent != nullptr)
1438
8
            {
1439
8
                oIter = poParent->m_oMapDimensions.find(osDimName);
1440
8
                if (oIter != poParent->m_oMapDimensions.end() &&
1441
8
                    oIter->second->GetSize() == poDim->GetSize())
1442
6
                {
1443
6
                    poDim = oIter->second;
1444
6
                    return true;
1445
6
                }
1446
2
                poParent = poParent->m_poParent.lock();
1447
2
            }
1448
48
        }
1449
1450
        // Not loading from zmetadata, and not in m_oMapMDArrays,
1451
        // then stat() the indexing variable.
1452
189k
        else if (!bLoadedFromZMetadata && osArrayName != osDimName &&
1453
189k
                 m_oMapMDArrays.find(osDimName) == m_oMapMDArrays.end())
1454
170k
        {
1455
170k
            std::string osDirName = m_osDirectoryName;
1456
597k
            while (true)
1457
597k
            {
1458
597k
                const std::string osArrayFilenameDim = CPLFormFilenameSafe(
1459
597k
                    CPLFormFilenameSafe(osDirName.c_str(), osDimName.c_str(),
1460
597k
                                        nullptr)
1461
597k
                        .c_str(),
1462
597k
                    ".zarray", nullptr);
1463
597k
                VSIStatBufL sStat;
1464
597k
                if (VSIStatL(osArrayFilenameDim.c_str(), &sStat) == 0)
1465
96.4k
                {
1466
96.4k
                    CPLJSONDocument oDoc;
1467
96.4k
                    if (oDoc.Load(osArrayFilenameDim))
1468
85.1k
                    {
1469
85.1k
                        LoadArray(osDimName, osArrayFilenameDim, oDoc.GetRoot(),
1470
85.1k
                                  false, CPLJSONObject());
1471
85.1k
                    }
1472
96.4k
                }
1473
500k
                else
1474
500k
                {
1475
                    // Recurse to upper level for datasets such as
1476
                    // /vsis3/hrrrzarr/sfc/20210809/20210809_00z_anl.zarr/0.1_sigma_level/HAIL_max_fcst/0.1_sigma_level/HAIL_max_fcst
1477
500k
                    std::string osDirNameNew =
1478
500k
                        CPLGetPathSafe(osDirName.c_str());
1479
500k
                    if (!osDirNameNew.empty() && osDirNameNew != osDirName)
1480
426k
                    {
1481
426k
                        osDirName = std::move(osDirNameNew);
1482
426k
                        continue;
1483
426k
                    }
1484
500k
                }
1485
170k
                break;
1486
597k
            }
1487
170k
        }
1488
1489
189k
        oIter = m_oMapDimensions.find(osDimName);
1490
189k
        if (oIter != m_oMapDimensions.end() &&
1491
189k
            oIter->second->GetSize() == poDim->GetSize())
1492
12.4k
        {
1493
12.4k
            poDim = oIter->second;
1494
12.4k
            return true;
1495
12.4k
        }
1496
1497
177k
        std::string osType;
1498
177k
        std::string osDirection;
1499
177k
        if (aoDims.size() == 1 && osArrayName == osDimName)
1500
17.7k
        {
1501
17.7k
            ZarrArray::GetDimensionTypeDirection(oAttributes, osType,
1502
17.7k
                                                 osDirection);
1503
17.7k
        }
1504
1505
177k
        auto poDimLocal = std::make_shared<ZarrDimension>(
1506
177k
            m_poSharedResource,
1507
177k
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1508
177k
            GetFullName(), osDimName, osType, osDirection, poDim->GetSize());
1509
177k
        poDimLocal->SetXArrayDimension();
1510
177k
        m_oMapDimensions[osDimName] = poDimLocal;
1511
177k
        poDim = poDimLocal;
1512
177k
        return true;
1513
189k
    };
1514
1515
467k
    if (arrayDimensionsObj.GetType() == CPLJSONObject::Type::Array)
1516
181k
    {
1517
181k
        const auto arrayDims = arrayDimensionsObj.ToArray();
1518
181k
        if (arrayDims.Size() == oShape.Size())
1519
174k
        {
1520
174k
            bool ok = true;
1521
376k
            for (int i = 0; i < oShape.Size(); ++i)
1522
202k
            {
1523
202k
                if (arrayDims[i].GetType() == CPLJSONObject::Type::String)
1524
200k
                {
1525
200k
                    const auto osDimName = arrayDims[i].ToString();
1526
200k
                    ok &= FindDimension(osDimName, aoDims[i], i);
1527
200k
                }
1528
202k
            }
1529
174k
            if (ok)
1530
172k
            {
1531
172k
                oAttributes.Delete("_ARRAY_DIMENSIONS");
1532
172k
            }
1533
174k
        }
1534
7.74k
        else
1535
7.74k
        {
1536
7.74k
            CPLError(
1537
7.74k
                CE_Warning, CPLE_AppDefined,
1538
7.74k
                "Size of _ARRAY_DIMENSIONS different from the one of shape");
1539
7.74k
        }
1540
181k
    }
1541
1542
    // _NCZARR_ARRAY extension
1543
467k
    const auto nczarrArrayDimrefs = oRoot["_NCZARR_ARRAY"]["dimrefs"].ToArray();
1544
467k
    if (nczarrArrayDimrefs.IsValid())
1545
169k
    {
1546
169k
        const auto arrayDims = nczarrArrayDimrefs.ToArray();
1547
169k
        if (arrayDims.Size() == oShape.Size())
1548
168k
        {
1549
168k
            auto poRG =
1550
168k
                std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock());
1551
168k
            CPLAssert(poRG != nullptr);
1552
5.29M
            while (true)
1553
5.29M
            {
1554
5.29M
                auto poNewRG = poRG->m_poParent.lock();
1555
5.29M
                if (poNewRG == nullptr)
1556
168k
                    break;
1557
5.12M
                poRG = std::move(poNewRG);
1558
5.12M
            }
1559
1560
486k
            for (int i = 0; i < oShape.Size(); ++i)
1561
317k
            {
1562
317k
                if (arrayDims[i].GetType() == CPLJSONObject::Type::String)
1563
301k
                {
1564
301k
                    const auto osDimFullpath = arrayDims[i].ToString();
1565
301k
                    const std::string osArrayFullname =
1566
301k
                        (GetFullName() != "/" ? GetFullName() : std::string()) +
1567
301k
                        '/' + osArrayName;
1568
301k
                    if (aoDims.size() == 1 &&
1569
301k
                        (osDimFullpath == osArrayFullname ||
1570
18.6k
                         osDimFullpath == "/" + osArrayFullname))
1571
503
                    {
1572
                        // If this is an indexing variable, then fetch the
1573
                        // dimension type and direction, and patch the dimension
1574
503
                        std::string osType;
1575
503
                        std::string osDirection;
1576
503
                        ZarrArray::GetDimensionTypeDirection(
1577
503
                            oAttributes, osType, osDirection);
1578
1579
503
                        auto poDimLocal = std::make_shared<ZarrDimension>(
1580
503
                            m_poSharedResource,
1581
503
                            std::dynamic_pointer_cast<ZarrGroupBase>(
1582
503
                                m_pSelf.lock()),
1583
503
                            GetFullName(), osArrayName, osType, osDirection,
1584
503
                            aoDims[i]->GetSize());
1585
503
                        aoDims[i] = poDimLocal;
1586
1587
503
                        m_oMapDimensions[osArrayName] = std::move(poDimLocal);
1588
503
                    }
1589
300k
                    else if (auto poDim =
1590
300k
                                 poRG->OpenDimensionFromFullname(osDimFullpath))
1591
91.3k
                    {
1592
91.3k
                        if (poDim->GetSize() != aoDims[i]->GetSize())
1593
29.7k
                        {
1594
29.7k
                            CPLError(CE_Failure, CPLE_AppDefined,
1595
29.7k
                                     "Inconsistency in size between NCZarr "
1596
29.7k
                                     "dimension %s and regular dimension",
1597
29.7k
                                     osDimFullpath.c_str());
1598
29.7k
                        }
1599
61.6k
                        else
1600
61.6k
                        {
1601
61.6k
                            aoDims[i] = std::move(poDim);
1602
61.6k
                        }
1603
91.3k
                    }
1604
209k
                    else
1605
209k
                    {
1606
209k
                        CPLError(CE_Failure, CPLE_AppDefined,
1607
209k
                                 "Cannot find NCZarr dimension %s",
1608
209k
                                 osDimFullpath.c_str());
1609
209k
                    }
1610
301k
                }
1611
317k
            }
1612
168k
        }
1613
806
        else
1614
806
        {
1615
806
            CPLError(CE_Warning, CPLE_AppDefined,
1616
806
                     "Size of _NCZARR_ARRAY.dimrefs different from the one of "
1617
806
                     "shape");
1618
806
        }
1619
169k
    }
1620
1621
467k
    constexpr const char *dtypeKey = "dtype";
1622
467k
    auto oDtype = oRoot[dtypeKey];
1623
467k
    if (!oDtype.IsValid())
1624
50.7k
    {
1625
50.7k
        CPLError(CE_Failure, CPLE_NotSupported, "%s missing", dtypeKey);
1626
50.7k
        return nullptr;
1627
50.7k
    }
1628
416k
    std::vector<DtypeElt> aoDtypeElts;
1629
416k
    const auto oType = ParseDtype(oDtype, aoDtypeElts);
1630
416k
    if (oType.GetClass() == GEDTC_NUMERIC &&
1631
416k
        oType.GetNumericDataType() == GDT_Unknown)
1632
48.9k
        return nullptr;
1633
367k
    size_t iCurElt = 0;
1634
367k
    SetGDALOffset(oType, 0, aoDtypeElts, iCurElt);
1635
1636
367k
    std::vector<GUInt64> anBlockSize;
1637
367k
    if (!ZarrArray::ParseChunkSize(oChunks, oType, anBlockSize))
1638
92.6k
        return nullptr;
1639
1640
274k
    std::string osDimSeparator = oRoot["dimension_separator"].ToString();
1641
274k
    if (osDimSeparator.empty())
1642
274k
        osDimSeparator = ".";
1643
1644
274k
    std::vector<GByte> abyNoData;
1645
1646
274k
    struct NoDataFreer
1647
274k
    {
1648
274k
        std::vector<GByte> &m_abyNodata;
1649
274k
        const GDALExtendedDataType &m_oType;
1650
1651
274k
        NoDataFreer(std::vector<GByte> &abyNoDataIn,
1652
274k
                    const GDALExtendedDataType &oTypeIn)
1653
274k
            : m_abyNodata(abyNoDataIn), m_oType(oTypeIn)
1654
274k
        {
1655
274k
        }
1656
1657
274k
        ~NoDataFreer()
1658
274k
        {
1659
274k
            if (!m_abyNodata.empty())
1660
73.7k
                m_oType.FreeDynamicMemory(&m_abyNodata[0]);
1661
274k
        }
1662
274k
    };
1663
1664
274k
    NoDataFreer NoDataFreer(abyNoData, oType);
1665
1666
274k
    auto oFillValue = oRoot["fill_value"];
1667
274k
    auto eFillValueType = oFillValue.GetType();
1668
1669
    // Normally arrays are not supported, but that's what NCZarr 4.8.0 outputs
1670
274k
    if (eFillValueType == CPLJSONObject::Type::Array &&
1671
274k
        oFillValue.ToArray().Size() == 1)
1672
207
    {
1673
207
        oFillValue = oFillValue.ToArray()[0];
1674
207
        eFillValueType = oFillValue.GetType();
1675
207
    }
1676
1677
274k
    if (!oFillValue.IsValid())
1678
83.1k
    {
1679
        // fill_value is normally required but some implementations
1680
        // are lacking it: https://github.com/Unidata/netcdf-c/issues/2059
1681
83.1k
        CPLError(CE_Warning, CPLE_AppDefined, "fill_value missing");
1682
83.1k
    }
1683
191k
    else if (eFillValueType == CPLJSONObject::Type::Null)
1684
105k
    {
1685
        // Nothing to do
1686
105k
    }
1687
85.6k
    else if (eFillValueType == CPLJSONObject::Type::String)
1688
58.2k
    {
1689
58.2k
        const auto osFillValue = oFillValue.ToString();
1690
58.2k
        if (oType.GetClass() == GEDTC_NUMERIC &&
1691
58.2k
            CPLGetValueType(osFillValue.c_str()) != CPL_VALUE_STRING)
1692
11.4k
        {
1693
11.4k
            abyNoData.resize(oType.GetSize());
1694
            // Be tolerant with numeric values serialized as strings.
1695
11.4k
            if (oType.GetNumericDataType() == GDT_Int64)
1696
246
            {
1697
246
                const int64_t nVal = static_cast<int64_t>(
1698
246
                    std::strtoll(osFillValue.c_str(), nullptr, 10));
1699
246
                GDALCopyWords(&nVal, GDT_Int64, 0, &abyNoData[0],
1700
246
                              oType.GetNumericDataType(), 0, 1);
1701
246
            }
1702
11.2k
            else if (oType.GetNumericDataType() == GDT_UInt64)
1703
419
            {
1704
419
                const uint64_t nVal = static_cast<uint64_t>(
1705
419
                    std::strtoull(osFillValue.c_str(), nullptr, 10));
1706
419
                GDALCopyWords(&nVal, GDT_UInt64, 0, &abyNoData[0],
1707
419
                              oType.GetNumericDataType(), 0, 1);
1708
419
            }
1709
10.8k
            else
1710
10.8k
            {
1711
10.8k
                const double dfNoDataValue = CPLAtof(osFillValue.c_str());
1712
10.8k
                GDALCopyWords(&dfNoDataValue, GDT_Float64, 0, &abyNoData[0],
1713
10.8k
                              oType.GetNumericDataType(), 0, 1);
1714
10.8k
            }
1715
11.4k
        }
1716
46.7k
        else if (oType.GetClass() == GEDTC_NUMERIC)
1717
7.54k
        {
1718
7.54k
            double dfNoDataValue;
1719
7.54k
            if (osFillValue == "NaN")
1720
0
            {
1721
0
                dfNoDataValue = std::numeric_limits<double>::quiet_NaN();
1722
0
            }
1723
7.54k
            else if (osFillValue == "Infinity")
1724
564
            {
1725
564
                dfNoDataValue = std::numeric_limits<double>::infinity();
1726
564
            }
1727
6.97k
            else if (osFillValue == "-Infinity")
1728
1.03k
            {
1729
1.03k
                dfNoDataValue = -std::numeric_limits<double>::infinity();
1730
1.03k
            }
1731
5.94k
            else
1732
5.94k
            {
1733
5.94k
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1734
5.94k
                return nullptr;
1735
5.94k
            }
1736
1.59k
            if (oType.GetNumericDataType() == GDT_Float16)
1737
595
            {
1738
595
                const GFloat16 hfNoDataValue =
1739
595
                    static_cast<GFloat16>(dfNoDataValue);
1740
595
                abyNoData.resize(sizeof(hfNoDataValue));
1741
595
                memcpy(&abyNoData[0], &hfNoDataValue, sizeof(hfNoDataValue));
1742
595
            }
1743
1.59k
            if (oType.GetNumericDataType() == GDT_Float32)
1744
435
            {
1745
435
                const float fNoDataValue = static_cast<float>(dfNoDataValue);
1746
435
                abyNoData.resize(sizeof(fNoDataValue));
1747
435
                memcpy(&abyNoData[0], &fNoDataValue, sizeof(fNoDataValue));
1748
435
            }
1749
1.15k
            else if (oType.GetNumericDataType() == GDT_Float64)
1750
266
            {
1751
266
                abyNoData.resize(sizeof(dfNoDataValue));
1752
266
                memcpy(&abyNoData[0], &dfNoDataValue, sizeof(dfNoDataValue));
1753
266
            }
1754
893
            else
1755
893
            {
1756
893
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1757
893
                return nullptr;
1758
893
            }
1759
1.59k
        }
1760
39.1k
        else if (oType.GetClass() == GEDTC_STRING)
1761
756
        {
1762
            // zarr.open('unicode_be.zarr', mode = 'w', shape=(1,), dtype =
1763
            // '>U1', compressor = None) oddly generates "fill_value": "0"
1764
756
            if (osFillValue != "0")
1765
639
            {
1766
639
                std::vector<GByte> abyNativeFillValue(osFillValue.size() + 1);
1767
639
                memcpy(&abyNativeFillValue[0], osFillValue.data(),
1768
639
                       osFillValue.size());
1769
639
                int nBytes = CPLBase64DecodeInPlace(&abyNativeFillValue[0]);
1770
639
                abyNativeFillValue.resize(nBytes + 1);
1771
639
                abyNativeFillValue[nBytes] = 0;
1772
639
                abyNoData.resize(oType.GetSize());
1773
639
                char *pDstStr = CPLStrdup(
1774
639
                    reinterpret_cast<const char *>(&abyNativeFillValue[0]));
1775
639
                char **pDstPtr = reinterpret_cast<char **>(&abyNoData[0]);
1776
639
                memcpy(pDstPtr, &pDstStr, sizeof(pDstStr));
1777
639
            }
1778
756
        }
1779
38.4k
        else
1780
38.4k
        {
1781
38.4k
            std::vector<GByte> abyNativeFillValue(osFillValue.size() + 1);
1782
38.4k
            memcpy(&abyNativeFillValue[0], osFillValue.data(),
1783
38.4k
                   osFillValue.size());
1784
38.4k
            int nBytes = CPLBase64DecodeInPlace(&abyNativeFillValue[0]);
1785
38.4k
            abyNativeFillValue.resize(nBytes);
1786
38.4k
            if (abyNativeFillValue.size() !=
1787
38.4k
                aoDtypeElts.back().nativeOffset + aoDtypeElts.back().nativeSize)
1788
5.15k
            {
1789
5.15k
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1790
5.15k
                return nullptr;
1791
5.15k
            }
1792
33.2k
            abyNoData.resize(oType.GetSize());
1793
33.2k
            ZarrArray::DecodeSourceElt(aoDtypeElts, abyNativeFillValue.data(),
1794
33.2k
                                       &abyNoData[0]);
1795
33.2k
        }
1796
58.2k
    }
1797
27.4k
    else if (eFillValueType == CPLJSONObject::Type::Boolean ||
1798
27.4k
             eFillValueType == CPLJSONObject::Type::Integer ||
1799
27.4k
             eFillValueType == CPLJSONObject::Type::Long ||
1800
27.4k
             eFillValueType == CPLJSONObject::Type::Double)
1801
27.1k
    {
1802
27.1k
        if (oType.GetClass() == GEDTC_NUMERIC)
1803
27.0k
        {
1804
27.0k
            const double dfNoDataValue = oFillValue.ToDouble();
1805
27.0k
            if (oType.GetNumericDataType() == GDT_Int64)
1806
671
            {
1807
671
                const int64_t nNoDataValue =
1808
671
                    static_cast<int64_t>(oFillValue.ToLong());
1809
671
                abyNoData.resize(oType.GetSize());
1810
671
                GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1811
671
                              oType.GetNumericDataType(), 0, 1);
1812
671
            }
1813
26.4k
            else if (oType.GetNumericDataType() == GDT_UInt64 &&
1814
                     /* we can't really deal with nodata value between */
1815
                     /* int64::max and uint64::max due to json-c limitations */
1816
26.4k
                     dfNoDataValue >= 0)
1817
681
            {
1818
681
                const int64_t nNoDataValue =
1819
681
                    static_cast<int64_t>(oFillValue.ToLong());
1820
681
                abyNoData.resize(oType.GetSize());
1821
681
                GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1822
681
                              oType.GetNumericDataType(), 0, 1);
1823
681
            }
1824
25.7k
            else
1825
25.7k
            {
1826
25.7k
                abyNoData.resize(oType.GetSize());
1827
25.7k
                GDALCopyWords(&dfNoDataValue, GDT_Float64, 0, &abyNoData[0],
1828
25.7k
                              oType.GetNumericDataType(), 0, 1);
1829
25.7k
            }
1830
27.0k
        }
1831
91
        else
1832
91
        {
1833
91
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1834
91
            return nullptr;
1835
91
        }
1836
27.1k
    }
1837
295
    else
1838
295
    {
1839
295
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1840
295
        return nullptr;
1841
295
    }
1842
1843
262k
    const CPLCompressor *psCompressor = nullptr;
1844
262k
    const CPLCompressor *psDecompressor = nullptr;
1845
262k
    const auto oCompressor = oRoot["compressor"];
1846
262k
    std::string osDecompressorId("NONE");
1847
1848
262k
    if (!oCompressor.IsValid())
1849
13.2k
    {
1850
13.2k
        CPLError(CE_Failure, CPLE_AppDefined, "compressor missing");
1851
13.2k
        return nullptr;
1852
13.2k
    }
1853
249k
    if (oCompressor.GetType() == CPLJSONObject::Type::Null)
1854
189k
    {
1855
        // nothing to do
1856
189k
    }
1857
59.2k
    else if (oCompressor.GetType() == CPLJSONObject::Type::Object)
1858
22.8k
    {
1859
22.8k
        osDecompressorId = oCompressor["id"].ToString();
1860
22.8k
        if (osDecompressorId.empty())
1861
673
        {
1862
673
            CPLError(CE_Failure, CPLE_AppDefined, "Missing compressor id");
1863
673
            return nullptr;
1864
673
        }
1865
22.2k
        if (osDecompressorId == "imagecodecs_tiff")
1866
19.7k
        {
1867
19.7k
            psDecompressor = ZarrGetTIFFDecompressor();
1868
19.7k
        }
1869
2.50k
        else
1870
2.50k
        {
1871
2.50k
            psCompressor = CPLGetCompressor(osDecompressorId.c_str());
1872
2.50k
            psDecompressor = CPLGetDecompressor(osDecompressorId.c_str());
1873
2.50k
            if (psCompressor == nullptr || psDecompressor == nullptr)
1874
749
            {
1875
749
                CPLError(CE_Failure, CPLE_AppDefined,
1876
749
                         "Decompressor %s not handled",
1877
749
                         osDecompressorId.c_str());
1878
749
                return nullptr;
1879
749
            }
1880
2.50k
        }
1881
22.2k
    }
1882
36.3k
    else
1883
36.3k
    {
1884
36.3k
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid compressor");
1885
36.3k
        return nullptr;
1886
36.3k
    }
1887
1888
211k
    CPLJSONArray oFiltersArray;
1889
211k
    const auto oFilters = oRoot["filters"];
1890
211k
    if (!oFilters.IsValid())
1891
22.9k
    {
1892
22.9k
        CPLError(CE_Failure, CPLE_AppDefined, "filters missing");
1893
22.9k
        return nullptr;
1894
22.9k
    }
1895
188k
    if (oFilters.GetType() == CPLJSONObject::Type::Null)
1896
177k
    {
1897
177k
    }
1898
11.2k
    else if (oFilters.GetType() == CPLJSONObject::Type::Array)
1899
10.9k
    {
1900
10.9k
        oFiltersArray = oFilters.ToArray();
1901
10.9k
        for (const auto &oFilter : oFiltersArray)
1902
2.46k
        {
1903
2.46k
            const auto osFilterId = oFilter["id"].ToString();
1904
2.46k
            if (osFilterId.empty())
1905
464
            {
1906
464
                CPLError(CE_Failure, CPLE_AppDefined, "Missing filter id");
1907
464
                return nullptr;
1908
464
            }
1909
2.00k
            if (!EQUAL(osFilterId.c_str(), "shuffle") &&
1910
2.00k
                !EQUAL(osFilterId.c_str(), "quantize") &&
1911
2.00k
                !EQUAL(osFilterId.c_str(), "fixedscaleoffset"))
1912
2.00k
            {
1913
2.00k
                const auto psFilterCompressor =
1914
2.00k
                    CPLGetCompressor(osFilterId.c_str());
1915
2.00k
                const auto psFilterDecompressor =
1916
2.00k
                    CPLGetDecompressor(osFilterId.c_str());
1917
2.00k
                if (psFilterCompressor == nullptr ||
1918
2.00k
                    psFilterDecompressor == nullptr)
1919
593
                {
1920
593
                    CPLError(CE_Failure, CPLE_AppDefined,
1921
593
                             "Filter %s not handled", osFilterId.c_str());
1922
593
                    return nullptr;
1923
593
                }
1924
2.00k
            }
1925
2.00k
        }
1926
10.9k
    }
1927
296
    else
1928
296
    {
1929
296
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid filters");
1930
296
        return nullptr;
1931
296
    }
1932
1933
187k
    auto poArray = ZarrV2Array::Create(m_poSharedResource, GetFullName(),
1934
187k
                                       osArrayName, aoDims, oType, aoDtypeElts,
1935
187k
                                       anBlockSize, bFortranOrder);
1936
187k
    if (!poArray)
1937
2.71k
        return nullptr;
1938
184k
    poArray->SetCompressorJson(oCompressor);
1939
184k
    poArray->SetUpdatable(m_bUpdatable);  // must be set before SetAttributes()
1940
184k
    poArray->SetFilename(osZarrayFilename);
1941
184k
    poArray->SetDimSeparator(osDimSeparator);
1942
184k
    poArray->SetCompressorDecompressor(osDecompressorId, psCompressor,
1943
184k
                                       psDecompressor);
1944
184k
    poArray->SetFilters(oFiltersArray);
1945
184k
    if (!abyNoData.empty())
1946
56.1k
    {
1947
56.1k
        poArray->RegisterNoDataValue(abyNoData.data());
1948
56.1k
    }
1949
1950
184k
    const auto gridMapping = oAttributes["grid_mapping"];
1951
184k
    if (gridMapping.GetType() == CPLJSONObject::Type::String)
1952
20.9k
    {
1953
20.9k
        const std::string gridMappingName = gridMapping.ToString();
1954
20.9k
        if (m_oMapMDArrays.find(gridMappingName) == m_oMapMDArrays.end())
1955
10.9k
        {
1956
10.9k
            const std::string osArrayFilenameDim = CPLFormFilenameSafe(
1957
10.9k
                CPLFormFilenameSafe(m_osDirectoryName.c_str(),
1958
10.9k
                                    gridMappingName.c_str(), nullptr)
1959
10.9k
                    .c_str(),
1960
10.9k
                ".zarray", nullptr);
1961
10.9k
            VSIStatBufL sStat;
1962
10.9k
            if (VSIStatL(osArrayFilenameDim.c_str(), &sStat) == 0)
1963
8.06k
            {
1964
8.06k
                CPLJSONDocument oDoc;
1965
8.06k
                if (oDoc.Load(osArrayFilenameDim))
1966
7.23k
                {
1967
7.23k
                    LoadArray(gridMappingName, osArrayFilenameDim,
1968
7.23k
                              oDoc.GetRoot(), false, CPLJSONObject());
1969
7.23k
                }
1970
8.06k
            }
1971
10.9k
        }
1972
20.9k
    }
1973
1974
184k
    poArray->ParseSpecialAttributes(m_pSelf.lock(), oAttributes);
1975
184k
    poArray->SetAttributes(oAttributes);
1976
184k
    poArray->SetDtype(oDtype);
1977
184k
    RegisterArray(poArray);
1978
1979
    // If this is an indexing variable, attach it to the dimension.
1980
184k
    if (aoDims.size() == 1 && aoDims[0]->GetName() == poArray->GetName())
1981
16.0k
    {
1982
16.0k
        auto oIter = m_oMapDimensions.find(poArray->GetName());
1983
16.0k
        if (oIter != m_oMapDimensions.end())
1984
15.8k
        {
1985
15.8k
            oIter->second->SetIndexingVariable(poArray);
1986
15.8k
        }
1987
16.0k
    }
1988
1989
184k
    if (CPLTestBool(m_poSharedResource->GetOpenOptions().FetchNameValueDef(
1990
184k
            "CACHE_TILE_PRESENCE", "NO")))
1991
0
    {
1992
0
        poArray->CacheTilePresence();
1993
0
    }
1994
1995
184k
    return poArray;
1996
187k
}
1997
1998
/************************************************************************/
1999
/*                    ZarrV2Array::SetCompressorJson()                  */
2000
/************************************************************************/
2001
2002
void ZarrV2Array::SetCompressorJson(const CPLJSONObject &oCompressor)
2003
184k
{
2004
184k
    m_oCompressorJSon = oCompressor;
2005
184k
    if (oCompressor.GetType() != CPLJSONObject::Type::Null)
2006
20.5k
        m_aosStructuralInfo.SetNameValue("COMPRESSOR",
2007
20.5k
                                         oCompressor.ToString().c_str());
2008
184k
}
2009
2010
/************************************************************************/
2011
/*                     ZarrV2Array::SetFilters()                        */
2012
/************************************************************************/
2013
2014
void ZarrV2Array::SetFilters(const CPLJSONArray &oFiltersArray)
2015
186k
{
2016
186k
    m_oFiltersArray = oFiltersArray;
2017
186k
    if (oFiltersArray.Size() > 0)
2018
1.41k
        m_aosStructuralInfo.SetNameValue("FILTERS",
2019
1.41k
                                         oFiltersArray.ToString().c_str());
2020
186k
}