Coverage Report

Created: 2025-12-31 08:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/frmts/zarr/zarr_v2_array.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  GDAL
4
 * Purpose:  Zarr driver
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2021, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "cpl_float.h"
14
#include "cpl_vsi_virtual.h"
15
#include "gdal_thread_pool.h"
16
#include "zarr.h"
17
#include "vsikerchunk.h"
18
19
#include "netcdf_cf_constants.h"  // for CF_UNITS, etc
20
21
#include <algorithm>
22
#include <cassert>
23
#include <cstdlib>
24
#include <limits>
25
#include <map>
26
#include <set>
27
28
/************************************************************************/
29
/*                       ZarrV2Array::ZarrV2Array()                     */
30
/************************************************************************/
31
32
/** Build a Zarr V2 array object.
 *
 * The array description is forwarded unchanged to the GDALAbstractMDArray
 * and ZarrArray bases; only the memory layout flag is stored here.
 *
 * @param poSharedResource Resource shared between the objects of a dataset.
 * @param osParentName     Name of the parent group.
 * @param osName           Name of the array.
 * @param aoDims           Dimensions of the array.
 * @param oType            GDAL data type of the array.
 * @param aoDtypeElts      Description of the native dtype element(s).
 * @param anBlockSize      Chunk size along each dimension.
 * @param bFortranOrder    Value stored in m_bFortranOrder (serialized as
 *                         order "F" when true, "C" otherwise).
 */
ZarrV2Array::ZarrV2Array(
    const std::shared_ptr<ZarrSharedResource> &poSharedResource,
    const std::string &osParentName, const std::string &osName,
    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
    const GDALExtendedDataType &oType, const std::vector<DtypeElt> &aoDtypeElts,
    const std::vector<GUInt64> &anBlockSize, bool bFortranOrder)
    : GDALAbstractMDArray(osParentName, osName),
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
                aoDtypeElts, anBlockSize),
      m_bFortranOrder(bFortranOrder)
{
    // Start without any compressor configuration. Serialize() emits a null
    // "compressor" entry as long as m_oCompressorJSon.IsValid() is false
    // (presumably the state Deinit() leaves the object in).
    m_oCompressorJSon.Deinit();
}
Unexecuted instantiation: ZarrV2Array::ZarrV2Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&, bool)
ZarrV2Array::ZarrV2Array(std::__1::shared_ptr<ZarrSharedResource> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<std::__1::shared_ptr<GDALDimension>, std::__1::allocator<std::__1::shared_ptr<GDALDimension> > > const&, GDALExtendedDataType const&, std::__1::vector<DtypeElt, std::__1::allocator<DtypeElt> > const&, std::__1::vector<unsigned long long, std::__1::allocator<unsigned long long> > const&, bool)
Line
Count
Source
38
246
    : GDALAbstractMDArray(osParentName, osName),
39
246
      ZarrArray(poSharedResource, osParentName, osName, aoDims, oType,
40
246
                aoDtypeElts, anBlockSize),
41
246
      m_bFortranOrder(bFortranOrder)
42
246
{
43
246
    m_oCompressorJSon.Deinit();
44
246
}
45
46
/************************************************************************/
47
/*                         ZarrV2Array::Create()                        */
48
/************************************************************************/
49
50
std::shared_ptr<ZarrV2Array>
51
ZarrV2Array::Create(const std::shared_ptr<ZarrSharedResource> &poSharedResource,
52
                    const std::string &osParentName, const std::string &osName,
53
                    const std::vector<std::shared_ptr<GDALDimension>> &aoDims,
54
                    const GDALExtendedDataType &oType,
55
                    const std::vector<DtypeElt> &aoDtypeElts,
56
                    const std::vector<GUInt64> &anBlockSize, bool bFortranOrder)
57
246
{
58
246
    auto arr = std::shared_ptr<ZarrV2Array>(
59
246
        new ZarrV2Array(poSharedResource, osParentName, osName, aoDims, oType,
60
246
                        aoDtypeElts, anBlockSize, bFortranOrder));
61
246
    if (arr->m_nTotalTileCount == 0)
62
0
        return nullptr;
63
246
    arr->SetSelf(arr);
64
65
246
    return arr;
66
246
}
67
68
/************************************************************************/
69
/*                             ~ZarrV2Array()                           */
70
/************************************************************************/
71
72
/** Destructor: writes out any pending modification.
 *
 * The return value of Flush() is ignored here.
 */
ZarrV2Array::~ZarrV2Array()
{
    // Explicitly qualified so that this class's implementation of Flush()
    // is the one invoked.
    ZarrV2Array::Flush();
}
76
77
/************************************************************************/
78
/*                                Flush()                               */
79
/************************************************************************/
80
81
bool ZarrV2Array::Flush()
82
1.13k
{
83
1.13k
    if (!m_bValid)
84
0
        return true;
85
86
1.13k
    bool ret = ZarrV2Array::FlushDirtyTile();
87
88
1.13k
    if (m_bDefinitionModified)
89
246
    {
90
246
        if (!Serialize())
91
0
            ret = false;
92
246
        m_bDefinitionModified = false;
93
246
    }
94
95
1.13k
    CPLJSONArray j_ARRAY_DIMENSIONS;
96
1.13k
    bool bDimensionsModified = false;
97
1.13k
    if (!m_aoDims.empty())
98
1.13k
    {
99
1.13k
        for (const auto &poDim : m_aoDims)
100
2.12k
        {
101
2.12k
            const auto poZarrDim =
102
2.12k
                dynamic_cast<const ZarrDimension *>(poDim.get());
103
2.12k
            if (poZarrDim && poZarrDim->IsXArrayDimension())
104
2.12k
            {
105
2.12k
                if (poZarrDim->IsModified())
106
0
                    bDimensionsModified = true;
107
2.12k
                j_ARRAY_DIMENSIONS.Add(poDim->GetName());
108
2.12k
            }
109
0
            else
110
0
            {
111
0
                j_ARRAY_DIMENSIONS = CPLJSONArray();
112
0
                break;
113
0
            }
114
2.12k
        }
115
1.13k
    }
116
117
1.13k
    if (m_oAttrGroup.IsModified() || bDimensionsModified ||
118
900
        (m_bNew && j_ARRAY_DIMENSIONS.Size() != 0) || m_bUnitModified ||
119
754
        m_bOffsetModified || m_bScaleModified || m_bSRSModified)
120
385
    {
121
385
        m_bNew = false;
122
123
385
        auto oAttrs = SerializeSpecialAttributes();
124
125
385
        if (j_ARRAY_DIMENSIONS.Size() != 0)
126
385
        {
127
385
            oAttrs.Delete("_ARRAY_DIMENSIONS");
128
385
            oAttrs.Add("_ARRAY_DIMENSIONS", j_ARRAY_DIMENSIONS);
129
385
        }
130
131
385
        CPLJSONDocument oDoc;
132
385
        oDoc.SetRoot(oAttrs);
133
385
        const std::string osAttrFilename =
134
385
            CPLFormFilenameSafe(CPLGetDirnameSafe(m_osFilename.c_str()).c_str(),
135
385
                                ".zattrs", nullptr);
136
385
        if (!oDoc.Save(osAttrFilename))
137
0
            ret = false;
138
385
        m_poSharedResource->SetZMetadataItem(osAttrFilename, oAttrs);
139
385
    }
140
141
1.13k
    return ret;
142
1.13k
}
143
144
/************************************************************************/
145
/*           StripUselessItemsFromCompressorConfiguration()             */
146
/************************************************************************/
147
148
/** Remove from a compressor configuration the items that are not wanted in
 * the serialized array definition.
 *
 * Nothing is done when @p o is not a JSON object.
 *
 * @param o Compressor configuration, modified in place.
 */
static void StripUselessItemsFromCompressorConfiguration(CPLJSONObject &o)
{
    if (o.GetType() != CPLJSONObject::Type::Object)
        return;

    static const char *const apszUselessKeys[] = {
        "num_threads",  // Blosc
        "typesize",     // Blosc
        "header",       // LZ4
    };
    for (const char *pszKey : apszUselessKeys)
    {
        o.Delete(pszKey);
    }
}
157
158
/************************************************************************/
159
/*                    ZarrV2Array::Serialize()                          */
160
/************************************************************************/
161
162
bool ZarrV2Array::Serialize()
163
246
{
164
246
    CPLJSONDocument oDoc;
165
246
    CPLJSONObject oRoot = oDoc.GetRoot();
166
167
246
    CPLJSONArray oChunks;
168
246
    for (const auto nBlockSize : m_anBlockSize)
169
458
    {
170
458
        oChunks.Add(static_cast<GInt64>(nBlockSize));
171
458
    }
172
246
    oRoot.Add("chunks", oChunks);
173
174
246
    if (m_oCompressorJSon.IsValid())
175
0
    {
176
0
        oRoot.Add("compressor", m_oCompressorJSon);
177
0
        CPLJSONObject compressor = oRoot["compressor"];
178
0
        StripUselessItemsFromCompressorConfiguration(compressor);
179
0
    }
180
246
    else
181
246
    {
182
246
        oRoot.AddNull("compressor");
183
246
    }
184
185
246
    if (m_dtype.GetType() == CPLJSONObject::Type::Object)
186
246
        oRoot.Add("dtype", m_dtype["dummy"]);
187
0
    else
188
0
        oRoot.Add("dtype", m_dtype);
189
190
246
    if (m_pabyNoData == nullptr)
191
245
    {
192
245
        oRoot.AddNull("fill_value");
193
245
    }
194
1
    else
195
1
    {
196
1
        switch (m_oType.GetClass())
197
1
        {
198
1
            case GEDTC_NUMERIC:
199
1
            {
200
1
                SerializeNumericNoData(oRoot);
201
1
                break;
202
0
            }
203
204
0
            case GEDTC_STRING:
205
0
            {
206
0
                char *pszStr;
207
0
                char **ppszStr = reinterpret_cast<char **>(m_pabyNoData);
208
0
                memcpy(&pszStr, ppszStr, sizeof(pszStr));
209
0
                if (pszStr)
210
0
                {
211
0
                    const size_t nNativeSize =
212
0
                        m_aoDtypeElts.back().nativeOffset +
213
0
                        m_aoDtypeElts.back().nativeSize;
214
0
                    char *base64 = CPLBase64Encode(
215
0
                        static_cast<int>(std::min(nNativeSize, strlen(pszStr))),
216
0
                        reinterpret_cast<const GByte *>(pszStr));
217
0
                    oRoot.Add("fill_value", base64);
218
0
                    CPLFree(base64);
219
0
                }
220
0
                else
221
0
                {
222
0
                    oRoot.AddNull("fill_value");
223
0
                }
224
0
                break;
225
0
            }
226
227
0
            case GEDTC_COMPOUND:
228
0
            {
229
0
                const size_t nNativeSize = m_aoDtypeElts.back().nativeOffset +
230
0
                                           m_aoDtypeElts.back().nativeSize;
231
0
                std::vector<GByte> nativeNoData(nNativeSize);
232
0
                EncodeElt(m_aoDtypeElts, m_pabyNoData, &nativeNoData[0]);
233
0
                char *base64 = CPLBase64Encode(static_cast<int>(nNativeSize),
234
0
                                               nativeNoData.data());
235
0
                oRoot.Add("fill_value", base64);
236
0
                CPLFree(base64);
237
0
            }
238
1
        }
239
1
    }
240
241
246
    if (m_oFiltersArray.Size() == 0)
242
246
        oRoot.AddNull("filters");
243
0
    else
244
0
        oRoot.Add("filters", m_oFiltersArray);
245
246
246
    oRoot.Add("order", m_bFortranOrder ? "F" : "C");
247
248
246
    CPLJSONArray oShape;
249
246
    for (const auto &poDim : m_aoDims)
250
458
    {
251
458
        oShape.Add(static_cast<GInt64>(poDim->GetSize()));
252
458
    }
253
246
    oRoot.Add("shape", oShape);
254
255
246
    oRoot.Add("zarr_format", 2);
256
257
246
    if (m_osDimSeparator != ".")
258
0
    {
259
0
        oRoot.Add("dimension_separator", m_osDimSeparator);
260
0
    }
261
262
246
    bool ret = oDoc.Save(m_osFilename);
263
264
246
    m_poSharedResource->SetZMetadataItem(m_osFilename, oRoot);
265
266
246
    return ret;
267
246
}
268
269
/************************************************************************/
270
/*                  ZarrV2Array::NeedDecodedBuffer()                    */
271
/************************************************************************/
272
273
bool ZarrV2Array::NeedDecodedBuffer() const
274
416
{
275
416
    const size_t nSourceSize =
276
416
        m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
277
416
    if (m_oType.GetClass() == GEDTC_COMPOUND &&
278
0
        nSourceSize != m_oType.GetSize())
279
0
    {
280
0
        return true;
281
0
    }
282
416
    else if (m_oType.GetClass() != GEDTC_STRING)
283
416
    {
284
416
        for (const auto &elt : m_aoDtypeElts)
285
416
        {
286
416
            if (elt.needByteSwapping || elt.gdalTypeIsApproxOfNative ||
287
416
                elt.nativeType == DtypeElt::NativeType::STRING_ASCII ||
288
416
                elt.nativeType == DtypeElt::NativeType::STRING_UNICODE)
289
0
            {
290
0
                return true;
291
0
            }
292
416
        }
293
416
    }
294
416
    return false;
295
416
}
296
297
/************************************************************************/
298
/*               ZarrV2Array::AllocateWorkingBuffers()                  */
299
/************************************************************************/
300
301
bool ZarrV2Array::AllocateWorkingBuffers() const
302
15.7k
{
303
15.7k
    if (m_bAllocateWorkingBuffersDone)
304
15.5k
        return m_bWorkingBuffersOK;
305
306
208
    m_bAllocateWorkingBuffersDone = true;
307
308
208
    size_t nSizeNeeded = m_nTileSize;
309
208
    if (m_bFortranOrder || m_oFiltersArray.Size() != 0)
310
0
    {
311
0
        if (nSizeNeeded > std::numeric_limits<size_t>::max() / 2)
312
0
        {
313
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
314
0
            return false;
315
0
        }
316
0
        nSizeNeeded *= 2;
317
0
    }
318
208
    if (NeedDecodedBuffer())
319
0
    {
320
0
        size_t nDecodedBufferSize = m_oType.GetSize();
321
0
        for (const auto &nBlockSize : m_anBlockSize)
322
0
        {
323
0
            if (nDecodedBufferSize > std::numeric_limits<size_t>::max() /
324
0
                                         static_cast<size_t>(nBlockSize))
325
0
            {
326
0
                CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
327
0
                return false;
328
0
            }
329
0
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
330
0
        }
331
0
        if (nSizeNeeded >
332
0
            std::numeric_limits<size_t>::max() - nDecodedBufferSize)
333
0
        {
334
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large chunk size");
335
0
            return false;
336
0
        }
337
0
        nSizeNeeded += nDecodedBufferSize;
338
0
    }
339
340
    // Reserve a buffer for tile content
341
208
    if (nSizeNeeded > 1024 * 1024 * 1024 &&
342
0
        !CPLTestBool(CPLGetConfigOption("ZARR_ALLOW_BIG_TILE_SIZE", "NO")))
343
0
    {
344
0
        CPLError(CE_Failure, CPLE_AppDefined,
345
0
                 "Zarr tile allocation would require " CPL_FRMT_GUIB " bytes. "
346
0
                 "By default the driver limits to 1 GB. To allow that memory "
347
0
                 "allocation, set the ZARR_ALLOW_BIG_TILE_SIZE configuration "
348
0
                 "option to YES.",
349
0
                 static_cast<GUIntBig>(nSizeNeeded));
350
0
        return false;
351
0
    }
352
353
208
    m_bWorkingBuffersOK = AllocateWorkingBuffers(
354
208
        m_abyRawTileData, m_abyTmpRawTileData, m_abyDecodedTileData);
355
208
    return m_bWorkingBuffersOK;
356
208
}
357
358
bool ZarrV2Array::AllocateWorkingBuffers(
359
    ZarrByteVectorQuickResize &abyRawTileData,
360
    ZarrByteVectorQuickResize &abyTmpRawTileData,
361
    ZarrByteVectorQuickResize &abyDecodedTileData) const
362
208
{
363
    // This method should NOT modify any ZarrArray member, as it is going to
364
    // be called concurrently from several threads.
365
366
    // Set those #define to avoid accidental use of some global variables
367
208
#define m_abyTmpRawTileData cannot_use_here
368
208
#define m_abyRawTileData cannot_use_here
369
208
#define m_abyDecodedTileData cannot_use_here
370
371
208
    try
372
208
    {
373
208
        abyRawTileData.resize(m_nTileSize);
374
208
        if (m_bFortranOrder || m_oFiltersArray.Size() != 0)
375
0
            abyTmpRawTileData.resize(m_nTileSize);
376
208
    }
377
208
    catch (const std::bad_alloc &e)
378
208
    {
379
0
        CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
380
0
        return false;
381
0
    }
382
383
208
    if (NeedDecodedBuffer())
384
0
    {
385
0
        size_t nDecodedBufferSize = m_oType.GetSize();
386
0
        for (const auto &nBlockSize : m_anBlockSize)
387
0
        {
388
0
            nDecodedBufferSize *= static_cast<size_t>(nBlockSize);
389
0
        }
390
0
        try
391
0
        {
392
0
            abyDecodedTileData.resize(nDecodedBufferSize);
393
0
        }
394
0
        catch (const std::bad_alloc &e)
395
0
        {
396
0
            CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
397
0
            return false;
398
0
        }
399
0
    }
400
401
208
    return true;
402
208
#undef m_abyTmpRawTileData
403
208
#undef m_abyRawTileData
404
208
#undef m_abyDecodedTileData
405
208
}
406
407
/************************************************************************/
408
/*                      ZarrV2Array::LoadTileData()                     */
409
/************************************************************************/
410
411
bool ZarrV2Array::LoadTileData(const uint64_t *tileIndices,
412
                               bool &bMissingTileOut) const
413
461k
{
414
461k
    return LoadTileData(tileIndices,
415
461k
                        false,  // use mutex
416
461k
                        m_psDecompressor, m_abyRawTileData, m_abyTmpRawTileData,
417
461k
                        m_abyDecodedTileData, bMissingTileOut);
418
461k
}
419
420
bool ZarrV2Array::LoadTileData(const uint64_t *tileIndices, bool bUseMutex,
421
                               const CPLCompressor *psDecompressor,
422
                               ZarrByteVectorQuickResize &abyRawTileData,
423
                               ZarrByteVectorQuickResize &abyTmpRawTileData,
424
                               ZarrByteVectorQuickResize &abyDecodedTileData,
425
                               bool &bMissingTileOut) const
426
461k
{
427
    // This method should NOT modify any ZarrArray member, as it is going to
428
    // be called concurrently from several threads.
429
430
    // Set those #define to avoid accidental use of some global variables
431
461k
#define m_abyTmpRawTileData cannot_use_here
432
461k
#define m_abyRawTileData cannot_use_here
433
461k
#define m_abyDecodedTileData cannot_use_here
434
461k
#define m_psDecompressor cannot_use_here
435
436
461k
    bMissingTileOut = false;
437
438
461k
    std::string osFilename = BuildTileFilename(tileIndices);
439
440
    // For network file systems, get the streaming version of the filename,
441
    // as we don't need arbitrary seeking in the file
442
461k
    osFilename = VSIFileManager::GetHandler(osFilename.c_str())
443
461k
                     ->GetStreamingFilename(osFilename);
444
445
    // First if we have a tile presence cache, check tile presence from it
446
461k
    bool bEarlyRet;
447
461k
    if (bUseMutex)
448
0
    {
449
0
        std::lock_guard<std::mutex> oLock(m_oMutex);
450
0
        bEarlyRet = IsTileMissingFromCacheInfo(osFilename, tileIndices);
451
0
    }
452
461k
    else
453
461k
    {
454
461k
        bEarlyRet = IsTileMissingFromCacheInfo(osFilename, tileIndices);
455
461k
    }
456
461k
    if (bEarlyRet)
457
0
    {
458
0
        bMissingTileOut = true;
459
0
        return true;
460
0
    }
461
462
461k
    VSILFILE *fp = nullptr;
463
    // This is the number of files returned in a S3 directory listing operation
464
461k
    constexpr uint64_t MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING = 1000;
465
461k
    const char *const apszOpenOptions[] = {"IGNORE_FILENAME_RESTRICTIONS=YES",
466
461k
                                           nullptr};
467
461k
    const auto nErrorBefore = CPLGetErrorCounter();
468
461k
    if ((m_osDimSeparator == "/" && !m_anBlockSize.empty() &&
469
0
         m_anBlockSize.back() > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING) ||
470
461k
        (m_osDimSeparator != "/" &&
471
461k
         m_nTotalTileCount > MAX_TILES_ALLOWED_FOR_DIRECTORY_LISTING))
472
2.25k
    {
473
        // Avoid issuing ReadDir() when a lot of files are expected
474
2.25k
        CPLConfigOptionSetter optionSetter("GDAL_DISABLE_READDIR_ON_OPEN",
475
2.25k
                                           "YES", true);
476
2.25k
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
477
2.25k
    }
478
459k
    else
479
459k
    {
480
459k
        fp = VSIFOpenEx2L(osFilename.c_str(), "rb", 0, apszOpenOptions);
481
459k
    }
482
461k
    if (fp == nullptr)
483
262k
    {
484
262k
        if (nErrorBefore != CPLGetErrorCounter())
485
0
        {
486
0
            return false;
487
0
        }
488
262k
        else
489
262k
        {
490
            // Missing files are OK and indicate nodata_value
491
262k
            CPLDebugOnly(ZARR_DEBUG_KEY, "Tile %s missing (=nodata)",
492
262k
                         osFilename.c_str());
493
262k
            bMissingTileOut = true;
494
262k
            return true;
495
262k
        }
496
262k
    }
497
498
199k
    bMissingTileOut = false;
499
199k
    bool bRet = true;
500
199k
    size_t nRawDataSize = abyRawTileData.size();
501
199k
    if (psDecompressor == nullptr)
502
199k
    {
503
199k
        nRawDataSize = VSIFReadL(&abyRawTileData[0], 1, nRawDataSize, fp);
504
199k
    }
505
0
    else
506
0
    {
507
0
        VSIFSeekL(fp, 0, SEEK_END);
508
0
        const auto nSize = VSIFTellL(fp);
509
0
        VSIFSeekL(fp, 0, SEEK_SET);
510
0
        if (nSize > static_cast<vsi_l_offset>(std::numeric_limits<int>::max()))
511
0
        {
512
0
            CPLError(CE_Failure, CPLE_AppDefined, "Too large tile %s",
513
0
                     osFilename.c_str());
514
0
            bRet = false;
515
0
        }
516
0
        else
517
0
        {
518
0
            ZarrByteVectorQuickResize abyCompressedData;
519
0
            try
520
0
            {
521
0
                abyCompressedData.resize(static_cast<size_t>(nSize));
522
0
            }
523
0
            catch (const std::exception &)
524
0
            {
525
0
                CPLError(CE_Failure, CPLE_OutOfMemory,
526
0
                         "Cannot allocate memory for tile %s",
527
0
                         osFilename.c_str());
528
0
                bRet = false;
529
0
            }
530
531
0
            if (bRet &&
532
0
                (abyCompressedData.empty() ||
533
0
                 VSIFReadL(&abyCompressedData[0], 1, abyCompressedData.size(),
534
0
                           fp) != abyCompressedData.size()))
535
0
            {
536
0
                CPLError(CE_Failure, CPLE_AppDefined,
537
0
                         "Could not read tile %s correctly",
538
0
                         osFilename.c_str());
539
0
                bRet = false;
540
0
            }
541
0
            else
542
0
            {
543
0
                void *out_buffer = &abyRawTileData[0];
544
0
                if (!psDecompressor->pfnFunc(
545
0
                        abyCompressedData.data(), abyCompressedData.size(),
546
0
                        &out_buffer, &nRawDataSize, nullptr,
547
0
                        psDecompressor->user_data))
548
0
                {
549
0
                    CPLError(CE_Failure, CPLE_AppDefined,
550
0
                             "Decompression of tile %s failed",
551
0
                             osFilename.c_str());
552
0
                    bRet = false;
553
0
                }
554
0
            }
555
0
        }
556
0
    }
557
199k
    VSIFCloseL(fp);
558
199k
    if (!bRet)
559
0
        return false;
560
561
199k
    for (int i = m_oFiltersArray.Size(); i > 0;)
562
0
    {
563
0
        --i;
564
0
        const auto &oFilter = m_oFiltersArray[i];
565
0
        const auto osFilterId = oFilter["id"].ToString();
566
0
        const auto psFilterDecompressor =
567
0
            EQUAL(osFilterId.c_str(), "shuffle") ? ZarrGetShuffleDecompressor()
568
0
            : EQUAL(osFilterId.c_str(), "quantize")
569
0
                ? ZarrGetQuantizeDecompressor()
570
0
            : EQUAL(osFilterId.c_str(), "fixedscaleoffset")
571
0
                ? ZarrGetFixedScaleOffsetDecompressor()
572
0
                : CPLGetDecompressor(osFilterId.c_str());
573
0
        CPLAssert(psFilterDecompressor);
574
575
0
        CPLStringList aosOptions;
576
0
        for (const auto &obj : oFilter.GetChildren())
577
0
        {
578
0
            aosOptions.SetNameValue(obj.GetName().c_str(),
579
0
                                    obj.ToString().c_str());
580
0
        }
581
0
        void *out_buffer = &abyTmpRawTileData[0];
582
0
        size_t nOutSize = abyTmpRawTileData.size();
583
0
        if (!psFilterDecompressor->pfnFunc(
584
0
                abyRawTileData.data(), nRawDataSize, &out_buffer, &nOutSize,
585
0
                aosOptions.List(), psFilterDecompressor->user_data))
586
0
        {
587
0
            CPLError(CE_Failure, CPLE_AppDefined,
588
0
                     "Filter %s for tile %s failed", osFilterId.c_str(),
589
0
                     osFilename.c_str());
590
0
            return false;
591
0
        }
592
593
0
        nRawDataSize = nOutSize;
594
0
        std::swap(abyRawTileData, abyTmpRawTileData);
595
0
    }
596
199k
    if (nRawDataSize != abyRawTileData.size())
597
0
    {
598
0
        CPLError(CE_Failure, CPLE_AppDefined,
599
0
                 "Decompressed tile %s has not expected size after filters",
600
0
                 osFilename.c_str());
601
0
        return false;
602
0
    }
603
604
199k
    if (m_bFortranOrder && !m_aoDims.empty())
605
0
    {
606
0
        BlockTranspose(abyRawTileData, abyTmpRawTileData, true);
607
0
        std::swap(abyRawTileData, abyTmpRawTileData);
608
0
    }
609
610
199k
    if (!abyDecodedTileData.empty())
611
0
    {
612
0
        const size_t nSourceSize =
613
0
            m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
614
0
        const auto nDTSize = m_oType.GetSize();
615
0
        const size_t nValues = abyDecodedTileData.size() / nDTSize;
616
0
        const GByte *pSrc = abyRawTileData.data();
617
0
        GByte *pDst = &abyDecodedTileData[0];
618
0
        for (size_t i = 0; i < nValues;
619
0
             i++, pSrc += nSourceSize, pDst += nDTSize)
620
0
        {
621
0
            DecodeSourceElt(m_aoDtypeElts, pSrc, pDst);
622
0
        }
623
0
    }
624
625
199k
    return true;
626
627
199k
#undef m_abyTmpRawTileData
628
199k
#undef m_abyRawTileData
629
199k
#undef m_abyDecodedTileData
630
199k
#undef m_psDecompressor
631
199k
}
632
633
/************************************************************************/
634
/*                      ZarrV2Array::IAdviseRead()                      */
635
/************************************************************************/
636
637
bool ZarrV2Array::IAdviseRead(const GUInt64 *arrayStartIdx, const size_t *count,
638
                              CSLConstList papszOptions) const
639
0
{
640
0
    std::vector<uint64_t> anIndicesCur;
641
0
    int nThreadsMax = 0;
642
0
    std::vector<uint64_t> anReqTilesIndices;
643
0
    size_t nReqTiles = 0;
644
0
    if (!IAdviseReadCommon(arrayStartIdx, count, papszOptions, anIndicesCur,
645
0
                           nThreadsMax, anReqTilesIndices, nReqTiles))
646
0
    {
647
0
        return false;
648
0
    }
649
0
    if (nThreadsMax <= 1)
650
0
    {
651
0
        return true;
652
0
    }
653
654
0
    const int nThreads =
655
0
        static_cast<int>(std::min(static_cast<size_t>(nThreadsMax), nReqTiles));
656
657
0
    CPLWorkerThreadPool *wtp = GDALGetGlobalThreadPool(nThreadsMax);
658
0
    if (wtp == nullptr)
659
0
        return false;
660
661
0
    struct JobStruct
662
0
    {
663
0
        JobStruct() = default;
664
665
0
        JobStruct(const JobStruct &) = delete;
666
0
        JobStruct &operator=(const JobStruct &) = delete;
667
668
0
        JobStruct(JobStruct &&) = default;
669
0
        JobStruct &operator=(JobStruct &&) = default;
670
671
0
        const ZarrV2Array *poArray = nullptr;
672
0
        bool *pbGlobalStatus = nullptr;
673
0
        int *pnRemainingThreads = nullptr;
674
0
        const std::vector<uint64_t> *panReqTilesIndices = nullptr;
675
0
        size_t nFirstIdx = 0;
676
0
        size_t nLastIdxNotIncluded = 0;
677
0
    };
678
679
0
    std::vector<JobStruct> asJobStructs;
680
681
0
    bool bGlobalStatus = true;
682
0
    int nRemainingThreads = nThreads;
683
    // Check for very highly overflow in below loop
684
0
    assert(static_cast<size_t>(nThreads) <
685
0
           std::numeric_limits<size_t>::max() / nReqTiles);
686
687
    // Setup jobs
688
0
    for (int i = 0; i < nThreads; i++)
689
0
    {
690
0
        JobStruct jobStruct;
691
0
        jobStruct.poArray = this;
692
0
        jobStruct.pbGlobalStatus = &bGlobalStatus;
693
0
        jobStruct.pnRemainingThreads = &nRemainingThreads;
694
0
        jobStruct.panReqTilesIndices = &anReqTilesIndices;
695
0
        jobStruct.nFirstIdx = static_cast<size_t>(i * nReqTiles / nThreads);
696
0
        jobStruct.nLastIdxNotIncluded = std::min(
697
0
            static_cast<size_t>((i + 1) * nReqTiles / nThreads), nReqTiles);
698
0
        asJobStructs.emplace_back(std::move(jobStruct));
699
0
    }
700
701
0
    const auto JobFunc = [](void *pThreadData)
702
0
    {
703
0
        const JobStruct *jobStruct =
704
0
            static_cast<const JobStruct *>(pThreadData);
705
706
0
        const auto poArray = jobStruct->poArray;
707
0
        const auto &aoDims = poArray->GetDimensions();
708
0
        const size_t l_nDims = poArray->GetDimensionCount();
709
0
        ZarrByteVectorQuickResize abyRawTileData;
710
0
        ZarrByteVectorQuickResize abyDecodedTileData;
711
0
        ZarrByteVectorQuickResize abyTmpRawTileData;
712
0
        const CPLCompressor *psDecompressor =
713
0
            CPLGetDecompressor(poArray->m_osDecompressorId.c_str());
714
715
0
        for (size_t iReq = jobStruct->nFirstIdx;
716
0
             iReq < jobStruct->nLastIdxNotIncluded; ++iReq)
717
0
        {
718
            // Check if we must early exit
719
0
            {
720
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
721
0
                if (!(*jobStruct->pbGlobalStatus))
722
0
                    return;
723
0
            }
724
725
0
            const uint64_t *tileIndices =
726
0
                jobStruct->panReqTilesIndices->data() + iReq * l_nDims;
727
728
0
            uint64_t nTileIdx = 0;
729
0
            for (size_t j = 0; j < l_nDims; ++j)
730
0
            {
731
0
                if (j > 0)
732
0
                    nTileIdx *= aoDims[j - 1]->GetSize();
733
0
                nTileIdx += tileIndices[j];
734
0
            }
735
736
0
            if (!poArray->AllocateWorkingBuffers(
737
0
                    abyRawTileData, abyTmpRawTileData, abyDecodedTileData))
738
0
            {
739
0
                std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
740
0
                *jobStruct->pbGlobalStatus = false;
741
0
                break;
742
0
            }
743
744
0
            bool bIsEmpty = false;
745
0
            bool success = poArray->LoadTileData(tileIndices,
746
0
                                                 true,  // use mutex
747
0
                                                 psDecompressor, abyRawTileData,
748
0
                                                 abyTmpRawTileData,
749
0
                                                 abyDecodedTileData, bIsEmpty);
750
751
0
            std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
752
0
            if (!success)
753
0
            {
754
0
                *jobStruct->pbGlobalStatus = false;
755
0
                break;
756
0
            }
757
758
0
            CachedTile cachedTile;
759
0
            if (!bIsEmpty)
760
0
            {
761
0
                if (!abyDecodedTileData.empty())
762
0
                    std::swap(cachedTile.abyDecoded, abyDecodedTileData);
763
0
                else
764
0
                    std::swap(cachedTile.abyDecoded, abyRawTileData);
765
0
            }
766
0
            poArray->m_oMapTileIndexToCachedTile[nTileIdx] =
767
0
                std::move(cachedTile);
768
0
        }
769
770
0
        std::lock_guard<std::mutex> oLock(poArray->m_oMutex);
771
0
        (*jobStruct->pnRemainingThreads)--;
772
0
    };
773
774
    // Start jobs
775
0
    for (int i = 0; i < nThreads; i++)
776
0
    {
777
0
        if (!wtp->SubmitJob(JobFunc, &asJobStructs[i]))
778
0
        {
779
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
780
0
            bGlobalStatus = false;
781
0
            nRemainingThreads = i;
782
0
            break;
783
0
        }
784
0
    }
785
786
    // Wait for all jobs to be finished
787
0
    while (true)
788
0
    {
789
0
        {
790
0
            std::lock_guard<std::mutex> oLock(m_oMutex);
791
0
            if (nRemainingThreads == 0)
792
0
                break;
793
0
        }
794
0
        wtp->WaitEvent();
795
0
    }
796
797
0
    return bGlobalStatus;
798
0
}
799
800
/************************************************************************/
801
/*                    ZarrV2Array::FlushDirtyTile()                     */
802
/************************************************************************/
803
804
bool ZarrV2Array::FlushDirtyTile() const
805
476k
{
806
476k
    if (!m_bDirtyTile)
807
1.13k
        return true;
808
475k
    m_bDirtyTile = false;
809
810
475k
    std::string osFilename = BuildTileFilename(m_anCachedTiledIndices.data());
811
812
475k
    const size_t nSourceSize =
813
475k
        m_aoDtypeElts.back().nativeOffset + m_aoDtypeElts.back().nativeSize;
814
475k
    const auto &abyTile =
815
475k
        m_abyDecodedTileData.empty() ? m_abyRawTileData : m_abyDecodedTileData;
816
817
475k
    if (IsEmptyTile(abyTile))
818
258k
    {
819
258k
        m_bCachedTiledEmpty = true;
820
821
258k
        VSIStatBufL sStat;
822
258k
        if (VSIStatL(osFilename.c_str(), &sStat) == 0)
823
0
        {
824
0
            CPLDebugOnly(ZARR_DEBUG_KEY,
825
0
                         "Deleting tile %s that has now empty content",
826
0
                         osFilename.c_str());
827
0
            return VSIUnlink(osFilename.c_str()) == 0;
828
0
        }
829
258k
        return true;
830
258k
    }
831
832
217k
    if (!m_abyDecodedTileData.empty())
833
0
    {
834
0
        const size_t nDTSize = m_oType.GetSize();
835
0
        const size_t nValues = m_abyDecodedTileData.size() / nDTSize;
836
0
        GByte *pDst = &m_abyRawTileData[0];
837
0
        const GByte *pSrc = m_abyDecodedTileData.data();
838
0
        for (size_t i = 0; i < nValues;
839
0
             i++, pDst += nSourceSize, pSrc += nDTSize)
840
0
        {
841
0
            EncodeElt(m_aoDtypeElts, pSrc, pDst);
842
0
        }
843
0
    }
844
845
217k
    if (m_bFortranOrder && !m_aoDims.empty())
846
0
    {
847
0
        BlockTranspose(m_abyRawTileData, m_abyTmpRawTileData, false);
848
0
        std::swap(m_abyRawTileData, m_abyTmpRawTileData);
849
0
    }
850
851
217k
    size_t nRawDataSize = m_abyRawTileData.size();
852
217k
    for (const auto &oFilter : m_oFiltersArray)
853
0
    {
854
0
        const auto osFilterId = oFilter["id"].ToString();
855
0
        if (osFilterId == "quantize" || osFilterId == "fixedscaleoffset")
856
0
        {
857
0
            CPLError(CE_Failure, CPLE_NotSupported,
858
0
                     "%s filter not supported for writing", osFilterId.c_str());
859
0
            return false;
860
0
        }
861
0
        const auto psFilterCompressor =
862
0
            EQUAL(osFilterId.c_str(), "shuffle")
863
0
                ? ZarrGetShuffleCompressor()
864
0
                : CPLGetCompressor(osFilterId.c_str());
865
0
        CPLAssert(psFilterCompressor);
866
867
0
        CPLStringList aosOptions;
868
0
        for (const auto &obj : oFilter.GetChildren())
869
0
        {
870
0
            aosOptions.SetNameValue(obj.GetName().c_str(),
871
0
                                    obj.ToString().c_str());
872
0
        }
873
0
        void *out_buffer = &m_abyTmpRawTileData[0];
874
0
        size_t nOutSize = m_abyTmpRawTileData.size();
875
0
        if (!psFilterCompressor->pfnFunc(
876
0
                m_abyRawTileData.data(), nRawDataSize, &out_buffer, &nOutSize,
877
0
                aosOptions.List(), psFilterCompressor->user_data))
878
0
        {
879
0
            CPLError(CE_Failure, CPLE_AppDefined,
880
0
                     "Filter %s for tile %s failed", osFilterId.c_str(),
881
0
                     osFilename.c_str());
882
0
            return false;
883
0
        }
884
885
0
        nRawDataSize = nOutSize;
886
0
        std::swap(m_abyRawTileData, m_abyTmpRawTileData);
887
0
    }
888
889
217k
    if (m_osDimSeparator == "/")
890
0
    {
891
0
        std::string osDir = CPLGetDirnameSafe(osFilename.c_str());
892
0
        VSIStatBufL sStat;
893
0
        if (VSIStatL(osDir.c_str(), &sStat) != 0)
894
0
        {
895
0
            if (VSIMkdirRecursive(osDir.c_str(), 0755) != 0)
896
0
            {
897
0
                CPLError(CE_Failure, CPLE_AppDefined,
898
0
                         "Cannot create directory %s", osDir.c_str());
899
0
                return false;
900
0
            }
901
0
        }
902
0
    }
903
904
217k
    if (m_psCompressor == nullptr && m_psDecompressor != nullptr)
905
0
    {
906
        // Case of imagecodecs_tiff
907
908
0
        CPLError(CE_Failure, CPLE_NotSupported,
909
0
                 "Only decompression supported for '%s' compression method",
910
0
                 m_osDecompressorId.c_str());
911
0
        return false;
912
0
    }
913
914
217k
    VSILFILE *fp = VSIFOpenL(osFilename.c_str(), "wb");
915
217k
    if (fp == nullptr)
916
0
    {
917
0
        CPLError(CE_Failure, CPLE_AppDefined, "Cannot create tile %s",
918
0
                 osFilename.c_str());
919
0
        return false;
920
0
    }
921
922
217k
    bool bRet = true;
923
217k
    if (m_psCompressor == nullptr)
924
217k
    {
925
217k
        if (VSIFWriteL(m_abyRawTileData.data(), 1, nRawDataSize, fp) !=
926
217k
            nRawDataSize)
927
0
        {
928
0
            CPLError(CE_Failure, CPLE_AppDefined,
929
0
                     "Could not write tile %s correctly", osFilename.c_str());
930
0
            bRet = false;
931
0
        }
932
217k
    }
933
0
    else
934
0
    {
935
0
        std::vector<GByte> abyCompressedData;
936
0
        try
937
0
        {
938
0
            constexpr size_t MIN_BUF_SIZE = 64;  // somewhat arbitrary
939
0
            abyCompressedData.resize(static_cast<size_t>(
940
0
                MIN_BUF_SIZE + nRawDataSize + nRawDataSize / 3));
941
0
        }
942
0
        catch (const std::exception &)
943
0
        {
944
0
            CPLError(CE_Failure, CPLE_OutOfMemory,
945
0
                     "Cannot allocate memory for tile %s", osFilename.c_str());
946
0
            bRet = false;
947
0
        }
948
949
0
        if (bRet)
950
0
        {
951
0
            void *out_buffer = &abyCompressedData[0];
952
0
            size_t out_size = abyCompressedData.size();
953
0
            CPLStringList aosOptions;
954
0
            const auto &compressorConfig = m_oCompressorJSon;
955
0
            for (const auto &obj : compressorConfig.GetChildren())
956
0
            {
957
0
                aosOptions.SetNameValue(obj.GetName().c_str(),
958
0
                                        obj.ToString().c_str());
959
0
            }
960
0
            if (EQUAL(m_psCompressor->pszId, "blosc") &&
961
0
                m_oType.GetClass() == GEDTC_NUMERIC)
962
0
            {
963
0
                aosOptions.SetNameValue(
964
0
                    "TYPESIZE",
965
0
                    CPLSPrintf("%d", GDALGetDataTypeSizeBytes(
966
0
                                         GDALGetNonComplexDataType(
967
0
                                             m_oType.GetNumericDataType()))));
968
0
            }
969
970
0
            if (!m_psCompressor->pfnFunc(
971
0
                    m_abyRawTileData.data(), nRawDataSize, &out_buffer,
972
0
                    &out_size, aosOptions.List(), m_psCompressor->user_data))
973
0
            {
974
0
                CPLError(CE_Failure, CPLE_AppDefined,
975
0
                         "Compression of tile %s failed", osFilename.c_str());
976
0
                bRet = false;
977
0
            }
978
0
            abyCompressedData.resize(out_size);
979
0
        }
980
981
0
        if (bRet &&
982
0
            VSIFWriteL(abyCompressedData.data(), 1, abyCompressedData.size(),
983
0
                       fp) != abyCompressedData.size())
984
0
        {
985
0
            CPLError(CE_Failure, CPLE_AppDefined,
986
0
                     "Could not write tile %s correctly", osFilename.c_str());
987
0
            bRet = false;
988
0
        }
989
0
    }
990
217k
    VSIFCloseL(fp);
991
992
217k
    return bRet;
993
217k
}
994
995
/************************************************************************/
996
/*                          BuildTileFilename()                         */
997
/************************************************************************/
998
999
std::string ZarrV2Array::BuildTileFilename(const uint64_t *tileIndices) const
1000
937k
{
1001
937k
    std::string osFilename;
1002
937k
    if (m_aoDims.empty())
1003
0
    {
1004
0
        osFilename = "0";
1005
0
    }
1006
937k
    else
1007
937k
    {
1008
3.74M
        for (size_t i = 0; i < m_aoDims.size(); ++i)
1009
2.81M
        {
1010
2.81M
            if (!osFilename.empty())
1011
1.87M
                osFilename += m_osDimSeparator;
1012
2.81M
            osFilename += std::to_string(tileIndices[i]);
1013
2.81M
        }
1014
937k
    }
1015
1016
937k
    return CPLFormFilenameSafe(CPLGetDirnameSafe(m_osFilename.c_str()).c_str(),
1017
937k
                               osFilename.c_str(), nullptr);
1018
937k
}
1019
1020
/************************************************************************/
1021
/*                          GetDataDirectory()                          */
1022
/************************************************************************/
1023
1024
std::string ZarrV2Array::GetDataDirectory() const
1025
0
{
1026
0
    return CPLGetDirnameSafe(m_osFilename.c_str());
1027
0
}
1028
1029
/************************************************************************/
1030
/*                        GetTileIndicesFromFilename()                  */
1031
/************************************************************************/
1032
1033
/**
 * Split a tile filename into its components, using m_osDimSeparator as the
 * set of delimiters and no special tokenization flag.
 *
 * @param pszFilename Tile name to split.
 * @return the list of tokens.
 */
CPLStringList
ZarrV2Array::GetTileIndicesFromFilename(const char *pszFilename) const
{
    const char *pszDelimiters = m_osDimSeparator.c_str();
    char **papszTokens = CSLTokenizeString2(pszFilename, pszDelimiters, 0);
    // The CPLStringList wraps the list returned by the tokenizer.
    return CPLStringList(papszTokens);
}
1039
1040
/************************************************************************/
1041
/*                             ParseDtype()                             */
1042
/************************************************************************/
1043
1044
static size_t GetAlignment(const CPLJSONObject &obj)
1045
0
{
1046
0
    if (obj.GetType() == CPLJSONObject::Type::String)
1047
0
    {
1048
0
        const auto str = obj.ToString();
1049
0
        if (str.size() < 3)
1050
0
            return 1;
1051
0
        const char chType = str[1];
1052
0
        const int nBytes = atoi(str.c_str() + 2);
1053
0
        if (chType == 'S')
1054
0
            return sizeof(char *);
1055
0
        if (chType == 'c' && nBytes == 8)
1056
0
            return sizeof(float);
1057
0
        if (chType == 'c' && nBytes == 16)
1058
0
            return sizeof(double);
1059
0
        return nBytes;
1060
0
    }
1061
0
    else if (obj.GetType() == CPLJSONObject::Type::Array)
1062
0
    {
1063
0
        const auto oArray = obj.ToArray();
1064
0
        size_t nAlignment = 1;
1065
0
        for (const auto &oElt : oArray)
1066
0
        {
1067
0
            const auto oEltArray = oElt.ToArray();
1068
0
            if (!oEltArray.IsValid() || oEltArray.Size() != 2 ||
1069
0
                oEltArray[0].GetType() != CPLJSONObject::Type::String)
1070
0
            {
1071
0
                return 1;
1072
0
            }
1073
0
            nAlignment = std::max(nAlignment, GetAlignment(oEltArray[1]));
1074
0
            if (nAlignment == sizeof(void *))
1075
0
                break;
1076
0
        }
1077
0
        return nAlignment;
1078
0
    }
1079
0
    return 1;
1080
0
}
1081
1082
/**
 * Convert a Zarr V2 "dtype" JSON value into a GDAL extended data type.
 *
 * Two forms are handled:
 *  - a string made of a byte-order character, a type character and a size
 *    (e.g. "<f4"), describing a single element;
 *  - an array of [name, dtype] pairs, describing a compound type, parsed
 *    recursively.
 *
 * @param obj  JSON value of the dtype.
 * @param elts Receives one DtypeElt per elementary member, appended in
 *             parsing order. Elements appended before an error is detected
 *             are left in place.
 * @return the matching type, or a numeric type of GDT_Unknown on error (an
 *         error is emitted in that case).
 */
static GDALExtendedDataType ParseDtype(const CPLJSONObject &obj,
                                       std::vector<DtypeElt> &elts)
{
    // Common exit for every unsupported / malformed input.
    const auto ReportInvalid = [&obj]()
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Invalid or unsupported format for dtype: %s",
                 obj.ToString().c_str());
        return GDALExtendedDataType::Create(GDT_Unknown);
    };

    // Smallest multiple of nAlign that is >= nOffset.
    const auto RoundUpTo = [](size_t nOffset, size_t nAlign)
    {
        const size_t nRemainder = nOffset % nAlign;
        return nRemainder == 0 ? nOffset : nOffset + (nAlign - nRemainder);
    };

    const auto eJSONType = obj.GetType();

    if (eJSONType == CPLJSONObject::Type::String)
    {
        const std::string osDtype = obj.ToString();
        if (osDtype.size() < 3)
            return ReportInvalid();

        const char chByteOrder = osDtype[0];
        const char chKind = osDtype[1];
        const int nDeclaredSize = atoi(osDtype.c_str() + 2);
        if (nDeclaredSize <= 0 || nDeclaredSize >= 1000)
            return ReportInvalid();

        DtypeElt elt;

        // Byte order only matters for multi-byte non-'S' values, and for
        // 'U' strings whatever their declared size.
        elt.needByteSwapping = false;
        const bool bByteOrderMatters =
            chKind == 'U' || (chKind != 'S' && nDeclaredSize > 1);
        if (bByteOrderMatters)
        {
            if (chByteOrder == '<')
                elt.needByteSwapping = (CPL_IS_LSB == 0);
            else if (chByteOrder == '>')
                elt.needByteSwapping = (CPL_IS_LSB != 0);
        }

        // Native members are packed one right after the other.
        if (!elts.empty())
        {
            const auto &oPrevious = elts.back();
            elt.nativeOffset = oPrevious.nativeOffset + oPrevious.nativeSize;
        }
        elt.nativeSize = nDeclaredSize;

        if (chKind == 'S')
        {
            elt.nativeType = DtypeElt::NativeType::STRING_ASCII;
            elt.gdalType = GDALExtendedDataType::CreateString(nDeclaredSize);
            elt.gdalSize = elt.gdalType.GetSize();
            elts.emplace_back(elt);
            return GDALExtendedDataType::CreateString(nDeclaredSize);
        }

        if (chKind == 'U')
        {
            elt.nativeType = DtypeElt::NativeType::STRING_UNICODE;
            // The dtype declares a number of UCS4 characters: store the
            // size as bytes.
            elt.nativeSize *= 4;
            // A UCS4 length cannot really be mapped to a UTF-8 one, hence
            // a string type without maximum length.
            elt.gdalType = GDALExtendedDataType::CreateString();
            elt.gdalSize = elt.gdalType.GetSize();
            elts.emplace_back(elt);
            return GDALExtendedDataType::CreateString();
        }

        // Supported (type character, size) pairs for numeric values.
        struct NumericDtype
        {
            char chKind;
            int nSize;
            DtypeElt::NativeType eNativeType;
            GDALDataType eGDALType;
        };

        static constexpr NumericDtype asNumericDtypes[] = {
            {'b', 1, DtypeElt::NativeType::BOOLEAN, GDT_UInt8},
            {'u', 1, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt8},
            {'u', 2, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt16},
            {'u', 4, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt32},
            {'u', 8, DtypeElt::NativeType::UNSIGNED_INT, GDT_UInt64},
            {'i', 1, DtypeElt::NativeType::SIGNED_INT, GDT_Int8},
            {'i', 2, DtypeElt::NativeType::SIGNED_INT, GDT_Int16},
            {'i', 4, DtypeElt::NativeType::SIGNED_INT, GDT_Int32},
            {'i', 8, DtypeElt::NativeType::SIGNED_INT, GDT_Int64},
            {'f', 2, DtypeElt::NativeType::IEEEFP, GDT_Float16},
            {'f', 4, DtypeElt::NativeType::IEEEFP, GDT_Float32},
            {'f', 8, DtypeElt::NativeType::IEEEFP, GDT_Float64},
            {'c', 8, DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat32},
            {'c', 16, DtypeElt::NativeType::COMPLEX_IEEEFP, GDT_CFloat64},
        };

        const NumericDtype *psMatch = nullptr;
        for (const auto &sCandidate : asNumericDtypes)
        {
            if (sCandidate.chKind == chKind &&
                sCandidate.nSize == nDeclaredSize)
            {
                psMatch = &sCandidate;
                break;
            }
        }
        if (psMatch == nullptr)
            return ReportInvalid();

        elt.nativeType = psMatch->eNativeType;
        elt.gdalType = GDALExtendedDataType::Create(psMatch->eGDALType);
        elt.gdalSize = elt.gdalType.GetSize();
        elts.emplace_back(elt);
        return GDALExtendedDataType::Create(psMatch->eGDALType);
    }

    if (eJSONType == CPLJSONObject::Type::Array)
    {
        const auto oMembers = obj.ToArray();
        std::vector<std::unique_ptr<GDALEDTComponent>> apoComponents;
        size_t nCurOffset = 0;
        size_t nMaxAlignment = 1;

        for (const auto &oMember : oMembers)
        {
            // Each member must be a [name, dtype] pair.
            const auto oPair = oMember.ToArray();
            const bool bWellFormed =
                oPair.IsValid() && oPair.Size() == 2 &&
                oPair[0].GetType() == CPLJSONObject::Type::String;
            if (!bWellFormed)
                return ReportInvalid();

            GDALExtendedDataType oMemberType = ParseDtype(oPair[1], elts);
            if (oMemberType.GetClass() == GEDTC_NUMERIC &&
                oMemberType.GetNumericDataType() == GDT_Unknown)
            {
                return ReportInvalid();
            }

            // Add padding so that the member starts on its alignment.
            const size_t nMemberAlignment = GetAlignment(oPair[1]);
            assert(nMemberAlignment);
            nMaxAlignment = std::max(nMaxAlignment, nMemberAlignment);
            nCurOffset = RoundUpTo(nCurOffset, nMemberAlignment);

            apoComponents.emplace_back(std::make_unique<GDALEDTComponent>(
                oPair[0].ToString(), nCurOffset, oMemberType));
            nCurOffset += oMemberType.GetSize();
        }

        // The total size is padded to the largest member alignment.
        const size_t nTotalSize = RoundUpTo(nCurOffset, nMaxAlignment);
        return GDALExtendedDataType::Create(obj.ToString(), nTotalSize,
                                            std::move(apoComponents));
    }

    return ReportInvalid();
}
1268
1269
static void SetGDALOffset(const GDALExtendedDataType &dt,
1270
                          const size_t nBaseOffset, std::vector<DtypeElt> &elts,
1271
                          size_t &iCurElt)
1272
0
{
1273
0
    if (dt.GetClass() == GEDTC_COMPOUND)
1274
0
    {
1275
0
        const auto &comps = dt.GetComponents();
1276
0
        for (const auto &comp : comps)
1277
0
        {
1278
0
            const size_t nBaseOffsetSub = nBaseOffset + comp->GetOffset();
1279
0
            SetGDALOffset(comp->GetType(), nBaseOffsetSub, elts, iCurElt);
1280
0
        }
1281
0
    }
1282
0
    else
1283
0
    {
1284
0
        elts[iCurElt].gdalOffset = nBaseOffset;
1285
0
        iCurElt++;
1286
0
    }
1287
0
}
1288
1289
/************************************************************************/
1290
/*                     ZarrV2Group::LoadArray()                         */
1291
/************************************************************************/
1292
1293
std::shared_ptr<ZarrArray>
1294
ZarrV2Group::LoadArray(const std::string &osArrayName,
1295
                       const std::string &osZarrayFilename,
1296
                       const CPLJSONObject &oRoot, bool bLoadedFromZMetadata,
1297
                       const CPLJSONObject &oAttributesIn) const
1298
0
{
1299
    // Add osZarrayFilename to m_poSharedResource during the scope
1300
    // of this function call.
1301
0
    ZarrSharedResource::SetFilenameAdder filenameAdder(m_poSharedResource,
1302
0
                                                       osZarrayFilename);
1303
0
    if (!filenameAdder.ok())
1304
0
        return nullptr;
1305
1306
0
    const auto osFormat = oRoot["zarr_format"].ToString();
1307
0
    if (osFormat != "2")
1308
0
    {
1309
0
        CPLError(CE_Failure, CPLE_NotSupported,
1310
0
                 "Invalid value for zarr_format: %s", osFormat.c_str());
1311
0
        return nullptr;
1312
0
    }
1313
1314
0
    bool bFortranOrder = false;
1315
0
    const char *orderKey = "order";
1316
0
    const auto osOrder = oRoot[orderKey].ToString();
1317
0
    if (osOrder == "C")
1318
0
    {
1319
        // ok
1320
0
    }
1321
0
    else if (osOrder == "F")
1322
0
    {
1323
0
        bFortranOrder = true;
1324
0
    }
1325
0
    else
1326
0
    {
1327
0
        CPLError(CE_Failure, CPLE_NotSupported, "Invalid value for %s",
1328
0
                 orderKey);
1329
0
        return nullptr;
1330
0
    }
1331
1332
0
    const auto oShape = oRoot["shape"].ToArray();
1333
0
    if (!oShape.IsValid())
1334
0
    {
1335
0
        CPLError(CE_Failure, CPLE_AppDefined, "shape missing or not an array");
1336
0
        return nullptr;
1337
0
    }
1338
1339
0
    const char *chunksKey = "chunks";
1340
0
    const auto oChunks = oRoot[chunksKey].ToArray();
1341
0
    if (!oChunks.IsValid())
1342
0
    {
1343
0
        CPLError(CE_Failure, CPLE_AppDefined, "%s missing or not an array",
1344
0
                 chunksKey);
1345
0
        return nullptr;
1346
0
    }
1347
1348
0
    if (oShape.Size() != oChunks.Size())
1349
0
    {
1350
0
        CPLError(CE_Failure, CPLE_AppDefined,
1351
0
                 "shape and chunks arrays are of different size");
1352
0
        return nullptr;
1353
0
    }
1354
1355
0
    CPLJSONObject oAttributes(oAttributesIn);
1356
0
    if (!bLoadedFromZMetadata)
1357
0
    {
1358
0
        CPLJSONDocument oDoc;
1359
0
        const std::string osZattrsFilename(CPLFormFilenameSafe(
1360
0
            CPLGetDirnameSafe(osZarrayFilename.c_str()).c_str(), ".zattrs",
1361
0
            nullptr));
1362
0
        CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
1363
0
        if (oDoc.Load(osZattrsFilename))
1364
0
        {
1365
0
            oAttributes = oDoc.GetRoot();
1366
0
        }
1367
0
    }
1368
1369
    // Deep-clone of oAttributes
1370
0
    {
1371
0
        CPLJSONDocument oTmpDoc;
1372
0
        oTmpDoc.SetRoot(oAttributes);
1373
0
        CPL_IGNORE_RET_VAL(oTmpDoc.LoadMemory(oTmpDoc.SaveAsString()));
1374
0
        oAttributes = oTmpDoc.GetRoot();
1375
0
    }
1376
1377
0
    std::vector<std::shared_ptr<GDALDimension>> aoDims;
1378
0
    for (int i = 0; i < oShape.Size(); ++i)
1379
0
    {
1380
0
        const auto nSize = static_cast<GUInt64>(oShape[i].ToLong());
1381
0
        if (nSize == 0)
1382
0
        {
1383
0
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid content for shape");
1384
0
            return nullptr;
1385
0
        }
1386
0
        aoDims.emplace_back(std::make_shared<ZarrDimension>(
1387
0
            m_poSharedResource,
1388
0
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1389
0
            std::string(), CPLSPrintf("dim%d", i), std::string(), std::string(),
1390
0
            nSize));
1391
0
    }
1392
1393
    // XArray extension
1394
0
    const auto arrayDimensionsObj = oAttributes["_ARRAY_DIMENSIONS"];
1395
1396
0
    const auto FindDimension =
1397
0
        [this, &aoDims, bLoadedFromZMetadata, &osArrayName,
1398
0
         &oAttributes](const std::string &osDimName,
1399
0
                       std::shared_ptr<GDALDimension> &poDim, int i)
1400
0
    {
1401
0
        auto oIter = m_oMapDimensions.find(osDimName);
1402
0
        if (oIter != m_oMapDimensions.end())
1403
0
        {
1404
0
            if (m_bDimSizeInUpdate ||
1405
0
                oIter->second->GetSize() == poDim->GetSize())
1406
0
            {
1407
0
                poDim = oIter->second;
1408
0
                return true;
1409
0
            }
1410
0
            else
1411
0
            {
1412
0
                CPLError(CE_Warning, CPLE_AppDefined,
1413
0
                         "Size of _ARRAY_DIMENSIONS[%d] different "
1414
0
                         "from the one of shape",
1415
0
                         i);
1416
0
                return false;
1417
0
            }
1418
0
        }
1419
1420
        // Try to load the indexing variable.
1421
1422
        // If loading from zmetadata, we should have normally
1423
        // already loaded the dimension variables, unless they
1424
        // are in a upper level.
1425
0
        if (bLoadedFromZMetadata && osArrayName != osDimName &&
1426
0
            m_oMapMDArrays.find(osDimName) == m_oMapMDArrays.end())
1427
0
        {
1428
0
            auto poParent = m_poParent.lock();
1429
0
            while (poParent != nullptr)
1430
0
            {
1431
0
                oIter = poParent->m_oMapDimensions.find(osDimName);
1432
0
                if (oIter != poParent->m_oMapDimensions.end() &&
1433
0
                    oIter->second->GetSize() == poDim->GetSize())
1434
0
                {
1435
0
                    poDim = oIter->second;
1436
0
                    return true;
1437
0
                }
1438
0
                poParent = poParent->m_poParent.lock();
1439
0
            }
1440
0
        }
1441
1442
        // Not loading from zmetadata, and not in m_oMapMDArrays,
1443
        // then stat() the indexing variable.
1444
0
        else if (!bLoadedFromZMetadata && osArrayName != osDimName &&
1445
0
                 m_oMapMDArrays.find(osDimName) == m_oMapMDArrays.end())
1446
0
        {
1447
0
            std::string osDirName = m_osDirectoryName;
1448
0
            while (true)
1449
0
            {
1450
0
                if (CPLHasPathTraversal(osDimName.c_str()))
1451
0
                {
1452
0
                    CPLError(CE_Failure, CPLE_AppDefined,
1453
0
                             "Path traversal detected in %s",
1454
0
                             osDimName.c_str());
1455
0
                    return false;
1456
0
                }
1457
0
                const std::string osArrayFilenameDim = CPLFormFilenameSafe(
1458
0
                    CPLFormFilenameSafe(osDirName.c_str(), osDimName.c_str(),
1459
0
                                        nullptr)
1460
0
                        .c_str(),
1461
0
                    ".zarray", nullptr);
1462
0
                VSIStatBufL sStat;
1463
0
                if (VSIStatL(osArrayFilenameDim.c_str(), &sStat) == 0)
1464
0
                {
1465
0
                    CPLJSONDocument oDoc;
1466
0
                    if (oDoc.Load(osArrayFilenameDim))
1467
0
                    {
1468
0
                        LoadArray(osDimName, osArrayFilenameDim, oDoc.GetRoot(),
1469
0
                                  false, CPLJSONObject());
1470
0
                    }
1471
0
                }
1472
0
                else
1473
0
                {
1474
0
                    if ((cpl::starts_with(osDirName, JSON_REF_FS_PREFIX) ||
1475
0
                         cpl::starts_with(osDirName, PARQUET_REF_FS_PREFIX)) &&
1476
0
                        osDirName.back() == '}')
1477
0
                    {
1478
0
                        break;
1479
0
                    }
1480
1481
                    // Recurse to upper level for datasets such as
1482
                    // /vsis3/hrrrzarr/sfc/20210809/20210809_00z_anl.zarr/0.1_sigma_level/HAIL_max_fcst/0.1_sigma_level/HAIL_max_fcst
1483
0
                    std::string osDirNameNew =
1484
0
                        CPLGetPathSafe(osDirName.c_str());
1485
0
                    if (!osDirNameNew.empty() && osDirNameNew != osDirName)
1486
0
                    {
1487
0
                        osDirName = std::move(osDirNameNew);
1488
0
                        continue;
1489
0
                    }
1490
0
                }
1491
0
                break;
1492
0
            }
1493
0
        }
1494
1495
0
        oIter = m_oMapDimensions.find(osDimName);
1496
0
        if (oIter != m_oMapDimensions.end() &&
1497
0
            oIter->second->GetSize() == poDim->GetSize())
1498
0
        {
1499
0
            poDim = oIter->second;
1500
0
            return true;
1501
0
        }
1502
1503
0
        std::string osType;
1504
0
        std::string osDirection;
1505
0
        if (aoDims.size() == 1 && osArrayName == osDimName)
1506
0
        {
1507
0
            ZarrArray::GetDimensionTypeDirection(oAttributes, osType,
1508
0
                                                 osDirection);
1509
0
        }
1510
1511
0
        auto poDimLocal = std::make_shared<ZarrDimension>(
1512
0
            m_poSharedResource,
1513
0
            std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock()),
1514
0
            GetFullName(), osDimName, osType, osDirection, poDim->GetSize());
1515
0
        poDimLocal->SetXArrayDimension();
1516
0
        m_oMapDimensions[osDimName] = poDimLocal;
1517
0
        poDim = poDimLocal;
1518
0
        return true;
1519
0
    };
1520
1521
0
    if (arrayDimensionsObj.GetType() == CPLJSONObject::Type::Array)
1522
0
    {
1523
0
        const auto arrayDims = arrayDimensionsObj.ToArray();
1524
0
        if (arrayDims.Size() == oShape.Size())
1525
0
        {
1526
0
            bool ok = true;
1527
0
            for (int i = 0; i < oShape.Size(); ++i)
1528
0
            {
1529
0
                if (arrayDims[i].GetType() == CPLJSONObject::Type::String)
1530
0
                {
1531
0
                    const auto osDimName = arrayDims[i].ToString();
1532
0
                    ok &= FindDimension(osDimName, aoDims[i], i);
1533
0
                }
1534
0
            }
1535
0
            if (ok)
1536
0
            {
1537
0
                oAttributes.Delete("_ARRAY_DIMENSIONS");
1538
0
            }
1539
0
        }
1540
0
        else
1541
0
        {
1542
0
            CPLError(
1543
0
                CE_Warning, CPLE_AppDefined,
1544
0
                "Size of _ARRAY_DIMENSIONS different from the one of shape");
1545
0
        }
1546
0
    }
1547
1548
    // _NCZARR_ARRAY extension
1549
0
    const auto nczarrArrayDimrefs = oRoot["_NCZARR_ARRAY"]["dimrefs"].ToArray();
1550
0
    if (nczarrArrayDimrefs.IsValid())
1551
0
    {
1552
0
        const auto arrayDims = nczarrArrayDimrefs.ToArray();
1553
0
        if (arrayDims.Size() == oShape.Size())
1554
0
        {
1555
0
            auto poRG =
1556
0
                std::dynamic_pointer_cast<ZarrGroupBase>(m_pSelf.lock());
1557
0
            CPLAssert(poRG != nullptr);
1558
0
            while (true)
1559
0
            {
1560
0
                auto poNewRG = poRG->m_poParent.lock();
1561
0
                if (poNewRG == nullptr)
1562
0
                    break;
1563
0
                poRG = std::move(poNewRG);
1564
0
            }
1565
1566
0
            for (int i = 0; i < oShape.Size(); ++i)
1567
0
            {
1568
0
                if (arrayDims[i].GetType() == CPLJSONObject::Type::String)
1569
0
                {
1570
0
                    const auto osDimFullpath = arrayDims[i].ToString();
1571
0
                    const std::string osArrayFullname =
1572
0
                        (GetFullName() != "/" ? GetFullName() : std::string()) +
1573
0
                        '/' + osArrayName;
1574
0
                    if (aoDims.size() == 1 &&
1575
0
                        (osDimFullpath == osArrayFullname ||
1576
0
                         osDimFullpath == "/" + osArrayFullname))
1577
0
                    {
1578
                        // If this is an indexing variable, then fetch the
1579
                        // dimension type and direction, and patch the dimension
1580
0
                        std::string osType;
1581
0
                        std::string osDirection;
1582
0
                        ZarrArray::GetDimensionTypeDirection(
1583
0
                            oAttributes, osType, osDirection);
1584
1585
0
                        auto poDimLocal = std::make_shared<ZarrDimension>(
1586
0
                            m_poSharedResource,
1587
0
                            std::dynamic_pointer_cast<ZarrGroupBase>(
1588
0
                                m_pSelf.lock()),
1589
0
                            GetFullName(), osArrayName, osType, osDirection,
1590
0
                            aoDims[i]->GetSize());
1591
0
                        aoDims[i] = poDimLocal;
1592
1593
0
                        m_oMapDimensions[osArrayName] = std::move(poDimLocal);
1594
0
                    }
1595
0
                    else if (auto poDim =
1596
0
                                 poRG->OpenDimensionFromFullname(osDimFullpath))
1597
0
                    {
1598
0
                        if (poDim->GetSize() != aoDims[i]->GetSize())
1599
0
                        {
1600
0
                            CPLError(CE_Failure, CPLE_AppDefined,
1601
0
                                     "Inconsistency in size between NCZarr "
1602
0
                                     "dimension %s and regular dimension",
1603
0
                                     osDimFullpath.c_str());
1604
0
                        }
1605
0
                        else
1606
0
                        {
1607
0
                            aoDims[i] = std::move(poDim);
1608
0
                        }
1609
0
                    }
1610
0
                    else
1611
0
                    {
1612
0
                        CPLError(CE_Failure, CPLE_AppDefined,
1613
0
                                 "Cannot find NCZarr dimension %s",
1614
0
                                 osDimFullpath.c_str());
1615
0
                    }
1616
0
                }
1617
0
            }
1618
0
        }
1619
0
        else
1620
0
        {
1621
0
            CPLError(CE_Warning, CPLE_AppDefined,
1622
0
                     "Size of _NCZARR_ARRAY.dimrefs different from the one of "
1623
0
                     "shape");
1624
0
        }
1625
0
    }
1626
1627
0
    constexpr const char *dtypeKey = "dtype";
1628
0
    auto oDtype = oRoot[dtypeKey];
1629
0
    if (!oDtype.IsValid())
1630
0
    {
1631
0
        CPLError(CE_Failure, CPLE_NotSupported, "%s missing", dtypeKey);
1632
0
        return nullptr;
1633
0
    }
1634
0
    std::vector<DtypeElt> aoDtypeElts;
1635
0
    const auto oType = ParseDtype(oDtype, aoDtypeElts);
1636
0
    if (oType.GetClass() == GEDTC_NUMERIC &&
1637
0
        oType.GetNumericDataType() == GDT_Unknown)
1638
0
        return nullptr;
1639
0
    size_t iCurElt = 0;
1640
0
    SetGDALOffset(oType, 0, aoDtypeElts, iCurElt);
1641
1642
0
    std::vector<GUInt64> anBlockSize;
1643
0
    if (!ZarrArray::ParseChunkSize(oChunks, oType, anBlockSize))
1644
0
        return nullptr;
1645
1646
0
    std::string osDimSeparator = oRoot["dimension_separator"].ToString();
1647
0
    if (osDimSeparator.empty())
1648
0
        osDimSeparator = ".";
1649
1650
0
    std::vector<GByte> abyNoData;
1651
1652
0
    struct NoDataFreer
1653
0
    {
1654
0
        std::vector<GByte> &m_abyNodata;
1655
0
        const GDALExtendedDataType &m_oType;
1656
1657
0
        NoDataFreer(std::vector<GByte> &abyNoDataIn,
1658
0
                    const GDALExtendedDataType &oTypeIn)
1659
0
            : m_abyNodata(abyNoDataIn), m_oType(oTypeIn)
1660
0
        {
1661
0
        }
1662
1663
0
        ~NoDataFreer()
1664
0
        {
1665
0
            if (!m_abyNodata.empty())
1666
0
                m_oType.FreeDynamicMemory(&m_abyNodata[0]);
1667
0
        }
1668
0
    };
1669
1670
0
    NoDataFreer NoDataFreer(abyNoData, oType);
1671
1672
0
    auto oFillValue = oRoot["fill_value"];
1673
0
    auto eFillValueType = oFillValue.GetType();
1674
1675
    // Normally arrays are not supported, but that's what NCZarr 4.8.0 outputs
1676
0
    if (eFillValueType == CPLJSONObject::Type::Array &&
1677
0
        oFillValue.ToArray().Size() == 1)
1678
0
    {
1679
0
        oFillValue = oFillValue.ToArray()[0];
1680
0
        eFillValueType = oFillValue.GetType();
1681
0
    }
1682
1683
0
    if (!oFillValue.IsValid())
1684
0
    {
1685
        // fill_value is normally required but some implementations
1686
        // are lacking it: https://github.com/Unidata/netcdf-c/issues/2059
1687
0
        CPLError(CE_Warning, CPLE_AppDefined, "fill_value missing");
1688
0
    }
1689
0
    else if (eFillValueType == CPLJSONObject::Type::Null)
1690
0
    {
1691
        // Nothing to do
1692
0
    }
1693
0
    else if (eFillValueType == CPLJSONObject::Type::String)
1694
0
    {
1695
0
        const auto osFillValue = oFillValue.ToString();
1696
0
        if (oType.GetClass() == GEDTC_NUMERIC &&
1697
0
            CPLGetValueType(osFillValue.c_str()) != CPL_VALUE_STRING)
1698
0
        {
1699
0
            abyNoData.resize(oType.GetSize());
1700
            // Be tolerant with numeric values serialized as strings.
1701
0
            if (oType.GetNumericDataType() == GDT_Int64)
1702
0
            {
1703
0
                const int64_t nVal = static_cast<int64_t>(
1704
0
                    std::strtoll(osFillValue.c_str(), nullptr, 10));
1705
0
                GDALCopyWords(&nVal, GDT_Int64, 0, &abyNoData[0],
1706
0
                              oType.GetNumericDataType(), 0, 1);
1707
0
            }
1708
0
            else if (oType.GetNumericDataType() == GDT_UInt64)
1709
0
            {
1710
0
                const uint64_t nVal = static_cast<uint64_t>(
1711
0
                    std::strtoull(osFillValue.c_str(), nullptr, 10));
1712
0
                GDALCopyWords(&nVal, GDT_UInt64, 0, &abyNoData[0],
1713
0
                              oType.GetNumericDataType(), 0, 1);
1714
0
            }
1715
0
            else
1716
0
            {
1717
0
                const double dfNoDataValue = CPLAtof(osFillValue.c_str());
1718
0
                GDALCopyWords(&dfNoDataValue, GDT_Float64, 0, &abyNoData[0],
1719
0
                              oType.GetNumericDataType(), 0, 1);
1720
0
            }
1721
0
        }
1722
0
        else if (oType.GetClass() == GEDTC_NUMERIC)
1723
0
        {
1724
0
            double dfNoDataValue;
1725
0
            if (osFillValue == "NaN")
1726
0
            {
1727
0
                dfNoDataValue = std::numeric_limits<double>::quiet_NaN();
1728
0
            }
1729
0
            else if (osFillValue == "Infinity")
1730
0
            {
1731
0
                dfNoDataValue = std::numeric_limits<double>::infinity();
1732
0
            }
1733
0
            else if (osFillValue == "-Infinity")
1734
0
            {
1735
0
                dfNoDataValue = -std::numeric_limits<double>::infinity();
1736
0
            }
1737
0
            else
1738
0
            {
1739
0
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1740
0
                return nullptr;
1741
0
            }
1742
0
            if (oType.GetNumericDataType() == GDT_Float16)
1743
0
            {
1744
0
                const GFloat16 hfNoDataValue =
1745
0
                    static_cast<GFloat16>(dfNoDataValue);
1746
0
                abyNoData.resize(sizeof(hfNoDataValue));
1747
0
                memcpy(&abyNoData[0], &hfNoDataValue, sizeof(hfNoDataValue));
1748
0
            }
1749
0
            if (oType.GetNumericDataType() == GDT_Float32)
1750
0
            {
1751
0
                const float fNoDataValue = static_cast<float>(dfNoDataValue);
1752
0
                abyNoData.resize(sizeof(fNoDataValue));
1753
0
                memcpy(&abyNoData[0], &fNoDataValue, sizeof(fNoDataValue));
1754
0
            }
1755
0
            else if (oType.GetNumericDataType() == GDT_Float64)
1756
0
            {
1757
0
                abyNoData.resize(sizeof(dfNoDataValue));
1758
0
                memcpy(&abyNoData[0], &dfNoDataValue, sizeof(dfNoDataValue));
1759
0
            }
1760
0
            else
1761
0
            {
1762
0
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1763
0
                return nullptr;
1764
0
            }
1765
0
        }
1766
0
        else if (oType.GetClass() == GEDTC_STRING)
1767
0
        {
1768
            // zarr.open('unicode_be.zarr', mode = 'w', shape=(1,), dtype =
1769
            // '>U1', compressor = None) oddly generates "fill_value": "0"
1770
0
            if (osFillValue != "0")
1771
0
            {
1772
0
                std::vector<GByte> abyNativeFillValue(osFillValue.size() + 1);
1773
0
                memcpy(&abyNativeFillValue[0], osFillValue.data(),
1774
0
                       osFillValue.size());
1775
0
                int nBytes = CPLBase64DecodeInPlace(&abyNativeFillValue[0]);
1776
0
                abyNativeFillValue.resize(nBytes + 1);
1777
0
                abyNativeFillValue[nBytes] = 0;
1778
0
                abyNoData.resize(oType.GetSize());
1779
0
                char *pDstStr = CPLStrdup(
1780
0
                    reinterpret_cast<const char *>(&abyNativeFillValue[0]));
1781
0
                char **pDstPtr = reinterpret_cast<char **>(&abyNoData[0]);
1782
0
                memcpy(pDstPtr, &pDstStr, sizeof(pDstStr));
1783
0
            }
1784
0
        }
1785
0
        else
1786
0
        {
1787
0
            std::vector<GByte> abyNativeFillValue(osFillValue.size() + 1);
1788
0
            memcpy(&abyNativeFillValue[0], osFillValue.data(),
1789
0
                   osFillValue.size());
1790
0
            int nBytes = CPLBase64DecodeInPlace(&abyNativeFillValue[0]);
1791
0
            abyNativeFillValue.resize(nBytes);
1792
0
            if (abyNativeFillValue.size() !=
1793
0
                aoDtypeElts.back().nativeOffset + aoDtypeElts.back().nativeSize)
1794
0
            {
1795
0
                CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1796
0
                return nullptr;
1797
0
            }
1798
0
            abyNoData.resize(oType.GetSize());
1799
0
            ZarrArray::DecodeSourceElt(aoDtypeElts, abyNativeFillValue.data(),
1800
0
                                       &abyNoData[0]);
1801
0
        }
1802
0
    }
1803
0
    else if (eFillValueType == CPLJSONObject::Type::Boolean ||
1804
0
             eFillValueType == CPLJSONObject::Type::Integer ||
1805
0
             eFillValueType == CPLJSONObject::Type::Long ||
1806
0
             eFillValueType == CPLJSONObject::Type::Double)
1807
0
    {
1808
0
        if (oType.GetClass() == GEDTC_NUMERIC)
1809
0
        {
1810
0
            const double dfNoDataValue = oFillValue.ToDouble();
1811
0
            if (oType.GetNumericDataType() == GDT_Int64)
1812
0
            {
1813
0
                const int64_t nNoDataValue =
1814
0
                    static_cast<int64_t>(oFillValue.ToLong());
1815
0
                abyNoData.resize(oType.GetSize());
1816
0
                GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1817
0
                              oType.GetNumericDataType(), 0, 1);
1818
0
            }
1819
0
            else if (oType.GetNumericDataType() == GDT_UInt64 &&
1820
                     /* we can't really deal with nodata value between */
1821
                     /* int64::max and uint64::max due to json-c limitations */
1822
0
                     dfNoDataValue >= 0)
1823
0
            {
1824
0
                const int64_t nNoDataValue =
1825
0
                    static_cast<int64_t>(oFillValue.ToLong());
1826
0
                abyNoData.resize(oType.GetSize());
1827
0
                GDALCopyWords(&nNoDataValue, GDT_Int64, 0, &abyNoData[0],
1828
0
                              oType.GetNumericDataType(), 0, 1);
1829
0
            }
1830
0
            else
1831
0
            {
1832
0
                abyNoData.resize(oType.GetSize());
1833
0
                GDALCopyWords(&dfNoDataValue, GDT_Float64, 0, &abyNoData[0],
1834
0
                              oType.GetNumericDataType(), 0, 1);
1835
0
            }
1836
0
        }
1837
0
        else
1838
0
        {
1839
0
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1840
0
            return nullptr;
1841
0
        }
1842
0
    }
1843
0
    else
1844
0
    {
1845
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid fill_value");
1846
0
        return nullptr;
1847
0
    }
1848
1849
0
    const CPLCompressor *psCompressor = nullptr;
1850
0
    const CPLCompressor *psDecompressor = nullptr;
1851
0
    const auto oCompressor = oRoot["compressor"];
1852
0
    std::string osDecompressorId("NONE");
1853
1854
0
    if (!oCompressor.IsValid())
1855
0
    {
1856
0
        CPLError(CE_Failure, CPLE_AppDefined, "compressor missing");
1857
0
        return nullptr;
1858
0
    }
1859
0
    if (oCompressor.GetType() == CPLJSONObject::Type::Null)
1860
0
    {
1861
        // nothing to do
1862
0
    }
1863
0
    else if (oCompressor.GetType() == CPLJSONObject::Type::Object)
1864
0
    {
1865
0
        osDecompressorId = oCompressor["id"].ToString();
1866
0
        if (osDecompressorId.empty())
1867
0
        {
1868
0
            CPLError(CE_Failure, CPLE_AppDefined, "Missing compressor id");
1869
0
            return nullptr;
1870
0
        }
1871
0
        if (osDecompressorId == "imagecodecs_tiff")
1872
0
        {
1873
0
            psDecompressor = ZarrGetTIFFDecompressor();
1874
0
        }
1875
0
        else
1876
0
        {
1877
0
            psCompressor = CPLGetCompressor(osDecompressorId.c_str());
1878
0
            psDecompressor = CPLGetDecompressor(osDecompressorId.c_str());
1879
0
            if (psCompressor == nullptr || psDecompressor == nullptr)
1880
0
            {
1881
0
                CPLError(CE_Failure, CPLE_AppDefined,
1882
0
                         "Decompressor %s not handled",
1883
0
                         osDecompressorId.c_str());
1884
0
                return nullptr;
1885
0
            }
1886
0
        }
1887
0
    }
1888
0
    else
1889
0
    {
1890
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid compressor");
1891
0
        return nullptr;
1892
0
    }
1893
1894
0
    CPLJSONArray oFiltersArray;
1895
0
    const auto oFilters = oRoot["filters"];
1896
0
    if (!oFilters.IsValid())
1897
0
    {
1898
0
        CPLError(CE_Failure, CPLE_AppDefined, "filters missing");
1899
0
        return nullptr;
1900
0
    }
1901
0
    if (oFilters.GetType() == CPLJSONObject::Type::Null)
1902
0
    {
1903
0
    }
1904
0
    else if (oFilters.GetType() == CPLJSONObject::Type::Array)
1905
0
    {
1906
0
        oFiltersArray = oFilters.ToArray();
1907
0
        for (const auto &oFilter : oFiltersArray)
1908
0
        {
1909
0
            const auto osFilterId = oFilter["id"].ToString();
1910
0
            if (osFilterId.empty())
1911
0
            {
1912
0
                CPLError(CE_Failure, CPLE_AppDefined, "Missing filter id");
1913
0
                return nullptr;
1914
0
            }
1915
0
            if (!EQUAL(osFilterId.c_str(), "shuffle") &&
1916
0
                !EQUAL(osFilterId.c_str(), "quantize") &&
1917
0
                !EQUAL(osFilterId.c_str(), "fixedscaleoffset"))
1918
0
            {
1919
0
                const auto psFilterCompressor =
1920
0
                    CPLGetCompressor(osFilterId.c_str());
1921
0
                const auto psFilterDecompressor =
1922
0
                    CPLGetDecompressor(osFilterId.c_str());
1923
0
                if (psFilterCompressor == nullptr ||
1924
0
                    psFilterDecompressor == nullptr)
1925
0
                {
1926
0
                    CPLError(CE_Failure, CPLE_AppDefined,
1927
0
                             "Filter %s not handled", osFilterId.c_str());
1928
0
                    return nullptr;
1929
0
                }
1930
0
            }
1931
0
        }
1932
0
    }
1933
0
    else
1934
0
    {
1935
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid filters");
1936
0
        return nullptr;
1937
0
    }
1938
1939
0
    auto poArray = ZarrV2Array::Create(m_poSharedResource, GetFullName(),
1940
0
                                       osArrayName, aoDims, oType, aoDtypeElts,
1941
0
                                       anBlockSize, bFortranOrder);
1942
0
    if (!poArray)
1943
0
        return nullptr;
1944
0
    poArray->SetCompressorJson(oCompressor);
1945
0
    poArray->SetUpdatable(m_bUpdatable);  // must be set before SetAttributes()
1946
0
    poArray->SetFilename(osZarrayFilename);
1947
0
    poArray->SetDimSeparator(osDimSeparator);
1948
0
    poArray->SetCompressorDecompressor(osDecompressorId, psCompressor,
1949
0
                                       psDecompressor);
1950
0
    poArray->SetFilters(oFiltersArray);
1951
0
    if (!abyNoData.empty())
1952
0
    {
1953
0
        poArray->RegisterNoDataValue(abyNoData.data());
1954
0
    }
1955
1956
0
    const auto gridMapping = oAttributes["grid_mapping"];
1957
0
    if (gridMapping.GetType() == CPLJSONObject::Type::String)
1958
0
    {
1959
0
        const std::string gridMappingName = gridMapping.ToString();
1960
0
        if (m_oMapMDArrays.find(gridMappingName) == m_oMapMDArrays.end())
1961
0
        {
1962
0
            if (CPLHasPathTraversal(gridMappingName.c_str()))
1963
0
            {
1964
0
                CPLError(CE_Failure, CPLE_AppDefined,
1965
0
                         "Path traversal detected in %s",
1966
0
                         gridMappingName.c_str());
1967
0
                return nullptr;
1968
0
            }
1969
0
            const std::string osArrayFilenameDim = CPLFormFilenameSafe(
1970
0
                CPLFormFilenameSafe(m_osDirectoryName.c_str(),
1971
0
                                    gridMappingName.c_str(), nullptr)
1972
0
                    .c_str(),
1973
0
                ".zarray", nullptr);
1974
0
            VSIStatBufL sStat;
1975
0
            if (VSIStatL(osArrayFilenameDim.c_str(), &sStat) == 0)
1976
0
            {
1977
0
                CPLJSONDocument oDoc;
1978
0
                if (oDoc.Load(osArrayFilenameDim))
1979
0
                {
1980
0
                    LoadArray(gridMappingName, osArrayFilenameDim,
1981
0
                              oDoc.GetRoot(), false, CPLJSONObject());
1982
0
                }
1983
0
            }
1984
0
        }
1985
0
    }
1986
1987
0
    poArray->ParseSpecialAttributes(m_pSelf.lock(), oAttributes);
1988
0
    poArray->SetAttributes(oAttributes);
1989
0
    poArray->SetDtype(oDtype);
1990
0
    RegisterArray(poArray);
1991
1992
    // If this is an indexing variable, attach it to the dimension.
1993
0
    if (aoDims.size() == 1 && aoDims[0]->GetName() == poArray->GetName())
1994
0
    {
1995
0
        auto oIter = m_oMapDimensions.find(poArray->GetName());
1996
0
        if (oIter != m_oMapDimensions.end())
1997
0
        {
1998
0
            oIter->second->SetIndexingVariable(poArray);
1999
0
        }
2000
0
    }
2001
2002
0
    if (CPLTestBool(m_poSharedResource->GetOpenOptions().FetchNameValueDef(
2003
0
            "CACHE_TILE_PRESENCE", "NO")))
2004
0
    {
2005
0
        poArray->CacheTilePresence();
2006
0
    }
2007
2008
0
    return poArray;
2009
0
}
2010
2011
/************************************************************************/
2012
/*                    ZarrV2Array::SetCompressorJson()                  */
2013
/************************************************************************/
2014
2015
void ZarrV2Array::SetCompressorJson(const CPLJSONObject &oCompressor)
2016
0
{
2017
0
    m_oCompressorJSon = oCompressor;
2018
0
    if (oCompressor.GetType() != CPLJSONObject::Type::Null)
2019
0
        m_aosStructuralInfo.SetNameValue("COMPRESSOR",
2020
0
                                         oCompressor.ToString().c_str());
2021
0
}
2022
2023
/************************************************************************/
2024
/*                     ZarrV2Array::SetFilters()                        */
2025
/************************************************************************/
2026
2027
void ZarrV2Array::SetFilters(const CPLJSONArray &oFiltersArray)
2028
246
{
2029
246
    m_oFiltersArray = oFiltersArray;
2030
246
    if (oFiltersArray.Size() > 0)
2031
0
        m_aosStructuralInfo.SetNameValue("FILTERS",
2032
0
                                         oFiltersArray.ToString().c_str());
2033
246
}
2034
2035
/************************************************************************/
2036
/*                   ZarrV2Array::GetRawBlockInfoInfo()                 */
2037
/************************************************************************/
2038
2039
CPLStringList ZarrV2Array::GetRawBlockInfoInfo() const
2040
0
{
2041
0
    CPLStringList aosInfo(m_aosStructuralInfo);
2042
0
    if (!m_aoDtypeElts.empty() && m_aoDtypeElts[0].nativeSize > 1 &&
2043
0
        m_aoDtypeElts[0].nativeType != DtypeElt::NativeType::STRING_ASCII &&
2044
0
        m_aoDtypeElts[0].nativeType != DtypeElt::NativeType::STRING_UNICODE)
2045
0
    {
2046
0
        if (m_aoDtypeElts[0].needByteSwapping ^ CPL_IS_LSB)
2047
0
            aosInfo.SetNameValue("ENDIANNESS", "LITTLE");
2048
0
        else
2049
0
            aosInfo.SetNameValue("ENDIANNESS", "BIG");
2050
0
    }
2051
0
    if (m_bFortranOrder)
2052
0
    {
2053
0
        const int nDims = static_cast<int>(m_aoDims.size());
2054
0
        if (nDims > 1)
2055
0
        {
2056
0
            std::string osOrder("[");
2057
0
            for (int i = 0; i < nDims; ++i)
2058
0
            {
2059
0
                if (i > 0)
2060
0
                    osOrder += ',';
2061
0
                osOrder += std::to_string(nDims - 1 - i);
2062
0
            }
2063
0
            osOrder += ']';
2064
0
            aosInfo.SetNameValue("TRANSPOSE_ORDER", osOrder.c_str());
2065
0
        }
2066
0
    }
2067
0
    return aosInfo;
2068
0
}