Coverage Report

Created: 2026-02-14 09:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/frmts/zarr/zarr_v3_codec_sequence.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  GDAL
4
 * Purpose:  Zarr driver, ZarrV3CodecSequence class
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2023, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "zarr_v3_codec.h"
14
15
/************************************************************************/
16
/*                     ZarrV3CodecSequence::Clone()                     */
17
/************************************************************************/
18
19
std::unique_ptr<ZarrV3CodecSequence> ZarrV3CodecSequence::Clone() const
20
0
{
21
0
    auto poClone = std::make_unique<ZarrV3CodecSequence>(m_oInputArrayMetadata);
22
0
    for (const auto &poCodec : m_apoCodecs)
23
0
        poClone->m_apoCodecs.emplace_back(poCodec->Clone());
24
0
    poClone->m_oCodecArray = m_oCodecArray.Clone();
25
0
    poClone->m_bPartialDecodingPossible = m_bPartialDecodingPossible;
26
0
    return poClone;
27
0
}
28
29
/************************************************************************/
30
/*                 ZarrV3CodecSequence::InitFromJson()                  */
31
/************************************************************************/
32
33
bool ZarrV3CodecSequence::InitFromJson(const CPLJSONObject &oCodecs,
34
                                       ZarrArrayMetadata &oOutputArrayMetadata)
35
0
{
36
0
    if (oCodecs.GetType() != CPLJSONObject::Type::Array)
37
0
    {
38
0
        CPLError(CE_Failure, CPLE_AppDefined, "codecs is not an array");
39
0
        return false;
40
0
    }
41
0
    auto oCodecsArray = oCodecs.ToArray();
42
43
0
    ZarrArrayMetadata oInputArrayMetadata = m_oInputArrayMetadata;
44
0
    ZarrV3Codec::IOType eLastType = ZarrV3Codec::IOType::ARRAY;
45
0
    std::string osLastCodec;
46
47
0
    const auto InsertImplicitEndianCodecIfNeeded =
48
0
        [this, &oInputArrayMetadata, &eLastType, &osLastCodec]()
49
0
    {
50
0
        CPL_IGNORE_RET_VAL(this);
51
0
        if (eLastType == ZarrV3Codec::IOType::ARRAY &&
52
0
            oInputArrayMetadata.oElt.nativeSize > 1)
53
0
        {
54
0
            CPLError(CE_Warning, CPLE_AppDefined,
55
0
                     "'bytes' codec missing. Assuming little-endian storage, "
56
0
                     "but such tolerance may be removed in future versions");
57
0
            auto poEndianCodec = std::make_unique<ZarrV3CodecBytes>();
58
0
            ZarrArrayMetadata oTmpOutputArrayMetadata;
59
0
            poEndianCodec->InitFromConfiguration(
60
0
                ZarrV3CodecBytes::GetConfiguration(true), oInputArrayMetadata,
61
0
                oTmpOutputArrayMetadata, /* bEmitWarnings = */ true);
62
0
            oInputArrayMetadata = std::move(oTmpOutputArrayMetadata);
63
0
            eLastType = poEndianCodec->GetOutputType();
64
0
            osLastCodec = poEndianCodec->GetName();
65
            if constexpr (!CPL_IS_LSB)
66
            {
67
                // Insert a little endian codec if we are on a big endian target
68
                m_apoCodecs.emplace_back(std::move(poEndianCodec));
69
            }
70
0
        }
71
0
    };
72
73
0
    bool bShardingFound = false;
74
0
    std::vector<size_t> anBlockSizesBeforeSharding;
75
0
    for (const auto &oCodec : oCodecsArray)
76
0
    {
77
0
        if (oCodec.GetType() != CPLJSONObject::Type::Object)
78
0
        {
79
0
            CPLError(CE_Failure, CPLE_AppDefined, "codecs[] is not an object");
80
0
            return false;
81
0
        }
82
0
        const auto osName = oCodec["name"].ToString();
83
0
        std::unique_ptr<ZarrV3Codec> poCodec;
84
0
        if (osName == ZarrV3CodecGZip::NAME)
85
0
            poCodec = std::make_unique<ZarrV3CodecGZip>();
86
0
        else if (osName == ZarrV3CodecBlosc::NAME)
87
0
            poCodec = std::make_unique<ZarrV3CodecBlosc>();
88
0
        else if (osName == ZarrV3CodecZstd::NAME)
89
0
            poCodec = std::make_unique<ZarrV3CodecZstd>();
90
0
        else if (osName == ZarrV3CodecBytes::NAME ||
91
0
                 osName == "endian" /* endian is the old name */)
92
0
            poCodec = std::make_unique<ZarrV3CodecBytes>();
93
0
        else if (osName == ZarrV3CodecTranspose::NAME)
94
0
            poCodec = std::make_unique<ZarrV3CodecTranspose>();
95
0
        else if (osName == ZarrV3CodecCRC32C::NAME)
96
0
            poCodec = std::make_unique<ZarrV3CodecCRC32C>();
97
0
        else if (osName == ZarrV3CodecShardingIndexed::NAME)
98
0
        {
99
0
            bShardingFound = true;
100
0
            poCodec = std::make_unique<ZarrV3CodecShardingIndexed>();
101
0
        }
102
0
        else
103
0
        {
104
0
            CPLError(CE_Failure, CPLE_NotSupported, "Unsupported codec: %s",
105
0
                     osName.c_str());
106
0
            return false;
107
0
        }
108
109
0
        if (poCodec->GetInputType() == ZarrV3Codec::IOType::ARRAY)
110
0
        {
111
0
            if (eLastType == ZarrV3Codec::IOType::BYTES)
112
0
            {
113
0
                CPLError(CE_Failure, CPLE_AppDefined,
114
0
                         "Cannot chain codec %s with %s",
115
0
                         poCodec->GetName().c_str(), osLastCodec.c_str());
116
0
                return false;
117
0
            }
118
0
        }
119
0
        else
120
0
        {
121
0
            InsertImplicitEndianCodecIfNeeded();
122
0
        }
123
124
0
        ZarrArrayMetadata oStepOutputArrayMetadata;
125
0
        if (osName == ZarrV3CodecShardingIndexed::NAME)
126
0
        {
127
0
            anBlockSizesBeforeSharding = oInputArrayMetadata.anBlockSizes;
128
0
        }
129
0
        if (!poCodec->InitFromConfiguration(oCodec["configuration"],
130
0
                                            oInputArrayMetadata,
131
0
                                            oStepOutputArrayMetadata,
132
0
                                            /* bEmitWarnings = */ true))
133
0
        {
134
0
            return false;
135
0
        }
136
0
        oInputArrayMetadata = std::move(oStepOutputArrayMetadata);
137
0
        eLastType = poCodec->GetOutputType();
138
0
        osLastCodec = poCodec->GetName();
139
140
0
        if (!poCodec->IsNoOp())
141
0
            m_apoCodecs.emplace_back(std::move(poCodec));
142
0
    }
143
144
0
    if (bShardingFound)
145
0
    {
146
0
        m_bPartialDecodingPossible =
147
0
            (m_apoCodecs.back()->GetName() == ZarrV3CodecShardingIndexed::NAME);
148
0
        if (!m_bPartialDecodingPossible)
149
0
        {
150
0
            m_bPartialDecodingPossible = false;
151
            // This is not an implementation limitation, but the result of a
152
            // badly thought dataset. Zarr-Python also emits a similar warning.
153
0
            CPLError(
154
0
                CE_Warning, CPLE_AppDefined,
155
0
                "Sharding codec found, but not in last position. Consequently "
156
0
                "partial shard decoding will not be possible");
157
0
            oInputArrayMetadata.anBlockSizes =
158
0
                std::move(anBlockSizesBeforeSharding);
159
0
        }
160
0
    }
161
162
0
    InsertImplicitEndianCodecIfNeeded();
163
164
0
    m_oCodecArray = oCodecs.Clone();
165
0
    oOutputArrayMetadata = std::move(oInputArrayMetadata);
166
0
    return true;
167
0
}
168
169
/************************************************************************/
170
/*                ZarrV3CodecSequence::AllocateBuffer()                 */
171
/************************************************************************/
172
173
bool ZarrV3CodecSequence::AllocateBuffer(ZarrByteVectorQuickResize &abyBuffer,
174
                                         size_t nEltCount)
175
0
{
176
0
    if (!m_apoCodecs.empty())
177
0
    {
178
0
        const size_t nRawSize =
179
0
            nEltCount * m_oInputArrayMetadata.oElt.nativeSize;
180
        // Grow the temporary buffer a bit beyond the uncompressed size
181
0
        const size_t nMaxSize = nRawSize + nRawSize / 3 + 64;
182
0
        try
183
0
        {
184
0
            m_abyTmp.resize(nMaxSize);
185
0
        }
186
0
        catch (const std::exception &e)
187
0
        {
188
0
            CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
189
0
            return false;
190
0
        }
191
0
        m_abyTmp.resize(nRawSize);
192
193
        // Grow the input/output buffer too if we have several steps
194
0
        if (m_apoCodecs.size() >= 2 && abyBuffer.capacity() < nMaxSize)
195
0
        {
196
0
            const size_t nSize = abyBuffer.size();
197
0
            try
198
0
            {
199
0
                abyBuffer.resize(nMaxSize);
200
0
            }
201
0
            catch (const std::exception &e)
202
0
            {
203
0
                CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
204
0
                return false;
205
0
            }
206
0
            abyBuffer.resize(nSize);
207
0
        }
208
0
    }
209
0
    return true;
210
0
}
211
212
/************************************************************************/
213
/*                    ZarrV3CodecSequence::Encode()                     */
214
/************************************************************************/
215
216
/** Run all codecs of the sequence, in order, on abyBuffer.
 *
 * Each codec encodes from abyBuffer into m_abyTmp, and both buffers are then
 * swapped, so that abyBuffer holds the output of the last executed codec.
 *
 * @param abyBuffer Buffer holding the data to encode on input, and the
 *                  encoded data on successful output.
 * @return true on success, false if buffer allocation or a codec failed.
 */
bool ZarrV3CodecSequence::Encode(ZarrByteVectorQuickResize &abyBuffer)
{
    const auto nEltCount =
        MultiplyElements(m_oInputArrayMetadata.anBlockSizes);
    if (!AllocateBuffer(abyBuffer, nEltCount))
    {
        return false;
    }

    auto oIter = m_apoCodecs.begin();
    const auto oEnd = m_apoCodecs.end();
    while (oIter != oEnd)
    {
        const bool bEncoded = (*oIter)->Encode(abyBuffer, m_abyTmp);
        if (!bEncoded)
        {
            return false;
        }
        // The output of this step becomes the input of the next one
        std::swap(abyBuffer, m_abyTmp);
        ++oIter;
    }
    return true;
}
229
230
/************************************************************************/
231
/*                    ZarrV3CodecSequence::Decode()                     */
232
/************************************************************************/
233
234
/** Run all codecs of the sequence, in reverse order, on abyBuffer.
 *
 * Each codec decodes from abyBuffer into m_abyTmp, and both buffers are then
 * swapped, so that abyBuffer holds the output of the last executed codec.
 *
 * @param abyBuffer Buffer holding the data to decode on input, and the
 *                  decoded data on successful output.
 * @return true on success, false if buffer allocation or a codec failed.
 */
bool ZarrV3CodecSequence::Decode(ZarrByteVectorQuickResize &abyBuffer)
{
    const auto nEltCount =
        MultiplyElements(m_oInputArrayMetadata.anBlockSizes);
    if (!AllocateBuffer(abyBuffer, nEltCount))
    {
        return false;
    }

    // Walk the codecs from the last one to the first one
    auto oIter = m_apoCodecs.rbegin();
    const auto oEnd = m_apoCodecs.rend();
    while (oIter != oEnd)
    {
        const bool bDecoded = (*oIter)->Decode(abyBuffer, m_abyTmp);
        if (!bDecoded)
        {
            return false;
        }
        // The output of this step becomes the input of the next one
        std::swap(abyBuffer, m_abyTmp);
        ++oIter;
    }
    return true;
}
248
249
/************************************************************************/
250
/*                 ZarrV3CodecSequence::DecodePartial()                 */
251
/************************************************************************/
252
253
bool ZarrV3CodecSequence::DecodePartial(VSIVirtualHandle *poFile,
254
                                        ZarrByteVectorQuickResize &abyBuffer,
255
                                        const std::vector<size_t> &anStartIdxIn,
256
                                        const std::vector<size_t> &anCountIn)
257
0
{
258
0
    CPLAssert(anStartIdxIn.size() == m_oInputArrayMetadata.anBlockSizes.size());
259
0
    CPLAssert(anStartIdxIn.size() == anCountIn.size());
260
261
0
    if (!AllocateBuffer(abyBuffer, MultiplyElements(anCountIn)))
262
0
        return false;
263
264
    // anStartIdxIn and anCountIn are expressed in the shape *before* encoding
265
    // We need to apply the potential transpositions before submitting them
266
    // to the decoder of the Array->Bytes decoder
267
0
    std::vector<size_t> anStartIdx(anStartIdxIn);
268
0
    std::vector<size_t> anCount(anCountIn);
269
0
    for (auto &poCodec : m_apoCodecs)
270
0
    {
271
0
        poCodec->ChangeArrayShapeForward(anStartIdx, anCount);
272
0
    }
273
274
0
    for (auto iter = m_apoCodecs.rbegin(); iter != m_apoCodecs.rend(); ++iter)
275
0
    {
276
0
        const auto &poCodec = *iter;
277
278
0
        if (!poCodec->DecodePartial(poFile, abyBuffer, m_abyTmp, anStartIdx,
279
0
                                    anCount))
280
0
            return false;
281
0
        std::swap(abyBuffer, m_abyTmp);
282
0
    }
283
0
    return true;
284
0
}
285
286
/************************************************************************/
287
/*              ZarrV3CodecSequence::BatchDecodePartial()               */
288
/************************************************************************/
289
290
bool ZarrV3CodecSequence::BatchDecodePartial(
291
    VSIVirtualHandle *poFile,
292
    const std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>>
293
        &anRequests,
294
    std::vector<ZarrByteVectorQuickResize> &aResults)
295
0
{
296
    // Only batch-decode when sharding is the sole codec. If other codecs
297
    // (e.g. transpose) precede it, indices and output need codec-specific
298
    // transformations that BatchDecodePartial does not handle.
299
0
    if (m_apoCodecs.size() == 1)
300
0
    {
301
0
        auto *poSharding = dynamic_cast<ZarrV3CodecShardingIndexed *>(
302
0
            m_apoCodecs.back().get());
303
0
        if (poSharding)
304
0
        {
305
0
            return poSharding->BatchDecodePartial(poFile, anRequests, aResults);
306
0
        }
307
0
    }
308
309
    // Fallback: sequential DecodePartial for non-sharding codec chains
310
0
    aResults.resize(anRequests.size());
311
0
    for (size_t i = 0; i < anRequests.size(); ++i)
312
0
    {
313
0
        if (!DecodePartial(poFile, aResults[i], anRequests[i].first,
314
0
                           anRequests[i].second))
315
0
            return false;
316
0
    }
317
0
    return true;
318
0
}
319
320
/************************************************************************/
321
/*             ZarrV3CodecSequence::GetInnerMostBlockSize()             */
322
/************************************************************************/
323
324
std::vector<size_t> ZarrV3CodecSequence::GetInnerMostBlockSize(
325
    const std::vector<size_t> &anOuterBlockSize) const
326
0
{
327
0
    auto chunkSize = anOuterBlockSize;
328
0
    for (auto iter = m_apoCodecs.rbegin(); iter != m_apoCodecs.rend(); ++iter)
329
0
    {
330
0
        const auto &poCodec = *iter;
331
0
        if (m_bPartialDecodingPossible ||
332
0
            poCodec->GetName() != ZarrV3CodecShardingIndexed::NAME)
333
0
        {
334
0
            chunkSize = poCodec->GetInnerMostBlockSize(chunkSize);
335
0
        }
336
0
    }
337
0
    return chunkSize;
338
0
}