Coverage Report

Created: 2025-12-31 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Helper to fill ArrowArray
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "ograrrowarrayhelper.h"
14
#include "ogrlayerarrow.h"
15
#include "ogr_p.h"
16
17
#include <limits>
18
19
//! @cond Doxygen_Suppress
20
21
/************************************************************************/
22
/*                           GetMemLimit()                              */
23
/************************************************************************/
24
25
/*static*/ uint32_t OGRArrowArrayHelper::GetMemLimit()
26
0
{
27
0
    uint32_t nMemLimit =
28
0
        static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
29
    // Just for tests
30
0
    const char *pszOGR_ARROW_MEM_LIMIT =
31
0
        CPLGetConfigOption("OGR_ARROW_MEM_LIMIT", nullptr);
32
0
    if (pszOGR_ARROW_MEM_LIMIT)
33
0
        nMemLimit = atoi(pszOGR_ARROW_MEM_LIMIT);
34
0
    else
35
0
    {
36
0
        const auto nUsableRAM = CPLGetUsablePhysicalRAM();
37
0
        if (nUsableRAM > 0 && static_cast<uint64_t>(nUsableRAM / 4) < nMemLimit)
38
0
            nMemLimit = static_cast<uint32_t>(nUsableRAM / 4);
39
0
    }
40
0
    return nMemLimit;
41
0
}
42
43
/************************************************************************/
44
/*                       GetMaxFeaturesInBatch()                        */
45
/************************************************************************/
46
47
/* static */
48
int OGRArrowArrayHelper::GetMaxFeaturesInBatch(
49
    const CPLStringList &aosArrowArrayStreamOptions)
50
0
{
51
0
    int l_nMaxBatchSize = atoi(aosArrowArrayStreamOptions.FetchNameValueDef(
52
0
        "MAX_FEATURES_IN_BATCH", "65536"));
53
0
    if (l_nMaxBatchSize <= 0)
54
0
        l_nMaxBatchSize = 1;
55
0
    if (l_nMaxBatchSize > INT_MAX - 1)
56
0
        l_nMaxBatchSize = INT_MAX - 1;
57
58
0
    return l_nMaxBatchSize;
59
0
}
60
61
/************************************************************************/
62
/*                       OGRArrowArrayHelper()                          */
63
/************************************************************************/
64
65
// Wraps an ArrowArray that has already been set up by the caller.
//
// Only records the batch size and the array pointer, and sizes
// m_anArrowFieldMaxAlloc with one entry per child of out_array. Nothing is
// allocated inside out_array itself.
OGRArrowArrayHelper::OGRArrowArrayHelper(struct ArrowArray *out_array,
                                         int nMaxBatchSize)
    : m_nMaxBatchSize(nMaxBatchSize), m_out_array(out_array)
{
    const auto nChildCount = static_cast<size_t>(out_array->n_children);
    m_anArrowFieldMaxAlloc.resize(nChildCount);
}
71
72
/************************************************************************/
73
/*                       OGRArrowArrayHelper()                          */
74
/************************************************************************/
75
76
// Builds the helper and fully allocates out_array for one batch of features
// described by poFeatureDefn.
//
// Parameters:
//   poDS                        dataset used to look up coded field domains
//                               of integer fields; may be null, in which
//                               case no dictionary is built.
//   poFeatureDefn               feature definition whose (non-ignored)
//                               attribute and geometry fields each get one
//                               child array.
//   aosArrowArrayStreamOptions  options read here: INCLUDE_FID (default
//                               YES), MAX_FEATURES_IN_BATCH, TIMEZONE and
//                               GAS_OPT_DATETIME_AS_STRING.
//   out_array                   array to initialize. It is zeroed first.
//
// Child order is: FID (if included), attribute fields, geometry fields.
// Field types not handled by the switch below get a child with no buffers
// allocated here.
//
// On allocation failure, out_array is released and zeroed again, so a null
// out_array->release after construction signals the failure.
OGRArrowArrayHelper::OGRArrowArrayHelper(
    GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
    const CPLStringList &aosArrowArrayStreamOptions,
    struct ArrowArray *out_array)
    : m_bIncludeFID(CPLTestBool(
          aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"))),
      m_nMaxBatchSize(GetMaxFeaturesInBatch(aosArrowArrayStreamOptions)),
      m_nFieldCount(poFeatureDefn->GetFieldCount()),
      m_nGeomFieldCount(poFeatureDefn->GetGeomFieldCount()),
      m_out_array(out_array)
{
    memset(out_array, 0, sizeof(*out_array));

    m_mapOGRFieldToArrowField.resize(m_nFieldCount, -1);
    m_mapOGRGeomFieldToArrowField.resize(m_nGeomFieldCount, -1);
    m_abNullableFields.resize(m_nFieldCount);
    m_anTZFlags.resize(m_nFieldCount);

    // A TIMEZONE option, when present, overrides the per-field TZ flag.
    int nTZFlagOverride = -1;
    const char *pszTZOverride =
        aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
    if (pszTZOverride)
    {
        if (EQUAL(pszTZOverride, "unknown") || EQUAL(pszTZOverride, ""))
        {
            nTZFlagOverride = OGR_TZFLAG_UNKNOWN;
        }
        else
        {
            // we don't really care about the actual timezone, since we
            // will convert OGRField::Date to UTC in all cases
            nTZFlagOverride = OGR_TZFLAG_UTC;
        }
    }
    const bool bDateTimeAsString =
        aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, false);

    // Size in bytes of an offsets buffer: one uint32_t per feature plus the
    // trailing end offset.
    const size_t nOffsetsBufferSize = sizeof(uint32_t) * (1 + m_nMaxBatchSize);

    // Allocates the 3-buffer layout shared by string, binary and geometry
    // children: buffers[1] = zeroed offsets, buffers[2] = data area of
    // nDefaultEltSize bytes per feature (size recorded in
    // m_anArrowFieldMaxAlloc). Returns false on allocation failure; the
    // caller is responsible for releasing the whole array in that case.
    // NOTE: declared before the first "goto error" so that no jump crosses
    // its initialization.
    const auto AllocVarLengthBuffers =
        [this, nOffsetsBufferSize](struct ArrowArray *psVarChild,
                                   int iArrowField,
                                   size_t nDefaultEltSize) -> bool
    {
        psVarChild->n_buffers = 3;
        psVarChild->buffers =
            static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
        psVarChild->buffers[1] =
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffsetsBufferSize);
        if (psVarChild->buffers[1] == nullptr)
            return false;
        memset(const_cast<void *>(psVarChild->buffers[1]), 0,
               nOffsetsBufferSize);
        m_anArrowFieldMaxAlloc[iArrowField] =
            nDefaultEltSize * m_nMaxBatchSize;
        psVarChild->buffers[2] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
            m_anArrowFieldMaxAlloc[iArrowField]);
        return psVarChild->buffers[2] != nullptr;
    };

    // ------------------------------------------------------------------
    // First pass: assign a child index to every exported column.
    // ------------------------------------------------------------------
    if (m_bIncludeFID)
    {
        m_nChildren++;
    }
    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nFieldCount; i++)
    {
        const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
        m_abNullableFields[i] = CPL_TO_BOOL(poFieldDefn->IsNullable());
        m_anTZFlags[i] =
            nTZFlagOverride >= 0 ? nTZFlagOverride : poFieldDefn->GetTZFlag();
        if (!poFieldDefn->IsIgnored())
        {
            m_mapOGRFieldToArrowField[i] = m_nChildren;
            m_nChildren++;
        }
    }
    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nGeomFieldCount; i++)
    {
        if (!poFeatureDefn->GetGeomFieldDefn(i)->IsIgnored())
        {
            m_mapOGRGeomFieldToArrowField[i] = m_nChildren;
            m_nChildren++;
        }
    }

    m_anArrowFieldMaxAlloc.resize(m_nChildren);

    // ------------------------------------------------------------------
    // Set up the top-level (struct) array.
    // ------------------------------------------------------------------
    out_array->release = OGRLayer::ReleaseArray;
    out_array->length = m_nMaxBatchSize;
    out_array->null_count = 0;

    out_array->n_children = m_nChildren;
    out_array->children = static_cast<struct ArrowArray **>(
        CPLCalloc(m_nChildren, sizeof(struct ArrowArray *)));
    out_array->n_buffers = 1;
    out_array->buffers =
        static_cast<const void **>(CPLCalloc(1, sizeof(void *)));

    // ------------------------------------------------------------------
    // Allocate buffers
    // ------------------------------------------------------------------

    if (m_bIncludeFID)
    {
        out_array->children[0] = static_cast<struct ArrowArray *>(
            CPLCalloc(1, sizeof(struct ArrowArray)));
        auto psChild = out_array->children[0];
        psChild->release = OGRLayer::ReleaseArray;
        psChild->length = m_nMaxBatchSize;
        psChild->n_buffers = 2;
        psChild->buffers =
            static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
        m_panFIDValues = static_cast<int64_t *>(
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * m_nMaxBatchSize));
        if (m_panFIDValues == nullptr)
            goto error;
        psChild->buffers[1] = m_panFIDValues;
    }

    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nFieldCount; i++)
    {
        const int iArrowField = m_mapOGRFieldToArrowField[i];
        if (iArrowField >= 0)
        {
            const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
            out_array->children[iArrowField] = static_cast<struct ArrowArray *>(
                CPLCalloc(1, sizeof(struct ArrowArray)));
            auto psChild = out_array->children[iArrowField];

            psChild->release = OGRLayer::ReleaseArray;
            psChild->length = m_nMaxBatchSize;
            const auto eSubType = poFieldDefn->GetSubType();
            // Non-zero only for fixed-width types; those share the
            // allocation code after the switch.
            size_t nEltSize = 0;
            switch (poFieldDefn->GetType())
            {
                case OFTInteger:
                {
                    if (eSubType == OFSTBoolean)
                    {
                        nEltSize = sizeof(uint8_t);
                    }
                    else if (eSubType == OFSTInt16)
                    {
                        nEltSize = sizeof(int16_t);
                    }
                    else
                    {
                        nEltSize = sizeof(int32_t);
                    }

                    // Integer fields bound to a coded domain additionally
                    // get a dictionary. A failure to build it is not
                    // treated as an error here.
                    const auto &osDomainName = poFieldDefn->GetDomainName();
                    if (!osDomainName.empty() && poDS != nullptr)
                    {
                        const auto poFieldDomain =
                            poDS->GetFieldDomain(osDomainName);
                        if (poFieldDomain &&
                            poFieldDomain->GetDomainType() == OFDT_CODED)
                        {
                            const OGRCodedFieldDomain *poCodedDomain =
                                static_cast<const OGRCodedFieldDomain *>(
                                    poFieldDomain);
                            FillDict(psChild, poCodedDomain);
                        }
                    }

                    break;
                }
                case OFTInteger64:
                {
                    nEltSize = sizeof(int64_t);
                    break;
                }
                case OFTReal:
                {
                    if (eSubType == OFSTFloat32)
                    {
                        nEltSize = sizeof(float);
                    }
                    else
                    {
                        nEltSize = sizeof(double);
                    }
                    break;
                }

                case OFTDateTime:
                {
                    if (!bDateTimeAsString)
                    {
                        nEltSize = sizeof(int64_t);
                        break;
                    }
                    else
                    {
                        // DateTime exported as string: use the string layout.
                        [[fallthrough]];
                    }
                }

                case OFTString:
                case OFTBinary:
                {
                    constexpr size_t DEFAULT_STRING_SIZE = 10;
                    if (!AllocVarLengthBuffers(psChild, iArrowField,
                                               DEFAULT_STRING_SIZE))
                        goto error;
                    break;
                }

                case OFTDate:
                case OFTTime:
                {
                    nEltSize = sizeof(int32_t);
                    break;
                }

                default:
                    break;
            }

            if (nEltSize != 0)
            {
                const size_t nValuesSize = nEltSize * m_nMaxBatchSize;
                psChild->n_buffers = 2;
                psChild->buffers =
                    static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
                psChild->buffers[1] =
                    VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nValuesSize);
                if (psChild->buffers[1] == nullptr)
                    goto error;
                memset(const_cast<void *>(psChild->buffers[1]), 0,
                       nValuesSize);
            }
        }
    }

    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nGeomFieldCount; i++)
    {
        const int iArrowField = m_mapOGRGeomFieldToArrowField[i];
        if (iArrowField >= 0)
        {
            out_array->children[iArrowField] = static_cast<struct ArrowArray *>(
                CPLCalloc(1, sizeof(struct ArrowArray)));
            auto psChild = out_array->children[iArrowField];

            psChild->release = OGRLayer::ReleaseArray;
            psChild->length = m_nMaxBatchSize;

            constexpr size_t DEFAULT_WKB_SIZE = 100;
            if (!AllocVarLengthBuffers(psChild, iArrowField, DEFAULT_WKB_SIZE))
                goto error;
        }
    }

    return;

error:
    out_array->release(out_array);
    memset(out_array, 0, sizeof(*out_array));
    // The FID buffer was handed over to the array that has just been
    // released: do not keep a stale pointer to it.
    m_panFIDValues = nullptr;
}
344
345
/************************************************************************/
346
/*                             FillDict()                               */
347
/************************************************************************/
348
349
/* static */
350
bool OGRArrowArrayHelper::FillDict(struct ArrowArray *psChild,
351
                                   const OGRCodedFieldDomain *poCodedDomain)
352
0
{
353
0
    int nLastCode = -1;
354
0
    uint32_t nCountChars = 0;
355
0
    int nCountNull = 0;
356
0
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
357
0
         psIter->pszCode; ++psIter)
358
0
    {
359
0
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
360
0
        {
361
0
            return false;
362
0
        }
363
0
        int nCode = atoi(psIter->pszCode);
364
0
        if (nCode <= nLastCode || nCode - nLastCode > 100)
365
0
        {
366
0
            return false;
367
0
        }
368
0
        for (int i = nLastCode + 1; i < nCode; ++i)
369
0
        {
370
0
            nCountNull++;
371
0
        }
372
0
        if (psIter->pszValue)
373
0
        {
374
0
            const size_t nLen = strlen(psIter->pszValue);
375
0
            if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
376
0
                return false;
377
0
            nCountChars += static_cast<uint32_t>(nLen);
378
0
        }
379
0
        else
380
0
        {
381
0
            nCountNull++;
382
0
        }
383
0
        nLastCode = nCode;
384
0
    }
385
0
    const int nLength = 1 + nLastCode;
386
387
0
    auto psDict = static_cast<struct ArrowArray *>(
388
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
389
0
    psChild->dictionary = psDict;
390
391
0
    psDict->release = OGRLayer::ReleaseArray;
392
0
    psDict->length = nLength;
393
0
    psDict->n_buffers = 3;
394
0
    psDict->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
395
0
    psDict->null_count = nCountNull;
396
0
    uint8_t *pabyNull = nullptr;
397
0
    if (nCountNull)
398
0
    {
399
0
        pabyNull = static_cast<uint8_t *>(
400
0
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nLength + 7) / 8));
401
0
        if (pabyNull == nullptr)
402
0
        {
403
0
            psDict->release(psDict);
404
0
            CPLFree(psDict);
405
0
            psChild->dictionary = nullptr;
406
0
            return false;
407
0
        }
408
0
        memset(pabyNull, 0xFF, (nLength + 7) / 8);
409
0
        psDict->buffers[0] = pabyNull;
410
0
    }
411
412
0
    uint32_t *panOffsets = static_cast<uint32_t *>(
413
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(uint32_t) * (1 + nLength)));
414
0
    if (panOffsets == nullptr)
415
0
    {
416
0
        psDict->release(psDict);
417
0
        CPLFree(psDict);
418
0
        psChild->dictionary = nullptr;
419
0
        return false;
420
0
    }
421
0
    psDict->buffers[1] = panOffsets;
422
423
0
    char *pachValues =
424
0
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars));
425
0
    if (pachValues == nullptr)
426
0
    {
427
0
        psDict->release(psDict);
428
0
        CPLFree(psDict);
429
0
        psChild->dictionary = nullptr;
430
0
        return false;
431
0
    }
432
0
    psDict->buffers[2] = pachValues;
433
434
0
    nLastCode = -1;
435
0
    uint32_t nOffset = 0;
436
0
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
437
0
         psIter->pszCode; ++psIter)
438
0
    {
439
0
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
440
0
        {
441
0
            psDict->release(psDict);
442
0
            CPLFree(psDict);
443
0
            psChild->dictionary = nullptr;
444
0
            return false;
445
0
        }
446
0
        int nCode = atoi(psIter->pszCode);
447
0
        if (nCode <= nLastCode || nCode - nLastCode > 100)
448
0
        {
449
0
            psDict->release(psDict);
450
0
            CPLFree(psDict);
451
0
            psChild->dictionary = nullptr;
452
0
            return false;
453
0
        }
454
0
        for (int i = nLastCode + 1; i < nCode; ++i)
455
0
        {
456
0
            panOffsets[i] = nOffset;
457
0
            if (pabyNull)
458
0
                pabyNull[i / 8] &= static_cast<uint8_t>(
459
0
                    ~(1 << (static_cast<unsigned>(i) % 8)));
460
0
        }
461
0
        panOffsets[nCode] = nOffset;
462
0
        if (psIter->pszValue)
463
0
        {
464
0
            const size_t nLen = strlen(psIter->pszValue);
465
0
            memcpy(pachValues + nOffset, psIter->pszValue, nLen);
466
0
            nOffset += static_cast<uint32_t>(nLen);
467
0
        }
468
0
        else if (pabyNull)
469
0
        {
470
0
            pabyNull[nCode / 8] &= static_cast<uint8_t>(~(1 << (nCode % 8)));
471
0
        }
472
0
        nLastCode = nCode;
473
0
    }
474
0
    panOffsets[nLength] = nOffset;
475
476
0
    return true;
477
0
}
478
479
//! @endcond