Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Helper to fill ArrowArray
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "ograrrowarrayhelper.h"
#include "ogrlayerarrow.h"
#include "ogr_p.h"

#include <cstdlib>
#include <limits>
18
19
//! @cond Doxygen_Suppress
20
21
/************************************************************************/
22
/*                            GetMemLimit()                             */
23
/************************************************************************/
24
25
/*static*/ uint32_t OGRArrowArrayHelper::GetMemLimit()
26
0
{
27
0
    uint32_t nMemLimit =
28
0
        static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
29
    // Just for tests
30
0
    const char *pszOGR_ARROW_MEM_LIMIT =
31
0
        CPLGetConfigOption("OGR_ARROW_MEM_LIMIT", nullptr);
32
0
    if (pszOGR_ARROW_MEM_LIMIT)
33
0
        nMemLimit = atoi(pszOGR_ARROW_MEM_LIMIT);
34
0
    else
35
0
    {
36
0
        const auto nUsableRAM = CPLGetUsablePhysicalRAM();
37
0
        if (nUsableRAM > 0 && static_cast<uint64_t>(nUsableRAM / 4) < nMemLimit)
38
0
            nMemLimit = static_cast<uint32_t>(nUsableRAM / 4);
39
0
    }
40
0
    return nMemLimit;
41
0
}
42
43
/************************************************************************/
44
/*                       GetMaxFeaturesInBatch()                        */
45
/************************************************************************/
46
47
/* static */
48
int OGRArrowArrayHelper::GetMaxFeaturesInBatch(
49
    const CPLStringList &aosArrowArrayStreamOptions)
50
0
{
51
0
    int l_nMaxBatchSize = atoi(aosArrowArrayStreamOptions.FetchNameValueDef(
52
0
        "MAX_FEATURES_IN_BATCH", "65536"));
53
0
    if (l_nMaxBatchSize <= 0)
54
0
        l_nMaxBatchSize = 1;
55
0
    if (l_nMaxBatchSize > INT_MAX - 1)
56
0
        l_nMaxBatchSize = INT_MAX - 1;
57
58
0
    return l_nMaxBatchSize;
59
0
}
60
61
/************************************************************************/
62
/*                        OGRArrowArrayHelper()                         */
63
/************************************************************************/
64
65
// Wraps an already populated ArrowArray: records the array pointer and the
// batch size, and sizes the per-child allocation bookkeeping vector
// according to the number of children of the array.
OGRArrowArrayHelper::OGRArrowArrayHelper(struct ArrowArray *out_array,
                                         int nMaxBatchSize)
    : m_nMaxBatchSize(nMaxBatchSize), m_out_array(out_array)
{
    const auto nChildCount = static_cast<size_t>(out_array->n_children);
    m_anArrowFieldMaxAlloc.resize(nChildCount);
}
71
72
/************************************************************************/
73
/*                        OGRArrowArrayHelper()                         */
74
/************************************************************************/
75
76
// Builds a struct-like ArrowArray able to hold up to m_nMaxBatchSize
// features of the given feature definition.
//
// poDS: dataset used to resolve coded field domains (may be nullptr, in
//       which case no dictionary is attached to integer fields).
// poFeatureDefn: feature definition whose non-ignored attribute and geometry
//       fields each become one child array.
// aosArrowArrayStreamOptions: stream options. INCLUDE_FID (default YES),
//       MAX_FEATURES_IN_BATCH, TIMEZONE and GAS_OPT_DATETIME_AS_STRING are
//       read here.
// out_array: array to initialize. On allocation failure it is released and
//       zeroed, so its release member is nullptr on return.
OGRArrowArrayHelper::OGRArrowArrayHelper(
    GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
    const CPLStringList &aosArrowArrayStreamOptions,
    struct ArrowArray *out_array)
    : m_bIncludeFID(CPLTestBool(
          aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"))),
      m_nMaxBatchSize(GetMaxFeaturesInBatch(aosArrowArrayStreamOptions)),
      m_nFieldCount(poFeatureDefn->GetFieldCount()),
      m_nGeomFieldCount(poFeatureDefn->GetGeomFieldCount()),
      m_out_array(out_array)
{
    memset(out_array, 0, sizeof(*out_array));

    m_mapOGRFieldToArrowField.resize(m_nFieldCount, -1);
    m_mapOGRGeomFieldToArrowField.resize(m_nGeomFieldCount, -1);
    m_abNullableFields.resize(m_nFieldCount);
    m_anTZFlags.resize(m_nFieldCount);

    // Optional TIMEZONE option overriding the per-field timezone flag.
    int nTZFlagOverride = -1;
    const char *pszTZOverride =
        aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
    if (pszTZOverride)
    {
        if (EQUAL(pszTZOverride, "unknown") || EQUAL(pszTZOverride, ""))
        {
            nTZFlagOverride = OGR_TZFLAG_UNKNOWN;
        }
        else if (EQUAL(pszTZOverride, "mixed"))
        {
            nTZFlagOverride = OGR_TZFLAG_MIXED_TZ;
        }
        else
        {
            // we don't really care about the actual timezone, since we
            // will convert OGRField::Date to UTC in all cases
            nTZFlagOverride = OGR_TZFLAG_UTC;
        }
    }
    const bool bDateTimeAsString =
        aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, false);

    // Initial per-feature size guess for the data buffer of variable-length
    // children (strings/binaries and geometries respectively).
    constexpr size_t DEFAULT_STRING_SIZE = 10;
    constexpr size_t DEFAULT_WKB_SIZE = 100;

    // NOTE: all helpers below are declared before the first "goto error" so
    // that no jump bypasses an initialization.

    // Allocates a zero-initialized ArrowArray of m_nMaxBatchSize elements
    // with its release callback set.
    const auto CreateArray = [this]()
    {
        auto psArray = static_cast<struct ArrowArray *>(
            CPLCalloc(1, sizeof(struct ArrowArray)));
        psArray->release = OGRLayer::ReleaseArray;
        psArray->length = m_nMaxBatchSize;
        return psArray;
    };

    // Allocates the (zero-initialized) table of buffer pointers of an array.
    const auto AllocBufferTable =
        [](struct ArrowArray *psArray, size_t nBuffers)
    {
        psArray->n_buffers = static_cast<int64_t>(nBuffers);
        psArray->buffers =
            static_cast<const void **>(CPLCalloc(nBuffers, sizeof(void *)));
    };

    // Allocates buffers[iBuffer] as nBytes zero-filled bytes. Returns false
    // (leaving the slot null) if the allocation fails.
    const auto AllocZeroedBuffer =
        [](struct ArrowArray *psArray, size_t iBuffer, size_t nBytes) -> bool
    {
        void *pBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes);
        if (pBuffer == nullptr)
            return false;
        memset(pBuffer, 0, nBytes);
        psArray->buffers[iBuffer] = pBuffer;
        return true;
    };

    // Sets up the 3 buffers of a variable-length child: buffers[1] holds
    // (1 + m_nMaxBatchSize) zeroed 32-bit offsets, buffers[2] is an
    // uninitialized data area whose size is recorded in
    // m_anArrowFieldMaxAlloc[iArrowField]. Returns false on allocation
    // failure.
    const auto AllocVarLenBuffers =
        [this, &AllocBufferTable, &AllocZeroedBuffer](
            struct ArrowArray *psArray, int iArrowField,
            size_t nDefaultEltSize) -> bool
    {
        AllocBufferTable(psArray, 3);
        if (!AllocZeroedBuffer(psArray, 1,
                               sizeof(uint32_t) * (1 + m_nMaxBatchSize)))
            return false;
        m_anArrowFieldMaxAlloc[iArrowField] =
            nDefaultEltSize * m_nMaxBatchSize;
        psArray->buffers[2] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
            m_anArrowFieldMaxAlloc[iArrowField]);
        return psArray->buffers[2] != nullptr;
    };

    // Assign an Arrow child index to the FID (always first when present),
    // then to each non-ignored attribute field, then to each non-ignored
    // geometry field.
    if (m_bIncludeFID)
    {
        m_nChildren++;
    }
    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nFieldCount; i++)
    {
        const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
        m_abNullableFields[i] = CPL_TO_BOOL(poFieldDefn->IsNullable());
        m_anTZFlags[i] =
            nTZFlagOverride >= 0 ? nTZFlagOverride : poFieldDefn->GetTZFlag();
        if (!poFieldDefn->IsIgnored())
        {
            m_mapOGRFieldToArrowField[i] = m_nChildren;
            m_nChildren++;
        }
    }
    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nGeomFieldCount; i++)
    {
        if (!poFeatureDefn->GetGeomFieldDefn(i)->IsIgnored())
        {
            m_mapOGRGeomFieldToArrowField[i] = m_nChildren;
            m_nChildren++;
        }
    }

    m_anArrowFieldMaxAlloc.resize(m_nChildren);

    // Top-level array: a single (null) buffer slot and one child per column.
    out_array->release = OGRLayer::ReleaseArray;
    out_array->length = m_nMaxBatchSize;
    out_array->null_count = 0;

    out_array->n_children = m_nChildren;
    out_array->children = static_cast<struct ArrowArray **>(
        CPLCalloc(m_nChildren, sizeof(struct ArrowArray *)));
    AllocBufferTable(out_array, 1);

    // Allocate buffers

    if (m_bIncludeFID)
    {
        auto psChild = CreateArray();
        out_array->children[0] = psChild;
        AllocBufferTable(psChild, 2);
        // The FID values buffer is intentionally not zero-filled.
        m_panFIDValues = static_cast<int64_t *>(
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * m_nMaxBatchSize));
        if (m_panFIDValues == nullptr)
            goto error;
        psChild->buffers[1] = m_panFIDValues;
    }

    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nFieldCount; i++)
    {
        const int iArrowField = m_mapOGRFieldToArrowField[i];
        if (iArrowField >= 0)
        {
            const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
            // Attach the child to its parent right away so that the error
            // path releases it.
            auto psChild = CreateArray();
            out_array->children[iArrowField] = psChild;

            const auto eSubType = poFieldDefn->GetSubType();
            // Size in bytes of one element for fixed-width types. Left at 0
            // when the case below allocates buffers itself (or allocates
            // nothing).
            size_t nEltSize = 0;
            switch (poFieldDefn->GetType())
            {
                case OFTInteger:
                {
                    if (eSubType == OFSTBoolean)
                    {
                        nEltSize = sizeof(uint8_t);
                    }
                    else if (eSubType == OFSTInt16)
                    {
                        nEltSize = sizeof(int16_t);
                    }
                    else
                    {
                        nEltSize = sizeof(int32_t);
                    }

                    const auto &osDomainName = poFieldDefn->GetDomainName();
                    if (!osDomainName.empty() && poDS != nullptr)
                    {
                        const auto poFieldDomain =
                            poDS->GetFieldDomain(osDomainName);
                        if (poFieldDomain &&
                            poFieldDomain->GetDomainType() == OFDT_CODED)
                        {
                            const OGRCodedFieldDomain *poCodedDomain =
                                static_cast<const OGRCodedFieldDomain *>(
                                    poFieldDomain);
                            // Best effort: the result is deliberately
                            // ignored. When FillDict() returns false the
                            // child simply has no dictionary.
                            FillDict(psChild, poCodedDomain);
                        }
                    }

                    break;
                }
                case OFTInteger64:
                {
                    nEltSize = sizeof(int64_t);
                    break;
                }
                case OFTReal:
                {
                    if (eSubType == OFSTFloat32)
                    {
                        nEltSize = sizeof(float);
                    }
                    else
                    {
                        nEltSize = sizeof(double);
                    }
                    break;
                }

                case OFTDateTime:
                {
                    if (!bDateTimeAsString)
                    {
                        if (m_anTZFlags[i] == OGR_TZFLAG_MIXED_TZ)
                        {
                            // Mixed timezones: the child is itself a
                            // two-children array.
                            AllocBufferTable(psChild, 1);

                            psChild->n_children = 2;
                            psChild->children =
                                static_cast<struct ArrowArray **>(
                                    CPLCalloc(2, sizeof(struct ArrowArray *)));

                            // Create sub-child for timestamp in UTC
                            psChild->children[0] = CreateArray();
                            AllocBufferTable(psChild->children[0], 2);
                            if (!AllocZeroedBuffer(psChild->children[0], 1,
                                                   sizeof(int64_t) *
                                                       m_nMaxBatchSize))
                                goto error;

                            // Create sub-child for offset to UTC in minutes
                            psChild->children[1] = CreateArray();
                            AllocBufferTable(psChild->children[1], 2);
                            if (!AllocZeroedBuffer(psChild->children[1], 1,
                                                   sizeof(int16_t) *
                                                       m_nMaxBatchSize))
                                goto error;
                        }
                        else
                        {
                            nEltSize = sizeof(int64_t);
                        }
                        break;
                    }
                    else
                    {
                        // Date-times exported as strings share the string
                        // layout below.
                        [[fallthrough]];
                    }
                }

                case OFTString:
                case OFTBinary:
                {
                    if (!AllocVarLenBuffers(psChild, iArrowField,
                                            DEFAULT_STRING_SIZE))
                        goto error;
                    break;
                }

                case OFTDate:
                {
                    nEltSize = sizeof(int32_t);
                    break;
                }

                case OFTTime:
                {
                    nEltSize = sizeof(int32_t);
                    break;
                }

                default:
                    // Other field types: no buffer is allocated here.
                    break;
            }

            if (nEltSize != 0)
            {
                // Fixed-width child: buffers[1] holds the zeroed values.
                AllocBufferTable(psChild, 2);
                if (!AllocZeroedBuffer(psChild, 1, nEltSize * m_nMaxBatchSize))
                    goto error;
            }
        }
    }

    // cppcheck-suppress knownConditionTrueFalse
    for (int i = 0; i < m_nGeomFieldCount; i++)
    {
        const int iArrowField = m_mapOGRGeomFieldToArrowField[i];
        if (iArrowField >= 0)
        {
            auto psChild = CreateArray();
            out_array->children[iArrowField] = psChild;

            if (!AllocVarLenBuffers(psChild, iArrowField, DEFAULT_WKB_SIZE))
                goto error;
        }
    }

    return;

error:
    // Release everything attached so far and leave out_array zeroed, so
    // that its release member is nullptr.
    out_array->release(out_array);
    memset(out_array, 0, sizeof(*out_array));
    // The FID buffer was handed over to children[0]; do not keep a pointer
    // to memory that the release callback has presumably freed.
    m_panFIDValues = nullptr;
}
402
403
/************************************************************************/
404
/*                              FillDict()                              */
405
/************************************************************************/
406
407
/* static */
408
bool OGRArrowArrayHelper::FillDict(struct ArrowArray *psChild,
409
                                   const OGRCodedFieldDomain *poCodedDomain)
410
0
{
411
0
    int nLastCode = -1;
412
0
    uint32_t nCountChars = 0;
413
0
    int nCountNull = 0;
414
0
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
415
0
         psIter->pszCode; ++psIter)
416
0
    {
417
0
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
418
0
        {
419
0
            return false;
420
0
        }
421
0
        int nCode = atoi(psIter->pszCode);
422
0
        if (nCode <= nLastCode || nCode - nLastCode > 100)
423
0
        {
424
0
            return false;
425
0
        }
426
0
        for (int i = nLastCode + 1; i < nCode; ++i)
427
0
        {
428
0
            nCountNull++;
429
0
        }
430
0
        if (psIter->pszValue)
431
0
        {
432
0
            const size_t nLen = strlen(psIter->pszValue);
433
0
            if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
434
0
                return false;
435
0
            nCountChars += static_cast<uint32_t>(nLen);
436
0
        }
437
0
        else
438
0
        {
439
0
            nCountNull++;
440
0
        }
441
0
        nLastCode = nCode;
442
0
    }
443
0
    const int nLength = 1 + nLastCode;
444
445
0
    auto psDict = static_cast<struct ArrowArray *>(
446
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
447
0
    psChild->dictionary = psDict;
448
449
0
    psDict->release = OGRLayer::ReleaseArray;
450
0
    psDict->length = nLength;
451
0
    psDict->n_buffers = 3;
452
0
    psDict->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
453
0
    psDict->null_count = nCountNull;
454
0
    uint8_t *pabyNull = nullptr;
455
0
    if (nCountNull)
456
0
    {
457
0
        pabyNull = static_cast<uint8_t *>(
458
0
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nLength + 7) / 8));
459
0
        if (pabyNull == nullptr)
460
0
        {
461
0
            psDict->release(psDict);
462
0
            CPLFree(psDict);
463
0
            psChild->dictionary = nullptr;
464
0
            return false;
465
0
        }
466
0
        memset(pabyNull, 0xFF, (nLength + 7) / 8);
467
0
        psDict->buffers[0] = pabyNull;
468
0
    }
469
470
0
    uint32_t *panOffsets = static_cast<uint32_t *>(
471
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(uint32_t) * (1 + nLength)));
472
0
    if (panOffsets == nullptr)
473
0
    {
474
0
        psDict->release(psDict);
475
0
        CPLFree(psDict);
476
0
        psChild->dictionary = nullptr;
477
0
        return false;
478
0
    }
479
0
    psDict->buffers[1] = panOffsets;
480
481
0
    char *pachValues =
482
0
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars));
483
0
    if (pachValues == nullptr)
484
0
    {
485
0
        psDict->release(psDict);
486
0
        CPLFree(psDict);
487
0
        psChild->dictionary = nullptr;
488
0
        return false;
489
0
    }
490
0
    psDict->buffers[2] = pachValues;
491
492
0
    nLastCode = -1;
493
0
    uint32_t nOffset = 0;
494
0
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
495
0
         psIter->pszCode; ++psIter)
496
0
    {
497
0
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
498
0
        {
499
0
            psDict->release(psDict);
500
0
            CPLFree(psDict);
501
0
            psChild->dictionary = nullptr;
502
0
            return false;
503
0
        }
504
0
        int nCode = atoi(psIter->pszCode);
505
0
        if (nCode <= nLastCode || nCode - nLastCode > 100)
506
0
        {
507
0
            psDict->release(psDict);
508
0
            CPLFree(psDict);
509
0
            psChild->dictionary = nullptr;
510
0
            return false;
511
0
        }
512
0
        for (int i = nLastCode + 1; i < nCode; ++i)
513
0
        {
514
0
            panOffsets[i] = nOffset;
515
0
            if (pabyNull)
516
0
                pabyNull[i / 8] &= static_cast<uint8_t>(
517
0
                    ~(1 << (static_cast<unsigned>(i) % 8)));
518
0
        }
519
0
        panOffsets[nCode] = nOffset;
520
0
        if (psIter->pszValue)
521
0
        {
522
0
            const size_t nLen = strlen(psIter->pszValue);
523
0
            memcpy(pachValues + nOffset, psIter->pszValue, nLen);
524
0
            nOffset += static_cast<uint32_t>(nLen);
525
0
        }
526
0
        else if (pabyNull)
527
0
        {
528
0
            pabyNull[nCode / 8] &= static_cast<uint8_t>(~(1 << (nCode % 8)));
529
0
        }
530
0
        nLastCode = nCode;
531
0
    }
532
0
    panOffsets[nLength] = nOffset;
533
534
0
    return true;
535
0
}
536
537
//! @endcond