Coverage Report

Created: 2026-02-14 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Parts of OGRLayer dealing with Arrow C interface
5
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2022-2023, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "ogrsf_frmts.h"
14
#include "ogr_api.h"
15
#include "ogr_recordbatch.h"
16
#include "ograrrowarrayhelper.h"
17
#include "ogrlayerarrow.h"
18
#include "ogr_p.h"
19
#include "ogr_swq.h"
20
#include "ogr_wkb.h"
21
#include "ogr_p.h"
22
#include "ogrlayer_private.h"
23
24
#include "cpl_float.h"
25
#include "cpl_json.h"
26
#include "cpl_time.h"
27
28
#include <algorithm>
29
#include <cassert>
30
#include <cinttypes>
31
#include <limits>
32
#include <utility>
33
#include <set>
34
35
// Keys of the per-field ArrowSchema::metadata entries emitted by
// OGRLayer::GetArrowSchema() to carry OGR field properties that have no
// direct Arrow equivalent (original type, alternative name, comment,
// default value, subtype, width, uniqueness and field domain name).
constexpr const char *MD_GDAL_OGR_TYPE = "GDAL:OGR:type";
36
constexpr const char *MD_GDAL_OGR_ALTERNATIVE_NAME =
37
    "GDAL:OGR:alternative_name";
38
constexpr const char *MD_GDAL_OGR_COMMENT = "GDAL:OGR:comment";
39
constexpr const char *MD_GDAL_OGR_DEFAULT = "GDAL:OGR:default";
40
constexpr const char *MD_GDAL_OGR_SUBTYPE = "GDAL:OGR:subtype";
41
constexpr const char *MD_GDAL_OGR_WIDTH = "GDAL:OGR:width";
42
constexpr const char *MD_GDAL_OGR_UNIQUE = "GDAL:OGR:unique";
43
constexpr const char *MD_GDAL_OGR_DOMAIN_NAME = "GDAL:OGR:domain_name";
44
45
// Characters compared against ArrowSchema::format by the IsXXXX() helpers
// of this file. The ARROW_LETTER_xxx values are matched against the first
// character of the format string.
constexpr char ARROW_LETTER_BOOLEAN = 'b';
46
constexpr char ARROW_LETTER_INT8 = 'c';
47
constexpr char ARROW_LETTER_UINT8 = 'C';
48
constexpr char ARROW_LETTER_INT16 = 's';
49
constexpr char ARROW_LETTER_UINT16 = 'S';
50
constexpr char ARROW_LETTER_INT32 = 'i';
51
constexpr char ARROW_LETTER_UINT32 = 'I';
52
constexpr char ARROW_LETTER_INT64 = 'l';
53
constexpr char ARROW_LETTER_UINT64 = 'L';
54
constexpr char ARROW_LETTER_FLOAT16 = 'e';
55
constexpr char ARROW_LETTER_FLOAT32 = 'f';
56
constexpr char ARROW_LETTER_FLOAT64 = 'g';
57
constexpr char ARROW_LETTER_STRING = 'u';
58
constexpr char ARROW_LETTER_LARGE_STRING = 'U';
59
constexpr char ARROW_LETTER_BINARY = 'z';
60
constexpr char ARROW_LETTER_LARGE_BINARY = 'Z';
61
// Followed by ':' in the format string (see IsDecimal()).
constexpr char ARROW_LETTER_DECIMAL = 'd';
62
// The ARROW_2ND_LETTER_xxx values are matched against the second character
// of the format string, after a leading '+' (see IsList() / IsLargeList()).
constexpr char ARROW_2ND_LETTER_LIST = 'l';
63
constexpr char ARROW_2ND_LETTER_LARGE_LIST = 'L';
64
65
/** Return whether the Arrow format string is exactly "+s" (structure). */
static inline bool IsStructure(const char *format)
{
    return strcmp(format, "+s") == 0;
}
69
70
/** Return whether the Arrow format string is exactly "+m" (map). */
static inline bool IsMap(const char *format)
{
    return strcmp(format, "+m") == 0;
}
74
75
/** Return whether the Arrow format string starts with "w:" (fixed width
 * binary). What follows the prefix is not validated.
 */
static inline bool IsFixedWidthBinary(const char *format)
{
    if (format[0] != 'w')
        return false;
    return format[1] == ':';
}
79
80
/** Return the integer that follows the 2-character "w:" prefix of a fixed
 * width binary format string, as parsed by atoi().
 *
 * The prefix itself is not checked: callers are expected to have tested
 * the format with IsFixedWidthBinary() first.
 */
static inline int GetFixedWithBinary(const char *format)
{
    const char *pszWidth = format + strlen("w:");
    return atoi(pszWidth);
}
84
85
static inline bool IsList(const char *format)
86
0
{
87
0
    return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LIST &&
88
0
           format[2] == 0;
89
0
}
90
91
static inline bool IsLargeList(const char *format)
92
0
{
93
0
    return format[0] == '+' && format[1] == ARROW_2ND_LETTER_LARGE_LIST &&
94
0
           format[2] == 0;
95
0
}
96
97
/** Return whether the Arrow format string starts with "+w:" (fixed size
 * list). What follows the prefix is not validated.
 */
static inline bool IsFixedSizeList(const char *format)
{
    if (format[0] != '+')
        return false;
    if (format[1] != 'w')
        return false;
    return format[2] == ':';
}
101
102
/** Return the integer that follows the 3-character "+w:" prefix of a fixed
 * size list format string, as parsed by atoi().
 *
 * The prefix itself is not checked: callers are expected to have tested
 * the format with IsFixedSizeList() first.
 */
static inline int GetFixedSizeList(const char *format)
{
    const char *pszSize = format + strlen("+w:");
    return atoi(pszSize);
}
106
107
static inline bool IsDecimal(const char *format)
108
0
{
109
0
    return format[0] == ARROW_LETTER_DECIMAL && format[1] == ':';
110
0
}
111
112
static inline bool IsBoolean(const char *format)
113
0
{
114
0
    return format[0] == ARROW_LETTER_BOOLEAN && format[1] == 0;
115
0
}
116
117
static inline bool IsInt8(const char *format)
118
0
{
119
0
    return format[0] == ARROW_LETTER_INT8 && format[1] == 0;
120
0
}
121
122
static inline bool IsUInt8(const char *format)
123
0
{
124
0
    return format[0] == ARROW_LETTER_UINT8 && format[1] == 0;
125
0
}
126
127
static inline bool IsInt16(const char *format)
128
0
{
129
0
    return format[0] == ARROW_LETTER_INT16 && format[1] == 0;
130
0
}
131
132
static inline bool IsUInt16(const char *format)
133
0
{
134
0
    return format[0] == ARROW_LETTER_UINT16 && format[1] == 0;
135
0
}
136
137
static inline bool IsInt32(const char *format)
138
0
{
139
0
    return format[0] == ARROW_LETTER_INT32 && format[1] == 0;
140
0
}
141
142
static inline bool IsUInt32(const char *format)
143
0
{
144
0
    return format[0] == ARROW_LETTER_UINT32 && format[1] == 0;
145
0
}
146
147
static inline bool IsInt64(const char *format)
148
0
{
149
0
    return format[0] == ARROW_LETTER_INT64 && format[1] == 0;
150
0
}
151
152
static inline bool IsUInt64(const char *format)
153
0
{
154
0
    return format[0] == ARROW_LETTER_UINT64 && format[1] == 0;
155
0
}
156
157
static inline bool IsFloat16(const char *format)
158
0
{
159
0
    return format[0] == ARROW_LETTER_FLOAT16 && format[1] == 0;
160
0
}
161
162
static inline bool IsFloat32(const char *format)
163
0
{
164
0
    return format[0] == ARROW_LETTER_FLOAT32 && format[1] == 0;
165
0
}
166
167
static inline bool IsFloat64(const char *format)
168
0
{
169
0
    return format[0] == ARROW_LETTER_FLOAT64 && format[1] == 0;
170
0
}
171
172
static inline bool IsString(const char *format)
173
0
{
174
0
    return format[0] == ARROW_LETTER_STRING && format[1] == 0;
175
0
}
176
177
static inline bool IsLargeString(const char *format)
178
0
{
179
0
    return format[0] == ARROW_LETTER_LARGE_STRING && format[1] == 0;
180
0
}
181
182
static inline bool IsBinary(const char *format)
183
0
{
184
0
    return format[0] == ARROW_LETTER_BINARY && format[1] == 0;
185
0
}
186
187
static inline bool IsLargeBinary(const char *format)
188
0
{
189
0
    return format[0] == ARROW_LETTER_LARGE_BINARY && format[1] == 0;
190
0
}
191
192
/** Return whether the Arrow format string starts with "ts", then the unit
 * character chType, then ':'.
 *
 * Characters are tested left to right and the test stops at the first
 * mismatch. Whatever follows the ':' is not examined.
 */
static inline bool IsTimestampInternal(const char *format, char chType)
{
    if (format[0] != 't' || format[1] != 's')
        return false;
    if (format[2] != chType)
        return false;
    return format[3] == ':';
}
197
198
static inline bool IsTimestampSeconds(const char *format)
199
0
{
200
0
    return IsTimestampInternal(format, 's');
201
0
}
202
203
static inline bool IsTimestampMilliseconds(const char *format)
204
0
{
205
0
    return IsTimestampInternal(format, 'm');
206
0
}
207
208
static inline bool IsTimestampMicroseconds(const char *format)
209
0
{
210
0
    return IsTimestampInternal(format, 'u');
211
0
}
212
213
static inline bool IsTimestampNanoseconds(const char *format)
214
0
{
215
0
    return IsTimestampInternal(format, 'n');
216
0
}
217
218
static inline bool IsTimestamp(const char *format)
219
0
{
220
0
    return IsTimestampSeconds(format) || IsTimestampMilliseconds(format) ||
221
0
           IsTimestampMicroseconds(format) || IsTimestampNanoseconds(format);
222
0
}
223
224
static inline const char *GetTimestampTimezone(const char *format)
225
0
{
226
0
    return IsTimestamp(format) ? format + strlen("tm?:") : "";
227
0
}
228
229
/************************************************************************/
230
/*                              TestBit()                               */
231
/************************************************************************/
232
233
/** Return whether bit nIdx is set in the bitmap pabyData.
 *
 * Bits are numbered starting from the least significant bit of byte 0.
 */
inline bool TestBit(const uint8_t *pabyData, size_t nIdx)
{
    const size_t nByteIdx = nIdx >> 3;
    const unsigned nBitInByte = static_cast<unsigned>(nIdx & 7);
    return ((pabyData[nByteIdx] >> nBitInByte) & 1U) != 0;
}
237
238
/************************************************************************/
239
/*                               SetBit()                               */
240
/************************************************************************/
241
242
/** Set bit nIdx of the bitmap pabyData to 1, leaving other bits untouched.
 *
 * Bits are numbered starting from the least significant bit of byte 0.
 */
inline void SetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const unsigned nMask = 1U << (nIdx & 7);
    byTarget = static_cast<uint8_t>(byTarget | nMask);
}
246
247
/************************************************************************/
248
/*                              UnsetBit()                              */
249
/************************************************************************/
250
251
/** Clear bit nIdx of the bitmap pabyData, leaving other bits untouched.
 *
 * Bits are numbered starting from the least significant bit of byte 0.
 */
inline void UnsetBit(uint8_t *pabyData, size_t nIdx)
{
    uint8_t &byTarget = pabyData[nIdx >> 3];
    const unsigned nMask = 1U << (nIdx & 7);
    byTarget = static_cast<uint8_t>(byTarget & ~nMask);
}
255
256
/************************************************************************/
257
/*                        DefaultReleaseSchema()                        */
258
/************************************************************************/
259
260
static void OGRLayerReleaseSchema(struct ArrowSchema *schema,
261
                                  bool bFullFreeFormat)
262
0
{
263
0
    CPLAssert(schema->release != nullptr);
264
0
    if (bFullFreeFormat || STARTS_WITH(schema->format, "w:") ||
265
0
        STARTS_WITH(schema->format, "tsm:"))
266
0
    {
267
0
        CPLFree(const_cast<char *>(schema->format));
268
0
    }
269
0
    CPLFree(const_cast<char *>(schema->name));
270
0
    CPLFree(const_cast<char *>(schema->metadata));
271
0
    if (schema->children)
272
0
    {
273
0
        for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
274
0
        {
275
0
            if (schema->children[i] && schema->children[i]->release)
276
0
            {
277
0
                schema->children[i]->release(schema->children[i]);
278
0
                CPLFree(schema->children[i]);
279
0
            }
280
0
        }
281
0
        CPLFree(schema->children);
282
0
    }
283
0
    if (schema->dictionary)
284
0
    {
285
0
        if (schema->dictionary->release)
286
0
        {
287
0
            schema->dictionary->release(schema->dictionary);
288
0
            CPLFree(schema->dictionary);
289
0
        }
290
0
    }
291
0
    schema->release = nullptr;
292
0
}
293
294
static void OGRLayerPartialReleaseSchema(struct ArrowSchema *schema)
295
0
{
296
0
    OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ false);
297
0
}
298
299
static void OGRLayerFullReleaseSchema(struct ArrowSchema *schema)
300
0
{
301
0
    OGRLayerReleaseSchema(schema, /* bFullFreeFormat = */ true);
302
0
}
303
304
/** Release a ArrowSchema.
305
 *
306
 * To be used by driver implementations that have a custom GetArrowStream()
307
 * implementation.
308
 *
309
 * @param schema Schema to release.
310
 * @since GDAL 3.6
311
 */
312
313
void OGRLayer::ReleaseSchema(struct ArrowSchema *schema)
314
0
{
315
0
    OGRLayerPartialReleaseSchema(schema);
316
0
}
317
318
/************************************************************************/
319
/*                          AddDictToSchema()                           */
320
/************************************************************************/
321
322
static void AddDictToSchema(struct ArrowSchema *psChild,
323
                            const OGRCodedFieldDomain *poCodedDomain)
324
0
{
325
0
    const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
326
0
    int nLastCode = -1;
327
0
    int nCountNull = 0;
328
0
    uint32_t nCountChars = 0;
329
0
    for (; psIter->pszCode; ++psIter)
330
0
    {
331
0
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
332
0
        {
333
0
            return;
334
0
        }
335
0
        int nCode = atoi(psIter->pszCode);
336
0
        if (nCode <= nLastCode || nCode - nLastCode > 100)
337
0
        {
338
0
            return;
339
0
        }
340
0
        for (int i = nLastCode + 1; i < nCode; ++i)
341
0
        {
342
0
            nCountNull++;
343
0
        }
344
0
        if (psIter->pszValue != nullptr)
345
0
        {
346
0
            const size_t nLen = strlen(psIter->pszValue);
347
0
            if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
348
0
                return;
349
0
            nCountChars += static_cast<uint32_t>(nLen);
350
0
        }
351
0
        else
352
0
            nCountNull++;
353
0
        nLastCode = nCode;
354
0
    }
355
356
0
    auto psChildDict = static_cast<struct ArrowSchema *>(
357
0
        CPLCalloc(1, sizeof(struct ArrowSchema)));
358
0
    psChild->dictionary = psChildDict;
359
0
    psChildDict->release = OGRLayerPartialReleaseSchema;
360
0
    psChildDict->name = CPLStrdup(poCodedDomain->GetName().c_str());
361
0
    psChildDict->format = "u";
362
0
    if (nCountNull)
363
0
        psChildDict->flags = ARROW_FLAG_NULLABLE;
364
0
}
365
366
/************************************************************************/
367
/*                       DefaultGetArrowSchema()                        */
368
/************************************************************************/
369
370
/** Default implementation of the ArrowArrayStream::get_schema() callback.
371
 *
372
 * To be used by driver implementations that have a custom GetArrowStream()
373
 * implementation.
374
 *
375
 * @since GDAL 3.6
376
 */
377
int OGRLayer::GetArrowSchema(struct ArrowArrayStream *,
378
                             struct ArrowSchema *out_schema)
379
0
{
380
0
    const bool bIncludeFID = CPLTestBool(
381
0
        m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
382
0
    const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
383
0
        GAS_OPT_DATETIME_AS_STRING, false);
384
0
    memset(out_schema, 0, sizeof(*out_schema));
385
0
    out_schema->format = "+s";
386
0
    out_schema->name = CPLStrdup("");
387
0
    out_schema->metadata = nullptr;
388
0
    auto poLayerDefn = GetLayerDefn();
389
0
    const int nFieldCount = poLayerDefn->GetFieldCount();
390
0
    const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
391
0
    const int nChildren = 1 + nFieldCount + nGeomFieldCount;
392
393
0
    out_schema->children = static_cast<struct ArrowSchema **>(
394
0
        CPLCalloc(nChildren, sizeof(struct ArrowSchema *)));
395
0
    int iSchemaChild = 0;
396
0
    if (bIncludeFID)
397
0
    {
398
0
        out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
399
0
            CPLCalloc(1, sizeof(struct ArrowSchema)));
400
0
        auto psChild = out_schema->children[iSchemaChild];
401
0
        ++iSchemaChild;
402
0
        psChild->release = OGRLayer::ReleaseSchema;
403
0
        const char *pszFIDName = GetFIDColumn();
404
0
        psChild->name =
405
0
            CPLStrdup((pszFIDName && pszFIDName[0]) ? pszFIDName
406
0
                                                    : DEFAULT_ARROW_FID_NAME);
407
0
        psChild->format = "l";
408
0
    }
409
0
    for (int i = 0; i < nFieldCount; ++i)
410
0
    {
411
0
        const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
412
0
        if (poFieldDefn->IsIgnored())
413
0
        {
414
0
            continue;
415
0
        }
416
417
0
        out_schema->children[iSchemaChild] = static_cast<struct ArrowSchema *>(
418
0
            CPLCalloc(1, sizeof(struct ArrowSchema)));
419
0
        auto psChild = out_schema->children[iSchemaChild];
420
0
        ++iSchemaChild;
421
0
        psChild->release = OGRLayer::ReleaseSchema;
422
0
        psChild->name = CPLStrdup(poFieldDefn->GetNameRef());
423
0
        if (poFieldDefn->IsNullable())
424
0
            psChild->flags = ARROW_FLAG_NULLABLE;
425
0
        const auto eType = poFieldDefn->GetType();
426
0
        const auto eSubType = poFieldDefn->GetSubType();
427
0
        const char *item_format = nullptr;
428
429
0
        std::vector<std::pair<std::string, std::string>> oMetadata;
430
431
0
        switch (eType)
432
0
        {
433
0
            case OFTInteger:
434
0
            {
435
0
                if (eSubType == OFSTBoolean)
436
0
                    psChild->format = "b";
437
0
                else if (eSubType == OFSTInt16)
438
0
                    psChild->format = "s";
439
0
                else
440
0
                    psChild->format = "i";
441
442
0
                const auto &osDomainName = poFieldDefn->GetDomainName();
443
0
                if (!osDomainName.empty())
444
0
                {
445
0
                    auto poDS = GetDataset();
446
0
                    if (poDS)
447
0
                    {
448
0
                        const auto poFieldDomain =
449
0
                            poDS->GetFieldDomain(osDomainName);
450
0
                        if (poFieldDomain &&
451
0
                            poFieldDomain->GetDomainType() == OFDT_CODED)
452
0
                        {
453
0
                            const OGRCodedFieldDomain *poCodedDomain =
454
0
                                static_cast<const OGRCodedFieldDomain *>(
455
0
                                    poFieldDomain);
456
0
                            AddDictToSchema(psChild, poCodedDomain);
457
0
                        }
458
0
                    }
459
0
                }
460
461
0
                break;
462
0
            }
463
464
0
            case OFTInteger64:
465
0
                psChild->format = "l";
466
0
                break;
467
468
0
            case OFTReal:
469
0
            {
470
0
                if (eSubType == OFSTFloat32)
471
0
                    psChild->format = "f";
472
0
                else
473
0
                    psChild->format = "g";
474
0
                break;
475
0
            }
476
477
0
            case OFTString:
478
0
            case OFTWideString:
479
0
                psChild->format = "u";
480
0
                break;
481
482
0
            case OFTBinary:
483
0
            {
484
0
                if (poFieldDefn->GetWidth() > 0)
485
0
                    psChild->format =
486
0
                        CPLStrdup(CPLSPrintf("w:%d", poFieldDefn->GetWidth()));
487
0
                else
488
0
                    psChild->format = "z";
489
0
                break;
490
0
            }
491
492
0
            case OFTIntegerList:
493
0
            {
494
0
                if (eSubType == OFSTBoolean)
495
0
                    item_format = "b";
496
0
                else if (eSubType == OFSTInt16)
497
0
                    item_format = "s";
498
0
                else
499
0
                    item_format = "i";
500
0
                break;
501
0
            }
502
503
0
            case OFTInteger64List:
504
0
                item_format = "l";
505
0
                break;
506
507
0
            case OFTRealList:
508
0
            {
509
0
                if (eSubType == OFSTFloat32)
510
0
                    item_format = "f";
511
0
                else
512
0
                    item_format = "g";
513
0
                break;
514
0
            }
515
516
0
            case OFTStringList:
517
0
            case OFTWideStringList:
518
0
                item_format = "u";
519
0
                break;
520
521
0
            case OFTDate:
522
0
                psChild->format = "tdD";
523
0
                break;
524
525
0
            case OFTTime:
526
0
                psChild->format = "ttm";
527
0
                break;
528
529
0
            case OFTDateTime:
530
0
            {
531
0
                const char *pszPrefix = "tsm:";
532
0
                const char *pszTZOverride =
533
0
                    m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
534
0
                const int nTZFlag = poFieldDefn->GetTZFlag();
535
0
                if (bDateTimeAsString)
536
0
                {
537
0
                    psChild->format = "u";
538
0
                }
539
0
                else if (pszTZOverride && EQUAL(pszTZOverride, "unknown"))
540
0
                {
541
0
                    psChild->format = CPLStrdup(pszPrefix);
542
0
                }
543
0
                else if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
544
0
                         (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
545
0
                {
546
0
                    oMetadata.emplace_back(
547
0
                        std::pair(ARROW_EXTENSION_NAME_KEY,
548
0
                                  EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET));
549
550
0
                    psChild->format = "+s";
551
0
                    psChild->n_children = 2;
552
0
                    psChild->children = static_cast<struct ArrowSchema **>(
553
0
                        CPLCalloc(2, sizeof(struct ArrowSchema *)));
554
555
                    // Create sub-child for timestamp in UTC
556
0
                    psChild->children[0] = static_cast<struct ArrowSchema *>(
557
0
                        CPLCalloc(1, sizeof(struct ArrowSchema)));
558
0
                    psChild->children[0]->release = OGRLayer::ReleaseSchema;
559
0
                    psChild->children[0]->name =
560
0
                        CPLStrdup(ATSWO_TIMESTAMP_FIELD_NAME);
561
0
                    psChild->children[0]->format = CPLStrdup("tsm:UTC");
562
563
                    // Create sub-child for offset to UTC in minutes
564
0
                    psChild->children[1] = static_cast<struct ArrowSchema *>(
565
0
                        CPLCalloc(1, sizeof(struct ArrowSchema)));
566
0
                    psChild->children[1]->release = OGRLayer::ReleaseSchema;
567
0
                    psChild->children[1]->name =
568
0
                        CPLStrdup(ATSWO_OFFSET_MINUTES_FIELD_NAME);
569
0
                    psChild->children[1]->format = "s";
570
0
                }
571
0
                else if (pszTZOverride)
572
0
                {
573
0
                    psChild->format = CPLStrdup(
574
0
                        (std::string(pszPrefix) + pszTZOverride).c_str());
575
0
                }
576
0
                else
577
0
                {
578
0
                    if (nTZFlag == OGR_TZFLAG_UTC)
579
0
                    {
580
0
                        psChild->format =
581
0
                            CPLStrdup(CPLSPrintf("%sUTC", pszPrefix));
582
0
                    }
583
0
                    else if (nTZFlag == OGR_TZFLAG_UNKNOWN ||
584
0
                             nTZFlag == OGR_TZFLAG_LOCALTIME)
585
0
                    {
586
0
                        psChild->format = CPLStrdup(pszPrefix);
587
0
                    }
588
0
                    else
589
0
                    {
590
0
                        psChild->format = CPLStrdup(
591
0
                            (pszPrefix + OGRTZFlagToTimezone(nTZFlag, "UTC"))
592
0
                                .c_str());
593
0
                    }
594
0
                }
595
0
                break;
596
0
            }
597
0
        }
598
599
0
        if (item_format)
600
0
        {
601
0
            psChild->format = "+l";
602
0
            psChild->n_children = 1;
603
0
            psChild->children = static_cast<struct ArrowSchema **>(
604
0
                CPLCalloc(1, sizeof(struct ArrowSchema *)));
605
0
            psChild->children[0] = static_cast<struct ArrowSchema *>(
606
0
                CPLCalloc(1, sizeof(struct ArrowSchema)));
607
0
            psChild->children[0]->release = OGRLayer::ReleaseSchema;
608
0
            psChild->children[0]->name = CPLStrdup("item");
609
0
            psChild->children[0]->format = item_format;
610
0
        }
611
612
0
        if (eType == OFTDateTime && bDateTimeAsString)
613
0
        {
614
0
            oMetadata.emplace_back(
615
0
                std::pair(MD_GDAL_OGR_TYPE, OGR_GetFieldTypeName(eType)));
616
0
        }
617
618
0
        const char *pszAlternativeName = poFieldDefn->GetAlternativeNameRef();
619
0
        if (pszAlternativeName && pszAlternativeName[0])
620
0
            oMetadata.emplace_back(
621
0
                std::pair(MD_GDAL_OGR_ALTERNATIVE_NAME, pszAlternativeName));
622
623
0
        const char *pszDefault = poFieldDefn->GetDefault();
624
0
        if (pszDefault && pszDefault[0])
625
0
            oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DEFAULT, pszDefault));
626
627
0
        const std::string &osComment = poFieldDefn->GetComment();
628
0
        if (!osComment.empty())
629
0
            oMetadata.emplace_back(std::pair(MD_GDAL_OGR_COMMENT, osComment));
630
631
0
        if (eType == OFTString && eSubType == OFSTJSON)
632
0
        {
633
0
            oMetadata.emplace_back(
634
0
                std::pair(ARROW_EXTENSION_NAME_KEY, EXTENSION_NAME_ARROW_JSON));
635
0
        }
636
0
        else if (eSubType != OFSTNone && eSubType != OFSTBoolean &&
637
0
                 eSubType != OFSTFloat32)
638
0
        {
639
0
            oMetadata.emplace_back(std::pair(
640
0
                MD_GDAL_OGR_SUBTYPE, OGR_GetFieldSubTypeName(eSubType)));
641
0
        }
642
0
        if (eType == OFTString && poFieldDefn->GetWidth() > 0)
643
0
        {
644
0
            oMetadata.emplace_back(std::pair(
645
0
                MD_GDAL_OGR_WIDTH, CPLSPrintf("%d", poFieldDefn->GetWidth())));
646
0
        }
647
0
        if (poFieldDefn->IsUnique())
648
0
        {
649
0
            oMetadata.emplace_back(std::pair(MD_GDAL_OGR_UNIQUE, "true"));
650
0
        }
651
0
        if (!poFieldDefn->GetDomainName().empty())
652
0
        {
653
0
            oMetadata.emplace_back(std::pair(MD_GDAL_OGR_DOMAIN_NAME,
654
0
                                             poFieldDefn->GetDomainName()));
655
0
        }
656
657
0
        if (!oMetadata.empty())
658
0
        {
659
0
            uint64_t nLen64 = sizeof(int32_t);
660
0
            for (const auto &oPair : oMetadata)
661
0
            {
662
0
                nLen64 += sizeof(int32_t);
663
0
                nLen64 += oPair.first.size();
664
0
                nLen64 += sizeof(int32_t);
665
0
                nLen64 += oPair.second.size();
666
0
            }
667
0
            if (nLen64 <
668
0
                static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
669
0
            {
670
0
                const size_t nLen = static_cast<size_t>(nLen64);
671
0
                char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
672
0
                psChild->metadata = pszMetadata;
673
0
                size_t offsetMD = 0;
674
0
                int32_t nSize = static_cast<int>(oMetadata.size());
675
0
                memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
676
0
                offsetMD += sizeof(int32_t);
677
0
                for (const auto &oPair : oMetadata)
678
0
                {
679
0
                    nSize = static_cast<int32_t>(oPair.first.size());
680
0
                    memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
681
0
                    offsetMD += sizeof(int32_t);
682
0
                    memcpy(pszMetadata + offsetMD, oPair.first.data(),
683
0
                           oPair.first.size());
684
0
                    offsetMD += oPair.first.size();
685
686
0
                    nSize = static_cast<int32_t>(oPair.second.size());
687
0
                    memcpy(pszMetadata + offsetMD, &nSize, sizeof(nSize));
688
0
                    offsetMD += sizeof(int32_t);
689
0
                    memcpy(pszMetadata + offsetMD, oPair.second.data(),
690
0
                           oPair.second.size());
691
0
                    offsetMD += oPair.second.size();
692
0
                }
693
694
0
                CPLAssert(offsetMD == nLen);
695
0
                CPL_IGNORE_RET_VAL(offsetMD);
696
0
            }
697
0
            else
698
0
            {
699
                // Extremely unlikely !
700
0
                CPLError(CE_Warning, CPLE_AppDefined,
701
0
                         "Cannot write ArrowSchema::metadata due to "
702
0
                         "too large content");
703
0
            }
704
0
        }
705
0
    }
706
707
0
    const char *const pszGeometryMetadataEncoding =
708
0
        m_aosArrowArrayStreamOptions.FetchNameValue(
709
0
            "GEOMETRY_METADATA_ENCODING");
710
0
    const char *pszExtensionName = EXTENSION_NAME_OGC_WKB;
711
0
    if (pszGeometryMetadataEncoding)
712
0
    {
713
0
        if (EQUAL(pszGeometryMetadataEncoding, "OGC"))
714
0
            pszExtensionName = EXTENSION_NAME_OGC_WKB;
715
0
        else if (EQUAL(pszGeometryMetadataEncoding, "GEOARROW"))
716
0
            pszExtensionName = EXTENSION_NAME_GEOARROW_WKB;
717
0
        else
718
0
            CPLError(CE_Warning, CPLE_NotSupported,
719
0
                     "Unsupported GEOMETRY_METADATA_ENCODING value: %s",
720
0
                     pszGeometryMetadataEncoding);
721
0
    }
722
0
    for (int i = 0; i < nGeomFieldCount; ++i)
723
0
    {
724
0
        const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
725
0
        if (poFieldDefn->IsIgnored())
726
0
        {
727
0
            continue;
728
0
        }
729
730
0
        out_schema->children[iSchemaChild] = CreateSchemaForWKBGeometryColumn(
731
0
            poFieldDefn, "z", pszExtensionName);
732
733
0
        ++iSchemaChild;
734
0
    }
735
736
0
    out_schema->n_children = iSchemaChild;
737
0
    out_schema->release = OGRLayer::ReleaseSchema;
738
0
    return 0;
739
0
}
740
741
/************************************************************************/
742
/*                  CreateSchemaForWKBGeometryColumn()                  */
743
/************************************************************************/
744
745
/** Return a ArrowSchema* corresponding to the WKB encoding of a geometry
746
 * column.
747
 */
748
749
/* static */
750
struct ArrowSchema *
751
OGRLayer::CreateSchemaForWKBGeometryColumn(const OGRGeomFieldDefn *poFieldDefn,
752
                                           const char *pszArrowFormat,
753
                                           const char *pszExtensionName)
754
0
{
755
0
    CPLAssert(strcmp(pszArrowFormat, "z") == 0 ||
756
0
              strcmp(pszArrowFormat, "Z") == 0);
757
0
    if (!EQUAL(pszExtensionName, EXTENSION_NAME_OGC_WKB) &&
758
0
        !EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
759
0
    {
760
0
        CPLError(CE_Failure, CPLE_NotSupported,
761
0
                 "Unsupported extension name '%s'. Defaulting to '%s'",
762
0
                 pszExtensionName, EXTENSION_NAME_OGC_WKB);
763
0
        pszExtensionName = EXTENSION_NAME_OGC_WKB;
764
0
    }
765
0
    auto psSchema = static_cast<struct ArrowSchema *>(
766
0
        CPLCalloc(1, sizeof(struct ArrowSchema)));
767
0
    psSchema->release = OGRLayer::ReleaseSchema;
768
0
    const char *pszGeomFieldName = poFieldDefn->GetNameRef();
769
0
    if (pszGeomFieldName[0] == '\0')
770
0
        pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
771
0
    psSchema->name = CPLStrdup(pszGeomFieldName);
772
0
    if (poFieldDefn->IsNullable())
773
0
        psSchema->flags = ARROW_FLAG_NULLABLE;
774
0
    psSchema->format = strcmp(pszArrowFormat, "z") == 0 ? "z" : "Z";
775
0
    std::string osExtensionMetadata;
776
0
    if (EQUAL(pszExtensionName, EXTENSION_NAME_GEOARROW_WKB))
777
0
    {
778
0
        const auto poSRS = poFieldDefn->GetSpatialRef();
779
0
        if (poSRS)
780
0
        {
781
0
            char *pszPROJJSON = nullptr;
782
0
            poSRS->exportToPROJJSON(&pszPROJJSON, nullptr);
783
0
            if (pszPROJJSON)
784
0
            {
785
0
                osExtensionMetadata = "{\"crs\":";
786
0
                osExtensionMetadata += pszPROJJSON;
787
0
                osExtensionMetadata += '}';
788
0
                CPLFree(pszPROJJSON);
789
0
            }
790
0
            else
791
0
            {
792
0
                CPLError(CE_Warning, CPLE_AppDefined,
793
0
                         "Cannot export CRS of geometry field %s to PROJJSON",
794
0
                         poFieldDefn->GetNameRef());
795
0
            }
796
0
        }
797
0
    }
798
0
    size_t nLen = sizeof(int32_t) + sizeof(int32_t) +
799
0
                  strlen(ARROW_EXTENSION_NAME_KEY) + sizeof(int32_t) +
800
0
                  strlen(pszExtensionName);
801
0
    if (!osExtensionMetadata.empty())
802
0
    {
803
0
        nLen += sizeof(int32_t) + strlen(ARROW_EXTENSION_METADATA_KEY) +
804
0
                sizeof(int32_t) + osExtensionMetadata.size();
805
0
    }
806
0
    char *pszMetadata = static_cast<char *>(CPLMalloc(nLen));
807
0
    psSchema->metadata = pszMetadata;
808
0
    size_t offsetMD = 0;
809
0
    *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
810
0
        osExtensionMetadata.empty() ? 1 : 2;
811
0
    offsetMD += sizeof(int32_t);
812
0
    *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
813
0
        static_cast<int32_t>(strlen(ARROW_EXTENSION_NAME_KEY));
814
0
    offsetMD += sizeof(int32_t);
815
0
    memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_NAME_KEY,
816
0
           strlen(ARROW_EXTENSION_NAME_KEY));
817
0
    offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_NAME_KEY));
818
0
    *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
819
0
        static_cast<int32_t>(strlen(pszExtensionName));
820
0
    offsetMD += sizeof(int32_t);
821
0
    memcpy(pszMetadata + offsetMD, pszExtensionName, strlen(pszExtensionName));
822
0
    offsetMD += strlen(pszExtensionName);
823
0
    if (!osExtensionMetadata.empty())
824
0
    {
825
0
        *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
826
0
            static_cast<int32_t>(strlen(ARROW_EXTENSION_METADATA_KEY));
827
0
        offsetMD += sizeof(int32_t);
828
0
        memcpy(pszMetadata + offsetMD, ARROW_EXTENSION_METADATA_KEY,
829
0
               strlen(ARROW_EXTENSION_METADATA_KEY));
830
0
        offsetMD += static_cast<int>(strlen(ARROW_EXTENSION_METADATA_KEY));
831
0
        *reinterpret_cast<int32_t *>(pszMetadata + offsetMD) =
832
0
            static_cast<int32_t>(osExtensionMetadata.size());
833
0
        offsetMD += sizeof(int32_t);
834
0
        memcpy(pszMetadata + offsetMD, osExtensionMetadata.c_str(),
835
0
               osExtensionMetadata.size());
836
0
        offsetMD += osExtensionMetadata.size();
837
0
    }
838
0
    CPLAssert(offsetMD == nLen);
839
0
    CPL_IGNORE_RET_VAL(offsetMD);
840
0
    return psSchema;
841
0
}
842
843
/************************************************************************/
844
/*                        StaticGetArrowSchema()                        */
845
/************************************************************************/
846
847
/** Default implementation of the ArrowArrayStream::get_schema() callback.
848
 *
849
 * To be used by driver implementations that have a custom GetArrowStream()
850
 * implementation.
851
 *
852
 * @since GDAL 3.6
853
 */
854
int OGRLayer::StaticGetArrowSchema(struct ArrowArrayStream *stream,
855
                                   struct ArrowSchema *out_schema)
856
0
{
857
0
    auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
858
0
                       stream->private_data)
859
0
                       ->poShared->m_poLayer;
860
0
    if (poLayer == nullptr)
861
0
    {
862
0
        CPLError(CE_Failure, CPLE_NotSupported,
863
0
                 "Calling get_schema() on a freed OGRLayer is not supported");
864
0
        return EINVAL;
865
0
    }
866
0
    return poLayer->GetArrowSchema(stream, out_schema);
867
0
}
868
869
/************************************************************************/
870
/*                        DefaultReleaseArray()                         */
871
/************************************************************************/
872
873
static void OGRLayerDefaultReleaseArray(struct ArrowArray *array)
874
0
{
875
0
    if (array->buffers)
876
0
    {
877
0
        for (int i = 0; i < static_cast<int>(array->n_buffers); ++i)
878
0
            VSIFreeAligned(const_cast<void *>(array->buffers[i]));
879
0
        CPLFree(array->buffers);
880
0
    }
881
0
    if (array->children)
882
0
    {
883
0
        for (int i = 0; i < static_cast<int>(array->n_children); ++i)
884
0
        {
885
0
            if (array->children[i] && array->children[i]->release)
886
0
            {
887
0
                array->children[i]->release(array->children[i]);
888
0
                CPLFree(array->children[i]);
889
0
            }
890
0
        }
891
0
        CPLFree(array->children);
892
0
    }
893
0
    if (array->dictionary)
894
0
    {
895
0
        if (array->dictionary->release)
896
0
        {
897
0
            array->dictionary->release(array->dictionary);
898
0
            CPLFree(array->dictionary);
899
0
        }
900
0
    }
901
0
    array->release = nullptr;
902
0
}
903
904
/** Release a ArrowArray.
905
 *
906
 * To be used by driver implementations that have a custom GetArrowStream()
907
 * implementation.
908
 *
909
 * @param array Arrow array to release.
910
 * @since GDAL 3.6
911
 */
912
void OGRLayer::ReleaseArray(struct ArrowArray *array)
913
0
{
914
0
    OGRLayerDefaultReleaseArray(array);
915
0
}
916
917
/************************************************************************/
918
/*                            IsValidField()                            */
919
/************************************************************************/
920
921
static inline bool IsValidField(const OGRField *psRawField)
922
0
{
923
0
    return (!(psRawField->Set.nMarker1 == OGRUnsetMarker &&
924
0
              psRawField->Set.nMarker2 == OGRUnsetMarker &&
925
0
              psRawField->Set.nMarker3 == OGRUnsetMarker) &&
926
0
            !(psRawField->Set.nMarker1 == OGRNullMarker &&
927
0
              psRawField->Set.nMarker2 == OGRNullMarker &&
928
0
              psRawField->Set.nMarker3 == OGRNullMarker));
929
0
}
930
931
/************************************************************************/
932
/*                        AllocValidityBitmap()                         */
933
/************************************************************************/
934
935
static uint8_t *AllocValidityBitmap(size_t nSize)
936
0
{
937
0
    auto pabyValidity = static_cast<uint8_t *>(
938
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8));
939
0
    if (pabyValidity)
940
0
    {
941
        // All valid initially
942
0
        memset(pabyValidity, 0xFF, (nSize + 7) / 8);
943
0
    }
944
0
    return pabyValidity;
945
0
}
946
947
/************************************************************************/
948
/*                             FillArray()                              */
949
/************************************************************************/
950
951
template <class T, typename TMember>
952
static bool FillArray(struct ArrowArray *psChild,
953
                      std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
954
                      const size_t nFeatureCountLimit, const bool bIsNullable,
955
                      TMember member, const int i)
956
0
{
957
0
    psChild->n_buffers = 2;
958
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
959
0
    uint8_t *pabyValidity = nullptr;
960
0
    T *panValues = static_cast<T *>(
961
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
962
0
    if (panValues == nullptr)
963
0
        return false;
964
0
    psChild->buffers[1] = panValues;
965
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
966
0
    {
967
0
        auto &poFeature = apoFeatures[iFeat];
968
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
969
0
        if (IsValidField(psRawField))
970
0
        {
971
0
            panValues[iFeat] = static_cast<T>((*psRawField).*member);
972
0
        }
973
0
        else if (bIsNullable)
974
0
        {
975
0
            panValues[iFeat] = 0;
976
0
            ++psChild->null_count;
977
0
            if (pabyValidity == nullptr)
978
0
            {
979
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
980
0
                psChild->buffers[0] = pabyValidity;
981
0
                if (pabyValidity == nullptr)
982
0
                    return false;
983
0
            }
984
0
            UnsetBit(pabyValidity, iFeat);
985
0
        }
986
0
        else
987
0
        {
988
0
            panValues[iFeat] = 0;
989
0
        }
990
0
    }
991
0
    return true;
992
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillArray<short, int OGRField::*>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int OGRField::*, int)
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillArray<int, int OGRField::*>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int OGRField::*, int)
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillArray<long, long long OGRField::*>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, long long OGRField::*, int)
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillArray<float, double OGRField::*>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, double OGRField::*, int)
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillArray<double, double OGRField::*>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, double OGRField::*, int)
993
994
/************************************************************************/
995
/*                           FillBoolArray()                            */
996
/************************************************************************/
997
998
template <typename TMember>
999
static bool FillBoolArray(struct ArrowArray *psChild,
1000
                          std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1001
                          const size_t nFeatureCountLimit,
1002
                          const bool bIsNullable, TMember member, const int i)
1003
0
{
1004
0
    psChild->n_buffers = 2;
1005
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1006
0
    uint8_t *pabyValidity = nullptr;
1007
0
    uint8_t *panValues = static_cast<uint8_t *>(
1008
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8));
1009
0
    if (panValues == nullptr)
1010
0
        return false;
1011
0
    memset(panValues, 0, (nFeatureCountLimit + 7) / 8);
1012
0
    psChild->buffers[1] = panValues;
1013
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1014
0
    {
1015
0
        auto &poFeature = apoFeatures[iFeat];
1016
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1017
0
        if (IsValidField(psRawField))
1018
0
        {
1019
0
            if ((*psRawField).*member)
1020
0
                SetBit(panValues, iFeat);
1021
0
        }
1022
0
        else if (bIsNullable)
1023
0
        {
1024
0
            ++psChild->null_count;
1025
0
            if (pabyValidity == nullptr)
1026
0
            {
1027
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1028
0
                psChild->buffers[0] = pabyValidity;
1029
0
                if (pabyValidity == nullptr)
1030
0
                    return false;
1031
0
            }
1032
0
            UnsetBit(pabyValidity, iFeat);
1033
0
        }
1034
0
    }
1035
0
    return true;
1036
0
}
1037
1038
/************************************************************************/
1039
/*                           FillListArray()                            */
1040
/************************************************************************/
1041
1042
struct GetFromIntegerList
1043
{
1044
    static inline int getCount(const OGRField *psRawField)
1045
0
    {
1046
0
        return psRawField->IntegerList.nCount;
1047
0
    }
1048
1049
    static inline const int *getValues(const OGRField *psRawField)
1050
0
    {
1051
0
        return psRawField->IntegerList.paList;
1052
0
    }
1053
};
1054
1055
struct GetFromInteger64List
1056
{
1057
    static inline int getCount(const OGRField *psRawField)
1058
0
    {
1059
0
        return psRawField->Integer64List.nCount;
1060
0
    }
1061
1062
    static inline const GIntBig *getValues(const OGRField *psRawField)
1063
0
    {
1064
0
        return psRawField->Integer64List.paList;
1065
0
    }
1066
};
1067
1068
struct GetFromRealList
1069
{
1070
    static inline int getCount(const OGRField *psRawField)
1071
0
    {
1072
0
        return psRawField->RealList.nCount;
1073
0
    }
1074
1075
    static inline const double *getValues(const OGRField *psRawField)
1076
0
    {
1077
0
        return psRawField->RealList.paList;
1078
0
    }
1079
};
1080
1081
template <class OffsetType, class T, class GetFromList>
1082
static size_t
1083
FillListArray(struct ArrowArray *psChild,
1084
              std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1085
              const size_t nFeatureCountLimit, const bool bIsNullable,
1086
              const int i, const size_t nMemLimit)
1087
0
{
1088
0
    psChild->n_buffers = 2;
1089
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1090
0
    uint8_t *pabyValidity = nullptr;
1091
0
    OffsetType *panOffsets =
1092
0
        static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1093
0
            sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1094
0
    if (panOffsets == nullptr)
1095
0
        return 0;
1096
0
    psChild->buffers[1] = panOffsets;
1097
1098
0
    OffsetType nOffset = 0;
1099
0
    size_t nFeatCount = 0;
1100
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1101
0
    {
1102
0
        panOffsets[iFeat] = nOffset;
1103
0
        auto &poFeature = apoFeatures[iFeat];
1104
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1105
0
        if (IsValidField(psRawField))
1106
0
        {
1107
0
            const unsigned nCount = GetFromList::getCount(psRawField);
1108
0
            if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1109
0
            {
1110
0
                if (nFeatCount == 0)
1111
0
                    return 0;
1112
0
                break;
1113
0
            }
1114
0
            nOffset += static_cast<OffsetType>(nCount);
1115
0
        }
1116
0
        else if (bIsNullable)
1117
0
        {
1118
0
            ++psChild->null_count;
1119
0
            if (pabyValidity == nullptr)
1120
0
            {
1121
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1122
0
                psChild->buffers[0] = pabyValidity;
1123
0
                if (pabyValidity == nullptr)
1124
0
                    return 0;
1125
0
            }
1126
0
            UnsetBit(pabyValidity, iFeat);
1127
0
        }
1128
0
    }
1129
0
    panOffsets[nFeatCount] = nOffset;
1130
1131
0
    psChild->n_children = 1;
1132
0
    psChild->children = static_cast<struct ArrowArray **>(
1133
0
        CPLCalloc(1, sizeof(struct ArrowArray *)));
1134
0
    psChild->children[0] = static_cast<struct ArrowArray *>(
1135
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
1136
0
    auto psValueChild = psChild->children[0];
1137
1138
0
    psValueChild->release = OGRLayerDefaultReleaseArray;
1139
0
    psValueChild->n_buffers = 2;
1140
0
    psValueChild->buffers =
1141
0
        static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1142
0
    psValueChild->length = nOffset;
1143
0
    T *panValues = static_cast<T *>(
1144
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1)));
1145
0
    if (panValues == nullptr)
1146
0
        return 0;
1147
0
    psValueChild->buffers[1] = panValues;
1148
1149
0
    nOffset = 0;
1150
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1151
0
    {
1152
0
        auto &poFeature = apoFeatures[iFeat];
1153
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1154
0
        if (IsValidField(psRawField))
1155
0
        {
1156
0
            const int nCount = GetFromList::getCount(psRawField);
1157
0
            const auto paList = GetFromList::getValues(psRawField);
1158
0
            if (sizeof(*paList) == sizeof(T))
1159
0
                memcpy(panValues + nOffset, paList, nCount * sizeof(T));
1160
0
            else
1161
0
            {
1162
0
                for (int j = 0; j < nCount; ++j)
1163
0
                {
1164
0
                    panValues[nOffset + j] = static_cast<T>(paList[j]);
1165
0
                }
1166
0
            }
1167
0
            nOffset += static_cast<OffsetType>(nCount);
1168
0
        }
1169
0
    }
1170
1171
0
    return nFeatCount;
1172
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillListArray<int, short, GetFromIntegerList>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillListArray<int, int, GetFromIntegerList>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillListArray<int, long, GetFromInteger64List>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillListArray<int, float, GetFromRealList>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillListArray<int, double, GetFromRealList>(ArrowArray*, std::__1::deque<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> >, std::__1::allocator<std::__1::unique_ptr<OGRFeature, std::__1::default_delete<OGRFeature> > > >&, unsigned long, bool, int, unsigned long)
1173
1174
template <class OffsetType, class GetFromList>
1175
static size_t
1176
FillListArrayBool(struct ArrowArray *psChild,
1177
                  std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1178
                  const size_t nFeatureCountLimit, const bool bIsNullable,
1179
                  const int i, const size_t nMemLimit)
1180
0
{
1181
0
    psChild->n_buffers = 2;
1182
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1183
0
    uint8_t *pabyValidity = nullptr;
1184
0
    OffsetType *panOffsets =
1185
0
        static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1186
0
            sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1187
0
    if (panOffsets == nullptr)
1188
0
        return 0;
1189
0
    psChild->buffers[1] = panOffsets;
1190
1191
0
    OffsetType nOffset = 0;
1192
0
    size_t nFeatCount = 0;
1193
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1194
0
    {
1195
0
        panOffsets[iFeat] = nOffset;
1196
0
        auto &poFeature = apoFeatures[iFeat];
1197
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1198
0
        if (IsValidField(psRawField))
1199
0
        {
1200
0
            const unsigned nCount = GetFromList::getCount(psRawField);
1201
0
            if (nCount > static_cast<size_t>(nMemLimit - nOffset))
1202
0
            {
1203
0
                if (nFeatCount == 0)
1204
0
                    return 0;
1205
0
                break;
1206
0
            }
1207
0
            nOffset += static_cast<OffsetType>(nCount);
1208
0
        }
1209
0
        else if (bIsNullable)
1210
0
        {
1211
0
            ++psChild->null_count;
1212
0
            if (pabyValidity == nullptr)
1213
0
            {
1214
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1215
0
                psChild->buffers[0] = pabyValidity;
1216
0
                if (pabyValidity == nullptr)
1217
0
                    return 0;
1218
0
            }
1219
0
            UnsetBit(pabyValidity, iFeat);
1220
0
        }
1221
0
    }
1222
0
    panOffsets[nFeatCount] = nOffset;
1223
1224
0
    psChild->n_children = 1;
1225
0
    psChild->children = static_cast<struct ArrowArray **>(
1226
0
        CPLCalloc(1, sizeof(struct ArrowArray *)));
1227
0
    psChild->children[0] = static_cast<struct ArrowArray *>(
1228
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
1229
0
    auto psValueChild = psChild->children[0];
1230
1231
0
    psValueChild->release = OGRLayerDefaultReleaseArray;
1232
0
    psValueChild->n_buffers = 2;
1233
0
    psValueChild->buffers =
1234
0
        static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1235
0
    psValueChild->length = nOffset;
1236
0
    uint8_t *panValues = static_cast<uint8_t *>(
1237
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8));
1238
0
    if (panValues == nullptr)
1239
0
        return 0;
1240
0
    memset(panValues, 0, (nOffset + 7) / 8);
1241
0
    psValueChild->buffers[1] = panValues;
1242
1243
0
    nOffset = 0;
1244
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1245
0
    {
1246
0
        auto &poFeature = apoFeatures[iFeat];
1247
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1248
0
        if (IsValidField(psRawField))
1249
0
        {
1250
0
            const int nCount = GetFromList::getCount(psRawField);
1251
0
            const auto paList = GetFromList::getValues(psRawField);
1252
1253
0
            for (int j = 0; j < nCount; ++j)
1254
0
            {
1255
0
                if (paList[j])
1256
0
                    SetBit(panValues, nOffset + j);
1257
0
            }
1258
0
            nOffset += static_cast<OffsetType>(nCount);
1259
0
        }
1260
0
    }
1261
1262
0
    return nFeatCount;
1263
0
}
1264
1265
/************************************************************************/
1266
/*                          FillStringArray()                           */
1267
/************************************************************************/
1268
1269
template <class T>
1270
static size_t
1271
FillStringArray(struct ArrowArray *psChild,
1272
                std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1273
                const size_t nFeatureCountLimit, const bool bIsNullable,
1274
                const int i, const size_t nMemLimit)
1275
0
{
1276
0
    psChild->n_buffers = 3;
1277
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1278
0
    uint8_t *pabyValidity = nullptr;
1279
0
    T *panOffsets = static_cast<T *>(
1280
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1281
0
    if (panOffsets == nullptr)
1282
0
        return 0;
1283
0
    psChild->buffers[1] = panOffsets;
1284
1285
0
    size_t nOffset = 0;
1286
0
    size_t nFeatCount = 0;
1287
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1288
0
    {
1289
0
        panOffsets[iFeat] = static_cast<T>(nOffset);
1290
0
        const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1291
0
        if (IsValidField(psRawField))
1292
0
        {
1293
0
            const size_t nLen = strlen(psRawField->String);
1294
0
            if (nLen > nMemLimit - nOffset)
1295
0
            {
1296
0
                if (nFeatCount == 0)
1297
0
                    return 0;
1298
0
                break;
1299
0
            }
1300
0
            nOffset += static_cast<T>(nLen);
1301
0
        }
1302
0
        else if (bIsNullable)
1303
0
        {
1304
0
            ++psChild->null_count;
1305
0
            if (pabyValidity == nullptr)
1306
0
            {
1307
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1308
0
                psChild->buffers[0] = pabyValidity;
1309
0
                if (pabyValidity == nullptr)
1310
0
                    return 0;
1311
0
            }
1312
0
            UnsetBit(pabyValidity, iFeat);
1313
0
        }
1314
0
    }
1315
0
    panOffsets[nFeatCount] = static_cast<T>(nOffset);
1316
1317
0
    char *pachValues =
1318
0
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1319
0
    if (pachValues == nullptr)
1320
0
        return 0;
1321
0
    psChild->buffers[2] = pachValues;
1322
1323
0
    nOffset = 0;
1324
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1325
0
    {
1326
0
        const size_t nLen =
1327
0
            static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1328
0
        if (nLen)
1329
0
        {
1330
0
            const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1331
0
            memcpy(pachValues + nOffset, psRawField->String, nLen);
1332
0
            nOffset += nLen;
1333
0
        }
1334
0
    }
1335
1336
0
    return nFeatCount;
1337
0
}
1338
1339
/************************************************************************/
1340
/*                        FillStringListArray()                         */
1341
/************************************************************************/
1342
1343
template <class OffsetType>
1344
static size_t
1345
FillStringListArray(struct ArrowArray *psChild,
1346
                    std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1347
                    const size_t nFeatureCountLimit, const bool bIsNullable,
1348
                    const int i, const size_t nMemLimit)
1349
0
{
1350
0
    psChild->n_buffers = 2;
1351
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1352
0
    uint8_t *pabyValidity = nullptr;
1353
0
    OffsetType *panOffsets =
1354
0
        static_cast<OffsetType *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1355
0
            sizeof(OffsetType) * (1 + nFeatureCountLimit)));
1356
0
    if (panOffsets == nullptr)
1357
0
        return false;
1358
0
    psChild->buffers[1] = panOffsets;
1359
1360
0
    OffsetType nStrings = 0;
1361
0
    OffsetType nCountChars = 0;
1362
0
    size_t nFeatCount = 0;
1363
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1364
0
    {
1365
0
        panOffsets[iFeat] = nStrings;
1366
0
        auto &poFeature = apoFeatures[iFeat];
1367
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1368
0
        if (IsValidField(psRawField))
1369
0
        {
1370
0
            const int nCount = psRawField->StringList.nCount;
1371
0
            if (static_cast<size_t>(nCount) >
1372
0
                static_cast<size_t>(nMemLimit - nStrings))
1373
0
            {
1374
0
                if (nFeatCount == 0)
1375
0
                    return 0;
1376
0
                goto after_loop;
1377
0
            }
1378
0
            for (int j = 0; j < nCount; ++j)
1379
0
            {
1380
0
                const size_t nLen = strlen(psRawField->StringList.paList[j]);
1381
0
                if (nLen > static_cast<size_t>(nMemLimit - nCountChars))
1382
0
                {
1383
0
                    if (nFeatCount == 0)
1384
0
                        return 0;
1385
0
                    goto after_loop;
1386
0
                }
1387
0
                nCountChars += static_cast<OffsetType>(nLen);
1388
0
            }
1389
0
            nStrings += static_cast<OffsetType>(nCount);
1390
0
        }
1391
0
        else if (bIsNullable)
1392
0
        {
1393
0
            ++psChild->null_count;
1394
0
            if (pabyValidity == nullptr)
1395
0
            {
1396
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1397
0
                psChild->buffers[0] = pabyValidity;
1398
0
                if (pabyValidity == nullptr)
1399
0
                    return 0;
1400
0
            }
1401
0
            UnsetBit(pabyValidity, iFeat);
1402
0
        }
1403
0
    }
1404
0
after_loop:
1405
0
    panOffsets[nFeatCount] = nStrings;
1406
1407
0
    psChild->n_children = 1;
1408
0
    psChild->children = static_cast<struct ArrowArray **>(
1409
0
        CPLCalloc(1, sizeof(struct ArrowArray *)));
1410
0
    psChild->children[0] = static_cast<struct ArrowArray *>(
1411
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
1412
0
    auto psValueChild = psChild->children[0];
1413
1414
0
    psValueChild->release = OGRLayerDefaultReleaseArray;
1415
0
    psValueChild->length = nStrings;
1416
0
    psValueChild->n_buffers = 3;
1417
0
    psValueChild->buffers =
1418
0
        static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1419
1420
0
    OffsetType *panChildOffsets = static_cast<OffsetType *>(
1421
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(OffsetType) * (1 + nStrings)));
1422
0
    if (panChildOffsets == nullptr)
1423
0
        return 0;
1424
0
    psValueChild->buffers[1] = panChildOffsets;
1425
1426
0
    char *pachValues =
1427
0
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1));
1428
0
    if (pachValues == nullptr)
1429
0
        return 0;
1430
0
    psValueChild->buffers[2] = pachValues;
1431
1432
0
    nStrings = 0;
1433
0
    nCountChars = 0;
1434
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1435
0
    {
1436
0
        auto &poFeature = apoFeatures[iFeat];
1437
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1438
0
        if (IsValidField(psRawField))
1439
0
        {
1440
0
            const int nCount = psRawField->StringList.nCount;
1441
0
            for (int j = 0; j < nCount; ++j)
1442
0
            {
1443
0
                panChildOffsets[nStrings] = nCountChars;
1444
0
                ++nStrings;
1445
0
                const size_t nLen = strlen(psRawField->StringList.paList[j]);
1446
0
                memcpy(pachValues + nCountChars,
1447
0
                       psRawField->StringList.paList[j], nLen);
1448
0
                nCountChars += static_cast<OffsetType>(nLen);
1449
0
            }
1450
0
        }
1451
0
    }
1452
0
    panChildOffsets[nStrings] = nCountChars;
1453
1454
0
    return nFeatCount;
1455
0
}
1456
1457
/************************************************************************/
1458
/*                          FillBinaryArray()                           */
1459
/************************************************************************/
1460
1461
template <class T>
1462
static size_t
1463
FillBinaryArray(struct ArrowArray *psChild,
1464
                std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1465
                const size_t nFeatureCountLimit, const bool bIsNullable,
1466
                const int i, const size_t nMemLimit)
1467
0
{
1468
0
    psChild->n_buffers = 3;
1469
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1470
0
    uint8_t *pabyValidity = nullptr;
1471
0
    T *panOffsets = static_cast<T *>(
1472
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1473
0
    if (panOffsets == nullptr)
1474
0
        return 0;
1475
0
    psChild->buffers[1] = panOffsets;
1476
1477
0
    T nOffset = 0;
1478
0
    size_t nFeatCount = 0;
1479
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1480
0
    {
1481
0
        panOffsets[iFeat] = nOffset;
1482
0
        const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1483
0
        if (IsValidField(psRawField))
1484
0
        {
1485
0
            const size_t nLen = psRawField->Binary.nCount;
1486
0
            if (nLen > static_cast<size_t>(nMemLimit - nOffset))
1487
0
            {
1488
0
                if (iFeat == 0)
1489
0
                    return 0;
1490
0
                break;
1491
0
            }
1492
0
            nOffset += static_cast<T>(nLen);
1493
0
        }
1494
0
        else if (bIsNullable)
1495
0
        {
1496
0
            ++psChild->null_count;
1497
0
            if (pabyValidity == nullptr)
1498
0
            {
1499
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1500
0
                psChild->buffers[0] = pabyValidity;
1501
0
                if (pabyValidity == nullptr)
1502
0
                    return 0;
1503
0
            }
1504
0
            UnsetBit(pabyValidity, iFeat);
1505
0
        }
1506
0
    }
1507
0
    panOffsets[nFeatCount] = nOffset;
1508
1509
0
    GByte *pabyValues =
1510
0
        static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1511
0
    if (pabyValues == nullptr)
1512
0
        return 0;
1513
0
    psChild->buffers[2] = pabyValues;
1514
1515
0
    nOffset = 0;
1516
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1517
0
    {
1518
0
        const size_t nLen =
1519
0
            static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1520
0
        if (nLen)
1521
0
        {
1522
0
            const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1523
0
            memcpy(pabyValues + nOffset, psRawField->Binary.paData, nLen);
1524
0
            nOffset += static_cast<T>(nLen);
1525
0
        }
1526
0
    }
1527
1528
0
    return nFeatCount;
1529
0
}
1530
1531
/************************************************************************/
1532
/*                     FillFixedWidthBinaryArray()                      */
1533
/************************************************************************/
1534
1535
static bool
1536
FillFixedWidthBinaryArray(struct ArrowArray *psChild,
1537
                          std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1538
                          const size_t nFeatureCountLimit,
1539
                          const bool bIsNullable, const int nWidth, const int i)
1540
0
{
1541
0
    psChild->n_buffers = 2;
1542
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1543
0
    uint8_t *pabyValidity = nullptr;
1544
1545
0
    assert(nFeatureCountLimit + 1 <=
1546
0
           std::numeric_limits<size_t>::max() / nWidth);
1547
0
    GByte *pabyValues = static_cast<GByte *>(
1548
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth));
1549
0
    if (pabyValues == nullptr)
1550
0
        return false;
1551
0
    psChild->buffers[1] = pabyValues;
1552
1553
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1554
0
    {
1555
0
        const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1556
0
        if (IsValidField(psRawField))
1557
0
        {
1558
0
            const auto nLen = psRawField->Binary.nCount;
1559
0
            if (nLen < nWidth)
1560
0
            {
1561
0
                memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1562
0
                       nLen);
1563
0
                memset(pabyValues + iFeat * nWidth + nLen, 0, nWidth - nLen);
1564
0
            }
1565
0
            else
1566
0
            {
1567
0
                memcpy(pabyValues + iFeat * nWidth, psRawField->Binary.paData,
1568
0
                       nWidth);
1569
0
            }
1570
0
        }
1571
0
        else
1572
0
        {
1573
0
            memset(pabyValues + iFeat * nWidth, 0, nWidth);
1574
0
            if (bIsNullable)
1575
0
            {
1576
0
                ++psChild->null_count;
1577
0
                if (pabyValidity == nullptr)
1578
0
                {
1579
0
                    pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1580
0
                    psChild->buffers[0] = pabyValidity;
1581
0
                    if (pabyValidity == nullptr)
1582
0
                        return false;
1583
0
                }
1584
0
                UnsetBit(pabyValidity, iFeat);
1585
0
            }
1586
0
        }
1587
0
    }
1588
1589
0
    return true;
1590
0
}
1591
1592
/************************************************************************/
1593
/*                        FillWKBGeometryArray()                        */
1594
/************************************************************************/
1595
1596
template <class T>
1597
static size_t
1598
FillWKBGeometryArray(struct ArrowArray *psChild,
1599
                     std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1600
                     const size_t nFeatureCountLimit,
1601
                     const OGRGeomFieldDefn *poFieldDefn, const int i,
1602
                     const size_t nMemLimit)
1603
0
{
1604
0
    const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
1605
0
    psChild->n_buffers = 3;
1606
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1607
0
    uint8_t *pabyValidity = nullptr;
1608
0
    T *panOffsets = static_cast<T *>(
1609
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1610
0
    if (panOffsets == nullptr)
1611
0
        return 0;
1612
0
    psChild->buffers[1] = panOffsets;
1613
0
    const auto eGeomType = poFieldDefn->GetType();
1614
0
    auto poEmptyGeom =
1615
0
        std::unique_ptr<OGRGeometry>(OGRGeometryFactory::createGeometry(
1616
0
            (eGeomType == wkbNone || wkbFlatten(eGeomType) == wkbUnknown)
1617
0
                ? wkbGeometryCollection
1618
0
                : eGeomType));
1619
1620
0
    size_t nOffset = 0;
1621
0
    size_t nFeatCount = 0;
1622
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1623
0
    {
1624
0
        panOffsets[iFeat] = static_cast<T>(nOffset);
1625
0
        const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1626
0
        if (poGeom != nullptr)
1627
0
        {
1628
0
            const size_t nLen = poGeom->WkbSize();
1629
0
            if (nLen > nMemLimit - nOffset)
1630
0
            {
1631
0
                if (nFeatCount == 0)
1632
0
                    return 0;
1633
0
                break;
1634
0
            }
1635
0
            nOffset += static_cast<T>(nLen);
1636
0
        }
1637
0
        else if (bIsNullable)
1638
0
        {
1639
0
            ++psChild->null_count;
1640
0
            if (pabyValidity == nullptr)
1641
0
            {
1642
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1643
0
                psChild->buffers[0] = pabyValidity;
1644
0
                if (pabyValidity == nullptr)
1645
0
                    return 0;
1646
0
            }
1647
0
            UnsetBit(pabyValidity, iFeat);
1648
0
        }
1649
0
        else if (poEmptyGeom)
1650
0
        {
1651
0
            const size_t nLen = poEmptyGeom->WkbSize();
1652
0
            if (nLen > nMemLimit - nOffset)
1653
0
            {
1654
0
                if (nFeatCount == 0)
1655
0
                    return 0;
1656
0
                break;
1657
0
            }
1658
0
            nOffset += static_cast<T>(nLen);
1659
0
        }
1660
0
    }
1661
0
    panOffsets[nFeatCount] = static_cast<T>(nOffset);
1662
1663
0
    GByte *pabyValues =
1664
0
        static_cast<GByte *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
1665
0
    if (pabyValues == nullptr)
1666
0
        return 0;
1667
0
    psChild->buffers[2] = pabyValues;
1668
1669
0
    nOffset = 0;
1670
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
1671
0
    {
1672
0
        const size_t nLen =
1673
0
            static_cast<size_t>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
1674
0
        if (nLen)
1675
0
        {
1676
0
            const auto poGeom = apoFeatures[iFeat]->GetGeomFieldRef(i);
1677
0
            poGeom->exportToWkb(wkbNDR, pabyValues + nOffset, wkbVariantIso);
1678
0
            nOffset += nLen;
1679
0
        }
1680
0
        else if (!bIsNullable && poEmptyGeom)
1681
0
        {
1682
0
            poEmptyGeom->exportToWkb(wkbNDR, pabyValues + nOffset,
1683
0
                                     wkbVariantIso);
1684
0
            nOffset += nLen;
1685
0
        }
1686
0
    }
1687
1688
0
    return nFeatCount;
1689
0
}
1690
1691
/************************************************************************/
1692
/*                           FillDateArray()                            */
1693
/************************************************************************/
1694
1695
static bool FillDateArray(struct ArrowArray *psChild,
1696
                          std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1697
                          const size_t nFeatureCountLimit,
1698
                          const bool bIsNullable, const int i)
1699
0
{
1700
0
    psChild->n_buffers = 2;
1701
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1702
0
    uint8_t *pabyValidity = nullptr;
1703
0
    int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1704
0
        sizeof(int32_t) * (nFeatureCountLimit + 1)));
1705
0
    if (panValues == nullptr)
1706
0
        return false;
1707
0
    psChild->buffers[1] = panValues;
1708
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1709
0
    {
1710
0
        auto &poFeature = apoFeatures[iFeat];
1711
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1712
0
        if (IsValidField(psRawField))
1713
0
        {
1714
0
            struct tm brokenDown;
1715
0
            memset(&brokenDown, 0, sizeof(brokenDown));
1716
0
            brokenDown.tm_year = psRawField->Date.Year - 1900;
1717
0
            brokenDown.tm_mon = psRawField->Date.Month - 1;
1718
0
            brokenDown.tm_mday = psRawField->Date.Day;
1719
0
            panValues[iFeat] =
1720
0
                static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
1721
0
        }
1722
0
        else if (bIsNullable)
1723
0
        {
1724
0
            panValues[iFeat] = 0;
1725
0
            ++psChild->null_count;
1726
0
            if (pabyValidity == nullptr)
1727
0
            {
1728
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1729
0
                psChild->buffers[0] = pabyValidity;
1730
0
                if (pabyValidity == nullptr)
1731
0
                    return false;
1732
0
            }
1733
0
            UnsetBit(pabyValidity, iFeat);
1734
0
        }
1735
0
        else
1736
0
        {
1737
0
            panValues[iFeat] = 0;
1738
0
        }
1739
0
    }
1740
0
    return true;
1741
0
}
1742
1743
/************************************************************************/
1744
/*                           FillTimeArray()                            */
1745
/************************************************************************/
1746
1747
static bool FillTimeArray(struct ArrowArray *psChild,
1748
                          std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1749
                          const size_t nFeatureCountLimit,
1750
                          const bool bIsNullable, const int i)
1751
0
{
1752
0
    psChild->n_buffers = 2;
1753
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1754
0
    uint8_t *pabyValidity = nullptr;
1755
0
    int32_t *panValues = static_cast<int32_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1756
0
        sizeof(int32_t) * (nFeatureCountLimit + 1)));
1757
0
    if (panValues == nullptr)
1758
0
        return false;
1759
0
    psChild->buffers[1] = panValues;
1760
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1761
0
    {
1762
0
        auto &poFeature = apoFeatures[iFeat];
1763
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1764
0
        if (IsValidField(psRawField))
1765
0
        {
1766
0
            panValues[iFeat] =
1767
0
                psRawField->Date.Hour * 3600000 +
1768
0
                psRawField->Date.Minute * 60000 +
1769
0
                static_cast<int>(psRawField->Date.Second * 1000 + 0.5f);
1770
0
        }
1771
0
        else if (bIsNullable)
1772
0
        {
1773
0
            panValues[iFeat] = 0;
1774
0
            ++psChild->null_count;
1775
0
            if (pabyValidity == nullptr)
1776
0
            {
1777
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1778
0
                psChild->buffers[0] = pabyValidity;
1779
0
                if (pabyValidity == nullptr)
1780
0
                    return false;
1781
0
            }
1782
0
            UnsetBit(pabyValidity, iFeat);
1783
0
        }
1784
0
        else
1785
0
        {
1786
0
            panValues[iFeat] = 0;
1787
0
        }
1788
0
    }
1789
0
    return true;
1790
0
}
1791
1792
/************************************************************************/
1793
/*                         FillDateTimeArray()                          */
1794
/************************************************************************/
1795
1796
static bool
1797
FillDateTimeArray(struct ArrowArray *psChild,
1798
                  std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1799
                  const size_t nFeatureCountLimit, const bool bIsNullable,
1800
                  const int i, int nFieldTZFlag)
1801
0
{
1802
0
    psChild->n_buffers = 2;
1803
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1804
0
    uint8_t *pabyValidity = nullptr;
1805
0
    int64_t *panValues = static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
1806
0
        sizeof(int64_t) * (nFeatureCountLimit + 1)));
1807
0
    if (panValues == nullptr)
1808
0
        return false;
1809
0
    psChild->buffers[1] = panValues;
1810
0
    struct tm brokenDown;
1811
0
    memset(&brokenDown, 0, sizeof(brokenDown));
1812
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1813
0
    {
1814
0
        auto &poFeature = apoFeatures[iFeat];
1815
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1816
0
        if (IsValidField(psRawField))
1817
0
        {
1818
0
            brokenDown.tm_year = psRawField->Date.Year - 1900;
1819
0
            brokenDown.tm_mon = psRawField->Date.Month - 1;
1820
0
            brokenDown.tm_mday = psRawField->Date.Day;
1821
0
            brokenDown.tm_hour = psRawField->Date.Hour;
1822
0
            brokenDown.tm_min = psRawField->Date.Minute;
1823
0
            brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1824
0
            auto nVal =
1825
0
                CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1826
0
                (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1827
0
                 1000);
1828
0
            if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1829
0
                psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1830
0
            {
1831
                // Convert for psRawField->Date.TZFlag to UTC
1832
0
                const int TZOffset =
1833
0
                    (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1834
0
                const int TZOffsetMS = TZOffset * 60 * 1000;
1835
0
                nVal -= TZOffsetMS;
1836
0
            }
1837
0
            panValues[iFeat] = nVal;
1838
0
        }
1839
0
        else if (bIsNullable)
1840
0
        {
1841
0
            panValues[iFeat] = 0;
1842
0
            ++psChild->null_count;
1843
0
            if (pabyValidity == nullptr)
1844
0
            {
1845
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1846
0
                psChild->buffers[0] = pabyValidity;
1847
0
                if (pabyValidity == nullptr)
1848
0
                    return false;
1849
0
            }
1850
0
            UnsetBit(pabyValidity, iFeat);
1851
0
        }
1852
0
        else
1853
0
        {
1854
0
            panValues[iFeat] = 0;
1855
0
        }
1856
0
    }
1857
0
    return true;
1858
0
}
1859
1860
/************************************************************************/
1861
/*                   FillDateTimeArrayWithTimeZone()                    */
1862
/************************************************************************/
1863
1864
static bool FillDateTimeArrayWithTimeZone(
1865
    struct ArrowArray *psChild,
1866
    std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1867
    const size_t nFeatureCountLimit, const bool bIsNullable, const int i,
1868
    int nFieldTZFlag)
1869
0
{
1870
0
    psChild->n_children = 2;
1871
0
    psChild->children = static_cast<struct ArrowArray **>(
1872
0
        CPLCalloc(2, sizeof(struct ArrowArray *)));
1873
0
    psChild->n_buffers = 1;
1874
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
1875
0
    uint8_t *pabyValidity = nullptr;
1876
1877
    // Create sub-array for timestamp in UTC
1878
0
    psChild->children[0] = static_cast<struct ArrowArray *>(
1879
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
1880
0
    psChild->children[0]->n_buffers = 2;
1881
0
    psChild->children[0]->buffers =
1882
0
        static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1883
0
    psChild->children[0]->release = OGRLayerDefaultReleaseArray;
1884
0
    int64_t *panTimestamps = static_cast<int64_t *>(
1885
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * nFeatureCountLimit));
1886
0
    if (panTimestamps == nullptr)
1887
0
        return false;
1888
0
    psChild->children[0]->buffers[1] = panTimestamps;
1889
1890
    // Create sub-array for offset to UTC in minutes
1891
0
    psChild->children[1] = static_cast<struct ArrowArray *>(
1892
0
        CPLCalloc(1, sizeof(struct ArrowArray)));
1893
0
    psChild->children[1]->n_buffers = 2;
1894
0
    psChild->children[1]->buffers =
1895
0
        static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
1896
0
    psChild->children[1]->release = OGRLayerDefaultReleaseArray;
1897
0
    int16_t *panOffsetsMinutes = static_cast<int16_t *>(
1898
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int16_t) * nFeatureCountLimit));
1899
0
    if (panOffsetsMinutes == nullptr)
1900
0
        return false;
1901
0
    psChild->children[1]->buffers[1] = panOffsetsMinutes;
1902
1903
0
    struct tm brokenDown;
1904
0
    memset(&brokenDown, 0, sizeof(brokenDown));
1905
1906
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat)
1907
0
    {
1908
0
        auto &poFeature = apoFeatures[iFeat];
1909
0
        const auto psRawField = poFeature->GetRawFieldRef(i);
1910
0
        panTimestamps[iFeat] = 0;
1911
0
        panOffsetsMinutes[iFeat] = 0;
1912
0
        if (IsValidField(psRawField))
1913
0
        {
1914
0
            brokenDown.tm_year = psRawField->Date.Year - 1900;
1915
0
            brokenDown.tm_mon = psRawField->Date.Month - 1;
1916
0
            brokenDown.tm_mday = psRawField->Date.Day;
1917
0
            brokenDown.tm_hour = psRawField->Date.Hour;
1918
0
            brokenDown.tm_min = psRawField->Date.Minute;
1919
0
            brokenDown.tm_sec = static_cast<int>(psRawField->Date.Second);
1920
0
            auto nVal =
1921
0
                CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
1922
0
                (static_cast<int>(psRawField->Date.Second * 1000 + 0.5f) %
1923
0
                 1000);
1924
0
            if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
1925
0
                psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1926
0
            {
1927
                // Convert for psRawField->Date.TZFlag to UTC
1928
0
                const int TZOffsetMinute =
1929
0
                    (psRawField->Date.TZFlag - OGR_TZFLAG_UTC) * 15;
1930
0
                const int TZOffsetMS = TZOffsetMinute * 60 * 1000;
1931
0
                nVal -= TZOffsetMS;
1932
1933
0
                panOffsetsMinutes[iFeat] = static_cast<int16_t>(TZOffsetMinute);
1934
0
            }
1935
0
            panTimestamps[iFeat] = nVal;
1936
0
        }
1937
0
        else if (bIsNullable)
1938
0
        {
1939
0
            ++psChild->null_count;
1940
0
            if (pabyValidity == nullptr)
1941
0
            {
1942
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
1943
0
                psChild->buffers[0] = pabyValidity;
1944
0
                if (pabyValidity == nullptr)
1945
0
                    return false;
1946
0
            }
1947
0
            UnsetBit(pabyValidity, iFeat);
1948
0
        }
1949
0
    }
1950
0
    return true;
1951
0
}
1952
1953
/************************************************************************/
1954
/*                     FillDateTimeArrayAsString()                      */
1955
/************************************************************************/
1956
1957
static size_t
1958
FillDateTimeArrayAsString(struct ArrowArray *psChild,
1959
                          std::deque<std::unique_ptr<OGRFeature>> &apoFeatures,
1960
                          const size_t nFeatureCountLimit,
1961
                          const bool bIsNullable, const int i,
1962
                          const size_t nMemLimit)
1963
0
{
1964
0
    psChild->n_buffers = 3;
1965
0
    psChild->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
1966
0
    uint8_t *pabyValidity = nullptr;
1967
0
    using T = uint32_t;
1968
0
    T *panOffsets = static_cast<T *>(
1969
0
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit)));
1970
0
    if (panOffsets == nullptr)
1971
0
        return 0;
1972
0
    psChild->buffers[1] = panOffsets;
1973
1974
0
    size_t nOffset = 0;
1975
0
    size_t nFeatCount = 0;
1976
0
    for (size_t iFeat = 0; iFeat < nFeatureCountLimit; ++iFeat, ++nFeatCount)
1977
0
    {
1978
0
        panOffsets[iFeat] = static_cast<T>(nOffset);
1979
0
        const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
1980
0
        if (IsValidField(psRawField))
1981
0
        {
1982
0
            size_t nLen = strlen("YYYY-MM-DDTHH:MM:SS");
1983
0
            if (fmodf(psRawField->Date.Second, 1.0f) != 0)
1984
0
                nLen += strlen(".sss");
1985
0
            if (psRawField->Date.TZFlag == OGR_TZFLAG_UTC)
1986
0
                nLen += 1;  // 'Z'
1987
0
            else if (psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1988
0
                nLen += strlen("+hh:mm");
1989
0
            if (nLen > nMemLimit - nOffset)
1990
0
            {
1991
0
                if (nFeatCount == 0)
1992
0
                    return 0;
1993
0
                break;
1994
0
            }
1995
0
            nOffset += static_cast<T>(nLen);
1996
0
        }
1997
0
        else if (bIsNullable)
1998
0
        {
1999
0
            ++psChild->null_count;
2000
0
            if (pabyValidity == nullptr)
2001
0
            {
2002
0
                pabyValidity = AllocValidityBitmap(nFeatureCountLimit);
2003
0
                psChild->buffers[0] = pabyValidity;
2004
0
                if (pabyValidity == nullptr)
2005
0
                    return 0;
2006
0
            }
2007
0
            UnsetBit(pabyValidity, iFeat);
2008
0
        }
2009
0
    }
2010
0
    panOffsets[nFeatCount] = static_cast<T>(nOffset);
2011
2012
0
    char *pachValues =
2013
0
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1));
2014
0
    if (pachValues == nullptr)
2015
0
        return 0;
2016
0
    psChild->buffers[2] = pachValues;
2017
2018
0
    nOffset = 0;
2019
0
    char szBuffer[OGR_SIZEOF_ISO8601_DATETIME_BUFFER];
2020
0
    OGRISO8601Format sFormat;
2021
0
    sFormat.ePrecision = OGRISO8601Precision::AUTO;
2022
0
    for (size_t iFeat = 0; iFeat < nFeatCount; ++iFeat)
2023
0
    {
2024
0
        const int nLen =
2025
0
            static_cast<int>(panOffsets[iFeat + 1] - panOffsets[iFeat]);
2026
0
        if (nLen)
2027
0
        {
2028
0
            const auto psRawField = apoFeatures[iFeat]->GetRawFieldRef(i);
2029
0
            int nBufSize = OGRGetISO8601DateTime(psRawField, sFormat, szBuffer);
2030
0
            if (nBufSize)
2031
0
            {
2032
0
                memcpy(pachValues + nOffset, szBuffer,
2033
0
                       std::min(nLen, nBufSize));
2034
0
            }
2035
0
            if (nBufSize < nLen)
2036
0
            {
2037
0
                memset(pachValues + nOffset + nBufSize, 0, nLen - nBufSize);
2038
0
            }
2039
0
            nOffset += nLen;
2040
0
        }
2041
0
    }
2042
2043
0
    return nFeatCount;
2044
0
}
2045
2046
/************************************************************************/
2047
/*                         GetNextArrowArray()                          */
2048
/************************************************************************/
2049
2050
/** Default implementation of the ArrowArrayStream::get_next() callback.
2051
 *
2052
 * To be used by driver implementations that have a custom GetArrowStream()
2053
 * implementation.
2054
 *
2055
 * @since GDAL 3.6
2056
 */
2057
int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream,
2058
                                struct ArrowArray *out_array)
2059
0
{
2060
0
    ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2061
0
        static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2062
0
            stream->private_data);
2063
2064
0
    const bool bIncludeFID = CPLTestBool(
2065
0
        m_aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"));
2066
0
    const bool bDateTimeAsString = m_aosArrowArrayStreamOptions.FetchBool(
2067
0
        GAS_OPT_DATETIME_AS_STRING, false);
2068
0
    int nMaxBatchSize = atoi(m_aosArrowArrayStreamOptions.FetchNameValueDef(
2069
0
        "MAX_FEATURES_IN_BATCH", "65536"));
2070
0
    if (nMaxBatchSize <= 0)
2071
0
        nMaxBatchSize = 1;
2072
0
    if (nMaxBatchSize > INT_MAX - 1)
2073
0
        nMaxBatchSize = INT_MAX - 1;
2074
2075
0
    auto &oFeatureQueue =
2076
0
        m_poSharedArrowArrayStreamPrivateData->m_oFeatureQueue;
2077
2078
0
    memset(out_array, 0, sizeof(*out_array));
2079
2080
0
    auto poLayerDefn = GetLayerDefn();
2081
0
    const int nFieldCount = poLayerDefn->GetFieldCount();
2082
0
    const int nGeomFieldCount = poLayerDefn->GetGeomFieldCount();
2083
0
    const int nMaxChildren =
2084
0
        (bIncludeFID ? 1 : 0) + nFieldCount + nGeomFieldCount;
2085
0
    int iSchemaChild = 0;
2086
2087
0
    if (!m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.empty())
2088
0
    {
2089
0
        if (poPrivate->poShared->m_bEOF)
2090
0
        {
2091
0
            return 0;
2092
0
        }
2093
0
        if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS == 0)
2094
0
        {
2095
0
            CPLDebug("OGR", "Using fast FID filtering");
2096
0
        }
2097
0
        while (
2098
0
            oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize) &&
2099
0
            m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS <
2100
0
                m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2101
0
        {
2102
0
            const auto nFID =
2103
0
                m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2104
0
                    [m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS];
2105
0
            auto poFeature = std::unique_ptr<OGRFeature>(GetFeature(nFID));
2106
0
            ++m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS;
2107
0
            if (poFeature && (m_poFilterGeom == nullptr ||
2108
0
                              FilterGeometry(poFeature->GetGeomFieldRef(
2109
0
                                  m_iGeomFieldFilter))))
2110
0
            {
2111
0
                oFeatureQueue.emplace_back(std::move(poFeature));
2112
0
            }
2113
0
        }
2114
0
        if (m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS ==
2115
0
            m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.size())
2116
0
        {
2117
0
            poPrivate->poShared->m_bEOF = true;
2118
0
        }
2119
0
    }
2120
0
    else if (!poPrivate->poShared->m_bEOF)
2121
0
    {
2122
0
        while (oFeatureQueue.size() < static_cast<size_t>(nMaxBatchSize))
2123
0
        {
2124
0
            auto poFeature = std::unique_ptr<OGRFeature>(GetNextFeature());
2125
0
            if (!poFeature)
2126
0
            {
2127
0
                poPrivate->poShared->m_bEOF = true;
2128
0
                break;
2129
0
            }
2130
0
            oFeatureQueue.emplace_back(std::move(poFeature));
2131
0
        }
2132
0
    }
2133
0
    if (oFeatureQueue.empty())
2134
0
    {
2135
0
        return 0;
2136
0
    }
2137
2138
0
    out_array->release = OGRLayerDefaultReleaseArray;
2139
0
    out_array->null_count = 0;
2140
2141
0
    out_array->n_children = nMaxChildren;
2142
0
    out_array->children = static_cast<struct ArrowArray **>(
2143
0
        CPLCalloc(nMaxChildren, sizeof(struct ArrowArray *)));
2144
0
    out_array->release = OGRLayerDefaultReleaseArray;
2145
0
    out_array->n_buffers = 1;
2146
0
    out_array->buffers =
2147
0
        static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
2148
2149
0
    size_t nFeatureCount = oFeatureQueue.size();
2150
0
    const uint32_t nMemLimit = OGRArrowArrayHelper::GetMemLimit();
2151
0
    std::set<int> anArrayIndicesOfStructDateTime;
2152
0
    if (bIncludeFID)
2153
0
    {
2154
0
        out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2155
0
            CPLCalloc(1, sizeof(struct ArrowArray)));
2156
0
        auto psChild = out_array->children[iSchemaChild];
2157
0
        ++iSchemaChild;
2158
0
        psChild->release = OGRLayerDefaultReleaseArray;
2159
0
        psChild->n_buffers = 2;
2160
0
        psChild->buffers =
2161
0
            static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2162
0
        int64_t *panValues =
2163
0
            static_cast<int64_t *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
2164
0
                sizeof(int64_t) * (oFeatureQueue.size() + 1)));
2165
0
        if (panValues == nullptr)
2166
0
            goto error;
2167
0
        psChild->buffers[1] = panValues;
2168
0
        for (size_t iFeat = 0; iFeat < oFeatureQueue.size(); ++iFeat)
2169
0
        {
2170
0
            panValues[iFeat] = oFeatureQueue[iFeat]->GetFID();
2171
0
        }
2172
0
    }
2173
2174
0
    for (int i = 0; i < nFieldCount; ++i)
2175
0
    {
2176
0
        const auto poFieldDefn = poLayerDefn->GetFieldDefn(i);
2177
0
        if (poFieldDefn->IsIgnored())
2178
0
        {
2179
0
            continue;
2180
0
        }
2181
2182
0
        out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2183
0
            CPLCalloc(1, sizeof(struct ArrowArray)));
2184
0
        auto psChild = out_array->children[iSchemaChild];
2185
0
        psChild->release = OGRLayerDefaultReleaseArray;
2186
0
        const bool bIsNullable = CPL_TO_BOOL(poFieldDefn->IsNullable());
2187
0
        const auto eSubType = poFieldDefn->GetSubType();
2188
0
        switch (poFieldDefn->GetType())
2189
0
        {
2190
0
            case OFTInteger:
2191
0
            {
2192
0
                if (eSubType == OFSTBoolean)
2193
0
                {
2194
0
                    if (!FillBoolArray(psChild, oFeatureQueue, nFeatureCount,
2195
0
                                       bIsNullable, &OGRField::Integer, i))
2196
0
                        goto error;
2197
0
                }
2198
0
                else if (eSubType == OFSTInt16)
2199
0
                {
2200
0
                    if (!FillArray<int16_t>(psChild, oFeatureQueue,
2201
0
                                            nFeatureCount, bIsNullable,
2202
0
                                            &OGRField::Integer, i))
2203
0
                        goto error;
2204
0
                }
2205
0
                else
2206
0
                {
2207
0
                    if (!FillArray<int32_t>(psChild, oFeatureQueue,
2208
0
                                            nFeatureCount, bIsNullable,
2209
0
                                            &OGRField::Integer, i))
2210
0
                        goto error;
2211
0
                }
2212
2213
0
                const auto &osDomainName = poFieldDefn->GetDomainName();
2214
0
                if (!osDomainName.empty())
2215
0
                {
2216
0
                    auto poDS = GetDataset();
2217
0
                    if (poDS)
2218
0
                    {
2219
0
                        const auto poFieldDomain =
2220
0
                            poDS->GetFieldDomain(osDomainName);
2221
0
                        if (poFieldDomain &&
2222
0
                            poFieldDomain->GetDomainType() == OFDT_CODED)
2223
0
                        {
2224
0
                            const OGRCodedFieldDomain *poCodedDomain =
2225
0
                                static_cast<const OGRCodedFieldDomain *>(
2226
0
                                    poFieldDomain);
2227
0
                            OGRArrowArrayHelper::FillDict(psChild,
2228
0
                                                          poCodedDomain);
2229
0
                        }
2230
0
                    }
2231
0
                }
2232
2233
0
                break;
2234
0
            }
2235
2236
0
            case OFTInteger64:
2237
0
            {
2238
0
                if (!FillArray<int64_t>(psChild, oFeatureQueue, nFeatureCount,
2239
0
                                        bIsNullable, &OGRField::Integer64, i))
2240
0
                    goto error;
2241
0
                break;
2242
0
            }
2243
2244
0
            case OFTReal:
2245
0
            {
2246
0
                if (eSubType == OFSTFloat32)
2247
0
                {
2248
0
                    if (!FillArray<float>(psChild, oFeatureQueue, nFeatureCount,
2249
0
                                          bIsNullable, &OGRField::Real, i))
2250
0
                        goto error;
2251
0
                }
2252
0
                else
2253
0
                {
2254
0
                    if (!FillArray<double>(psChild, oFeatureQueue,
2255
0
                                           nFeatureCount, bIsNullable,
2256
0
                                           &OGRField::Real, i))
2257
0
                        goto error;
2258
0
                }
2259
0
                break;
2260
0
            }
2261
2262
0
            case OFTString:
2263
0
            case OFTWideString:
2264
0
            {
2265
0
                const size_t nThisFeatureCount = FillStringArray<int32_t>(
2266
0
                    psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2267
0
                    nMemLimit);
2268
0
                if (nThisFeatureCount == 0)
2269
0
                {
2270
0
                    goto error_max_mem;
2271
0
                }
2272
0
                if (nThisFeatureCount < nFeatureCount)
2273
0
                    nFeatureCount = nThisFeatureCount;
2274
0
                break;
2275
0
            }
2276
2277
0
            case OFTBinary:
2278
0
            {
2279
0
                const int nWidth = poFieldDefn->GetWidth();
2280
0
                if (nWidth > 0)
2281
0
                {
2282
0
                    if (nFeatureCount > nMemLimit / nWidth)
2283
0
                    {
2284
0
                        nFeatureCount = nMemLimit / nWidth;
2285
0
                        if (nFeatureCount == 0)
2286
0
                            goto error_max_mem;
2287
0
                    }
2288
0
                    if (!FillFixedWidthBinaryArray(psChild, oFeatureQueue,
2289
0
                                                   nFeatureCount, bIsNullable,
2290
0
                                                   nWidth, i))
2291
0
                        goto error;
2292
0
                }
2293
0
                else
2294
0
                {
2295
0
                    const size_t nThisFeatureCount = FillBinaryArray<int32_t>(
2296
0
                        psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2297
0
                        nMemLimit);
2298
0
                    if (nThisFeatureCount == 0)
2299
0
                    {
2300
0
                        goto error_max_mem;
2301
0
                    }
2302
0
                    if (nThisFeatureCount < nFeatureCount)
2303
0
                        nFeatureCount = nThisFeatureCount;
2304
0
                }
2305
0
                break;
2306
0
            }
2307
2308
0
            case OFTIntegerList:
2309
0
            {
2310
0
                size_t nThisFeatureCount;
2311
0
                if (eSubType == OFSTBoolean)
2312
0
                {
2313
0
                    nThisFeatureCount =
2314
0
                        FillListArrayBool<int32_t, GetFromIntegerList>(
2315
0
                            psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2316
0
                            i, nMemLimit);
2317
0
                }
2318
0
                else if (eSubType == OFSTInt16)
2319
0
                {
2320
0
                    nThisFeatureCount =
2321
0
                        FillListArray<int32_t, int16_t, GetFromIntegerList>(
2322
0
                            psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2323
0
                            i, nMemLimit);
2324
0
                }
2325
0
                else
2326
0
                {
2327
0
                    nThisFeatureCount =
2328
0
                        FillListArray<int32_t, int32_t, GetFromIntegerList>(
2329
0
                            psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2330
0
                            i, nMemLimit);
2331
0
                }
2332
0
                if (nThisFeatureCount == 0)
2333
0
                {
2334
0
                    goto error_max_mem;
2335
0
                }
2336
0
                if (nThisFeatureCount < nFeatureCount)
2337
0
                    nFeatureCount = nThisFeatureCount;
2338
0
                break;
2339
0
            }
2340
2341
0
            case OFTInteger64List:
2342
0
            {
2343
0
                const size_t nThisFeatureCount =
2344
0
                    FillListArray<int32_t, int64_t, GetFromInteger64List>(
2345
0
                        psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2346
0
                        nMemLimit);
2347
0
                if (nThisFeatureCount == 0)
2348
0
                {
2349
0
                    goto error_max_mem;
2350
0
                }
2351
0
                if (nThisFeatureCount < nFeatureCount)
2352
0
                    nFeatureCount = nThisFeatureCount;
2353
0
                break;
2354
0
            }
2355
2356
0
            case OFTRealList:
2357
0
            {
2358
0
                size_t nThisFeatureCount;
2359
0
                if (eSubType == OFSTFloat32)
2360
0
                {
2361
0
                    nThisFeatureCount =
2362
0
                        FillListArray<int32_t, float, GetFromRealList>(
2363
0
                            psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2364
0
                            i, nMemLimit);
2365
0
                }
2366
0
                else
2367
0
                {
2368
0
                    nThisFeatureCount =
2369
0
                        FillListArray<int32_t, double, GetFromRealList>(
2370
0
                            psChild, oFeatureQueue, nFeatureCount, bIsNullable,
2371
0
                            i, nMemLimit);
2372
0
                }
2373
0
                if (nThisFeatureCount == 0)
2374
0
                {
2375
0
                    goto error_max_mem;
2376
0
                }
2377
0
                if (nThisFeatureCount < nFeatureCount)
2378
0
                    nFeatureCount = nThisFeatureCount;
2379
0
                break;
2380
0
            }
2381
2382
0
            case OFTStringList:
2383
0
            case OFTWideStringList:
2384
0
            {
2385
0
                const size_t nThisFeatureCount = FillStringListArray<int32_t>(
2386
0
                    psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2387
0
                    nMemLimit);
2388
0
                if (nThisFeatureCount == 0)
2389
0
                {
2390
0
                    goto error_max_mem;
2391
0
                }
2392
0
                if (nThisFeatureCount < nFeatureCount)
2393
0
                    nFeatureCount = nThisFeatureCount;
2394
0
                break;
2395
0
            }
2396
2397
0
            case OFTDate:
2398
0
            {
2399
0
                if (!FillDateArray(psChild, oFeatureQueue, nFeatureCount,
2400
0
                                   bIsNullable, i))
2401
0
                    goto error;
2402
0
                break;
2403
0
            }
2404
2405
0
            case OFTTime:
2406
0
            {
2407
0
                if (!FillTimeArray(psChild, oFeatureQueue, nFeatureCount,
2408
0
                                   bIsNullable, i))
2409
0
                    goto error;
2410
0
                break;
2411
0
            }
2412
2413
0
            case OFTDateTime:
2414
0
            {
2415
0
                if (bDateTimeAsString)
2416
0
                {
2417
0
                    const size_t nThisFeatureCount = FillDateTimeArrayAsString(
2418
0
                        psChild, oFeatureQueue, nFeatureCount, bIsNullable, i,
2419
0
                        nMemLimit);
2420
0
                    if (nThisFeatureCount == 0)
2421
0
                    {
2422
0
                        goto error_max_mem;
2423
0
                    }
2424
0
                    if (nThisFeatureCount < nFeatureCount)
2425
0
                        nFeatureCount = nThisFeatureCount;
2426
0
                }
2427
0
                else
2428
0
                {
2429
0
                    const char *pszTZOverride =
2430
0
                        m_aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
2431
0
                    const int nTZFlag = poFieldDefn->GetTZFlag();
2432
0
                    if ((pszTZOverride && EQUAL(pszTZOverride, "mixed")) ||
2433
0
                        (!pszTZOverride && nTZFlag == OGR_TZFLAG_MIXED_TZ))
2434
2435
0
                    {
2436
0
                        anArrayIndicesOfStructDateTime.insert(iSchemaChild);
2437
0
                        if (!FillDateTimeArrayWithTimeZone(
2438
0
                                psChild, oFeatureQueue, nFeatureCount,
2439
0
                                bIsNullable, i, nTZFlag))
2440
0
                        {
2441
0
                            goto error;
2442
0
                        }
2443
0
                    }
2444
0
                    else if (!FillDateTimeArray(psChild, oFeatureQueue,
2445
0
                                                nFeatureCount, bIsNullable, i,
2446
0
                                                nTZFlag))
2447
0
                    {
2448
0
                        goto error;
2449
0
                    }
2450
0
                }
2451
0
                break;
2452
0
            }
2453
0
        }
2454
2455
0
        ++iSchemaChild;
2456
0
    }
2457
0
    for (int i = 0; i < nGeomFieldCount; ++i)
2458
0
    {
2459
0
        const auto poFieldDefn = poLayerDefn->GetGeomFieldDefn(i);
2460
0
        if (poFieldDefn->IsIgnored())
2461
0
        {
2462
0
            continue;
2463
0
        }
2464
2465
0
        out_array->children[iSchemaChild] = static_cast<struct ArrowArray *>(
2466
0
            CPLCalloc(1, sizeof(struct ArrowArray)));
2467
0
        auto psChild = out_array->children[iSchemaChild];
2468
0
        ++iSchemaChild;
2469
0
        psChild->release = OGRLayerDefaultReleaseArray;
2470
0
        psChild->length = oFeatureQueue.size();
2471
0
        const size_t nThisFeatureCount = FillWKBGeometryArray<int32_t>(
2472
0
            psChild, oFeatureQueue, nFeatureCount, poFieldDefn, i, nMemLimit);
2473
0
        if (nThisFeatureCount == 0)
2474
0
        {
2475
0
            goto error_max_mem;
2476
0
        }
2477
0
        if (nThisFeatureCount < nFeatureCount)
2478
0
            nFeatureCount = nThisFeatureCount;
2479
0
    }
2480
2481
    // Remove consumed features from the queue
2482
0
    if (nFeatureCount == oFeatureQueue.size())
2483
0
        oFeatureQueue.clear();
2484
0
    else
2485
0
    {
2486
0
        for (size_t i = 0; i < nFeatureCount; ++i)
2487
0
        {
2488
0
            oFeatureQueue.pop_front();
2489
0
        }
2490
0
    }
2491
2492
0
    out_array->n_children = iSchemaChild;
2493
0
    out_array->length = nFeatureCount;
2494
0
    for (int i = 0; i < out_array->n_children; ++i)
2495
0
    {
2496
0
        out_array->children[i]->length = nFeatureCount;
2497
0
        if (cpl::contains(anArrayIndicesOfStructDateTime, i))
2498
0
        {
2499
0
            for (int j = 0; j < out_array->children[i]->n_children; ++j)
2500
0
            {
2501
0
                out_array->children[i]->children[j]->length = nFeatureCount;
2502
0
            }
2503
0
        }
2504
0
    }
2505
2506
0
    return 0;
2507
2508
0
error_max_mem:
2509
0
    CPLError(CE_Failure, CPLE_AppDefined,
2510
0
             "Too large feature: not even a single feature can be returned");
2511
0
error:
2512
0
    oFeatureQueue.clear();
2513
0
    poPrivate->poShared->m_bEOF = true;
2514
0
    out_array->release(out_array);
2515
0
    memset(out_array, 0, sizeof(*out_array));
2516
0
    return ENOMEM;
2517
0
}
2518
2519
/************************************************************************/
2520
/*                      StaticGetNextArrowArray()                       */
2521
/************************************************************************/
2522
2523
/** Default implementation of the ArrowArrayStream::get_next() callback.
2524
 *
2525
 * To be used by driver implementations that have a custom GetArrowStream()
2526
 * implementation.
2527
 *
2528
 * @since GDAL 3.6
2529
 */
2530
int OGRLayer::StaticGetNextArrowArray(struct ArrowArrayStream *stream,
2531
                                      struct ArrowArray *out_array)
2532
0
{
2533
0
    auto poLayer = static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2534
0
                       stream->private_data)
2535
0
                       ->poShared->m_poLayer;
2536
0
    if (poLayer == nullptr)
2537
0
    {
2538
0
        CPLError(CE_Failure, CPLE_NotSupported,
2539
0
                 "Calling get_next() on a freed OGRLayer is not supported");
2540
0
        return EINVAL;
2541
0
    }
2542
0
    return poLayer->GetNextArrowArray(stream, out_array);
2543
0
}
2544
2545
/************************************************************************/
2546
/*                           ReleaseStream()                            */
2547
/************************************************************************/
2548
2549
/** Release a ArrowArrayStream.
2550
 *
2551
 * To be used by driver implementations that have a custom GetArrowStream()
2552
 * implementation.
2553
 *
2554
 * @param stream Arrow array stream to release.
2555
 * @since GDAL 3.6
2556
 */
2557
void OGRLayer::ReleaseStream(struct ArrowArrayStream *stream)
2558
0
{
2559
0
    assert(stream->release == OGRLayer::ReleaseStream);
2560
0
    ArrowArrayStreamPrivateDataSharedDataWrapper *poPrivate =
2561
0
        static_cast<ArrowArrayStreamPrivateDataSharedDataWrapper *>(
2562
0
            stream->private_data);
2563
0
    poPrivate->poShared->m_bArrowArrayStreamInProgress = false;
2564
0
    poPrivate->poShared->m_bEOF = false;
2565
0
    if (poPrivate->poShared->m_poLayer)
2566
0
        poPrivate->poShared->m_poLayer->ResetReading();
2567
0
    delete poPrivate;
2568
0
    stream->private_data = nullptr;
2569
0
    stream->release = nullptr;
2570
0
}
2571
2572
/************************************************************************/
2573
/*                    GetLastErrorArrowArrayStream()                    */
2574
/************************************************************************/
2575
2576
/** Default implementation of the ArrowArrayStream::get_last_error() callback.
2577
 *
2578
 * To be used by driver implementations that have a custom GetArrowStream()
2579
 * implementation.
2580
 *
2581
 * @since GDAL 3.6
2582
 */
2583
const char *OGRLayer::GetLastErrorArrowArrayStream(struct ArrowArrayStream *)
2584
0
{
2585
0
    const char *pszLastErrorMsg = CPLGetLastErrorMsg();
2586
0
    return pszLastErrorMsg[0] != '\0' ? pszLastErrorMsg : nullptr;
2587
0
}
2588
2589
/************************************************************************/
2590
/*                           GetArrowStream()                           */
2591
/************************************************************************/
2592
2593
/** Get an Arrow C stream.
2594
 *
2595
 * On successful return, and when the stream interface is no longer needed, it
2596
 * must be freed with out_stream->release(out_stream). Please carefully
2597
 * read https://arrow.apache.org/docs/format/CStreamInterface.html for more
2598
 * details on using Arrow C stream.
2599
 *
2600
 * The method may take into account ignored fields set with SetIgnoredFields()
2601
 * (the default implementation does), and should take into account filters set
2602
 * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2603
 * specialized implementations may fallback to the default (slower)
2604
 * implementation when filters are set.
2605
 * Drivers that have a specialized implementation should advertise the
2606
 * OLCFastGetArrowStream capability.
2607
 *
2608
 * There are extra precautions to take into account in a OGR context. Unless
2609
 * otherwise specified by a particular driver implementation, the get_schema(),
2610
 * get_next() and get_last_error() function pointers of the ArrowArrayStream
2611
 * structure should no longer be used after the OGRLayer, from which the
2612
 * ArrowArrayStream structure was initialized, has been destroyed (typically at
2613
 * dataset closing). The reason is that those function pointers will typically
2614
 * point to methods of the OGRLayer instance.
2615
 * However, the ArrowSchema and ArrowArray structures filled from those
2616
 * callbacks can be used and must be released independently from the
2617
 * ArrowArrayStream or the layer.
2618
 *
2619
 * Furthermore, unless otherwise specified by a particular driver
2620
 * implementation, only one ArrowArrayStream can be active at a time on
2621
 * a given layer (that is the last active one must be explicitly released before
2622
 * a next one is asked). Changing filter state, ignored columns, modifying the
2623
 * schema or using ResetReading()/GetNextFeature() while using a
2624
 * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2625
 * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2626
 * should be called on a layer, while an ArrowArrayStream on it is active.
2627
 *
2628
 * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2629
 * get_schema() callback may be set with the potential following items:
2630
 * <ul>
2631
 * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2632
 *      Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2633
 *      specified.</li>
2634
 * <li>"GDAL:OGR:alternative_name": value of
2635
 *     OGRFieldDefn::GetAlternativeNameRef()</li>
2636
 * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2637
 * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2638
 * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2639
 * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2640
 *     string)</li>
2641
 * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2642
 *     "true" or "false")</li>
2643
 * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2644
 * </ul>
2645
 *
2646
 * A potential usage can be:
2647
\code{.cpp}
2648
    struct ArrowArrayStream stream;
2649
    if( !poLayer->GetArrowStream(&stream, nullptr))
2650
    {
2651
        CPLError(CE_Failure, CPLE_AppDefined, "GetArrowStream() failed\n");
2652
        exit(1);
2653
    }
2654
    struct ArrowSchema schema;
2655
    if( stream.get_schema(&stream, &schema) == 0 )
2656
    {
2657
        // Do something useful
2658
        schema.release(&schema);
2659
    }
2660
    while( true )
2661
    {
2662
        struct ArrowArray array;
2663
        // Look for an error (get_next() returning a non-zero code), or
2664
        // end of iteration (array.release == nullptr)
2665
        if( stream.get_next(&stream, &array) != 0 ||
2666
            array.release == nullptr )
2667
        {
2668
            break;
2669
        }
2670
        // Do something useful
2671
        array.release(&array);
2672
    }
2673
    stream.release(&stream);
2674
\endcode
2675
 *
2676
 * A full example is available in the
2677
 * <a
2678
href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2679
From OGR using the Arrow C Stream data interface</a> tutorial.
2680
 *
2681
 * Options may be driver specific. The default implementation recognizes the
2682
 * following options:
2683
 * <ul>
2684
 * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to YES.
2685
 * </li>
2686
 * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2687
 *     a ArrowArray batch. Defaults to 65 536.</li>
2688
 * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2689
 *     Arrow. (GDAL >= 3.8)
2690
 *     Override the timezone flag nominally provided by
2691
 *     OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2692
 *     declaration, with a user specified timezone.
2693
 *     Note that datetime values in Arrow arrays are always stored in UTC, and
2694
 *     that the time zone flag used by GDAL to convert to UTC is the one of the
2695
 *     OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2696
 *     to UTC of a OGRField::Date is only done if both the timezone indicated by
2697
 *     OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2698
 *     this TIMEZONE option) are not unknown.
2699
 *     Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2700
 *     following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2701
 *     and storing both a UTC timestamp and the offset in minutes from the UTC
2702
 *     timezone.
2703
 * </li>
2704
 * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2705
 *     Whether DateTime fields should be returned as a (normally ISO-8601
2706
 *     formatted) string by drivers. The aim is to be able to handle mixed
2707
 *     timezones (or timezone naive values) in the same column.
2708
 *     All drivers must honour that option, and potentially fallback to the
2709
 *     OGRLayer generic implementation if they cannot (which is the case for the
2710
 *     Arrow, Parquet and ADBC drivers).
2711
 *     When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2712
 * </li>
2713
 * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2714
 *     The default is OGC, which will lead to setting
2715
 *     the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2716
 *     If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2717
 *     ARROW:extension:name=geoarrow.wkb and
2718
 *     ARROW:extension:metadata={"crs": &lt;projjson CRS representation&gt;} are set.
2719
 * </li>
2720
 * </ul>
2721
 *
2722
 * The Arrow/Parquet drivers recognize the following option:
2723
 * <ul>
2724
 * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2725
 *     when the native geometry encoding is not WKB. Otherwise the geometry
2726
 *     will be returned with its native Arrow encoding
2727
 *     (possibly using GeoArrow encoding).</li>
2728
 * </ul>
2729
 *
2730
 * @param out_stream Output stream. Must *not* be NULL. The content of the
2731
 *                  structure does not need to be initialized.
2732
 * @param papszOptions NULL terminated list of key=value options.
2733
 * @return true in case of success.
2734
 * @since GDAL 3.6
2735
 */
2736
bool OGRLayer::GetArrowStream(struct ArrowArrayStream *out_stream,
2737
                              CSLConstList papszOptions)
2738
0
{
2739
0
    memset(out_stream, 0, sizeof(*out_stream));
2740
0
    if (m_poSharedArrowArrayStreamPrivateData &&
2741
0
        m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress)
2742
0
    {
2743
0
        CPLError(CE_Failure, CPLE_AppDefined,
2744
0
                 "An arrow Arrow Stream is in progress on that layer. Only "
2745
0
                 "one at a time is allowed in this implementation.");
2746
0
        return false;
2747
0
    }
2748
0
    m_aosArrowArrayStreamOptions.Assign(CSLDuplicate(papszOptions), true);
2749
2750
0
    out_stream->get_schema = OGRLayer::StaticGetArrowSchema;
2751
0
    out_stream->get_next = OGRLayer::StaticGetNextArrowArray;
2752
0
    out_stream->get_last_error = OGRLayer::GetLastErrorArrowArrayStream;
2753
0
    out_stream->release = OGRLayer::ReleaseStream;
2754
2755
0
    if (m_poSharedArrowArrayStreamPrivateData == nullptr)
2756
0
    {
2757
0
        m_poSharedArrowArrayStreamPrivateData =
2758
0
            std::make_shared<ArrowArrayStreamPrivateData>();
2759
0
        m_poSharedArrowArrayStreamPrivateData->m_poLayer = this;
2760
0
    }
2761
0
    m_poSharedArrowArrayStreamPrivateData->m_bArrowArrayStreamInProgress = true;
2762
2763
    // Special case for "FID = constant", or "FID IN (constant1, ...., constantN)"
2764
0
    m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs.clear();
2765
0
    m_poSharedArrowArrayStreamPrivateData->m_iQueriedFIDS = 0;
2766
0
    if (m_poAttrQuery)
2767
0
    {
2768
0
        swq_expr_node *poNode =
2769
0
            static_cast<swq_expr_node *>(m_poAttrQuery->GetSWQExpr());
2770
0
        if (poNode->eNodeType == SNT_OPERATION &&
2771
0
            (poNode->nOperation == SWQ_IN || poNode->nOperation == SWQ_EQ) &&
2772
0
            poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
2773
0
            poNode->papoSubExpr[0]->field_index ==
2774
0
                GetLayerDefn()->GetFieldCount() + SPF_FID &&
2775
0
            TestCapability(OLCRandomRead))
2776
0
        {
2777
0
            std::set<GIntBig> oSetAlreadyListed;
2778
0
            for (int i = 1; i < poNode->nSubExprCount; ++i)
2779
0
            {
2780
0
                if (poNode->papoSubExpr[i]->eNodeType == SNT_CONSTANT &&
2781
0
                    poNode->papoSubExpr[i]->field_type == SWQ_INTEGER64 &&
2782
0
                    oSetAlreadyListed.find(poNode->papoSubExpr[i]->int_value) ==
2783
0
                        oSetAlreadyListed.end())
2784
0
                {
2785
0
                    oSetAlreadyListed.insert(poNode->papoSubExpr[i]->int_value);
2786
0
                    m_poSharedArrowArrayStreamPrivateData->m_anQueriedFIDs
2787
0
                        .push_back(poNode->papoSubExpr[i]->int_value);
2788
0
                }
2789
0
            }
2790
0
        }
2791
0
    }
2792
2793
0
    auto poPrivateData = new ArrowArrayStreamPrivateDataSharedDataWrapper();
2794
0
    poPrivateData->poShared = m_poSharedArrowArrayStreamPrivateData;
2795
0
    out_stream->private_data = poPrivateData;
2796
0
    return true;
2797
0
}
2798
2799
/************************************************************************/
2800
/*                        OGR_L_GetArrowStream()                        */
2801
/************************************************************************/
2802
2803
/** Get an Arrow C stream.
2804
 *
2805
 * On successful return, and when the stream interface is no longer needed, it
2806
 * must be freed with out_stream->release(out_stream). Please carefully read
2807
 * https://arrow.apache.org/docs/format/CStreamInterface.html for more details
2808
 * on using Arrow C stream.
2809
 *
2810
 * The method may take into account ignored fields set with SetIgnoredFields()
2811
 * (the default implementation does), and should take into account filters set
2812
 * with SetSpatialFilter() and SetAttributeFilter(). Note however that
2813
 * specialized implementations may fallback to the default (slower)
2814
 * implementation when filters are set.
2815
 * Drivers that have a specialized implementation should
2816
 * advertise the OLCFastGetArrowStream capability.
2817
 *
2818
 * There are extra precautions to take into account in a OGR context. Unless
2819
 * otherwise specified by a particular driver implementation, the get_schema(),
2820
 * get_next() and get_last_error() function pointers of the ArrowArrayStream
2821
 * structure should no longer be used after the OGRLayer, from which the
2822
 * ArrowArrayStream structure was initialized, has been destroyed (typically at
2823
 * dataset closing). The reason is that those function pointers will typically
2824
 * point to methods of the OGRLayer instance.
2825
 * However, the ArrowSchema and ArrowArray structures filled from those
2826
 * callbacks can be used and must be released independently from the
2827
 * ArrowArrayStream or the layer.
2828
 *
2829
 * Furthermore, unless otherwise specified by a particular driver
2830
 * implementation, only one ArrowArrayStream can be active at a time on
2831
 * a given layer (that is the last active one must be explicitly released before
2832
 * a next one is asked). Changing filter state, ignored columns, modifying the
2833
 * schema or using ResetReading()/GetNextFeature() while using a
2834
 * ArrowArrayStream is strongly discouraged and may lead to unexpected results.
2835
 * As a rule of thumb, no OGRLayer methods that affect the state of a layer
2836
 * should be called on a layer, while an ArrowArrayStream on it is active.
2837
 *
2838
 * Starting with GDAL 3.8, the ArrowSchema::metadata field filled by the
2839
 * get_schema() callback may be set with the potential following items:
2840
 * <ul>
2841
 * <li>"GDAL:OGR:type": value of OGRFieldDefn::GetType(): (added in 3.11)
2842
 *      Only used for DateTime fields when the DATETIME_AS_STRING=YES option is
2843
 *      specified.</li>
2844
 * <li>"GDAL:OGR:alternative_name": value of
2845
 *     OGRFieldDefn::GetAlternativeNameRef()</li>
2846
 * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
2847
 * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
2848
 * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
2849
 * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
2850
 *     string)</li>
2851
 * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
2852
 *     "true" or "false")</li>
2853
 * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
2854
 * </ul>
2855
 *
2856
 * A potential usage can be:
2857
\code{.cpp}
2858
    struct ArrowArrayStream stream;
2859
    if( !OGR_L_GetArrowStream(hLayer, &stream, nullptr))
2860
    {
2861
        CPLError(CE_Failure, CPLE_AppDefined,
2862
                 "OGR_L_GetArrowStream() failed\n");
2863
        exit(1);
2864
    }
2865
    struct ArrowSchema schema;
2866
    if( stream.get_schema(&stream, &schema) == 0 )
2867
    {
2868
        // Do something useful
2869
        schema.release(&schema);
2870
    }
2871
    while( true )
2872
    {
2873
        struct ArrowArray array;
2874
        // Look for an error (get_next() returning a non-zero code), or
2875
        // end of iteration (array.release == nullptr)
2876
        if( stream.get_next(&stream, &array) != 0 ||
2877
            array.release == nullptr )
2878
        {
2879
            break;
2880
        }
2881
        // Do something useful
2882
        array.release(&array);
2883
    }
2884
    stream.release(&stream);
2885
\endcode
2886
 *
2887
 * A full example is available in the
2888
 * <a
2889
href="https://gdal.org/tutorials/vector_api_tut.html#reading-from-ogr-using-the-arrow-c-stream-data-interface">Reading
2890
From OGR using the Arrow C Stream data interface</a> tutorial.
2891
 *
2892
 * Options may be driver specific. The default implementation recognizes the
2893
 * following options:
2894
 * <ul>
2895
 * <li>INCLUDE_FID=YES/NO. Whether to include the FID column. Defaults to
2896
YES.</li>
2897
 * <li>MAX_FEATURES_IN_BATCH=integer. Maximum number of features to retrieve in
2898
 *     a ArrowArray batch. Defaults to 65 536.</li>
2899
 * <li>TIMEZONE="unknown", "mixed", "UTC", "(+|-)HH:MM" or any other value supported by
2900
 *     Arrow. (GDAL >= 3.8)
2901
 *     Override the timezone flag nominally provided by
2902
 *     OGRFieldDefn::GetTZFlag(), and used for the Arrow field timezone
2903
 *     declaration, with a user specified timezone.
2904
 *     Note that datetime values in Arrow arrays are always stored in UTC, and
2905
 *     that the time zone flag used by GDAL to convert to UTC is the one of the
2906
 *     OGRField::Date::TZFlag member at the OGRFeature level. The conversion
2907
 *     to UTC of a OGRField::Date is only done if both the timezone indicated by
2908
 *     OGRField::Date::TZFlag and the one at the OGRFieldDefn level (or set by
2909
 *     this TIMEZONE option) are not unknown.
2910
 *     Since GDAL 3.13, "mixed" can be used to create an Arrow structure field,
2911
 *     following the "timestamp with offset" extension (https://github.com/apache/arrow/blob/main/docs/source/format/CanonicalExtensions.rst#timestamp-with-offset)
2912
 *     and storing both a UTC timestamp and the offset in minutes from the UTC
2913
 *     timezone.
2914
 * </li>
2915
 * <li>DATETIME_AS_STRING=YES/NO. Defaults to NO. Added in GDAL 3.11.
2916
 *     Whether DateTime fields should be returned as a (normally ISO-8601
2917
 *     formatted) string by drivers. The aim is to be able to handle mixed
2918
 *     timezones (or timezone naive values) in the same column.
2919
 *     All drivers must honour that option, and potentially fallback to the
2920
 *     OGRLayer generic implementation if they cannot (which is the case for the
2921
 *     Arrow, Parquet and ADBC drivers).
2922
 *     When DATETIME_AS_STRING=YES, the TIMEZONE option is ignored.
2923
 * </li>
2924
 * <li>GEOMETRY_METADATA_ENCODING=OGC/GEOARROW (GDAL >= 3.8).
2925
 *     The default is OGC, which will lead to setting
2926
 *     the Arrow geometry column metadata to ARROW:extension:name=ogc.wkb.
2927
 *     If setting GEOMETRY_METADATA_ENCODING to GEOARROW,
2928
 *     ARROW:extension:name=geoarrow.wkb and
2929
 *     ARROW:extension:metadata={"crs": &lt;projjson CRS representation&gt;} are set.
2930
 * </li>
2931
 * </ul>
2932
 *
2933
 * The Arrow/Parquet drivers recognize the following option:
2934
 * <ul>
2935
 * <li>GEOMETRY_ENCODING=WKB. To force a fallback to the generic implementation
2936
 *     when the native geometry encoding is not WKB. Otherwise the geometry
2937
 *     will be returned with its native Arrow encoding
2938
 *     (possibly using GeoArrow encoding).</li>
2939
 * </ul>
2940
 *
2941
 * @param hLayer Layer
2942
 * @param out_stream Output stream. Must *not* be NULL. The content of the
2943
 *                  structure does not need to be initialized.
2944
 * @param papszOptions NULL terminated list of key=value options.
2945
 * @return true in case of success.
2946
 * @since GDAL 3.6
2947
 */
2948
bool OGR_L_GetArrowStream(OGRLayerH hLayer, struct ArrowArrayStream *out_stream,
2949
                          CSLConstList papszOptions)
2950
0
{
2951
0
    VALIDATE_POINTER1(hLayer, "OGR_L_GetArrowStream", false);
2952
0
    VALIDATE_POINTER1(out_stream, "OGR_L_GetArrowStream", false);
2953
2954
0
    return OGRLayer::FromHandle(hLayer)->GetArrowStream(out_stream,
2955
0
                                                        papszOptions);
2956
0
}
2957
2958
/************************************************************************/
2959
/*                       OGRParseArrowMetadata()                        */
2960
/************************************************************************/
2961
2962
/** Decode the key/value pairs of a serialized Arrow metadata blob.
 *
 * The layout read here is: an int32 number of pairs, then for each pair an
 * int32 key length, the key bytes, an int32 value length and the value bytes.
 * Integers are read in the byte order of the host and strings are not
 * nul-terminated.
 *
 * @param pabyMetadata Serialized metadata. May be nullptr, in which case an
 *                     empty map is returned.
 * @return the decoded pairs. When a key is repeated, the last value wins.
 *         Decoding stops at the first negative length encountered, and the
 *         pairs decoded so far are returned.
 */
std::map<std::string, std::string>
OGRParseArrowMetadata(const char *pabyMetadata)
{
    std::map<std::string, std::string> oMetadata;
    if (pabyMetadata == nullptr)
    {
        // Absent metadata: nothing to decode (and nothing safe to read).
        return oMetadata;
    }

    // Read one int32 at the cursor and advance past it. memcpy() is used so
    // that no assumption is made on the alignment of the cursor.
    const auto ReadInt32 = [&pabyMetadata]()
    {
        int32_t nVal = 0;
        memcpy(&nVal, pabyMetadata, sizeof(int32_t));
        pabyMetadata += sizeof(int32_t);
        return nVal;
    };

    const int32_t nKVP = ReadInt32();
    for (int32_t i = 0; i < nKVP; ++i)
    {
        const int32_t nSizeKey = ReadInt32();
        if (nSizeKey < 0)
        {
            // Corrupted blob: a negative length would otherwise be converted
            // to a huge unsigned size.
            break;
        }
        std::string osKey(pabyMetadata, static_cast<size_t>(nSizeKey));
        pabyMetadata += nSizeKey;

        const int32_t nSizeValue = ReadInt32();
        if (nSizeValue < 0)
        {
            break;
        }
        std::string osValue(pabyMetadata, static_cast<size_t>(nSizeValue));
        pabyMetadata += nSizeValue;

        oMetadata[std::move(osKey)] = std::move(osValue);
    }

    return oMetadata;
}
2990
2991
/************************************************************************/
2992
/*                         ParseDecimalFormat()                         */
2993
/************************************************************************/
2994
2995
/** Parse an Arrow decimal format string.
 *
 * Recognized forms:
 *   d:19,10     ==> decimal128 [precision 19, scale 10]
 *   d:19,10,NNN ==> decimal bitwidth = NNN [precision 19, scale 10]
 *
 * @param format Format string. Parsing starts 2 characters after its
 *               beginning, so it must be at least that long.
 * @param[out] nPrecision Precision, or 0 if the format has no comma.
 * @param[out] nScale Scale, or 0 if the format has no comma.
 * @param[out] nWidthInBytes Width in bytes (16 when no bit width is
 *                           specified), or 0 on failure.
 * @return true on success, false if the format has no comma or if the bit
 *         width is not a strictly positive multiple of 8.
 */
static bool ParseDecimalFormat(const char *format, int &nPrecision, int &nScale,
                               int &nWidthInBytes)
{
    nPrecision = 0;
    nScale = 0;
    // Pessimistic default, overwritten on the success paths below
    nWidthInBytes = 0;

    const char *pszFirstComma = strchr(format + 2, ',');
    if (!pszFirstComma)
    {
        // shouldn't happen for well-formed schemas
        return false;
    }

    nPrecision = atoi(format + 2);
    nScale = atoi(pszFirstComma + 1);

    const char *pszSecondComma = strchr(pszFirstComma + 1, ',');
    if (!pszSecondComma)
    {
        nWidthInBytes = 128 / 8;  // 128 bit
        return true;
    }

    const int nWidthInBits = atoi(pszSecondComma + 1);
    if (nWidthInBits <= 0 || (nWidthInBits % 8) != 0)
    {
        // shouldn't happen for well-formed schemas. Zero or negative widths
        // (including non-numeric text, for which atoi() returns 0) are
        // rejected too, so that success always implies a usable byte width.
        return false;
    }

    nWidthInBytes = nWidthInBits / 8;
    return true;
}
3032
3033
/************************************************************************/
3034
/*                    GetErrorIfUnsupportedDecimal()                    */
3035
/************************************************************************/
3036
3037
/** Return a static error message if a decimal of the given byte width and
 * precision cannot be handled, or nullptr if it can.
 */
static const char *GetErrorIfUnsupportedDecimal(int nWidthInBytes,
                                                int nPrecision)
{
    constexpr int DECIMAL128_WIDTH_IN_BYTES = 128 / 8;
    constexpr int DECIMAL256_WIDTH_IN_BYTES = 256 / 8;

    const bool bWidthOK = nWidthInBytes == DECIMAL128_WIDTH_IN_BYTES ||
                          nWidthInBytes == DECIMAL256_WIDTH_IN_BYTES;
    if (!bWidthOK)
        return "For decimal field, only width 128 and 256 are supported";

    // precision=19 fits on 64 bits
    const bool bPrecisionOK = nPrecision >= 1 && nPrecision <= 19;
    return bPrecisionOK
               ? nullptr
               : "For decimal field, only precision up to 19 is supported";
}
3054
3055
/************************************************************************/
3056
/*                  IsArrowTimeStampWithOffsetField()                   */
3057
/************************************************************************/
3058
3059
static bool IsArrowTimeStampWithOffsetField(const struct ArrowSchema *schema)
3060
0
{
3061
0
    bool ret =
3062
0
        IsStructure(schema->format) && schema->n_children == 2 &&
3063
0
        IsTimestamp(schema->children[0]->format) &&
3064
0
        IsInt16(schema->children[1]->format) &&
3065
0
        strcmp(schema->children[0]->name, ATSWO_TIMESTAMP_FIELD_NAME) == 0 &&
3066
0
        strcmp(schema->children[1]->name, ATSWO_OFFSET_MINUTES_FIELD_NAME) == 0;
3067
0
    if (ret)
3068
0
    {
3069
0
        const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
3070
0
        const auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3071
0
        ret = oIter != oMetadata.end() &&
3072
0
              oIter->second == EXTENSION_NAME_ARROW_TIMESTAMP_WITH_OFFSET;
3073
0
    }
3074
0
    return ret;
3075
0
}
3076
3077
/************************************************************************/
3078
/*                          IsHandledSchema()                           */
3079
/************************************************************************/
3080
3081
static bool IsHandledSchema(bool bTopLevel, const struct ArrowSchema *schema,
3082
                            const std::string &osPrefix, bool bHasAttrQuery,
3083
                            const CPLStringList &aosUsedFields)
3084
0
{
3085
0
    const char *format = schema->format;
3086
0
    if (IsStructure(format))
3087
0
    {
3088
0
        if (IsArrowTimeStampWithOffsetField(schema) &&
3089
0
            aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3090
0
        {
3091
0
            return false;
3092
0
        }
3093
3094
0
        for (int64_t i = 0; i < schema->n_children; ++i)
3095
0
        {
3096
0
            if (!IsHandledSchema(/* bTopLevel = */ false,
3097
0
                                 schema->children[static_cast<size_t>(i)],
3098
0
                                 bTopLevel ? std::string()
3099
0
                                           : osPrefix + schema->name + ".",
3100
0
                                 bHasAttrQuery, aosUsedFields))
3101
0
            {
3102
0
                return false;
3103
0
            }
3104
0
        }
3105
0
        return true;
3106
0
    }
3107
3108
    // Lists or maps
3109
0
    if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format) ||
3110
0
        IsMap(format))
3111
0
    {
3112
0
        if (!IsHandledSchema(/* bTopLevel = */ false, schema->children[0],
3113
0
                             osPrefix, bHasAttrQuery, aosUsedFields))
3114
0
        {
3115
0
            return false;
3116
0
        }
3117
        // For now, we can't filter on lists or maps
3118
0
        if (aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3119
0
        {
3120
0
            CPLDebug("OGR",
3121
0
                     "Field %s has unhandled format '%s' for an "
3122
0
                     "attribute to filter on",
3123
0
                     (osPrefix + schema->name).c_str(), format);
3124
0
            return false;
3125
0
        }
3126
0
        return true;
3127
0
    }
3128
3129
0
    const char *const apszHandledFormats[] = {
3130
0
        "b",    // boolean
3131
0
        "c",    // int8
3132
0
        "C",    // uint8
3133
0
        "s",    // int16
3134
0
        "S",    // uint16
3135
0
        "i",    // int32
3136
0
        "I",    // uint32
3137
0
        "l",    // int64
3138
0
        "L",    // uint64
3139
0
        "e",    // float16
3140
0
        "f",    // float32
3141
0
        "g",    // float64,
3142
0
        "z",    // binary
3143
0
        "Z",    // large binary
3144
0
        "u",    // UTF-8 string
3145
0
        "U",    // large UTF-8 string
3146
0
        "tdD",  // date32[days]
3147
0
        "tdm",  // date64[milliseconds]
3148
0
        "tts",  //time32 [seconds]
3149
0
        "ttm",  //time32 [milliseconds]
3150
0
        "ttu",  //time64 [microseconds]
3151
0
        "ttn",  //time64 [nanoseconds]
3152
0
    };
3153
3154
0
    for (const char *pszHandledFormat : apszHandledFormats)
3155
0
    {
3156
0
        if (strcmp(format, pszHandledFormat) == 0)
3157
0
        {
3158
0
            return true;
3159
0
        }
3160
0
    }
3161
3162
0
    if (IsDecimal(format))
3163
0
    {
3164
0
        if (bHasAttrQuery &&
3165
0
            aosUsedFields.FindString((osPrefix + schema->name).c_str()) >= 0)
3166
0
        {
3167
0
            int nPrecision = 0;
3168
0
            int nScale = 0;
3169
0
            int nWidthInBytes = 0;
3170
0
            if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3171
0
            {
3172
0
                CPLDebug("OGR", "%s",
3173
0
                         (std::string("Invalid field format ") + format +
3174
0
                          " for field " + osPrefix + schema->name)
3175
0
                             .c_str());
3176
0
                return false;
3177
0
            }
3178
3179
0
            const char *pszError =
3180
0
                GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
3181
0
            if (pszError)
3182
0
            {
3183
0
                CPLDebug("OGR", "%s", pszError);
3184
0
                return false;
3185
0
            }
3186
0
        }
3187
0
        return true;
3188
0
    }
3189
3190
0
    if (IsFixedWidthBinary(format) || IsTimestamp(format))
3191
0
    {
3192
0
        return true;
3193
0
    }
3194
3195
0
    CPLDebug("OGR", "Field %s has unhandled format '%s'",
3196
0
             (osPrefix + schema->name).c_str(), format);
3197
0
    return false;
3198
0
}
3199
3200
/************************************************************************/
3201
/*                 OGRLayer::CanPostFilterArrowArray()                  */
3202
/************************************************************************/
3203
3204
/** Whether the PostFilterArrowArray() can work on the schema to remove
3205
 * rows that aren't selected by the spatial or attribute filter.
3206
 */
3207
bool OGRLayer::CanPostFilterArrowArray(const struct ArrowSchema *schema) const
3208
0
{
3209
0
    if (!IsHandledSchema(
3210
0
            /* bTopLevel=*/true, schema, std::string(),
3211
0
            m_poAttrQuery != nullptr,
3212
0
            m_poAttrQuery ? CPLStringList(m_poAttrQuery->GetUsedFields())
3213
0
                          : CPLStringList()))
3214
0
    {
3215
0
        return false;
3216
0
    }
3217
3218
0
    if (m_poFilterGeom)
3219
0
    {
3220
0
        bool bFound = false;
3221
0
        const char *pszGeomFieldName =
3222
0
            const_cast<OGRLayer *>(this)
3223
0
                ->GetLayerDefn()
3224
0
                ->GetGeomFieldDefn(m_iGeomFieldFilter)
3225
0
                ->GetNameRef();
3226
0
        for (int64_t i = 0; i < schema->n_children; ++i)
3227
0
        {
3228
0
            const auto fieldSchema = schema->children[i];
3229
0
            if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
3230
0
            {
3231
0
                if (!IsBinary(fieldSchema->format) &&
3232
0
                    !IsLargeBinary(fieldSchema->format))
3233
0
                {
3234
0
                    CPLDebug("OGR", "Geometry field %s has handled format '%s'",
3235
0
                             fieldSchema->name, fieldSchema->format);
3236
0
                    return false;
3237
0
                }
3238
3239
                // Check if ARROW:extension:name = ogc.wkb
3240
0
                const char *pabyMetadata = fieldSchema->metadata;
3241
0
                if (!pabyMetadata)
3242
0
                {
3243
0
                    CPLDebug(
3244
0
                        "OGR",
3245
0
                        "Geometry field %s lacks metadata in its schema field",
3246
0
                        fieldSchema->name);
3247
0
                    return false;
3248
0
                }
3249
3250
0
                const auto oMetadata = OGRParseArrowMetadata(pabyMetadata);
3251
0
                auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
3252
0
                if (oIter == oMetadata.end())
3253
0
                {
3254
0
                    CPLDebug("OGR",
3255
0
                             "Geometry field %s lacks "
3256
0
                             "%s metadata "
3257
0
                             "in its schema field",
3258
0
                             fieldSchema->name, ARROW_EXTENSION_NAME_KEY);
3259
0
                    return false;
3260
0
                }
3261
0
                if (oIter->second != EXTENSION_NAME_OGC_WKB &&
3262
0
                    oIter->second != EXTENSION_NAME_GEOARROW_WKB)
3263
0
                {
3264
0
                    CPLDebug("OGR",
3265
0
                             "Geometry field %s has unexpected "
3266
0
                             "%s = '%s' metadata "
3267
0
                             "in its schema field",
3268
0
                             fieldSchema->name, ARROW_EXTENSION_NAME_KEY,
3269
0
                             oIter->second.c_str());
3270
0
                    return false;
3271
0
                }
3272
3273
0
                bFound = true;
3274
0
                break;
3275
0
            }
3276
0
        }
3277
0
        if (!bFound)
3278
0
        {
3279
0
            CPLDebug("OGR", "Cannot find geometry field %s in schema",
3280
0
                     pszGeomFieldName);
3281
0
            return false;
3282
0
        }
3283
0
    }
3284
3285
0
    return true;
3286
0
}
3287
3288
#if 0
3289
/************************************************************************/
3290
/*                        CheckValidityBuffer()                         */
3291
/************************************************************************/
3292
3293
static void CheckValidityBuffer(const struct ArrowArray *array)
3294
{
3295
    if (array->null_count < 0)
3296
        return;
3297
    const uint8_t *pabyValidity =
3298
        static_cast<const uint8_t *>(const_cast<const void *>(array->buffers[0]));
3299
    if( !pabyValidity )
3300
    {
3301
        CPLAssert(array->null_count == 0);
3302
        return;
3303
    }
3304
    size_t null_count = 0;
3305
    const size_t nOffset = static_cast<size_t>(array->offset);
3306
    for(size_t i = 0; i < static_cast<size_t>(array->length); ++i )
3307
    {
3308
        if (!TestBit(pabyValidity, i + nOffset))
3309
            ++ null_count;
3310
    }
3311
    CPLAssert(static_cast<size_t>(array->null_count) == null_count);
3312
}
3313
#endif
3314
3315
/************************************************************************/
3316
/*                       CompactValidityBuffer()                        */
3317
/************************************************************************/
3318
3319
static void CompactValidityBuffer(
3320
    const struct ArrowSchema *, struct ArrowArray *array, size_t iStart,
3321
    const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3322
0
{
3323
    // Invalidate null_count as the same validity buffer may be used when
3324
    // scrolling batches, and this creates confusion if we try to set it
3325
    // to different values among the batches
3326
0
    if (array->null_count <= 0)
3327
0
    {
3328
0
        array->null_count = -1;
3329
0
        return;
3330
0
    }
3331
0
    array->null_count = -1;
3332
3333
0
    CPLAssert(static_cast<size_t>(array->length) >=
3334
0
              iStart + abyValidityFromFilters.size());
3335
0
    uint8_t *pabyValidity =
3336
0
        static_cast<uint8_t *>(const_cast<void *>(array->buffers[0]));
3337
0
    const size_t nLength = abyValidityFromFilters.size();
3338
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3339
0
    size_t j = iStart + nOffset;
3340
0
    for (size_t i = 0; i < nLength && j < nNewLength + nOffset; ++i)
3341
0
    {
3342
0
        if (abyValidityFromFilters[i])
3343
0
        {
3344
0
            if (TestBit(pabyValidity, i + iStart + nOffset))
3345
0
                SetBit(pabyValidity, j);
3346
0
            else
3347
0
                UnsetBit(pabyValidity, j);
3348
0
            ++j;
3349
0
        }
3350
0
    }
3351
0
}
3352
3353
/************************************************************************/
3354
/*                          CompactBoolArray()                          */
3355
/************************************************************************/
3356
3357
static void CompactBoolArray(const struct ArrowSchema *schema,
3358
                             struct ArrowArray *array, size_t iStart,
3359
                             const std::vector<bool> &abyValidityFromFilters,
3360
                             size_t nNewLength)
3361
0
{
3362
0
    CPLAssert(array->n_children == 0);
3363
0
    CPLAssert(array->n_buffers == 2);
3364
0
    CPLAssert(static_cast<size_t>(array->length) >=
3365
0
              iStart + abyValidityFromFilters.size());
3366
3367
0
    const size_t nLength = abyValidityFromFilters.size();
3368
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3369
0
    uint8_t *pabyData =
3370
0
        static_cast<uint8_t *>(const_cast<void *>(array->buffers[1]));
3371
0
    size_t j = iStart + nOffset;
3372
0
    for (size_t i = 0; i < nLength; ++i)
3373
0
    {
3374
0
        if (abyValidityFromFilters[i])
3375
0
        {
3376
0
            if (TestBit(pabyData, i + iStart + nOffset))
3377
0
                SetBit(pabyData, j);
3378
0
            else
3379
0
                UnsetBit(pabyData, j);
3380
3381
0
            ++j;
3382
0
        }
3383
0
    }
3384
3385
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3386
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3387
0
                              nNewLength);
3388
3389
0
    array->length = nNewLength;
3390
0
}
3391
3392
/************************************************************************/
3393
/*                       CompactPrimitiveArray()                        */
3394
/************************************************************************/
3395
3396
template <class T>
3397
static void CompactPrimitiveArray(
3398
    const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3399
    const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3400
0
{
3401
0
    CPLAssert(array->n_children == 0);
3402
0
    CPLAssert(array->n_buffers == 2);
3403
0
    CPLAssert(static_cast<size_t>(array->length) >=
3404
0
              iStart + abyValidityFromFilters.size());
3405
3406
0
    const size_t nLength = abyValidityFromFilters.size();
3407
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3408
0
    T *paData =
3409
0
        static_cast<T *>(const_cast<void *>(array->buffers[1])) + nOffset;
3410
0
    size_t j = iStart;
3411
0
    for (size_t i = 0; i < nLength; ++i)
3412
0
    {
3413
0
        if (abyValidityFromFilters[i])
3414
0
        {
3415
0
            paData[j] = paData[i + iStart];
3416
0
            ++j;
3417
0
        }
3418
0
    }
3419
3420
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3421
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3422
0
                              nNewLength);
3423
3424
0
    array->length = nNewLength;
3425
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactPrimitiveArray<unsigned char>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactPrimitiveArray<unsigned short>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactPrimitiveArray<unsigned int>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactPrimitiveArray<unsigned long>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
3426
3427
/************************************************************************/
3428
/*                     CompactStringOrBinaryArray()                     */
3429
/************************************************************************/
3430
3431
template <class OffsetType>
3432
static void CompactStringOrBinaryArray(
3433
    const struct ArrowSchema *schema, struct ArrowArray *array, size_t iStart,
3434
    const std::vector<bool> &abyValidityFromFilters, size_t nNewLength)
3435
0
{
3436
0
    CPLAssert(array->n_children == 0);
3437
0
    CPLAssert(array->n_buffers == 3);
3438
0
    CPLAssert(static_cast<size_t>(array->length) >=
3439
0
              iStart + abyValidityFromFilters.size());
3440
3441
0
    const size_t nLength = abyValidityFromFilters.size();
3442
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3443
0
    OffsetType *panOffsets =
3444
0
        static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3445
0
        nOffset;
3446
0
    GByte *pabyData =
3447
0
        static_cast<GByte *>(const_cast<void *>(array->buffers[2]));
3448
0
    size_t j = iStart;
3449
0
    OffsetType nCurOffset = panOffsets[iStart];
3450
0
    for (size_t i = 0; i < nLength; ++i)
3451
0
    {
3452
0
        if (abyValidityFromFilters[i])
3453
0
        {
3454
0
            const auto nStartOffset = panOffsets[i + iStart];
3455
0
            const auto nEndOffset = panOffsets[i + iStart + 1];
3456
0
            panOffsets[j] = nCurOffset;
3457
0
            const auto nSize = static_cast<size_t>(nEndOffset - nStartOffset);
3458
0
            if (nSize)
3459
0
            {
3460
0
                if (nCurOffset < nStartOffset)
3461
0
                {
3462
0
                    memmove(pabyData + nCurOffset, pabyData + nStartOffset,
3463
0
                            nSize);
3464
0
                }
3465
0
                nCurOffset += static_cast<OffsetType>(nSize);
3466
0
            }
3467
0
            ++j;
3468
0
        }
3469
0
    }
3470
0
    panOffsets[j] = nCurOffset;
3471
3472
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3473
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3474
0
                              nNewLength);
3475
3476
0
    array->length = nNewLength;
3477
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactStringOrBinaryArray<unsigned int>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:void CompactStringOrBinaryArray<unsigned long>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
3478
3479
/************************************************************************/
3480
/*                       CompactFixedWidthArray()                       */
3481
/************************************************************************/
3482
3483
static void
3484
CompactFixedWidthArray(const struct ArrowSchema *schema,
3485
                       struct ArrowArray *array, int nWidth, size_t iStart,
3486
                       const std::vector<bool> &abyValidityFromFilters,
3487
                       size_t nNewLength)
3488
0
{
3489
0
    CPLAssert(array->n_children == 0);
3490
0
    CPLAssert(array->n_buffers == 2);
3491
0
    CPLAssert(static_cast<size_t>(array->length) >=
3492
0
              iStart + abyValidityFromFilters.size());
3493
3494
0
    const size_t nLength = abyValidityFromFilters.size();
3495
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3496
0
    GByte *pabyData =
3497
0
        static_cast<GByte *>(const_cast<void *>(array->buffers[1]));
3498
0
    size_t nStartOffset = (iStart + nOffset) * nWidth;
3499
0
    size_t nCurOffset = nStartOffset;
3500
0
    for (size_t i = 0; i < nLength; ++i, nStartOffset += nWidth)
3501
0
    {
3502
0
        if (abyValidityFromFilters[i])
3503
0
        {
3504
0
            if (nCurOffset < nStartOffset)
3505
0
            {
3506
0
                memcpy(pabyData + nCurOffset, pabyData + nStartOffset, nWidth);
3507
0
            }
3508
0
            nCurOffset += nWidth;
3509
0
        }
3510
0
    }
3511
3512
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3513
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3514
0
                              nNewLength);
3515
3516
0
    array->length = nNewLength;
3517
0
}
3518
3519
/************************************************************************/
3520
/*                         CompactStructArray()                         */
3521
/************************************************************************/
3522
3523
static bool CompactArray(const struct ArrowSchema *schema,
3524
                         struct ArrowArray *array, size_t iStart,
3525
                         const std::vector<bool> &abyValidityFromFilters,
3526
                         size_t nNewLength);
3527
3528
static bool CompactStructArray(const struct ArrowSchema *schema,
3529
                               struct ArrowArray *array, size_t iStart,
3530
                               const std::vector<bool> &abyValidityFromFilters,
3531
                               size_t nNewLength)
3532
0
{
3533
    // The equality might not be strict in the case of when some sub-arrays
3534
    // are fully void !
3535
0
    CPLAssert(array->n_children <= schema->n_children);
3536
0
    for (int64_t iField = 0; iField < array->n_children; ++iField)
3537
0
    {
3538
0
        const auto psChildSchema = schema->children[iField];
3539
0
        const auto psChildArray = array->children[iField];
3540
        // To please Arrow validation...
3541
0
        const size_t nChildNewLength =
3542
0
            static_cast<size_t>(array->offset) + nNewLength;
3543
0
        if (psChildArray->length > array->length)
3544
0
        {
3545
0
            std::vector<bool> abyChildValidity(abyValidityFromFilters);
3546
0
            abyChildValidity.resize(
3547
0
                abyValidityFromFilters.size() +
3548
0
                    static_cast<size_t>(psChildArray->length - array->length),
3549
0
                false);
3550
0
            if (!CompactArray(psChildSchema, psChildArray, iStart,
3551
0
                              abyChildValidity, nChildNewLength))
3552
0
            {
3553
0
                return false;
3554
0
            }
3555
0
        }
3556
0
        else
3557
0
        {
3558
0
            if (!CompactArray(psChildSchema, psChildArray, iStart,
3559
0
                              abyValidityFromFilters, nChildNewLength))
3560
0
            {
3561
0
                return false;
3562
0
            }
3563
0
        }
3564
0
        CPLAssert(psChildArray->length ==
3565
0
                  static_cast<int64_t>(nChildNewLength));
3566
0
    }
3567
3568
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3569
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3570
0
                              nNewLength);
3571
3572
0
    array->length = nNewLength;
3573
3574
0
    return true;
3575
0
}
3576
3577
/************************************************************************/
3578
/*                       InvalidateNullCountRec()                       */
3579
/************************************************************************/
3580
3581
static void InvalidateNullCountRec(const struct ArrowSchema *schema,
3582
                                   struct ArrowArray *array)
3583
0
{
3584
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3585
0
        array->null_count = -1;
3586
0
    for (int i = 0; i < array->n_children; ++i)
3587
0
        InvalidateNullCountRec(schema->children[i], array->children[i]);
3588
0
}
3589
3590
/************************************************************************/
3591
/*                          CompactListArray()                          */
3592
/************************************************************************/
3593
3594
template <class OffsetType>
3595
static bool CompactListArray(const struct ArrowSchema *schema,
3596
                             struct ArrowArray *array, size_t iStart,
3597
                             const std::vector<bool> &abyValidityFromFilters,
3598
                             size_t nNewLength)
3599
0
{
3600
0
    CPLAssert(static_cast<size_t>(array->length) >=
3601
0
              iStart + abyValidityFromFilters.size());
3602
0
    CPLAssert(array->n_children == 1);
3603
0
    CPLAssert(array->n_buffers == 2);
3604
3605
0
    const auto psChildSchema = schema->children[0];
3606
0
    const auto psChildArray = array->children[0];
3607
3608
0
    const size_t nLength = abyValidityFromFilters.size();
3609
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3610
0
    OffsetType *panOffsets =
3611
0
        static_cast<OffsetType *>(const_cast<void *>(array->buffers[1])) +
3612
0
        nOffset;
3613
3614
0
    if (panOffsets[iStart + nLength] > panOffsets[iStart])
3615
0
    {
3616
0
        std::vector<bool> abyChildValidity(
3617
0
            static_cast<size_t>(panOffsets[iStart + nLength] -
3618
0
                                panOffsets[iStart]),
3619
0
            true);
3620
0
        size_t j = iStart;
3621
0
        OffsetType nCurOffset = panOffsets[iStart];
3622
0
        for (size_t i = 0; i < nLength; ++i)
3623
0
        {
3624
0
            if (abyValidityFromFilters[i])
3625
0
            {
3626
0
                const auto nSize =
3627
0
                    panOffsets[i + iStart + 1] - panOffsets[i + iStart];
3628
0
                panOffsets[j] = nCurOffset;
3629
0
                nCurOffset += nSize;
3630
0
                ++j;
3631
0
            }
3632
0
            else
3633
0
            {
3634
0
                const auto nStartOffset = panOffsets[i + iStart];
3635
0
                const auto nEndOffset = panOffsets[i + iStart + 1];
3636
0
                if (nStartOffset != nEndOffset)
3637
0
                {
3638
0
                    if (nStartOffset >=
3639
0
                        panOffsets[iStart] + abyChildValidity.size())
3640
0
                    {
3641
                        // shouldn't happen in sane arrays...
3642
0
                        CPLError(CE_Failure, CPLE_AppDefined,
3643
0
                                 "nStartOffset >= panOffsets[iStart] + "
3644
0
                                 "abyChildValidity.size()");
3645
0
                        return false;
3646
0
                    }
3647
                    // nEndOffset might be equal to abyChildValidity.size()
3648
0
                    if (nEndOffset >
3649
0
                        panOffsets[iStart] + abyChildValidity.size())
3650
0
                    {
3651
                        // shouldn't happen in sane arrays...
3652
0
                        CPLError(CE_Failure, CPLE_AppDefined,
3653
0
                                 "nEndOffset > panOffsets[iStart] + "
3654
0
                                 "abyChildValidity.size()");
3655
0
                        return false;
3656
0
                    }
3657
0
                    for (auto k = nStartOffset - panOffsets[iStart];
3658
0
                         k < nEndOffset - panOffsets[iStart]; ++k)
3659
0
                        abyChildValidity[static_cast<size_t>(k)] = false;
3660
0
                }
3661
0
            }
3662
0
        }
3663
0
        panOffsets[j] = nCurOffset;
3664
0
        const size_t nChildNewLength = static_cast<size_t>(panOffsets[j]);
3665
        // To please Arrow validation
3666
0
        for (; j < iStart + nLength; ++j)
3667
0
            panOffsets[j] = nCurOffset;
3668
3669
0
        if (!CompactArray(psChildSchema, psChildArray,
3670
0
                          static_cast<size_t>(panOffsets[iStart]),
3671
0
                          abyChildValidity, nChildNewLength))
3672
0
            return false;
3673
3674
0
        CPLAssert(psChildArray->length ==
3675
0
                  static_cast<int64_t>(nChildNewLength));
3676
0
    }
3677
0
    else
3678
0
    {
3679
0
        InvalidateNullCountRec(psChildSchema, psChildArray);
3680
0
    }
3681
3682
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3683
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3684
0
                              nNewLength);
3685
3686
0
    array->length = nNewLength;
3687
3688
0
    return true;
3689
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:bool CompactListArray<unsigned int>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:bool CompactListArray<unsigned long>(ArrowSchema const*, ArrowArray*, unsigned long, std::__1::vector<bool, std::__1::allocator<bool> > const&, unsigned long)
3690
3691
/************************************************************************/
3692
/*                     CompactFixedSizeListArray()                      */
3693
/************************************************************************/
3694
3695
static bool
3696
CompactFixedSizeListArray(const struct ArrowSchema *schema,
3697
                          struct ArrowArray *array, size_t N, size_t iStart,
3698
                          const std::vector<bool> &abyValidityFromFilters,
3699
                          size_t nNewLength)
3700
0
{
3701
0
    CPLAssert(static_cast<size_t>(array->length) >=
3702
0
              iStart + abyValidityFromFilters.size());
3703
0
    CPLAssert(array->n_children == 1);
3704
3705
0
    const auto psChildSchema = schema->children[0];
3706
0
    const auto psChildArray = array->children[0];
3707
3708
0
    const size_t nLength = abyValidityFromFilters.size();
3709
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3710
0
    std::vector<bool> abyChildValidity(N * nLength, true);
3711
0
    size_t nChildNewLength = (iStart + nOffset) * N;
3712
0
    size_t nSrcLength = 0;
3713
0
    for (size_t i = 0; i < nLength; ++i)
3714
0
    {
3715
0
        if (abyValidityFromFilters[i])
3716
0
        {
3717
0
            nChildNewLength += N;
3718
0
            nSrcLength++;
3719
0
        }
3720
0
        else
3721
0
        {
3722
0
            const size_t nStartOffset = i * N;
3723
0
            const size_t nEndOffset = (i + 1) * N;
3724
0
            for (size_t k = nStartOffset; k < nEndOffset; ++k)
3725
0
                abyChildValidity[k] = false;
3726
0
        }
3727
0
    }
3728
0
    CPL_IGNORE_RET_VAL(nSrcLength);
3729
0
    CPLAssert(iStart + nSrcLength == nNewLength);
3730
3731
0
    if (!CompactArray(psChildSchema, psChildArray, (iStart + nOffset) * N,
3732
0
                      abyChildValidity, nChildNewLength))
3733
0
        return false;
3734
3735
0
    if (schema->flags & ARROW_FLAG_NULLABLE)
3736
0
        CompactValidityBuffer(schema, array, iStart, abyValidityFromFilters,
3737
0
                              nNewLength);
3738
3739
0
    array->length = nNewLength;
3740
3741
0
    CPLAssert(psChildArray->length >=
3742
0
              static_cast<int64_t>(N) * (array->length + array->offset));
3743
3744
0
    return true;
3745
0
}
3746
3747
/************************************************************************/
3748
/*                          CompactMapArray()                           */
3749
/************************************************************************/
3750
3751
static bool CompactMapArray(const struct ArrowSchema *schema,
3752
                            struct ArrowArray *array, size_t iStart,
3753
                            const std::vector<bool> &abyValidityFromFilters,
3754
                            size_t nNewLength)
3755
0
{
3756
0
    return CompactListArray<uint32_t>(schema, array, iStart,
3757
0
                                      abyValidityFromFilters, nNewLength);
3758
0
}
3759
3760
/************************************************************************/
3761
/*                            CompactArray()                            */
3762
/************************************************************************/
3763
3764
static bool CompactArray(const struct ArrowSchema *schema,
3765
                         struct ArrowArray *array, size_t iStart,
3766
                         const std::vector<bool> &abyValidityFromFilters,
3767
                         size_t nNewLength)
3768
0
{
3769
0
    const char *format = schema->format;
3770
3771
0
    if (IsStructure(format))
3772
0
    {
3773
0
        if (!CompactStructArray(schema, array, iStart, abyValidityFromFilters,
3774
0
                                nNewLength))
3775
0
            return false;
3776
0
    }
3777
0
    else if (IsList(format))
3778
0
    {
3779
0
        if (!CompactListArray<uint32_t>(schema, array, iStart,
3780
0
                                        abyValidityFromFilters, nNewLength))
3781
0
            return false;
3782
0
    }
3783
0
    else if (IsLargeList(format))
3784
0
    {
3785
0
        if (!CompactListArray<uint64_t>(schema, array, iStart,
3786
0
                                        abyValidityFromFilters, nNewLength))
3787
0
            return false;
3788
0
    }
3789
0
    else if (IsMap(format))
3790
0
    {
3791
0
        if (!CompactMapArray(schema, array, iStart, abyValidityFromFilters,
3792
0
                             nNewLength))
3793
0
            return false;
3794
0
    }
3795
0
    else if (IsFixedSizeList(format))
3796
0
    {
3797
0
        const int N = GetFixedSizeList(format);
3798
0
        if (N <= 0)
3799
0
            return false;
3800
0
        if (!CompactFixedSizeListArray(schema, array, static_cast<size_t>(N),
3801
0
                                       iStart, abyValidityFromFilters,
3802
0
                                       nNewLength))
3803
0
            return false;
3804
0
    }
3805
0
    else if (IsBoolean(format))
3806
0
    {
3807
0
        CompactBoolArray(schema, array, iStart, abyValidityFromFilters,
3808
0
                         nNewLength);
3809
0
    }
3810
0
    else if (IsInt8(format) || IsUInt8(format))
3811
0
    {
3812
0
        CompactPrimitiveArray<uint8_t>(schema, array, iStart,
3813
0
                                       abyValidityFromFilters, nNewLength);
3814
0
    }
3815
0
    else if (IsInt16(format) || IsUInt16(format) || IsFloat16(format))
3816
0
    {
3817
0
        CompactPrimitiveArray<uint16_t>(schema, array, iStart,
3818
0
                                        abyValidityFromFilters, nNewLength);
3819
0
    }
3820
0
    else if (IsInt32(format) || IsUInt32(format) || IsFloat32(format) ||
3821
0
             strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
3822
0
             strcmp(format, "ttm") == 0)
3823
0
    {
3824
0
        CompactPrimitiveArray<uint32_t>(schema, array, iStart,
3825
0
                                        abyValidityFromFilters, nNewLength);
3826
0
    }
3827
0
    else if (IsInt64(format) || IsUInt64(format) || IsFloat64(format) ||
3828
0
             strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
3829
0
             strcmp(format, "ttn") == 0 || strncmp(format, "ts", 2) == 0)
3830
0
    {
3831
0
        CompactPrimitiveArray<uint64_t>(schema, array, iStart,
3832
0
                                        abyValidityFromFilters, nNewLength);
3833
0
    }
3834
0
    else if (IsString(format) || IsBinary(format))
3835
0
    {
3836
0
        CompactStringOrBinaryArray<uint32_t>(
3837
0
            schema, array, iStart, abyValidityFromFilters, nNewLength);
3838
0
    }
3839
0
    else if (IsLargeString(format) || IsLargeBinary(format))
3840
0
    {
3841
0
        CompactStringOrBinaryArray<uint64_t>(
3842
0
            schema, array, iStart, abyValidityFromFilters, nNewLength);
3843
0
    }
3844
0
    else if (IsFixedWidthBinary(format))
3845
0
    {
3846
0
        const int nWidth = GetFixedWithBinary(format);
3847
0
        CompactFixedWidthArray(schema, array, nWidth, iStart,
3848
0
                               abyValidityFromFilters, nNewLength);
3849
0
    }
3850
0
    else if (IsDecimal(format))
3851
0
    {
3852
0
        int nPrecision = 0;
3853
0
        int nScale = 0;
3854
0
        int nWidthInBytes = 0;
3855
0
        if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
3856
0
        {
3857
0
            CPLError(CE_Failure, CPLE_AppDefined,
3858
0
                     "Unexpected error in PostFilterArrowArray(): unhandled "
3859
0
                     "field format: %s",
3860
0
                     format);
3861
3862
0
            return false;
3863
0
        }
3864
0
        CompactFixedWidthArray(schema, array, nWidthInBytes, iStart,
3865
0
                               abyValidityFromFilters, nNewLength);
3866
0
    }
3867
0
    else
3868
0
    {
3869
0
        CPLError(CE_Failure, CPLE_AppDefined,
3870
0
                 "Unexpected error in CompactArray(): unhandled "
3871
0
                 "field format: %s",
3872
0
                 format);
3873
0
        return false;
3874
0
    }
3875
3876
0
    return true;
3877
0
}
3878
3879
/************************************************************************/
3880
/*                   FillValidityArrayFromWKBArray()                    */
3881
/************************************************************************/
3882
3883
template <class OffsetType>
3884
static size_t
3885
FillValidityArrayFromWKBArray(struct ArrowArray *array, const OGRLayer *poLayer,
3886
                              std::vector<bool> &abyValidityFromFilters)
3887
0
{
3888
0
    const size_t nLength = static_cast<size_t>(array->length);
3889
0
    const uint8_t *pabyValidity =
3890
0
        array->null_count == 0
3891
0
            ? nullptr
3892
0
            : static_cast<const uint8_t *>(array->buffers[0]);
3893
0
    const size_t nOffset = static_cast<size_t>(array->offset);
3894
0
    const OffsetType *panOffsets =
3895
0
        static_cast<const OffsetType *>(array->buffers[1]) + nOffset;
3896
0
    const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
3897
0
    OGREnvelope sEnvelope;
3898
0
    abyValidityFromFilters.resize(nLength);
3899
0
    size_t nCountIntersecting = 0;
3900
0
    for (size_t i = 0; i < nLength; ++i)
3901
0
    {
3902
0
        if (!pabyValidity || TestBit(pabyValidity, i + nOffset))
3903
0
        {
3904
0
            const GByte *pabyWKB = pabyData + panOffsets[i];
3905
0
            const size_t nWKBSize =
3906
0
                static_cast<size_t>(panOffsets[i + 1] - panOffsets[i]);
3907
0
            if (poLayer->FilterWKBGeometry(pabyWKB, nWKBSize,
3908
0
                                           /* bEnvelopeAlreadySet=*/false,
3909
0
                                           sEnvelope))
3910
0
            {
3911
0
                abyValidityFromFilters[i] = true;
3912
0
                nCountIntersecting++;
3913
0
            }
3914
0
        }
3915
0
    }
3916
0
    return nCountIntersecting;
3917
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillValidityArrayFromWKBArray<unsigned int>(ArrowArray*, OGRLayer const*, std::__1::vector<bool, std::__1::allocator<bool> >&)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long FillValidityArrayFromWKBArray<unsigned long>(ArrowArray*, OGRLayer const*, std::__1::vector<bool, std::__1::allocator<bool> >&)
3918
3919
/************************************************************************/
3920
/*                    ArrowTimestampToOGRDateTime()                     */
3921
/************************************************************************/
3922
3923
static void ArrowTimestampToOGRDateTime(int64_t nTimestamp,
3924
                                        int nInvFactorToSecond,
3925
                                        const char *pszTZ, OGRFeature &oFeature,
3926
                                        int iField)
3927
0
{
3928
0
    double floatingPart = 0;
3929
0
    if (nInvFactorToSecond)
3930
0
    {
3931
0
        floatingPart =
3932
0
            (nTimestamp % nInvFactorToSecond) / double(nInvFactorToSecond);
3933
0
        nTimestamp /= nInvFactorToSecond;
3934
0
    }
3935
0
    int nTZFlag = 0;
3936
0
    const size_t nTZLen = strlen(pszTZ);
3937
0
    if ((nTZLen == 3 && strcmp(pszTZ, "UTC") == 0) ||
3938
0
        (nTZLen == 7 && strcmp(pszTZ, "Etc/UTC") == 0))
3939
0
    {
3940
0
        nTZFlag = 100;
3941
0
    }
3942
0
    else if (nTZLen == 6 && (pszTZ[0] == '+' || pszTZ[0] == '-') &&
3943
0
             pszTZ[3] == ':')
3944
0
    {
3945
0
        int nTZHour = atoi(pszTZ + 1);
3946
0
        int nTZMin = atoi(pszTZ + 4);
3947
0
        if (nTZHour >= 0 && nTZHour <= 14 && nTZMin >= 0 && nTZMin < 60 &&
3948
0
            (nTZMin % 15) == 0)
3949
0
        {
3950
0
            nTZFlag = (nTZHour * 4) + (nTZMin / 15);
3951
0
            if (pszTZ[0] == '+')
3952
0
            {
3953
0
                nTZFlag = 100 + nTZFlag;
3954
0
                nTimestamp += nTZHour * 3600 + nTZMin * 60;
3955
0
            }
3956
0
            else
3957
0
            {
3958
0
                nTZFlag = 100 - nTZFlag;
3959
0
                nTimestamp -= nTZHour * 3600 + nTZMin * 60;
3960
0
            }
3961
0
        }
3962
0
    }
3963
0
    struct tm dt;
3964
0
    CPLUnixTimeToYMDHMS(nTimestamp, &dt);
3965
0
    oFeature.SetField(iField, dt.tm_year + 1900, dt.tm_mon + 1, dt.tm_mday,
3966
0
                      dt.tm_hour, dt.tm_min,
3967
0
                      static_cast<float>(dt.tm_sec + floatingPart), nTZFlag);
3968
0
}
3969
3970
/************************************************************************/
3971
/*                    BuildMapFieldNameToArrowPath()                    */
3972
/************************************************************************/
3973
3974
static void
3975
BuildMapFieldNameToArrowPath(const struct ArrowSchema *schema,
3976
                             std::map<std::string, std::vector<int>> &oMap,
3977
                             const std::string &osPrefix,
3978
                             std::vector<int> &anArrowPath)
3979
0
{
3980
0
    for (int64_t i = 0; i < schema->n_children; ++i)
3981
0
    {
3982
0
        auto psChild = schema->children[i];
3983
0
        anArrowPath.push_back(static_cast<int>(i));
3984
0
        if (IsStructure(psChild->format))
3985
0
        {
3986
0
            std::string osNewPrefix(osPrefix);
3987
0
            osNewPrefix += psChild->name;
3988
0
            osNewPrefix += ".";
3989
0
            BuildMapFieldNameToArrowPath(psChild, oMap, osNewPrefix,
3990
0
                                         anArrowPath);
3991
0
        }
3992
0
        else
3993
0
        {
3994
0
            oMap[osPrefix + psChild->name] = anArrowPath;
3995
0
        }
3996
0
        anArrowPath.pop_back();
3997
0
    }
3998
0
}
3999
4000
/************************************************************************/
4001
/*                           FillFieldList()                            */
4002
/************************************************************************/
4003
4004
template <typename ListOffsetType, typename ArrowType,
4005
          typename OGRType = ArrowType>
4006
inline static void FillFieldList(const struct ArrowArray *array,
4007
                                 int iOGRFieldIdx, size_t nOffsettedIndex,
4008
                                 const struct ArrowArray *childArray,
4009
                                 OGRFeature &oFeature)
4010
0
{
4011
0
    const auto panOffsets =
4012
0
        static_cast<const ListOffsetType *>(array->buffers[1]) +
4013
0
        nOffsettedIndex;
4014
0
    std::vector<OGRType> aValues;
4015
0
    const auto *paValues =
4016
0
        static_cast<const ArrowType *>(childArray->buffers[1]);
4017
0
    for (size_t i = static_cast<size_t>(panOffsets[0]);
4018
0
         i < static_cast<size_t>(panOffsets[1]); ++i)
4019
0
    {
4020
0
        aValues.push_back(static_cast<OGRType>(paValues[i]));
4021
0
    }
4022
0
    oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4023
0
                      aValues.data());
4024
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, signed char, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, signed char, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, unsigned char, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, unsigned char, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, short, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, short, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, unsigned short, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, unsigned short, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, int, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, int, int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, unsigned int, long long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, unsigned int, long long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, long, long long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, long, long long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, unsigned long, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, unsigned long, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, float, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, float, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned int, double, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldList<unsigned long, double, double>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
4025
4026
/************************************************************************/
4027
/*                       FillFieldListFromBool()                        */
4028
/************************************************************************/
4029
4030
template <typename ListOffsetType>
4031
inline static void
4032
FillFieldListFromBool(const struct ArrowArray *array, int iOGRFieldIdx,
4033
                      size_t nOffsettedIndex,
4034
                      const struct ArrowArray *childArray, OGRFeature &oFeature)
4035
0
{
4036
0
    const auto panOffsets =
4037
0
        static_cast<const ListOffsetType *>(array->buffers[1]) +
4038
0
        nOffsettedIndex;
4039
0
    std::vector<int> aValues;
4040
0
    const auto *paValues = static_cast<const uint8_t *>(childArray->buffers[1]);
4041
0
    for (size_t i = static_cast<size_t>(panOffsets[0]);
4042
0
         i < static_cast<size_t>(panOffsets[1]); ++i)
4043
0
    {
4044
0
        aValues.push_back(TestBit(paValues, i) ? 1 : 0);
4045
0
    }
4046
0
    oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4047
0
                      aValues.data());
4048
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromBool<unsigned int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromBool<unsigned long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
4049
4050
/************************************************************************/
4051
/*                     FillFieldListFromHalfFloat()                     */
4052
/************************************************************************/
4053
4054
template <typename ListOffsetType>
4055
inline static void FillFieldListFromHalfFloat(
4056
    const struct ArrowArray *array, int iOGRFieldIdx, size_t nOffsettedIndex,
4057
    const struct ArrowArray *childArray, OGRFeature &oFeature)
4058
0
{
4059
0
    const auto panOffsets =
4060
0
        static_cast<const ListOffsetType *>(array->buffers[1]) +
4061
0
        nOffsettedIndex;
4062
0
    std::vector<double> aValues;
4063
0
    const auto *paValues =
4064
0
        static_cast<const uint16_t *>(childArray->buffers[1]);
4065
0
    for (size_t i = static_cast<size_t>(panOffsets[0]);
4066
0
         i < static_cast<size_t>(panOffsets[1]); ++i)
4067
0
    {
4068
0
        const auto nFloat16AsUInt32 = CPLHalfToFloat(paValues[i]);
4069
0
        float f;
4070
0
        memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4071
0
        aValues.push_back(static_cast<double>(f));
4072
0
    }
4073
0
    oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4074
0
                      aValues.data());
4075
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromHalfFloat<unsigned int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromHalfFloat<unsigned long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
4076
4077
/************************************************************************/
4078
/*                      FillFieldListFromString()                       */
4079
/************************************************************************/
4080
4081
template <typename ListOffsetType, typename StringOffsetType>
4082
inline static void FillFieldListFromString(const struct ArrowArray *array,
4083
                                           int iOGRFieldIdx,
4084
                                           size_t nOffsettedIndex,
4085
                                           const struct ArrowArray *childArray,
4086
                                           OGRFeature &oFeature)
4087
0
{
4088
0
    const auto panOffsets =
4089
0
        static_cast<const ListOffsetType *>(array->buffers[1]) +
4090
0
        nOffsettedIndex;
4091
0
    CPLStringList aosVals;
4092
0
    const auto panSubOffsets =
4093
0
        static_cast<const StringOffsetType *>(childArray->buffers[1]);
4094
0
    const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
4095
0
    std::string osTmp;
4096
0
    for (size_t i = static_cast<size_t>(panOffsets[0]);
4097
0
         i < static_cast<size_t>(panOffsets[1]); ++i)
4098
0
    {
4099
0
        osTmp.assign(
4100
0
            pszValues + panSubOffsets[i],
4101
0
            static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
4102
0
        aosVals.AddString(osTmp.c_str());
4103
0
    }
4104
0
    oFeature.SetField(iOGRFieldIdx, aosVals.List());
4105
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromString<unsigned int, unsigned int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromString<unsigned long, unsigned int>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromString<unsigned int, unsigned long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldListFromString<unsigned long, unsigned long>(ArrowArray const*, int, unsigned long, ArrowArray const*, OGRFeature&)
4106
4107
/************************************************************************/
4108
/*                       FillFieldFixedSizeList()                       */
4109
/************************************************************************/
4110
4111
template <typename ArrowType, typename OGRType = ArrowType>
4112
inline static void FillFieldFixedSizeList(
4113
    const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4114
    const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4115
0
{
4116
0
    std::vector<OGRType> aValues;
4117
0
    const auto *paValues =
4118
0
        static_cast<const ArrowType *>(childArray->buffers[1]) +
4119
0
        childArray->offset + nOffsettedIndex * nItems;
4120
0
    for (int i = 0; i < nItems; ++i)
4121
0
    {
4122
0
        aValues.push_back(static_cast<OGRType>(paValues[i]));
4123
0
    }
4124
0
    oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
4125
0
                      aValues.data());
4126
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<signed char, int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<unsigned char, int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<short, int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<unsigned short, int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<int, int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<unsigned int, long long>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<long, long long>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<unsigned long, double>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<float, double>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeList<double, double>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
4127
4128
/************************************************************************/
4129
/*                    FillFieldFixedSizeListString()                    */
4130
/************************************************************************/
4131
4132
template <typename StringOffsetType>
4133
inline static void FillFieldFixedSizeListString(
4134
    const struct ArrowArray *, int iOGRFieldIdx, size_t nOffsettedIndex,
4135
    const int nItems, const struct ArrowArray *childArray, OGRFeature &oFeature)
4136
0
{
4137
0
    CPLStringList aosVals;
4138
0
    const auto panSubOffsets =
4139
0
        static_cast<const StringOffsetType *>(childArray->buffers[1]) +
4140
0
        childArray->offset + nOffsettedIndex * nItems;
4141
0
    const char *pszValues = static_cast<const char *>(childArray->buffers[2]);
4142
0
    std::string osTmp;
4143
0
    for (int i = 0; i < nItems; ++i)
4144
0
    {
4145
0
        osTmp.assign(
4146
0
            pszValues + panSubOffsets[i],
4147
0
            static_cast<size_t>(panSubOffsets[i + 1] - panSubOffsets[i]));
4148
0
        aosVals.AddString(osTmp.c_str());
4149
0
    }
4150
0
    oFeature.SetField(iOGRFieldIdx, aosVals.List());
4151
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeListString<unsigned int>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldFixedSizeListString<unsigned long>(ArrowArray const*, int, unsigned long, int, ArrowArray const*, OGRFeature&)
4152
4153
/************************************************************************/
4154
/*                              GetValue()                              */
4155
/************************************************************************/
4156
4157
// Return the value of row iFeature of a fixed-width primitive array, read
// from its data buffer (buffers[1]) after applying the array offset.
template <typename ArrowType>
inline static ArrowType GetValue(const struct ArrowArray *array,
                                 size_t iFeature)
{
    return static_cast<const ArrowType *>(
        array->buffers[1])[iFeature + array->offset];
}
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned char GetValue<unsigned char>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:signed char GetValue<signed char>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned short GetValue<unsigned short>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:short GetValue<short>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned int GetValue<unsigned int>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:int GetValue<int>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:unsigned long GetValue<unsigned long>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:long GetValue<long>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:float GetValue<float>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:double GetValue<double>(ArrowArray const*, unsigned long)
4164
4165
// Boolean specialization: values are bit-packed, so the bit at position
// iFeature + array->offset of the data buffer (buffers[1]) is tested.
template <> bool GetValue<bool>(const struct ArrowArray *array, size_t iFeature)
{
    const size_t nBitIdx = iFeature + static_cast<size_t>(array->offset);
    return TestBit(static_cast<const uint8_t *>(array->buffers[1]), nBitIdx);
}
4170
4171
/************************************************************************/
4172
/*                          GetValueFloat16()                           */
4173
/************************************************************************/
4174
4175
static float GetValueFloat16(const struct ArrowArray *array, const size_t nIdx)
4176
0
{
4177
0
    const auto *panValues = static_cast<const uint16_t *>(array->buffers[1]);
4178
0
    const auto nFloat16AsUInt32 =
4179
0
        CPLHalfToFloat(panValues[nIdx + array->offset]);
4180
0
    float f;
4181
0
    memcpy(&f, &nFloat16AsUInt32, sizeof(f));
4182
0
    return f;
4183
0
}
4184
4185
/************************************************************************/
4186
/*                          GetValueDecimal()                           */
4187
/************************************************************************/
4188
4189
static double GetValueDecimal(const struct ArrowArray *array,
4190
                              const int nWidthIn64BitWord, const int nScale,
4191
                              const size_t nIdx)
4192
0
{
4193
0
#ifdef CPL_LSB
4194
0
    const auto nIdxIn64BitWord = nIdx * nWidthIn64BitWord;
4195
#else
4196
    const auto nIdxIn64BitWord =
4197
        nIdx * nWidthIn64BitWord + nWidthIn64BitWord - 1;
4198
#endif
4199
0
    const auto *panValues = static_cast<const int64_t *>(array->buffers[1]);
4200
0
    const auto nVal =
4201
0
        panValues[nIdxIn64BitWord + array->offset * nWidthIn64BitWord];
4202
0
    return static_cast<double>(nVal) * std::pow(10.0, -nScale);
4203
0
}
4204
4205
/************************************************************************/
4206
/*                             GetString()                              */
4207
/************************************************************************/
4208
4209
// Return a copy of the string stored at row nIdx of a (large) string array.
//
// OffsetType is the integer type of the offsets buffer (buffers[1]). The
// array offset is applied to the offsets buffer; the two consecutive offsets
// found there delimit the string in the data buffer (buffers[2]).
template <class OffsetType>
static std::string GetString(const struct ArrowArray *array, const size_t nIdx)
{
    const char *pachData = static_cast<const char *>(array->buffers[2]);
    const OffsetType *panRowOffsets =
        static_cast<const OffsetType *>(array->buffers[1]) +
        static_cast<size_t>(array->offset) + nIdx;

    const size_t nBegin = static_cast<size_t>(panRowOffsets[0]);
    const size_t nLen =
        static_cast<size_t>(panRowOffsets[1] - panRowOffsets[0]);
    return std::string(pachData + nBegin, nLen);
}
Unexecuted instantiation: ogrlayerarrow.cpp:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > GetString<unsigned int>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > GetString<unsigned long>(ArrowArray const*, unsigned long)
4221
4222
/************************************************************************/
4223
/*                         GetBinaryAsBase64()                          */
4224
/************************************************************************/
4225
4226
template <class OffsetType>
4227
static std::string GetBinaryAsBase64(const struct ArrowArray *array,
4228
                                     const size_t nIdx)
4229
0
{
4230
0
    const OffsetType *panOffsets =
4231
0
        static_cast<const OffsetType *>(array->buffers[1]) +
4232
0
        static_cast<size_t>(array->offset) + nIdx;
4233
0
    const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]);
4234
0
    const size_t nLen = static_cast<size_t>(panOffsets[1] - panOffsets[0]);
4235
0
    if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
4236
0
    {
4237
0
        CPLError(CE_Failure, CPLE_AppDefined, "Too large binary");
4238
0
        return std::string();
4239
0
    }
4240
0
    char *pszVal = CPLBase64Encode(
4241
0
        static_cast<int>(nLen), pabyData + static_cast<size_t>(panOffsets[0]));
4242
0
    std::string osStr(pszVal);
4243
0
    CPLFree(pszVal);
4244
0
    return osStr;
4245
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > GetBinaryAsBase64<unsigned int>(ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > GetBinaryAsBase64<unsigned long>(ArrowArray const*, unsigned long)
4246
4247
/************************************************************************/
4248
/*                  GetValueFixedWithBinaryAsBase64()                   */
4249
/************************************************************************/
4250
4251
static std::string
4252
GetValueFixedWithBinaryAsBase64(const struct ArrowArray *array,
4253
                                const int nWidth, const size_t nIdx)
4254
0
{
4255
0
    const GByte *pabyData = static_cast<const GByte *>(array->buffers[1]);
4256
0
    char *pszVal = CPLBase64Encode(
4257
0
        nWidth,
4258
0
        pabyData + (static_cast<size_t>(array->offset) + nIdx) * nWidth);
4259
0
    std::string osStr(pszVal);
4260
0
    CPLFree(pszVal);
4261
0
    return osStr;
4262
0
}
4263
4264
static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
4265
                                     const struct ArrowArray *array,
4266
                                     const size_t nIdx);
4267
4268
/************************************************************************/
4269
/*                             AddToArray()                             */
4270
/************************************************************************/
4271
4272
// Append to oArray the JSON representation of element nIdx of 'array',
// whose type is described by schema->format.
// Scalar, string, binary (Base64-encoded) and decimal formats are handled
// here; any other format is delegated to GetObjectAsJSON().
// The caller is expected to have checked that the element is not null.
static void AddToArray(CPLJSONArray &oArray, const struct ArrowSchema *schema,
                       const struct ArrowArray *array, const size_t nIdx)
{
    const char *const pszFormat = schema->format;

    if (IsBoolean(pszFormat))
    {
        oArray.Add(GetValue<bool>(array, nIdx));
        return;
    }
    if (IsUInt8(pszFormat))
    {
        oArray.Add(GetValue<uint8_t>(array, nIdx));
        return;
    }
    if (IsInt8(pszFormat))
    {
        oArray.Add(GetValue<int8_t>(array, nIdx));
        return;
    }
    if (IsUInt16(pszFormat))
    {
        oArray.Add(GetValue<uint16_t>(array, nIdx));
        return;
    }
    if (IsInt16(pszFormat))
    {
        oArray.Add(GetValue<int16_t>(array, nIdx));
        return;
    }
    if (IsUInt32(pszFormat))
    {
        oArray.Add(static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
        return;
    }
    if (IsInt32(pszFormat))
    {
        oArray.Add(GetValue<int32_t>(array, nIdx));
        return;
    }
    if (IsUInt64(pszFormat))
    {
        oArray.Add(GetValue<uint64_t>(array, nIdx));
        return;
    }
    if (IsInt64(pszFormat))
    {
        oArray.Add(static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
        return;
    }
    if (IsFloat16(pszFormat))
    {
        oArray.Add(static_cast<double>(GetValueFloat16(array, nIdx)));
        return;
    }
    if (IsFloat32(pszFormat))
    {
        oArray.Add(static_cast<double>(GetValue<float>(array, nIdx)));
        return;
    }
    if (IsFloat64(pszFormat))
    {
        oArray.Add(GetValue<double>(array, nIdx));
        return;
    }
    if (IsString(pszFormat))
    {
        oArray.Add(GetString<uint32_t>(array, nIdx));
        return;
    }
    if (IsLargeString(pszFormat))
    {
        oArray.Add(GetString<uint64_t>(array, nIdx));
        return;
    }
    if (IsBinary(pszFormat))
    {
        oArray.Add(GetBinaryAsBase64<uint32_t>(array, nIdx));
        return;
    }
    if (IsLargeBinary(pszFormat))
    {
        oArray.Add(GetBinaryAsBase64<uint64_t>(array, nIdx));
        return;
    }
    if (IsFixedWidthBinary(pszFormat))
    {
        oArray.Add(GetValueFixedWithBinaryAsBase64(
            array, GetFixedWithBinary(pszFormat), nIdx));
        return;
    }
    if (IsDecimal(pszFormat))
    {
        int nPrecision = 0;
        int nScale = 0;
        int nWidthInBytes = 0;
        const bool bOK =
            ParseDecimalFormat(pszFormat, nPrecision, nScale, nWidthInBytes);
        // Already validated
        CPLAssert(bOK);
        CPL_IGNORE_RET_VAL(bOK);
        // GetValueDecimal() expects the width as a count of 64-bit words.
        oArray.Add(GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
        return;
    }

    // Nested types (and unhandled formats, which are reported as an error
    // by GetObjectAsJSON()).
    oArray.Add(GetObjectAsJSON(schema, array, nIdx));
}
4325
4326
/************************************************************************/
4327
/*                           GetListAsJSON()                            */
4328
/************************************************************************/
4329
4330
template <class OffsetType>
4331
static CPLJSONArray GetListAsJSON(const struct ArrowSchema *schema,
4332
                                  const struct ArrowArray *array,
4333
                                  const size_t nIdx)
4334
0
{
4335
0
    CPLJSONArray oArray;
4336
0
    const auto panOffsets = static_cast<const OffsetType *>(array->buffers[1]) +
4337
0
                            array->offset + nIdx;
4338
0
    const auto childSchema = schema->children[0];
4339
0
    const auto childArray = array->children[0];
4340
0
    const uint8_t *pabyValidity =
4341
0
        childArray->null_count == 0
4342
0
            ? nullptr
4343
0
            : static_cast<const uint8_t *>(childArray->buffers[0]);
4344
0
    for (size_t k = static_cast<size_t>(panOffsets[0]);
4345
0
         k < static_cast<size_t>(panOffsets[1]); k++)
4346
0
    {
4347
0
        if (!pabyValidity ||
4348
0
            TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
4349
0
        {
4350
0
            AddToArray(oArray, childSchema, childArray, k);
4351
0
        }
4352
0
        else
4353
0
        {
4354
0
            oArray.AddNull();
4355
0
        }
4356
0
    }
4357
0
    return oArray;
4358
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:CPLJSONArray GetListAsJSON<unsigned int>(ArrowSchema const*, ArrowArray const*, unsigned long)
Unexecuted instantiation: ogrlayerarrow.cpp:CPLJSONArray GetListAsJSON<unsigned long>(ArrowSchema const*, ArrowArray const*, unsigned long)
4359
4360
/************************************************************************/
4361
/*                       GetFixedSizeListAsJSON()                       */
4362
/************************************************************************/
4363
4364
// Build a JSON array from the list stored at index nIdx of a fixed-size
// list array. The number of items per list is parsed from schema->format;
// items come from the single child array. Null items are emitted as JSON
// null.
// nIdx does not include array->offset: like the sibling helpers
// (GetString(), GetListAsJSON(), GetMapAsJSON()), this function adds it
// itself, so that a list array with a non-zero offset addresses the right
// child slots.
static CPLJSONArray GetFixedSizeListAsJSON(const struct ArrowSchema *schema,
                                           const struct ArrowArray *array,
                                           const size_t nIdx)
{
    CPLJSONArray oArray;
    const int nVals = GetFixedSizeList(schema->format);
    const auto childSchema = schema->children[0];
    const auto childArray = array->children[0];
    const uint8_t *pabyValidity =
        childArray->null_count == 0
            ? nullptr
            : static_cast<const uint8_t *>(childArray->buffers[0]);
    // List i of the parent occupies child slots
    // [(offset + i) * nVals, (offset + i + 1) * nVals), before the child's
    // own offset is applied.
    const size_t nListIdx = nIdx + static_cast<size_t>(array->offset);
    for (size_t k = nListIdx * nVals; k < (nListIdx + 1) * nVals; k++)
    {
        if (!pabyValidity ||
            TestBit(pabyValidity, k + static_cast<size_t>(childArray->offset)))
        {
            AddToArray(oArray, childSchema, childArray, k);
        }
        else
        {
            oArray.AddNull();
        }
    }
    return oArray;
}
4390
4391
/************************************************************************/
4392
/*                             AddToDict()                              */
4393
/************************************************************************/
4394
4395
// Add to oDict, under key osKey, the JSON representation of element nIdx of
// 'array', whose type is described by schema->format.
// Scalar, string, binary (Base64-encoded) and decimal formats are handled
// here; any other format is delegated to GetObjectAsJSON().
// The caller is expected to have checked that the element is not null.
static void AddToDict(CPLJSONObject &oDict, const std::string &osKey,
                      const struct ArrowSchema *schema,
                      const struct ArrowArray *array, const size_t nIdx)
{
    const char *const pszFormat = schema->format;

    if (IsBoolean(pszFormat))
    {
        oDict.Add(osKey, GetValue<bool>(array, nIdx));
        return;
    }
    if (IsUInt8(pszFormat))
    {
        oDict.Add(osKey, GetValue<uint8_t>(array, nIdx));
        return;
    }
    if (IsInt8(pszFormat))
    {
        oDict.Add(osKey, GetValue<int8_t>(array, nIdx));
        return;
    }
    if (IsUInt16(pszFormat))
    {
        oDict.Add(osKey, GetValue<uint16_t>(array, nIdx));
        return;
    }
    if (IsInt16(pszFormat))
    {
        oDict.Add(osKey, GetValue<int16_t>(array, nIdx));
        return;
    }
    if (IsUInt32(pszFormat))
    {
        oDict.Add(osKey, static_cast<GIntBig>(GetValue<uint32_t>(array, nIdx)));
        return;
    }
    if (IsInt32(pszFormat))
    {
        oDict.Add(osKey, GetValue<int32_t>(array, nIdx));
        return;
    }
    if (IsUInt64(pszFormat))
    {
        oDict.Add(osKey, GetValue<uint64_t>(array, nIdx));
        return;
    }
    if (IsInt64(pszFormat))
    {
        oDict.Add(osKey, static_cast<GIntBig>(GetValue<int64_t>(array, nIdx)));
        return;
    }
    if (IsFloat16(pszFormat))
    {
        oDict.Add(osKey, static_cast<double>(GetValueFloat16(array, nIdx)));
        return;
    }
    if (IsFloat32(pszFormat))
    {
        oDict.Add(osKey, static_cast<double>(GetValue<float>(array, nIdx)));
        return;
    }
    if (IsFloat64(pszFormat))
    {
        oDict.Add(osKey, GetValue<double>(array, nIdx));
        return;
    }
    if (IsString(pszFormat))
    {
        oDict.Add(osKey, GetString<uint32_t>(array, nIdx));
        return;
    }
    if (IsLargeString(pszFormat))
    {
        oDict.Add(osKey, GetString<uint64_t>(array, nIdx));
        return;
    }
    if (IsBinary(pszFormat))
    {
        oDict.Add(osKey, GetBinaryAsBase64<uint32_t>(array, nIdx));
        return;
    }
    if (IsLargeBinary(pszFormat))
    {
        oDict.Add(osKey, GetBinaryAsBase64<uint64_t>(array, nIdx));
        return;
    }
    if (IsFixedWidthBinary(pszFormat))
    {
        oDict.Add(osKey, GetValueFixedWithBinaryAsBase64(
                             array, GetFixedWithBinary(pszFormat), nIdx));
        return;
    }
    if (IsDecimal(pszFormat))
    {
        int nPrecision = 0;
        int nScale = 0;
        int nWidthInBytes = 0;
        const bool bOK =
            ParseDecimalFormat(pszFormat, nPrecision, nScale, nWidthInBytes);
        // Already validated
        CPLAssert(bOK);
        CPL_IGNORE_RET_VAL(bOK);
        // GetValueDecimal() expects the width as a count of 64-bit words.
        oDict.Add(osKey,
                  GetValueDecimal(array, nWidthInBytes / 8, nScale, nIdx));
        return;
    }

    // Nested types (and unhandled formats, which are reported as an error
    // by GetObjectAsJSON()).
    oDict.Add(osKey, GetObjectAsJSON(schema, array, nIdx));
}
4450
4451
/************************************************************************/
4452
/*                            GetMapAsJSON()                            */
4453
/************************************************************************/
4454
4455
// Build a JSON object from the map stored at index nIdx of a map array.
// The single child of the map must be a structure whose first child holds
// the keys (which must be of string type) and whose second child holds the
// values; otherwise a CPLError is emitted and a default-constructed
// CPLJSONObject is returned.
// Entries whose key is null are skipped; null values are emitted as JSON
// null. nIdx does not include array->offset, which is added here.
static CPLJSONObject GetMapAsJSON(const struct ArrowSchema *schema,
                                  const struct ArrowArray *array,
                                  const size_t nIdx)
{
    const struct ArrowSchema *psEntriesSchema = schema->children[0];
    if (!IsStructure(psEntriesSchema->format))
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "GetMapAsJSON(): !IsStructure(schemaStruct->format))");
        return CPLJSONObject();
    }

    const struct ArrowSchema *psKeySchema = psEntriesSchema->children[0];
    const struct ArrowSchema *psValueSchema = psEntriesSchema->children[1];
    if (!IsString(psKeySchema->format))
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "GetMapAsJSON(): !IsString(schemaKey->format))");
        return CPLJSONObject();
    }

    const struct ArrowArray *psEntries = array->children[0];
    const struct ArrowArray *psKeys = psEntries->children[0];
    const struct ArrowArray *psValues = psEntries->children[1];

    // Pair of consecutive offsets delimiting the entries of map nIdx.
    const uint32_t *panMapOffsets =
        static_cast<const uint32_t *>(array->buffers[1]) + array->offset +
        nIdx;
    const uint32_t nFirstEntry = panMapOffsets[0];
    const uint32_t nEndEntry = panMapOffsets[1];

    // Validity bitmaps are only consulted when the arrays report nulls.
    const uint8_t *pabyKeyValidity = nullptr;
    if (psKeys->null_count != 0)
        pabyKeyValidity = static_cast<const uint8_t *>(psKeys->buffers[0]);
    const uint8_t *pabyValueValidity = nullptr;
    if (psValues->null_count != 0)
        pabyValueValidity = static_cast<const uint8_t *>(psValues->buffers[0]);

    // Offsets (already shifted by the keys array offset) and characters of
    // the keys.
    const uint32_t *panKeyOffsets =
        static_cast<const uint32_t *>(psKeys->buffers[1]) + psKeys->offset;
    const char *pachKeyData = static_cast<const char *>(psKeys->buffers[2]);

    CPLJSONObject oResult;
    for (uint32_t iEntry = nFirstEntry; iEntry < nEndEntry; iEntry++)
    {
        if (pabyKeyValidity &&
            !TestBit(pabyKeyValidity,
                     iEntry + static_cast<size_t>(psKeys->offset)))
        {
            // Null key: nothing to add for this entry.
            continue;
        }

        const uint32_t nKeyStart = panKeyOffsets[iEntry];
        const uint32_t nKeyLength = panKeyOffsets[iEntry + 1] - nKeyStart;
        const std::string osEntryKey(pachKeyData + nKeyStart, nKeyLength);

        const bool bValueIsNull =
            pabyValueValidity &&
            !TestBit(pabyValueValidity,
                     iEntry + static_cast<size_t>(psValues->offset));
        if (bValueIsNull)
            oResult.AddNull(osEntryKey);
        else
            AddToDict(oResult, osEntryKey, psValueSchema, psValues, iEntry);
    }
    return oResult;
}
4516
4517
/************************************************************************/
4518
/*                         GetStructureAsJSON()                         */
4519
/************************************************************************/
4520
4521
// Build a JSON object from the structure stored at index nIdx of a struct
// array: one member per child, keyed by the child's schema name. Null
// members are emitted as JSON null.
// nIdx does not include array->offset: like the sibling helpers
// (GetString(), GetListAsJSON(), GetMapAsJSON()), this function adds it
// itself. In the Arrow struct layout the parent's offset applies to the
// children in addition to each child's own offset, so the row looked up in
// every child is nIdx + array->offset.
static CPLJSONObject GetStructureAsJSON(const struct ArrowSchema *schema,
                                        const struct ArrowArray *array,
                                        const size_t nIdx)
{
    CPLJSONObject oDict;
    // Row of the children matching element nIdx of the struct array. The
    // child's own offset is added on top of it, below for the validity test
    // and presumably inside the value getters called by AddToDict().
    const size_t nChildIdx = nIdx + static_cast<size_t>(array->offset);
    for (int64_t k = 0; k < array->n_children; k++)
    {
        const struct ArrowSchema *childSchema = schema->children[k];
        const struct ArrowArray *childArray = array->children[k];
        const uint8_t *pabyValidityValues =
            childArray->null_count == 0
                ? nullptr
                : static_cast<const uint8_t *>(childArray->buffers[0]);
        if (!pabyValidityValues ||
            TestBit(pabyValidityValues,
                    nChildIdx + static_cast<size_t>(childArray->offset)))
        {
            AddToDict(oDict, childSchema->name, childSchema, childArray,
                      nChildIdx);
        }
        else
        {
            oDict.AddNull(childSchema->name);
        }
    }
    return oDict;
}
4546
4547
/************************************************************************/
4548
/*                          GetObjectAsJSON()                           */
4549
/************************************************************************/
4550
4551
// Return the JSON representation of element nIdx of a nested-type array
// (map, list, large list, fixed-size list or structure), dispatching on
// schema->format. For any other format, a CPLError is emitted and a
// default-constructed CPLJSONObject is returned.
static CPLJSONObject GetObjectAsJSON(const struct ArrowSchema *schema,
                                     const struct ArrowArray *array,
                                     const size_t nIdx)
{
    const char *const pszFormat = schema->format;

    if (IsMap(pszFormat))
        return GetMapAsJSON(schema, array, nIdx);

    // Lists and large lists differ only by the type of their offsets.
    if (IsList(pszFormat))
        return GetListAsJSON<uint32_t>(schema, array, nIdx);
    if (IsLargeList(pszFormat))
        return GetListAsJSON<uint64_t>(schema, array, nIdx);

    if (IsFixedSizeList(pszFormat))
        return GetFixedSizeListAsJSON(schema, array, nIdx);

    if (IsStructure(pszFormat))
        return GetStructureAsJSON(schema, array, nIdx);

    CPLError(CE_Failure, CPLE_AppDefined,
             "GetObjectAsJSON(): unhandled value format: %s", schema->format);
    return CPLJSONObject();
}
4573
4574
/************************************************************************/
4575
/*                      SetFieldForOtherFormats()                       */
4576
/************************************************************************/
4577
4578
static bool SetFieldForOtherFormats(OGRFeature &oFeature,
4579
                                    const int iOGRFieldIndex,
4580
                                    const size_t nOffsettedIndex,
4581
                                    const struct ArrowSchema *schema,
4582
                                    const struct ArrowArray *array)
4583
0
{
4584
0
    const char *format = schema->format;
4585
0
    if (IsFloat16(format))
4586
0
    {
4587
0
        oFeature.SetField(
4588
0
            iOGRFieldIndex,
4589
0
            static_cast<double>(GetValueFloat16(
4590
0
                array, nOffsettedIndex - static_cast<size_t>(array->offset))));
4591
0
    }
4592
4593
0
    else if (IsFixedWidthBinary(format))
4594
0
    {
4595
        // Fixed width binary
4596
0
        const int nWidth = GetFixedWithBinary(format);
4597
0
        oFeature.SetField(iOGRFieldIndex, nWidth,
4598
0
                          static_cast<const GByte *>(array->buffers[1]) +
4599
0
                              nOffsettedIndex * nWidth);
4600
0
    }
4601
0
    else if (format[0] == 't' && format[1] == 'd' &&
4602
0
             format[2] == 'D')  // strcmp(format, "tdD") == 0
4603
0
    {
4604
        // date32[days]
4605
        // number of days since Epoch
4606
0
        int64_t timestamp = static_cast<int64_t>(static_cast<const int32_t *>(
4607
0
                                array->buffers[1])[nOffsettedIndex]) *
4608
0
                            3600 * 24;
4609
0
        struct tm dt;
4610
0
        CPLUnixTimeToYMDHMS(timestamp, &dt);
4611
0
        oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4612
0
                          dt.tm_mday, 0, 0, 0);
4613
0
        return true;
4614
0
    }
4615
0
    else if (format[0] == 't' && format[1] == 'd' &&
4616
0
             format[2] == 'm')  // strcmp(format, "tdm") == 0
4617
0
    {
4618
        // date64[milliseconds]
4619
        // number of milliseconds since Epoch
4620
0
        int64_t timestamp =
4621
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex] /
4622
0
            1000;
4623
0
        struct tm dt;
4624
0
        CPLUnixTimeToYMDHMS(timestamp, &dt);
4625
0
        oFeature.SetField(iOGRFieldIndex, dt.tm_year + 1900, dt.tm_mon + 1,
4626
0
                          dt.tm_mday, 0, 0, 0);
4627
0
    }
4628
0
    else if (format[0] == 't' && format[1] == 't' &&
4629
0
             format[2] == 's')  // strcmp(format, "tts") == 0
4630
0
    {
4631
        // time32 [seconds]
4632
0
        int32_t value =
4633
0
            static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4634
0
        const int nHour = value / 3600;
4635
0
        const int nMinute = (value / 60) % 60;
4636
0
        const int nSecond = value % 60;
4637
0
        oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4638
0
                          static_cast<float>(nSecond));
4639
0
    }
4640
0
    else if (format[0] == 't' && format[1] == 't' &&
4641
0
             format[2] == 'm')  // strcmp(format, "ttm") == 0
4642
0
    {
4643
        // time32 [milliseconds]
4644
0
        int32_t value =
4645
0
            static_cast<const int32_t *>(array->buffers[1])[nOffsettedIndex];
4646
0
        double floatingPart = (value % 1000) / 1e3;
4647
0
        value /= 1000;
4648
0
        const int nHour = value / 3600;
4649
0
        const int nMinute = (value / 60) % 60;
4650
0
        const int nSecond = value % 60;
4651
0
        oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4652
0
                          static_cast<float>(nSecond + floatingPart));
4653
0
    }
4654
0
    else if (format[0] == 't' && format[1] == 't' &&
4655
0
             (format[2] == 'u' ||  // time64 [microseconds]
4656
0
              format[2] == 'n'))   // time64 [nanoseconds]
4657
0
    {
4658
0
        int64_t value =
4659
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex];
4660
0
        if (oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() == OFTInteger64)
4661
0
        {
4662
0
            oFeature.SetField(iOGRFieldIndex, static_cast<GIntBig>(value));
4663
0
        }
4664
0
        else
4665
0
        {
4666
0
            double floatingPart;
4667
0
            if (format[2] == 'u')
4668
0
            {
4669
0
                floatingPart = (value % (1000 * 1000)) / 1e6;
4670
0
                value /= 1000 * 1000;
4671
0
            }
4672
0
            else
4673
0
            {
4674
0
                floatingPart = (value % (1000 * 1000 * 1000)) / 1e9;
4675
0
                value /= 1000 * 1000 * 1000;
4676
0
            }
4677
0
            const int nHour = static_cast<int>(value / 3600);
4678
0
            const int nMinute = static_cast<int>((value / 60) % 60);
4679
0
            const int nSecond = static_cast<int>(value % 60);
4680
0
            oFeature.SetField(iOGRFieldIndex, 0, 0, 0, nHour, nMinute,
4681
0
                              static_cast<float>(nSecond + floatingPart));
4682
0
        }
4683
0
    }
4684
0
    else if (IsTimestampSeconds(format))
4685
0
    {
4686
0
        ArrowTimestampToOGRDateTime(
4687
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex], 1,
4688
0
            GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4689
0
    }
4690
0
    else if (IsTimestampMilliseconds(format))
4691
0
    {
4692
0
        ArrowTimestampToOGRDateTime(
4693
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4694
0
            1000, GetTimestampTimezone(format), oFeature, iOGRFieldIndex);
4695
0
    }
4696
0
    else if (IsTimestampMicroseconds(format))
4697
0
    {
4698
0
        ArrowTimestampToOGRDateTime(
4699
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4700
0
            1000 * 1000, GetTimestampTimezone(format), oFeature,
4701
0
            iOGRFieldIndex);
4702
0
    }
4703
0
    else if (IsTimestampNanoseconds(format))
4704
0
    {
4705
0
        ArrowTimestampToOGRDateTime(
4706
0
            static_cast<const int64_t *>(array->buffers[1])[nOffsettedIndex],
4707
0
            1000 * 1000 * 1000, GetTimestampTimezone(format), oFeature,
4708
0
            iOGRFieldIndex);
4709
0
    }
4710
0
    else if (IsFixedSizeList(format))
4711
0
    {
4712
0
        const int nItems = GetFixedSizeList(format);
4713
0
        const auto childArray = array->children[0];
4714
0
        const char *childFormat = schema->children[0]->format;
4715
0
        if (IsBoolean(childFormat))
4716
0
        {
4717
0
            std::vector<int> aValues;
4718
0
            const auto *paValues =
4719
0
                static_cast<const uint8_t *>(childArray->buffers[1]);
4720
0
            for (int i = 0; i < nItems; ++i)
4721
0
            {
4722
0
                aValues.push_back(
4723
0
                    TestBit(paValues,
4724
0
                            static_cast<size_t>(childArray->offset +
4725
0
                                                nOffsettedIndex * nItems + i))
4726
0
                        ? 1
4727
0
                        : 0);
4728
0
            }
4729
0
            oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4730
0
                              aValues.data());
4731
0
        }
4732
0
        else if (IsInt8(childFormat))
4733
0
        {
4734
0
            FillFieldFixedSizeList<int8_t, int>(array, iOGRFieldIndex,
4735
0
                                                nOffsettedIndex, nItems,
4736
0
                                                childArray, oFeature);
4737
0
        }
4738
0
        else if (IsUInt8(childFormat))
4739
0
        {
4740
0
            FillFieldFixedSizeList<uint8_t, int>(array, iOGRFieldIndex,
4741
0
                                                 nOffsettedIndex, nItems,
4742
0
                                                 childArray, oFeature);
4743
0
        }
4744
0
        else if (IsInt16(childFormat))
4745
0
        {
4746
0
            FillFieldFixedSizeList<int16_t, int>(array, iOGRFieldIndex,
4747
0
                                                 nOffsettedIndex, nItems,
4748
0
                                                 childArray, oFeature);
4749
0
        }
4750
0
        else if (IsUInt16(childFormat))
4751
0
        {
4752
0
            FillFieldFixedSizeList<uint16_t, int>(array, iOGRFieldIndex,
4753
0
                                                  nOffsettedIndex, nItems,
4754
0
                                                  childArray, oFeature);
4755
0
        }
4756
0
        else if (IsInt32(childFormat))
4757
0
        {
4758
0
            FillFieldFixedSizeList<int32_t, int>(array, iOGRFieldIndex,
4759
0
                                                 nOffsettedIndex, nItems,
4760
0
                                                 childArray, oFeature);
4761
0
        }
4762
0
        else if (IsUInt32(childFormat))
4763
0
        {
4764
0
            FillFieldFixedSizeList<uint32_t, GIntBig>(array, iOGRFieldIndex,
4765
0
                                                      nOffsettedIndex, nItems,
4766
0
                                                      childArray, oFeature);
4767
0
        }
4768
0
        else if (IsInt64(childFormat))
4769
0
        {
4770
0
            FillFieldFixedSizeList<int64_t, GIntBig>(array, iOGRFieldIndex,
4771
0
                                                     nOffsettedIndex, nItems,
4772
0
                                                     childArray, oFeature);
4773
0
        }
4774
0
        else if (IsUInt64(childFormat))
4775
0
        {
4776
0
            FillFieldFixedSizeList<uint64_t, double>(array, iOGRFieldIndex,
4777
0
                                                     nOffsettedIndex, nItems,
4778
0
                                                     childArray, oFeature);
4779
0
        }
4780
0
        else if (IsFloat16(childFormat))
4781
0
        {
4782
0
            std::vector<double> aValues;
4783
0
            for (int i = 0; i < nItems; ++i)
4784
0
            {
4785
0
                aValues.push_back(static_cast<double>(
4786
0
                    GetValueFloat16(childArray, nOffsettedIndex * nItems + i)));
4787
0
            }
4788
0
            oFeature.SetField(iOGRFieldIndex, static_cast<int>(aValues.size()),
4789
0
                              aValues.data());
4790
0
        }
4791
0
        else if (IsFloat32(childFormat))
4792
0
        {
4793
0
            FillFieldFixedSizeList<float, double>(array, iOGRFieldIndex,
4794
0
                                                  nOffsettedIndex, nItems,
4795
0
                                                  childArray, oFeature);
4796
0
        }
4797
0
        else if (IsFloat64(childFormat))
4798
0
        {
4799
0
            FillFieldFixedSizeList<double, double>(array, iOGRFieldIndex,
4800
0
                                                   nOffsettedIndex, nItems,
4801
0
                                                   childArray, oFeature);
4802
0
        }
4803
0
        else if (IsString(childFormat))
4804
0
        {
4805
0
            FillFieldFixedSizeListString<uint32_t>(array, iOGRFieldIndex,
4806
0
                                                   nOffsettedIndex, nItems,
4807
0
                                                   childArray, oFeature);
4808
0
        }
4809
0
        else if (IsLargeString(childFormat))
4810
0
        {
4811
0
            FillFieldFixedSizeListString<uint64_t>(array, iOGRFieldIndex,
4812
0
                                                   nOffsettedIndex, nItems,
4813
0
                                                   childArray, oFeature);
4814
0
        }
4815
0
    }
4816
0
    else if (IsList(format) || IsLargeList(format))
4817
0
    {
4818
0
        const auto childArray = array->children[0];
4819
0
        const char *childFormat = schema->children[0]->format;
4820
0
        if (IsBoolean(childFormat))
4821
0
        {
4822
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4823
0
                FillFieldListFromBool<uint32_t>(array, iOGRFieldIndex,
4824
0
                                                nOffsettedIndex, childArray,
4825
0
                                                oFeature);
4826
0
            else
4827
0
                FillFieldListFromBool<uint64_t>(array, iOGRFieldIndex,
4828
0
                                                nOffsettedIndex, childArray,
4829
0
                                                oFeature);
4830
0
        }
4831
0
        else if (IsInt8(childFormat))
4832
0
        {
4833
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4834
0
                FillFieldList<uint32_t, int8_t, int>(array, iOGRFieldIndex,
4835
0
                                                     nOffsettedIndex,
4836
0
                                                     childArray, oFeature);
4837
0
            else
4838
0
                FillFieldList<uint64_t, int8_t, int>(array, iOGRFieldIndex,
4839
0
                                                     nOffsettedIndex,
4840
0
                                                     childArray, oFeature);
4841
0
        }
4842
0
        else if (IsUInt8(childFormat))
4843
0
        {
4844
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4845
0
                FillFieldList<uint32_t, uint8_t, int>(array, iOGRFieldIndex,
4846
0
                                                      nOffsettedIndex,
4847
0
                                                      childArray, oFeature);
4848
0
            else
4849
0
                FillFieldList<uint64_t, uint8_t, int>(array, iOGRFieldIndex,
4850
0
                                                      nOffsettedIndex,
4851
0
                                                      childArray, oFeature);
4852
0
        }
4853
0
        else if (IsInt16(childFormat))
4854
0
        {
4855
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4856
0
                FillFieldList<uint32_t, int16_t, int>(array, iOGRFieldIndex,
4857
0
                                                      nOffsettedIndex,
4858
0
                                                      childArray, oFeature);
4859
0
            else
4860
0
                FillFieldList<uint64_t, int16_t, int>(array, iOGRFieldIndex,
4861
0
                                                      nOffsettedIndex,
4862
0
                                                      childArray, oFeature);
4863
0
        }
4864
0
        else if (IsUInt16(childFormat))
4865
0
        {
4866
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4867
0
                FillFieldList<uint32_t, uint16_t, int>(array, iOGRFieldIndex,
4868
0
                                                       nOffsettedIndex,
4869
0
                                                       childArray, oFeature);
4870
0
            else
4871
0
                FillFieldList<uint64_t, uint16_t, int>(array, iOGRFieldIndex,
4872
0
                                                       nOffsettedIndex,
4873
0
                                                       childArray, oFeature);
4874
0
        }
4875
0
        else if (IsInt32(childFormat))
4876
0
        {
4877
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4878
0
                FillFieldList<uint32_t, int32_t, int>(array, iOGRFieldIndex,
4879
0
                                                      nOffsettedIndex,
4880
0
                                                      childArray, oFeature);
4881
0
            else
4882
0
                FillFieldList<uint64_t, int32_t, int>(array, iOGRFieldIndex,
4883
0
                                                      nOffsettedIndex,
4884
0
                                                      childArray, oFeature);
4885
0
        }
4886
0
        else if (IsUInt32(childFormat))
4887
0
        {
4888
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4889
0
                FillFieldList<uint32_t, uint32_t, GIntBig>(
4890
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4891
0
                    oFeature);
4892
0
            else
4893
0
                FillFieldList<uint64_t, uint32_t, GIntBig>(
4894
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4895
0
                    oFeature);
4896
0
        }
4897
0
        else if (IsInt64(childFormat))
4898
0
        {
4899
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4900
0
                FillFieldList<uint32_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4901
0
                                                          nOffsettedIndex,
4902
0
                                                          childArray, oFeature);
4903
0
            else
4904
0
                FillFieldList<uint64_t, int64_t, GIntBig>(array, iOGRFieldIndex,
4905
0
                                                          nOffsettedIndex,
4906
0
                                                          childArray, oFeature);
4907
0
        }
4908
0
        else if (IsUInt64(childFormat))  // (lossy conversion)
4909
0
        {
4910
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4911
0
                FillFieldList<uint32_t, uint64_t, double>(array, iOGRFieldIndex,
4912
0
                                                          nOffsettedIndex,
4913
0
                                                          childArray, oFeature);
4914
0
            else
4915
0
                FillFieldList<uint64_t, uint64_t, double>(array, iOGRFieldIndex,
4916
0
                                                          nOffsettedIndex,
4917
0
                                                          childArray, oFeature);
4918
0
        }
4919
0
        else if (IsFloat16(childFormat))
4920
0
        {
4921
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4922
0
                FillFieldListFromHalfFloat<uint32_t>(array, iOGRFieldIndex,
4923
0
                                                     nOffsettedIndex,
4924
0
                                                     childArray, oFeature);
4925
0
            else
4926
0
                FillFieldListFromHalfFloat<uint64_t>(array, iOGRFieldIndex,
4927
0
                                                     nOffsettedIndex,
4928
0
                                                     childArray, oFeature);
4929
0
        }
4930
0
        else if (IsFloat32(childFormat))
4931
0
        {
4932
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4933
0
                FillFieldList<uint32_t, float, double>(array, iOGRFieldIndex,
4934
0
                                                       nOffsettedIndex,
4935
0
                                                       childArray, oFeature);
4936
0
            else
4937
0
                FillFieldList<uint64_t, float, double>(array, iOGRFieldIndex,
4938
0
                                                       nOffsettedIndex,
4939
0
                                                       childArray, oFeature);
4940
0
        }
4941
0
        else if (IsFloat64(childFormat))
4942
0
        {
4943
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4944
0
                FillFieldList<uint32_t, double, double>(array, iOGRFieldIndex,
4945
0
                                                        nOffsettedIndex,
4946
0
                                                        childArray, oFeature);
4947
0
            else
4948
0
                FillFieldList<uint64_t, double, double>(array, iOGRFieldIndex,
4949
0
                                                        nOffsettedIndex,
4950
0
                                                        childArray, oFeature);
4951
0
        }
4952
0
        else if (IsString(childFormat))
4953
0
        {
4954
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4955
0
                FillFieldListFromString<uint32_t, uint32_t>(
4956
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4957
0
                    oFeature);
4958
0
            else
4959
0
                FillFieldListFromString<uint64_t, uint32_t>(
4960
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4961
0
                    oFeature);
4962
0
        }
4963
0
        else if (IsLargeString(childFormat))
4964
0
        {
4965
0
            if (format[1] == ARROW_2ND_LETTER_LIST)
4966
0
                FillFieldListFromString<uint32_t, uint64_t>(
4967
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4968
0
                    oFeature);
4969
0
            else
4970
0
                FillFieldListFromString<uint64_t, uint64_t>(
4971
0
                    array, iOGRFieldIndex, nOffsettedIndex, childArray,
4972
0
                    oFeature);
4973
0
        }
4974
0
        else if (format[1] == ARROW_2ND_LETTER_LIST)
4975
0
        {
4976
0
            const size_t iFeature =
4977
0
                static_cast<size_t>(nOffsettedIndex - array->offset);
4978
0
            oFeature.SetField(iOGRFieldIndex,
4979
0
                              GetListAsJSON<uint32_t>(schema, array, iFeature)
4980
0
                                  .Format(CPLJSONObject::PrettyFormat::Plain)
4981
0
                                  .c_str());
4982
0
        }
4983
0
        else
4984
0
        {
4985
0
            const size_t iFeature =
4986
0
                static_cast<size_t>(nOffsettedIndex - array->offset);
4987
0
            oFeature.SetField(iOGRFieldIndex,
4988
0
                              GetListAsJSON<uint64_t>(schema, array, iFeature)
4989
0
                                  .Format(CPLJSONObject::PrettyFormat::Plain)
4990
0
                                  .c_str());
4991
0
        }
4992
0
    }
4993
0
    else if (IsDecimal(format))
4994
0
    {
4995
0
        int nPrecision = 0;
4996
0
        int nScale = 0;
4997
0
        int nWidthInBytes = 0;
4998
0
        if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
4999
0
        {
5000
0
            CPLAssert(false);
5001
0
        }
5002
5003
        // fits on a int64
5004
0
        CPLAssert(nPrecision <= 19);
5005
        // either 128 or 256 bits
5006
0
        CPLAssert((nWidthInBytes % 8) == 0);
5007
0
        const int nWidthIn64BitWord = nWidthInBytes / 8;
5008
0
        const size_t iFeature =
5009
0
            static_cast<size_t>(nOffsettedIndex - array->offset);
5010
0
        oFeature.SetField(
5011
0
            iOGRFieldIndex,
5012
0
            GetValueDecimal(array, nWidthIn64BitWord, nScale, iFeature));
5013
0
        return true;
5014
0
    }
5015
0
    else if (IsMap(format))
5016
0
    {
5017
0
        const size_t iFeature =
5018
0
            static_cast<size_t>(nOffsettedIndex - array->offset);
5019
0
        oFeature.SetField(iOGRFieldIndex,
5020
0
                          GetMapAsJSON(schema, array, iFeature)
5021
0
                              .Format(CPLJSONObject::PrettyFormat::Plain)
5022
0
                              .c_str());
5023
0
    }
5024
0
    else
5025
0
    {
5026
0
        return false;
5027
0
    }
5028
5029
0
    return true;
5030
0
}
5031
5032
/************************************************************************/
5033
/*                   FillValidityArrayFromAttrQuery()                   */
5034
/************************************************************************/
5035
5036
static size_t FillValidityArrayFromAttrQuery(
5037
    const OGRLayer *poLayer, OGRFeatureQuery *poAttrQuery,
5038
    const struct ArrowSchema *schema, struct ArrowArray *array,
5039
    std::vector<bool> &abyValidityFromFilters, CSLConstList papszOptions)
5040
0
{
5041
0
    size_t nCountIntersecting = 0;
5042
0
    auto poFeatureDefn = const_cast<OGRLayer *>(poLayer)->GetLayerDefn();
5043
0
    OGRFeature oFeature(poFeatureDefn);
5044
5045
0
    std::map<std::string, std::vector<int>> oMapFieldNameToArrowPath;
5046
0
    std::vector<int> anArrowPathTmp;
5047
0
    BuildMapFieldNameToArrowPath(schema, oMapFieldNameToArrowPath,
5048
0
                                 std::string(), anArrowPathTmp);
5049
5050
0
    struct UsedFieldsInfo
5051
0
    {
5052
0
        int iOGRFieldIndex{};
5053
0
        std::vector<int> anArrowPath{};
5054
0
    };
5055
5056
0
    std::vector<UsedFieldsInfo> aoUsedFieldsInfo;
5057
5058
0
    bool bNeedsFID = false;
5059
0
    const CPLStringList aosUsedFields(poAttrQuery->GetUsedFields());
5060
0
    for (int i = 0; i < aosUsedFields.size(); ++i)
5061
0
    {
5062
0
        int iOGRFieldIndex = poFeatureDefn->GetFieldIndex(aosUsedFields[i]);
5063
0
        if (iOGRFieldIndex >= 0)
5064
0
        {
5065
0
            const auto oIter = oMapFieldNameToArrowPath.find(aosUsedFields[i]);
5066
0
            if (oIter != oMapFieldNameToArrowPath.end())
5067
0
            {
5068
0
                UsedFieldsInfo info;
5069
0
                info.iOGRFieldIndex = iOGRFieldIndex;
5070
0
                info.anArrowPath = oIter->second;
5071
0
                aoUsedFieldsInfo.push_back(std::move(info));
5072
0
            }
5073
0
            else
5074
0
            {
5075
0
                CPLError(CE_Failure, CPLE_AppDefined,
5076
0
                         "Cannot find %s in oMapFieldNameToArrowPath",
5077
0
                         aosUsedFields[i]);
5078
0
            }
5079
0
        }
5080
0
        else if (EQUAL(aosUsedFields[i], "FID"))
5081
0
        {
5082
0
            bNeedsFID = true;
5083
0
        }
5084
0
        else
5085
0
        {
5086
0
            CPLDebug("OGR", "Cannot find used field %s", aosUsedFields[i]);
5087
0
        }
5088
0
    }
5089
5090
0
    const size_t nLength = abyValidityFromFilters.size();
5091
5092
0
    GIntBig nBaseSeqFID = -1;
5093
0
    std::vector<int> anArrowPathToFIDColumn;
5094
0
    if (bNeedsFID)
5095
0
    {
5096
        // BASE_SEQUENTIAL_FID is set when there is no Arrow column for the FID
5097
        // and we assume sequential FID numbering
5098
0
        const char *pszBaseSeqFID =
5099
0
            CSLFetchNameValue(papszOptions, "BASE_SEQUENTIAL_FID");
5100
0
        if (pszBaseSeqFID)
5101
0
        {
5102
0
            nBaseSeqFID = CPLAtoGIntBig(pszBaseSeqFID);
5103
5104
            // Optimizimation for "FID = constant"
5105
0
            swq_expr_node *poNode =
5106
0
                static_cast<swq_expr_node *>(poAttrQuery->GetSWQExpr());
5107
0
            if (poNode->eNodeType == SNT_OPERATION &&
5108
0
                poNode->nOperation == SWQ_EQ && poNode->nSubExprCount == 2 &&
5109
0
                poNode->papoSubExpr[0]->eNodeType == SNT_COLUMN &&
5110
0
                poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT &&
5111
0
                poNode->papoSubExpr[0]->field_index ==
5112
0
                    poFeatureDefn->GetFieldCount() + SPF_FID &&
5113
0
                poNode->papoSubExpr[1]->field_type == SWQ_INTEGER64)
5114
0
            {
5115
0
                if (nBaseSeqFID + static_cast<int64_t>(nLength) <
5116
0
                        poNode->papoSubExpr[1]->int_value ||
5117
0
                    nBaseSeqFID > poNode->papoSubExpr[1]->int_value)
5118
0
                {
5119
0
                    return 0;
5120
0
                }
5121
0
            }
5122
0
        }
5123
0
        else
5124
0
        {
5125
0
            const char *pszFIDColumn =
5126
0
                const_cast<OGRLayer *>(poLayer)->GetFIDColumn();
5127
0
            if (pszFIDColumn && pszFIDColumn[0])
5128
0
            {
5129
0
                const auto oIter = oMapFieldNameToArrowPath.find(pszFIDColumn);
5130
0
                if (oIter != oMapFieldNameToArrowPath.end())
5131
0
                {
5132
0
                    anArrowPathToFIDColumn = oIter->second;
5133
0
                }
5134
0
            }
5135
0
            if (anArrowPathToFIDColumn.empty())
5136
0
            {
5137
0
                CPLError(CE_Failure, CPLE_AppDefined,
5138
0
                         "Filtering on FID requested but cannot associate a "
5139
0
                         "FID with Arrow records");
5140
0
            }
5141
0
        }
5142
0
    }
5143
5144
0
    for (size_t iRow = 0; iRow < nLength; ++iRow)
5145
0
    {
5146
0
        if (!abyValidityFromFilters[iRow])
5147
0
            continue;
5148
5149
0
        if (bNeedsFID)
5150
0
        {
5151
0
            if (nBaseSeqFID >= 0)
5152
0
            {
5153
0
                oFeature.SetFID(nBaseSeqFID + iRow);
5154
0
            }
5155
0
            else if (!anArrowPathToFIDColumn.empty())
5156
0
            {
5157
0
                oFeature.SetFID(OGRNullFID);
5158
5159
0
                const struct ArrowSchema *psSchemaField = schema;
5160
0
                const struct ArrowArray *psArray = array;
5161
0
                bool bSkip = false;
5162
0
                for (size_t i = 0; i < anArrowPathToFIDColumn.size(); ++i)
5163
0
                {
5164
0
                    const int iChild = anArrowPathToFIDColumn[i];
5165
0
                    if (i > 0)
5166
0
                    {
5167
0
                        const uint8_t *pabyValidity =
5168
0
                            psArray->null_count == 0
5169
0
                                ? nullptr
5170
0
                                : static_cast<uint8_t *>(
5171
0
                                      const_cast<void *>(psArray->buffers[0]));
5172
0
                        const size_t nOffsettedIndex =
5173
0
                            static_cast<size_t>(iRow + psArray->offset);
5174
0
                        if (pabyValidity &&
5175
0
                            !TestBit(pabyValidity, nOffsettedIndex))
5176
0
                        {
5177
0
                            bSkip = true;
5178
0
                            break;
5179
0
                        }
5180
0
                    }
5181
5182
0
                    psSchemaField = psSchemaField->children[iChild];
5183
0
                    psArray = psArray->children[iChild];
5184
0
                }
5185
0
                if (bSkip)
5186
0
                    continue;
5187
5188
0
                const char *format = psSchemaField->format;
5189
0
                const uint8_t *pabyValidity =
5190
0
                    psArray->null_count == 0
5191
0
                        ? nullptr
5192
0
                        : static_cast<uint8_t *>(
5193
0
                              const_cast<void *>(psArray->buffers[0]));
5194
0
                const size_t nOffsettedIndex =
5195
0
                    static_cast<size_t>(iRow + psArray->offset);
5196
0
                if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5197
0
                {
5198
                    // do nothing
5199
0
                }
5200
0
                else if (IsInt32(format))
5201
0
                {
5202
0
                    oFeature.SetFID(static_cast<const int32_t *>(
5203
0
                        psArray->buffers[1])[nOffsettedIndex]);
5204
0
                }
5205
0
                else if (IsInt64(format))
5206
0
                {
5207
0
                    oFeature.SetFID(static_cast<const int64_t *>(
5208
0
                        psArray->buffers[1])[nOffsettedIndex]);
5209
0
                }
5210
0
            }
5211
0
        }
5212
5213
0
        for (const auto &sInfo : aoUsedFieldsInfo)
5214
0
        {
5215
0
            const int iOGRFieldIndex = sInfo.iOGRFieldIndex;
5216
0
            const struct ArrowSchema *psSchemaField = schema;
5217
0
            const struct ArrowArray *psArray = array;
5218
0
            bool bSkip = false;
5219
0
            for (size_t i = 0; i < sInfo.anArrowPath.size(); ++i)
5220
0
            {
5221
0
                const int iChild = sInfo.anArrowPath[i];
5222
0
                if (i > 0)
5223
0
                {
5224
0
                    const uint8_t *pabyValidity =
5225
0
                        psArray->null_count == 0
5226
0
                            ? nullptr
5227
0
                            : static_cast<uint8_t *>(
5228
0
                                  const_cast<void *>(psArray->buffers[0]));
5229
0
                    const size_t nOffsettedIndex =
5230
0
                        static_cast<size_t>(iRow + psArray->offset);
5231
0
                    if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5232
0
                    {
5233
0
                        bSkip = true;
5234
0
                        oFeature.SetFieldNull(iOGRFieldIndex);
5235
0
                        break;
5236
0
                    }
5237
0
                }
5238
5239
0
                psSchemaField = psSchemaField->children[iChild];
5240
0
                psArray = psArray->children[iChild];
5241
0
            }
5242
0
            if (bSkip)
5243
0
                continue;
5244
5245
0
            const char *format = psSchemaField->format;
5246
0
            const uint8_t *pabyValidity =
5247
0
                psArray->null_count == 0
5248
0
                    ? nullptr
5249
0
                    : static_cast<uint8_t *>(
5250
0
                          const_cast<void *>(psArray->buffers[0]));
5251
0
            const size_t nOffsettedIndex =
5252
0
                static_cast<size_t>(iRow + psArray->offset);
5253
0
            if (pabyValidity && !TestBit(pabyValidity, nOffsettedIndex))
5254
0
            {
5255
0
                oFeature.SetFieldNull(iOGRFieldIndex);
5256
0
            }
5257
0
            else if (IsBoolean(format))
5258
0
            {
5259
0
                oFeature.SetField(
5260
0
                    iOGRFieldIndex,
5261
0
                    TestBit(static_cast<const uint8_t *>(psArray->buffers[1]),
5262
0
                            nOffsettedIndex));
5263
0
            }
5264
0
            else if (IsInt8(format))
5265
0
            {
5266
0
                oFeature.SetField(iOGRFieldIndex,
5267
0
                                  static_cast<const int8_t *>(
5268
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5269
0
            }
5270
0
            else if (IsUInt8(format))
5271
0
            {
5272
0
                oFeature.SetField(iOGRFieldIndex,
5273
0
                                  static_cast<const uint8_t *>(
5274
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5275
0
            }
5276
0
            else if (IsInt16(format))
5277
0
            {
5278
0
                oFeature.SetField(iOGRFieldIndex,
5279
0
                                  static_cast<const int16_t *>(
5280
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5281
0
            }
5282
0
            else if (IsUInt16(format))
5283
0
            {
5284
0
                oFeature.SetField(iOGRFieldIndex,
5285
0
                                  static_cast<const uint16_t *>(
5286
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5287
0
            }
5288
0
            else if (IsInt32(format))
5289
0
            {
5290
0
                oFeature.SetField(iOGRFieldIndex,
5291
0
                                  static_cast<const int32_t *>(
5292
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5293
0
            }
5294
0
            else if (IsUInt32(format))
5295
0
            {
5296
0
                oFeature.SetField(
5297
0
                    iOGRFieldIndex,
5298
0
                    static_cast<GIntBig>(static_cast<const uint32_t *>(
5299
0
                        psArray->buffers[1])[nOffsettedIndex]));
5300
0
            }
5301
0
            else if (IsInt64(format))
5302
0
            {
5303
0
                oFeature.SetField(
5304
0
                    iOGRFieldIndex,
5305
0
                    static_cast<GIntBig>(static_cast<const int64_t *>(
5306
0
                        psArray->buffers[1])[nOffsettedIndex]));
5307
0
            }
5308
0
            else if (IsUInt64(format))
5309
0
            {
5310
0
                oFeature.SetField(
5311
0
                    iOGRFieldIndex,
5312
0
                    static_cast<double>(static_cast<const uint64_t *>(
5313
0
                        psArray->buffers[1])[nOffsettedIndex]));
5314
0
            }
5315
0
            else if (IsFloat32(format))
5316
0
            {
5317
0
                oFeature.SetField(
5318
0
                    iOGRFieldIndex,
5319
0
                    static_cast<double>(static_cast<const float *>(
5320
0
                        psArray->buffers[1])[nOffsettedIndex]));
5321
0
            }
5322
0
            else if (IsFloat64(format))
5323
0
            {
5324
0
                oFeature.SetField(iOGRFieldIndex,
5325
0
                                  static_cast<const double *>(
5326
0
                                      psArray->buffers[1])[nOffsettedIndex]);
5327
0
            }
5328
0
            else if (IsString(format))
5329
0
            {
5330
0
                const auto nOffset = static_cast<const uint32_t *>(
5331
0
                    psArray->buffers[1])[nOffsettedIndex];
5332
0
                const auto nNextOffset = static_cast<const uint32_t *>(
5333
0
                    psArray->buffers[1])[nOffsettedIndex + 1];
5334
0
                const GByte *pabyData =
5335
0
                    static_cast<const GByte *>(psArray->buffers[2]);
5336
0
                const uint32_t nSize = nNextOffset - nOffset;
5337
0
                CPLAssert(oFeature.GetFieldDefnRef(iOGRFieldIndex)->GetType() ==
5338
0
                          OFTString);
5339
0
                char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5340
0
                memcpy(pszStr, pabyData + nOffset, nSize);
5341
0
                pszStr[nSize] = 0;
5342
0
                OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5343
0
                if (IsValidField(psField))
5344
0
                    CPLFree(psField->String);
5345
0
                psField->String = pszStr;
5346
0
            }
5347
0
            else if (IsLargeString(format))
5348
0
            {
5349
0
                const auto nOffset = static_cast<const uint64_t *>(
5350
0
                    psArray->buffers[1])[nOffsettedIndex];
5351
0
                const auto nNextOffset = static_cast<const uint64_t *>(
5352
0
                    psArray->buffers[1])[nOffsettedIndex + 1];
5353
0
                const GByte *pabyData =
5354
0
                    static_cast<const GByte *>(psArray->buffers[2]);
5355
0
                const size_t nSize = static_cast<size_t>(nNextOffset - nOffset);
5356
0
                char *pszStr = static_cast<char *>(CPLMalloc(nSize + 1));
5357
0
                memcpy(pszStr, pabyData + static_cast<size_t>(nOffset), nSize);
5358
0
                pszStr[nSize] = 0;
5359
0
                OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIndex);
5360
0
                if (IsValidField(psField))
5361
0
                    CPLFree(psField->String);
5362
0
                psField->String = pszStr;
5363
0
            }
5364
0
            else if (IsBinary(format))
5365
0
            {
5366
0
                const auto nOffset = static_cast<const uint32_t *>(
5367
0
                    psArray->buffers[1])[nOffsettedIndex];
5368
0
                const auto nNextOffset = static_cast<const uint32_t *>(
5369
0
                    psArray->buffers[1])[nOffsettedIndex + 1];
5370
0
                const GByte *pabyData =
5371
0
                    static_cast<const GByte *>(psArray->buffers[2]);
5372
0
                const uint32_t nSize = nNextOffset - nOffset;
5373
0
                if (nSize >
5374
0
                    static_cast<size_t>(std::numeric_limits<int32_t>::max()))
5375
0
                {
5376
0
                    abyValidityFromFilters.clear();
5377
0
                    abyValidityFromFilters.resize(nLength);
5378
0
                    CPLError(CE_Failure, CPLE_AppDefined,
5379
0
                             "Unexpected error in PostFilterArrowArray(): too "
5380
0
                             "large binary");
5381
0
                    return 0;
5382
0
                }
5383
0
                oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5384
0
                                  pabyData + nOffset);
5385
0
            }
5386
0
            else if (IsLargeBinary(format))
5387
0
            {
5388
0
                const auto nOffset = static_cast<const uint64_t *>(
5389
0
                    psArray->buffers[1])[nOffsettedIndex];
5390
0
                const auto nNextOffset = static_cast<const uint64_t *>(
5391
0
                    psArray->buffers[1])[nOffsettedIndex + 1];
5392
0
                const GByte *pabyData =
5393
0
                    static_cast<const GByte *>(psArray->buffers[2]);
5394
0
                const uint64_t nSize = nNextOffset - nOffset;
5395
0
                if (nSize >
5396
0
                    static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
5397
0
                {
5398
0
                    abyValidityFromFilters.clear();
5399
0
                    abyValidityFromFilters.resize(nLength);
5400
0
                    CPLError(CE_Failure, CPLE_AppDefined,
5401
0
                             "Unexpected error in PostFilterArrowArray(): too "
5402
0
                             "large binary");
5403
0
                    return 0;
5404
0
                }
5405
0
                oFeature.SetField(iOGRFieldIndex, static_cast<int>(nSize),
5406
0
                                  pabyData + nOffset);
5407
0
            }
5408
0
            else if (!SetFieldForOtherFormats(oFeature, iOGRFieldIndex,
5409
0
                                              nOffsettedIndex, psSchemaField,
5410
0
                                              psArray))
5411
0
            {
5412
0
                abyValidityFromFilters.clear();
5413
0
                abyValidityFromFilters.resize(nLength);
5414
0
                CPLError(
5415
0
                    CE_Failure, CPLE_AppDefined,
5416
0
                    "Unexpected error in PostFilterArrowArray(): unhandled "
5417
0
                    "field format: %s",
5418
0
                    format);
5419
0
                return 0;
5420
0
            }
5421
0
        }
5422
0
        if (poAttrQuery->Evaluate(&oFeature))
5423
0
        {
5424
0
            nCountIntersecting++;
5425
0
        }
5426
0
        else
5427
0
        {
5428
0
            abyValidityFromFilters[iRow] = false;
5429
0
        }
5430
0
    }
5431
0
    return nCountIntersecting;
5432
0
}
5433
5434
/************************************************************************/
5435
/*                   OGRLayer::PostFilterArrowArray()                   */
5436
/************************************************************************/
5437
5438
/** Remove rows that aren't selected by the spatial or attribute filter.
5439
 *
5440
 * Assumes that CanPostFilterArrowArray() has been called and returned true.
5441
 */
5442
void OGRLayer::PostFilterArrowArray(const struct ArrowSchema *schema,
5443
                                    struct ArrowArray *array,
5444
                                    CSLConstList papszOptions) const
5445
0
{
5446
0
    if (!m_poFilterGeom && !m_poAttrQuery)
5447
0
        return;
5448
5449
0
    CPLAssert(schema->n_children == array->n_children);
5450
5451
0
    int64_t iGeomField = -1;
5452
0
    if (m_poFilterGeom)
5453
0
    {
5454
0
        const char *pszGeomFieldName =
5455
0
            const_cast<OGRLayer *>(this)
5456
0
                ->GetLayerDefn()
5457
0
                ->GetGeomFieldDefn(m_iGeomFieldFilter)
5458
0
                ->GetNameRef();
5459
0
        for (int64_t iField = 0; iField < schema->n_children; ++iField)
5460
0
        {
5461
0
            const auto fieldSchema = schema->children[iField];
5462
0
            if (strcmp(fieldSchema->name, pszGeomFieldName) == 0)
5463
0
            {
5464
0
                iGeomField = iField;
5465
0
                break;
5466
0
            }
5467
0
            CPLAssert(array->children[iField]->length ==
5468
0
                      array->children[0]->length);
5469
0
        }
5470
        // Guaranteed if CanPostFilterArrowArray() returned true
5471
0
        CPLAssert(iGeomField >= 0);
5472
0
        CPLAssert(IsBinary(schema->children[iGeomField]->format) ||
5473
0
                  IsLargeBinary(schema->children[iGeomField]->format));
5474
0
        CPLAssert(array->children[iGeomField]->n_buffers == 3);
5475
0
    }
5476
5477
0
    std::vector<bool> abyValidityFromFilters;
5478
0
    const size_t nLength = static_cast<size_t>(array->length);
5479
0
    const size_t nCountIntersectingGeom =
5480
0
        m_poFilterGeom ? (IsBinary(schema->children[iGeomField]->format)
5481
0
                              ? FillValidityArrayFromWKBArray<uint32_t>(
5482
0
                                    array->children[iGeomField], this,
5483
0
                                    abyValidityFromFilters)
5484
0
                              : FillValidityArrayFromWKBArray<uint64_t>(
5485
0
                                    array->children[iGeomField], this,
5486
0
                                    abyValidityFromFilters))
5487
0
                       : nLength;
5488
0
    if (!m_poFilterGeom)
5489
0
        abyValidityFromFilters.resize(nLength, true);
5490
0
    const size_t nCountIntersecting =
5491
0
        m_poAttrQuery && nCountIntersectingGeom > 0
5492
0
            ? FillValidityArrayFromAttrQuery(this, m_poAttrQuery, schema, array,
5493
0
                                             abyValidityFromFilters,
5494
0
                                             papszOptions)
5495
0
        : m_poFilterGeom ? nCountIntersectingGeom
5496
0
                         : nLength;
5497
    // Nothing to do ?
5498
0
    if (nCountIntersecting == nLength)
5499
0
    {
5500
        // CPLDebug("OGR", "All rows match filter");
5501
0
        return;
5502
0
    }
5503
5504
0
    if (nCountIntersecting == 0)
5505
0
    {
5506
0
        array->length = 0;
5507
0
    }
5508
0
    else if (!CompactStructArray(schema, array, 0, abyValidityFromFilters,
5509
0
                                 nCountIntersecting))
5510
0
    {
5511
0
        array->release(array);
5512
0
        memset(array, 0, sizeof(*array));
5513
0
    }
5514
0
}
5515
5516
/************************************************************************/
5517
/*                          OGRCloneArrowArray                          */
5518
/************************************************************************/
5519
5520
static bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5521
                               const struct ArrowArray *src_array,
5522
                               struct ArrowArray *out_array,
5523
                               size_t nParentOffset)
5524
0
{
5525
0
    memset(out_array, 0, sizeof(*out_array));
5526
0
    const size_t nLength =
5527
0
        static_cast<size_t>(src_array->length) - nParentOffset;
5528
0
    out_array->length = nLength;
5529
0
    out_array->null_count = src_array->null_count;
5530
0
    out_array->release = OGRLayerDefaultReleaseArray;
5531
5532
0
    bool bRet = true;
5533
5534
0
    out_array->n_buffers = src_array->n_buffers;
5535
0
    out_array->buffers = static_cast<const void **>(CPLCalloc(
5536
0
        static_cast<size_t>(src_array->n_buffers), sizeof(const void *)));
5537
0
    CPLAssert(static_cast<size_t>(src_array->length) >= nParentOffset);
5538
0
    const char *format = schema->format;
5539
0
    const auto nOffset = static_cast<size_t>(src_array->offset) + nParentOffset;
5540
0
    for (int64_t i = 0; i < src_array->n_buffers; ++i)
5541
0
    {
5542
0
        if (i == 0 || IsBoolean(format))
5543
0
        {
5544
0
            if (i == 1)
5545
0
            {
5546
0
                CPLAssert(src_array->buffers[i]);
5547
0
            }
5548
0
            if (src_array->buffers[i])
5549
0
            {
5550
0
                const size_t nBytes = nLength ? (nLength + 7) / 8 : 1;
5551
0
                uint8_t *CPL_RESTRICT p = static_cast<uint8_t *>(
5552
0
                    VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nBytes));
5553
0
                if (!p)
5554
0
                {
5555
0
                    bRet = false;
5556
0
                    break;
5557
0
                }
5558
0
                const auto *CPL_RESTRICT pSrcArray =
5559
0
                    static_cast<const uint8_t *>(src_array->buffers[i]);
5560
0
                if ((nOffset % 8) != 0)
5561
0
                {
5562
                    // Make sure last byte is fully initialized
5563
0
                    p[nBytes - 1] = 0;
5564
0
                    for (size_t iRow = 0; iRow < nLength; ++iRow)
5565
0
                    {
5566
0
                        if (TestBit(pSrcArray, nOffset + iRow))
5567
0
                            SetBit(p, iRow);
5568
0
                        else
5569
0
                            UnsetBit(p, iRow);
5570
0
                    }
5571
0
                }
5572
0
                else
5573
0
                {
5574
0
                    memcpy(p, pSrcArray + nOffset / 8, nBytes);
5575
0
                }
5576
0
                out_array->buffers[i] = p;
5577
0
            }
5578
0
        }
5579
0
        else if (i == 1)
5580
0
        {
5581
0
            CPLAssert(src_array->buffers[i]);
5582
0
            size_t nEltSize = 0;
5583
0
            size_t nExtraElt = 0;
5584
0
            if (IsUInt8(format) || IsInt8(format))
5585
0
                nEltSize = sizeof(uint8_t);
5586
0
            else if (IsUInt16(format) || IsInt16(format) || IsFloat16(format))
5587
0
                nEltSize = sizeof(uint16_t);
5588
0
            else if (IsUInt32(format) || IsInt32(format) || IsFloat32(format) ||
5589
0
                     strcmp(format, "tdD") == 0 || strcmp(format, "tts") == 0 ||
5590
0
                     strcmp(format, "ttm") == 0)
5591
0
            {
5592
0
                nEltSize = sizeof(uint32_t);
5593
0
            }
5594
0
            else if (IsString(format) || IsBinary(format) || IsList(format) ||
5595
0
                     IsMap(format))
5596
0
            {
5597
0
                nEltSize = sizeof(uint32_t);
5598
0
                nExtraElt = 1;
5599
0
            }
5600
0
            else if (IsUInt64(format) || IsInt64(format) || IsFloat64(format) ||
5601
0
                     strcmp(format, "tdm") == 0 || strcmp(format, "ttu") == 0 ||
5602
0
                     strcmp(format, "ttn") == 0 || IsTimestamp(format))
5603
0
            {
5604
0
                nEltSize = sizeof(uint64_t);
5605
0
            }
5606
0
            else if (IsLargeString(format) || IsLargeBinary(format) ||
5607
0
                     IsLargeList(format))
5608
0
            {
5609
0
                nEltSize = sizeof(uint64_t);
5610
0
                nExtraElt = 1;
5611
0
            }
5612
0
            else if (IsFixedWidthBinary(format))
5613
0
            {
5614
0
                nEltSize = GetFixedWithBinary(format);
5615
0
            }
5616
0
            else if (IsDecimal(format))
5617
0
            {
5618
0
                int nPrecision = 0;
5619
0
                int nScale = 0;
5620
0
                int nWidthInBytes = 0;
5621
0
                if (!ParseDecimalFormat(format, nPrecision, nScale,
5622
0
                                        nWidthInBytes))
5623
0
                {
5624
0
                    CPLError(
5625
0
                        CE_Failure, CPLE_AppDefined,
5626
0
                        "Unexpected error in OGRCloneArrowArray(): unhandled "
5627
0
                        "field format: %s",
5628
0
                        format);
5629
5630
0
                    return false;
5631
0
                }
5632
0
                nEltSize = nWidthInBytes;
5633
0
            }
5634
0
            if (nEltSize)
5635
0
            {
5636
0
                void *p = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
5637
0
                    nLength ? nEltSize * (nLength + nExtraElt) : 1);
5638
0
                if (!p)
5639
0
                {
5640
0
                    bRet = false;
5641
0
                    break;
5642
0
                }
5643
0
                if (nLength)
5644
0
                {
5645
0
                    if ((IsString(format) || IsBinary(format)) &&
5646
0
                        static_cast<const uint32_t *>(
5647
0
                            src_array->buffers[1])[nOffset] != 0)
5648
0
                    {
5649
0
                        const auto *CPL_RESTRICT pSrcOffsets =
5650
0
                            static_cast<const uint32_t *>(
5651
0
                                src_array->buffers[1]) +
5652
0
                            nOffset;
5653
0
                        const auto nShiftOffset = pSrcOffsets[0];
5654
0
                        auto *CPL_RESTRICT pDstOffsets =
5655
0
                            static_cast<uint32_t *>(p);
5656
0
                        for (size_t iRow = 0; iRow <= nLength; ++iRow)
5657
0
                        {
5658
0
                            pDstOffsets[iRow] =
5659
0
                                pSrcOffsets[iRow] - nShiftOffset;
5660
0
                        }
5661
0
                    }
5662
0
                    else if ((IsLargeString(format) || IsLargeBinary(format)) &&
5663
0
                             static_cast<const uint64_t *>(
5664
0
                                 src_array->buffers[1])[nOffset] != 0)
5665
0
                    {
5666
0
                        const auto *CPL_RESTRICT pSrcOffsets =
5667
0
                            static_cast<const uint64_t *>(
5668
0
                                src_array->buffers[1]) +
5669
0
                            nOffset;
5670
0
                        const auto nShiftOffset = pSrcOffsets[0];
5671
0
                        auto *CPL_RESTRICT pDstOffsets =
5672
0
                            static_cast<uint64_t *>(p);
5673
0
                        for (size_t iRow = 0; iRow <= nLength; ++iRow)
5674
0
                        {
5675
0
                            pDstOffsets[iRow] =
5676
0
                                pSrcOffsets[iRow] - nShiftOffset;
5677
0
                        }
5678
0
                    }
5679
0
                    else
5680
0
                    {
5681
0
                        memcpy(
5682
0
                            p,
5683
0
                            static_cast<const GByte *>(src_array->buffers[i]) +
5684
0
                                nEltSize * nOffset,
5685
0
                            nEltSize * (nLength + nExtraElt));
5686
0
                    }
5687
0
                }
5688
0
                out_array->buffers[i] = p;
5689
0
            }
5690
0
            else
5691
0
            {
5692
0
                CPLError(CE_Failure, CPLE_AppDefined,
5693
0
                         "OGRCloneArrowArray(): unhandled case, array = %s, "
5694
0
                         "format = '%s', i = 1",
5695
0
                         schema->name, format);
5696
0
                bRet = false;
5697
0
                break;
5698
0
            }
5699
0
        }
5700
0
        else if (i == 2)
5701
0
        {
5702
0
            CPLAssert(src_array->buffers[i]);
5703
0
            size_t nSrcCharOffset = 0;
5704
0
            size_t nCharCount = 0;
5705
0
            if (IsString(format) || IsBinary(format))
5706
0
            {
5707
0
                const auto *pSrcOffsets =
5708
0
                    static_cast<const uint32_t *>(src_array->buffers[1]) +
5709
0
                    nOffset;
5710
0
                nSrcCharOffset = pSrcOffsets[0];
5711
0
                nCharCount = pSrcOffsets[nLength] - pSrcOffsets[0];
5712
0
            }
5713
0
            else if (IsLargeString(format) || IsLargeBinary(format))
5714
0
            {
5715
0
                const auto *pSrcOffsets =
5716
0
                    static_cast<const uint64_t *>(src_array->buffers[1]) +
5717
0
                    nOffset;
5718
0
                nSrcCharOffset = static_cast<size_t>(pSrcOffsets[0]);
5719
0
                nCharCount =
5720
0
                    static_cast<size_t>(pSrcOffsets[nLength] - pSrcOffsets[0]);
5721
0
            }
5722
0
            else
5723
0
            {
5724
0
                CPLError(CE_Failure, CPLE_AppDefined,
5725
0
                         "OGRCloneArrowArray(): unhandled case, array = %s, "
5726
0
                         "format = '%s', i = 2",
5727
0
                         schema->name, format);
5728
0
                bRet = false;
5729
0
                break;
5730
0
            }
5731
0
            void *p =
5732
0
                VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCharCount ? nCharCount : 1);
5733
0
            if (!p)
5734
0
            {
5735
0
                bRet = false;
5736
0
                break;
5737
0
            }
5738
0
            if (nCharCount)
5739
0
            {
5740
0
                memcpy(p,
5741
0
                       static_cast<const GByte *>(src_array->buffers[i]) +
5742
0
                           nSrcCharOffset,
5743
0
                       nCharCount);
5744
0
            }
5745
0
            out_array->buffers[i] = p;
5746
0
        }
5747
0
        else
5748
0
        {
5749
0
            CPLError(CE_Failure, CPLE_AppDefined,
5750
0
                     "OGRCloneArrowArray(): unhandled case, array = %s, format "
5751
0
                     "= '%s', i = 3",
5752
0
                     schema->name, format);
5753
0
            bRet = false;
5754
0
            break;
5755
0
        }
5756
0
    }
5757
5758
0
    if (bRet)
5759
0
    {
5760
0
        out_array->n_children = src_array->n_children;
5761
0
        out_array->children = static_cast<struct ArrowArray **>(
5762
0
            CPLCalloc(static_cast<size_t>(src_array->n_children),
5763
0
                      sizeof(struct ArrowArray *)));
5764
0
        for (int64_t i = 0; i < src_array->n_children; ++i)
5765
0
        {
5766
0
            out_array->children[i] = static_cast<struct ArrowArray *>(
5767
0
                CPLCalloc(1, sizeof(struct ArrowArray)));
5768
0
            if (!OGRCloneArrowArray(schema->children[i], src_array->children[i],
5769
0
                                    out_array->children[i],
5770
0
                                    IsFixedSizeList(format)
5771
0
                                        ? nOffset * GetFixedSizeList(format)
5772
0
                                    : IsStructure(format) ? nOffset
5773
0
                                                          : 0))
5774
0
            {
5775
0
                bRet = false;
5776
0
                break;
5777
0
            }
5778
0
        }
5779
0
    }
5780
5781
0
    if (bRet && src_array->dictionary)
5782
0
    {
5783
0
        out_array->dictionary = static_cast<struct ArrowArray *>(
5784
0
            CPLCalloc(1, sizeof(struct ArrowArray)));
5785
0
        bRet = OGRCloneArrowArray(schema->dictionary, src_array->dictionary,
5786
0
                                  out_array->dictionary, 0);
5787
0
    }
5788
5789
0
    if (!bRet)
5790
0
    {
5791
0
        out_array->release(out_array);
5792
0
        memset(out_array, 0, sizeof(*out_array));
5793
0
    }
5794
0
    return bRet;
5795
0
}
5796
5797
/** Full/deep copy of an array.
5798
 *
5799
 * Renormalize the offset of the array (and its children) to 0.
5800
 *
5801
 * In case of failure, out_array will be let in a released state.
5802
 *
5803
 * @param schema Schema of the array. Must *NOT* be NULL.
5804
 * @param src_array Source array. Must *NOT* be NULL.
5805
 * @param out_array Output array.  Must *NOT* be NULL (but its content may be random)
5806
 * @return true if success.
5807
 */
5808
bool OGRCloneArrowArray(const struct ArrowSchema *schema,
5809
                        const struct ArrowArray *src_array,
5810
                        struct ArrowArray *out_array)
5811
0
{
5812
0
    return OGRCloneArrowArray(schema, src_array, out_array, 0);
5813
0
}
5814
5815
/************************************************************************/
5816
/*                       OGRCloneArrowMetadata()                        */
5817
/************************************************************************/
5818
5819
static void *OGRCloneArrowMetadata(const void *pMetadata)
5820
0
{
5821
0
    if (!pMetadata)
5822
0
        return nullptr;
5823
0
    std::vector<GByte> abyOut;
5824
0
    const GByte *pabyMetadata = static_cast<const GByte *>(pMetadata);
5825
0
    int32_t nKVP;
5826
0
    abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + sizeof(int32_t));
5827
0
    memcpy(&nKVP, pabyMetadata, sizeof(int32_t));
5828
0
    pabyMetadata += sizeof(int32_t);
5829
0
    for (int i = 0; i < nKVP; ++i)
5830
0
    {
5831
0
        int32_t nSizeKey;
5832
0
        abyOut.insert(abyOut.end(), pabyMetadata,
5833
0
                      pabyMetadata + sizeof(int32_t));
5834
0
        memcpy(&nSizeKey, pabyMetadata, sizeof(int32_t));
5835
0
        pabyMetadata += sizeof(int32_t);
5836
0
        abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeKey);
5837
0
        pabyMetadata += nSizeKey;
5838
5839
0
        int32_t nSizeValue;
5840
0
        abyOut.insert(abyOut.end(), pabyMetadata,
5841
0
                      pabyMetadata + sizeof(int32_t));
5842
0
        memcpy(&nSizeValue, pabyMetadata, sizeof(int32_t));
5843
0
        pabyMetadata += sizeof(int32_t);
5844
0
        abyOut.insert(abyOut.end(), pabyMetadata, pabyMetadata + nSizeValue);
5845
0
        pabyMetadata += nSizeValue;
5846
0
    }
5847
5848
0
    GByte *pabyOut = static_cast<GByte *>(VSI_MALLOC_VERBOSE(abyOut.size()));
5849
0
    if (pabyOut)
5850
0
        memcpy(pabyOut, abyOut.data(), abyOut.size());
5851
0
    return pabyOut;
5852
0
}
5853
5854
/************************************************************************/
5855
/*                        OGRCloneArrowSchema()                         */
5856
/************************************************************************/
5857
5858
/** Full/deep copy of a schema.
5859
 *
5860
 * In case of failure, out_schema will be let in a released state.
5861
 *
5862
 * @param schema Schema to clone. Must *NOT* be NULL.
5863
 * @param out_schema Output schema.  Must *NOT* be NULL (but its content may be random)
5864
 * @return true if success.
5865
 */
5866
bool OGRCloneArrowSchema(const struct ArrowSchema *schema,
5867
                         struct ArrowSchema *out_schema)
5868
0
{
5869
0
    memset(out_schema, 0, sizeof(*out_schema));
5870
0
    out_schema->release = OGRLayerFullReleaseSchema;
5871
0
    out_schema->format = CPLStrdup(schema->format);
5872
0
    out_schema->name = CPLStrdup(schema->name);
5873
0
    out_schema->metadata = static_cast<const char *>(
5874
0
        const_cast<const void *>(OGRCloneArrowMetadata(schema->metadata)));
5875
0
    out_schema->flags = schema->flags;
5876
0
    if (schema->n_children)
5877
0
    {
5878
0
        out_schema->children =
5879
0
            static_cast<struct ArrowSchema **>(VSI_CALLOC_VERBOSE(
5880
0
                static_cast<int>(schema->n_children), sizeof(ArrowSchema *)));
5881
0
        if (!out_schema->children)
5882
0
        {
5883
0
            out_schema->release(out_schema);
5884
0
            return false;
5885
0
        }
5886
0
        out_schema->n_children = schema->n_children;
5887
0
        for (int i = 0; i < static_cast<int>(schema->n_children); ++i)
5888
0
        {
5889
0
            out_schema->children[i] = static_cast<struct ArrowSchema *>(
5890
0
                CPLMalloc(sizeof(ArrowSchema)));
5891
0
            if (!OGRCloneArrowSchema(schema->children[i],
5892
0
                                     out_schema->children[i]))
5893
0
            {
5894
0
                out_schema->release(out_schema);
5895
0
                return false;
5896
0
            }
5897
0
        }
5898
0
    }
5899
0
    if (schema->dictionary)
5900
0
    {
5901
0
        out_schema->dictionary =
5902
0
            static_cast<struct ArrowSchema *>(CPLMalloc(sizeof(ArrowSchema)));
5903
0
        if (!OGRCloneArrowSchema(schema->dictionary, out_schema->dictionary))
5904
0
        {
5905
0
            out_schema->release(out_schema);
5906
0
            return false;
5907
0
        }
5908
0
    }
5909
0
    return true;
5910
0
}
5911
5912
/************************************************************************/
5913
/*                  OGRLayer::IsArrowSchemaSupported()                  */
5914
/************************************************************************/
5915
5916
const struct
5917
{
5918
    const char *arrowType;
5919
    OGRFieldType eType;
5920
    OGRFieldSubType eSubType;
5921
} gasArrowTypesToOGR[] = {
5922
    {"b", OFTInteger, OFSTBoolean}, {"c", OFTInteger, OFSTInt16},  // Int8
5923
    {"C", OFTInteger, OFSTInt16},                                  // UInt8
5924
    {"s", OFTInteger, OFSTInt16},                                  // Int16
5925
    {"S", OFTInteger, OFSTNone},                                   // UInt16
5926
    {"i", OFTInteger, OFSTNone},                                   // Int32
5927
    {"I", OFTInteger64, OFSTNone},                                 // UInt32
5928
    {"l", OFTInteger64, OFSTNone},                                 // Int64
5929
    {"L", OFTReal, OFSTNone},  // UInt64 (potentially lossy conversion if going through OGRFeature)
5930
    {"e", OFTReal, OFSTFloat32},  // float16
5931
    {"f", OFTReal, OFSTFloat32},  // float32
5932
    {"g", OFTReal, OFSTNone},     // float64
5933
    {"z", OFTBinary, OFSTNone},   // binary
5934
    {"Z", OFTBinary, OFSTNone},  // large binary (will be limited to 32 bit length though if going through OGRFeature!)
5935
    {"u", OFTString, OFSTNone},  // string
5936
    {"U", OFTString, OFSTNone},  // large string
5937
    {"tdD", OFTDate, OFSTNone},  // date32[days]
5938
    {"tdm", OFTDate, OFSTNone},  // date64[milliseconds]
5939
    {"tts", OFTTime, OFSTNone},  // time32 [seconds]
5940
    {"ttm", OFTTime, OFSTNone},  // time32 [milliseconds]
5941
    {"ttu", OFTTime, OFSTNone},  // time64 [microseconds]
5942
    {"ttn", OFTTime, OFSTNone},  // time64 [nanoseconds]
5943
};
5944
5945
const struct
5946
{
5947
    const char arrowLetter;
5948
    OGRFieldType eType;
5949
    OGRFieldSubType eSubType;
5950
} gasListTypes[] = {
5951
    {ARROW_LETTER_BOOLEAN, OFTIntegerList, OFSTBoolean},
5952
    {ARROW_LETTER_INT8, OFTIntegerList, OFSTInt16},
5953
    {ARROW_LETTER_UINT8, OFTIntegerList, OFSTInt16},
5954
    {ARROW_LETTER_INT16, OFTIntegerList, OFSTInt16},
5955
    {ARROW_LETTER_UINT16, OFTIntegerList, OFSTNone},
5956
    {ARROW_LETTER_INT32, OFTIntegerList, OFSTNone},
5957
    {ARROW_LETTER_UINT32, OFTInteger64List, OFSTNone},
5958
    {ARROW_LETTER_INT64, OFTInteger64List, OFSTNone},
5959
    {ARROW_LETTER_UINT64, OFTRealList,
5960
     OFSTNone},  //(potentially lossy conversion if going through OGRFeature)
5961
    {ARROW_LETTER_FLOAT16, OFTRealList, OFSTFloat32},
5962
    {ARROW_LETTER_FLOAT32, OFTRealList, OFSTFloat32},
5963
    {ARROW_LETTER_FLOAT64, OFTRealList, OFSTNone},
5964
    {ARROW_LETTER_STRING, OFTStringList, OFSTNone},
5965
    {ARROW_LETTER_LARGE_STRING, OFTStringList, OFSTNone},
5966
};
5967
5968
static inline bool IsValidDictionaryIndexType(const char *format)
5969
0
{
5970
0
    return (format[0] == ARROW_LETTER_INT8 || format[0] == ARROW_LETTER_UINT8 ||
5971
0
            format[0] == ARROW_LETTER_INT16 ||
5972
0
            format[0] == ARROW_LETTER_UINT16 ||
5973
0
            format[0] == ARROW_LETTER_INT32 ||
5974
0
            format[0] == ARROW_LETTER_UINT32 ||
5975
0
            format[0] == ARROW_LETTER_INT64 ||
5976
0
            format[0] == ARROW_LETTER_UINT64) &&
5977
0
           format[1] == 0;
5978
0
}
5979
5980
static bool IsSupportForJSONObj(const struct ArrowSchema *schema)
5981
0
{
5982
0
    const char *format = schema->format;
5983
0
    if (IsStructure(format))
5984
0
    {
5985
0
        for (int64_t i = 0; i < schema->n_children; ++i)
5986
0
        {
5987
0
            if (!IsSupportForJSONObj(schema->children[i]))
5988
0
                return false;
5989
0
        }
5990
0
        return true;
5991
0
    }
5992
5993
0
    for (const auto &sType : gasListTypes)
5994
0
    {
5995
0
        if (format[0] == sType.arrowLetter && format[1] == 0)
5996
0
        {
5997
0
            return true;
5998
0
        }
5999
0
    }
6000
6001
0
    if (IsBinary(format) || IsLargeBinary(format) || IsFixedWidthBinary(format))
6002
0
        return true;
6003
6004
0
    if (IsDecimal(format))
6005
0
    {
6006
0
        int nPrecision = 0;
6007
0
        int nScale = 0;
6008
0
        int nWidthInBytes = 0;
6009
0
        if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6010
0
        {
6011
0
            CPLError(CE_Failure, CPLE_AppDefined, "Invalid field format %s",
6012
0
                     format);
6013
0
            return false;
6014
0
        }
6015
6016
0
        return GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision) ==
6017
0
               nullptr;
6018
0
    }
6019
6020
0
    if (IsMap(format))
6021
0
    {
6022
0
        return IsStructure(schema->children[0]->format) &&
6023
0
               schema->children[0]->n_children == 2 &&
6024
0
               IsString(schema->children[0]->children[0]->format) &&
6025
0
               IsSupportForJSONObj(schema->children[0]->children[1]);
6026
0
    }
6027
6028
0
    if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6029
0
    {
6030
0
        return IsSupportForJSONObj(schema->children[0]);
6031
0
    }
6032
6033
0
    return false;
6034
0
}
6035
6036
static bool IsArrowSchemaSupportedInternal(const struct ArrowSchema *schema,
6037
                                           const std::string &osFieldPrefix,
6038
                                           std::string &osErrorMsg)
6039
0
{
6040
0
    const auto AppendError = [&osErrorMsg](const std::string &osMsg)
6041
0
    {
6042
0
        if (!osErrorMsg.empty())
6043
0
            osErrorMsg += " ";
6044
0
        osErrorMsg += osMsg;
6045
0
    };
6046
6047
0
    const char *fieldName = schema->name;
6048
0
    const char *format = schema->format;
6049
0
    if (IsStructure(format))
6050
0
    {
6051
0
        bool bRet = true;
6052
0
        const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6053
0
        for (int64_t i = 0; i < schema->n_children; ++i)
6054
0
        {
6055
0
            if (!IsArrowSchemaSupportedInternal(schema->children[i],
6056
0
                                                osNewPrefix, osErrorMsg))
6057
0
                bRet = false;
6058
0
        }
6059
0
        return bRet;
6060
0
    }
6061
6062
0
    if (schema->dictionary)
6063
0
    {
6064
0
        if (!IsValidDictionaryIndexType(format))
6065
0
        {
6066
0
            AppendError("Dictionary only supported if the parent is of "
6067
0
                        "type [U]Int[8|16|32|64]");
6068
0
            return false;
6069
0
        }
6070
6071
0
        schema = schema->dictionary;
6072
0
        format = schema->format;
6073
0
    }
6074
6075
0
    if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6076
0
    {
6077
        // Only some subtypes supported
6078
0
        const char *childFormat = schema->children[0]->format;
6079
0
        for (const auto &sType : gasListTypes)
6080
0
        {
6081
0
            if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6082
0
            {
6083
0
                return true;
6084
0
            }
6085
0
        }
6086
6087
0
        if (IsDecimal(childFormat))
6088
0
        {
6089
0
            int nPrecision = 0;
6090
0
            int nScale = 0;
6091
0
            int nWidthInBytes = 0;
6092
0
            if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6093
0
                                    nWidthInBytes))
6094
0
            {
6095
0
                AppendError(std::string("Invalid field format ") + childFormat +
6096
0
                            " for field " + osFieldPrefix + fieldName);
6097
0
                return false;
6098
0
            }
6099
6100
0
            const char *pszError =
6101
0
                GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6102
0
            if (pszError)
6103
0
            {
6104
0
                AppendError(pszError);
6105
0
                return false;
6106
0
            }
6107
6108
0
            return true;
6109
0
        }
6110
6111
0
        if (IsSupportForJSONObj(schema))
6112
0
        {
6113
0
            return true;
6114
0
        }
6115
6116
0
        AppendError("Type list for field " + osFieldPrefix + fieldName +
6117
0
                    " is not supported.");
6118
0
        return false;
6119
0
    }
6120
6121
0
    else if (IsMap(format))
6122
0
    {
6123
0
        if (IsSupportForJSONObj(schema))
6124
0
            return true;
6125
6126
0
        AppendError("Type map for field " + osFieldPrefix + fieldName +
6127
0
                    " is not supported.");
6128
0
        return false;
6129
0
    }
6130
0
    else if (IsDecimal(format))
6131
0
    {
6132
0
        int nPrecision = 0;
6133
0
        int nScale = 0;
6134
0
        int nWidthInBytes = 0;
6135
0
        if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6136
0
        {
6137
0
            AppendError(std::string("Invalid field format ") + format +
6138
0
                        " for field " + osFieldPrefix + fieldName);
6139
0
            return false;
6140
0
        }
6141
6142
0
        const char *pszError =
6143
0
            GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6144
0
        if (pszError)
6145
0
        {
6146
0
            AppendError(pszError);
6147
0
            return false;
6148
0
        }
6149
6150
0
        return true;
6151
0
    }
6152
0
    else
6153
0
    {
6154
0
        for (const auto &sType : gasArrowTypesToOGR)
6155
0
        {
6156
0
            if (strcmp(format, sType.arrowType) == 0)
6157
0
            {
6158
0
                return true;
6159
0
            }
6160
0
        }
6161
6162
0
        if (IsFixedWidthBinary(format) || IsTimestamp(format))
6163
0
            return true;
6164
6165
0
        AppendError("Type '" + std::string(format) + "' for field " +
6166
0
                    osFieldPrefix + fieldName + " is not supported.");
6167
0
        return false;
6168
0
    }
6169
0
}
6170
6171
/** Returns whether the provided ArrowSchema is supported for writing.
6172
 *
6173
 * This method exists since not all drivers may support all Arrow data types.
6174
 *
6175
 * The ArrowSchema must be of type struct (format=+s)
6176
 *
6177
 * It is recommended to call this method before calling WriteArrowBatch().
6178
 *
6179
 * This is the same as the C function OGR_L_IsArrowSchemaSupported().
6180
 *
6181
 * @param schema Schema of type struct (format = '+s')
6182
 * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6183
 * @param[out] osErrorMsg Reason of the failure, when this method returns false.
6184
 * @return true if the ArrowSchema is supported for writing.
6185
 * @since 3.8
6186
 */
6187
bool OGRLayer::IsArrowSchemaSupported(const struct ArrowSchema *schema,
6188
                                      CPL_UNUSED CSLConstList papszOptions,
6189
                                      std::string &osErrorMsg) const
6190
0
{
6191
0
    if (!IsStructure(schema->format))
6192
0
    {
6193
0
        osErrorMsg =
6194
0
            "IsArrowSchemaSupported() should be called on a schema that is a "
6195
0
            "struct of fields";
6196
0
        return false;
6197
0
    }
6198
6199
0
    bool bRet = true;
6200
0
    for (int64_t i = 0; i < schema->n_children; ++i)
6201
0
    {
6202
0
        if (!IsArrowSchemaSupportedInternal(schema->children[i], std::string(),
6203
0
                                            osErrorMsg))
6204
0
            bRet = false;
6205
0
    }
6206
0
    return bRet;
6207
0
}
6208
6209
/************************************************************************/
6210
/*                    OGR_L_IsArrowSchemaSupported()                    */
6211
/************************************************************************/
6212
6213
/** Returns whether the provided ArrowSchema is supported for writing.
6214
 *
6215
 * This function exists since not all drivers may support all Arrow data types.
6216
 *
6217
 * The ArrowSchema must be of type struct (format=+s)
6218
 *
6219
 * It is recommended to call this function before calling OGR_L_WriteArrowBatch().
6220
 *
6221
 * This is the same as the C++ method OGRLayer::IsArrowSchemaSupported().
6222
 *
6223
 * @param hLayer Layer.
6224
 * @param schema Schema of type struct (format = '+s')
6225
 * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6226
 * @param[out] ppszErrorMsg nullptr, or pointer to a string that will contain
6227
 * the reason of the failure, when this function returns false.
6228
 * @return true if the ArrowSchema is supported for writing.
6229
 * @since 3.8
6230
 */
6231
bool OGR_L_IsArrowSchemaSupported(OGRLayerH hLayer,
6232
                                  const struct ArrowSchema *schema,
6233
                                  CSLConstList papszOptions,
6234
                                  char **ppszErrorMsg)
6235
0
{
6236
0
    VALIDATE_POINTER1(hLayer, __func__, false);
6237
0
    VALIDATE_POINTER1(schema, __func__, false);
6238
6239
0
    std::string osErrorMsg;
6240
0
    if (!OGRLayer::FromHandle(hLayer)->IsArrowSchemaSupported(
6241
0
            schema, papszOptions, osErrorMsg))
6242
0
    {
6243
0
        if (ppszErrorMsg)
6244
0
            *ppszErrorMsg = VSIStrdup(osErrorMsg.c_str());
6245
0
        return false;
6246
0
    }
6247
0
    else
6248
0
    {
6249
0
        if (ppszErrorMsg)
6250
0
            *ppszErrorMsg = nullptr;
6251
0
        return true;
6252
0
    }
6253
0
}
6254
6255
/************************************************************************/
6256
/*                      IsKnownCodedFieldDomain()                       */
6257
/************************************************************************/
6258
6259
static bool IsKnownCodedFieldDomain(OGRLayer *poLayer,
6260
                                    const char *arrowMetadata)
6261
0
{
6262
0
    if (arrowMetadata)
6263
0
    {
6264
0
        const auto oMetadata = OGRParseArrowMetadata(arrowMetadata);
6265
0
        for (const auto &oIter : oMetadata)
6266
0
        {
6267
0
            if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6268
0
            {
6269
0
                auto poDS = poLayer->GetDataset();
6270
0
                if (poDS)
6271
0
                {
6272
0
                    const auto poFieldDomain =
6273
0
                        poDS->GetFieldDomain(oIter.second);
6274
0
                    if (poFieldDomain &&
6275
0
                        poFieldDomain->GetDomainType() == OFDT_CODED)
6276
0
                    {
6277
0
                        return true;
6278
0
                    }
6279
0
                }
6280
0
            }
6281
0
        }
6282
0
    }
6283
0
    return false;
6284
0
}
6285
6286
/************************************************************************/
6287
/*                OGRLayer::CreateFieldFromArrowSchema()                */
6288
/************************************************************************/
6289
6290
//! @cond Doxygen_Suppress
6291
bool OGRLayer::CreateFieldFromArrowSchemaInternal(
6292
    const struct ArrowSchema *schema, const std::string &osFieldPrefix,
6293
    CSLConstList papszOptions)
6294
0
{
6295
0
    const char *fieldName = schema->name;
6296
0
    const char *format = schema->format;
6297
0
    if (IsStructure(format))
6298
0
    {
6299
0
        if (IsArrowTimeStampWithOffsetField(schema))
6300
0
        {
6301
0
            OGRFieldDefn oFieldDefn((osFieldPrefix + fieldName).c_str(),
6302
0
                                    OFTDateTime);
6303
0
            oFieldDefn.SetTZFlag(OGR_TZFLAG_MIXED_TZ);
6304
0
            auto poLayerDefn = GetLayerDefn();
6305
0
            const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6306
0
            if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6307
0
                nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6308
0
            {
6309
0
                return false;
6310
0
            }
6311
0
        }
6312
0
        else
6313
0
        {
6314
0
            const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6315
0
            for (int64_t i = 0; i < schema->n_children; ++i)
6316
0
            {
6317
0
                if (!CreateFieldFromArrowSchemaInternal(
6318
0
                        schema->children[i], osNewPrefix, papszOptions))
6319
0
                    return false;
6320
0
            }
6321
0
        }
6322
0
        return true;
6323
0
    }
6324
6325
0
    CPLStringList aosNativeTypes;
6326
0
    auto poLayer = const_cast<OGRLayer *>(this);
6327
0
    auto poDS = poLayer->GetDataset();
6328
0
    if (poDS)
6329
0
    {
6330
0
        auto poDriver = poDS->GetDriver();
6331
0
        if (poDriver)
6332
0
        {
6333
0
            const char *pszMetadataItem =
6334
0
                poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
6335
0
            if (pszMetadataItem)
6336
0
                aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
6337
0
        }
6338
0
    }
6339
6340
0
    if (schema->dictionary &&
6341
0
        !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6342
0
    {
6343
0
        if (!IsValidDictionaryIndexType(format))
6344
0
        {
6345
0
            CPLError(CE_Failure, CPLE_NotSupported,
6346
0
                     "Dictionary only supported if the parent is of "
6347
0
                     "type [U]Int[8|16|32|64]");
6348
0
            return false;
6349
0
        }
6350
6351
0
        schema = schema->dictionary;
6352
0
        format = schema->format;
6353
0
    }
6354
6355
0
    const auto AddField = [this, schema, fieldName, &aosNativeTypes,
6356
0
                           &osFieldPrefix, poDS](OGRFieldType eTypeIn,
6357
0
                                                 OGRFieldSubType eSubTypeIn,
6358
0
                                                 int nWidth, int nPrecision)
6359
0
    {
6360
0
        const char *pszTypeName = OGRFieldDefn::GetFieldTypeName(eTypeIn);
6361
0
        auto eTypeOut = eTypeIn;
6362
0
        auto eSubTypeOut = eSubTypeIn;
6363
0
        if (!aosNativeTypes.empty() &&
6364
0
            aosNativeTypes.FindString(pszTypeName) < 0)
6365
0
        {
6366
0
            eTypeOut = OFTString;
6367
0
            eSubTypeOut =
6368
0
                (eTypeIn == OFTIntegerList || eTypeIn == OFTInteger64List ||
6369
0
                 eTypeIn == OFTRealList || eTypeIn == OFTStringList)
6370
0
                    ? OFSTJSON
6371
0
                    : OFSTNone;
6372
0
        }
6373
6374
0
        const std::string osWantedOGRFieldName = osFieldPrefix + fieldName;
6375
0
        OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut);
6376
0
        oFieldDefn.SetSubType(eSubTypeOut);
6377
0
        if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn)
6378
0
        {
6379
0
            oFieldDefn.SetWidth(nWidth);
6380
0
            oFieldDefn.SetPrecision(nPrecision);
6381
0
        }
6382
0
        oFieldDefn.SetNullable((schema->flags & ARROW_FLAG_NULLABLE) != 0);
6383
6384
0
        if (schema->metadata)
6385
0
        {
6386
0
            const auto oMetadata = OGRParseArrowMetadata(schema->metadata);
6387
0
            for (const auto &oIter : oMetadata)
6388
0
            {
6389
0
                if (oIter.first == MD_GDAL_OGR_TYPE)
6390
0
                {
6391
0
                    const auto &osType = oIter.second;
6392
0
                    for (auto eType = OFTInteger; eType <= OFTMaxType;)
6393
0
                    {
6394
0
                        if (OGRFieldDefn::GetFieldTypeName(eType) == osType)
6395
0
                        {
6396
0
                            oFieldDefn.SetType(eType);
6397
0
                            break;
6398
0
                        }
6399
0
                        if (eType == OFTMaxType)
6400
0
                            break;
6401
0
                        else
6402
0
                            eType = static_cast<OGRFieldType>(eType + 1);
6403
0
                    }
6404
0
                }
6405
0
                else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME)
6406
0
                    oFieldDefn.SetAlternativeName(oIter.second.c_str());
6407
0
                else if (oIter.first == MD_GDAL_OGR_COMMENT)
6408
0
                    oFieldDefn.SetComment(oIter.second);
6409
0
                else if (oIter.first == MD_GDAL_OGR_DEFAULT)
6410
0
                    oFieldDefn.SetDefault(oIter.second.c_str());
6411
0
                else if (oIter.first == MD_GDAL_OGR_SUBTYPE)
6412
0
                {
6413
0
                    if (eTypeIn == eTypeOut)
6414
0
                    {
6415
0
                        const auto &osSubType = oIter.second;
6416
0
                        for (auto eSubType = OFSTNone;
6417
0
                             eSubType <= OFSTMaxSubType;)
6418
0
                        {
6419
0
                            if (OGRFieldDefn::GetFieldSubTypeName(eSubType) ==
6420
0
                                osSubType)
6421
0
                            {
6422
0
                                oFieldDefn.SetSubType(eSubType);
6423
0
                                break;
6424
0
                            }
6425
0
                            if (eSubType == OFSTMaxSubType)
6426
0
                                break;
6427
0
                            else
6428
0
                                eSubType =
6429
0
                                    static_cast<OGRFieldSubType>(eSubType + 1);
6430
0
                        }
6431
0
                    }
6432
0
                }
6433
0
                else if (oIter.first == MD_GDAL_OGR_WIDTH)
6434
0
                    oFieldDefn.SetWidth(atoi(oIter.second.c_str()));
6435
0
                else if (oIter.first == MD_GDAL_OGR_UNIQUE)
6436
0
                    oFieldDefn.SetUnique(oIter.second == "true");
6437
0
                else if (oIter.first == MD_GDAL_OGR_DOMAIN_NAME)
6438
0
                {
6439
0
                    if (poDS && poDS->GetFieldDomain(oIter.second))
6440
0
                        oFieldDefn.SetDomainName(oIter.second);
6441
0
                }
6442
0
                else if (oIter.first == ARROW_EXTENSION_NAME_KEY &&
6443
0
                         (oIter.second == EXTENSION_NAME_ARROW_JSON ||
6444
                          // Used by BigQuery through ADBC driver
6445
0
                          oIter.second == "google:sqlType:json"))
6446
0
                {
6447
0
                    oFieldDefn.SetSubType(OFSTJSON);
6448
0
                }
6449
0
                else if (oIter.first == ARROW_EXTENSION_NAME_KEY)
6450
0
                {
6451
0
                    CPLDebug("OGR", "Unknown Arrow extension: %s",
6452
0
                             oIter.second.c_str());
6453
0
                }
6454
0
                else
6455
0
                {
6456
0
                    CPLDebug("OGR", "Unknown field metadata: %s",
6457
0
                             oIter.first.c_str());
6458
0
                }
6459
0
            }
6460
0
        }
6461
0
        auto poLayerDefn = GetLayerDefn();
6462
0
        const int nFieldCountBefore = poLayerDefn->GetFieldCount();
6463
0
        if (CreateField(&oFieldDefn) != OGRERR_NONE ||
6464
0
            nFieldCountBefore + 1 != poLayerDefn->GetFieldCount())
6465
0
        {
6466
0
            return false;
6467
0
        }
6468
0
        const char *pszActualFieldName =
6469
0
            poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef();
6470
0
        if (pszActualFieldName != osWantedOGRFieldName)
6471
0
        {
6472
0
            m_poPrivate
6473
0
                ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] =
6474
0
                pszActualFieldName;
6475
0
        }
6476
0
        return true;
6477
0
    };
6478
6479
0
    for (const auto &sType : gasArrowTypesToOGR)
6480
0
    {
6481
0
        if (strcmp(format, sType.arrowType) == 0)
6482
0
        {
6483
0
            return AddField(sType.eType, sType.eSubType, 0, 0);
6484
0
        }
6485
0
    }
6486
6487
0
    if (IsMap(format))
6488
0
    {
6489
0
        return AddField(OFTString, OFSTJSON, 0, 0);
6490
0
    }
6491
6492
0
    if (IsTimestamp(format))
6493
0
    {
6494
0
        return AddField(OFTDateTime, OFSTNone, 0, 0);
6495
0
    }
6496
6497
0
    if (IsFixedWidthBinary(format))
6498
0
    {
6499
0
        return AddField(OFTBinary, OFSTNone, GetFixedWithBinary(format), 0);
6500
0
    }
6501
6502
0
    if (IsList(format) || IsLargeList(format) || IsFixedSizeList(format))
6503
0
    {
6504
0
        const char *childFormat = schema->children[0]->format;
6505
0
        for (const auto &sType : gasListTypes)
6506
0
        {
6507
0
            if (childFormat[0] == sType.arrowLetter && childFormat[1] == 0)
6508
0
            {
6509
0
                return AddField(sType.eType, sType.eSubType, 0, 0);
6510
0
            }
6511
0
        }
6512
6513
0
        if (IsDecimal(childFormat))
6514
0
        {
6515
0
            int nPrecision = 0;
6516
0
            int nScale = 0;
6517
0
            int nWidthInBytes = 0;
6518
0
            if (!ParseDecimalFormat(childFormat, nPrecision, nScale,
6519
0
                                    nWidthInBytes))
6520
0
            {
6521
0
                CPLError(CE_Failure, CPLE_AppDefined, "%s",
6522
0
                         (std::string("Invalid field format ") + format +
6523
0
                          " for field " + osFieldPrefix + fieldName)
6524
0
                             .c_str());
6525
0
                return false;
6526
0
            }
6527
6528
0
            const char *pszError =
6529
0
                GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6530
0
            if (pszError)
6531
0
            {
6532
0
                CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6533
0
                return false;
6534
0
            }
6535
6536
            // DBF convention: add space for negative sign and decimal separator
6537
0
            return AddField(OFTRealList, OFSTNone, nPrecision + 2, nScale);
6538
0
        }
6539
6540
0
        if (IsSupportForJSONObj(schema->children[0]))
6541
0
        {
6542
0
            return AddField(OFTString, OFSTJSON, 0, 0);
6543
0
        }
6544
6545
0
        CPLError(CE_Failure, CPLE_NotSupported, "%s",
6546
0
                 ("List of type '" + std::string(childFormat) + "' for field " +
6547
0
                  osFieldPrefix + fieldName + " is not supported.")
6548
0
                     .c_str());
6549
0
        return false;
6550
0
    }
6551
6552
0
    if (IsDecimal(format))
6553
0
    {
6554
0
        int nPrecision = 0;
6555
0
        int nScale = 0;
6556
0
        int nWidthInBytes = 0;
6557
0
        if (!ParseDecimalFormat(format, nPrecision, nScale, nWidthInBytes))
6558
0
        {
6559
0
            CPLError(CE_Failure, CPLE_AppDefined, "%s",
6560
0
                     (std::string("Invalid field format ") + format +
6561
0
                      " for field " + osFieldPrefix + fieldName)
6562
0
                         .c_str());
6563
0
            return false;
6564
0
        }
6565
6566
0
        const char *pszError =
6567
0
            GetErrorIfUnsupportedDecimal(nWidthInBytes, nPrecision);
6568
0
        if (pszError)
6569
0
        {
6570
0
            CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
6571
0
            return false;
6572
0
        }
6573
6574
        // DBF convention: add space for negative sign and decimal separator
6575
0
        return AddField(OFTReal, OFSTNone, nPrecision + 2, nScale);
6576
0
    }
6577
6578
0
    CPLError(CE_Failure, CPLE_NotSupported, "%s",
6579
0
             ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
6580
0
              fieldName + " is not supported.")
6581
0
                 .c_str());
6582
0
    return false;
6583
0
}
6584
6585
//! @endcond
6586
6587
/** Creates a field from an ArrowSchema.
6588
 *
6589
 * This should only be used for attribute fields. Geometry fields should
6590
 * be created with CreateGeomField(). The FID field should also not be
6591
 * passed with this method.
6592
 *
6593
 * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6594
 * passed schema must be for an individual field, and thus, is *not* of type
6595
 * struct (format=+s) (unless writing a set of fields grouped together in the
6596
 * same structure).
6597
 *
6598
 * Additional field metadata can be specified through the ArrowSchema::metadata
6599
 * field with the potential following items:
6600
 * <ul>
6601
 * <li>"GDAL:OGR:alternative_name": value of
6602
 *     OGRFieldDefn::GetAlternativeNameRef()</li>
6603
 * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6604
 * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6605
 * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6606
 * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6607
 *     string)</li>
6608
 * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6609
 *     "true" or "false")</li>
6610
 * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6611
 * </ul>
6612
 *
6613
 * This method and CreateField() are mutually exclusive in the same session.
6614
 *
6615
 * This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
6616
 *
6617
 * @param schema Schema of the field to create.
6618
 * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6619
 * @return true in case of success
6620
 * @since 3.8
6621
 */
6622
bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
6623
                                          CSLConstList papszOptions)
6624
0
{
6625
0
    return CreateFieldFromArrowSchemaInternal(schema, std::string(),
6626
0
                                              papszOptions);
6627
0
}
6628
6629
/************************************************************************/
6630
/*                  OGR_L_CreateFieldFromArrowSchema()                  */
6631
/************************************************************************/
6632
6633
/** Creates a field from an ArrowSchema.
6634
 *
6635
 * This should only be used for attribute fields. Geometry fields should
6636
 * be created with CreateGeomField(). The FID field should also not be
6637
 * passed with this method.
6638
 *
6639
 * Contrary to the IsArrowSchemaSupported() and WriteArrowBatch() methods, the
6640
 * passed schema must be for an individual field, and thus, is *not* of type
6641
 * struct (format=+s) (unless writing a set of fields grouped together in the
6642
 * same structure).
6643
 *
6644
 * Additional field metadata can be specified through the ArrowSchema::metadata
6645
 * field with the potential following items:
6646
 * <ul>
6647
 * <li>"GDAL:OGR:alternative_name": value of
6648
 *     OGRFieldDefn::GetAlternativeNameRef()</li>
6649
 * <li>"GDAL:OGR:comment": value of OGRFieldDefn::GetComment()</li>
6650
 * <li>"GDAL:OGR:default": value of OGRFieldDefn::GetDefault()</li>
6651
 * <li>"GDAL:OGR:subtype": value of OGRFieldDefn::GetSubType()</li>
6652
 * <li>"GDAL:OGR:width": value of OGRFieldDefn::GetWidth() (serialized as a
6653
 *     string)</li>
6654
 * <li>"GDAL:OGR:unique": value of OGRFieldDefn::IsUnique() (serialized as
6655
 *     "true" or "false")</li>
6656
 * <li>"GDAL:OGR:domain_name": value of OGRFieldDefn::GetDomainName()</li>
6657
 * </ul>
6658
 *
6659
 * This method and CreateField() are mutually exclusive in the same session.
6660
 *
6661
 * This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
6662
 *
6663
 * @param hLayer Layer.
6664
 * @param schema Schema of the field to create.
6665
 * @param papszOptions Options (none currently). Null terminated list, or nullptr.
6666
 * @return true in case of success
6667
 * @since 3.8
6668
 */
6669
bool OGR_L_CreateFieldFromArrowSchema(OGRLayerH hLayer,
6670
                                      const struct ArrowSchema *schema,
6671
                                      CSLConstList papszOptions)
6672
0
{
6673
0
    VALIDATE_POINTER1(hLayer, __func__, false);
6674
0
    VALIDATE_POINTER1(schema, __func__, false);
6675
6676
0
    return OGRLayer::FromHandle(hLayer)->CreateFieldFromArrowSchema(
6677
0
        schema, papszOptions);
6678
0
}
6679
6680
/************************************************************************/
6681
/*                         BuildOGRFieldInfo()                          */
6682
/************************************************************************/
6683
6684
// Special value stored in FieldInfo::iOGRFieldIdx to flag the Arrow column
// that holds the feature identifiers (FID), as opposed to the index of a
// regular attribute field.
constexpr int FID_COLUMN_SPECIAL_OGR_FIELD_IDX{-2};
6685
6686
struct FieldInfo
6687
{
6688
    std::string osName{};
6689
    int iOGRFieldIdx = -1;
6690
    const char *format = nullptr;
6691
    OGRFieldType eNominalFieldType =
6692
        OFTMaxType;  // OGR data type that would best match the Arrow type
6693
    OGRFieldType eTargetFieldType =
6694
        OFTMaxType;  // actual OGR data type of the layer field
6695
    // OGR data type of the feature passed to FillFeature()
6696
    OGRFieldType eSetFeatureFieldType = OFTMaxType;
6697
    bool bIsGeomCol = false;
6698
    bool bUseDictionary = false;
6699
    bool bUseStringOptim = false;
6700
    int nWidthInBytes = 0;  // only used for decimal fields
6701
    int nPrecision = 0;     // only used for decimal fields
6702
    int nScale = 0;         // only used for decimal fields
6703
};
6704
6705
static bool BuildOGRFieldInfo(
6706
    const struct ArrowSchema *schema, struct ArrowArray *array,
6707
    const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix,
6708
    const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed,
6709
    std::vector<FieldInfo> &asFieldInfo, const char *pszFIDName,
6710
    const char *pszGeomFieldName, OGRLayer *poLayer,
6711
    const std::map<std::string, std::string> &oMapArrowFieldNameToOGRFieldName,
6712
    const struct ArrowSchema *&schemaFIDColumn,
6713
    struct ArrowArray *&arrayFIDColumn)
6714
0
{
6715
0
    const char *fieldName = schema->name;
6716
0
    const char *format = schema->format;
6717
0
    if (IsStructure(format))
6718
0
    {
6719
0
        if (IsArrowTimeStampWithOffsetField(schema))
6720
0
        {
6721
0
            FieldInfo sInfo;
6722
0
            sInfo.osName = fieldName;
6723
0
            sInfo.format = "+s";
6724
0
            sInfo.eNominalFieldType = OFTDateTime;
6725
0
            const std::string &osExpectedOGRFieldName =
6726
0
                [&oMapArrowFieldNameToOGRFieldName,
6727
0
                 &sInfo]() -> const std::string &
6728
0
            {
6729
0
                const auto oIter =
6730
0
                    oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6731
0
                if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6732
0
                    return oIter->second;
6733
0
                return sInfo.osName;
6734
0
            }();
6735
0
            sInfo.iOGRFieldIdx =
6736
0
                poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6737
0
            if (sInfo.iOGRFieldIdx >= 0)
6738
0
            {
6739
0
                const auto eOGRType =
6740
0
                    poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6741
0
                sInfo.eTargetFieldType = eOGRType;
6742
0
            }
6743
0
            asFieldInfo.emplace_back(std::move(sInfo));
6744
0
        }
6745
0
        else
6746
0
        {
6747
0
            const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
6748
0
            for (int64_t i = 0; i < array->n_children; ++i)
6749
0
            {
6750
0
                if (!BuildOGRFieldInfo(
6751
0
                        schema->children[i], array->children[i], poFeatureDefn,
6752
0
                        osNewPrefix, aosNativeTypes, bFallbackTypesUsed,
6753
0
                        asFieldInfo, pszFIDName, pszGeomFieldName, poLayer,
6754
0
                        oMapArrowFieldNameToOGRFieldName, schemaFIDColumn,
6755
0
                        arrayFIDColumn))
6756
0
                {
6757
0
                    return false;
6758
0
                }
6759
0
            }
6760
0
        }
6761
0
        return true;
6762
0
    }
6763
6764
0
    FieldInfo sInfo;
6765
6766
0
    if (schema->dictionary &&
6767
0
        !IsKnownCodedFieldDomain(poLayer, schema->metadata))
6768
0
    {
6769
0
        if (!IsValidDictionaryIndexType(format))
6770
0
        {
6771
0
            CPLError(CE_Failure, CPLE_NotSupported,
6772
0
                     "Dictionary only supported if the parent is of "
6773
0
                     "type [U]Int[8|16|32|64]");
6774
0
            return false;
6775
0
        }
6776
6777
0
        sInfo.bUseDictionary = true;
6778
0
        schema = schema->dictionary;
6779
0
        format = schema->format;
6780
0
        array = array->dictionary;
6781
0
    }
6782
6783
0
    sInfo.osName = osFieldPrefix + fieldName;
6784
0
    sInfo.format = format;
6785
0
    if (pszFIDName && sInfo.osName == pszFIDName)
6786
0
    {
6787
0
        if (IsInt32(format) || IsInt64(format))
6788
0
        {
6789
0
            sInfo.iOGRFieldIdx = FID_COLUMN_SPECIAL_OGR_FIELD_IDX;
6790
0
            schemaFIDColumn = schema;
6791
0
            arrayFIDColumn = array;
6792
0
        }
6793
0
        else
6794
0
        {
6795
0
            CPLError(CE_Failure, CPLE_AppDefined,
6796
0
                     "FID column '%s' should be of Arrow format 'i' "
6797
0
                     "(int32) or 'l' (int64)",
6798
0
                     sInfo.osName.c_str());
6799
0
            return false;
6800
0
        }
6801
0
    }
6802
0
    else
6803
0
    {
6804
0
        const std::string &osExpectedOGRFieldName =
6805
0
            [&oMapArrowFieldNameToOGRFieldName, &sInfo]() -> const std::string &
6806
0
        {
6807
0
            const auto oIter =
6808
0
                oMapArrowFieldNameToOGRFieldName.find(sInfo.osName);
6809
0
            if (oIter != oMapArrowFieldNameToOGRFieldName.end())
6810
0
                return oIter->second;
6811
0
            return sInfo.osName;
6812
0
        }();
6813
0
        sInfo.iOGRFieldIdx =
6814
0
            poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str());
6815
0
        if (sInfo.iOGRFieldIdx >= 0)
6816
0
        {
6817
0
            bool bTypeOK = false;
6818
0
            const auto eOGRType =
6819
0
                poFeatureDefn->GetFieldDefn(sInfo.iOGRFieldIdx)->GetType();
6820
0
            sInfo.eTargetFieldType = eOGRType;
6821
0
            for (const auto &sType : gasArrowTypesToOGR)
6822
0
            {
6823
0
                if (strcmp(format, sType.arrowType) == 0)
6824
0
                {
6825
0
                    sInfo.bUseStringOptim = sType.eType == OFTString;
6826
0
                    sInfo.eNominalFieldType = sType.eType;
6827
0
                    if (eOGRType == sInfo.eNominalFieldType)
6828
0
                    {
6829
0
                        bTypeOK = true;
6830
0
                        break;
6831
0
                    }
6832
0
                    else if (eOGRType == OFTString)
6833
0
                    {
6834
0
                        bFallbackTypesUsed = true;
6835
0
                        bTypeOK = true;
6836
0
                        break;
6837
0
                    }
6838
0
                    else if (eOGRType == OFTInteger &&
6839
0
                             sType.eType == OFTInteger64)
6840
0
                    {
6841
                        // Potentially lossy.
6842
0
                        CPLDebug("OGR",
6843
0
                                 "For field %s, writing from Arrow array of "
6844
0
                                 "type Int64 into OGR Int32 field. "
6845
0
                                 "Potentially loss conversion can happen",
6846
0
                                 sInfo.osName.c_str());
6847
0
                        bFallbackTypesUsed = true;
6848
0
                        bTypeOK = true;
6849
0
                        break;
6850
0
                    }
6851
0
                    else if (eOGRType == OFTInteger && sType.eType == OFTReal)
6852
0
                    {
6853
                        // Potentially lossy.
6854
0
                        CPLDebug("OGR",
6855
0
                                 "For field %s, writing from Arrow array of "
6856
0
                                 "type Real into OGR Int32 field. "
6857
0
                                 "Potentially loss conversion can happen",
6858
0
                                 sInfo.osName.c_str());
6859
0
                        bFallbackTypesUsed = true;
6860
0
                        bTypeOK = true;
6861
0
                        break;
6862
0
                    }
6863
0
                    else if (eOGRType == OFTInteger64 && sType.eType == OFTReal)
6864
0
                    {
6865
                        // Potentially lossy.
6866
0
                        CPLDebug("OGR",
6867
0
                                 "For field %s, writing from Arrow array of "
6868
0
                                 "type Real into OGR Int64 field. "
6869
0
                                 "Potentially loss conversion can happen",
6870
0
                                 sInfo.osName.c_str());
6871
0
                        bFallbackTypesUsed = true;
6872
0
                        bTypeOK = true;
6873
0
                        break;
6874
0
                    }
6875
0
                    else if (eOGRType == OFTReal && sType.eType == OFTInteger64)
6876
0
                    {
6877
                        // Potentially lossy.
6878
0
                        CPLDebug("OGR",
6879
0
                                 "For field %s, writing from Arrow array of "
6880
0
                                 "type Int64 into OGR Real field. "
6881
0
                                 "Potentially loss conversion can happen",
6882
0
                                 sInfo.osName.c_str());
6883
0
                        bFallbackTypesUsed = true;
6884
0
                        bTypeOK = true;
6885
0
                        break;
6886
0
                    }
6887
0
                    else if ((eOGRType == OFTInteger64 ||
6888
0
                              eOGRType == OFTReal) &&
6889
0
                             sType.eType == OFTInteger)
6890
0
                    {
6891
                        // Non-lossy
6892
0
                        bFallbackTypesUsed = true;
6893
0
                        bTypeOK = true;
6894
0
                        break;
6895
0
                    }
6896
0
                    else if (eOGRType == OFTDateTime &&
6897
0
                             sType.eType == OFTString)
6898
0
                    {
6899
0
                        bFallbackTypesUsed = true;
6900
0
                        bTypeOK = true;
6901
0
                        break;
6902
0
                    }
6903
0
                    else
6904
0
                    {
6905
0
                        CPLError(CE_Failure, CPLE_AppDefined,
6906
0
                                 "For field %s, OGR field type is %s whereas "
6907
0
                                 "Arrow type implies %s",
6908
0
                                 sInfo.osName.c_str(),
6909
0
                                 OGR_GetFieldTypeName(eOGRType),
6910
0
                                 OGR_GetFieldTypeName(sType.eType));
6911
0
                        return false;
6912
0
                    }
6913
0
                }
6914
0
            }
6915
6916
0
            if (!bTypeOK && IsMap(format))
6917
0
            {
6918
0
                sInfo.eNominalFieldType = OFTString;
6919
0
                if (eOGRType == sInfo.eNominalFieldType)
6920
0
                {
6921
0
                    bTypeOK = true;
6922
0
                }
6923
0
                else
6924
0
                {
6925
0
                    CPLError(CE_Failure, CPLE_AppDefined,
6926
0
                             "For field %s, OGR field type is %s whereas "
6927
0
                             "Arrow type implies %s",
6928
0
                             sInfo.osName.c_str(),
6929
0
                             OGR_GetFieldTypeName(eOGRType),
6930
0
                             OGR_GetFieldTypeName(OFTString));
6931
0
                    return false;
6932
0
                }
6933
0
            }
6934
6935
0
            if (!bTypeOK && IsTimestamp(format))
6936
0
            {
6937
0
                sInfo.eNominalFieldType = OFTDateTime;
6938
0
                if (eOGRType == sInfo.eNominalFieldType)
6939
0
                {
6940
0
                    bTypeOK = true;
6941
0
                }
6942
0
                else if (eOGRType == OFTString)
6943
0
                {
6944
0
                    bFallbackTypesUsed = true;
6945
0
                    bTypeOK = true;
6946
0
                }
6947
0
                else
6948
0
                {
6949
0
                    CPLError(CE_Failure, CPLE_AppDefined,
6950
0
                             "For field %s, OGR field type is %s whereas "
6951
0
                             "Arrow type implies %s",
6952
0
                             sInfo.osName.c_str(),
6953
0
                             OGR_GetFieldTypeName(eOGRType),
6954
0
                             OGR_GetFieldTypeName(OFTDateTime));
6955
0
                    return false;
6956
0
                }
6957
0
            }
6958
6959
0
            if (!bTypeOK && IsFixedWidthBinary(format))
6960
0
            {
6961
0
                sInfo.eNominalFieldType = OFTBinary;
6962
0
                if (eOGRType == sInfo.eNominalFieldType)
6963
0
                {
6964
0
                    bTypeOK = true;
6965
0
                }
6966
0
                else if (eOGRType == OFTString)
6967
0
                {
6968
0
                    bFallbackTypesUsed = true;
6969
0
                    bTypeOK = true;
6970
0
                }
6971
0
                else
6972
0
                {
6973
0
                    CPLError(CE_Failure, CPLE_AppDefined,
6974
0
                             "For field %s, OGR field type is %s whereas "
6975
0
                             "Arrow type implies %s",
6976
0
                             sInfo.osName.c_str(),
6977
0
                             OGR_GetFieldTypeName(eOGRType),
6978
0
                             OGR_GetFieldTypeName(OFTBinary));
6979
0
                    return false;
6980
0
                }
6981
0
            }
6982
6983
0
            if (!bTypeOK && (IsList(format) || IsLargeList(format) ||
6984
0
                             IsFixedSizeList(format)))
6985
0
            {
6986
0
                const char *childFormat = schema->children[0]->format;
6987
0
                for (const auto &sType : gasListTypes)
6988
0
                {
6989
0
                    if (childFormat[0] == sType.arrowLetter &&
6990
0
                        childFormat[1] == 0)
6991
0
                    {
6992
0
                        sInfo.eNominalFieldType = sType.eType;
6993
0
                        if (eOGRType == sInfo.eNominalFieldType)
6994
0
                        {
6995
0
                            bTypeOK = true;
6996
0
                            break;
6997
0
                        }
6998
0
                        else if (eOGRType == OFTString)
6999
0
                        {
7000
0
                            bFallbackTypesUsed = true;
7001
0
                            bTypeOK = true;
7002
0
                            break;
7003
0
                        }
7004
0
                        else
7005
0
                        {
7006
0
                            CPLError(CE_Failure, CPLE_AppDefined,
7007
0
                                     "For field %s, OGR field type is %s "
7008
0
                                     "whereas "
7009
0
                                     "Arrow type implies %s",
7010
0
                                     sInfo.osName.c_str(),
7011
0
                                     OGR_GetFieldTypeName(eOGRType),
7012
0
                                     OGR_GetFieldTypeName(sType.eType));
7013
0
                            return false;
7014
0
                        }
7015
0
                    }
7016
0
                }
7017
7018
0
                if (!bTypeOK && IsDecimal(childFormat))
7019
0
                {
7020
0
                    if (!ParseDecimalFormat(childFormat, sInfo.nPrecision,
7021
0
                                            sInfo.nScale, sInfo.nWidthInBytes))
7022
0
                    {
7023
0
                        CPLError(CE_Failure, CPLE_AppDefined, "%s",
7024
0
                                 (std::string("Invalid field format ") +
7025
0
                                  childFormat + " for field " + osFieldPrefix +
7026
0
                                  fieldName)
7027
0
                                     .c_str());
7028
0
                        return false;
7029
0
                    }
7030
7031
0
                    const char *pszError = GetErrorIfUnsupportedDecimal(
7032
0
                        sInfo.nWidthInBytes, sInfo.nPrecision);
7033
0
                    if (pszError)
7034
0
                    {
7035
0
                        CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7036
0
                        return false;
7037
0
                    }
7038
7039
0
                    sInfo.eNominalFieldType = OFTRealList;
7040
0
                    if (eOGRType == sInfo.eNominalFieldType)
7041
0
                    {
7042
0
                        bTypeOK = true;
7043
0
                    }
7044
0
                    else if (eOGRType == OFTString)
7045
0
                    {
7046
0
                        bFallbackTypesUsed = true;
7047
0
                        bTypeOK = true;
7048
0
                    }
7049
0
                    else
7050
0
                    {
7051
0
                        CPLError(CE_Failure, CPLE_AppDefined,
7052
0
                                 "For field %s, OGR field type is %s whereas "
7053
0
                                 "Arrow type implies %s",
7054
0
                                 sInfo.osName.c_str(),
7055
0
                                 OGR_GetFieldTypeName(eOGRType),
7056
0
                                 OGR_GetFieldTypeName(OFTRealList));
7057
0
                        return false;
7058
0
                    }
7059
0
                }
7060
7061
0
                if (!bTypeOK && IsSupportForJSONObj(schema->children[0]))
7062
0
                {
7063
0
                    sInfo.eNominalFieldType = OFTString;
7064
0
                    if (eOGRType == sInfo.eNominalFieldType)
7065
0
                    {
7066
0
                        bTypeOK = true;
7067
0
                    }
7068
0
                    else
7069
0
                    {
7070
0
                        CPLError(CE_Failure, CPLE_AppDefined,
7071
0
                                 "For field %s, OGR field type is %s whereas "
7072
0
                                 "Arrow type implies %s",
7073
0
                                 sInfo.osName.c_str(),
7074
0
                                 OGR_GetFieldTypeName(eOGRType),
7075
0
                                 OGR_GetFieldTypeName(OFTString));
7076
0
                        return false;
7077
0
                    }
7078
0
                }
7079
7080
0
                if (!bTypeOK)
7081
0
                {
7082
0
                    CPLError(CE_Failure, CPLE_NotSupported, "%s",
7083
0
                             ("List of type '" + std::string(childFormat) +
7084
0
                              "' for field " + osFieldPrefix + fieldName +
7085
0
                              " is not supported.")
7086
0
                                 .c_str());
7087
0
                    return false;
7088
0
                }
7089
0
            }
7090
7091
0
            if (!bTypeOK && IsDecimal(format))
7092
0
            {
7093
0
                if (!ParseDecimalFormat(format, sInfo.nPrecision, sInfo.nScale,
7094
0
                                        sInfo.nWidthInBytes))
7095
0
                {
7096
0
                    CPLError(CE_Failure, CPLE_AppDefined, "%s",
7097
0
                             (std::string("Invalid field format ") + format +
7098
0
                              " for field " + osFieldPrefix + fieldName)
7099
0
                                 .c_str());
7100
0
                    return false;
7101
0
                }
7102
7103
0
                const char *pszError = GetErrorIfUnsupportedDecimal(
7104
0
                    sInfo.nWidthInBytes, sInfo.nPrecision);
7105
0
                if (pszError)
7106
0
                {
7107
0
                    CPLError(CE_Failure, CPLE_NotSupported, "%s", pszError);
7108
0
                    return false;
7109
0
                }
7110
7111
0
                sInfo.eNominalFieldType = OFTReal;
7112
0
                if (eOGRType == sInfo.eNominalFieldType)
7113
0
                {
7114
0
                    bTypeOK = true;
7115
0
                }
7116
0
                else if (eOGRType == OFTString)
7117
0
                {
7118
0
                    bFallbackTypesUsed = true;
7119
0
                    bTypeOK = true;
7120
0
                }
7121
0
                else
7122
0
                {
7123
0
                    CPLError(CE_Failure, CPLE_AppDefined,
7124
0
                             "For field %s, OGR field type is %s whereas "
7125
0
                             "Arrow type implies %s",
7126
0
                             sInfo.osName.c_str(),
7127
0
                             OGR_GetFieldTypeName(eOGRType),
7128
0
                             OGR_GetFieldTypeName(OFTReal));
7129
0
                    return false;
7130
0
                }
7131
0
            }
7132
7133
0
            if (!bTypeOK)
7134
0
            {
7135
0
                CPLError(CE_Failure, CPLE_NotSupported, "%s",
7136
0
                         ("Type '" + std::string(format) + "' for field " +
7137
0
                          osFieldPrefix + fieldName + " is not supported.")
7138
0
                             .c_str());
7139
0
                return false;
7140
0
            }
7141
0
        }
7142
0
        else
7143
0
        {
7144
0
            sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex(
7145
0
                osExpectedOGRFieldName.c_str());
7146
0
            if (sInfo.iOGRFieldIdx < 0)
7147
0
            {
7148
0
                if (pszGeomFieldName && pszGeomFieldName == sInfo.osName)
7149
0
                {
7150
0
                    if (poFeatureDefn->GetGeomFieldCount() == 0)
7151
0
                    {
7152
0
                        CPLError(CE_Failure, CPLE_AppDefined,
7153
0
                                 "Cannot find OGR geometry field for Arrow "
7154
0
                                 "array %s",
7155
0
                                 sInfo.osName.c_str());
7156
0
                        return false;
7157
0
                    }
7158
0
                    sInfo.iOGRFieldIdx = 0;
7159
0
                }
7160
0
                else
7161
0
                {
7162
                    // Check if ARROW:extension:name = ogc.wkb or geoarrow.wkb
7163
0
                    const char *pabyMetadata = schema->metadata;
7164
0
                    if (pabyMetadata)
7165
0
                    {
7166
0
                        const auto oMetadata =
7167
0
                            OGRParseArrowMetadata(pabyMetadata);
7168
0
                        auto oIter = oMetadata.find(ARROW_EXTENSION_NAME_KEY);
7169
0
                        if (oIter != oMetadata.end() &&
7170
0
                            (oIter->second == EXTENSION_NAME_OGC_WKB ||
7171
0
                             oIter->second == EXTENSION_NAME_GEOARROW_WKB))
7172
0
                        {
7173
0
                            if (poFeatureDefn->GetGeomFieldCount() == 0)
7174
0
                            {
7175
0
                                CPLError(CE_Failure, CPLE_AppDefined,
7176
0
                                         "Cannot find OGR geometry field "
7177
0
                                         "for Arrow array %s",
7178
0
                                         sInfo.osName.c_str());
7179
0
                                return false;
7180
0
                            }
7181
0
                            sInfo.iOGRFieldIdx = 0;
7182
0
                        }
7183
0
                    }
7184
0
                }
7185
7186
0
                if (sInfo.iOGRFieldIdx < 0)
7187
0
                {
7188
0
                    CPLError(CE_Failure, CPLE_AppDefined,
7189
0
                             "Cannot find OGR field for Arrow array %s",
7190
0
                             sInfo.osName.c_str());
7191
0
                    return false;
7192
0
                }
7193
0
            }
7194
7195
0
            if (!IsBinary(format) && !IsLargeBinary(format))
7196
0
            {
7197
0
                CPLError(CE_Failure, CPLE_AppDefined,
7198
0
                         "Geometry column '%s' should be of Arrow format "
7199
0
                         "'z' (binary) or 'Z' (large binary)",
7200
0
                         sInfo.osName.c_str());
7201
0
                return false;
7202
0
            }
7203
0
            sInfo.bIsGeomCol = true;
7204
0
        }
7205
0
    }
7206
7207
0
    asFieldInfo.emplace_back(std::move(sInfo));
7208
0
    return true;
7209
0
}
7210
7211
/************************************************************************/
7212
/*                           GetUInt64Value()                           */
7213
/************************************************************************/
7214
7215
static inline uint64_t GetUInt64Value(const struct ArrowSchema *schema,
7216
                                      const struct ArrowArray *array,
7217
                                      size_t iFeature)
7218
0
{
7219
0
    uint64_t nVal = 0;
7220
0
    CPLAssert(schema->format[1] == 0);
7221
0
    switch (schema->format[0])
7222
0
    {
7223
0
        case ARROW_LETTER_INT8:
7224
0
            nVal = GetValue<int8_t>(array, iFeature);
7225
0
            break;
7226
0
        case ARROW_LETTER_UINT8:
7227
0
            nVal = GetValue<uint8_t>(array, iFeature);
7228
0
            break;
7229
0
        case ARROW_LETTER_INT16:
7230
0
            nVal = GetValue<int16_t>(array, iFeature);
7231
0
            break;
7232
0
        case ARROW_LETTER_UINT16:
7233
0
            nVal = GetValue<uint16_t>(array, iFeature);
7234
0
            break;
7235
0
        case ARROW_LETTER_INT32:
7236
0
            nVal = GetValue<int32_t>(array, iFeature);
7237
0
            break;
7238
0
        case ARROW_LETTER_UINT32:
7239
0
            nVal = GetValue<uint32_t>(array, iFeature);
7240
0
            break;
7241
0
        case ARROW_LETTER_INT64:
7242
0
            nVal = GetValue<int64_t>(array, iFeature);
7243
0
            break;
7244
0
        case ARROW_LETTER_UINT64:
7245
0
            nVal = GetValue<uint64_t>(array, iFeature);
7246
0
            break;
7247
0
        default:
7248
            // Shouldn't happen given checks in BuildOGRFieldInfo()
7249
0
            CPLAssert(false);
7250
0
            break;
7251
0
    }
7252
0
    return nVal;
7253
0
}
7254
7255
/************************************************************************/
7256
/*                        GetWorkingBufferSize()                        */
7257
/************************************************************************/
7258
7259
static size_t GetWorkingBufferSize(const struct ArrowSchema *schema,
7260
                                   const struct ArrowArray *array,
7261
                                   size_t iFeature, int &iArrowIdxInOut,
7262
                                   const std::vector<FieldInfo> &asFieldInfo)
7263
0
{
7264
0
    const char *fieldName = schema->name;
7265
0
    const char *format = schema->format;
7266
0
    const int iArrowIdx = iArrowIdxInOut;
7267
0
    if (IsStructure(format))
7268
0
    {
7269
0
        if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7270
0
        {
7271
0
            ++iArrowIdxInOut;
7272
0
            return 0;
7273
0
        }
7274
7275
0
        size_t nRet = 0;
7276
0
        for (int64_t i = 0; i < array->n_children; ++i)
7277
0
        {
7278
0
            nRet += GetWorkingBufferSize(
7279
0
                schema->children[i], array->children[i],
7280
0
                iFeature + static_cast<size_t>(array->offset), iArrowIdxInOut,
7281
0
                asFieldInfo);
7282
0
        }
7283
0
        return nRet;
7284
0
    }
7285
0
    ++iArrowIdxInOut;
7286
7287
0
    if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7288
0
        return 0;
7289
7290
0
    const uint8_t *pabyValidity =
7291
0
        static_cast<const uint8_t *>(array->buffers[0]);
7292
0
    if (array->null_count != 0 && pabyValidity &&
7293
0
        !TestBit(pabyValidity, static_cast<size_t>(iFeature + array->offset)))
7294
0
    {
7295
        // empty string
7296
0
        return 0;
7297
0
    }
7298
7299
0
    if (asFieldInfo[iArrowIdx].bUseDictionary)
7300
0
    {
7301
0
        const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7302
0
        const auto dictArray = array->dictionary;
7303
0
        if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7304
0
        {
7305
0
            CPLError(CE_Failure, CPLE_AppDefined,
7306
0
                     "Feature %" PRIu64
7307
0
                     ", field %s: invalid dictionary index: %" PRIu64,
7308
0
                     static_cast<uint64_t>(iFeature), fieldName, nDictIdx);
7309
0
            return 0;
7310
0
        }
7311
7312
0
        array = dictArray;
7313
0
        schema = schema->dictionary;
7314
0
        format = schema->format;
7315
0
        iFeature = static_cast<size_t>(nDictIdx);
7316
0
    }
7317
7318
0
    if (IsString(format))
7319
0
    {
7320
0
        const auto *panOffsets =
7321
0
            static_cast<const uint32_t *>(array->buffers[1]) + array->offset;
7322
0
        return 1 + (panOffsets[iFeature + 1] - panOffsets[iFeature]);
7323
0
    }
7324
0
    else if (IsLargeString(format))
7325
0
    {
7326
0
        const auto *panOffsets =
7327
0
            static_cast<const uint64_t *>(array->buffers[1]) + array->offset;
7328
0
        return 1 + static_cast<size_t>(panOffsets[iFeature + 1] -
7329
0
                                       panOffsets[iFeature]);
7330
0
    }
7331
0
    return 0;
7332
0
}
7333
7334
/************************************************************************/
7335
/*                             FillField()                              */
7336
/************************************************************************/
7337
7338
template <typename ArrowType, typename OGRType = ArrowType>
7339
inline static void FillField(const struct ArrowArray *array, int iOGRFieldIdx,
7340
                             size_t iFeature, OGRFeature &oFeature)
7341
0
{
7342
0
    const auto *panValues = static_cast<const ArrowType *>(array->buffers[1]);
7343
0
    oFeature.SetFieldSameTypeUnsafe(
7344
0
        iOGRFieldIdx,
7345
0
        static_cast<OGRType>(panValues[iFeature + array->offset]));
7346
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<signed char, signed char>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<unsigned char, unsigned char>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<short, short>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<unsigned short, unsigned short>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<int, int>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<unsigned int, long long>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<long, long long>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<unsigned long, double>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<float, double>(ArrowArray const*, int, unsigned long, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillField<double, double>(ArrowArray const*, int, unsigned long, OGRFeature&)
7347
7348
/************************************************************************/
7349
/*                          FillFieldString()                           */
7350
/************************************************************************/
7351
7352
template <typename OffsetType>
7353
inline static void
7354
FillFieldString(const struct ArrowArray *array, int iOGRFieldIdx,
7355
                size_t iFeature, int iArrowIdx,
7356
                const std::vector<FieldInfo> &asFieldInfo,
7357
                std::string &osWorkingBuffer, OGRFeature &oFeature)
7358
0
{
7359
0
    const auto *panOffsets =
7360
0
        static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7361
0
    const char *pszStr = static_cast<const char *>(array->buffers[2]);
7362
0
    const size_t nLen =
7363
0
        static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7364
0
    if (asFieldInfo[iArrowIdx].bUseStringOptim)
7365
0
    {
7366
0
        oFeature.SetFieldSameTypeUnsafe(
7367
0
            iOGRFieldIdx, &osWorkingBuffer[0] + osWorkingBuffer.size());
7368
0
        osWorkingBuffer.append(pszStr + panOffsets[iFeature], nLen);
7369
0
        osWorkingBuffer.push_back(0);  // append null character
7370
0
    }
7371
0
    else
7372
0
    {
7373
0
        const std::string osTmp(pszStr, nLen);
7374
0
        oFeature.SetField(iOGRFieldIdx, osTmp.c_str());
7375
0
    }
7376
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldString<unsigned int>(ArrowArray const*, int, unsigned long, int, std::__1::vector<FieldInfo, std::__1::allocator<FieldInfo> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:void FillFieldString<unsigned long>(ArrowArray const*, int, unsigned long, int, std::__1::vector<FieldInfo, std::__1::allocator<FieldInfo> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, OGRFeature&)
7377
7378
/************************************************************************/
7379
/*                          FillFieldBinary()                           */
7380
/************************************************************************/
7381
7382
template <typename OffsetType>
7383
inline static bool
7384
FillFieldBinary(const struct ArrowArray *array, int iOGRFieldIdx,
7385
                size_t iFeature, int iArrowIdx,
7386
                const std::vector<FieldInfo> &asFieldInfo,
7387
                const std::string &osFieldPrefix, const char *pszFieldName,
7388
                OGRFeature &oFeature)
7389
0
{
7390
0
    const auto *panOffsets =
7391
0
        static_cast<const OffsetType *>(array->buffers[1]) + array->offset;
7392
0
    const GByte *pabyData = static_cast<const GByte *>(array->buffers[2]) +
7393
0
                            static_cast<size_t>(panOffsets[iFeature]);
7394
0
    const size_t nLen =
7395
0
        static_cast<size_t>(panOffsets[iFeature + 1] - panOffsets[iFeature]);
7396
0
    if (asFieldInfo[iArrowIdx].bIsGeomCol)
7397
0
    {
7398
0
        size_t nBytesConsumedOut = 0;
7399
7400
        // Check if we can reuse the existing geometry, to save dynamic memory
7401
        // allocations.
7402
0
        if (nLen >= 5 && pabyData[0] == wkbNDR && pabyData[1] <= wkbTriangle &&
7403
0
            pabyData[2] == 0 && pabyData[3] == 0 && pabyData[4] == 0)
7404
0
        {
7405
0
            const auto poExistingGeom = oFeature.GetGeomFieldRef(iOGRFieldIdx);
7406
0
            if (poExistingGeom &&
7407
0
                poExistingGeom->getGeometryType() == pabyData[1])
7408
0
            {
7409
0
                poExistingGeom->importFromWkb(pabyData, nLen, wkbVariantIso,
7410
0
                                              nBytesConsumedOut);
7411
0
                return true;
7412
0
            }
7413
0
        }
7414
7415
0
        OGRGeometry *poGeometry = nullptr;
7416
0
        OGRGeometryFactory::createFromWkb(pabyData, nullptr, &poGeometry, nLen,
7417
0
                                          wkbVariantIso, nBytesConsumedOut);
7418
0
        oFeature.SetGeomFieldDirectly(iOGRFieldIdx, poGeometry);
7419
0
    }
7420
0
    else
7421
0
    {
7422
0
        if (nLen > static_cast<size_t>(std::numeric_limits<int>::max()))
7423
0
        {
7424
0
            CPLError(CE_Failure, CPLE_NotSupported,
7425
0
                     "Content for field %s%s is too large",
7426
0
                     osFieldPrefix.c_str(), pszFieldName);
7427
0
            return false;
7428
0
        }
7429
0
        oFeature.SetField(iOGRFieldIdx, static_cast<int>(nLen), pabyData);
7430
0
    }
7431
0
    return true;
7432
0
}
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillFieldBinary<unsigned int>(ArrowArray const*, int, unsigned long, int, std::__1::vector<FieldInfo, std::__1::allocator<FieldInfo> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const*, OGRFeature&)
Unexecuted instantiation: ogrlayerarrow.cpp:bool FillFieldBinary<unsigned long>(ArrowArray const*, int, unsigned long, int, std::__1::vector<FieldInfo, std::__1::allocator<FieldInfo> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const*, OGRFeature&)
7433
7434
/************************************************************************/
7435
/*                            FillFeature()                             */
7436
/************************************************************************/
7437
7438
static bool FillFeature(OGRLayer *poLayer, const struct ArrowSchema *schema,
7439
                        const struct ArrowArray *array,
7440
                        const std::string &osFieldPrefix, size_t iFeature,
7441
                        int &iArrowIdxInOut,
7442
                        const std::vector<FieldInfo> &asFieldInfo,
7443
                        OGRFeature &oFeature, std::string &osWorkingBuffer)
7444
7445
0
{
7446
0
    const char *fieldName = schema->name;
7447
0
    const char *format = schema->format;
7448
0
    const int iArrowIdx = iArrowIdxInOut;
7449
0
    if (IsStructure(format))
7450
0
    {
7451
0
        if (asFieldInfo[iArrowIdx].eNominalFieldType == OFTDateTime)
7452
0
        {
7453
0
            ++iArrowIdxInOut;
7454
0
            const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7455
7456
0
            if (array->null_count != 0)
7457
0
            {
7458
0
                const uint8_t *pabyValidity =
7459
0
                    static_cast<const uint8_t *>(array->buffers[0]);
7460
0
                if (pabyValidity &&
7461
0
                    !TestBit(pabyValidity,
7462
0
                             static_cast<size_t>(iFeature + array->offset)))
7463
0
                {
7464
0
                    OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7465
0
                    OGR_RawField_SetNull(psField);
7466
0
                }
7467
0
            }
7468
7469
0
            const auto *panTimestamps =
7470
0
                static_cast<const int64_t *>(array->children[0]->buffers[1]);
7471
0
            int64_t nTimestamp = panTimestamps[iFeature + array->offset];
7472
0
            const auto *panOffsetsMinutes =
7473
0
                static_cast<const int16_t *>(array->children[1]->buffers[1]);
7474
0
            const int nOffsetMinute =
7475
0
                panOffsetsMinutes[iFeature + array->offset];
7476
0
            const int nTZFlag =
7477
0
                nOffsetMinute >= -14 * 60 && nOffsetMinute <= 14 * 60
7478
0
                    ? OGR_TZFLAG_UTC + nOffsetMinute / 15
7479
0
                    : OGR_TZFLAG_UTC;
7480
7481
0
            const char *formatTS = schema->children[0]->format;
7482
0
            const int nInvFactorToSecond =
7483
0
                IsTimestampSeconds(formatTS)        ? 1
7484
0
                : IsTimestampMilliseconds(formatTS) ? 1000
7485
0
                : IsTimestampMicroseconds(formatTS) ? 1000 * 1000
7486
0
                : IsTimestampNanoseconds(formatTS)  ? 1000 * 1000 * 1000
7487
0
                                                    : 1;
7488
0
            double floatingPart = 0;
7489
0
            if (nInvFactorToSecond)
7490
0
            {
7491
0
                floatingPart = (nTimestamp % nInvFactorToSecond) /
7492
0
                               double(nInvFactorToSecond);
7493
0
                nTimestamp /= nInvFactorToSecond;
7494
0
            }
7495
0
            nTimestamp += (nTZFlag - OGR_TZFLAG_UTC) * 15 * 60;
7496
0
            struct tm dt;
7497
0
            CPLUnixTimeToYMDHMS(nTimestamp, &dt);
7498
0
            oFeature.SetField(iOGRFieldIdx, dt.tm_year + 1900, dt.tm_mon + 1,
7499
0
                              dt.tm_mday, dt.tm_hour, dt.tm_min,
7500
0
                              static_cast<float>(dt.tm_sec + floatingPart),
7501
0
                              nTZFlag);
7502
0
        }
7503
0
        else
7504
0
        {
7505
0
            const std::string osNewPrefix(osFieldPrefix + fieldName + ".");
7506
0
            for (int64_t i = 0; i < array->n_children; ++i)
7507
0
            {
7508
0
                if (!FillFeature(poLayer, schema->children[i],
7509
0
                                 array->children[i], osNewPrefix,
7510
0
                                 iFeature + static_cast<size_t>(array->offset),
7511
0
                                 iArrowIdxInOut, asFieldInfo, oFeature,
7512
0
                                 osWorkingBuffer))
7513
0
                    return false;
7514
0
            }
7515
0
        }
7516
0
        return true;
7517
0
    }
7518
0
    ++iArrowIdxInOut;
7519
0
    const int iOGRFieldIdx = asFieldInfo[iArrowIdx].iOGRFieldIdx;
7520
7521
0
    if (asFieldInfo[iArrowIdx].bUseDictionary)
7522
0
    {
7523
0
        format = schema->dictionary->format;
7524
0
    }
7525
7526
0
    if (array->null_count != 0)
7527
0
    {
7528
0
        const uint8_t *pabyValidity =
7529
0
            static_cast<const uint8_t *>(array->buffers[0]);
7530
0
        if (pabyValidity &&
7531
0
            !TestBit(pabyValidity,
7532
0
                     static_cast<size_t>(iFeature + array->offset)))
7533
0
        {
7534
0
            if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7535
0
                oFeature.SetFID(OGRNullFID);
7536
0
            else if (asFieldInfo[iArrowIdx].bIsGeomCol)
7537
0
                oFeature.SetGeomFieldDirectly(iOGRFieldIdx, nullptr);
7538
0
            else if (asFieldInfo[iArrowIdx].eSetFeatureFieldType == OFTString)
7539
0
            {
7540
0
                OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7541
0
                if (!asFieldInfo[iArrowIdx].bUseStringOptim)
7542
0
                {
7543
0
                    if (IsValidField(psField))
7544
0
                    {
7545
0
                        CPLFree(psField->String);
7546
0
                        OGR_RawField_SetNull(psField);
7547
0
                    }
7548
0
                }
7549
0
                else
7550
0
                {
7551
0
                    OGR_RawField_SetNull(psField);
7552
0
                }
7553
0
            }
7554
0
            else
7555
0
            {
7556
0
                OGRField *psField = oFeature.GetRawFieldRef(iOGRFieldIdx);
7557
0
                switch (asFieldInfo[iArrowIdx].eSetFeatureFieldType)
7558
0
                {
7559
0
                    case OFTRealList:
7560
0
                    case OFTIntegerList:
7561
0
                    case OFTInteger64List:
7562
0
                        if (IsValidField(psField))
7563
0
                            CPLFree(psField->IntegerList.paList);
7564
0
                        break;
7565
7566
0
                    case OFTStringList:
7567
0
                        if (IsValidField(psField))
7568
0
                            CSLDestroy(psField->StringList.paList);
7569
0
                        break;
7570
7571
0
                    case OFTBinary:
7572
0
                        if (IsValidField(psField))
7573
0
                            CPLFree(psField->Binary.paData);
7574
0
                        break;
7575
7576
0
                    default:
7577
0
                        break;
7578
0
                }
7579
0
                OGR_RawField_SetNull(psField);
7580
0
            }
7581
0
            return true;
7582
0
        }
7583
0
    }
7584
7585
0
    if (asFieldInfo[iArrowIdx].bUseDictionary)
7586
0
    {
7587
0
        const uint64_t nDictIdx = GetUInt64Value(schema, array, iFeature);
7588
0
        auto dictArray = array->dictionary;
7589
0
        if (nDictIdx >= static_cast<uint64_t>(dictArray->length))
7590
0
        {
7591
0
            CPLError(CE_Failure, CPLE_AppDefined,
7592
0
                     "Feature %" PRIu64
7593
0
                     ", field %s: invalid dictionary index: %" PRIu64,
7594
0
                     static_cast<uint64_t>(iFeature),
7595
0
                     (osFieldPrefix + fieldName).c_str(), nDictIdx);
7596
0
            return false;
7597
0
        }
7598
0
        array = dictArray;
7599
0
        schema = schema->dictionary;
7600
0
        iFeature = static_cast<size_t>(nDictIdx);
7601
0
    }
7602
7603
0
    if (IsBoolean(format))
7604
0
    {
7605
0
        const uint8_t *pabyValues =
7606
0
            static_cast<const uint8_t *>(array->buffers[1]);
7607
0
        oFeature.SetFieldSameTypeUnsafe(
7608
0
            iOGRFieldIdx,
7609
0
            TestBit(pabyValues, static_cast<size_t>(iFeature + array->offset))
7610
0
                ? 1
7611
0
                : 0);
7612
0
        return true;
7613
0
    }
7614
0
    else if (IsInt8(format))
7615
0
    {
7616
0
        FillField<int8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7617
0
        return true;
7618
0
    }
7619
0
    else if (IsUInt8(format))
7620
0
    {
7621
0
        FillField<uint8_t>(array, iOGRFieldIdx, iFeature, oFeature);
7622
0
        return true;
7623
0
    }
7624
0
    else if (IsInt16(format))
7625
0
    {
7626
0
        FillField<int16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7627
0
        return true;
7628
0
    }
7629
0
    else if (IsUInt16(format))
7630
0
    {
7631
0
        FillField<uint16_t>(array, iOGRFieldIdx, iFeature, oFeature);
7632
0
        return true;
7633
0
    }
7634
0
    else if (IsInt32(format))
7635
0
    {
7636
0
        if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7637
0
        {
7638
0
            const auto *panValues =
7639
0
                static_cast<const int32_t *>(array->buffers[1]);
7640
0
            oFeature.SetFID(panValues[iFeature + array->offset]);
7641
0
        }
7642
0
        else
7643
0
        {
7644
0
            FillField<int32_t>(array, iOGRFieldIdx, iFeature, oFeature);
7645
0
        }
7646
0
        return true;
7647
0
    }
7648
0
    else if (IsUInt32(format))
7649
0
    {
7650
0
        FillField<uint32_t, GIntBig>(array, iOGRFieldIdx, iFeature, oFeature);
7651
0
        return true;
7652
0
    }
7653
0
    else if (IsInt64(format))
7654
0
    {
7655
0
        if (iOGRFieldIdx == FID_COLUMN_SPECIAL_OGR_FIELD_IDX)
7656
0
        {
7657
0
            const auto *panValues =
7658
0
                static_cast<const int64_t *>(array->buffers[1]);
7659
0
            oFeature.SetFID(panValues[iFeature + array->offset]);
7660
0
        }
7661
0
        else
7662
0
        {
7663
0
            FillField<int64_t, GIntBig>(array, iOGRFieldIdx, iFeature,
7664
0
                                        oFeature);
7665
0
        }
7666
0
        return true;
7667
0
    }
7668
0
    else if (IsUInt64(format))
7669
0
    {
7670
0
        FillField<uint64_t, double>(array, iOGRFieldIdx, iFeature, oFeature);
7671
0
        return true;
7672
0
    }
7673
0
    else if (IsFloat32(format))
7674
0
    {
7675
0
        FillField<float, double>(array, iOGRFieldIdx, iFeature, oFeature);
7676
0
        return true;
7677
0
    }
7678
0
    else if (IsFloat64(format))
7679
0
    {
7680
0
        FillField<double>(array, iOGRFieldIdx, iFeature, oFeature);
7681
0
        return true;
7682
0
    }
7683
0
    else if (IsString(format))
7684
0
    {
7685
0
        FillFieldString<uint32_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7686
0
                                  asFieldInfo, osWorkingBuffer, oFeature);
7687
0
        return true;
7688
0
    }
7689
0
    else if (IsLargeString(format))
7690
0
    {
7691
0
        FillFieldString<uint64_t>(array, iOGRFieldIdx, iFeature, iArrowIdx,
7692
0
                                  asFieldInfo, osWorkingBuffer, oFeature);
7693
0
        return true;
7694
0
    }
7695
0
    else if (IsBinary(format))
7696
0
    {
7697
0
        return FillFieldBinary<uint32_t>(array, iOGRFieldIdx, iFeature,
7698
0
                                         iArrowIdx, asFieldInfo, osFieldPrefix,
7699
0
                                         fieldName, oFeature);
7700
0
    }
7701
0
    else if (IsLargeBinary(format))
7702
0
    {
7703
0
        return FillFieldBinary<uint64_t>(array, iOGRFieldIdx, iFeature,
7704
0
                                         iArrowIdx, asFieldInfo, osFieldPrefix,
7705
0
                                         fieldName, oFeature);
7706
0
    }
7707
0
    else if (asFieldInfo[iArrowIdx].nPrecision > 0)
7708
0
    {
7709
        // fits on a int64
7710
0
        CPLAssert(asFieldInfo[iArrowIdx].nPrecision <= 19);
7711
        // either 128 or 256 bits
7712
0
        CPLAssert((asFieldInfo[iArrowIdx].nWidthInBytes % 8) == 0);
7713
0
        const int nWidthIn64BitWord = asFieldInfo[iArrowIdx].nWidthInBytes / 8;
7714
7715
0
        if (IsList(format))
7716
0
        {
7717
0
            const auto panOffsets =
7718
0
                static_cast<const uint32_t *>(array->buffers[1]) +
7719
0
                array->offset;
7720
0
            const auto childArray = array->children[0];
7721
0
            std::vector<double> aValues;
7722
0
            for (auto i = panOffsets[iFeature]; i < panOffsets[iFeature + 1];
7723
0
                 ++i)
7724
0
            {
7725
0
                aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7726
0
                                                  asFieldInfo[iArrowIdx].nScale,
7727
0
                                                  i));
7728
0
            }
7729
0
            oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7730
0
                              aValues.data());
7731
0
            return true;
7732
0
        }
7733
0
        else if (IsLargeList(format))
7734
0
        {
7735
0
            const auto panOffsets =
7736
0
                static_cast<const uint64_t *>(array->buffers[1]) +
7737
0
                array->offset;
7738
0
            const auto childArray = array->children[0];
7739
0
            std::vector<double> aValues;
7740
0
            for (auto i = static_cast<size_t>(panOffsets[iFeature]);
7741
0
                 i < static_cast<size_t>(panOffsets[iFeature + 1]); ++i)
7742
0
            {
7743
0
                aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7744
0
                                                  asFieldInfo[iArrowIdx].nScale,
7745
0
                                                  i));
7746
0
            }
7747
0
            oFeature.SetField(iOGRFieldIdx, static_cast<int>(aValues.size()),
7748
0
                              aValues.data());
7749
0
            return true;
7750
0
        }
7751
0
        else if (IsFixedSizeList(format))
7752
0
        {
7753
0
            const int nVals = GetFixedSizeList(format);
7754
0
            const auto childArray = array->children[0];
7755
0
            std::vector<double> aValues;
7756
0
            for (int i = 0; i < nVals; ++i)
7757
0
            {
7758
0
                aValues.push_back(GetValueDecimal(childArray, nWidthIn64BitWord,
7759
0
                                                  asFieldInfo[iArrowIdx].nScale,
7760
0
                                                  iFeature * nVals + i));
7761
0
            }
7762
0
            oFeature.SetField(iOGRFieldIdx, nVals, aValues.data());
7763
0
            return true;
7764
0
        }
7765
7766
0
        CPLAssert(format[0] == ARROW_LETTER_DECIMAL);
7767
7768
0
        oFeature.SetFieldSameTypeUnsafe(
7769
0
            iOGRFieldIdx,
7770
0
            GetValueDecimal(array, nWidthIn64BitWord,
7771
0
                            asFieldInfo[iArrowIdx].nScale, iFeature));
7772
0
        return true;
7773
0
    }
7774
0
    else if (SetFieldForOtherFormats(
7775
0
                 oFeature, iOGRFieldIdx,
7776
0
                 static_cast<size_t>(iFeature + array->offset), schema, array))
7777
0
    {
7778
0
        return true;
7779
0
    }
7780
7781
0
    CPLError(CE_Failure, CPLE_NotSupported, "%s",
7782
0
             ("Type '" + std::string(format) + "' for field " + osFieldPrefix +
7783
0
              fieldName + " is not supported.")
7784
0
                 .c_str());
7785
0
    return false;
7786
0
}
7787
7788
/************************************************************************/
7789
/*                     OGRLayer::WriteArrowBatch()                      */
7790
/************************************************************************/
7791
7792
// clang-format off
7793
/** Writes a batch of rows from an ArrowArray.
7794
 *
7795
 * This is semantically close to calling CreateFeature() with multiple features
7796
 * at once.
7797
 *
7798
 * The ArrowArray must be of type struct (format=+s), and its children generally
7799
 * map to an OGR attribute or geometry field (unless they are struct themselves).
7800
 *
7801
 * Method IsArrowSchemaSupported() can be called to determine if the schema
7802
 * will be supported by WriteArrowBatch().
7803
 *
7804
 * OGR fields for the corresponding children arrays must exist and be of a
7805
 * compatible type. For attribute fields, they should generally be created with
7806
 * CreateFieldFromArrowSchema(). This is strictly required for output drivers
7807
 * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
7808
 * they should be created either implicitly at CreateLayer() time
7809
 * (if geom_type != wkbNone), or explicitly with CreateGeomField().
7810
 *
7811
 * Starting with GDAL 3.9, some tolerance has been introduced in the base
7812
 * implementation of WriteArrowBatch() for scenarios that involve appending to
7813
 * an already existing output layer when the input Arrow field type and the
7814
 * OGR layer field type are 32/64-bit integers or real numbers, but do not match
7815
 * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
7816
 * can be used to control the behavior in case of lossy conversion.
7817
 *
7818
 * Arrays for geometry columns should be of binary or large binary type and
7819
 * contain WKB geometry.
7820
 *
7821
 * Note that the passed array may be set to a released state
7822
 * (array->release==NULL) after this call (not by the base implementation,
7823
 * but in specialized ones such as Parquet or Arrow for example)
7824
 *
7825
 * Supported options of the base implementation are:
7826
 * <ul>
7827
 * <li>FID=name. Name of the FID column in the array. If not provided,
7828
 *     GetFIDColumn() is used to determine it. The special name
7829
 *     OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
7830
 *     GetFIDColumn() are set.
7831
 *     The corresponding ArrowArray must be of type int32 (i) or int64 (l).
7832
 *     On input, values of the FID column are used to create the feature.
7833
 *     On output, the values of the FID column may be set with the FID of the
7834
 *     created feature (if the array is not released).
7835
 * </li>
7836
 * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
7837
 *     input FID is not preserved in the output layer. The default is NOTHING.
7838
 *     Setting it to ERROR will cause the function to error out. Setting it
7839
 *     to WARNING will cause the function to emit a warning but continue its
7840
 *     processing.
7841
 * </li>
7842
 * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
7843
 *     Action to perform when the input field value is not preserved in the
7844
 *     output layer.
7845
 *     The default is WARNING, which will cause the function to emit a warning
7846
 *     but continue its processing.
7847
 *     Setting it to ERROR will cause the function to error out if a lossy
7848
 *     conversion is detected.
7849
 * </li>
7850
 * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
7851
 *     GetGeometryColumn() is used. The special name
7852
 *     OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
7853
 *     GEOMETRY_NAME nor GetGeometryColumn() are set.
7854
 *     Geometry columns are also identified if they have
7855
 *     ARROW:extension:name=ogc.wkb as a field metadata.
7856
 *     The corresponding ArrowArray must be of type binary (w) or large
7857
 *     binary (W).
7858
 * </li>
7859
 * </ul>
7860
 *
7861
 * The following example demonstrates how to copy a layer from one format to
7862
 * another one (assuming it has at most a single geometry column):
7863
\code{.py}
7864
    def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
7865
        stream = src_lyr.GetArrowStream()
7866
        schema = stream.GetSchema()
7867
7868
        # If the source layer has a FID column and the output driver supports
7869
        # a FID layer creation option, set it to the source FID column name.
7870
        if src_lyr.GetFIDColumn():
7871
            creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
7872
                "DS_LAYER_CREATIONOPTIONLIST"
7873
            )
7874
            if creationOptions and '"FID"' in creationOptions:
7875
                lcos["FID"] = src_lyr.GetFIDColumn()
7876
7877
        with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
7878
            if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
7879
                out_lyr = out_ds.CreateLayer(
7880
                    src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
7881
                )
7882
                for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
7883
                    out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
7884
            else:
7885
                out_lyr = out_ds.CreateLayer(
7886
                    src_lyr.GetName(),
7887
                    geom_type=src_lyr.GetGeomType(),
7888
                    srs=src_lyr.GetSpatialRef(),
7889
                    options=lcos,
7890
                )
7891
7892
            success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
7893
            assert success, error_msg
7894
7895
            src_geom_field_names = [
7896
                src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
7897
                for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
7898
            ]
7899
            for i in range(schema.GetChildrenCount()):
7900
                # GetArrowStream() may return "OGC_FID" for an unnamed source FID
7901
                # column and "wkb_geometry" for an unnamed source geometry column.
7902
                # Also test GetFIDColumn() and src_geom_field_names if they are
7903
                # named.
7904
                if (
7905
                    schema.GetChild(i).GetName()
7906
                    not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
7907
                    and schema.GetChild(i).GetName() not in src_geom_field_names
7908
                ):
7909
                    out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
7910
7911
            write_options = []
7912
            if src_lyr.GetFIDColumn():
7913
                write_options.append("FID=" + src_lyr.GetFIDColumn())
7914
            if (
7915
                src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
7916
                and src_lyr.GetGeometryColumn()
7917
            ):
7918
                write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
7919
7920
            while True:
7921
                array = stream.GetNextRecordBatch()
7922
                if array is None:
7923
                    break
7924
                out_lyr.WriteArrowBatch(schema, array, write_options)
7925
\endcode
7926
 *
7927
 * This method and CreateFeature() are mutually exclusive in the same session.
7928
 *
7929
 * This method is the same as the C function OGR_L_WriteArrowBatch().
7930
 *
7931
 * @param schema Schema of array
7932
 * @param array Array of type struct. It may be released (array->release==NULL)
7933
 *              after calling this method.
7934
 * @param papszOptions Options. Null terminated list, or nullptr.
7935
 * @return true in case of success
7936
 * @since 3.8
7937
 */
7938
// clang-format on
7939
7940
bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema,
7941
                               struct ArrowArray *array,
7942
                               CSLConstList papszOptions)
7943
0
{
7944
0
    const char *format = schema->format;
7945
0
    if (!IsStructure(format))
7946
0
    {
7947
0
        CPLError(CE_Failure, CPLE_AppDefined,
7948
0
                 "WriteArrowBatch() should be called on a schema that is a "
7949
0
                 "struct of fields");
7950
0
        return false;
7951
0
    }
7952
7953
0
    if (schema->n_children != array->n_children)
7954
0
    {
7955
0
        CPLError(CE_Failure, CPLE_AppDefined,
7956
0
                 "WriteArrowBatch(): schema->n_children (%d) != "
7957
0
                 "array->n_children (%d)",
7958
0
                 int(schema->n_children), int(array->n_children));
7959
0
        return false;
7960
0
    }
7961
7962
0
    CPLStringList aosNativeTypes;
7963
0
    auto poDS = const_cast<OGRLayer *>(this)->GetDataset();
7964
0
    if (poDS)
7965
0
    {
7966
0
        auto poDriver = poDS->GetDriver();
7967
0
        if (poDriver)
7968
0
        {
7969
0
            const char *pszMetadataItem =
7970
0
                poDriver->GetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES);
7971
0
            if (pszMetadataItem)
7972
0
                aosNativeTypes = CSLTokenizeString2(pszMetadataItem, " ", 0);
7973
0
        }
7974
0
    }
7975
7976
0
    std::vector<FieldInfo> asFieldInfo;
7977
0
    auto poLayerDefn = GetLayerDefn();
7978
0
    const char *pszFIDName =
7979
0
        CSLFetchNameValueDef(papszOptions, "FID", GetFIDColumn());
7980
0
    if (!pszFIDName || pszFIDName[0] == 0)
7981
0
        pszFIDName = DEFAULT_ARROW_FID_NAME;
7982
0
    const bool bErrorIfFIDNotPreserved =
7983
0
        EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7984
0
              "ERROR");
7985
0
    const bool bWarningIfFIDNotPreserved =
7986
0
        EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FID_NOT_PRESERVED", ""),
7987
0
              "WARNING");
7988
0
    const bool bErrorIfFieldNotPreserved =
7989
0
        EQUAL(CSLFetchNameValueDef(papszOptions, "IF_FIELD_NOT_PRESERVED", ""),
7990
0
              "ERROR");
7991
0
    const char *pszGeomFieldName = CSLFetchNameValueDef(
7992
0
        papszOptions, "GEOMETRY_NAME", GetGeometryColumn());
7993
0
    if (!pszGeomFieldName || pszGeomFieldName[0] == 0)
7994
0
        pszGeomFieldName = DEFAULT_ARROW_GEOMETRY_NAME;
7995
0
    const struct ArrowSchema *schemaFIDColumn = nullptr;
7996
0
    struct ArrowArray *arrayFIDColumn = nullptr;
7997
0
    bool bFallbackTypesUsed = false;
7998
0
    for (int64_t i = 0; i < schema->n_children; ++i)
7999
0
    {
8000
0
        if (!BuildOGRFieldInfo(schema->children[i], array->children[i],
8001
0
                               poLayerDefn, std::string(), aosNativeTypes,
8002
0
                               bFallbackTypesUsed, asFieldInfo, pszFIDName,
8003
0
                               pszGeomFieldName, this,
8004
0
                               m_poPrivate->m_oMapArrowFieldNameToOGRFieldName,
8005
0
                               schemaFIDColumn, arrayFIDColumn))
8006
0
        {
8007
0
            return false;
8008
0
        }
8009
0
    }
8010
8011
0
    std::map<int, int> oMapOGRFieldIndexToFieldInfoIndex;
8012
0
    std::vector<bool> abUseStringOptim(poLayerDefn->GetFieldCount(), false);
8013
0
    for (int i = 0; i < static_cast<int>(asFieldInfo.size()); ++i)
8014
0
    {
8015
0
        if (asFieldInfo[i].iOGRFieldIdx >= 0 && !asFieldInfo[i].bIsGeomCol)
8016
0
        {
8017
0
            CPLAssert(oMapOGRFieldIndexToFieldInfoIndex.find(
8018
0
                          asFieldInfo[i].iOGRFieldIdx) ==
8019
0
                      oMapOGRFieldIndexToFieldInfoIndex.end());
8020
0
            oMapOGRFieldIndexToFieldInfoIndex[asFieldInfo[i].iOGRFieldIdx] = i;
8021
0
            abUseStringOptim[asFieldInfo[i].iOGRFieldIdx] =
8022
0
                asFieldInfo[i].bUseStringOptim;
8023
0
        }
8024
0
    }
8025
8026
0
    OGRFeatureDefn oLayerDefnTmp(poLayerDefn->GetName());
8027
8028
0
    struct LayerDefnTmpRefReleaser
8029
0
    {
8030
0
        OGRFeatureDefn &m_oDefn;
8031
8032
0
        explicit LayerDefnTmpRefReleaser(OGRFeatureDefn &oDefn) : m_oDefn(oDefn)
8033
0
        {
8034
0
            m_oDefn.Reference();
8035
0
        }
8036
8037
0
        ~LayerDefnTmpRefReleaser()
8038
0
        {
8039
0
            m_oDefn.Dereference();
8040
0
        }
8041
0
    };
8042
8043
0
    LayerDefnTmpRefReleaser oLayerDefnTmpRefReleaser(oLayerDefnTmp);
8044
8045
0
    std::vector<int> anIdentityFieldMap;
8046
0
    if (bFallbackTypesUsed)
8047
0
    {
8048
0
        oLayerDefnTmp.SetGeomType(wkbNone);
8049
0
        for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8050
0
        {
8051
0
            anIdentityFieldMap.push_back(i);
8052
0
            const auto poSrcFieldDefn = poLayerDefn->GetFieldDefn(i);
8053
0
            const auto oIter = oMapOGRFieldIndexToFieldInfoIndex.find(i);
8054
0
            OGRFieldDefn oFieldDefn(
8055
0
                poSrcFieldDefn->GetNameRef(),
8056
0
                oIter == oMapOGRFieldIndexToFieldInfoIndex.end()
8057
0
                    ? poSrcFieldDefn->GetType()
8058
0
                    : asFieldInfo[oIter->second].eNominalFieldType);
8059
0
            if (oIter != oMapOGRFieldIndexToFieldInfoIndex.end())
8060
0
                asFieldInfo[oIter->second].eSetFeatureFieldType =
8061
0
                    asFieldInfo[oIter->second].eNominalFieldType;
8062
0
            oLayerDefnTmp.AddFieldDefn(&oFieldDefn);
8063
0
        }
8064
0
        for (int i = 0; i < poLayerDefn->GetGeomFieldCount(); ++i)
8065
0
        {
8066
0
            oLayerDefnTmp.AddGeomFieldDefn(poLayerDefn->GetGeomFieldDefn(i));
8067
0
        }
8068
0
    }
8069
0
    else
8070
0
    {
8071
0
        for (auto &sFieldInfo : asFieldInfo)
8072
0
            sFieldInfo.eSetFeatureFieldType = sFieldInfo.eTargetFieldType;
8073
0
    }
8074
8075
0
    struct FeatureCleaner
8076
0
    {
8077
0
        OGRFeature &m_oFeature;
8078
0
        const std::vector<bool> &m_abUseStringOptim;
8079
8080
0
        explicit FeatureCleaner(OGRFeature &oFeature,
8081
0
                                const std::vector<bool> &abUseStringOptim)
8082
0
            : m_oFeature(oFeature), m_abUseStringOptim(abUseStringOptim)
8083
0
        {
8084
0
        }
8085
8086
        // As we set a value that can't be CPLFree()'d in the .String member
8087
        // of string fields, we must take care of manually unsetting it before
8088
        // the destructor of OGRFeature gets called.
8089
0
        ~FeatureCleaner()
8090
0
        {
8091
0
            const auto poLayerDefn = m_oFeature.GetDefnRef();
8092
0
            const int nFieldCount = poLayerDefn->GetFieldCount();
8093
0
            for (int i = 0; i < nFieldCount; ++i)
8094
0
            {
8095
0
                if (m_abUseStringOptim[i])
8096
0
                {
8097
0
                    if (m_oFeature.IsFieldSetAndNotNullUnsafe(i))
8098
0
                        m_oFeature.SetFieldSameTypeUnsafe(
8099
0
                            i, static_cast<char *>(nullptr));
8100
0
                }
8101
0
            }
8102
0
        }
8103
0
    };
8104
8105
0
    OGRFeature oFeature(bFallbackTypesUsed ? &oLayerDefnTmp : poLayerDefn);
8106
0
    FeatureCleaner oCleaner(oFeature, abUseStringOptim);
8107
0
    OGRFeature oFeatureTarget(poLayerDefn);
8108
0
    OGRFeature *const poFeatureTarget =
8109
0
        bFallbackTypesUsed ? &oFeatureTarget : &oFeature;
8110
8111
    // We accumulate the content of all strings in osWorkingBuffer to avoid
8112
    // a few dynamic memory allocations
8113
0
    std::string osWorkingBuffer;
8114
8115
0
    bool bTransactionOK;
8116
0
    {
8117
0
        CPLErrorStateBackuper oBackuper(CPLQuietErrorHandler);
8118
0
        bTransactionOK = StartTransaction() == OGRERR_NONE;
8119
0
    }
8120
8121
0
    const std::string emptyString;
8122
0
    int64_t fidNullCount = 0;
8123
0
    for (size_t iFeature = 0; iFeature < static_cast<size_t>(array->length);
8124
0
         ++iFeature)
8125
0
    {
8126
0
        oFeature.SetFID(OGRNullFID);
8127
8128
0
        int iArrowIdx = 0;
8129
0
        const size_t nWorkingBufferSize = GetWorkingBufferSize(
8130
0
            schema, array, iFeature, iArrowIdx, asFieldInfo);
8131
0
        osWorkingBuffer.clear();
8132
0
        osWorkingBuffer.reserve(nWorkingBufferSize);
8133
0
#ifdef DEBUG
8134
0
        const char *pszWorkingBuffer = osWorkingBuffer.c_str();
8135
0
        CPL_IGNORE_RET_VAL(pszWorkingBuffer);
8136
0
#endif
8137
0
        iArrowIdx = 0;
8138
0
        for (int64_t i = 0; i < schema->n_children; ++i)
8139
0
        {
8140
0
            if (!FillFeature(this, schema->children[i], array->children[i],
8141
0
                             emptyString, iFeature, iArrowIdx, asFieldInfo,
8142
0
                             oFeature, osWorkingBuffer))
8143
0
            {
8144
0
                if (bTransactionOK)
8145
0
                    RollbackTransaction();
8146
0
                return false;
8147
0
            }
8148
0
        }
8149
0
#ifdef DEBUG
8150
        // Check that the buffer didn't get reallocated
8151
0
        CPLAssert(pszWorkingBuffer == osWorkingBuffer.c_str());
8152
0
        CPLAssert(osWorkingBuffer.size() == nWorkingBufferSize);
8153
0
#endif
8154
8155
0
        if (bFallbackTypesUsed)
8156
0
        {
8157
0
            oFeatureTarget.SetFrom(&oFeature, anIdentityFieldMap.data(),
8158
0
                                   /*bForgiving=*/true,
8159
0
                                   /*bUseISO8601ForDateTimeAsString=*/true);
8160
0
            oFeatureTarget.SetFID(oFeature.GetFID());
8161
8162
0
            if (bErrorIfFieldNotPreserved)
8163
0
            {
8164
0
                for (int i = 0; i < poLayerDefn->GetFieldCount(); ++i)
8165
0
                {
8166
0
                    if (!oFeature.IsFieldSetAndNotNullUnsafe(i))
8167
0
                    {
8168
0
                        continue;
8169
0
                    }
8170
0
                    bool bLossyConversion = false;
8171
0
                    const auto eSrcType =
8172
0
                        oLayerDefnTmp.GetFieldDefnUnsafe(i)->GetType();
8173
0
                    const auto eDstType =
8174
0
                        poLayerDefn->GetFieldDefnUnsafe(i)->GetType();
8175
8176
0
                    const auto IsDoubleCastToInt64EqualTInt64 =
8177
0
                        [](double dfVal, int64_t nOtherVal)
8178
0
                    {
8179
                        // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
8180
                        // get converted to a double that once cast to int64_t
8181
                        // is INT64_MAX + 1, hence the strict < comparison
8182
0
                        return dfVal >=
8183
0
                                   static_cast<double>(
8184
0
                                       std::numeric_limits<int64_t>::min()) &&
8185
0
                               dfVal <
8186
0
                                   static_cast<double>(
8187
0
                                       std::numeric_limits<int64_t>::max()) &&
8188
0
                               static_cast<int64_t>(dfVal) == nOtherVal;
8189
0
                    };
8190
8191
0
                    if (eSrcType == OFTInteger64 && eDstType == OFTInteger &&
8192
0
                        oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8193
0
                            oFeature.GetFieldAsInteger64Unsafe(i))
8194
0
                    {
8195
0
                        bLossyConversion = true;
8196
0
                    }
8197
0
                    else if (eSrcType == OFTReal && eDstType == OFTInteger &&
8198
0
                             oFeatureTarget.GetFieldAsIntegerUnsafe(i) !=
8199
0
                                 oFeature.GetFieldAsDoubleUnsafe(i))
8200
0
                    {
8201
0
                        bLossyConversion = true;
8202
0
                    }
8203
0
                    else if (eSrcType == OFTReal && eDstType == OFTInteger64 &&
8204
0
                             static_cast<double>(
8205
0
                                 oFeatureTarget.GetFieldAsInteger64Unsafe(i)) !=
8206
0
                                 oFeature.GetFieldAsDoubleUnsafe(i))
8207
0
                    {
8208
0
                        bLossyConversion = true;
8209
0
                    }
8210
0
                    else if (eSrcType == OFTInteger64 && eDstType == OFTReal &&
8211
0
                             !IsDoubleCastToInt64EqualTInt64(
8212
0
                                 oFeatureTarget.GetFieldAsDoubleUnsafe(i),
8213
0
                                 oFeature.GetFieldAsInteger64Unsafe(i)))
8214
0
                    {
8215
0
                        bLossyConversion = true;
8216
0
                    }
8217
0
                    if (bLossyConversion)
8218
0
                    {
8219
0
                        CPLError(CE_Failure, CPLE_AppDefined,
8220
0
                                 "For feature " CPL_FRMT_GIB
8221
0
                                 ", value of field %s cannot not preserved",
8222
0
                                 oFeatureTarget.GetFID(),
8223
0
                                 oLayerDefnTmp.GetFieldDefn(i)->GetNameRef());
8224
0
                        if (bTransactionOK)
8225
0
                            RollbackTransaction();
8226
0
                        return false;
8227
0
                    }
8228
0
                }
8229
0
            }
8230
0
        }
8231
8232
0
        const auto nInputFID = poFeatureTarget->GetFID();
8233
0
        if (CreateFeature(poFeatureTarget) != OGRERR_NONE)
8234
0
        {
8235
0
            if (bTransactionOK)
8236
0
                RollbackTransaction();
8237
0
            return false;
8238
0
        }
8239
0
        if (nInputFID != OGRNullFID)
8240
0
        {
8241
0
            if (bWarningIfFIDNotPreserved &&
8242
                // cppcheck-suppress knownConditionTrueFalse
8243
0
                poFeatureTarget->GetFID() != nInputFID)
8244
0
            {
8245
0
                CPLError(CE_Warning, CPLE_AppDefined,
8246
0
                         "Feature id " CPL_FRMT_GIB " not preserved",
8247
0
                         nInputFID);
8248
0
            }
8249
0
            else if (bErrorIfFIDNotPreserved &&
8250
                     // cppcheck-suppress knownConditionTrueFalse
8251
0
                     poFeatureTarget->GetFID() != nInputFID)
8252
0
            {
8253
0
                CPLError(CE_Failure, CPLE_AppDefined,
8254
0
                         "Feature id " CPL_FRMT_GIB " not preserved",
8255
0
                         nInputFID);
8256
0
                if (bTransactionOK)
8257
0
                    RollbackTransaction();
8258
0
                return false;
8259
0
            }
8260
0
        }
8261
8262
0
        if (arrayFIDColumn)
8263
0
        {
8264
0
            uint8_t *pabyValidity = static_cast<uint8_t *>(
8265
0
                const_cast<void *>(arrayFIDColumn->buffers[0]));
8266
0
            if (IsInt32(schemaFIDColumn->format))
8267
0
            {
8268
0
                auto *panValues = static_cast<int32_t *>(
8269
0
                    const_cast<void *>(arrayFIDColumn->buffers[1]));
8270
0
                if (poFeatureTarget->GetFID() >
8271
0
                    std::numeric_limits<int32_t>::max())
8272
0
                {
8273
0
                    if (pabyValidity)
8274
0
                    {
8275
0
                        ++fidNullCount;
8276
0
                        UnsetBit(pabyValidity,
8277
0
                                 static_cast<size_t>(iFeature +
8278
0
                                                     arrayFIDColumn->offset));
8279
0
                    }
8280
0
                    CPLError(CE_Warning, CPLE_AppDefined,
8281
0
                             "FID " CPL_FRMT_GIB
8282
0
                             " cannot be stored in FID array of type int32",
8283
0
                             poFeatureTarget->GetFID());
8284
0
                }
8285
0
                else
8286
0
                {
8287
0
                    if (pabyValidity)
8288
0
                    {
8289
0
                        SetBit(pabyValidity,
8290
0
                               static_cast<size_t>(iFeature +
8291
0
                                                   arrayFIDColumn->offset));
8292
0
                    }
8293
0
                    panValues[iFeature + arrayFIDColumn->offset] =
8294
0
                        static_cast<int32_t>(poFeatureTarget->GetFID());
8295
0
                }
8296
0
            }
8297
0
            else if (IsInt64(schemaFIDColumn->format))
8298
0
            {
8299
0
                if (pabyValidity)
8300
0
                {
8301
0
                    SetBit(
8302
0
                        pabyValidity,
8303
0
                        static_cast<size_t>(iFeature + arrayFIDColumn->offset));
8304
0
                }
8305
0
                auto *panValues = static_cast<int64_t *>(
8306
0
                    const_cast<void *>(arrayFIDColumn->buffers[1]));
8307
0
                panValues[iFeature + arrayFIDColumn->offset] =
8308
0
                    poFeatureTarget->GetFID();
8309
0
            }
8310
0
            else
8311
0
            {
8312
0
                CPLAssert(false);
8313
0
            }
8314
0
        }
8315
0
    }
8316
0
    if (arrayFIDColumn && arrayFIDColumn->buffers[0])
8317
0
    {
8318
0
        arrayFIDColumn->null_count = fidNullCount;
8319
0
    }
8320
8321
0
    bool bRet = true;
8322
0
    if (bTransactionOK)
8323
0
        bRet = CommitTransaction() == OGRERR_NONE;
8324
8325
0
    return bRet;
8326
0
}
8327
8328
/************************************************************************/
8329
/*                       OGR_L_WriteArrowBatch()                        */
8330
/************************************************************************/
8331
8332
// clang-format off
8333
/** Writes a batch of rows from an ArrowArray.
8334
 *
8335
 * This is semantically close to calling CreateFeature() with multiple features
8336
 * at once.
8337
 *
8338
 * The ArrowArray must be of type struct (format=+s), and its children generally
8339
 * map to a OGR attribute or geometry field (unless they are struct themselves).
8340
 *
8341
 * Method IsArrowSchemaSupported() can be called to determine if the schema
8342
 * will be supported by WriteArrowBatch().
8343
 *
8344
 * OGR fields for the corresponding children arrays must exist and be of a
8345
 * compatible type. For attribute fields, they should generally be created with
8346
 * CreateFieldFromArrowSchema(). This is strictly required for output drivers
8347
 * Arrow or Parquet, and strongly recommended otherwise. For geometry fields,
8348
 * they should be created either implicitly at CreateLayer() time
8349
 * (if geom_type != wkbNone), or explicitly with CreateGeomField().
8350
 *
8351
 * Starting with GDAL 3.9, some tolerance has been introduced in the base
8352
 * implementation of WriteArrowBatch() for scenarios that involve appending to
8353
 * an already existing output layer when the input Arrow field type and the
8354
 * OGR layer field type are 32/64-bit integers or real numbers, but do not match
8355
 * exactly, which may cause lossy conversions. The IF_FIELD_NOT_PRESERVED option
8356
 * can be used to control the behavior in case of lossy conversion.
8357
 *
8358
 * Arrays for geometry columns should be of binary or large binary type and
8359
 * contain WKB geometry.
8360
 *
8361
 * Note that the passed array may be set to a released state
8362
 * (array->release==NULL) after this call (not by the base implementation,
8363
 * but in specialized ones such as Parquet or Arrow for example).
8364
 *
8365
 * Supported options of the base implementation are:
8366
 * <ul>
8367
 * <li>FID=name. Name of the FID column in the array. If not provided,
8368
 *     GetFIDColumn() is used to determine it. The special name
8369
 *     OGRLayer::DEFAULT_ARROW_FID_NAME is also recognized if neither FID nor
8370
 *     GetFIDColumn() are set.
8371
 *     The corresponding ArrowArray must be of type int32 (i) or int64 (l).
8372
 *     On input, values of the FID column are used to create the feature.
8373
 *     On output, the values of the FID column may be set with the FID of the
8374
 *     created feature (if the array is not released).
8375
 * </li>
8376
 * <li>IF_FID_NOT_PRESERVED=NOTHING/ERROR/WARNING. Action to perform when the
8377
 *     input FID is not preserved in the output layer. The default is NOTHING.
8378
 *     Setting it to ERROR will cause the function to error out. Setting it
8379
 *     to WARNING will cause the function to emit a warning but continue its
8380
 *     processing.
8381
 * </li>
8382
 * <li>IF_FIELD_NOT_PRESERVED=ERROR/WARNING. (since GDAL 3.9)
8383
 *     Action to perform when the input field value is not preserved in the
8384
 *     output layer.
8385
 *     The default is WARNING, which will cause the function to emit a warning
8386
 *     but continue its processing.
8387
 *     Setting it to ERROR will cause the function to error out if a lossy
8388
 *     conversion is detected.
8389
 * </li>
8390
 * <li>GEOMETRY_NAME=name. Name of the geometry column. If not provided,
8391
 *     GetGeometryColumn() is used. The special name
8392
 *     OGRLayer::DEFAULT_ARROW_GEOMETRY_NAME is also recognized if neither
8393
 *     GEOMETRY_NAME nor GetGeometryColumn() are set.
8394
 *     Geometry columns are also identified if they have
8395
 *     ARROW:extension:name=ogc.wkb as a field metadata.
8396
 *     The corresponding ArrowArray must be of type binary (w) or large
8397
 *     binary (W).
8398
 * </li>
8399
 * </ul>
8400
 *
8401
 * The following example demonstrates how to copy a layer from one format to
8402
 * another one (assuming it has at most a single geometry column):
8403
\code{.py}
8404
    def copy_layer(src_lyr, out_filename, out_format, lcos = {}):
8405
        stream = src_lyr.GetArrowStream()
8406
        schema = stream.GetSchema()
8407
8408
        # If the source layer has a FID column and the output driver supports
8409
        # a FID layer creation option, set it to the source FID column name.
8410
        if src_lyr.GetFIDColumn():
8411
            creationOptions = gdal.GetDriverByName(out_format).GetMetadataItem(
8412
                "DS_LAYER_CREATIONOPTIONLIST"
8413
            )
8414
            if creationOptions and '"FID"' in creationOptions:
8415
                lcos["FID"] = src_lyr.GetFIDColumn()
8416
8417
        with ogr.GetDriverByName(out_format).CreateDataSource(out_filename) as out_ds:
8418
            if src_lyr.GetLayerDefn().GetGeomFieldCount() > 1:
8419
                out_lyr = out_ds.CreateLayer(
8420
                    src_lyr.GetName(), geom_type=ogr.wkbNone, options=lcos
8421
                )
8422
                for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount()):
8423
                    out_lyr.CreateGeomField(src_lyr.GetLayerDefn().GetGeomFieldDefn(i))
8424
            else:
8425
                out_lyr = out_ds.CreateLayer(
8426
                    src_lyr.GetName(),
8427
                    geom_type=src_lyr.GetGeomType(),
8428
                    srs=src_lyr.GetSpatialRef(),
8429
                    options=lcos,
8430
                )
8431
8432
            success, error_msg = out_lyr.IsArrowSchemaSupported(schema)
8433
            assert success, error_msg
8434
8435
            src_geom_field_names = [
8436
                src_lyr.GetLayerDefn().GetGeomFieldDefn(i).GetName()
8437
                for i in range(src_lyr.GetLayerDefn().GetGeomFieldCount())
8438
            ]
8439
            for i in range(schema.GetChildrenCount()):
8440
                # GetArrowStream() may return "OGC_FID" for an unnamed source FID
8441
                # column and "wkb_geometry" for an unnamed source geometry column.
8442
                # Also test GetFIDColumn() and src_geom_field_names if they are
8443
                # named.
8444
                if (
8445
                    schema.GetChild(i).GetName()
8446
                    not in ("OGC_FID", "wkb_geometry", src_lyr.GetFIDColumn())
8447
                    and schema.GetChild(i).GetName() not in src_geom_field_names
8448
                ):
8449
                    out_lyr.CreateFieldFromArrowSchema(schema.GetChild(i))
8450
8451
            write_options = []
8452
            if src_lyr.GetFIDColumn():
8453
                write_options.append("FID=" + src_lyr.GetFIDColumn())
8454
            if (
8455
                src_lyr.GetLayerDefn().GetGeomFieldCount() == 1
8456
                and src_lyr.GetGeometryColumn()
8457
            ):
8458
                write_options.append("GEOMETRY_NAME=" + src_lyr.GetGeometryColumn())
8459
8460
            while True:
8461
                array = stream.GetNextRecordBatch()
8462
                if array is None:
8463
                    break
8464
                out_lyr.WriteArrowBatch(schema, array, write_options)
8465
\endcode
8466
 *
8467
 * This method and CreateFeature() are mutually exclusive in the same session.
8468
 *
8469
 * This method is the same as the C++ method OGRLayer::WriteArrowBatch().
8470
 *
8471
 * @param hLayer Layer.
8472
 * @param schema Schema of array.
8473
 * @param array Array of type struct. It may be released (array->release==NULL)
8474
 *              after calling this method.
8475
 * @param papszOptions Options. Null terminated list, or nullptr.
8476
 * @return true in case of success
8477
 * @since 3.8
8478
 */
8479
// clang-format on
8480
8481
// C API entry point: checks the three pointer arguments, then forwards to the
// C++ method OGRLayer::WriteArrowBatch() on the layer behind hLayer.
bool OGR_L_WriteArrowBatch(OGRLayerH hLayer, const struct ArrowSchema *schema,
8482
                           struct ArrowArray *array, CSLConstList papszOptions)
8483
0
{
8484
0
    // NOTE(review): VALIDATE_POINTER1 is defined outside this chunk; it is
    // presumably a null check that returns its third argument (false) from
    // this function when the pointer is null - confirm against the macro.
    VALIDATE_POINTER1(hLayer, __func__, false);
8485
0
    VALIDATE_POINTER1(schema, __func__, false);
8486
0
    VALIDATE_POINTER1(array, __func__, false);
8487
8488
0
    // Convert the opaque handle to its OGRLayer and delegate; the result of
    // WriteArrowBatch() is returned unchanged. papszOptions is passed through
    // as-is (it is not validated here, and may be nullptr per the API doc).
    return OGRLayer::FromHandle(hLayer)->WriteArrowBatch(schema, array,
8489
0
                                                         papszOptions);
8490
0
}