Coverage Report

Created: 2025-12-31 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.h
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Helper to fill ArrowArray
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#pragma once
14
15
//! @cond Doxygen_Suppress
16
17
#include <algorithm>
18
#include <limits>
19
20
#include "cpl_time.h"
21
22
#include "ogrsf_frmts.h"
23
#include "ogr_recordbatch.h"
24
25
class CPL_DLL OGRArrowArrayHelper
26
{
27
    OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
28
    OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
29
30
  public:
31
    bool m_bIncludeFID = false;
32
    int m_nMaxBatchSize = 0;
33
    int m_nChildren = 0;
34
    const int m_nFieldCount = 0;
35
    const int m_nGeomFieldCount = 0;
36
    std::vector<int> m_mapOGRFieldToArrowField{};
37
    std::vector<int> m_mapOGRGeomFieldToArrowField{};
38
    std::vector<bool> m_abNullableFields{};
39
    std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
40
    std::vector<int> m_anTZFlags{};
41
    int64_t *m_panFIDValues = nullptr;
42
    struct ArrowArray *m_out_array = nullptr;
43
44
    static uint32_t GetMemLimit();
45
46
    static int
47
    GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
48
49
    OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
50
                        const CPLStringList &aosArrowArrayStreamOptions,
51
                        struct ArrowArray *out_array);
52
53
    //! Construct an helper from an already initialized array
54
    OGRArrowArrayHelper(struct ArrowArray *out_array, int nMaxBatchSize);
55
56
    static bool SetNull(struct ArrowArray *psArray, int iFeat,
57
                        int nMaxBatchSize, bool bAlignedMalloc)
58
0
    {
59
0
        ++psArray->null_count;
60
0
        uint8_t *pabyNull =
61
0
            static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
62
0
        if (psArray->buffers[0] == nullptr)
63
0
        {
64
0
            pabyNull = static_cast<uint8_t *>(
65
0
                bAlignedMalloc
66
0
                    ? VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nMaxBatchSize + 7) / 8)
67
0
                    : VSI_MALLOC_VERBOSE((nMaxBatchSize + 7) / 8));
68
0
            if (pabyNull == nullptr)
69
0
            {
70
0
                return false;
71
0
            }
72
0
            memset(pabyNull, 0xFF, (nMaxBatchSize + 7) / 8);
73
0
            psArray->buffers[0] = pabyNull;
74
0
        }
75
0
        pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
76
0
77
0
        if (psArray->n_buffers == 3)
78
0
        {
79
0
            auto panOffsets =
80
0
                static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
81
0
            panOffsets[iFeat + 1] = panOffsets[iFeat];
82
0
        }
83
0
        return true;
84
0
    }
85
86
    bool SetNull(int iArrowField, int iFeat)
87
0
    {
88
0
        return SetNull(m_out_array->children[iArrowField], iFeat,
89
0
                       m_nMaxBatchSize, true);
90
0
    }
91
92
    inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
93
0
    {
94
0
        static_cast<uint8_t *>(
95
0
            const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
96
0
            static_cast<uint8_t>(1 << (iFeat % 8));
97
0
    }
98
99
    inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
100
                               int8_t nVal)
101
0
    {
102
0
        static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
103
0
            nVal;
104
0
    }
105
106
    inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
107
                                uint8_t nVal)
108
0
    {
109
0
        static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
110
0
            nVal;
111
0
    }
112
113
    inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
114
                                int16_t nVal)
115
0
    {
116
0
        static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
117
0
            nVal;
118
0
    }
119
120
    inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
121
                                 uint16_t nVal)
122
0
    {
123
0
        static_cast<uint16_t *>(
124
0
            const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
125
0
    }
126
127
    inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
128
                                int32_t nVal)
129
0
    {
130
0
        static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
131
0
            nVal;
132
0
    }
133
134
    inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
135
                                 uint32_t nVal)
136
0
    {
137
0
        static_cast<uint32_t *>(
138
0
            const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
139
0
    }
140
141
    inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
142
                                int64_t nVal)
143
0
    {
144
0
        static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
145
0
            nVal;
146
0
    }
147
148
    inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
149
                                 uint64_t nVal)
150
0
    {
151
0
        static_cast<uint64_t *>(
152
0
            const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
153
0
    }
154
155
    inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
156
                                float fVal)
157
0
    {
158
0
        static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
159
0
            fVal;
160
0
    }
161
162
    inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
163
                                 double dfVal)
164
0
    {
165
0
        static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
166
0
            dfVal;
167
0
    }
168
169
    static void SetDate(struct ArrowArray *psArray, int iFeat,
170
                        struct tm &brokenDown, const OGRField &ogrField)
171
0
    {
172
0
        brokenDown.tm_year = ogrField.Date.Year - 1900;
173
0
        brokenDown.tm_mon = ogrField.Date.Month - 1;
174
0
        brokenDown.tm_mday = ogrField.Date.Day;
175
0
        brokenDown.tm_hour = 0;
176
0
        brokenDown.tm_min = 0;
177
0
        brokenDown.tm_sec = 0;
178
0
        static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
179
0
            static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
180
0
    }
181
182
    static void SetDateTime(struct ArrowArray *psArray, int iFeat,
183
                            struct tm &brokenDown, int nFieldTZFlag,
184
                            const OGRField &ogrField)
185
0
    {
186
0
        brokenDown.tm_year = ogrField.Date.Year - 1900;
187
0
        brokenDown.tm_mon = ogrField.Date.Month - 1;
188
0
        brokenDown.tm_mday = ogrField.Date.Day;
189
0
        brokenDown.tm_hour = ogrField.Date.Hour;
190
0
        brokenDown.tm_min = ogrField.Date.Minute;
191
0
        brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
192
0
        auto nVal =
193
0
            CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
194
0
            (static_cast<int>(ogrField.Date.Second * 1000 + 0.5f) % 1000);
195
0
        if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
196
0
            ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
197
0
        {
198
0
            // Convert for ogrField.Date.TZFlag to UTC
199
0
            const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
200
0
            const int TZOffsetMS = TZOffset * 60 * 1000;
201
0
            nVal -= TZOffsetMS;
202
0
        }
203
0
        static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
204
0
            nVal;
205
0
    }
206
207
    static GByte *GetPtrForStringOrBinary(struct ArrowArray *psArray, int iFeat,
208
                                          size_t nLen, uint32_t &nMaxAlloc,
209
                                          bool bAlignedMalloc)
210
0
    {
211
0
        auto panOffsets =
212
0
            static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
213
0
        const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
214
0
        if (nLen > nMaxAlloc - nCurLength)
215
0
        {
216
0
            constexpr uint32_t INT32_MAX_AS_UINT32 =
217
0
                static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
218
0
            if (!(nCurLength <= INT32_MAX_AS_UINT32 &&
219
0
                  nLen <= INT32_MAX_AS_UINT32 - nCurLength))
220
0
            {
221
0
                CPLError(CE_Failure, CPLE_AppDefined,
222
0
                         "Too large string or binary content");
223
0
                return nullptr;
224
0
            }
225
0
            uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
226
0
            if (nMaxAlloc <= INT32_MAX_AS_UINT32)
227
0
            {
228
0
                const uint32_t nDoubleSize = 2U * nMaxAlloc;
229
0
                if (nNewSize < nDoubleSize)
230
0
                    nNewSize = nDoubleSize;
231
0
            }
232
0
            void *newBuffer;
233
0
            if (bAlignedMalloc)
234
0
            {
235
0
                newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
236
0
                if (newBuffer == nullptr)
237
0
                    return nullptr;
238
0
                nMaxAlloc = nNewSize;
239
0
                memcpy(newBuffer, psArray->buffers[2], nCurLength);
240
0
                VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
241
0
            }
242
0
            else
243
0
            {
244
0
                // coverity[overflow_sink]
245
0
                newBuffer = VSI_REALLOC_VERBOSE(
246
0
                    const_cast<void *>(psArray->buffers[2]), nNewSize);
247
0
                if (newBuffer == nullptr)
248
0
                    return nullptr;
249
0
                nMaxAlloc = nNewSize;
250
0
            }
251
0
            psArray->buffers[2] = newBuffer;
252
0
        }
253
0
        GByte *paby =
254
0
            static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
255
0
            nCurLength;
256
0
        panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
257
0
        return paby;
258
0
    }
259
260
    GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen,
261
                                   bool bAlignedMalloc = true)
262
0
    {
263
0
        auto psArray = m_out_array->children[iArrowField];
264
0
        return GetPtrForStringOrBinary(psArray, iFeat, nLen,
265
0
                                       m_anArrowFieldMaxAlloc[iArrowField],
266
0
                                       bAlignedMalloc);
267
0
    }
268
269
    static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
270
0
    {
271
0
        auto panOffsets =
272
0
            static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
273
0
        panOffsets[iFeat + 1] = panOffsets[iFeat];
274
0
    }
275
276
    void Shrink(int nFeatures)
277
0
    {
278
0
        if (nFeatures < m_nMaxBatchSize)
279
0
        {
280
0
            m_out_array->length = nFeatures;
281
0
            for (int i = 0; i < m_nChildren; i++)
282
0
            {
283
0
                m_out_array->children[i]->length = nFeatures;
284
0
            }
285
0
        }
286
0
    }
287
288
    void ClearArray()
289
0
    {
290
0
        if (m_out_array->release)
291
0
            m_out_array->release(m_out_array);
292
0
        memset(m_out_array, 0, sizeof(*m_out_array));
293
0
    }
294
295
    static bool FillDict(struct ArrowArray *psChild,
296
                         const OGRCodedFieldDomain *poCodedDomain);
297
};
298
299
//! @endcond