/src/gdal/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.h
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: OpenGIS Simple Features Reference Implementation |
4 | | * Purpose: Helper to fill ArrowArray |
5 | | * Author: Even Rouault <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #pragma once |
14 | | |
15 | | //! @cond Doxygen_Suppress |
16 | | |
17 | | #include <algorithm> |
18 | | #include <limits> |
19 | | |
20 | | #include "cpl_time.h" |
21 | | |
22 | | #include "ogrsf_frmts.h" |
23 | | #include "ogr_recordbatch.h" |
24 | | |
25 | | class CPL_DLL OGRArrowArrayHelper |
26 | | { |
27 | | OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete; |
28 | | OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete; |
29 | | |
30 | | public: |
31 | | bool m_bIncludeFID = false; |
32 | | int m_nMaxBatchSize = 0; |
33 | | int m_nChildren = 0; |
34 | | const int m_nFieldCount = 0; |
35 | | const int m_nGeomFieldCount = 0; |
36 | | std::vector<int> m_mapOGRFieldToArrowField{}; |
37 | | std::vector<int> m_mapOGRGeomFieldToArrowField{}; |
38 | | std::vector<bool> m_abNullableFields{}; |
39 | | std::vector<uint32_t> m_anArrowFieldMaxAlloc{}; |
40 | | std::vector<int> m_anTZFlags{}; |
41 | | int64_t *m_panFIDValues = nullptr; |
42 | | struct ArrowArray *m_out_array = nullptr; |
43 | | |
44 | | static uint32_t GetMemLimit(); |
45 | | |
46 | | static int |
47 | | GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions); |
48 | | |
49 | | OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn, |
50 | | const CPLStringList &aosArrowArrayStreamOptions, |
51 | | struct ArrowArray *out_array); |
52 | | |
53 | | //! Construct an helper from an already initialized array |
54 | | OGRArrowArrayHelper(struct ArrowArray *out_array, int nMaxBatchSize); |
55 | | |
56 | | static bool SetNull(struct ArrowArray *psArray, int iFeat, |
57 | | int nMaxBatchSize, bool bAlignedMalloc) |
58 | 0 | { |
59 | 0 | ++psArray->null_count; |
60 | 0 | uint8_t *pabyNull = |
61 | 0 | static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0])); |
62 | 0 | if (psArray->buffers[0] == nullptr) |
63 | 0 | { |
64 | 0 | pabyNull = static_cast<uint8_t *>( |
65 | 0 | bAlignedMalloc |
66 | 0 | ? VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nMaxBatchSize + 7) / 8) |
67 | 0 | : VSI_MALLOC_VERBOSE((nMaxBatchSize + 7) / 8)); |
68 | 0 | if (pabyNull == nullptr) |
69 | 0 | { |
70 | 0 | return false; |
71 | 0 | } |
72 | 0 | memset(pabyNull, 0xFF, (nMaxBatchSize + 7) / 8); |
73 | 0 | psArray->buffers[0] = pabyNull; |
74 | 0 | } |
75 | 0 | pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8))); |
76 | 0 |
|
77 | 0 | if (psArray->n_buffers == 3) |
78 | 0 | { |
79 | 0 | auto panOffsets = |
80 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
81 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat]; |
82 | 0 | } |
83 | 0 | return true; |
84 | 0 | } |
85 | | |
86 | | bool SetNull(int iArrowField, int iFeat) |
87 | 0 | { |
88 | 0 | return SetNull(m_out_array->children[iArrowField], iFeat, |
89 | 0 | m_nMaxBatchSize, true); |
90 | 0 | } |
91 | | |
92 | | inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat) |
93 | 0 | { |
94 | 0 | static_cast<uint8_t *>( |
95 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |= |
96 | 0 | static_cast<uint8_t>(1 << (iFeat % 8)); |
97 | 0 | } |
98 | | |
99 | | inline static void SetInt8(struct ArrowArray *psArray, int iFeat, |
100 | | int8_t nVal) |
101 | 0 | { |
102 | 0 | static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
103 | 0 | nVal; |
104 | 0 | } |
105 | | |
106 | | inline static void SetUInt8(struct ArrowArray *psArray, int iFeat, |
107 | | uint8_t nVal) |
108 | 0 | { |
109 | 0 | static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
110 | 0 | nVal; |
111 | 0 | } |
112 | | |
113 | | inline static void SetInt16(struct ArrowArray *psArray, int iFeat, |
114 | | int16_t nVal) |
115 | 0 | { |
116 | 0 | static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
117 | 0 | nVal; |
118 | 0 | } |
119 | | |
120 | | inline static void SetUInt16(struct ArrowArray *psArray, int iFeat, |
121 | | uint16_t nVal) |
122 | 0 | { |
123 | 0 | static_cast<uint16_t *>( |
124 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
125 | 0 | } |
126 | | |
127 | | inline static void SetInt32(struct ArrowArray *psArray, int iFeat, |
128 | | int32_t nVal) |
129 | 0 | { |
130 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
131 | 0 | nVal; |
132 | 0 | } |
133 | | |
134 | | inline static void SetUInt32(struct ArrowArray *psArray, int iFeat, |
135 | | uint32_t nVal) |
136 | 0 | { |
137 | 0 | static_cast<uint32_t *>( |
138 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
139 | 0 | } |
140 | | |
141 | | inline static void SetInt64(struct ArrowArray *psArray, int iFeat, |
142 | | int64_t nVal) |
143 | 0 | { |
144 | 0 | static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
145 | 0 | nVal; |
146 | 0 | } |
147 | | |
148 | | inline static void SetUInt64(struct ArrowArray *psArray, int iFeat, |
149 | | uint64_t nVal) |
150 | 0 | { |
151 | 0 | static_cast<uint64_t *>( |
152 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
153 | 0 | } |
154 | | |
155 | | inline static void SetFloat(struct ArrowArray *psArray, int iFeat, |
156 | | float fVal) |
157 | 0 | { |
158 | 0 | static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
159 | 0 | fVal; |
160 | 0 | } |
161 | | |
162 | | inline static void SetDouble(struct ArrowArray *psArray, int iFeat, |
163 | | double dfVal) |
164 | 0 | { |
165 | 0 | static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
166 | 0 | dfVal; |
167 | 0 | } |
168 | | |
169 | | static void SetDate(struct ArrowArray *psArray, int iFeat, |
170 | | struct tm &brokenDown, const OGRField &ogrField) |
171 | 0 | { |
172 | 0 | brokenDown.tm_year = ogrField.Date.Year - 1900; |
173 | 0 | brokenDown.tm_mon = ogrField.Date.Month - 1; |
174 | 0 | brokenDown.tm_mday = ogrField.Date.Day; |
175 | 0 | brokenDown.tm_hour = 0; |
176 | 0 | brokenDown.tm_min = 0; |
177 | 0 | brokenDown.tm_sec = 0; |
178 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
179 | 0 | static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400); |
180 | 0 | } |
181 | | |
182 | | static void SetDateTime(struct ArrowArray *psArray, int iFeat, |
183 | | struct tm &brokenDown, int nFieldTZFlag, |
184 | | const OGRField &ogrField) |
185 | 0 | { |
186 | 0 | brokenDown.tm_year = ogrField.Date.Year - 1900; |
187 | 0 | brokenDown.tm_mon = ogrField.Date.Month - 1; |
188 | 0 | brokenDown.tm_mday = ogrField.Date.Day; |
189 | 0 | brokenDown.tm_hour = ogrField.Date.Hour; |
190 | 0 | brokenDown.tm_min = ogrField.Date.Minute; |
191 | 0 | brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second); |
192 | 0 | auto nVal = |
193 | 0 | CPLYMDHMSToUnixTime(&brokenDown) * 1000 + |
194 | 0 | (static_cast<int>(ogrField.Date.Second * 1000 + 0.5f) % 1000); |
195 | 0 | if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ && |
196 | 0 | ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ) |
197 | 0 | { |
198 | 0 | // Convert for ogrField.Date.TZFlag to UTC |
199 | 0 | const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15; |
200 | 0 | const int TZOffsetMS = TZOffset * 60 * 1000; |
201 | 0 | nVal -= TZOffsetMS; |
202 | 0 | } |
203 | 0 | static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
204 | 0 | nVal; |
205 | 0 | } |
206 | | |
207 | | static GByte *GetPtrForStringOrBinary(struct ArrowArray *psArray, int iFeat, |
208 | | size_t nLen, uint32_t &nMaxAlloc, |
209 | | bool bAlignedMalloc) |
210 | 0 | { |
211 | 0 | auto panOffsets = |
212 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
213 | 0 | const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]); |
214 | 0 | if (nLen > nMaxAlloc - nCurLength) |
215 | 0 | { |
216 | 0 | constexpr uint32_t INT32_MAX_AS_UINT32 = |
217 | 0 | static_cast<uint32_t>(std::numeric_limits<int32_t>::max()); |
218 | 0 | if (!(nCurLength <= INT32_MAX_AS_UINT32 && |
219 | 0 | nLen <= INT32_MAX_AS_UINT32 - nCurLength)) |
220 | 0 | { |
221 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
222 | 0 | "Too large string or binary content"); |
223 | 0 | return nullptr; |
224 | 0 | } |
225 | 0 | uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen); |
226 | 0 | if (nMaxAlloc <= INT32_MAX_AS_UINT32) |
227 | 0 | { |
228 | 0 | const uint32_t nDoubleSize = 2U * nMaxAlloc; |
229 | 0 | if (nNewSize < nDoubleSize) |
230 | 0 | nNewSize = nDoubleSize; |
231 | 0 | } |
232 | 0 | void *newBuffer; |
233 | 0 | if (bAlignedMalloc) |
234 | 0 | { |
235 | 0 | newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize); |
236 | 0 | if (newBuffer == nullptr) |
237 | 0 | return nullptr; |
238 | 0 | nMaxAlloc = nNewSize; |
239 | 0 | memcpy(newBuffer, psArray->buffers[2], nCurLength); |
240 | 0 | VSIFreeAligned(const_cast<void *>(psArray->buffers[2])); |
241 | 0 | } |
242 | 0 | else |
243 | 0 | { |
244 | 0 | // coverity[overflow_sink] |
245 | 0 | newBuffer = VSI_REALLOC_VERBOSE( |
246 | 0 | const_cast<void *>(psArray->buffers[2]), nNewSize); |
247 | 0 | if (newBuffer == nullptr) |
248 | 0 | return nullptr; |
249 | 0 | nMaxAlloc = nNewSize; |
250 | 0 | } |
251 | 0 | psArray->buffers[2] = newBuffer; |
252 | 0 | } |
253 | 0 | GByte *paby = |
254 | 0 | static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) + |
255 | 0 | nCurLength; |
256 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen); |
257 | 0 | return paby; |
258 | 0 | } |
259 | | |
260 | | GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen, |
261 | | bool bAlignedMalloc = true) |
262 | 0 | { |
263 | 0 | auto psArray = m_out_array->children[iArrowField]; |
264 | 0 | return GetPtrForStringOrBinary(psArray, iFeat, nLen, |
265 | 0 | m_anArrowFieldMaxAlloc[iArrowField], |
266 | 0 | bAlignedMalloc); |
267 | 0 | } |
268 | | |
269 | | static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat) |
270 | 0 | { |
271 | 0 | auto panOffsets = |
272 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
273 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat]; |
274 | 0 | } |
275 | | |
276 | | void Shrink(int nFeatures) |
277 | 0 | { |
278 | 0 | if (nFeatures < m_nMaxBatchSize) |
279 | 0 | { |
280 | 0 | m_out_array->length = nFeatures; |
281 | 0 | for (int i = 0; i < m_nChildren; i++) |
282 | 0 | { |
283 | 0 | m_out_array->children[i]->length = nFeatures; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | | void ClearArray() |
289 | 0 | { |
290 | 0 | if (m_out_array->release) |
291 | 0 | m_out_array->release(m_out_array); |
292 | 0 | memset(m_out_array, 0, sizeof(*m_out_array)); |
293 | 0 | } |
294 | | |
295 | | static bool FillDict(struct ArrowArray *psChild, |
296 | | const OGRCodedFieldDomain *poCodedDomain); |
297 | | }; |
298 | | |
299 | | //! @endcond |