/src/gdal/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.h
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: OpenGIS Simple Features Reference Implementation |
4 | | * Purpose: Helper to fill ArrowArray |
5 | | * Author: Even Rouault <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #pragma once |
14 | | |
15 | | //! @cond Doxygen_Suppress |
16 | | |
17 | | #include <algorithm> |
18 | | #include <limits> |
19 | | |
20 | | #include "cpl_time.h" |
21 | | |
22 | | #include "ogrsf_frmts.h" |
23 | | #include "ogr_recordbatch.h" |
24 | | |
25 | | class CPL_DLL OGRArrowArrayHelper |
26 | | { |
27 | | OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete; |
28 | | OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete; |
29 | | |
30 | | public: |
31 | | bool m_bIncludeFID = false; |
32 | | int m_nMaxBatchSize = 0; |
33 | | int m_nChildren = 0; |
34 | | const int m_nFieldCount = 0; |
35 | | const int m_nGeomFieldCount = 0; |
36 | | std::vector<int> m_mapOGRFieldToArrowField{}; |
37 | | std::vector<int> m_mapOGRGeomFieldToArrowField{}; |
38 | | std::vector<bool> m_abNullableFields{}; |
39 | | std::vector<uint32_t> m_anArrowFieldMaxAlloc{}; |
40 | | std::vector<int> m_anTZFlags{}; |
41 | | int64_t *m_panFIDValues = nullptr; |
42 | | struct ArrowArray *m_out_array = nullptr; |
43 | | |
44 | | static uint32_t GetMemLimit(); |
45 | | |
46 | | static int |
47 | | GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions); |
48 | | |
49 | | OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn, |
50 | | const CPLStringList &aosArrowArrayStreamOptions, |
51 | | struct ArrowArray *out_array); |
52 | | |
53 | | bool SetNull(int iArrowField, int iFeat) |
54 | 0 | { |
55 | 0 | auto psArray = m_out_array->children[iArrowField]; |
56 | 0 | ++psArray->null_count; |
57 | 0 | uint8_t *pabyNull = |
58 | 0 | static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0])); |
59 | 0 | if (psArray->buffers[0] == nullptr) |
60 | 0 | { |
61 | 0 | pabyNull = static_cast<uint8_t *>( |
62 | 0 | VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8)); |
63 | 0 | if (pabyNull == nullptr) |
64 | 0 | { |
65 | 0 | return false; |
66 | 0 | } |
67 | 0 | memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8); |
68 | 0 | psArray->buffers[0] = pabyNull; |
69 | 0 | } |
70 | 0 | pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8))); |
71 | |
|
72 | 0 | if (psArray->n_buffers == 3) |
73 | 0 | { |
74 | 0 | auto panOffsets = |
75 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
76 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat]; |
77 | 0 | } |
78 | 0 | return true; |
79 | 0 | } |
80 | | |
81 | | inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat) |
82 | 0 | { |
83 | 0 | static_cast<uint8_t *>( |
84 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |= |
85 | 0 | static_cast<uint8_t>(1 << (iFeat % 8)); |
86 | 0 | } |
87 | | |
88 | | inline static void SetInt8(struct ArrowArray *psArray, int iFeat, |
89 | | int8_t nVal) |
90 | 0 | { |
91 | 0 | static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
92 | 0 | nVal; |
93 | 0 | } |
94 | | |
95 | | inline static void SetUInt8(struct ArrowArray *psArray, int iFeat, |
96 | | uint8_t nVal) |
97 | 0 | { |
98 | 0 | static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
99 | 0 | nVal; |
100 | 0 | } |
101 | | |
102 | | inline static void SetInt16(struct ArrowArray *psArray, int iFeat, |
103 | | int16_t nVal) |
104 | 0 | { |
105 | 0 | static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
106 | 0 | nVal; |
107 | 0 | } |
108 | | |
109 | | inline static void SetUInt16(struct ArrowArray *psArray, int iFeat, |
110 | | uint16_t nVal) |
111 | 0 | { |
112 | 0 | static_cast<uint16_t *>( |
113 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
114 | 0 | } |
115 | | |
116 | | inline static void SetInt32(struct ArrowArray *psArray, int iFeat, |
117 | | int32_t nVal) |
118 | 0 | { |
119 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
120 | 0 | nVal; |
121 | 0 | } |
122 | | |
123 | | inline static void SetUInt32(struct ArrowArray *psArray, int iFeat, |
124 | | uint32_t nVal) |
125 | 0 | { |
126 | 0 | static_cast<uint32_t *>( |
127 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
128 | 0 | } |
129 | | |
130 | | inline static void SetInt64(struct ArrowArray *psArray, int iFeat, |
131 | | int64_t nVal) |
132 | 0 | { |
133 | 0 | static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
134 | 0 | nVal; |
135 | 0 | } |
136 | | |
137 | | inline static void SetUInt64(struct ArrowArray *psArray, int iFeat, |
138 | | uint64_t nVal) |
139 | 0 | { |
140 | 0 | static_cast<uint64_t *>( |
141 | 0 | const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal; |
142 | 0 | } |
143 | | |
144 | | inline static void SetFloat(struct ArrowArray *psArray, int iFeat, |
145 | | float fVal) |
146 | 0 | { |
147 | 0 | static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
148 | 0 | fVal; |
149 | 0 | } |
150 | | |
151 | | inline static void SetDouble(struct ArrowArray *psArray, int iFeat, |
152 | | double dfVal) |
153 | 0 | { |
154 | 0 | static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
155 | 0 | dfVal; |
156 | 0 | } |
157 | | |
158 | | static void SetDate(struct ArrowArray *psArray, int iFeat, |
159 | | struct tm &brokenDown, const OGRField &ogrField) |
160 | 0 | { |
161 | 0 | brokenDown.tm_year = ogrField.Date.Year - 1900; |
162 | 0 | brokenDown.tm_mon = ogrField.Date.Month - 1; |
163 | 0 | brokenDown.tm_mday = ogrField.Date.Day; |
164 | 0 | brokenDown.tm_hour = 0; |
165 | 0 | brokenDown.tm_min = 0; |
166 | 0 | brokenDown.tm_sec = 0; |
167 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
168 | 0 | static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400); |
169 | 0 | } |
170 | | |
171 | | static void SetDateTime(struct ArrowArray *psArray, int iFeat, |
172 | | struct tm &brokenDown, int nFieldTZFlag, |
173 | | const OGRField &ogrField) |
174 | 0 | { |
175 | 0 | brokenDown.tm_year = ogrField.Date.Year - 1900; |
176 | 0 | brokenDown.tm_mon = ogrField.Date.Month - 1; |
177 | 0 | brokenDown.tm_mday = ogrField.Date.Day; |
178 | 0 | brokenDown.tm_hour = ogrField.Date.Hour; |
179 | 0 | brokenDown.tm_min = ogrField.Date.Minute; |
180 | 0 | brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second); |
181 | 0 | auto nVal = |
182 | 0 | CPLYMDHMSToUnixTime(&brokenDown) * 1000 + |
183 | 0 | (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000); |
184 | 0 | if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ && |
185 | 0 | ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ) |
186 | 0 | { |
187 | | // Convert for ogrField.Date.TZFlag to UTC |
188 | 0 | const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15; |
189 | 0 | const int TZOffsetMS = TZOffset * 60 * 1000; |
190 | 0 | nVal -= TZOffsetMS; |
191 | 0 | } |
192 | 0 | static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] = |
193 | 0 | nVal; |
194 | 0 | } |
195 | | |
196 | | GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen) |
197 | 0 | { |
198 | 0 | auto psArray = m_out_array->children[iArrowField]; |
199 | 0 | auto panOffsets = |
200 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
201 | 0 | const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]); |
202 | 0 | if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength) |
203 | 0 | { |
204 | 0 | if (nLen > |
205 | 0 | static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) - |
206 | 0 | nCurLength) |
207 | 0 | { |
208 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
209 | 0 | "Too large string or binary content"); |
210 | 0 | return nullptr; |
211 | 0 | } |
212 | 0 | uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen); |
213 | 0 | if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0) |
214 | 0 | { |
215 | 0 | const uint32_t nDoubleSize = |
216 | 0 | 2U * m_anArrowFieldMaxAlloc[iArrowField]; |
217 | 0 | if (nNewSize < nDoubleSize) |
218 | 0 | nNewSize = nDoubleSize; |
219 | 0 | } |
220 | 0 | void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize); |
221 | 0 | if (newBuffer == nullptr) |
222 | 0 | return nullptr; |
223 | 0 | m_anArrowFieldMaxAlloc[iArrowField] = nNewSize; |
224 | 0 | memcpy(newBuffer, psArray->buffers[2], nCurLength); |
225 | 0 | VSIFreeAligned(const_cast<void *>(psArray->buffers[2])); |
226 | 0 | psArray->buffers[2] = newBuffer; |
227 | 0 | } |
228 | 0 | GByte *paby = |
229 | 0 | static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) + |
230 | 0 | nCurLength; |
231 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen); |
232 | 0 | return paby; |
233 | 0 | } |
234 | | |
235 | | static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat) |
236 | 0 | { |
237 | 0 | auto panOffsets = |
238 | 0 | static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1])); |
239 | 0 | panOffsets[iFeat + 1] = panOffsets[iFeat]; |
240 | 0 | } |
241 | | |
242 | | void Shrink(int nFeatures) |
243 | 0 | { |
244 | 0 | if (nFeatures < m_nMaxBatchSize) |
245 | 0 | { |
246 | 0 | m_out_array->length = nFeatures; |
247 | 0 | for (int i = 0; i < m_nChildren; i++) |
248 | 0 | { |
249 | 0 | m_out_array->children[i]->length = nFeatures; |
250 | 0 | } |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | void ClearArray() |
255 | 0 | { |
256 | 0 | if (m_out_array->release) |
257 | 0 | m_out_array->release(m_out_array); |
258 | 0 | memset(m_out_array, 0, sizeof(*m_out_array)); |
259 | 0 | } |
260 | | |
261 | | static bool FillDict(struct ArrowArray *psChild, |
262 | | const OGRCodedFieldDomain *poCodedDomain); |
263 | | }; |
264 | | |
265 | | //! @endcond |