/src/gdal/apps/gdalalg_vector_combine.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL |
4 | | * Purpose: "gdal vector combine" subcommand |
5 | | * Author: Daniel Baston |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2025-2026, ISciences LLC |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "gdalalg_vector_combine.h" |
14 | | |
15 | | #include "cpl_error.h" |
16 | | #include "gdal_priv.h" |
17 | | #include "gdalalg_vector_geom.h" |
18 | | #include "ogr_geometry.h" |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cinttypes> |
22 | | #include <optional> |
23 | | |
24 | | #ifndef _ |
25 | 0 | #define _(x) (x) |
26 | | #endif |
27 | | |
28 | | //! @cond Doxygen_Suppress |
29 | | |
30 | | GDALVectorCombineAlgorithm::GDALVectorCombineAlgorithm(bool standaloneStep) |
31 | 0 | : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL, |
32 | 0 | standaloneStep) |
33 | 0 | { |
34 | 0 | AddArg("group-by", 0, |
35 | 0 | _("Names of field(s) by which inputs should be grouped"), &m_groupBy) |
36 | 0 | .SetDuplicateValuesAllowed(false); |
37 | |
|
38 | 0 | AddArg("keep-nested", 0, |
39 | 0 | _("Avoid combining the components of multipart geometries"), |
40 | 0 | &m_keepNested); |
41 | 0 | } |
42 | | |
43 | | namespace |
44 | | { |
45 | | class GDALVectorCombineOutputLayer final |
46 | | : public GDALVectorNonStreamingAlgorithmLayer |
47 | | { |
48 | | public: |
49 | | explicit GDALVectorCombineOutputLayer( |
50 | | OGRLayer &srcLayer, int geomFieldIndex, |
51 | | const std::vector<std::string> &groupBy, bool keepNested) |
52 | 0 | : GDALVectorNonStreamingAlgorithmLayer(srcLayer, geomFieldIndex), |
53 | 0 | m_groupBy(groupBy), m_defn(OGRFeatureDefn::CreateFeatureDefn( |
54 | 0 | srcLayer.GetLayerDefn()->GetName())), |
55 | 0 | m_keepNested(keepNested) |
56 | 0 | { |
57 | 0 | m_defn->Reference(); |
58 | |
|
59 | 0 | const OGRFeatureDefn *srcDefn = m_srcLayer.GetLayerDefn(); |
60 | | |
61 | | // Copy field definitions for attribute fields used in |
62 | | // --group-by. All other attributes are discarded. |
63 | 0 | for (const auto &fieldName : m_groupBy) |
64 | 0 | { |
65 | | // RunStep already checked that the field exists |
66 | 0 | const auto iField = srcDefn->GetFieldIndex(fieldName.c_str()); |
67 | 0 | CPLAssert(iField >= 0); |
68 | | |
69 | 0 | m_srcFieldIndices.push_back(iField); |
70 | 0 | m_defn->AddFieldDefn(srcDefn->GetFieldDefn(iField)); |
71 | 0 | } |
72 | | |
73 | | // Create a new geometry field corresponding to each input geometry |
74 | | // field. An appropriate type is worked out below. |
75 | 0 | m_defn->SetGeomType(wkbNone); // Remove default geometry field |
76 | 0 | for (const OGRGeomFieldDefn *srcGeomDefn : srcDefn->GetGeomFields()) |
77 | 0 | { |
78 | 0 | const auto eSrcGeomType = srcGeomDefn->GetType(); |
79 | 0 | const bool bHasZ = OGR_GT_HasZ(eSrcGeomType); |
80 | 0 | const bool bHasM = OGR_GT_HasM(eSrcGeomType); |
81 | |
|
82 | 0 | OGRwkbGeometryType eDstGeomType = |
83 | 0 | OGR_GT_SetModifier(wkbGeometryCollection, bHasZ, bHasM); |
84 | | |
85 | | // If the layer claims to have single-part geometries, choose a more |
86 | | // specific output type like "MultiPoint" rather than "GeometryCollection" |
87 | 0 | if (wkbFlatten(eSrcGeomType) != wkbUnknown && |
88 | 0 | !OGR_GT_IsSubClassOf(wkbFlatten(eSrcGeomType), |
89 | 0 | wkbGeometryCollection)) |
90 | 0 | { |
91 | 0 | eDstGeomType = OGR_GT_GetCollection(eSrcGeomType); |
92 | 0 | } |
93 | |
|
94 | 0 | auto dstGeomDefn = std::make_unique<OGRGeomFieldDefn>( |
95 | 0 | srcGeomDefn->GetNameRef(), eDstGeomType); |
96 | 0 | dstGeomDefn->SetSpatialRef(srcGeomDefn->GetSpatialRef()); |
97 | 0 | m_defn->AddGeomFieldDefn(std::move(dstGeomDefn)); |
98 | 0 | } |
99 | 0 | } |
100 | | |
101 | | ~GDALVectorCombineOutputLayer() override |
102 | 0 | { |
103 | 0 | m_defn->Release(); |
104 | 0 | } |
105 | | |
106 | | GIntBig GetFeatureCount(int bForce) override |
107 | 0 | { |
108 | 0 | if (m_poAttrQuery == nullptr && m_poFilterGeom == nullptr) |
109 | 0 | { |
110 | 0 | return static_cast<GIntBig>(m_features.size()); |
111 | 0 | } |
112 | | |
113 | 0 | return OGRLayer::GetFeatureCount(bForce); |
114 | 0 | } |
115 | | |
116 | | const OGRFeatureDefn *GetLayerDefn() const override |
117 | 0 | { |
118 | 0 | return m_defn; |
119 | 0 | } |
120 | | |
121 | | OGRErr IGetExtent(int iGeomField, OGREnvelope *psExtent, |
122 | | bool bForce) override |
123 | 0 | { |
124 | 0 | return m_srcLayer.GetExtent(iGeomField, psExtent, bForce); |
125 | 0 | } |
126 | | |
127 | | OGRErr IGetExtent3D(int iGeomField, OGREnvelope3D *psExtent, |
128 | | bool bForce) override |
129 | 0 | { |
130 | 0 | return m_srcLayer.GetExtent3D(iGeomField, psExtent, bForce); |
131 | 0 | } |
132 | | |
133 | | std::unique_ptr<OGRFeature> GetNextProcessedFeature() override |
134 | 0 | { |
135 | 0 | if (!m_itFeature) |
136 | 0 | { |
137 | 0 | m_itFeature = m_features.begin(); |
138 | 0 | } |
139 | |
|
140 | 0 | if (m_itFeature.value() == m_features.end()) |
141 | 0 | { |
142 | 0 | return nullptr; |
143 | 0 | } |
144 | | |
145 | 0 | std::unique_ptr<OGRFeature> feature( |
146 | 0 | m_itFeature.value()->second->Clone()); |
147 | 0 | feature->SetFID(m_nProcessedFeaturesRead++); |
148 | 0 | ++m_itFeature.value(); |
149 | 0 | return feature; |
150 | 0 | } |
151 | | |
152 | | bool Process(GDALProgressFunc pfnProgress, void *pProgressData) override |
153 | 0 | { |
154 | 0 | const int nGeomFields = m_srcLayer.GetLayerDefn()->GetGeomFieldCount(); |
155 | |
|
156 | 0 | const GIntBig nLayerFeatures = |
157 | 0 | m_srcLayer.TestCapability(OLCFastFeatureCount) |
158 | 0 | ? m_srcLayer.GetFeatureCount(false) |
159 | 0 | : -1; |
160 | 0 | const double dfInvLayerFeatures = |
161 | 0 | 1.0 / std::max(1.0, static_cast<double>(nLayerFeatures)); |
162 | |
|
163 | 0 | GIntBig nFeaturesRead = 0; |
164 | |
|
165 | 0 | std::vector<std::string> fieldValues(m_srcFieldIndices.size()); |
166 | 0 | const auto srcDstFieldMap = |
167 | 0 | m_defn->ComputeMapForSetFrom(m_srcLayer.GetLayerDefn(), true); |
168 | 0 | for (const auto &srcFeature : m_srcLayer) |
169 | 0 | { |
170 | 0 | for (size_t iDstField = 0; iDstField < m_srcFieldIndices.size(); |
171 | 0 | iDstField++) |
172 | 0 | { |
173 | 0 | const int iSrcField = m_srcFieldIndices[iDstField]; |
174 | 0 | fieldValues[iDstField] = |
175 | 0 | srcFeature->GetFieldAsString(iSrcField); |
176 | 0 | } |
177 | |
|
178 | 0 | OGRFeature *dstFeature; |
179 | |
|
180 | 0 | if (auto it = m_features.find(fieldValues); it == m_features.end()) |
181 | 0 | { |
182 | 0 | it = m_features |
183 | 0 | .insert(std::pair( |
184 | 0 | fieldValues, std::make_unique<OGRFeature>(m_defn))) |
185 | 0 | .first; |
186 | 0 | dstFeature = it->second.get(); |
187 | |
|
188 | 0 | dstFeature->SetFrom(srcFeature.get(), srcDstFieldMap.data(), |
189 | 0 | false); |
190 | |
|
191 | 0 | for (int iGeomField = 0; iGeomField < nGeomFields; iGeomField++) |
192 | 0 | { |
193 | 0 | OGRGeomFieldDefn *poGeomDefn = |
194 | 0 | m_defn->GetGeomFieldDefn(iGeomField); |
195 | 0 | const auto eGeomType = poGeomDefn->GetType(); |
196 | |
|
197 | 0 | std::unique_ptr<OGRGeometry> poGeom( |
198 | 0 | OGRGeometryFactory::createGeometry(eGeomType)); |
199 | 0 | poGeom->assignSpatialReference(poGeomDefn->GetSpatialRef()); |
200 | |
|
201 | 0 | dstFeature->SetGeomField(iGeomField, std::move(poGeom)); |
202 | 0 | } |
203 | 0 | } |
204 | 0 | else |
205 | 0 | { |
206 | 0 | dstFeature = it->second.get(); |
207 | 0 | } |
208 | |
|
209 | 0 | for (int iGeomField = 0; iGeomField < nGeomFields; iGeomField++) |
210 | 0 | { |
211 | 0 | OGRGeomFieldDefn *poGeomFieldDefn = |
212 | 0 | m_defn->GetGeomFieldDefn(iGeomField); |
213 | |
|
214 | 0 | std::unique_ptr<OGRGeometry> poSrcGeom( |
215 | 0 | srcFeature->StealGeometry(iGeomField)); |
216 | 0 | if (poSrcGeom != nullptr && !poSrcGeom->IsEmpty()) |
217 | 0 | { |
218 | 0 | const auto eSrcType = poSrcGeom->getGeometryType(); |
219 | 0 | const auto bSrcIsCollection = OGR_GT_IsSubClassOf( |
220 | 0 | wkbFlatten(eSrcType), wkbGeometryCollection); |
221 | 0 | const auto bDstIsUntypedCollection = |
222 | 0 | wkbFlatten(poGeomFieldDefn->GetType()) == |
223 | 0 | wkbGeometryCollection; |
224 | | |
225 | | // Did this geometry unexpectedly have Z? |
226 | 0 | if (OGR_GT_HasZ(eSrcType) != |
227 | 0 | OGR_GT_HasZ(poGeomFieldDefn->GetType())) |
228 | 0 | { |
229 | 0 | AddZ(iGeomField); |
230 | 0 | } |
231 | | |
232 | | // Did this geometry unexpectedly have M? |
233 | 0 | if (OGR_GT_HasM(eSrcType) != |
234 | 0 | OGR_GT_HasM(poGeomFieldDefn->GetType())) |
235 | 0 | { |
236 | 0 | AddM(iGeomField); |
237 | 0 | } |
238 | | |
239 | | // Do we need to change the output from a typed collection |
240 | | // like MultiPolygon to a generic GeometryCollection? |
241 | 0 | if (m_keepNested && bSrcIsCollection && |
242 | 0 | !bDstIsUntypedCollection) |
243 | 0 | { |
244 | 0 | SetTypeGeometryCollection(iGeomField); |
245 | 0 | } |
246 | |
|
247 | 0 | OGRGeometryCollection *poDstGeom = |
248 | 0 | cpl::down_cast<OGRGeometryCollection *>( |
249 | 0 | dstFeature->GetGeomFieldRef(iGeomField)); |
250 | |
|
251 | 0 | if (m_keepNested || !bSrcIsCollection) |
252 | 0 | { |
253 | 0 | if (poDstGeom->addGeometry(std::move(poSrcGeom)) != |
254 | 0 | OGRERR_NONE) |
255 | 0 | { |
256 | 0 | CPLError( |
257 | 0 | CE_Failure, CPLE_AppDefined, |
258 | 0 | "Failed to add geometry of type %s to output " |
259 | 0 | "feature of type %s", |
260 | 0 | OGRGeometryTypeToName(eSrcType), |
261 | 0 | OGRGeometryTypeToName( |
262 | 0 | poDstGeom->getGeometryType())); |
263 | 0 | return false; |
264 | 0 | } |
265 | 0 | } |
266 | 0 | else |
267 | 0 | { |
268 | 0 | std::unique_ptr<OGRGeometryCollection> |
269 | 0 | poSrcGeomCollection( |
270 | 0 | poSrcGeom.release()->toGeometryCollection()); |
271 | 0 | if (poDstGeom->addGeometryComponents( |
272 | 0 | std::move(poSrcGeomCollection)) != OGRERR_NONE) |
273 | 0 | { |
274 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
275 | 0 | "Failed to add components from geometry " |
276 | 0 | "of type %s to output " |
277 | 0 | "feature of type %s", |
278 | 0 | OGRGeometryTypeToName(eSrcType), |
279 | 0 | OGRGeometryTypeToName( |
280 | 0 | poDstGeom->getGeometryType())); |
281 | 0 | return false; |
282 | 0 | } |
283 | 0 | } |
284 | 0 | } |
285 | 0 | } |
286 | | |
287 | 0 | if (pfnProgress && nLayerFeatures > 0 && |
288 | 0 | !pfnProgress(static_cast<double>(++nFeaturesRead) * |
289 | 0 | dfInvLayerFeatures, |
290 | 0 | "", pProgressData)) |
291 | 0 | { |
292 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "Interrupted by user"); |
293 | 0 | return false; |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 0 | if (pfnProgress) |
298 | 0 | { |
299 | 0 | pfnProgress(1.0, "", pProgressData); |
300 | 0 | } |
301 | |
|
302 | 0 | return true; |
303 | 0 | } |
304 | | |
305 | | int TestCapability(const char *pszCap) const override |
306 | 0 | { |
307 | 0 | if (EQUAL(pszCap, OLCFastFeatureCount)) |
308 | 0 | { |
309 | 0 | return true; |
310 | 0 | } |
311 | | |
312 | 0 | if (EQUAL(pszCap, OLCStringsAsUTF8) || |
313 | 0 | EQUAL(pszCap, OLCFastGetExtent) || |
314 | 0 | EQUAL(pszCap, OLCFastGetExtent3D) || |
315 | 0 | EQUAL(pszCap, OLCCurveGeometries) || |
316 | 0 | EQUAL(pszCap, OLCMeasuredGeometries) || |
317 | 0 | EQUAL(pszCap, OLCZGeometries)) |
318 | 0 | { |
319 | 0 | return m_srcLayer.TestCapability(pszCap); |
320 | 0 | } |
321 | | |
322 | 0 | return false; |
323 | 0 | } |
324 | | |
325 | | void ResetReading() override |
326 | 0 | { |
327 | 0 | m_itFeature.reset(); |
328 | 0 | m_nProcessedFeaturesRead = 0; |
329 | 0 | } |
330 | | |
331 | | CPL_DISALLOW_COPY_ASSIGN(GDALVectorCombineOutputLayer) |
332 | | |
333 | | private: |
334 | | void AddM(int iGeomField) |
335 | 0 | { |
336 | 0 | OGRGeomFieldDefn *poGeomFieldDefn = |
337 | 0 | m_defn->GetGeomFieldDefn(iGeomField); |
338 | 0 | whileUnsealing(poGeomFieldDefn) |
339 | 0 | ->SetType(OGR_GT_SetM(poGeomFieldDefn->GetType())); |
340 | |
|
341 | 0 | for (auto &[_, poFeature] : m_features) |
342 | 0 | { |
343 | 0 | poFeature->GetGeomFieldRef(iGeomField)->setMeasured(true); |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | | void AddZ(int iGeomField) |
348 | 0 | { |
349 | 0 | OGRGeomFieldDefn *poGeomFieldDefn = |
350 | 0 | m_defn->GetGeomFieldDefn(iGeomField); |
351 | 0 | whileUnsealing(poGeomFieldDefn) |
352 | 0 | ->SetType(OGR_GT_SetZ(poGeomFieldDefn->GetType())); |
353 | |
|
354 | 0 | for (auto &[_, poFeature] : m_features) |
355 | 0 | { |
356 | 0 | poFeature->GetGeomFieldRef(iGeomField)->set3D(true); |
357 | 0 | } |
358 | 0 | } |
359 | | |
360 | | void SetTypeGeometryCollection(int iGeomField) |
361 | 0 | { |
362 | 0 | OGRGeomFieldDefn *poGeomFieldDefn = |
363 | 0 | m_defn->GetGeomFieldDefn(iGeomField); |
364 | 0 | const bool hasZ = OGR_GT_HasZ(poGeomFieldDefn->GetType()); |
365 | 0 | const bool hasM = OGR_GT_HasM(poGeomFieldDefn->GetType()); |
366 | |
|
367 | 0 | whileUnsealing(poGeomFieldDefn) |
368 | 0 | ->SetType(OGR_GT_SetModifier(wkbGeometryCollection, hasZ, hasM)); |
369 | |
|
370 | 0 | for (auto &[_, poFeature] : m_features) |
371 | 0 | { |
372 | 0 | std::unique_ptr<OGRGeometry> poTmpGeom( |
373 | 0 | poFeature->StealGeometry(iGeomField)); |
374 | 0 | poTmpGeom = OGRGeometryFactory::forceTo(std::move(poTmpGeom), |
375 | 0 | poGeomFieldDefn->GetType()); |
376 | 0 | CPLAssert(poTmpGeom); |
377 | 0 | poFeature->SetGeomField(iGeomField, std::move(poTmpGeom)); |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | | const std::vector<std::string> m_groupBy{}; |
382 | | std::vector<int> m_srcFieldIndices{}; |
383 | | std::map<std::vector<std::string>, std::unique_ptr<OGRFeature>> |
384 | | m_features{}; |
385 | | std::optional<decltype(m_features)::const_iterator> m_itFeature{}; |
386 | | OGRFeatureDefn *const m_defn; |
387 | | GIntBig m_nProcessedFeaturesRead = 0; |
388 | | const bool m_keepNested; |
389 | | }; |
390 | | } // namespace |
391 | | |
392 | | bool GDALVectorCombineAlgorithm::RunStep(GDALPipelineStepRunContext &ctxt) |
393 | 0 | { |
394 | 0 | auto poSrcDS = m_inputDataset[0].GetDatasetRef(); |
395 | 0 | auto poDstDS = |
396 | 0 | std::make_unique<GDALVectorNonStreamingAlgorithmDataset>(*poSrcDS); |
397 | |
|
398 | 0 | GDALVectorAlgorithmLayerProgressHelper progressHelper(ctxt); |
399 | |
|
400 | 0 | for (auto &&poSrcLayer : poSrcDS->GetLayers()) |
401 | 0 | { |
402 | 0 | if (m_inputLayerNames.empty() || |
403 | 0 | std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(), |
404 | 0 | poSrcLayer->GetDescription()) != m_inputLayerNames.end()) |
405 | 0 | { |
406 | 0 | const auto poSrcLayerDefn = poSrcLayer->GetLayerDefn(); |
407 | 0 | if (poSrcLayerDefn->GetGeomFieldCount() == 0) |
408 | 0 | { |
409 | 0 | if (m_inputLayerNames.empty()) |
410 | 0 | continue; |
411 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
412 | 0 | "Specified layer '%s' has no geometry field", |
413 | 0 | poSrcLayer->GetDescription()); |
414 | 0 | return false; |
415 | 0 | } |
416 | | |
417 | | // Check that all attributes exist |
418 | 0 | for (const auto &fieldName : m_groupBy) |
419 | 0 | { |
420 | 0 | const int iSrcFieldIndex = |
421 | 0 | poSrcLayerDefn->GetFieldIndex(fieldName.c_str()); |
422 | 0 | if (iSrcFieldIndex == -1) |
423 | 0 | { |
424 | 0 | ReportError(CE_Failure, CPLE_AppDefined, |
425 | 0 | "Specified attribute field '%s' does not exist " |
426 | 0 | "in layer '%s'", |
427 | 0 | fieldName.c_str(), |
428 | 0 | poSrcLayer->GetDescription()); |
429 | 0 | return false; |
430 | 0 | } |
431 | 0 | } |
432 | | |
433 | 0 | progressHelper.AddProcessedLayer(*poSrcLayer); |
434 | 0 | } |
435 | 0 | } |
436 | | |
437 | 0 | for ([[maybe_unused]] auto [poSrcLayer, bProcessed, layerProgressFunc, |
438 | 0 | layerProgressData] : progressHelper) |
439 | 0 | { |
440 | 0 | auto poLayer = std::make_unique<GDALVectorCombineOutputLayer>( |
441 | 0 | *poSrcLayer, -1, m_groupBy, m_keepNested); |
442 | |
|
443 | 0 | if (!poDstDS->AddProcessedLayer(std::move(poLayer), layerProgressFunc, |
444 | 0 | layerProgressData.get())) |
445 | 0 | { |
446 | 0 | return false; |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | 0 | m_outputDataset.Set(std::move(poDstDS)); |
451 | |
|
452 | 0 | return true; |
453 | 0 | } |
454 | | |
455 | 0 | GDALVectorCombineAlgorithmStandalone::~GDALVectorCombineAlgorithmStandalone() = |
456 | | default; |
457 | | |
458 | | //! @endcond |