Coverage Report

Created: 2025-12-31 08:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/ogr/ogrsf_frmts/osm/ogr_osm.h
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Private definitions for OGR/OpenStreetMap driver.
5
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2012-2014, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#ifndef OGR_OSM_H_INCLUDED
14
#define OGR_OSM_H_INCLUDED
15
16
// replace O(log2(N)) complexity of FindNode() by O(1)
17
#define ENABLE_NODE_LOOKUP_BY_HASHING 1
18
19
#include "ogrsf_frmts.h"
20
#include "cpl_string.h"
21
22
#include <array>
23
#include <set>
24
#include <unordered_set>
25
#include <map>
26
#include <vector>
27
28
#include "osm_parser.h"
29
30
#include "ogrsqlitevfs.h"
31
32
/** Comparator functor ordering C strings by their content (via strcmp)
 *  instead of by pointer value. Intended as the ordering predicate of
 *  associative containers keyed on const char*.
 */
class OGROSMConstCharComp
{
  public:
    /** Returns true when the string pointed to by a sorts strictly before
     *  the string pointed to by b. Both pointers must be non-null,
     *  NUL-terminated strings since they are handed directly to strcmp().
     */
    bool operator()(const char *a, const char *b) const
    {
        const int nOrder = strcmp(a, b);
        return nOrder < 0;
    }
};
40
41
class OGROSMComputedAttribute
42
{
43
  public:
44
    CPLString osName{};
45
    int nIndex = -1;
46
    OGRFieldType eType = OFTString;
47
    CPLString osSQL{};
48
    sqlite3_stmt *hStmt = nullptr;
49
    std::vector<CPLString> aosAttrToBind{};
50
    std::vector<int> anIndexToBind{};
51
    bool bHardcodedZOrder = false;
52
53
0
    OGROSMComputedAttribute() = default;
54
55
1.36k
    explicit OGROSMComputedAttribute(const char *pszName) : osName(pszName)
56
1.36k
    {
57
1.36k
    }
58
59
1.36k
    OGROSMComputedAttribute(OGROSMComputedAttribute &&) = default;
60
    OGROSMComputedAttribute &operator=(OGROSMComputedAttribute &&) = default;
61
62
  private:
63
    OGROSMComputedAttribute(const OGROSMComputedAttribute &) = delete;
64
    OGROSMComputedAttribute &
65
    operator=(const OGROSMComputedAttribute &) = delete;
66
};
67
68
/************************************************************************/
69
/*                           OGROSMLayer                                */
70
/************************************************************************/
71
72
class OGROSMDataSource;
73
74
class OGROSMLayer final : public OGRLayer
75
{
76
    friend class OGROSMDataSource;
77
78
    OGROSMDataSource *m_poDS = nullptr;
79
    int m_nIdxLayer = 0;
80
    OGRFeatureDefn *m_poFeatureDefn = nullptr;
81
    OGRSpatialReference *m_poSRS = nullptr;
82
83
    std::vector<char *>
84
        m_apszNames{}; /* Needed to keep a "reference" to the string inserted
85
                          into oMapFieldNameToIndex */
86
    std::map<const char *, int, OGROSMConstCharComp> m_oMapFieldNameToIndex{};
87
88
    std::vector<OGROSMComputedAttribute> m_oComputedAttributes{};
89
90
    bool m_bResetReadingAllowed = false;
91
92
    size_t m_nFeatureArrayIndex = 0;
93
    std::vector<std::unique_ptr<OGRFeature>> m_apoFeatures{};
94
95
    bool m_bHasOSMId = false;
96
    int m_nIndexOSMId = -1;
97
    int m_nIndexOSMWayId = -1;
98
    bool m_bHasVersion = false;
99
    bool m_bHasTimestamp = false;
100
    bool m_bHasUID = false;
101
    bool m_bHasUser = false;
102
    bool m_bHasChangeset = false;
103
    bool m_bHasOtherTags = true;
104
    int m_nIndexOtherTags = -1;
105
    bool m_bHasAllTags = false;
106
    int m_nIndexAllTags = -1;
107
108
    bool m_bHasWarnedTooManyFeatures = false;
109
110
    std::string m_osAllTagsBuffer{};
111
112
    bool m_bUserInterested = true;
113
114
    bool AddToArray(std::unique_ptr<OGRFeature>, bool bCheckFeatureThreshold);
115
116
    int AddInOtherOrAllTags(const char *pszK);
117
118
    char szLaunderedFieldName[256];
119
    const char *GetLaunderedFieldName(const char *pszName);
120
121
    std::vector<char *> apszInsignificantKeys{};
122
    std::map<const char *, int, OGROSMConstCharComp> aoSetInsignificantKeys{};
123
124
    std::vector<char *> apszIgnoreKeys{};
125
    std::map<const char *, int, OGROSMConstCharComp> aoSetIgnoreKeys{};
126
127
    std::set<std::string> aoSetWarnKeys{};
128
129
    OGROSMLayer(const OGROSMLayer &) = delete;
130
    OGROSMLayer &operator=(const OGROSMLayer &) = delete;
131
132
  public:
133
    OGROSMLayer(OGROSMDataSource *m_poDS, int m_nIdxLayer, const char *pszName);
134
    ~OGROSMLayer() override;
135
136
    using OGRLayer::GetLayerDefn;
137
138
    const OGRFeatureDefn *GetLayerDefn() const override
139
19.9k
    {
140
19.9k
        return m_poFeatureDefn;
141
19.9k
    }
142
143
    void ResetReading() override;
144
    int TestCapability(const char *) const override;
145
146
    OGRFeature *GetNextFeature() override;
147
148
    OGRFeature *MyGetNextFeature(OGROSMLayer **ppoNewCurLayer,
149
                                 GDALProgressFunc pfnProgress,
150
                                 void *pProgressData);
151
152
    GIntBig GetFeatureCount(int bForce) override;
153
154
    OGRErr SetAttributeFilter(const char *pszAttrQuery) override;
155
156
    OGRErr IGetExtent(int iGeomField, OGREnvelope *psExtent,
157
                      bool bForce) override;
158
159
    const OGREnvelope *GetSpatialFilterEnvelope();
160
161
    bool AddFeature(std::unique_ptr<OGRFeature> poFeature,
162
                    bool bAttrFilterAlreadyEvaluated,
163
                    bool *pbFilteredOut = nullptr,
164
                    bool bCheckFeatureThreshold = true);
165
    void ForceResetReading();
166
167
    void AddField(const char *pszName, OGRFieldType eFieldType,
168
                  OGRFieldSubType eSubType = OFSTNone);
169
    int GetFieldIndex(const char *pszName);
170
171
    bool HasOSMId() const
172
0
    {
173
0
        return m_bHasOSMId;
174
0
    }
175
176
    void SetHasOSMId(bool bIn)
177
3.40k
    {
178
3.40k
        m_bHasOSMId = bIn;
179
3.40k
    }
180
181
    bool HasVersion() const
182
680
    {
183
680
        return m_bHasVersion;
184
680
    }
185
186
    void SetHasVersion(bool bIn)
187
3.40k
    {
188
3.40k
        m_bHasVersion = bIn;
189
3.40k
    }
190
191
    bool HasTimestamp() const
192
680
    {
193
680
        return m_bHasTimestamp;
194
680
    }
195
196
    void SetHasTimestamp(bool bIn)
197
3.40k
    {
198
3.40k
        m_bHasTimestamp = bIn;
199
3.40k
    }
200
201
    bool HasUID() const
202
680
    {
203
680
        return m_bHasUID;
204
680
    }
205
206
    void SetHasUID(bool bIn)
207
3.40k
    {
208
3.40k
        m_bHasUID = bIn;
209
3.40k
    }
210
211
    bool HasUser() const
212
680
    {
213
680
        return m_bHasUser;
214
680
    }
215
216
    void SetHasUser(bool bIn)
217
3.40k
    {
218
3.40k
        m_bHasUser = bIn;
219
3.40k
    }
220
221
    bool HasChangeset() const
222
680
    {
223
680
        return m_bHasChangeset;
224
680
    }
225
226
    void SetHasChangeset(bool bIn)
227
3.40k
    {
228
3.40k
        m_bHasChangeset = bIn;
229
3.40k
    }
230
231
    bool HasOtherTags() const
232
3.40k
    {
233
3.40k
        return m_bHasOtherTags;
234
3.40k
    }
235
236
    void SetHasOtherTags(bool bIn)
237
0
    {
238
0
        m_bHasOtherTags = bIn;
239
0
    }
240
241
    bool HasAllTags() const
242
3.40k
    {
243
3.40k
        return m_bHasAllTags;
244
3.40k
    }
245
246
    void SetHasAllTags(bool bIn)
247
0
    {
248
0
        m_bHasAllTags = bIn;
249
0
    }
250
251
    void SetFieldsFromTags(OGRFeature *poFeature, GIntBig nID, bool bIsWayID,
252
                           unsigned int nTags, const OSMTag *pasTags,
253
                           const OSMInfo *psInfo);
254
255
    void SetDeclareInterest(bool bIn)
256
0
    {
257
0
        m_bUserInterested = bIn;
258
0
    }
259
260
    bool IsUserInterested() const
261
2.18k
    {
262
2.18k
        return m_bUserInterested;
263
2.18k
    }
264
265
    int HasAttributeFilter() const
266
218
    {
267
218
        return m_poAttrQuery != nullptr;
268
218
    }
269
270
    int EvaluateAttributeFilter(OGRFeature *poFeature);
271
272
    void AddInsignificantKey(const char *pszK);
273
274
    int IsSignificantKey(const char *pszK) const
275
237
    {
276
237
        return aoSetInsignificantKeys.find(pszK) ==
277
237
               aoSetInsignificantKeys.end();
278
237
    }
279
280
    void AddIgnoreKey(const char *pszK);
281
    void AddWarnKey(const char *pszK);
282
283
    void AddComputedAttribute(const char *pszName, OGRFieldType eType,
284
                              const char *pszSQL);
285
};
286
287
/************************************************************************/
288
/*                        OGROSMDataSource                              */
289
/************************************************************************/
290
291
struct KeyDesc
292
{
293
    char *pszK = nullptr;
294
    int nKeyIndex = 0;
295
    int nOccurrences = 0;
296
    std::vector<char *> apszValues{};
297
    //! map that is the reverse of apszValues
298
    std::map<const char *, int, OGROSMConstCharComp> anMapV{};
299
};
300
301
/** Compact key/value pair. Each side is either an index or an offset; the
 *  two flag fields say which member of the matching union is meaningful.
 */
struct IndexedKVP
{
    /* whether we should use nKeyIndex or nOffsetInpabyNonRedundantKeys */
    short bKIsIndex;
    /* whether we should use nValueIndex or nOffsetInpabyNonRedundantValues */
    short bVIsIndex;

    union
    {
        /* index of OGROSMDataSource.asKeys */
        int nKeyIndex;
        /* offset in OGROSMDataSource.pabyNonRedundantKeys */
        int nOffsetInpabyNonRedundantKeys;
    } uKey;

    union
    {
        /* index of KeyDesc.apszValues */
        int nValueIndex;
        /* offset in OGROSMDataSource.pabyNonRedundantValues */
        int nOffsetInpabyNonRedundantValues;
    } uVal;
};
324
325
typedef struct
326
{
327
    GIntBig nOff;
328
329
    /* Note: only one of nth bucket pabyBitmap or panSectorSize must be free'd
330
     */
331
    union
332
    {
333
        GByte *pabyBitmap;    /* array of BUCKET_BITMAP_SIZE bytes */
334
        GByte *panSectorSize; /* array of BUCKET_SECTOR_SIZE_ARRAY_SIZE bytes.
335
                                 Each values means (size in bytes - 8 ) / 2,
336
                                 minus 8. 252 means uncompressed */
337
    } u;
338
} Bucket;
339
340
/** Pair of integer coordinates.
 *  NOTE(review): the integer encoding/scale of the values is not visible
 *  in this header. */
struct LonLat
{
    int nLon;  //!< longitude component
    int nLat;  //!< latitude component
};
345
346
struct WayFeaturePair
347
{
348
    GIntBig nWayID = 0;
349
    /* point to a sub-array of OGROSMDataSource.anReqIds */
350
    GIntBig *panNodeRefs = nullptr;
351
    unsigned int nRefs = 0;
352
    unsigned int nTags = 0;
353
    IndexedKVP *pasTags = nullptr; /*  point to a sub-array of
354
                            OGROSMDataSource.pasAccumulatedTags */
355
    OSMInfo sInfo{};
356
    std::unique_ptr<OGRFeature> poFeature{};
357
    bool bIsArea = false;
358
    bool bAttrFilterAlreadyEvaluated = false;
359
};
360
361
#ifdef ENABLE_NODE_LOOKUP_BY_HASHING
/** One link of a collision chain used by the hashed node lookup. Only
 *  compiled when ENABLE_NODE_LOOKUP_BY_HASHING is defined.
 */
struct CollisionBucket
{
    /* values are indexes of panReqIds */
    int nInd;
    /* values are indexes of psCollisionBuckets, or -1 to stop the chain */
    int nNext;
};
#endif
369
370
class OGROSMDataSource final : public GDALDataset
371
{
372
    friend class OGROSMLayer;
373
374
    std::vector<std::unique_ptr<OGROSMLayer>> m_apoLayers{};
375
376
    std::string m_osConfigFile{};
377
378
    OGREnvelope m_sExtent{};
379
    bool m_bExtentValid = false;
380
381
    // Starts off at -1 to indicate that we do not know.
382
    int m_bInterleavedReading = -1;
383
    OGROSMLayer *m_poCurrentLayer = nullptr;
384
385
    OSMContext *m_psParser = nullptr;
386
    bool m_bHasParsedFirstChunk = false;
387
    bool m_bStopParsing = false;
388
389
    sqlite3_vfs *m_pMyVFS = nullptr;
390
391
    sqlite3 *m_hDB = nullptr;
392
    sqlite3_stmt *m_hInsertNodeStmt = nullptr;
393
    sqlite3_stmt *m_hInsertWayStmt = nullptr;
394
    sqlite3_stmt **m_pahSelectNodeStmt = nullptr;
395
    sqlite3_stmt **m_pahSelectWayStmt = nullptr;
396
    sqlite3_stmt *m_hInsertPolygonsStandaloneStmt = nullptr;
397
    sqlite3_stmt *m_hDeletePolygonsStandaloneStmt = nullptr;
398
    sqlite3_stmt *m_hSelectPolygonsStandaloneStmt = nullptr;
399
    bool m_bHasRowInPolygonsStandalone = false;
400
401
    sqlite3 *m_hDBForComputedAttributes = nullptr;
402
403
    int m_nMaxSizeForInMemoryDBInMB = 0;
404
    bool m_bInMemoryTmpDB = false;
405
    bool m_bMustUnlink = true;
406
    CPLString m_osTmpDBName{};
407
408
    std::unordered_set<std::string> aoSetClosedWaysArePolygons{};
409
    int m_nMinSizeKeysInSetClosedWaysArePolygons = 0;
410
    int m_nMaxSizeKeysInSetClosedWaysArePolygons = 0;
411
412
    std::vector<LonLat> m_asLonLatCache{};
413
414
    std::array<const char *, 7> m_ignoredKeys = {{"area", "created_by",
415
                                                  "converted_by", "note",
416
                                                  "todo", "fixme", "FIXME"}};
417
418
    bool m_bReportAllNodes = false;
419
    bool m_bReportAllWays = false;
420
    bool m_bTagsAsHSTORE = true;  // if false, as JSON
421
422
    bool m_bFeatureAdded = false;
423
424
    bool m_bInTransaction = false;
425
426
    bool m_bIndexPoints = true;
427
    bool m_bUsePointsIndex = true;
428
    bool m_bIndexWays = true;
429
    bool m_bUseWaysIndex = true;
430
431
    std::vector<bool> m_abSavedDeclaredInterest{};
432
    OGRLayer *m_poResultSetLayer = nullptr;
433
    bool m_bIndexPointsBackup = false;
434
    bool m_bUsePointsIndexBackup = false;
435
    bool m_bIndexWaysBackup = false;
436
    bool m_bUseWaysIndexBackup = false;
437
438
    bool m_bIsFeatureCountEnabled = false;
439
440
    bool m_bAttributeNameLaundering = true;
441
442
    std::vector<GByte> m_abyWayBuffer{};
443
444
    int m_nWaysProcessed = 0;
445
    int m_nRelationsProcessed = 0;
446
447
    bool m_bCustomIndexing = true;
448
    bool m_bCompressNodes = false;
449
450
    unsigned int m_nUnsortedReqIds = 0;
451
    GIntBig *m_panUnsortedReqIds = nullptr;
452
453
    unsigned int m_nReqIds = 0;
454
    GIntBig *m_panReqIds = nullptr;
455
456
#ifdef ENABLE_NODE_LOOKUP_BY_HASHING
457
    bool m_bEnableHashedIndex = true;
458
    /* values >= 0 are indexes of panReqIds. */
459
    /*        == -1 for unoccupied */
460
    /*        < -1 are expressed as -nIndexToCollisionBuckets-2 where
461
     * nIndexToCollisionBuckets point to psCollisionBuckets */
462
    int *m_panHashedIndexes = nullptr;
463
    CollisionBucket *m_psCollisionBuckets = nullptr;
464
    bool m_bHashedIndexValid = false;
465
#endif
466
467
    LonLat *m_pasLonLatArray = nullptr;
468
469
    IndexedKVP *m_pasAccumulatedTags =
470
        nullptr; /* points to content of pabyNonRedundantValues or
471
                    aoMapIndexedKeys */
472
    int m_nAccumulatedTags = 0;
473
    unsigned int MAX_INDEXED_KEYS = 0;
474
    GByte *pabyNonRedundantKeys = nullptr;
475
    int nNonRedundantKeysLen = 0;
476
    unsigned int MAX_INDEXED_VALUES_PER_KEY = 0;
477
    GByte *pabyNonRedundantValues = nullptr;
478
    int nNonRedundantValuesLen = 0;
479
    std::vector<WayFeaturePair> m_asWayFeaturePairs{};
480
481
    std::vector<KeyDesc *> m_apsKeys{};
482
    std::map<const char *, KeyDesc *, OGROSMConstCharComp>
483
        m_aoMapIndexedKeys{}; /* map that is the reverse of asKeys */
484
485
    CPLString m_osNodesFilename{};
486
    bool m_bInMemoryNodesFile = false;
487
    bool m_bMustUnlinkNodesFile = true;
488
    GIntBig m_nNodesFileSize = 0;
489
    VSILFILE *m_fpNodes = nullptr;
490
491
    GIntBig m_nPrevNodeId = -INT_MAX;
492
    int m_nBucketOld = -1;
493
    int m_nOffInBucketReducedOld = -1;
494
    GByte *m_pabySector = nullptr;
495
    std::map<int, Bucket> m_oMapBuckets{};
496
    Bucket *GetBucket(int nBucketId);
497
498
    bool m_bNeedsToSaveWayInfo = false;
499
500
    static const GIntBig FILESIZE_NOT_INIT = -2;
501
    static const GIntBig FILESIZE_INVALID = -1;
502
    GIntBig m_nFileSize = FILESIZE_NOT_INIT;
503
504
    void CompressWay(bool bIsArea, unsigned int nTags,
505
                     const IndexedKVP *pasTags, int nPoints,
506
                     const LonLat *pasLonLatPairs, const OSMInfo *psInfo,
507
                     std::vector<GByte> &abyCompressedWay);
508
    void UncompressWay(int nBytes, const GByte *pabyCompressedWay,
509
                       bool *pbIsArea, std::vector<LonLat> &asCoords,
510
                       unsigned int *pnTags, OSMTag *pasTags, OSMInfo *psInfo);
511
512
    bool ParseConf(CSLConstList papszOpenOptions);
513
    bool CreateTempDB();
514
    bool SetDBOptions();
515
    void SetCacheSize();
516
    bool CreatePreparedStatements();
517
    void CloseDB();
518
519
    bool IndexPoint(const OSMNode *psNode);
520
    bool IndexPointSQLite(const OSMNode *psNode);
521
    bool FlushCurrentSector();
522
    bool FlushCurrentSectorCompressedCase();
523
    bool FlushCurrentSectorNonCompressedCase();
524
    bool IndexPointCustom(const OSMNode *psNode);
525
526
    void IndexWay(GIntBig nWayID, bool bIsArea, unsigned int nTags,
527
                  const IndexedKVP *pasTags, const LonLat *pasLonLatPairs,
528
                  int nPairs, const OSMInfo *psInfo);
529
530
    bool StartTransactionCacheDB();
531
    bool CommitTransactionCacheDB();
532
533
    int FindNode(GIntBig nID);
534
    void ProcessWaysBatch();
535
536
    void ProcessPolygonsStandalone();
537
538
    void LookupNodes();
539
    void LookupNodesSQLite();
540
    void LookupNodesCustom();
541
    void LookupNodesCustomCompressedCase();
542
    void LookupNodesCustomNonCompressedCase();
543
544
    unsigned int
545
    LookupWays(std::map<GIntBig, std::pair<int, void *>> &aoMapWays,
546
               const OSMRelation *psRelation);
547
548
    OGRGeometry *BuildMultiPolygon(const OSMRelation *psRelation,
549
                                   unsigned int *pnTags, OSMTag *pasTags);
550
    OGRGeometry *BuildGeometryCollection(const OSMRelation *psRelation,
551
                                         bool bMultiLineString);
552
553
    bool TransferToDiskIfNecesserary();
554
555
    Bucket *AllocBucket(int iBucket);
556
557
    void AddComputedAttributes(
558
        int iCurLayer, const std::vector<OGROSMComputedAttribute> &oAttributes);
559
    bool IsClosedWayTaggedAsPolygon(unsigned int nTags, const OSMTag *pasTags);
560
561
    OGROSMDataSource(const OGROSMDataSource &) = delete;
562
    OGROSMDataSource &operator=(const OGROSMDataSource &) = delete;
563
564
  public:
565
    OGROSMDataSource();
566
    ~OGROSMDataSource() override;
567
568
    int GetLayerCount() const override
569
1.05k
    {
570
1.05k
        return static_cast<int>(m_apoLayers.size());
571
1.05k
    }
572
573
    const OGRLayer *GetLayer(int) const override;
574
575
    int TestCapability(const char *) const override;
576
577
    OGRLayer *ExecuteSQL(const char *pszSQLCommand,
578
                         OGRGeometry *poSpatialFilter,
579
                         const char *pszDialect) override;
580
    void ReleaseResultSet(OGRLayer *poLayer) override;
581
582
    void ResetReading() override;
583
    virtual OGRFeature *GetNextFeature(OGRLayer **ppoBelongingLayer,
584
                                       double *pdfProgressPct,
585
                                       GDALProgressFunc pfnProgress,
586
                                       void *pProgressData) override;
587
588
    int Open(const char *pszFilename, CSLConstList papszOpenOptions);
589
590
    int MyResetReading();
591
    bool ParseNextChunk(int nIdxLayer, GDALProgressFunc pfnProgress,
592
                        void *pProgressData);
593
    OGRErr GetNativeExtent(OGREnvelope *psExtent);
594
    int IsInterleavedReading();
595
596
    void NotifyNodes(unsigned int nNodes, const OSMNode *pasNodes);
597
    void NotifyWay(const OSMWay *psWay);
598
    void NotifyRelation(const OSMRelation *psRelation);
599
    void NotifyBounds(double dfXMin, double dfYMin, double dfXMax,
600
                      double dfYMax);
601
602
    OGROSMLayer *GetCurrentLayer()
603
2.11k
    {
604
2.11k
        return m_poCurrentLayer;
605
2.11k
    }
606
607
    void SetCurrentLayer(OGROSMLayer *poLyr)
608
2.11k
    {
609
2.11k
        m_poCurrentLayer = poLyr;
610
2.11k
    }
611
612
    bool IsFeatureCountEnabled() const
613
0
    {
614
0
        return m_bIsFeatureCountEnabled;
615
0
    }
616
617
    bool DoesAttributeNameLaundering() const
618
35.3k
    {
619
35.3k
        return m_bAttributeNameLaundering;
620
35.3k
    }
621
};
622
623
#endif /* ndef OGR_OSM_H_INCLUDED */