Coverage Report

Created: 2025-08-11 09:23

/src/gdal/ogr/ogrsf_frmts/mitab/mitab_utils.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 *
3
 * Name:     mitab_utils.cpp
4
 * Project:  MapInfo TAB Read/Write library
5
 * Language: C++
6
 * Purpose:  Misc. util. functions for the library
7
 * Author:   Daniel Morissette, dmorissette@dmsolutions.ca
8
 *
9
 **********************************************************************
10
 * Copyright (c) 1999-2001, Daniel Morissette
11
 *
12
 * SPDX-License-Identifier: MIT
13
 **********************************************************************/
14
15
#include "cpl_port.h"
16
#include "mitab_utils.h"
17
18
#include <cctype>
19
#include <climits>
20
#include <cmath>
21
#include <cstring>
22
#include <limits>
23
24
#include "mitab.h"
25
#include "cpl_conv.h"
26
#include "cpl_error.h"
27
#include "cpl_string.h"
28
#include "cpl_vsi.h"
29
30
/**********************************************************************
31
 *                       TABGenerateArc()
32
 *
33
 * Generate the coordinates for an arc and ADD the coordinates to the
34
 * geometry object.  If the geometry already contains some points then
35
 * these won't be lost.
36
 *
37
 * poLine can be a OGRLineString or one of its derived classes, such as
38
 *        OGRLinearRing
39
 * numPoints is the number of points to generate.
40
 * Angles are specified in radians, valid values are in the range [0..2*PI]
41
 *
42
 * Arcs are always generated counterclockwise, even if StartAngle > EndAngle
43
 *
44
 * Returns 0 on success, -1 on error.
45
 **********************************************************************/
46
int TABGenerateArc(OGRLineString *poLine, int numPoints, double dCenterX,
47
                   double dCenterY, double dXRadius, double dYRadius,
48
                   double dStartAngle, double dEndAngle)
49
93.8k
{
50
    // Adjust angles to go counterclockwise
51
93.8k
    if (dEndAngle < dStartAngle)
52
4.81k
        dEndAngle += 2.0 * M_PI;
53
54
93.8k
    const double dAngleStep = (dEndAngle - dStartAngle) / (numPoints - 1.0);
55
56
93.8k
    double dAngle = 0.0;
57
6.30M
    for (int i = 0; i < numPoints; i++)
58
6.20M
    {
59
6.20M
        dAngle = dStartAngle + i * dAngleStep;
60
6.20M
        const double dX = dCenterX + dXRadius * cos(dAngle);
61
6.20M
        const double dY = dCenterY + dYRadius * sin(dAngle);
62
6.20M
        poLine->addPoint(dX, dY);
63
6.20M
    }
64
65
    // Complete the arc with the last EndAngle, to make sure that
66
    // the arc is correctly closed.
67
93.8k
    const double dX = dCenterX + dXRadius * cos(dAngle);
68
93.8k
    const double dY = dCenterY + dYRadius * sin(dAngle);
69
93.8k
    poLine->addPoint(dX, dY);
70
71
93.8k
    return 0;
72
93.8k
}
73
74
/**********************************************************************
75
 *                       TABCloseRing()
76
 *
77
 * Check if a ring is closed, and add a point to close it if necessary.
78
 *
79
 * Returns 0 on success, -1 on error.
80
 **********************************************************************/
81
int TABCloseRing(OGRLineString *poRing)
82
28.7k
{
83
28.7k
    if (poRing->getNumPoints() > 0 && !poRing->get_IsClosed())
84
8.83k
    {
85
8.83k
        poRing->addPoint(poRing->getX(0), poRing->getY(0));
86
8.83k
    }
87
88
28.7k
    return 0;
89
28.7k
}
90
91
/**********************************************************************
92
 *                     TABAdjustCaseSensitiveFilename()
93
 *
94
 * Scan a filename and its path, adjust uppercase/lowercases if
95
 * necessary.
96
 *
97
 * Returns TRUE if file found, or FALSE if it could not be located with
98
 * a case-insensitive search.
99
 *
100
 * This function works on the original buffer and returns a reference to it.
101
 * It does nothing on Windows systems where filenames are not case sensitive.
102
 **********************************************************************/
103
#ifdef _WIN32
104
static bool TABAdjustCaseSensitiveFilename(char * /* pszFname */)
105
{
106
    // Nothing to do on Windows.
107
    return true;
108
}
109
#else
110
// Unix case.
111
static bool TABAdjustCaseSensitiveFilename(char *pszFname)
112
19.8k
{
113
19.8k
    VSIStatBufL sStatBuf;
114
115
    // First check if the filename is OK as is.
116
19.8k
    if (VSIStatL(pszFname, &sStatBuf) == 0)
117
0
    {
118
0
        return true;
119
0
    }
120
121
    // File either does not exist or has the wrong cases.
122
    // Go backwards until we find a portion of the path that is valid.
123
19.8k
    char *pszTmpPath = CPLStrdup(pszFname);
124
19.8k
    const int nTotalLen = static_cast<int>(strlen(pszTmpPath));
125
19.8k
    int iTmpPtr = nTotalLen;
126
19.8k
    bool bValidPath = false;
127
128
41.1k
    while (iTmpPtr > 0 && !bValidPath)
129
21.2k
    {
130
        // Move back to the previous '/' separator.
131
21.2k
        pszTmpPath[--iTmpPtr] = '\0';
132
925k
        while (iTmpPtr > 0 && pszTmpPath[iTmpPtr - 1] != '/')
133
904k
        {
134
904k
            pszTmpPath[--iTmpPtr] = '\0';
135
904k
        }
136
137
21.2k
        if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) == 0)
138
19.8k
            bValidPath = true;
139
21.2k
    }
140
141
19.8k
    CPLAssert(iTmpPtr >= 0);
142
143
    // Assume that CWD is valid.  Therefore an empty path is a valid.
144
19.8k
    if (iTmpPtr == 0)
145
0
        bValidPath = true;
146
147
    // Now that we have a valid base, reconstruct the whole path
148
    // by scanning all the sub-directories.
149
    // If we get to a point where a path component does not exist then
150
    // we simply return the rest of the path as is.
151
39.7k
    while (bValidPath && static_cast<int>(strlen(pszTmpPath)) < nTotalLen)
152
19.8k
    {
153
19.8k
        int iLastPartStart = iTmpPtr;
154
19.8k
        char **papszDir = VSIReadDir(pszTmpPath);
155
156
        // Add one component to the current path.
157
19.8k
        pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
158
19.8k
        iTmpPtr++;
159
913k
        for (; pszFname[iTmpPtr] != '\0' && pszFname[iTmpPtr] != '/'; iTmpPtr++)
160
893k
        {
161
893k
            pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
162
893k
        }
163
164
19.8k
        while (iLastPartStart < iTmpPtr && pszTmpPath[iLastPartStart] == '/')
165
0
            iLastPartStart++;
166
167
        // And do a case insensitive search in the current dir.
168
347k
        for (int iEntry = 0; papszDir && papszDir[iEntry]; iEntry++)
169
334k
        {
170
334k
            if (EQUAL(pszTmpPath + iLastPartStart, papszDir[iEntry]))
171
6.77k
            {
172
                // Fount it.
173
6.77k
                strcpy(pszTmpPath + iLastPartStart, papszDir[iEntry]);
174
6.77k
                break;
175
6.77k
            }
176
334k
        }
177
178
19.8k
        if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) != 0)
179
13.1k
            bValidPath = false;
180
181
19.8k
        CSLDestroy(papszDir);
182
19.8k
    }
183
184
    // We reached the last valid path component... just copy the rest
185
    // of the path as is.
186
19.8k
    if (iTmpPtr < nTotalLen - 1)
187
33
    {
188
33
        strncpy(pszTmpPath + iTmpPtr, pszFname + iTmpPtr, nTotalLen - iTmpPtr);
189
33
    }
190
191
    // Update the source buffer and return.
192
19.8k
    strcpy(pszFname, pszTmpPath);
193
19.8k
    CPLFree(pszTmpPath);
194
195
19.8k
    return bValidPath;
196
19.8k
}
197
#endif  // Not win32.
198
199
/**********************************************************************
200
 *                       TABAdjustFilenameExtension()
201
 *
202
 * Because Unix filenames are case sensitive and MapInfo datasets often have
203
 * mixed cases filenames, we use this function to find the right filename
204
 * to use to open a specific file.
205
 *
206
 * This function works directly on the source string, so the filename it
207
 * contains at the end of the call is the one that should be used.
208
 *
209
 * Returns TRUE if one of the extensions worked, and FALSE otherwise.
210
 * If none of the extensions worked then the original extension will NOT be
211
 * restored.
212
 **********************************************************************/
213
GBool TABAdjustFilenameExtension(char *pszFname)
214
67.7k
{
215
67.7k
    VSIStatBufL sStatBuf;
216
217
    // First try using filename as provided
218
67.7k
    if (VSIStatL(pszFname, &sStatBuf) == 0)
219
47.8k
    {
220
47.8k
        return TRUE;
221
47.8k
    }
222
223
    // Try using uppercase extension (we assume that fname contains a '.')
224
19.8k
    for (int i = static_cast<int>(strlen(pszFname)) - 1;
225
79.5k
         i >= 0 && pszFname[i] != '.'; i--)
226
59.6k
    {
227
59.6k
        pszFname[i] = static_cast<char>(
228
59.6k
            CPLToupper(static_cast<unsigned char>(pszFname[i])));
229
59.6k
    }
230
231
19.8k
    if (VSIStatL(pszFname, &sStatBuf) == 0)
232
0
    {
233
0
        return TRUE;
234
0
    }
235
236
    // Try using lowercase extension.
237
19.8k
    for (int i = static_cast<int>(strlen(pszFname)) - 1;
238
79.5k
         i >= 0 && pszFname[i] != '.'; i--)
239
59.6k
    {
240
59.6k
        pszFname[i] = static_cast<char>(
241
59.6k
            CPLTolower(static_cast<unsigned char>(pszFname[i])));
242
59.6k
    }
243
244
19.8k
    if (VSIStatL(pszFname, &sStatBuf) == 0)
245
0
    {
246
0
        return TRUE;
247
0
    }
248
249
    // None of the extensions worked.
250
    // Try adjusting cases in the whole path and filename.
251
19.8k
    return TABAdjustCaseSensitiveFilename(pszFname);
252
19.8k
}
253
254
/**********************************************************************
255
 *                       TABGetBasename()
256
 *
257
 * Extract the basename part of a complete file path.
258
 *
259
 * Returns a newly allocated string without the leading path (dirs) and
260
 * the extension.  The returned string should be freed using CPLFree().
261
 **********************************************************************/
262
char *TABGetBasename(const char *pszFname)
263
47.6k
{
264
    // Skip leading path or use whole name if no path dividers are encountered.
265
47.6k
    const char *pszTmp = pszFname + strlen(pszFname) - 1;
266
1.31M
    while (pszTmp != pszFname && *pszTmp != '/' && *pszTmp != '\\')
267
1.26M
        pszTmp--;
268
269
47.6k
    if (pszTmp != pszFname)
270
47.6k
        pszTmp++;
271
272
    // Now allocate our own copy and remove extension.
273
47.6k
    char *pszBasename = CPLStrdup(pszTmp);
274
190k
    for (int i = static_cast<int>(strlen(pszBasename)) - 1; i >= 0; i--)
275
190k
    {
276
190k
        if (pszBasename[i] == '.')
277
47.6k
        {
278
47.6k
            pszBasename[i] = '\0';
279
47.6k
            break;
280
47.6k
        }
281
190k
    }
282
283
47.6k
    return pszBasename;
284
47.6k
}
285
286
/**********************************************************************
287
 *                       TAB_CSLLoad()
288
 *
289
 * Same as CSLLoad(), but does not produce an error if it fails... it
290
 * just returns NULL silently instead.
291
 *
292
 * Load a test file into a stringlist.
293
 *
294
 * Lines are limited in length by the size of the CPLReadLine() buffer.
295
 **********************************************************************/
296
char **TAB_CSLLoad(const char *pszFname)
297
2.26k
{
298
2.26k
    CPLStringList oList;
299
300
2.26k
    VSILFILE *fp = VSIFOpenL(pszFname, "rt");
301
302
2.26k
    if (fp)
303
2.22k
    {
304
5.09M
        while (const char *pszLine = CPLReadLineL(fp))
305
5.09M
        {
306
5.09M
            oList.AddString(pszLine);
307
5.09M
        }
308
309
2.22k
        VSIFCloseL(fp);
310
2.22k
    }
311
312
2.26k
    return oList.StealList();
313
2.26k
}
314
315
/**********************************************************************
316
 *                       TABUnEscapeString()
317
 *
318
 * Convert a string that can possibly contain escaped "\n" chars in
319
 * into into a new one with binary newlines in it.
320
 *
321
 * Tries to work on the original buffer unless bSrcIsConst=TRUE, in
322
 * which case the original is always untouched and a copy is allocated
323
 * ONLY IF NECESSARY.  This means that the caller should compare the
324
 * return value and the source (pszString) to see if a copy was returned,
325
 * in which case the caller becomes responsible of freeing both the
326
 * source and the copy.
327
 **********************************************************************/
328
char *TABUnEscapeString(char *pszString, GBool bSrcIsConst)
329
79.2k
{
330
    // First check if we need to do any replacement.
331
79.2k
    if (pszString == nullptr || strstr(pszString, "\\n") == nullptr)
332
72.1k
    {
333
72.1k
        return pszString;
334
72.1k
    }
335
336
    // Yes, we need to replace at least one "\n".
337
    // We try to work on the original buffer unless we have bSrcIsConst=TRUE.
338
    //
339
    // Note that we do not worry about freeing the source buffer when we
340
    // return a copy.  It is up to the caller to decide if the source needs
341
    // to be freed based on context and by comparing pszString with
342
    // the returned pointer (pszWorkString) to see if they are identical.
343
7.14k
    char *pszWorkString = nullptr;
344
7.14k
    if (bSrcIsConst)
345
7.14k
    {
346
        // We have to create a copy to work on.
347
7.14k
        pszWorkString = static_cast<char *>(
348
7.14k
            CPLMalloc(sizeof(char) * (strlen(pszString) + 1)));
349
7.14k
    }
350
0
    else
351
0
    {
352
        // Work on the original.
353
0
        pszWorkString = pszString;
354
0
    }
355
356
7.14k
    int i = 0;
357
7.14k
    int j = 0;
358
1.71M
    while (pszString[i])
359
1.71M
    {
360
1.71M
        if (pszString[i] == '\\' && pszString[i + 1] == 'n')
361
8.41k
        {
362
8.41k
            pszWorkString[j++] = '\n';
363
8.41k
            i += 2;
364
8.41k
        }
365
1.70M
        else if (pszString[i] == '\\' && pszString[i + 1] == '\\')
366
33.6k
        {
367
33.6k
            pszWorkString[j++] = '\\';
368
33.6k
            i += 2;
369
33.6k
        }
370
1.66M
        else
371
1.66M
        {
372
1.66M
            pszWorkString[j++] = pszString[i++];
373
1.66M
        }
374
1.71M
    }
375
7.14k
    pszWorkString[j++] = '\0';
376
377
7.14k
    return pszWorkString;
378
79.2k
}
379
380
/**********************************************************************
381
 *                       TABEscapeString()
382
 *
383
 * Convert a string that can possibly contain binary "\n" chars in
384
 * into into a new one with escaped newlines ("\\" + "n") in it.
385
 *
386
 * The function returns the original string pointer if it did not need to
387
 * be modified, or a copy that has to be freed by the caller if the
388
 * string had to be modified.
389
 *
390
 * It is up to the caller to decide if the returned string needs to be
391
 * freed by comparing the source (pszString) pointer with the returned
392
 * pointer (pszWorkString) to see if they are identical.
393
 **********************************************************************/
394
char *TABEscapeString(char *pszString)
395
0
{
396
    // First check if we need to do any replacement
397
0
    if (pszString == nullptr || strchr(pszString, '\n') == nullptr)
398
0
    {
399
0
        return pszString;
400
0
    }
401
402
    // Need to do some replacements.  Alloc a copy big enough
403
    // to hold the worst possible case.
404
0
    char *pszWorkString = static_cast<char *>(
405
0
        CPLMalloc(2 * sizeof(char) * (strlen(pszString) + 1)));
406
407
0
    int i = 0;
408
0
    int j = 0;
409
410
0
    while (pszString[i])
411
0
    {
412
0
        if (pszString[i] == '\n')
413
0
        {
414
0
            pszWorkString[j++] = '\\';
415
0
            pszWorkString[j++] = 'n';
416
0
            i++;
417
0
        }
418
0
        else if (pszString[i] == '\\')
419
0
        {
420
0
            pszWorkString[j++] = '\\';
421
0
            pszWorkString[j++] = '\\';
422
0
            i++;
423
0
        }
424
0
        else
425
0
        {
426
0
            pszWorkString[j++] = pszString[i++];
427
0
        }
428
0
    }
429
0
    pszWorkString[j++] = '\0';
430
431
0
    return pszWorkString;
432
0
}
433
434
/**********************************************************************
435
 *                       TABCleanFieldName()
436
 *
437
 * Return a copy of pszSrcName that contains only valid characters for a
438
 * TAB field name.  All invalid characters are replaced by '_'.
439
 *
440
 * The returned string should be freed by the caller.
441
 **********************************************************************/
442
char *TABCleanFieldName(const char *pszSrcName, const char *pszEncoding,
443
                        bool bStrictLaundering)
444
59
{
445
59
    char *pszNewName = CPLStrdup(pszSrcName);
446
59
    int numInvalidChars = 0;
447
448
59
    if (bStrictLaundering)
449
59
    {
450
59
        if (strlen(pszNewName) > 31)
451
0
        {
452
0
            pszNewName[31] = '\0';
453
0
            CPLError(CE_Warning,
454
0
                     static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
455
0
                     "Field name '%s' is longer than the max of 31 characters. "
456
0
                     "'%s' will be used instead.",
457
0
                     pszSrcName, pszNewName);
458
0
        }
459
460
        // According to the MapInfo User's Guide (p. 240, v5.5).
461
        // New Table Command:
462
        //  Name:
463
        // Displays the field name in the name box. You can also enter new field
464
        // names here. Defaults are Field1, Field2, etc. A field name can contain
465
        // up to 31 alphanumeric characters. Use letters, numbers, and the
466
        // underscore. Do not use spaces; instead, use the underscore character
467
        // (_) to separate words in a field name. Use upper and lower case for
468
        // legibility, but MapInfo is not case-sensitive.
469
        //
470
        // It was also verified that extended chars with accents are also
471
        // accepted.
472
59
        bool bNeutralCharset =
473
59
            (pszEncoding == nullptr || strlen(pszEncoding) == 0);
474
812
        for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
475
753
        {
476
753
            if (pszSrcName[i] == '#')
477
0
            {
478
0
                if (i == 0)
479
0
                {
480
0
                    pszNewName[i] = '_';
481
0
                    numInvalidChars++;
482
0
                }
483
0
            }
484
753
            else if (!(pszSrcName[i] == '_' ||
485
753
                       (i != 0 && pszSrcName[i] >= '0' &&
486
716
                        pszSrcName[i] <= '9') ||
487
753
                       (!bNeutralCharset ||
488
692
                        ((pszSrcName[i] >= 'a' && pszSrcName[i] <= 'z') ||
489
692
                         (pszSrcName[i] >= 'A' && pszSrcName[i] <= 'Z') ||
490
692
                         static_cast<GByte>(pszSrcName[i]) >= 192))))
491
174
            {
492
174
                pszNewName[i] = '_';
493
174
                numInvalidChars++;
494
174
            }
495
753
        }
496
59
    }
497
0
    else
498
0
    {
499
        // There is a note at mapinfo-pro-v2021-user-guide.pdf
500
        // (p. 1425, Columns section: "Field names cannot have spaces".
501
        // There seem to be no other constraints.
502
0
        for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
503
0
        {
504
0
            if (pszSrcName[i] == ' ')
505
0
            {
506
0
                pszNewName[i] = '_';
507
0
                numInvalidChars++;
508
0
            }
509
0
        }
510
0
    }
511
59
    if (numInvalidChars > 0)
512
25
    {
513
25
        CPLError(CE_Warning,
514
25
                 static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
515
25
                 "Field name '%s' contains invalid characters. "
516
25
                 "'%s' will be used instead.",
517
25
                 pszSrcName, pszNewName);
518
25
    }
519
520
59
    return pszNewName;
521
59
}
522
523
/**********************************************************************
524
 *                       TABSaturatedAdd()
525
 ***********************************************************************/
526
527
void TABSaturatedAdd(GInt32 &nVal, GInt32 nAdd)
528
3.07k
{
529
3.07k
    const GInt32 int_max = std::numeric_limits<GInt32>::max();
530
3.07k
    const GInt32 int_min = std::numeric_limits<GInt32>::min();
531
532
3.07k
    if (nAdd >= 0 && nVal > int_max - nAdd)
533
3
        nVal = int_max;
534
3.07k
    else if (nAdd == int_min && nVal < 0)
535
22
        nVal = int_min;
536
3.05k
    else if (nAdd != int_min && nAdd < 0 && nVal < int_min - nAdd)
537
27
        nVal = int_min;
538
3.02k
    else
539
3.02k
        nVal += nAdd;
540
3.07k
}
541
542
/**********************************************************************
543
 *                           TABInt16Diff()
544
 **********************************************************************/
545
546
GInt16 TABInt16Diff(int a, int b)
547
8.09k
{
548
8.09k
    GIntBig nDiff = static_cast<GIntBig>(a) - b;
549
    // Maybe we should error out instead of saturating ???
550
8.09k
    if (nDiff < -32768)
551
1
        return -32768;
552
8.09k
    if (nDiff > 32767)
553
0
        return 32767;
554
8.09k
    return static_cast<GInt16>(nDiff);
555
8.09k
}