Coverage Report

Created: 2025-08-28 06:57

/src/gdal/port/cpl_csv.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  CSV (comma separated value) file access.
5
 * Author:   Frank Warmerdam, warmerdam@pobox.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 1999, Frank Warmerdam
9
 * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 ****************************************************************************/
13
14
#include "cpl_port.h"
15
#include "cpl_csv.h"
16
17
#include <cstddef>
18
#include <cstdlib>
19
#include <cstring>
20
#include <fcntl.h>
21
22
#include "cpl_conv.h"
23
#include "cpl_error.h"
24
#include "cpl_multiproc.h"
25
#include "gdal_csv.h"
26
27
#include <algorithm>
28
29
/* ==================================================================== */
30
/*      The CSVTable is a persistent set of info about an open CSV      */
31
/*      table.  While it doesn't currently maintain a record index,     */
32
/*      or in-memory copy of the table, it could be changed to do so    */
33
/*      in the future.                                                  */
34
/* ==================================================================== */
35
typedef struct ctb
36
{
37
    VSILFILE *fp;
38
    struct ctb *psNext;
39
    char *pszFilename;
40
    char **papszFieldNames;
41
    int *panFieldNamesLength;
42
    char **papszRecFields;
43
    int nFields;
44
    int iLastLine;
45
    bool bNonUniqueKey;
46
47
    /* Cache for whole file */
48
    int nLineCount;
49
    char **papszLines;
50
    int *panLineIndex;
51
    char *pszRawData;
52
} CSVTable;
53
54
static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
55
                                const char *pszFilename);
56
57
/************************************************************************/
58
/*                            CSVFreeTLS()                              */
59
/************************************************************************/
60
static void CSVFreeTLS(void *pData)
61
0
{
62
0
    CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
63
0
    CPLFree(pData);
64
0
}
65
66
/* It would likely be better to share this list between threads, but
67
   that will require some rework. */
68
69
/************************************************************************/
70
/*                             CSVAccess()                              */
71
/*                                                                      */
72
/*      This function will fetch a handle to the requested table.       */
73
/*      If not found in the ``open table list'' the table will be       */
74
/*      opened and added to the list.  Eventually this function may     */
75
/*      become public with an abstracted return type so that            */
76
/*      applications can set options about the table.  For now this     */
77
/*      isn't done.                                                     */
78
/************************************************************************/
79
80
/** Return the table handle for pszFilename, opening the file if needed.
 *
 * The handle is looked up (case-insensitively, via EQUAL) in the list
 * stored in the CTLS_CSVTABLEPTR TLS slot.  When absent, the file is
 * opened, its header record is parsed, and the new table is pushed at the
 * front of the list.
 *
 * @param pszFilename file to access.
 * @return the table, or nullptr if the TLS slot, an allocation or the file
 *         open failed.
 */
static CSVTable *CSVAccess(const char *pszFilename)
{
    /* -------------------------------------------------------------------- */
    /*      Get the list head from TLS, creating it on first use.           */
    /* -------------------------------------------------------------------- */
    int bTLSMemoryError = FALSE;
    CSVTable **ppsTableList = static_cast<CSVTable **>(
        CPLGetTLSEx(CTLS_CSVTABLEPTR, &bTLSMemoryError));
    if (bTLSMemoryError)
        return nullptr;

    if (ppsTableList == nullptr)
    {
        ppsTableList =
            static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
        if (ppsTableList == nullptr)
            return nullptr;

        // CSVFreeTLS releases the list when the TLS slot is freed.
        CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsTableList, CSVFreeTLS);
    }

    /* -------------------------------------------------------------------- */
    /*      Reuse an already opened table when there is one.                */
    /* -------------------------------------------------------------------- */
    CSVTable *psCandidate = *ppsTableList;
    while (psCandidate != nullptr)
    {
        if (EQUAL(psCandidate->pszFilename, pszFilename))
            return psCandidate;
        psCandidate = psCandidate->psNext;
    }

    /* -------------------------------------------------------------------- */
    /*      Otherwise open the file.                                        */
    /* -------------------------------------------------------------------- */
    VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
    if (fp == nullptr)
        return nullptr;

    /* -------------------------------------------------------------------- */
    /*      Build the table descriptor and link it at the list front.       */
    /* -------------------------------------------------------------------- */
    CSVTable *const psNewTable =
        static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
    if (psNewTable == nullptr)
    {
        VSIFCloseL(fp);
        return nullptr;
    }

    psNewTable->fp = fp;
    psNewTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
    if (psNewTable->pszFilename == nullptr)
    {
        VSIFree(psNewTable);
        VSIFCloseL(fp);
        return nullptr;
    }

    psNewTable->bNonUniqueKey = false;  // Nothing known to the contrary yet.
    psNewTable->psNext = *ppsTableList;
    *ppsTableList = psNewTable;

    /* -------------------------------------------------------------------- */
    /*      Parse the header record and cache each field name length.       */
    /* -------------------------------------------------------------------- */
    psNewTable->papszFieldNames = CSVReadParseLineL(fp);
    psNewTable->nFields = CSLCount(psNewTable->papszFieldNames);
    psNewTable->panFieldNamesLength =
        static_cast<int *>(CPLMalloc(sizeof(int) * psNewTable->nFields));

    char **const papszNames = psNewTable->papszFieldNames;
    // Explicit null check kept for the benefit of static analyzers.
    if (papszNames != nullptr)
    {
        for (int iField = 0; iField < psNewTable->nFields; ++iField)
        {
            psNewTable->panFieldNamesLength[iField] =
                static_cast<int>(strlen(papszNames[iField]));
        }
    }

    return psNewTable;
}
168
169
/************************************************************************/
170
/*                            CSVDeaccess()                             */
171
/************************************************************************/
172
173
static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
174
                                const char *pszFilename)
175
176
0
{
177
0
    if (ppsCSVTableList == nullptr)
178
0
        return;
179
180
    /* -------------------------------------------------------------------- */
181
    /*      A NULL means deaccess all tables.                               */
182
    /* -------------------------------------------------------------------- */
183
0
    if (pszFilename == nullptr)
184
0
    {
185
0
        while (*ppsCSVTableList != nullptr)
186
0
            CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
187
0
                                (*ppsCSVTableList)->pszFilename);
188
189
0
        return;
190
0
    }
191
192
    /* -------------------------------------------------------------------- */
193
    /*      Find this table.                                                */
194
    /* -------------------------------------------------------------------- */
195
0
    CSVTable *psLast = nullptr;
196
0
    CSVTable *psTable = *ppsCSVTableList;
197
0
    for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
198
0
         psTable = psTable->psNext)
199
0
    {
200
0
        psLast = psTable;
201
0
    }
202
203
0
    if (psTable == nullptr)
204
0
    {
205
0
        if (bCanUseTLS)
206
0
            CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
207
0
        return;
208
0
    }
209
210
    /* -------------------------------------------------------------------- */
211
    /*      Remove the link from the list.                                  */
212
    /* -------------------------------------------------------------------- */
213
0
    if (psLast != nullptr)
214
0
        psLast->psNext = psTable->psNext;
215
0
    else
216
0
        *ppsCSVTableList = psTable->psNext;
217
218
    /* -------------------------------------------------------------------- */
219
    /*      Free the table.                                                 */
220
    /* -------------------------------------------------------------------- */
221
0
    if (psTable->fp != nullptr)
222
0
        VSIFCloseL(psTable->fp);
223
224
0
    CSLDestroy(psTable->papszFieldNames);
225
0
    CPLFree(psTable->panFieldNamesLength);
226
0
    CSLDestroy(psTable->papszRecFields);
227
0
    CPLFree(psTable->pszFilename);
228
0
    CPLFree(psTable->panLineIndex);
229
0
    CPLFree(psTable->pszRawData);
230
0
    CPLFree(psTable->papszLines);
231
232
0
    CPLFree(psTable);
233
234
0
    if (bCanUseTLS)
235
0
        CPLReadLine(nullptr);
236
0
}
237
238
void CSVDeaccess(const char *pszFilename)
239
0
{
240
    /* -------------------------------------------------------------------- */
241
    /*      Fetch the table, and allocate the thread-local pointer to it    */
242
    /*      if there isn't already one.                                     */
243
    /* -------------------------------------------------------------------- */
244
0
    int bMemoryError = FALSE;
245
0
    CSVTable **ppsCSVTableList =
246
0
        static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
247
248
0
    CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
249
0
}
250
251
/************************************************************************/
252
/*                            CSVSplitLine()                            */
253
/*                                                                      */
254
/*      Tokenize a CSV line into fields in the form of a string         */
255
/*      list.  This is used instead of the CPLTokenizeString()          */
256
/*      because it provides correct CSV escaping and quoting            */
257
/*      semantics.                                                      */
258
/************************************************************************/
259
260
static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
261
                           bool bKeepLeadingAndClosingQuotes,
262
                           bool bMergeDelimiter)
263
264
0
{
265
0
    CPLStringList aosRetList;
266
0
    if (pszString == nullptr)
267
0
        return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
268
269
0
    char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
270
0
    int nTokenMax = 10;
271
0
    const size_t nDelimiterLength = strlen(pszDelimiter);
272
273
0
    const char *pszIter = pszString;
274
0
    while (*pszIter != '\0')
275
0
    {
276
0
        bool bInString = false;
277
278
0
        int nTokenLen = 0;
279
280
        // Try to find the next delimiter, marking end of token.
281
0
        do
282
0
        {
283
            // End if this is a delimiter skip it and break.
284
0
            if (!bInString &&
285
0
                strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
286
0
            {
287
0
                pszIter += nDelimiterLength;
288
0
                if (bMergeDelimiter)
289
0
                {
290
0
                    while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
291
0
                           0)
292
0
                        pszIter += nDelimiterLength;
293
0
                }
294
0
                break;
295
0
            }
296
297
0
            if (*pszIter == '"')
298
0
            {
299
0
                if (!bInString && nTokenLen > 0)
300
0
                {
301
                    // do not treat in a special way double quotes that appear
302
                    // in the middle of a field (similarly to OpenOffice)
303
                    // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
304
0
                }
305
0
                else if (!bInString || pszIter[1] != '"')
306
0
                {
307
0
                    bInString = !bInString;
308
0
                    if (!bKeepLeadingAndClosingQuotes)
309
0
                        continue;
310
0
                }
311
0
                else  // Doubled quotes in string resolve to one quote.
312
0
                {
313
0
                    pszIter++;
314
0
                }
315
0
            }
316
317
0
            if (nTokenLen >= nTokenMax - 2)
318
0
            {
319
0
                nTokenMax = nTokenMax * 2 + 10;
320
0
                pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
321
0
            }
322
323
0
            pszToken[nTokenLen] = *pszIter;
324
0
            nTokenLen++;
325
0
        } while (*(++pszIter) != '\0');
326
327
0
        pszToken[nTokenLen] = '\0';
328
0
        aosRetList.AddString(pszToken);
329
330
        // If the last token is an empty token, then we have to catch
331
        // it now, otherwise we won't reenter the loop and it will be lost.
332
0
        if (*pszIter == '\0' &&
333
0
            pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
334
0
            strncmp(pszIter - nDelimiterLength, pszDelimiter,
335
0
                    nDelimiterLength) == 0)
336
0
        {
337
0
            aosRetList.AddString("");
338
0
        }
339
0
    }
340
341
0
    CPLFree(pszToken);
342
343
0
    if (aosRetList.Count() == 0)
344
0
        return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
345
0
    else
346
0
        return aosRetList.StealList();
347
0
}
348
349
/************************************************************************/
350
/*                          CSVFindNextLine()                           */
351
/*                                                                      */
352
/*      Find the start of the next line, while at the same time zero    */
353
/*      terminating this line.  Take into account that there may be     */
354
/*      newline indicators within quoted strings, and that quotes       */
355
/*      can be escaped with a backslash.                                */
356
/************************************************************************/
357
358
/** Terminate the current line in place and locate the following one.
 *
 * The line ends at the first CR or LF met outside of double quotes (a
 * double quote preceded by a backslash does not count).  Every CR/LF of
 * that terminator run is overwritten with NUL.
 *
 * @param pszThisLine start of the current line, in a writable buffer.
 * @return start of the next line, or nullptr when nothing follows.
 */
static char *CSVFindNextLine(char *pszThisLine)
{
    int iPos = 0;
    bool bInsideQuotes = false;

    // Look for the first line break that is not inside a quoted string.
    while (pszThisLine[iPos] != '\0')
    {
        const char ch = pszThisLine[iPos];

        if (ch == '\"' && (iPos == 0 || pszThisLine[iPos - 1] != '\\'))
            bInsideQuotes = !bInsideQuotes;

        if (!bInsideQuotes && (ch == 10 || ch == 13))
            break;

        ++iPos;
    }

    // Blank out the whole run of CR / LF characters.
    for (; pszThisLine[iPos] == 10 || pszThisLine[iPos] == 13; ++iPos)
        pszThisLine[iPos] = '\0';

    return pszThisLine[iPos] == '\0' ? nullptr : pszThisLine + iPos;
}
381
382
/************************************************************************/
383
/*                             CSVIngest()                              */
384
/*                                                                      */
385
/*      Load entire file into memory and setup index if possible.       */
386
/************************************************************************/
387
388
// TODO(schwehr): Clean up all the casting in CSVIngest.
389
/** Load the whole file of a table in memory and index its lines.
 *
 * Does nothing when the raw data is already loaded.  On success the file
 * content is in psTable->pszRawData (lines NUL-split in place),
 * psTable->papszLines points at each data line not starting with '#',
 * psTable->panLineIndex holds atoi() of each line when those values are
 * in ascending order (nullptr otherwise), and the file handle is closed.
 * On failure the function returns early, after emitting a CPL error for
 * I/O problems.
 *
 * @param psTable table to ingest; its fp must be open on first call.
 */
static void CSVIngest(CSVTable *psTable)
{
    if (psTable->pszRawData != nullptr)
        return;

    /* -------------------------------------------------------------------- */
    /*      Ingest whole file.                                              */
    /* -------------------------------------------------------------------- */
    if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
    {
        CPLError(CE_Failure, CPLE_FileIO,
                 "Failed using seek end and tell to get file length: %s",
                 psTable->pszFilename);
        return;
    }
    const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
    if (static_cast<long>(nFileLen) == -1)
    {
        CPLError(CE_Failure, CPLE_FileIO,
                 "Failed using seek end and tell to get file length: %s",
                 psTable->pszFilename);
        return;
    }
    VSIRewindL(psTable->fp);

    const size_t nRawLen = static_cast<size_t>(nFileLen);
    psTable->pszRawData =
        static_cast<char *>(VSI_MALLOC_VERBOSE(nRawLen + 1));
    if (psTable->pszRawData == nullptr)
        return;
    if (VSIFReadL(psTable->pszRawData, 1, nRawLen, psTable->fp) != nRawLen)
    {
        CPLFree(psTable->pszRawData);
        psTable->pszRawData = nullptr;

        CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
                 psTable->pszFilename);
        return;
    }

    psTable->pszRawData[nRawLen] = '\0';

    /* -------------------------------------------------------------------- */
    /*      Get count of newlines so we can allocate line array.            */
    /* -------------------------------------------------------------------- */
    // The buffer is scanned with a size_t index: an int index would be
    // truncated (possibly to a negative value) for files of 2 GB or more,
    // under-sizing the line array and silently dropping records.  The
    // counter itself stays an int (like nLineCount) and is capped at the
    // largest int value so that it cannot overflow.
    const int nLineCountCap = static_cast<int>(~0U >> 1);
    int nMaxLineCount = 0;
    for (size_t i = 0; i < nRawLen && nMaxLineCount < nLineCountCap; i++)
    {
        if (psTable->pszRawData[i] == 10)
            nMaxLineCount++;
    }

    psTable->papszLines =
        static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
    if (psTable->papszLines == nullptr)
        return;

    /* -------------------------------------------------------------------- */
    /*      Build a list of record pointers into the raw data buffer        */
    /*      based on line terminators.  Zero terminate the line             */
    /*      strings.                                                        */
    /* -------------------------------------------------------------------- */
    /* skip header line */
    char *pszThisLine = CSVFindNextLine(psTable->pszRawData);

    int iLine = 0;
    while (pszThisLine != nullptr && iLine < nMaxLineCount)
    {
        // Lines starting with '#' are not recorded.
        if (pszThisLine[0] != '#')
            psTable->papszLines[iLine++] = pszThisLine;
        pszThisLine = CSVFindNextLine(pszThisLine);
    }

    psTable->nLineCount = iLine;

    /* -------------------------------------------------------------------- */
    /*      Allocate and populate index array.  Ensure they are in          */
    /*      ascending order so that binary searches can be done on the      */
    /*      array.                                                          */
    /* -------------------------------------------------------------------- */
    psTable->panLineIndex = static_cast<int *>(
        VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
    if (psTable->panLineIndex == nullptr)
        return;

    for (int i = 0; i < psTable->nLineCount; i++)
    {
        psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);

        // Out-of-order value: the index is unusable, drop it.
        if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
        {
            CPLFree(psTable->panLineIndex);
            psTable->panLineIndex = nullptr;
            break;
        }
    }

    psTable->iLastLine = -1;

    /* -------------------------------------------------------------------- */
    /*      We should never need the file handle again, so close it.        */
    /* -------------------------------------------------------------------- */
    VSIFCloseL(psTable->fp);
    psTable->fp = nullptr;
}
495
496
static void CSVIngest(const char *pszFilename)
497
498
0
{
499
0
    CSVTable *psTable = CSVAccess(pszFilename);
500
0
    if (psTable == nullptr)
501
0
    {
502
0
        CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
503
0
                 pszFilename);
504
0
        return;
505
0
    }
506
0
    CSVIngest(psTable);
507
0
}
508
509
/************************************************************************/
510
/*                        CSVDetectSeperator()                          */
511
/************************************************************************/
512
513
/** Detect which field separator is used.
514
 *
515
 * Currently, it can detect comma, semicolon, space, tabulation or pipe.
516
 * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
517
 * most occurrences will be selected (and a warning emitted).
518
 * If no separator found, comma will be considered as the separator.
519
 *
520
 * @return ',', ';', ' ', tabulation character or '|'.
521
 */
522
char CSVDetectSeperator(const char *pszLine)
523
0
{
524
0
    bool bInString = false;
525
0
    int nCountComma = 0;
526
0
    int nCountSemicolon = 0;
527
0
    int nCountTab = 0;
528
0
    int nCountPipe = 0;
529
0
    int nCountSpace = 0;
530
531
0
    for (; *pszLine != '\0'; pszLine++)
532
0
    {
533
0
        if (!bInString && *pszLine == ',')
534
0
        {
535
0
            nCountComma++;
536
0
        }
537
0
        else if (!bInString && *pszLine == ';')
538
0
        {
539
0
            nCountSemicolon++;
540
0
        }
541
0
        else if (!bInString && *pszLine == '\t')
542
0
        {
543
0
            nCountTab++;
544
0
        }
545
0
        else if (!bInString && *pszLine == '|')
546
0
        {
547
0
            nCountPipe++;
548
0
        }
549
0
        else if (!bInString && *pszLine == ' ')
550
0
        {
551
0
            nCountSpace++;
552
0
        }
553
0
        else if (*pszLine == '"')
554
0
        {
555
0
            if (!bInString || pszLine[1] != '"')
556
0
            {
557
0
                bInString = !bInString;
558
0
                continue;
559
0
            }
560
0
            else /* doubled quotes in string resolve to one quote */
561
0
            {
562
0
                pszLine++;
563
0
            }
564
0
        }
565
0
    }
566
567
0
    const int nMaxCountExceptSpace =
568
0
        std::max(std::max(nCountComma, nCountSemicolon),
569
0
                 std::max(nCountTab, nCountPipe));
570
0
    char chDelimiter = ',';
571
0
    if (nMaxCountExceptSpace == 0)
572
0
    {
573
0
        if (nCountSpace > 0)
574
0
            chDelimiter = ' ';
575
0
    }
576
0
    else
577
0
    {
578
0
        bool bWarn = false;
579
0
        if (nCountComma == nMaxCountExceptSpace)
580
0
        {
581
0
            chDelimiter = ',';
582
0
            bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
583
0
        }
584
0
        else if (nCountSemicolon == nMaxCountExceptSpace)
585
0
        {
586
0
            chDelimiter = ';';
587
0
            bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
588
0
        }
589
0
        else if (nCountTab == nMaxCountExceptSpace)
590
0
        {
591
0
            chDelimiter = '\t';
592
0
            bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
593
0
        }
594
0
        else /* if( nCountPipe == nMaxCountExceptSpace ) */
595
0
        {
596
0
            chDelimiter = '|';
597
0
            bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
598
0
        }
599
0
        if (bWarn)
600
0
        {
601
0
            CPLError(CE_Warning, CPLE_AppDefined,
602
0
                     "Selecting '%c' as CSV field separator, but "
603
0
                     "other candidate separator(s) have been found.",
604
0
                     chDelimiter);
605
0
        }
606
0
    }
607
608
0
    return chDelimiter;
609
0
}
610
611
/************************************************************************/
612
/*                      CSVReadParseLineGeneric()                       */
613
/*                                                                      */
614
/*      Read one line, and return split into fields.  The return        */
615
/*      result is a stringlist, in the sense of the CSL functions.      */
616
/************************************************************************/
617
618
static char **
619
CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
620
                        size_t nMaxLineSize, const char *pszDelimiter,
621
                        bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
622
                        bool bMergeDelimiter, bool bSkipBOM)
623
0
{
624
0
    const char *pszLine = pfnReadLine(fp, nMaxLineSize);
625
0
    if (pszLine == nullptr)
626
0
        return nullptr;
627
628
0
    if (bSkipBOM)
629
0
    {
630
        // Skip BOM.
631
0
        const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
632
0
        if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
633
0
            pszLine += 3;
634
0
    }
635
636
    // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
637
0
    if (!bHonourStrings)
638
0
    {
639
0
        return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
640
0
    }
641
642
    // If there are no quotes, then this is the simple case.
643
    // Parse, and return tokens.
644
0
    if (strchr(pszLine, '\"') == nullptr)
645
0
        return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
646
0
                            bMergeDelimiter);
647
648
0
    const size_t nDelimiterLength = strlen(pszDelimiter);
649
0
    bool bInString = false;           // keep in that scope !
650
0
    std::string osWorkLine(pszLine);  // keep in that scope !
651
0
    size_t i = 0;                     // keep in that scope !
652
653
0
    try
654
0
    {
655
0
        while (true)
656
0
        {
657
0
            for (; i < osWorkLine.size(); ++i)
658
0
            {
659
0
                if (osWorkLine[i] == '\"')
660
0
                {
661
0
                    if (!bInString)
662
0
                    {
663
                        // Only consider " as the start of a quoted string
664
                        // if it is the first character of the line, or
665
                        // if it is immediately after the field delimiter.
666
0
                        if (i == 0 ||
667
0
                            (i >= nDelimiterLength &&
668
0
                             osWorkLine.compare(i - nDelimiterLength,
669
0
                                                nDelimiterLength, pszDelimiter,
670
0
                                                nDelimiterLength) == 0))
671
0
                        {
672
0
                            bInString = true;
673
0
                        }
674
0
                    }
675
0
                    else if (i + 1 < osWorkLine.size() &&
676
0
                             osWorkLine[i + 1] == '"')
677
0
                    {
678
                        // Escaped double quote in a quoted string
679
0
                        ++i;
680
0
                    }
681
0
                    else
682
0
                    {
683
0
                        bInString = false;
684
0
                    }
685
0
                }
686
0
            }
687
688
0
            if (!bInString)
689
0
            {
690
0
                return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
691
0
                                    bKeepLeadingAndClosingQuotes,
692
0
                                    bMergeDelimiter);
693
0
            }
694
695
0
            const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
696
0
            if (pszNewLine == nullptr)
697
0
                break;
698
699
0
            osWorkLine.append("\n");
700
0
            osWorkLine.append(pszNewLine);
701
0
        }
702
0
    }
703
0
    catch (const std::exception &e)
704
0
    {
705
0
        CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
706
0
    }
707
708
0
    if (bInString)
709
0
    {
710
0
        CPLError(CE_Failure, CPLE_AppDefined,
711
0
                 "CSV file has unbalanced number of double-quotes. Corrupted "
712
0
                 "data will likely be returned");
713
0
    }
714
715
0
    return nullptr;
716
0
}
717
718
/************************************************************************/
719
/*                          CSVReadParseLine()                          */
720
/*                                                                      */
721
/*      Read one line, and return split into fields.  The return        */
722
/*      result is a stringlist, in the sense of the CSL functions.      */
723
/*                                                                      */
724
/*      Deprecated.  Replaced by CSVReadParseLineL().                   */
725
/************************************************************************/
726
727
char **CSVReadParseLine(FILE *fp)
728
0
{
729
0
    return CSVReadParseLine2(fp, ',');
730
0
}
731
732
static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
733
0
{
734
0
    return CPLReadLine(static_cast<FILE *>(fp));
735
0
}
736
737
char **CSVReadParseLine2(FILE *fp, char chDelimiter)
738
0
{
739
0
    CPLAssert(fp != nullptr);
740
0
    if (fp == nullptr)
741
0
        return nullptr;
742
743
0
    char szDelimiter[2] = {chDelimiter, 0};
744
0
    return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
745
0
                                   0,  // nMaxLineSize,
746
0
                                   szDelimiter,
747
0
                                   true,   // bHonourStrings
748
0
                                   false,  // bKeepLeadingAndClosingQuotes
749
0
                                   false,  // bMergeDelimiter
750
0
                                   true /* bSkipBOM */);
751
0
}
752
753
/************************************************************************/
754
/*                          CSVReadParseLineL()                         */
755
/*                                                                      */
756
/*      Read one line, and return split into fields.  The return        */
757
/*      result is a stringlist, in the sense of the CSL functions.      */
758
/*                                                                      */
759
/*      Replaces CSVReadParseLine().  These functions use the VSI       */
760
/*      layer to allow reading from other file containers.              */
761
/************************************************************************/
762
763
/** Read one comma-separated record from a VSI file handle.
 *
 * Thin wrapper around CSVReadParseLine2L() with ',' as the delimiter.
 *
 * @param fp file to read from.
 * @return whatever CSVReadParseLine2L() returns for that file.
 */
char **CSVReadParseLineL(VSILFILE *fp)
{
    const char chComma = ',';
    return CSVReadParseLine2L(fp, chComma);
}
767
768
char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
{
    CPLAssert(fp != nullptr);
    if (!fp)
        return nullptr;

    // CSVReadParseLine3L() takes the delimiter as a NUL-terminated string.
    const char szDelimiter[2] = {chDelimiter, '\0'};

    // Fixed parsing options used by this single-character-delimiter variant.
    const size_t nMaxLineSize = 0;  // 0 means unlimited for CSVReadParseLine3L
    const bool bHonourStrings = true;
    const bool bKeepLeadingAndClosingQuotes = false;
    const bool bMergeDelimiter = false;
    const bool bSkipBOM = true;

    return CSVReadParseLine3L(fp, nMaxLineSize, szDelimiter, bHonourStrings,
                              bKeepLeadingAndClosingQuotes, bMergeDelimiter,
                              bSkipBOM);
}
784
785
/************************************************************************/
786
/*                      ReadLineLargeFile()                             */
787
/************************************************************************/
788
789
static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
790
0
{
791
0
    int nBufLength = 0;
792
0
    return CPLReadLine3L(static_cast<VSILFILE *>(fp),
793
0
                         nMaxLineSize == 0 ? -1
794
0
                                           : static_cast<int>(nMaxLineSize),
795
0
                         &nBufLength, nullptr);
796
0
}
797
798
/************************************************************************/
799
/*                      CSVReadParseLine3L()                            */
800
/*                                                                      */
801
/*      Read one line, and return split into fields.  The return        */
802
/*      result is a stringlist, in the sense of the CSL functions.      */
803
/************************************************************************/
804
805
/** Read one line, and return split into fields.
806
 * The return result is a stringlist, in the sense of the CSL functions.
807
 *
808
 * @param fp File handle. Must not be NULL
809
 * @param nMaxLineSize Maximum line size, or 0 for unlimited.
810
 * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
811
 * @param bHonourStrings Should be true, unless double quotes should not be
812
 *                       considered when separating fields.
813
 * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
814
 *                                     quote characters should be kept.
815
 * @param bMergeDelimiter Whether consecutive delimiters should be considered
816
 *                        as a single one. Should generally be set to false.
817
 * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
818
 */
819
char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
820
                          const char *pszDelimiter, bool bHonourStrings,
821
                          bool bKeepLeadingAndClosingQuotes,
822
                          bool bMergeDelimiter, bool bSkipBOM)
823
824
0
{
825
0
    return CSVReadParseLineGeneric(
826
0
        fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
827
0
        bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
828
0
}
829
830
/************************************************************************/
831
/*                             CSVCompare()                             */
832
/*                                                                      */
833
/*      Compare a field to a search value using a particular            */
834
/*      criteria.                                                       */
835
/************************************************************************/
836
837
static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
838
                       CSVCompareCriteria eCriteria)
839
840
0
{
841
0
    if (eCriteria == CC_ExactString)
842
0
    {
843
0
        return (strcmp(pszFieldValue, pszTarget) == 0);
844
0
    }
845
0
    else if (eCriteria == CC_ApproxString)
846
0
    {
847
0
        return EQUAL(pszFieldValue, pszTarget);
848
0
    }
849
0
    else if (eCriteria == CC_Integer)
850
0
    {
851
0
        return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
852
0
                atoi(pszFieldValue) == atoi(pszTarget));
853
0
    }
854
855
0
    return false;
856
0
}
857
858
/************************************************************************/
859
/*                            CSVScanLines()                            */
860
/*                                                                      */
861
/*      Read the file scanline for lines where the key field equals     */
862
/*      the indicated value with the suggested comparison criteria.     */
863
/*      Return the first matching line split into fields.               */
864
/*                                                                      */
865
/*      Deprecated.  Replaced by CSVScanLinesL().                       */
866
/************************************************************************/
867
868
char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
869
                    CSVCompareCriteria eCriteria)
870
871
0
{
872
0
    CPLAssert(pszValue != nullptr);
873
0
    CPLAssert(iKeyField >= 0);
874
0
    CPLAssert(fp != nullptr);
875
876
0
    bool bSelected = false;
877
0
    const int nTestValue = atoi(pszValue);
878
0
    char **papszFields = nullptr;
879
880
0
    while (!bSelected)
881
0
    {
882
0
        papszFields = CSVReadParseLine(fp);
883
0
        if (papszFields == nullptr)
884
0
            return nullptr;
885
886
0
        if (CSLCount(papszFields) < iKeyField + 1)
887
0
        {
888
            /* not selected */
889
0
        }
890
0
        else if (eCriteria == CC_Integer &&
891
0
                 atoi(papszFields[iKeyField]) == nTestValue)
892
0
        {
893
0
            bSelected = true;
894
0
        }
895
0
        else
896
0
        {
897
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
898
0
        }
899
900
0
        if (!bSelected)
901
0
        {
902
0
            CSLDestroy(papszFields);
903
0
            papszFields = nullptr;
904
0
        }
905
0
    }
906
907
0
    return papszFields;
908
0
}
909
910
/************************************************************************/
911
/*                            CSVScanLinesL()                           */
912
/*                                                                      */
913
/*      Read the file scanline for lines where the key field equals     */
914
/*      the indicated value with the suggested comparison criteria.     */
915
/*      Return the first matching line split into fields.               */
916
/************************************************************************/
917
918
char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
919
                     CSVCompareCriteria eCriteria)
920
921
0
{
922
0
    CPLAssert(pszValue != nullptr);
923
0
    CPLAssert(iKeyField >= 0);
924
0
    CPLAssert(fp != nullptr);
925
926
0
    bool bSelected = false;
927
0
    const int nTestValue = atoi(pszValue);
928
0
    char **papszFields = nullptr;
929
930
0
    while (!bSelected)
931
0
    {
932
0
        papszFields = CSVReadParseLineL(fp);
933
0
        if (papszFields == nullptr)
934
0
            return nullptr;
935
936
0
        if (CSLCount(papszFields) < iKeyField + 1)
937
0
        {
938
            /* not selected */
939
0
        }
940
0
        else if (eCriteria == CC_Integer &&
941
0
                 atoi(papszFields[iKeyField]) == nTestValue)
942
0
        {
943
0
            bSelected = true;
944
0
        }
945
0
        else
946
0
        {
947
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
948
0
        }
949
950
0
        if (!bSelected)
951
0
        {
952
0
            CSLDestroy(papszFields);
953
0
            papszFields = nullptr;
954
0
        }
955
0
    }
956
957
0
    return papszFields;
958
0
}
959
960
/************************************************************************/
961
/*                        CSVScanLinesIndexed()                         */
962
/*                                                                      */
963
/*      Read the file scanline for lines where the key field equals     */
964
/*      the indicated value with the suggested comparison criteria.     */
965
/*      Return the first matching line split into fields.               */
966
/************************************************************************/
967
968
static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)

{
    CPLAssert(psTable->panLineIndex != nullptr);

    /* -------------------------------------------------------------------- */
    /*      Find target record with binary search.  The comparisons below   */
    /*      require panLineIndex to be in ascending order.                  */
    /* -------------------------------------------------------------------- */
    const int *const panIndex = psTable->panLineIndex;
    int iBottom = 0;
    int iTop = psTable->nLineCount - 1;
    int iResult = -1;

    while (iBottom <= iTop)
    {
        // Overflow-safe midpoint: iTop + iBottom could exceed INT_MAX on
        // very large tables, whereas iTop - iBottom cannot.
        const int iMiddle = iBottom + (iTop - iBottom) / 2;
        const int nMiddleKey = panIndex[iMiddle];

        if (nMiddleKey > nKeyValue)
        {
            iTop = iMiddle - 1;
        }
        else if (nMiddleKey < nKeyValue)
        {
            iBottom = iMiddle + 1;
        }
        else
        {
            // If a key is not unique, select the first instance of it and
            // remember that duplicates exist.
            iResult = iMiddle;
            while (iResult > 0 && panIndex[iResult - 1] == nKeyValue)
            {
                psTable->bNonUniqueKey = true;
                iResult--;
            }
            break;
        }
    }

    if (iResult == -1)
        return nullptr;

    /* -------------------------------------------------------------------- */
    /*      Parse target line, and update iLastLine indicator.              */
    /* -------------------------------------------------------------------- */
    psTable->iLastLine = iResult;

    return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
}
1011
1012
/************************************************************************/
1013
/*                        CSVScanLinesIngested()                        */
1014
/*                                                                      */
1015
/*      Read the file scanline for lines where the key field equals     */
1016
/*      the indicated value with the suggested comparison criteria.     */
1017
/*      Return the first matching line split into fields.               */
1018
/************************************************************************/
1019
1020
static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1021
                                   const char *pszValue,
1022
                                   CSVCompareCriteria eCriteria)
1023
1024
0
{
1025
0
    CPLAssert(pszValue != nullptr);
1026
0
    CPLAssert(iKeyField >= 0);
1027
1028
0
    const int nTestValue = atoi(pszValue);
1029
1030
    /* -------------------------------------------------------------------- */
1031
    /*      Short cut for indexed files.                                    */
1032
    /* -------------------------------------------------------------------- */
1033
0
    if (iKeyField == 0 && eCriteria == CC_Integer &&
1034
0
        psTable->panLineIndex != nullptr)
1035
0
        return CSVScanLinesIndexed(psTable, nTestValue);
1036
1037
    /* -------------------------------------------------------------------- */
1038
    /*      Scan from in-core lines.                                        */
1039
    /* -------------------------------------------------------------------- */
1040
0
    char **papszFields = nullptr;
1041
0
    bool bSelected = false;
1042
1043
0
    while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1044
0
    {
1045
0
        psTable->iLastLine++;
1046
0
        papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1047
0
                                   false, false);
1048
1049
0
        if (CSLCount(papszFields) < iKeyField + 1)
1050
0
        {
1051
            /* not selected */
1052
0
        }
1053
0
        else if (eCriteria == CC_Integer &&
1054
0
                 atoi(papszFields[iKeyField]) == nTestValue)
1055
0
        {
1056
0
            bSelected = true;
1057
0
        }
1058
0
        else
1059
0
        {
1060
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1061
0
        }
1062
1063
0
        if (!bSelected)
1064
0
        {
1065
0
            CSLDestroy(papszFields);
1066
0
            papszFields = nullptr;
1067
0
        }
1068
0
    }
1069
1070
0
    return papszFields;
1071
0
}
1072
1073
/************************************************************************/
1074
/*                            CSVRewind()                               */
1075
/*                                                                      */
1076
/*      Rewind a CSV file based on a passed in filename.                */
1077
/*      This is aimed at being used with CSVGetNextLine().              */
1078
/************************************************************************/
1079
1080
void CSVRewind(const char *pszFilename)
1081
1082
0
{
1083
    /* -------------------------------------------------------------------- */
1084
    /*      Get access to the table.                                        */
1085
    /* -------------------------------------------------------------------- */
1086
0
    CPLAssert(pszFilename != nullptr);
1087
1088
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1089
0
    if (psTable != nullptr)
1090
0
        psTable->iLastLine = -1;
1091
0
}
1092
1093
/************************************************************************/
1094
/*                           CSVGetNextLine()                           */
1095
/*                                                                      */
1096
/*      Fetch the next line of a CSV file based on a passed in          */
1097
/*      filename.  Returns NULL at end of file, or if file is not       */
1098
/*      really established.                                             */
1099
/*      This ingests the whole file into memory if not already done.    */
1100
/*      When reaching end of file, CSVRewind() may be used to read      */
1101
/*      again from the beginning.                                       */
1102
/************************************************************************/
1103
1104
char **CSVGetNextLine(const char *pszFilename)
1105
1106
0
{
1107
1108
    /* -------------------------------------------------------------------- */
1109
    /*      Get access to the table.                                        */
1110
    /* -------------------------------------------------------------------- */
1111
0
    CPLAssert(pszFilename != nullptr);
1112
1113
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1114
0
    if (psTable == nullptr)
1115
0
        return nullptr;
1116
1117
0
    CSVIngest(psTable->pszFilename);
1118
1119
    /* -------------------------------------------------------------------- */
1120
    /*      If we use CSVGetNextLine() we can pretty much assume we have    */
1121
    /*      a non-unique key.                                               */
1122
    /* -------------------------------------------------------------------- */
1123
0
    psTable->bNonUniqueKey = true;
1124
1125
    /* -------------------------------------------------------------------- */
1126
    /*      Do we have a next line available?  This only works for          */
1127
    /*      ingested tables I believe.                                      */
1128
    /* -------------------------------------------------------------------- */
1129
0
    if (psTable->iLastLine + 1 >= psTable->nLineCount)
1130
0
        return nullptr;
1131
1132
0
    psTable->iLastLine++;
1133
0
    CSLDestroy(psTable->papszRecFields);
1134
0
    psTable->papszRecFields = CSVSplitLine(
1135
0
        psTable->papszLines[psTable->iLastLine], ",", false, false);
1136
1137
0
    return psTable->papszRecFields;
1138
0
}
1139
1140
/************************************************************************/
1141
/*                            CSVScanFile()                             */
1142
/*                                                                      */
1143
/*      Scan a whole file using criteria similar to above, but also     */
1144
/*      taking care of file opening and closing.                        */
1145
/************************************************************************/
1146
1147
static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
                          const char *pszValue, CSVCompareCriteria eCriteria)
{
    CSVIngest(psTable->pszFilename);

    // If the table's current record already satisfies the criteria and the
    // key is not flagged as non-unique, hand it back without rescanning.
    const bool bKeyInCurrentRecord =
        iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields);
    if (bKeyInCurrentRecord &&
        CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
        !psTable->bNonUniqueKey)
    {
        return psTable->papszRecFields;
    }

    // Otherwise scan from the beginning and replace the current record with
    // whatever is found (possibly NULL).
    psTable->iLastLine = -1;
    CSLDestroy(psTable->papszRecFields);

    char **papszFound = nullptr;
    if (psTable->pszRawData == nullptr)
    {
        // No in-memory copy: rewind the file and scan it directly.
        VSIRewindL(psTable->fp);
        CPLReadLineL(psTable->fp); /* throw away the header line */

        papszFound = CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    }
    else
    {
        papszFound =
            CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    }

    psTable->papszRecFields = papszFound;
    return papszFound;
}
1184
1185
char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1186
                   CSVCompareCriteria eCriteria)
1187
1188
0
{
1189
    /* -------------------------------------------------------------------- */
1190
    /*      Get access to the table.                                        */
1191
    /* -------------------------------------------------------------------- */
1192
0
    CPLAssert(pszFilename != nullptr);
1193
1194
0
    if (iKeyField < 0)
1195
0
        return nullptr;
1196
1197
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1198
0
    if (psTable == nullptr)
1199
0
        return nullptr;
1200
1201
0
    return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1202
0
}
1203
1204
/************************************************************************/
1205
/*                           CSVGetFieldId()                            */
1206
/*                                                                      */
1207
/*      Read the first record of a CSV file (rewinding to be sure),     */
1208
/*      and find the field with the indicated name.  Returns -1 if      */
1209
/*      it fails to find the field name.  Comparison is case            */
1210
/*      insensitive, but otherwise exact.  After this function has      */
1211
/*      been called the file pointer will be positioned just after      */
1212
/*      the first record.                                               */
1213
/*                                                                      */
1214
/*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
1215
/************************************************************************/
1216
1217
int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1218
1219
0
{
1220
0
    CPLAssert(fp != nullptr && pszFieldName != nullptr);
1221
1222
0
    VSIRewind(fp);
1223
1224
0
    char **papszFields = CSVReadParseLine(fp);
1225
0
    for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1226
0
    {
1227
0
        if (EQUAL(papszFields[i], pszFieldName))
1228
0
        {
1229
0
            CSLDestroy(papszFields);
1230
0
            return i;
1231
0
        }
1232
0
    }
1233
1234
0
    CSLDestroy(papszFields);
1235
1236
0
    return -1;
1237
0
}
1238
1239
/************************************************************************/
1240
/*                           CSVGetFieldIdL()                           */
1241
/*                                                                      */
1242
/*      Read the first record of a CSV file (rewinding to be sure),     */
1243
/*      and find the field with the indicated name.  Returns -1 if      */
1244
/*      it fails to find the field name.  Comparison is case            */
1245
/*      insensitive, but otherwise exact.  After this function has      */
1246
/*      been called the file pointer will be positioned just after      */
1247
/*      the first record.                                               */
1248
/************************************************************************/
1249
1250
int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)

{
    CPLAssert(fp != nullptr && pszFieldName != nullptr);

    // The field names live in the first record, so start from the top.
    VSIRewindL(fp);

    char **papszFields = CSVReadParseLineL(fp);

    // Look for the first field matching the name according to EQUAL().
    int iFound = -1;
    if (papszFields != nullptr)
    {
        for (int iField = 0; papszFields[iField] != nullptr; ++iField)
        {
            if (EQUAL(papszFields[iField], pszFieldName))
            {
                iFound = iField;
                break;
            }
        }
    }

    CSLDestroy(papszFields);

    return iFound;
}
1271
1272
/************************************************************************/
1273
/*                         CSVGetFileFieldId()                          */
1274
/*                                                                      */
1275
/*      Same as CSVGetFieldId(), except that we get the file based      */
1276
/*      on filename, rather than having an existing handle.             */
1277
/************************************************************************/
1278
1279
static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)

{
    const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));

    // A table without field names has nothing to match against.
    char **const papszNames = psTable->papszFieldNames;
    if (papszNames == nullptr)
        return -1;

    // Compare the stored name lengths first, so the string comparison only
    // runs for names of the same length as the one requested.
    int iField = 0;
    while (papszNames[iField] != nullptr)
    {
        const bool bSameLength =
            psTable->panFieldNamesLength[iField] == nFieldNameLength;
        if (bSameLength &&
            EQUALN(papszNames[iField], pszFieldName, nFieldNameLength))
        {
            return iField;
        }
        ++iField;
    }

    return -1;
}
1299
1300
int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1301
1302
0
{
1303
    /* -------------------------------------------------------------------- */
1304
    /*      Get access to the table.                                        */
1305
    /* -------------------------------------------------------------------- */
1306
0
    CPLAssert(pszFilename != nullptr);
1307
1308
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1309
0
    if (psTable == nullptr)
1310
0
        return -1;
1311
0
    return CSVGetFileFieldId(psTable, pszFieldName);
1312
0
}
1313
1314
/************************************************************************/
1315
/*                         CSVScanFileByName()                          */
1316
/*                                                                      */
1317
/*      Same as CSVScanFile(), but using a field name instead of a      */
1318
/*      field number.                                                   */
1319
/************************************************************************/
1320
1321
char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1322
                         const char *pszValue, CSVCompareCriteria eCriteria)
1323
1324
0
{
1325
0
    const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1326
0
    if (iKeyField == -1)
1327
0
        return nullptr;
1328
1329
0
    return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1330
0
}
1331
1332
/************************************************************************/
1333
/*                            CSVGetField()                             */
1334
/*                                                                      */
1335
/*      The all-in-one function to fetch a particular field value       */
1336
/*      from a CSV file.  Note this function will return an empty       */
1337
/*      string, rather than NULL if it fails to find the desired        */
1338
/*      value for some reason.  The caller can't establish that the     */
1339
/*      fetch failed.                                                   */
1340
/************************************************************************/
1341
1342
const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1343
                        const char *pszKeyFieldValue,
1344
                        CSVCompareCriteria eCriteria,
1345
                        const char *pszTargetField)
1346
1347
0
{
1348
    /* -------------------------------------------------------------------- */
1349
    /*      Find the table.                                                 */
1350
    /* -------------------------------------------------------------------- */
1351
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1352
0
    if (psTable == nullptr)
1353
0
        return "";
1354
1355
0
    const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1356
0
    if (iKeyField == -1)
1357
0
        return "";
1358
1359
    /* -------------------------------------------------------------------- */
1360
    /*      Find the correct record.                                        */
1361
    /* -------------------------------------------------------------------- */
1362
0
    char **papszRecord =
1363
0
        CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1364
0
    if (papszRecord == nullptr)
1365
0
        return "";
1366
1367
    /* -------------------------------------------------------------------- */
1368
    /*      Figure out which field we want out of this.                     */
1369
    /* -------------------------------------------------------------------- */
1370
0
    const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1371
0
    if (iTargetField < 0)
1372
0
        return "";
1373
1374
0
    for (int i = 0; papszRecord[i] != nullptr; ++i)
1375
0
    {
1376
0
        if (i == iTargetField)
1377
0
            return papszRecord[iTargetField];
1378
0
    }
1379
0
    return "";
1380
0
}
1381
1382
/************************************************************************/
1383
/*                       GDALDefaultCSVFilename()                       */
1384
/************************************************************************/
1385
1386
// State that GDALDefaultCSVFilename() keeps in the CTLS_CSVDEFAULTFILENAME
// thread-local slot.
typedef struct
{
    // Receives a copy of the requested basename when the file cannot be
    // located; GDALDefaultCSVFilename() then returns a pointer to it.
    char szPath[512];

    // Set once GDALDefaultCSVFilename() has attempted its one-time
    // GDAL_DATA finder setup.
    bool bCSVFinderInitialized;
} DefaultCSVFileNameTLS;
1391
1392
const char *GDALDefaultCSVFilename(const char *pszBasename)
1393
1394
0
{
1395
    /* -------------------------------------------------------------------- */
1396
    /*      Do we already have this file accessed?  If so, just return      */
1397
    /*      the existing path without any further probing.                  */
1398
    /* -------------------------------------------------------------------- */
1399
0
    int bMemoryError = FALSE;
1400
0
    CSVTable **ppsCSVTableList =
1401
0
        static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1402
0
    if (ppsCSVTableList != nullptr)
1403
0
    {
1404
0
        const size_t nBasenameLen = strlen(pszBasename);
1405
1406
0
        for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1407
0
             psTable = psTable->psNext)
1408
0
        {
1409
0
            const size_t nFullLen = strlen(psTable->pszFilename);
1410
1411
0
            if (nFullLen > nBasenameLen &&
1412
0
                strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1413
0
                       pszBasename) == 0 &&
1414
0
                strchr("/\\",
1415
0
                       psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1416
0
                    nullptr)
1417
0
            {
1418
0
                return psTable->pszFilename;
1419
0
            }
1420
0
        }
1421
0
    }
1422
1423
    /* -------------------------------------------------------------------- */
1424
    /*      Otherwise we need to look harder for it.                        */
1425
    /* -------------------------------------------------------------------- */
1426
0
    DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1427
0
        CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1428
0
    if (pTLSData == nullptr && !bMemoryError)
1429
0
    {
1430
0
        pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1431
0
            VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1432
0
        if (pTLSData)
1433
0
            CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1434
0
    }
1435
0
    if (pTLSData == nullptr)
1436
0
        return "/not_existing_dir/not_existing_path";
1437
1438
0
    const char *pszResult = CPLFindFile("gdal", pszBasename);
1439
1440
0
    if (pszResult != nullptr)
1441
0
        return pszResult;
1442
1443
0
    if (!pTLSData->bCSVFinderInitialized)
1444
0
    {
1445
0
        pTLSData->bCSVFinderInitialized = true;
1446
1447
0
        if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1448
0
            CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1449
1450
0
        pszResult = CPLFindFile("gdal", pszBasename);
1451
1452
0
        if (pszResult != nullptr)
1453
0
            return pszResult;
1454
0
    }
1455
1456
    // For systems like sandboxes that do not allow other checks.
1457
0
    CPLDebug("CPL_CSV",
1458
0
             "Failed to find file in GDALDefaultCSVFilename.  "
1459
0
             "Returning original basename: %s",
1460
0
             pszBasename);
1461
0
    CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1462
0
    return pTLSData->szPath;
1463
0
}
1464
1465
/************************************************************************/
1466
/*                            CSVFilename()                             */
1467
/*                                                                      */
1468
/*      Return the full path to a particular CSV file.  This will       */
1469
/*      eventually be something the application can override.           */
1470
/************************************************************************/
1471
1472
CPL_C_START
1473
static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1474
CPL_C_END
1475
1476
const char *CSVFilename(const char *pszBasename)
1477
1478
0
{
1479
0
    if (pfnCSVFilenameHook == nullptr)
1480
0
        return GDALDefaultCSVFilename(pszBasename);
1481
1482
0
    return pfnCSVFilenameHook(pszBasename);
1483
0
}
1484
1485
/************************************************************************/
1486
/*                         SetCSVFilenameHook()                         */
1487
/*                                                                      */
1488
/*      Applications can use this to set a function that will           */
1489
/*      massage CSV filenames.                                          */
1490
/************************************************************************/
1491
1492
/**
1493
 * Override CSV file search method.
1494
 *
1495
 * @param pfnNewHook The pointer to a function which will return the
1496
 * full path for a given filename.
1497
 *
1498
1499
This function allows an application to override how the GTIFGetDefn()
1500
and related functions find the CSV (Comma Separated Value) values
1501
required. The pfnNewHook argument should be a pointer to a function that
1502
will take in a CSV filename and return a full path to the file. The
1503
returned string should be to an internal static buffer so that the
1504
caller doesn't have to free the result.
1505
1506
Example:
1507
1508
The listgeo utility uses the following override function if the user
1509
specified a CSV file directory with the -t commandline switch (argument
1510
put into CSVDirName).
1511
1512
\code{.cpp}
1513
1514
    ...
1515
    SetCSVFilenameHook( CSVFileOverride );
1516
    ...
1517
1518
static const char *CSVFileOverride( const char * pszInput )
1519
1520
{
1521
    static char szPath[1024] = {};
1522
1523
    sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1524
1525
    return szPath;
1526
}
1527
\endcode
1528
1529
*/
1530
1531
CPL_C_START
1532
void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1533
1534
0
{
1535
0
    pfnCSVFilenameHook = pfnNewHook;
1536
0
}
1537
1538
CPL_C_END