Coverage Report

Created: 2025-06-13 06:18

/src/gdal/port/cpl_csv.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  CPL - Common Portability Library
4
 * Purpose:  CSV (comma separated value) file access.
5
 * Author:   Frank Warmerdam, warmerdam@pobox.com
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 1999, Frank Warmerdam
9
 * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 ****************************************************************************/
13
14
#include "cpl_port.h"
15
#include "cpl_csv.h"
16
17
#include <cstddef>
18
#include <cstdlib>
19
#include <cstring>
20
#if HAVE_FCNTL_H
21
#include <fcntl.h>
22
#endif
23
24
#include "cpl_conv.h"
25
#include "cpl_error.h"
26
#include "cpl_multiproc.h"
27
#include "gdal_csv.h"
28
29
#include <algorithm>
30
31
/* ==================================================================== */
32
/*      The CSVTable is a persistent set of info about an open CSV      */
33
/*      table.  While it doesn't currently maintain a record index,     */
34
/*      or in-memory copy of the table, it could be changed to do so    */
35
/*      in the future.                                                  */
36
/* ==================================================================== */
37
typedef struct ctb
38
{
39
    VSILFILE *fp;
40
    struct ctb *psNext;
41
    char *pszFilename;
42
    char **papszFieldNames;
43
    int *panFieldNamesLength;
44
    char **papszRecFields;
45
    int nFields;
46
    int iLastLine;
47
    bool bNonUniqueKey;
48
49
    /* Cache for whole file */
50
    int nLineCount;
51
    char **papszLines;
52
    int *panLineIndex;
53
    char *pszRawData;
54
} CSVTable;
55
56
static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
57
                                const char *pszFilename);
58
59
/************************************************************************/
60
/*                            CSVFreeTLS()                              */
61
/************************************************************************/
62
static void CSVFreeTLS(void *pData)
63
0
{
64
0
    CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
65
0
    CPLFree(pData);
66
0
}
67
68
/* It would likely be better to share this list between threads, but
69
   that will require some rework. */
70
71
/************************************************************************/
72
/*                             CSVAccess()                              */
73
/*                                                                      */
74
/*      This function will fetch a handle to the requested table.       */
75
/*      If not found in the ``open table list'' the table will be       */
76
/*      opened and added to the list.  Eventually this function may     */
77
/*      become public with an abstracted return type so that            */
78
/*      applications can set options about the table.  For now this     */
79
/*      isn't done.                                                     */
80
/************************************************************************/
81
82
/* Return the table handle for pszFilename, opening the file and adding it
 * to the front of the calling thread's table list when it is not already
 * there.  Returns NULL if the thread-local list cannot be set up, if the
 * file cannot be opened, or if an allocation fails. */
static CSVTable *CSVAccess(const char *pszFilename)
{
    /* -------------------------------------------------------------------- */
    /*      Get this thread's list head, creating and registering it on     */
    /*      first use.                                                      */
    /* -------------------------------------------------------------------- */
    int bTLSAllocFailed = FALSE;
    CSVTable **ppsTableList = static_cast<CSVTable **>(
        CPLGetTLSEx(CTLS_CSVTABLEPTR, &bTLSAllocFailed));
    if (bTLSAllocFailed)
        return nullptr;

    if (!ppsTableList)
    {
        ppsTableList =
            static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
        if (!ppsTableList)
            return nullptr;
        CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsTableList, CSVFreeTLS);
    }

    /* -------------------------------------------------------------------- */
    /*      Reuse an already opened table when there is one.                */
    /* -------------------------------------------------------------------- */
    CSVTable *psCandidate = *ppsTableList;
    while (psCandidate != nullptr)
    {
        // A hit is returned in place; it is not promoted to the list front.
        if (EQUAL(psCandidate->pszFilename, pszFilename))
            return psCandidate;
        psCandidate = psCandidate->psNext;
    }

    /* -------------------------------------------------------------------- */
    /*      Otherwise open the file.                                        */
    /* -------------------------------------------------------------------- */
    VSILFILE *fpTable = VSIFOpenL(pszFilename, "rb");
    if (!fpTable)
        return nullptr;

    /* -------------------------------------------------------------------- */
    /*      Build the (zero-initialised) table record and push it on the    */
    /*      front of the list.                                              */
    /* -------------------------------------------------------------------- */
    CSVTable *const psNewTable =
        static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
    if (!psNewTable)
    {
        VSIFCloseL(fpTable);
        return nullptr;
    }

    psNewTable->fp = fpTable;
    psNewTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
    if (!psNewTable->pszFilename)
    {
        VSIFree(psNewTable);
        VSIFCloseL(fpTable);
        return nullptr;
    }
    psNewTable->bNonUniqueKey = false;  // As far as we know now.
    psNewTable->psNext = *ppsTableList;
    *ppsTableList = psNewTable;

    /* -------------------------------------------------------------------- */
    /*      Read the header record and remember each field name length.     */
    /* -------------------------------------------------------------------- */
    psNewTable->papszFieldNames = CSVReadParseLineL(fpTable);
    psNewTable->nFields = CSLCount(psNewTable->papszFieldNames);
    psNewTable->panFieldNamesLength =
        static_cast<int *>(CPLMalloc(sizeof(int) * psNewTable->nFields));

    // The explicit null test keeps static analysers quiet.
    if (psNewTable->papszFieldNames != nullptr)
    {
        for (int iField = 0; iField < psNewTable->nFields; ++iField)
        {
            const size_t nNameLen = strlen(psNewTable->papszFieldNames[iField]);
            psNewTable->panFieldNamesLength[iField] =
                static_cast<int>(nNameLen);
        }
    }

    return psNewTable;
}
170
171
/************************************************************************/
172
/*                            CSVDeaccess()                             */
173
/************************************************************************/
174
175
static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
176
                                const char *pszFilename)
177
178
0
{
179
0
    if (ppsCSVTableList == nullptr)
180
0
        return;
181
182
    /* -------------------------------------------------------------------- */
183
    /*      A NULL means deaccess all tables.                               */
184
    /* -------------------------------------------------------------------- */
185
0
    if (pszFilename == nullptr)
186
0
    {
187
0
        while (*ppsCSVTableList != nullptr)
188
0
            CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
189
0
                                (*ppsCSVTableList)->pszFilename);
190
191
0
        return;
192
0
    }
193
194
    /* -------------------------------------------------------------------- */
195
    /*      Find this table.                                                */
196
    /* -------------------------------------------------------------------- */
197
0
    CSVTable *psLast = nullptr;
198
0
    CSVTable *psTable = *ppsCSVTableList;
199
0
    for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
200
0
         psTable = psTable->psNext)
201
0
    {
202
0
        psLast = psTable;
203
0
    }
204
205
0
    if (psTable == nullptr)
206
0
    {
207
0
        if (bCanUseTLS)
208
0
            CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
209
0
        return;
210
0
    }
211
212
    /* -------------------------------------------------------------------- */
213
    /*      Remove the link from the list.                                  */
214
    /* -------------------------------------------------------------------- */
215
0
    if (psLast != nullptr)
216
0
        psLast->psNext = psTable->psNext;
217
0
    else
218
0
        *ppsCSVTableList = psTable->psNext;
219
220
    /* -------------------------------------------------------------------- */
221
    /*      Free the table.                                                 */
222
    /* -------------------------------------------------------------------- */
223
0
    if (psTable->fp != nullptr)
224
0
        VSIFCloseL(psTable->fp);
225
226
0
    CSLDestroy(psTable->papszFieldNames);
227
0
    CPLFree(psTable->panFieldNamesLength);
228
0
    CSLDestroy(psTable->papszRecFields);
229
0
    CPLFree(psTable->pszFilename);
230
0
    CPLFree(psTable->panLineIndex);
231
0
    CPLFree(psTable->pszRawData);
232
0
    CPLFree(psTable->papszLines);
233
234
0
    CPLFree(psTable);
235
236
0
    if (bCanUseTLS)
237
0
        CPLReadLine(nullptr);
238
0
}
239
240
void CSVDeaccess(const char *pszFilename)
241
0
{
242
    /* -------------------------------------------------------------------- */
243
    /*      Fetch the table, and allocate the thread-local pointer to it    */
244
    /*      if there isn't already one.                                     */
245
    /* -------------------------------------------------------------------- */
246
0
    int bMemoryError = FALSE;
247
0
    CSVTable **ppsCSVTableList =
248
0
        static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
249
250
0
    CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
251
0
}
252
253
/************************************************************************/
254
/*                            CSVSplitLine()                            */
255
/*                                                                      */
256
/*      Tokenize a CSV line into fields in the form of a string         */
257
/*      list.  This is used instead of the CPLTokenizeString()          */
258
/*      because it provides correct CSV escaping and quoting            */
259
/*      semantics.                                                      */
260
/************************************************************************/
261
262
static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
263
                           bool bKeepLeadingAndClosingQuotes,
264
                           bool bMergeDelimiter)
265
266
0
{
267
0
    CPLStringList aosRetList;
268
0
    if (pszString == nullptr)
269
0
        return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
270
271
0
    char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
272
0
    int nTokenMax = 10;
273
0
    const size_t nDelimiterLength = strlen(pszDelimiter);
274
275
0
    const char *pszIter = pszString;
276
0
    while (*pszIter != '\0')
277
0
    {
278
0
        bool bInString = false;
279
280
0
        int nTokenLen = 0;
281
282
        // Try to find the next delimiter, marking end of token.
283
0
        do
284
0
        {
285
            // End if this is a delimiter skip it and break.
286
0
            if (!bInString &&
287
0
                strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
288
0
            {
289
0
                pszIter += nDelimiterLength;
290
0
                if (bMergeDelimiter)
291
0
                {
292
0
                    while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
293
0
                           0)
294
0
                        pszIter += nDelimiterLength;
295
0
                }
296
0
                break;
297
0
            }
298
299
0
            if (*pszIter == '"')
300
0
            {
301
0
                if (!bInString && nTokenLen > 0)
302
0
                {
303
                    // do not treat in a special way double quotes that appear
304
                    // in the middle of a field (similarly to OpenOffice)
305
                    // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
306
0
                }
307
0
                else if (!bInString || pszIter[1] != '"')
308
0
                {
309
0
                    bInString = !bInString;
310
0
                    if (!bKeepLeadingAndClosingQuotes)
311
0
                        continue;
312
0
                }
313
0
                else  // Doubled quotes in string resolve to one quote.
314
0
                {
315
0
                    pszIter++;
316
0
                }
317
0
            }
318
319
0
            if (nTokenLen >= nTokenMax - 2)
320
0
            {
321
0
                nTokenMax = nTokenMax * 2 + 10;
322
0
                pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
323
0
            }
324
325
0
            pszToken[nTokenLen] = *pszIter;
326
0
            nTokenLen++;
327
0
        } while (*(++pszIter) != '\0');
328
329
0
        pszToken[nTokenLen] = '\0';
330
0
        aosRetList.AddString(pszToken);
331
332
        // If the last token is an empty token, then we have to catch
333
        // it now, otherwise we won't reenter the loop and it will be lost.
334
0
        if (*pszIter == '\0' &&
335
0
            pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
336
0
            strncmp(pszIter - nDelimiterLength, pszDelimiter,
337
0
                    nDelimiterLength) == 0)
338
0
        {
339
0
            aosRetList.AddString("");
340
0
        }
341
0
    }
342
343
0
    CPLFree(pszToken);
344
345
0
    if (aosRetList.Count() == 0)
346
0
        return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
347
0
    else
348
0
        return aosRetList.StealList();
349
0
}
350
351
/************************************************************************/
352
/*                          CSVFindNextLine()                           */
353
/*                                                                      */
354
/*      Find the start of the next line, while at the same time zero    */
355
/*      terminating this line.  Take into account that there may be     */
356
/*      newline indicators within quoted strings, and that quotes       */
357
/*      can be escaped with a backslash.                                */
358
/************************************************************************/
359
360
/* Terminate the record starting at pszThisLine and return the start of the
 * following one, or NULL when nothing follows.
 *
 * The record ends at the first CR or LF met outside double quotes; a quote
 * preceded by a backslash does not toggle the quoting state.  Every CR/LF
 * of the terminating run is overwritten with NUL. */
static char *CSVFindNextLine(char *pszThisLine)
{
    char *pszCursor = pszThisLine;
    bool bInsideQuotes = false;

    for (; *pszCursor != '\0'; ++pszCursor)
    {
        const char ch = *pszCursor;
        if (ch == '"')
        {
            const bool bEscaped =
                pszCursor != pszThisLine && pszCursor[-1] == '\\';
            if (!bEscaped)
                bInsideQuotes = !bInsideQuotes;
        }
        else if ((ch == '\n' || ch == '\r') && !bInsideQuotes)
        {
            break;
        }
    }

    // Blank out the whole end-of-line run (also swallows empty lines).
    while (*pszCursor == '\n' || *pszCursor == '\r')
    {
        *pszCursor = '\0';
        ++pszCursor;
    }

    return *pszCursor == '\0' ? nullptr : pszCursor;
}
383
384
/************************************************************************/
385
/*                             CSVIngest()                              */
386
/*                                                                      */
387
/*      Load entire file into memory and setup index if possible.       */
388
/************************************************************************/
389
390
// TODO(schwehr): Clean up all the casting in CSVIngest.
391
/* Load the whole file of psTable into memory, split it into records and,
 * when the leading integers of the records are in ascending order, build an
 * index of them.  Does nothing if the raw data is already loaded.
 *
 * On failure the function returns early; depending on where it stopped,
 * pszRawData / papszLines may already be set while the file handle is still
 * open. */
static void CSVIngest(CSVTable *psTable)
{
    if (psTable->pszRawData != nullptr)
        return;

    /* -------------------------------------------------------------------- */
    /*      Determine the file length with seek-to-end + tell.              */
    /* -------------------------------------------------------------------- */
    vsi_l_offset nFileLen = 0;
    bool bGotLength = VSIFSeekL(psTable->fp, 0, SEEK_END) == 0;
    if (bGotLength)
    {
        nFileLen = VSIFTellL(psTable->fp);
        bGotLength = static_cast<long>(nFileLen) != -1;
    }
    if (!bGotLength)
    {
        CPLError(CE_Failure, CPLE_FileIO,
                 "Failed using seek end and tell to get file length: %s",
                 psTable->pszFilename);
        return;
    }
    VSIRewindL(psTable->fp);

    /* -------------------------------------------------------------------- */
    /*      Ingest whole file, with one extra byte for the terminator.      */
    /* -------------------------------------------------------------------- */
    const size_t nBytesToRead = static_cast<size_t>(nFileLen);
    psTable->pszRawData =
        static_cast<char *>(VSI_MALLOC_VERBOSE(nBytesToRead + 1));
    if (psTable->pszRawData == nullptr)
        return;

    const size_t nBytesRead =
        VSIFReadL(psTable->pszRawData, 1, nBytesToRead, psTable->fp);
    if (nBytesRead != nBytesToRead)
    {
        CPLFree(psTable->pszRawData);
        psTable->pszRawData = nullptr;

        CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
                 psTable->pszFilename);
        return;
    }

    psTable->pszRawData[nFileLen] = '\0';

    /* -------------------------------------------------------------------- */
    /*      Count LF characters: an upper bound on the number of records    */
    /*      following the header.                                           */
    /* -------------------------------------------------------------------- */
    // NOTE(review): the scan length is narrowed to int, so files of 2 GiB
    // or more are not counted correctly - confirm whether that matters.
    const int nScanLen = static_cast<int>(nFileLen);
    int nMaxLineCount = 0;
    for (int iByte = 0; iByte < nScanLen; ++iByte)
    {
        if (psTable->pszRawData[iByte] == '\n')
            ++nMaxLineCount;
    }

    psTable->papszLines =
        static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
    if (psTable->papszLines == nullptr)
        return;

    /* -------------------------------------------------------------------- */
    /*      Record the start of every record in the raw buffer, zero        */
    /*      terminating each one.  The first call skips the header line;    */
    /*      records starting with '#' are terminated but not stored.        */
    /* -------------------------------------------------------------------- */
    int nStored = 0;
    for (char *pszRecord = CSVFindNextLine(psTable->pszRawData);
         pszRecord != nullptr && nStored < nMaxLineCount;
         pszRecord = CSVFindNextLine(pszRecord))
    {
        if (pszRecord[0] == '#')
            continue;
        psTable->papszLines[nStored] = pszRecord;
        ++nStored;
    }

    psTable->nLineCount = nStored;

    /* -------------------------------------------------------------------- */
    /*      Build the index from the leading integer of each record.  It    */
    /*      is only kept when the values never decrease, so that binary     */
    /*      searches can be done on the array.                              */
    /* -------------------------------------------------------------------- */
    psTable->panLineIndex = static_cast<int *>(
        VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
    if (psTable->panLineIndex == nullptr)
        return;

    int nPreviousKey = 0;
    for (int iLine = 0; iLine < psTable->nLineCount; ++iLine)
    {
        const int nKey = atoi(psTable->papszLines[iLine]);
        psTable->panLineIndex[iLine] = nKey;

        if (iLine > 0 && nKey < nPreviousKey)
        {
            // Out of order: drop the index altogether.
            CPLFree(psTable->panLineIndex);
            psTable->panLineIndex = nullptr;
            break;
        }
        nPreviousKey = nKey;
    }

    psTable->iLastLine = -1;

    /* -------------------------------------------------------------------- */
    /*      We should never need the file handle again, so close it.        */
    /* -------------------------------------------------------------------- */
    VSIFCloseL(psTable->fp);
    psTable->fp = nullptr;
}
497
498
static void CSVIngest(const char *pszFilename)
499
500
0
{
501
0
    CSVTable *psTable = CSVAccess(pszFilename);
502
0
    if (psTable == nullptr)
503
0
    {
504
0
        CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
505
0
                 pszFilename);
506
0
        return;
507
0
    }
508
0
    CSVIngest(psTable);
509
0
}
510
511
/************************************************************************/
512
/*                        CSVDetectSeperator()                          */
513
/************************************************************************/
514
515
/** Detect which field separator is used.
516
 *
517
 * Currently, it can detect comma, semicolon, space, tabulation or pipe.
518
 * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
519
 * most occurrences will be selected (and a warning emitted).
520
 * If no separator found, comma will be considered as the separator.
521
 *
522
 * @return ',', ';', ' ', tabulation character or '|'.
523
 */
524
char CSVDetectSeperator(const char *pszLine)
525
0
{
526
0
    bool bInString = false;
527
0
    int nCountComma = 0;
528
0
    int nCountSemicolon = 0;
529
0
    int nCountTab = 0;
530
0
    int nCountPipe = 0;
531
0
    int nCountSpace = 0;
532
533
0
    for (; *pszLine != '\0'; pszLine++)
534
0
    {
535
0
        if (!bInString && *pszLine == ',')
536
0
        {
537
0
            nCountComma++;
538
0
        }
539
0
        else if (!bInString && *pszLine == ';')
540
0
        {
541
0
            nCountSemicolon++;
542
0
        }
543
0
        else if (!bInString && *pszLine == '\t')
544
0
        {
545
0
            nCountTab++;
546
0
        }
547
0
        else if (!bInString && *pszLine == '|')
548
0
        {
549
0
            nCountPipe++;
550
0
        }
551
0
        else if (!bInString && *pszLine == ' ')
552
0
        {
553
0
            nCountSpace++;
554
0
        }
555
0
        else if (*pszLine == '"')
556
0
        {
557
0
            if (!bInString || pszLine[1] != '"')
558
0
            {
559
0
                bInString = !bInString;
560
0
                continue;
561
0
            }
562
0
            else /* doubled quotes in string resolve to one quote */
563
0
            {
564
0
                pszLine++;
565
0
            }
566
0
        }
567
0
    }
568
569
0
    const int nMaxCountExceptSpace =
570
0
        std::max(std::max(nCountComma, nCountSemicolon),
571
0
                 std::max(nCountTab, nCountPipe));
572
0
    char chDelimiter = ',';
573
0
    if (nMaxCountExceptSpace == 0)
574
0
    {
575
0
        if (nCountSpace > 0)
576
0
            chDelimiter = ' ';
577
0
    }
578
0
    else
579
0
    {
580
0
        bool bWarn = false;
581
0
        if (nCountComma == nMaxCountExceptSpace)
582
0
        {
583
0
            chDelimiter = ',';
584
0
            bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
585
0
        }
586
0
        else if (nCountSemicolon == nMaxCountExceptSpace)
587
0
        {
588
0
            chDelimiter = ';';
589
0
            bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
590
0
        }
591
0
        else if (nCountTab == nMaxCountExceptSpace)
592
0
        {
593
0
            chDelimiter = '\t';
594
0
            bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
595
0
        }
596
0
        else /* if( nCountPipe == nMaxCountExceptSpace ) */
597
0
        {
598
0
            chDelimiter = '|';
599
0
            bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
600
0
        }
601
0
        if (bWarn)
602
0
        {
603
0
            CPLError(CE_Warning, CPLE_AppDefined,
604
0
                     "Selecting '%c' as CSV field separator, but "
605
0
                     "other candidate separator(s) have been found.",
606
0
                     chDelimiter);
607
0
        }
608
0
    }
609
610
0
    return chDelimiter;
611
0
}
612
613
/************************************************************************/
614
/*                      CSVReadParseLineGeneric()                       */
615
/*                                                                      */
616
/*      Read one line, and return split into fields.  The return        */
617
/*      result is a stringlist, in the sense of the CSL functions.      */
618
/************************************************************************/
619
620
static char **
621
CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
622
                        size_t nMaxLineSize, const char *pszDelimiter,
623
                        bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
624
                        bool bMergeDelimiter, bool bSkipBOM)
625
0
{
626
0
    const char *pszLine = pfnReadLine(fp, nMaxLineSize);
627
0
    if (pszLine == nullptr)
628
0
        return nullptr;
629
630
0
    if (bSkipBOM)
631
0
    {
632
        // Skip BOM.
633
0
        const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
634
0
        if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
635
0
            pszLine += 3;
636
0
    }
637
638
    // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
639
0
    if (!bHonourStrings)
640
0
    {
641
0
        return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
642
0
    }
643
644
    // If there are no quotes, then this is the simple case.
645
    // Parse, and return tokens.
646
0
    if (strchr(pszLine, '\"') == nullptr)
647
0
        return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
648
0
                            bMergeDelimiter);
649
650
0
    const size_t nDelimiterLength = strlen(pszDelimiter);
651
0
    bool bInString = false;           // keep in that scope !
652
0
    std::string osWorkLine(pszLine);  // keep in that scope !
653
0
    size_t i = 0;                     // keep in that scope !
654
655
0
    try
656
0
    {
657
0
        while (true)
658
0
        {
659
0
            for (; i < osWorkLine.size(); ++i)
660
0
            {
661
0
                if (osWorkLine[i] == '\"')
662
0
                {
663
0
                    if (!bInString)
664
0
                    {
665
                        // Only consider " as the start of a quoted string
666
                        // if it is the first character of the line, or
667
                        // if it is immediately after the field delimiter.
668
0
                        if (i == 0 ||
669
0
                            (i >= nDelimiterLength &&
670
0
                             osWorkLine.compare(i - nDelimiterLength,
671
0
                                                nDelimiterLength, pszDelimiter,
672
0
                                                nDelimiterLength) == 0))
673
0
                        {
674
0
                            bInString = true;
675
0
                        }
676
0
                    }
677
0
                    else if (i + 1 < osWorkLine.size() &&
678
0
                             osWorkLine[i + 1] == '"')
679
0
                    {
680
                        // Escaped double quote in a quoted string
681
0
                        ++i;
682
0
                    }
683
0
                    else
684
0
                    {
685
0
                        bInString = false;
686
0
                    }
687
0
                }
688
0
            }
689
690
0
            if (!bInString)
691
0
            {
692
0
                return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
693
0
                                    bKeepLeadingAndClosingQuotes,
694
0
                                    bMergeDelimiter);
695
0
            }
696
697
0
            const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
698
0
            if (pszNewLine == nullptr)
699
0
                break;
700
701
0
            osWorkLine.append("\n");
702
0
            osWorkLine.append(pszNewLine);
703
0
        }
704
0
    }
705
0
    catch (const std::exception &e)
706
0
    {
707
0
        CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
708
0
    }
709
710
0
    if (bInString)
711
0
    {
712
0
        CPLError(CE_Failure, CPLE_AppDefined,
713
0
                 "CSV file has unbalanced number of double-quotes. Corrupted "
714
0
                 "data will likely be returned");
715
0
    }
716
717
0
    return nullptr;
718
0
}
719
720
/************************************************************************/
721
/*                          CSVReadParseLine()                          */
722
/*                                                                      */
723
/*      Read one line, and return split into fields.  The return        */
724
/*      result is a stringlist, in the sense of the CSL functions.      */
725
/*                                                                      */
726
/*      Deprecated.  Replaced by CSVReadParseLineL().                   */
727
/************************************************************************/
728
729
char **CSVReadParseLine(FILE *fp)
730
0
{
731
0
    return CSVReadParseLine2(fp, ',');
732
0
}
733
734
static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
735
0
{
736
0
    return CPLReadLine(static_cast<FILE *>(fp));
737
0
}
738
739
char **CSVReadParseLine2(FILE *fp, char chDelimiter)
740
0
{
741
0
    CPLAssert(fp != nullptr);
742
0
    if (fp == nullptr)
743
0
        return nullptr;
744
745
0
    char szDelimiter[2] = {chDelimiter, 0};
746
0
    return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
747
0
                                   0,  // nMaxLineSize,
748
0
                                   szDelimiter,
749
0
                                   true,   // bHonourStrings
750
0
                                   false,  // bKeepLeadingAndClosingQuotes
751
0
                                   false,  // bMergeDelimiter
752
0
                                   true /* bSkipBOM */);
753
0
}
754
755
/************************************************************************/
756
/*                          CSVReadParseLineL()                         */
757
/*                                                                      */
758
/*      Read one line, and return split into fields.  The return        */
759
/*      result is a stringlist, in the sense of the CSL functions.      */
760
/*                                                                      */
761
/*      Replaces CSVReadParseLine().  These functions use the VSI       */
762
/*      layer to allow reading from other file containers.              */
763
/************************************************************************/
764
765
/* Comma-delimited shorthand for CSVReadParseLine2L(). */
char **CSVReadParseLineL(VSILFILE *fp)
{
    const char chComma = ',';
    return CSVReadParseLine2L(fp, chComma);
}
769
770
/** Read the next line from a VSI file handle and split it on a
 * single-character delimiter.
 *
 * @param fp VSI file handle. Asserted to be non-NULL; NULL is returned if
 *           it is NULL anyway.
 * @param chDelimiter Field separator character.
 * @return The result of CSVReadParseLine3L(), or NULL if \a fp is NULL.
 */
char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
{
    CPLAssert(fp != nullptr);
    if (!fp)
        return nullptr;

    // CSVReadParseLine3L() takes the delimiter as a NUL-terminated string.
    const char szDelimiter[2] = {chDelimiter, '\0'};

    const size_t nMaxLineSize = 0;  // 0 means unlimited
    const bool bHonourStrings = true;
    const bool bKeepLeadingAndClosingQuotes = false;
    const bool bMergeDelimiter = false;
    const bool bSkipBOM = true;

    return CSVReadParseLine3L(fp, nMaxLineSize, szDelimiter, bHonourStrings,
                              bKeepLeadingAndClosingQuotes, bMergeDelimiter,
                              bSkipBOM);
}
786
787
/************************************************************************/
788
/*                      ReadLineLargeFile()                             */
789
/************************************************************************/
790
791
static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
792
0
{
793
0
    int nBufLength = 0;
794
0
    return CPLReadLine3L(static_cast<VSILFILE *>(fp),
795
0
                         nMaxLineSize == 0 ? -1
796
0
                                           : static_cast<int>(nMaxLineSize),
797
0
                         &nBufLength, nullptr);
798
0
}
799
800
/************************************************************************/
801
/*                      CSVReadParseLine3L()                            */
802
/*                                                                      */
803
/*      Read one line, and return split into fields.  The return        */
804
/*      result is a stringlist, in the sense of the CSL functions.      */
805
/************************************************************************/
806
807
/** Read one line, and return split into fields.
808
 * The return result is a stringlist, in the sense of the CSL functions.
809
 *
810
 * @param fp File handle. Must not be NULL
811
 * @param nMaxLineSize Maximum line size, or 0 for unlimited.
812
 * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
813
 * @param bHonourStrings Should be true, unless double quotes should not be
814
 *                       considered when separating fields.
815
 * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
816
 *                                     quote characters should be kept.
817
 * @param bMergeDelimiter Whether consecutive delimiters should be considered
818
 *                        as a single one. Should generally be set to false.
819
 * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
820
 */
821
char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
822
                          const char *pszDelimiter, bool bHonourStrings,
823
                          bool bKeepLeadingAndClosingQuotes,
824
                          bool bMergeDelimiter, bool bSkipBOM)
825
826
0
{
827
0
    return CSVReadParseLineGeneric(
828
0
        fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
829
0
        bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
830
0
}
831
832
/************************************************************************/
833
/*                             CSVCompare()                             */
834
/*                                                                      */
835
/*      Compare a field to a search value using a particular            */
836
/*      criteria.                                                       */
837
/************************************************************************/
838
839
static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
840
                       CSVCompareCriteria eCriteria)
841
842
0
{
843
0
    if (eCriteria == CC_ExactString)
844
0
    {
845
0
        return (strcmp(pszFieldValue, pszTarget) == 0);
846
0
    }
847
0
    else if (eCriteria == CC_ApproxString)
848
0
    {
849
0
        return EQUAL(pszFieldValue, pszTarget);
850
0
    }
851
0
    else if (eCriteria == CC_Integer)
852
0
    {
853
0
        return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
854
0
                atoi(pszFieldValue) == atoi(pszTarget));
855
0
    }
856
857
0
    return false;
858
0
}
859
860
/************************************************************************/
861
/*                            CSVScanLines()                            */
862
/*                                                                      */
863
/*      Read the file scanline for lines where the key field equals     */
864
/*      the indicated value with the suggested comparison criteria.     */
865
/*      Return the first matching line split into fields.               */
866
/*                                                                      */
867
/*      Deprecated.  Replaced by CSVScanLinesL().                       */
868
/************************************************************************/
869
870
char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
871
                    CSVCompareCriteria eCriteria)
872
873
0
{
874
0
    CPLAssert(pszValue != nullptr);
875
0
    CPLAssert(iKeyField >= 0);
876
0
    CPLAssert(fp != nullptr);
877
878
0
    bool bSelected = false;
879
0
    const int nTestValue = atoi(pszValue);
880
0
    char **papszFields = nullptr;
881
882
0
    while (!bSelected)
883
0
    {
884
0
        papszFields = CSVReadParseLine(fp);
885
0
        if (papszFields == nullptr)
886
0
            return nullptr;
887
888
0
        if (CSLCount(papszFields) < iKeyField + 1)
889
0
        {
890
            /* not selected */
891
0
        }
892
0
        else if (eCriteria == CC_Integer &&
893
0
                 atoi(papszFields[iKeyField]) == nTestValue)
894
0
        {
895
0
            bSelected = true;
896
0
        }
897
0
        else
898
0
        {
899
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
900
0
        }
901
902
0
        if (!bSelected)
903
0
        {
904
0
            CSLDestroy(papszFields);
905
0
            papszFields = nullptr;
906
0
        }
907
0
    }
908
909
0
    return papszFields;
910
0
}
911
912
/************************************************************************/
913
/*                            CSVScanLinesL()                           */
914
/*                                                                      */
915
/*      Read the file scanline for lines where the key field equals     */
916
/*      the indicated value with the suggested comparison criteria.     */
917
/*      Return the first matching line split into fields.               */
918
/************************************************************************/
919
920
char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
921
                     CSVCompareCriteria eCriteria)
922
923
0
{
924
0
    CPLAssert(pszValue != nullptr);
925
0
    CPLAssert(iKeyField >= 0);
926
0
    CPLAssert(fp != nullptr);
927
928
0
    bool bSelected = false;
929
0
    const int nTestValue = atoi(pszValue);
930
0
    char **papszFields = nullptr;
931
932
0
    while (!bSelected)
933
0
    {
934
0
        papszFields = CSVReadParseLineL(fp);
935
0
        if (papszFields == nullptr)
936
0
            return nullptr;
937
938
0
        if (CSLCount(papszFields) < iKeyField + 1)
939
0
        {
940
            /* not selected */
941
0
        }
942
0
        else if (eCriteria == CC_Integer &&
943
0
                 atoi(papszFields[iKeyField]) == nTestValue)
944
0
        {
945
0
            bSelected = true;
946
0
        }
947
0
        else
948
0
        {
949
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
950
0
        }
951
952
0
        if (!bSelected)
953
0
        {
954
0
            CSLDestroy(papszFields);
955
0
            papszFields = nullptr;
956
0
        }
957
0
    }
958
959
0
    return papszFields;
960
0
}
961
962
/************************************************************************/
963
/*                        CSVScanLinesIndexed()                         */
964
/*                                                                      */
965
/*      Read the file scanline for lines where the key field equals     */
966
/*      the indicated value with the suggested comparison criteria.     */
967
/*      Return the first matching line split into fields.               */
968
/************************************************************************/
969
970
/** Look up a record by integer key using the table's line index.
 *
 * Performs a binary search of psTable->panLineIndex (asserted non-NULL)
 * over psTable->nLineCount entries. When several adjacent index entries
 * carry the same key, the first of them is selected and
 * psTable->bNonUniqueKey is set.
 *
 * @param psTable Table whose panLineIndex and papszLines are searched.
 * @param nKeyValue Integer key to look for.
 * @return The matching line split on ',' by CSVSplitLine(), or NULL if the
 *         key is not in the index. On success psTable->iLastLine is set to
 *         the index of the returned line.
 */
static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
{
    CPLAssert(psTable->panLineIndex != nullptr);

    /* -------------------------------------------------------------------- */
    /*      Find target record with binary search.                          */
    /* -------------------------------------------------------------------- */
    const int *const panIndex = psTable->panLineIndex;
    int iBottom = 0;
    int iTop = psTable->nLineCount - 1;
    int iResult = -1;

    while (iBottom <= iTop)
    {
        // Written as bottom + half-span so the sum of two large indices
        // cannot overflow a signed int.
        const int iMiddle = iBottom + (iTop - iBottom) / 2;
        const int nMiddleKey = panIndex[iMiddle];

        if (nMiddleKey > nKeyValue)
        {
            iTop = iMiddle - 1;
        }
        else if (nMiddleKey < nKeyValue)
        {
            iBottom = iMiddle + 1;
        }
        else
        {
            iResult = iMiddle;
            // If a key is not unique, select the first instance of it.
            while (iResult > 0 && panIndex[iResult - 1] == nKeyValue)
            {
                psTable->bNonUniqueKey = true;
                iResult--;
            }
            break;
        }
    }

    if (iResult == -1)
        return nullptr;

    /* -------------------------------------------------------------------- */
    /*      Parse target line, and update iLastLine indicator.              */
    /* -------------------------------------------------------------------- */
    psTable->iLastLine = iResult;

    return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
}
1013
1014
/************************************************************************/
1015
/*                        CSVScanLinesIngested()                        */
1016
/*                                                                      */
1017
/*      Read the file scanline for lines where the key field equals     */
1018
/*      the indicated value with the suggested comparison criteria.     */
1019
/*      Return the first matching line split into fields.               */
1020
/************************************************************************/
1021
1022
static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1023
                                   const char *pszValue,
1024
                                   CSVCompareCriteria eCriteria)
1025
1026
0
{
1027
0
    CPLAssert(pszValue != nullptr);
1028
0
    CPLAssert(iKeyField >= 0);
1029
1030
0
    const int nTestValue = atoi(pszValue);
1031
1032
    /* -------------------------------------------------------------------- */
1033
    /*      Short cut for indexed files.                                    */
1034
    /* -------------------------------------------------------------------- */
1035
0
    if (iKeyField == 0 && eCriteria == CC_Integer &&
1036
0
        psTable->panLineIndex != nullptr)
1037
0
        return CSVScanLinesIndexed(psTable, nTestValue);
1038
1039
    /* -------------------------------------------------------------------- */
1040
    /*      Scan from in-core lines.                                        */
1041
    /* -------------------------------------------------------------------- */
1042
0
    char **papszFields = nullptr;
1043
0
    bool bSelected = false;
1044
1045
0
    while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1046
0
    {
1047
0
        psTable->iLastLine++;
1048
0
        papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1049
0
                                   false, false);
1050
1051
0
        if (CSLCount(papszFields) < iKeyField + 1)
1052
0
        {
1053
            /* not selected */
1054
0
        }
1055
0
        else if (eCriteria == CC_Integer &&
1056
0
                 atoi(papszFields[iKeyField]) == nTestValue)
1057
0
        {
1058
0
            bSelected = true;
1059
0
        }
1060
0
        else
1061
0
        {
1062
0
            bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1063
0
        }
1064
1065
0
        if (!bSelected)
1066
0
        {
1067
0
            CSLDestroy(papszFields);
1068
0
            papszFields = nullptr;
1069
0
        }
1070
0
    }
1071
1072
0
    return papszFields;
1073
0
}
1074
1075
/************************************************************************/
1076
/*                            CSVRewind()                               */
1077
/*                                                                      */
1078
/*      Rewind a CSV file based on a passed in filename.                */
1079
/*      This is aimed at being used with CSVGetNextLine().              */
1080
/************************************************************************/
1081
1082
void CSVRewind(const char *pszFilename)
1083
1084
0
{
1085
    /* -------------------------------------------------------------------- */
1086
    /*      Get access to the table.                                        */
1087
    /* -------------------------------------------------------------------- */
1088
0
    CPLAssert(pszFilename != nullptr);
1089
1090
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1091
0
    if (psTable != nullptr)
1092
0
        psTable->iLastLine = -1;
1093
0
}
1094
1095
/************************************************************************/
1096
/*                           CSVGetNextLine()                           */
1097
/*                                                                      */
1098
/*      Fetch the next line of a CSV file based on a passed in          */
1099
/*      filename.  Returns NULL at end of file, or if file is not       */
1100
/*      really established.                                             */
1101
/*      This ingests the whole file into memory if not already done.    */
1102
/*      When reaching end of file, CSVRewind() may be used to read      */
1103
/*      again from the beginning.                                       */
1104
/************************************************************************/
1105
1106
char **CSVGetNextLine(const char *pszFilename)
1107
1108
0
{
1109
1110
    /* -------------------------------------------------------------------- */
1111
    /*      Get access to the table.                                        */
1112
    /* -------------------------------------------------------------------- */
1113
0
    CPLAssert(pszFilename != nullptr);
1114
1115
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1116
0
    if (psTable == nullptr)
1117
0
        return nullptr;
1118
1119
0
    CSVIngest(psTable->pszFilename);
1120
1121
    /* -------------------------------------------------------------------- */
1122
    /*      If we use CSVGetNextLine() we can pretty much assume we have    */
1123
    /*      a non-unique key.                                               */
1124
    /* -------------------------------------------------------------------- */
1125
0
    psTable->bNonUniqueKey = true;
1126
1127
    /* -------------------------------------------------------------------- */
1128
    /*      Do we have a next line available?  This only works for          */
1129
    /*      ingested tables I believe.                                      */
1130
    /* -------------------------------------------------------------------- */
1131
0
    if (psTable->iLastLine + 1 >= psTable->nLineCount)
1132
0
        return nullptr;
1133
1134
0
    psTable->iLastLine++;
1135
0
    CSLDestroy(psTable->papszRecFields);
1136
0
    psTable->papszRecFields = CSVSplitLine(
1137
0
        psTable->papszLines[psTable->iLastLine], ",", false, false);
1138
1139
0
    return psTable->papszRecFields;
1140
0
}
1141
1142
/************************************************************************/
1143
/*                            CSVScanFile()                             */
1144
/*                                                                      */
1145
/*      Scan a whole file using criteria similar to above, but also     */
1146
/*      taking care of file opening and closing.                        */
1147
/************************************************************************/
1148
1149
/** Find the first record of \a psTable whose key field matches.
 *
 * The table's cached record (papszRecFields) is returned as-is when it
 * already matches and the table is not flagged as having a non-unique key.
 * Otherwise the cached record is destroyed and replaced by the result of a
 * fresh scan: from the in-memory lines when pszRawData is set, else from
 * the file handle after rewinding and skipping the header line.
 *
 * @param psTable Table to search.
 * @param iKeyField Zero-based index of the field to test.
 * @param pszValue Value searched for.
 * @param eCriteria Comparison criteria.
 * @return The table's papszRecFields after the search (NULL if no record
 *         matched). The list remains owned by the table.
 */
static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
                          const char *pszValue, CSVCompareCriteria eCriteria)
{
    CSVIngest(psTable->pszFilename);

    /* -------------------------------------------------------------------- */
    /*      Does the current record match the criteria?  If so, just        */
    /*      return it again.                                                */
    /* -------------------------------------------------------------------- */
    const bool bKeyInCurrentRecord =
        iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields);
    if (bKeyInCurrentRecord &&
        CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
        !psTable->bNonUniqueKey)
    {
        return psTable->papszRecFields;
    }

    /* -------------------------------------------------------------------- */
    /*      Scan the file from the beginning, replacing the ``current       */
    /*      record'' in our structure with the one that is found.           */
    /* -------------------------------------------------------------------- */
    psTable->iLastLine = -1;
    CSLDestroy(psTable->papszRecFields);

    char **papszFound = nullptr;
    if (psTable->pszRawData == nullptr)
    {
        VSIRewindL(psTable->fp);
        CPLReadLineL(psTable->fp); /* throw away the header line */

        papszFound = CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    }
    else
    {
        papszFound =
            CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    }

    psTable->papszRecFields = papszFound;
    return papszFound;
}
1186
1187
char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1188
                   CSVCompareCriteria eCriteria)
1189
1190
0
{
1191
    /* -------------------------------------------------------------------- */
1192
    /*      Get access to the table.                                        */
1193
    /* -------------------------------------------------------------------- */
1194
0
    CPLAssert(pszFilename != nullptr);
1195
1196
0
    if (iKeyField < 0)
1197
0
        return nullptr;
1198
1199
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1200
0
    if (psTable == nullptr)
1201
0
        return nullptr;
1202
1203
0
    return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1204
0
}
1205
1206
/************************************************************************/
1207
/*                           CSVGetFieldId()                            */
1208
/*                                                                      */
1209
/*      Read the first record of a CSV file (rewinding to be sure),     */
1210
/*      and find the field with the indicated name.  Returns -1 if      */
1211
/*      it fails to find the field name.  Comparison is case            */
1212
/*      insensitive, but otherwise exact.  After this function has      */
1213
/*      been called the file pointer will be positioned just after      */
1214
/*      the first record.                                               */
1215
/*                                                                      */
1216
/*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
1217
/************************************************************************/
1218
1219
int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1220
1221
0
{
1222
0
    CPLAssert(fp != nullptr && pszFieldName != nullptr);
1223
1224
0
    VSIRewind(fp);
1225
1226
0
    char **papszFields = CSVReadParseLine(fp);
1227
0
    for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1228
0
    {
1229
0
        if (EQUAL(papszFields[i], pszFieldName))
1230
0
        {
1231
0
            CSLDestroy(papszFields);
1232
0
            return i;
1233
0
        }
1234
0
    }
1235
1236
0
    CSLDestroy(papszFields);
1237
1238
0
    return -1;
1239
0
}
1240
1241
/************************************************************************/
1242
/*                           CSVGetFieldIdL()                           */
1243
/*                                                                      */
1244
/*      Read the first record of a CSV file (rewinding to be sure),     */
1245
/*      and find the field with the indicated name.  Returns -1 if      */
1246
/*      it fails to find the field name.  Comparison is case            */
1247
/*      insensitive, but otherwise exact.  After this function has      */
1248
/*      been called the file pointer will be positioned just after      */
1249
/*      the first record.                                               */
1250
/************************************************************************/
1251
1252
/** Find the index of a named field in the first line of the file.
 *
 * The handle is rewound, one line is read with CSVReadParseLineL(), and
 * its fields are compared to \a pszFieldName with EQUAL().
 *
 * @param fp VSI file handle to read the header line from.
 * @param pszFieldName Field name to look for.
 * @return Zero-based index of the first matching field, or -1 if none
 *         matches or no line could be read.
 */
int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
{
    CPLAssert(fp != nullptr && pszFieldName != nullptr);

    VSIRewindL(fp);

    char **papszHeader = CSVReadParseLineL(fp);

    int iFound = -1;
    if (papszHeader != nullptr)
    {
        for (int iField = 0; papszHeader[iField] != nullptr; ++iField)
        {
            if (EQUAL(papszHeader[iField], pszFieldName))
            {
                iFound = iField;
                break;
            }
        }
    }

    CSLDestroy(papszHeader);
    return iFound;
}
1273
1274
/************************************************************************/
1275
/*                         CSVGetFileFieldId()                          */
1276
/*                                                                      */
1277
/*      Same as CSVGetFieldId(), except that we get the file based      */
1278
/*      on filename, rather than having an existing handle.             */
1279
/************************************************************************/
1280
1281
/** Find the index of a named field using the table's cached field names.
 *
 * A candidate matches when its cached length (panFieldNamesLength) equals
 * the length of \a pszFieldName and EQUALN() reports the names equal over
 * that length.
 *
 * @param psTable Table whose papszFieldNames are searched.
 * @param pszFieldName Field name to look for.
 * @return Zero-based index of the first matching field, or -1 if there is
 *         no match or the table has no field names.
 */
static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
{
    /* -------------------------------------------------------------------- */
    /*      Find the requested field.                                       */
    /* -------------------------------------------------------------------- */
    const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));

    char **const papszNames = psTable->papszFieldNames;
    if (papszNames == nullptr)
        return -1;

    for (int iField = 0; papszNames[iField] != nullptr; ++iField)
    {
        // Cheap length test first; only then compare the characters.
        if (psTable->panFieldNamesLength[iField] != nFieldNameLength)
            continue;

        if (EQUALN(papszNames[iField], pszFieldName, nFieldNameLength))
            return iField;
    }

    return -1;
}
1301
1302
int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1303
1304
0
{
1305
    /* -------------------------------------------------------------------- */
1306
    /*      Get access to the table.                                        */
1307
    /* -------------------------------------------------------------------- */
1308
0
    CPLAssert(pszFilename != nullptr);
1309
1310
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1311
0
    if (psTable == nullptr)
1312
0
        return -1;
1313
0
    return CSVGetFileFieldId(psTable, pszFieldName);
1314
0
}
1315
1316
/************************************************************************/
1317
/*                         CSVScanFileByName()                          */
1318
/*                                                                      */
1319
/*      Same as CSVScanFile(), but using a field name instead of a      */
1320
/*      field number.                                                   */
1321
/************************************************************************/
1322
1323
char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1324
                         const char *pszValue, CSVCompareCriteria eCriteria)
1325
1326
0
{
1327
0
    const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1328
0
    if (iKeyField == -1)
1329
0
        return nullptr;
1330
1331
0
    return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1332
0
}
1333
1334
/************************************************************************/
1335
/*                            CSVGetField()                             */
1336
/*                                                                      */
1337
/*      The all-in-one function to fetch a particular field value       */
1338
/*      from a CSV file.  Note this function will return an empty       */
1339
/*      string, rather than NULL if it fails to find the desired        */
1340
/*      value for some reason.  The caller can't establish that the     */
1341
/*      fetch failed.                                                   */
1342
/************************************************************************/
1343
1344
const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1345
                        const char *pszKeyFieldValue,
1346
                        CSVCompareCriteria eCriteria,
1347
                        const char *pszTargetField)
1348
1349
0
{
1350
    /* -------------------------------------------------------------------- */
1351
    /*      Find the table.                                                 */
1352
    /* -------------------------------------------------------------------- */
1353
0
    CSVTable *const psTable = CSVAccess(pszFilename);
1354
0
    if (psTable == nullptr)
1355
0
        return "";
1356
1357
0
    const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1358
0
    if (iKeyField == -1)
1359
0
        return "";
1360
1361
    /* -------------------------------------------------------------------- */
1362
    /*      Find the correct record.                                        */
1363
    /* -------------------------------------------------------------------- */
1364
0
    char **papszRecord =
1365
0
        CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1366
0
    if (papszRecord == nullptr)
1367
0
        return "";
1368
1369
    /* -------------------------------------------------------------------- */
1370
    /*      Figure out which field we want out of this.                     */
1371
    /* -------------------------------------------------------------------- */
1372
0
    const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1373
0
    if (iTargetField < 0)
1374
0
        return "";
1375
1376
0
    for (int i = 0; papszRecord[i] != nullptr; ++i)
1377
0
    {
1378
0
        if (i == iTargetField)
1379
0
            return papszRecord[iTargetField];
1380
0
    }
1381
0
    return "";
1382
0
}
1383
1384
/************************************************************************/
1385
/*                       GDALDefaultCSVFilename()                       */
1386
/************************************************************************/
1387
1388
typedef struct
1389
{
1390
    char szPath[512];
1391
    bool bCSVFinderInitialized;
1392
} DefaultCSVFileNameTLS;
1393
1394
const char *GDALDefaultCSVFilename(const char *pszBasename)
1395
1396
0
{
1397
    /* -------------------------------------------------------------------- */
1398
    /*      Do we already have this file accessed?  If so, just return      */
1399
    /*      the existing path without any further probing.                  */
1400
    /* -------------------------------------------------------------------- */
1401
0
    int bMemoryError = FALSE;
1402
0
    CSVTable **ppsCSVTableList =
1403
0
        static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1404
0
    if (ppsCSVTableList != nullptr)
1405
0
    {
1406
0
        const size_t nBasenameLen = strlen(pszBasename);
1407
1408
0
        for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1409
0
             psTable = psTable->psNext)
1410
0
        {
1411
0
            const size_t nFullLen = strlen(psTable->pszFilename);
1412
1413
0
            if (nFullLen > nBasenameLen &&
1414
0
                strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1415
0
                       pszBasename) == 0 &&
1416
0
                strchr("/\\",
1417
0
                       psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1418
0
                    nullptr)
1419
0
            {
1420
0
                return psTable->pszFilename;
1421
0
            }
1422
0
        }
1423
0
    }
1424
1425
    /* -------------------------------------------------------------------- */
1426
    /*      Otherwise we need to look harder for it.                        */
1427
    /* -------------------------------------------------------------------- */
1428
0
    DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1429
0
        CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1430
0
    if (pTLSData == nullptr && !bMemoryError)
1431
0
    {
1432
0
        pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1433
0
            VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1434
0
        if (pTLSData)
1435
0
            CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1436
0
    }
1437
0
    if (pTLSData == nullptr)
1438
0
        return "/not_existing_dir/not_existing_path";
1439
1440
0
    const char *pszResult = CPLFindFile("gdal", pszBasename);
1441
1442
0
    if (pszResult != nullptr)
1443
0
        return pszResult;
1444
1445
0
    if (!pTLSData->bCSVFinderInitialized)
1446
0
    {
1447
0
        pTLSData->bCSVFinderInitialized = true;
1448
1449
0
        if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1450
0
            CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1451
1452
0
        pszResult = CPLFindFile("gdal", pszBasename);
1453
1454
0
        if (pszResult != nullptr)
1455
0
            return pszResult;
1456
0
    }
1457
1458
    // For systems like sandboxes that do not allow other checks.
1459
0
    CPLDebug("CPL_CSV",
1460
0
             "Failed to find file in GDALDefaultCSVFilename.  "
1461
0
             "Returning original basename: %s",
1462
0
             pszBasename);
1463
0
    CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1464
0
    return pTLSData->szPath;
1465
0
}
1466
1467
/************************************************************************/
1468
/*                            CSVFilename()                             */
1469
/*                                                                      */
1470
/*      Return the full path to a particular CSV file.  This will       */
1471
/*      eventually be something the application can override.           */
1472
/************************************************************************/
1473
1474
CPL_C_START
1475
static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1476
CPL_C_END
1477
1478
const char *CSVFilename(const char *pszBasename)
1479
1480
0
{
1481
0
    if (pfnCSVFilenameHook == nullptr)
1482
0
        return GDALDefaultCSVFilename(pszBasename);
1483
1484
0
    return pfnCSVFilenameHook(pszBasename);
1485
0
}
1486
1487
/************************************************************************/
1488
/*                         SetCSVFilenameHook()                         */
1489
/*                                                                      */
1490
/*      Applications can use this to set a function that will           */
1491
/*      massage CSV filenames.                                          */
1492
/************************************************************************/
1493
1494
/**
1495
 * Override CSV file search method.
1496
 *
1497
 * @param pfnNewHook The pointer to a function which will return the
1498
 * full path for a given filename.
1499
 *
1500
1501
This function allows an application to override how the GTIFGetDefn()
1502
and related functions find the CSV (Comma Separated Value) values
1503
required. The pfnNewHook argument should be a pointer to a function that
1504
will take in a CSV filename and return a full path to the file. The
1505
returned string should be to an internal static buffer so that the
1506
caller doesn't have to free the result.
1507
1508
Example:
1509
1510
The listgeo utility uses the following override function if the user
1511
specified a CSV file directory with the -t commandline switch (argument
1512
put into CSVDirName).
1513
1514
\code{.cpp}
1515
1516
    ...
1517
    SetCSVFilenameHook( CSVFileOverride );
1518
    ...
1519
1520
static const char *CSVFileOverride( const char * pszInput )
1521
1522
{
1523
    static char szPath[1024] = {};
1524
1525
    snprintf( szPath, sizeof(szPath), "%s/%s", CSVDirName, pszInput );
1526
1527
    return szPath;
1528
}
1529
\endcode
1530
1531
*/
1532
1533
CPL_C_START
1534
void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1535
1536
0
{
1537
0
    pfnCSVFilenameHook = pfnNewHook;
1538
0
}
1539
1540
CPL_C_END