/src/gdal/port/cpl_csv.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /******************************************************************************  | 
2  |  |  *  | 
3  |  |  * Project:  CPL - Common Portability Library  | 
4  |  |  * Purpose:  CSV (comma separated value) file access.  | 
5  |  |  * Author:   Frank Warmerdam, warmerdam@pobox.com  | 
6  |  |  *  | 
7  |  |  ******************************************************************************  | 
8  |  |  * Copyright (c) 1999, Frank Warmerdam  | 
9  |  |  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>  | 
10  |  |  *  | 
11  |  |  * SPDX-License-Identifier: MIT  | 
12  |  |  ****************************************************************************/  | 
13  |  |  | 
14  |  | #include "cpl_port.h"  | 
15  |  | #include "cpl_csv.h"  | 
16  |  |  | 
17  |  | #include <cstddef>  | 
18  |  | #include <cstdlib>  | 
19  |  | #include <cstring>  | 
20  |  | #if HAVE_FCNTL_H  | 
21  |  | #include <fcntl.h>  | 
22  |  | #endif  | 
23  |  |  | 
24  |  | #include "cpl_conv.h"  | 
25  |  | #include "cpl_error.h"  | 
26  |  | #include "cpl_multiproc.h"  | 
27  |  | #include "gdal_csv.h"  | 
28  |  |  | 
29  |  | #include <algorithm>  | 
30  |  |  | 
31  |  | /* ==================================================================== */  | 
32  |  | /*      The CSVTable is a persistent set of info about an open CSV      */  | 
33  |  | /*      table.  While it doesn't currently maintain a record index,     */  | 
34  |  | /*      or in-memory copy of the table, it could be changed to do so    */  | 
35  |  | /*      in the future.                                                  */  | 
36  |  | /* ==================================================================== */  | 
37  |  | typedef struct ctb  | 
38  |  | { | 
39  |  |     VSILFILE *fp;  | 
40  |  |     struct ctb *psNext;  | 
41  |  |     char *pszFilename;  | 
42  |  |     char **papszFieldNames;  | 
43  |  |     int *panFieldNamesLength;  | 
44  |  |     char **papszRecFields;  | 
45  |  |     int nFields;  | 
46  |  |     int iLastLine;  | 
47  |  |     bool bNonUniqueKey;  | 
48  |  |  | 
49  |  |     /* Cache for whole file */  | 
50  |  |     int nLineCount;  | 
51  |  |     char **papszLines;  | 
52  |  |     int *panLineIndex;  | 
53  |  |     char *pszRawData;  | 
54  |  | } CSVTable;  | 
55  |  |  | 
56  |  | static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,  | 
57  |  |                                 const char *pszFilename);  | 
58  |  |  | 
59  |  | /************************************************************************/  | 
60  |  | /*                            CSVFreeTLS()                              */  | 
61  |  | /************************************************************************/  | 
62  |  | static void CSVFreeTLS(void *pData)  | 
63  | 0  | { | 
64  | 0  |     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);  | 
65  | 0  |     CPLFree(pData);  | 
66  | 0  | }  | 
67  |  |  | 
68  |  | /* It would likely be better to share this list between threads, but  | 
69  |  |    that will require some rework. */  | 
70  |  |  | 
71  |  | /************************************************************************/  | 
72  |  | /*                             CSVAccess()                              */  | 
73  |  | /*                                                                      */  | 
74  |  | /*      This function will fetch a handle to the requested table.       */  | 
75  |  | /*      If not found in the ``open table list'' the table will be       */  | 
76  |  | /*      opened and added to the list.  Eventually this function may     */  | 
77  |  | /*      become public with an abstracted return type so that            */  | 
78  |  | /*      applications can set options about the table.  For now this     */  | 
79  |  | /*      isn't done.                                                     */  | 
80  |  | /************************************************************************/  | 
81  |  |  | 
82  |  | static CSVTable *CSVAccess(const char *pszFilename)  | 
83  |  |  | 
84  | 0  | { | 
85  |  |     /* -------------------------------------------------------------------- */  | 
86  |  |     /*      Fetch the table, and allocate the thread-local pointer to it    */  | 
87  |  |     /*      if there isn't already one.                                     */  | 
88  |  |     /* -------------------------------------------------------------------- */  | 
89  | 0  |     int bMemoryError = FALSE;  | 
90  | 0  |     CSVTable **ppsCSVTableList =  | 
91  | 0  |         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));  | 
92  | 0  |     if (bMemoryError)  | 
93  | 0  |         return nullptr;  | 
94  | 0  |     if (ppsCSVTableList == nullptr)  | 
95  | 0  |     { | 
96  | 0  |         ppsCSVTableList =  | 
97  | 0  |             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));  | 
98  | 0  |         if (ppsCSVTableList == nullptr)  | 
99  | 0  |             return nullptr;  | 
100  | 0  |         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);  | 
101  | 0  |     }  | 
102  |  |  | 
103  |  |     /* -------------------------------------------------------------------- */  | 
104  |  |     /*      Is the table already in the list.                               */  | 
105  |  |     /* -------------------------------------------------------------------- */  | 
106  | 0  |     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;  | 
107  | 0  |          psTable = psTable->psNext)  | 
108  | 0  |     { | 
109  | 0  |         if (EQUAL(psTable->pszFilename, pszFilename))  | 
110  | 0  |         { | 
111  |  |             /*  | 
112  |  |              * Eventually we should consider promoting to the front of  | 
113  |  |              * the list to accelerate frequently accessed tables.  | 
114  |  |              */  | 
115  | 0  |             return psTable;  | 
116  | 0  |         }  | 
117  | 0  |     }  | 
118  |  |  | 
119  |  |     /* -------------------------------------------------------------------- */  | 
120  |  |     /*      If not, try to open it.                                         */  | 
121  |  |     /* -------------------------------------------------------------------- */  | 
122  | 0  |     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");  | 
123  | 0  |     if (fp == nullptr)  | 
124  | 0  |         return nullptr;  | 
125  |  |  | 
126  |  |     /* -------------------------------------------------------------------- */  | 
127  |  |     /*      Create an information structure about this table, and add to    */  | 
128  |  |     /*      the front of the list.                                          */  | 
129  |  |     /* -------------------------------------------------------------------- */  | 
130  | 0  |     CSVTable *const psTable =  | 
131  | 0  |         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));  | 
132  | 0  |     if (psTable == nullptr)  | 
133  | 0  |     { | 
134  | 0  |         VSIFCloseL(fp);  | 
135  | 0  |         return nullptr;  | 
136  | 0  |     }  | 
137  |  |  | 
138  | 0  |     psTable->fp = fp;  | 
139  | 0  |     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);  | 
140  | 0  |     if (psTable->pszFilename == nullptr)  | 
141  | 0  |     { | 
142  | 0  |         VSIFree(psTable);  | 
143  | 0  |         VSIFCloseL(fp);  | 
144  | 0  |         return nullptr;  | 
145  | 0  |     }  | 
146  | 0  |     psTable->bNonUniqueKey = false;  // As far as we know now.  | 
147  | 0  |     psTable->psNext = *ppsCSVTableList;  | 
148  |  | 
  | 
149  | 0  |     *ppsCSVTableList = psTable;  | 
150  |  |  | 
151  |  |     /* -------------------------------------------------------------------- */  | 
152  |  |     /*      Read the table header record containing the field names.        */  | 
153  |  |     /* -------------------------------------------------------------------- */  | 
154  | 0  |     psTable->papszFieldNames = CSVReadParseLineL(fp);  | 
155  | 0  |     psTable->nFields = CSLCount(psTable->papszFieldNames);  | 
156  | 0  |     psTable->panFieldNamesLength =  | 
157  | 0  |         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));  | 
158  | 0  |     for (int i = 0;  | 
159  | 0  |          i < psTable->nFields &&  | 
160  |  |          /* null-pointer check to avoid a false positive from CLang S.A. */  | 
161  | 0  |          psTable->papszFieldNames != nullptr;  | 
162  | 0  |          i++)  | 
163  | 0  |     { | 
164  | 0  |         psTable->panFieldNamesLength[i] =  | 
165  | 0  |             static_cast<int>(strlen(psTable->papszFieldNames[i]));  | 
166  | 0  |     }  | 
167  |  | 
  | 
168  | 0  |     return psTable;  | 
169  | 0  | }  | 
170  |  |  | 
171  |  | /************************************************************************/  | 
172  |  | /*                            CSVDeaccess()                             */  | 
173  |  | /************************************************************************/  | 
174  |  |  | 
175  |  | static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,  | 
176  |  |                                 const char *pszFilename)  | 
177  |  |  | 
178  | 0  | { | 
179  | 0  |     if (ppsCSVTableList == nullptr)  | 
180  | 0  |         return;  | 
181  |  |  | 
182  |  |     /* -------------------------------------------------------------------- */  | 
183  |  |     /*      A NULL means deaccess all tables.                               */  | 
184  |  |     /* -------------------------------------------------------------------- */  | 
185  | 0  |     if (pszFilename == nullptr)  | 
186  | 0  |     { | 
187  | 0  |         while (*ppsCSVTableList != nullptr)  | 
188  | 0  |             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,  | 
189  | 0  |                                 (*ppsCSVTableList)->pszFilename);  | 
190  |  | 
  | 
191  | 0  |         return;  | 
192  | 0  |     }  | 
193  |  |  | 
194  |  |     /* -------------------------------------------------------------------- */  | 
195  |  |     /*      Find this table.                                                */  | 
196  |  |     /* -------------------------------------------------------------------- */  | 
197  | 0  |     CSVTable *psLast = nullptr;  | 
198  | 0  |     CSVTable *psTable = *ppsCSVTableList;  | 
199  | 0  |     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);  | 
200  | 0  |          psTable = psTable->psNext)  | 
201  | 0  |     { | 
202  | 0  |         psLast = psTable;  | 
203  | 0  |     }  | 
204  |  | 
  | 
205  | 0  |     if (psTable == nullptr)  | 
206  | 0  |     { | 
207  | 0  |         if (bCanUseTLS)  | 
208  | 0  |             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename); | 
209  | 0  |         return;  | 
210  | 0  |     }  | 
211  |  |  | 
212  |  |     /* -------------------------------------------------------------------- */  | 
213  |  |     /*      Remove the link from the list.                                  */  | 
214  |  |     /* -------------------------------------------------------------------- */  | 
215  | 0  |     if (psLast != nullptr)  | 
216  | 0  |         psLast->psNext = psTable->psNext;  | 
217  | 0  |     else  | 
218  | 0  |         *ppsCSVTableList = psTable->psNext;  | 
219  |  |  | 
220  |  |     /* -------------------------------------------------------------------- */  | 
221  |  |     /*      Free the table.                                                 */  | 
222  |  |     /* -------------------------------------------------------------------- */  | 
223  | 0  |     if (psTable->fp != nullptr)  | 
224  | 0  |         VSIFCloseL(psTable->fp);  | 
225  |  | 
  | 
226  | 0  |     CSLDestroy(psTable->papszFieldNames);  | 
227  | 0  |     CPLFree(psTable->panFieldNamesLength);  | 
228  | 0  |     CSLDestroy(psTable->papszRecFields);  | 
229  | 0  |     CPLFree(psTable->pszFilename);  | 
230  | 0  |     CPLFree(psTable->panLineIndex);  | 
231  | 0  |     CPLFree(psTable->pszRawData);  | 
232  | 0  |     CPLFree(psTable->papszLines);  | 
233  |  | 
  | 
234  | 0  |     CPLFree(psTable);  | 
235  |  | 
  | 
236  | 0  |     if (bCanUseTLS)  | 
237  | 0  |         CPLReadLine(nullptr);  | 
238  | 0  | }  | 
239  |  |  | 
240  |  | void CSVDeaccess(const char *pszFilename)  | 
241  | 0  | { | 
242  |  |     /* -------------------------------------------------------------------- */  | 
243  |  |     /*      Fetch the table, and allocate the thread-local pointer to it    */  | 
244  |  |     /*      if there isn't already one.                                     */  | 
245  |  |     /* -------------------------------------------------------------------- */  | 
246  | 0  |     int bMemoryError = FALSE;  | 
247  | 0  |     CSVTable **ppsCSVTableList =  | 
248  | 0  |         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));  | 
249  |  | 
  | 
250  | 0  |     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);  | 
251  | 0  | }  | 
252  |  |  | 
253  |  | /************************************************************************/  | 
254  |  | /*                            CSVSplitLine()                            */  | 
255  |  | /*                                                                      */  | 
256  |  | /*      Tokenize a CSV line into fields in the form of a string         */  | 
257  |  | /*      list.  This is used instead of the CPLTokenizeString()          */  | 
258  |  | /*      because it provides correct CSV escaping and quoting            */  | 
259  |  | /*      semantics.                                                      */  | 
260  |  | /************************************************************************/  | 
261  |  |  | 
262  |  | static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,  | 
263  |  |                            bool bKeepLeadingAndClosingQuotes,  | 
264  |  |                            bool bMergeDelimiter)  | 
265  |  |  | 
266  | 0  | { | 
267  | 0  |     CPLStringList aosRetList;  | 
268  | 0  |     if (pszString == nullptr)  | 
269  | 0  |         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));  | 
270  |  |  | 
271  | 0  |     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));  | 
272  | 0  |     int nTokenMax = 10;  | 
273  | 0  |     const size_t nDelimiterLength = strlen(pszDelimiter);  | 
274  |  | 
  | 
275  | 0  |     const char *pszIter = pszString;  | 
276  | 0  |     while (*pszIter != '\0')  | 
277  | 0  |     { | 
278  | 0  |         bool bInString = false;  | 
279  |  | 
  | 
280  | 0  |         int nTokenLen = 0;  | 
281  |  |  | 
282  |  |         // Try to find the next delimiter, marking end of token.  | 
283  | 0  |         do  | 
284  | 0  |         { | 
285  |  |             // End if this is a delimiter skip it and break.  | 
286  | 0  |             if (!bInString &&  | 
287  | 0  |                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)  | 
288  | 0  |             { | 
289  | 0  |                 pszIter += nDelimiterLength;  | 
290  | 0  |                 if (bMergeDelimiter)  | 
291  | 0  |                 { | 
292  | 0  |                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==  | 
293  | 0  |                            0)  | 
294  | 0  |                         pszIter += nDelimiterLength;  | 
295  | 0  |                 }  | 
296  | 0  |                 break;  | 
297  | 0  |             }  | 
298  |  |  | 
299  | 0  |             if (*pszIter == '"')  | 
300  | 0  |             { | 
301  | 0  |                 if (!bInString && nTokenLen > 0)  | 
302  | 0  |                 { | 
303  |  |                     // do not treat in a special way double quotes that appear  | 
304  |  |                     // in the middle of a field (similarly to OpenOffice)  | 
305  |  |                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo  | 
306  | 0  |                 }  | 
307  | 0  |                 else if (!bInString || pszIter[1] != '"')  | 
308  | 0  |                 { | 
309  | 0  |                     bInString = !bInString;  | 
310  | 0  |                     if (!bKeepLeadingAndClosingQuotes)  | 
311  | 0  |                         continue;  | 
312  | 0  |                 }  | 
313  | 0  |                 else  // Doubled quotes in string resolve to one quote.  | 
314  | 0  |                 { | 
315  | 0  |                     pszIter++;  | 
316  | 0  |                 }  | 
317  | 0  |             }  | 
318  |  |  | 
319  | 0  |             if (nTokenLen >= nTokenMax - 2)  | 
320  | 0  |             { | 
321  | 0  |                 nTokenMax = nTokenMax * 2 + 10;  | 
322  | 0  |                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));  | 
323  | 0  |             }  | 
324  |  | 
  | 
325  | 0  |             pszToken[nTokenLen] = *pszIter;  | 
326  | 0  |             nTokenLen++;  | 
327  | 0  |         } while (*(++pszIter) != '\0');  | 
328  |  |  | 
329  | 0  |         pszToken[nTokenLen] = '\0';  | 
330  | 0  |         aosRetList.AddString(pszToken);  | 
331  |  |  | 
332  |  |         // If the last token is an empty token, then we have to catch  | 
333  |  |         // it now, otherwise we won't reenter the loop and it will be lost.  | 
334  | 0  |         if (*pszIter == '\0' &&  | 
335  | 0  |             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&  | 
336  | 0  |             strncmp(pszIter - nDelimiterLength, pszDelimiter,  | 
337  | 0  |                     nDelimiterLength) == 0)  | 
338  | 0  |         { | 
339  | 0  |             aosRetList.AddString(""); | 
340  | 0  |         }  | 
341  | 0  |     }  | 
342  |  | 
  | 
343  | 0  |     CPLFree(pszToken);  | 
344  |  | 
  | 
345  | 0  |     if (aosRetList.Count() == 0)  | 
346  | 0  |         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));  | 
347  | 0  |     else  | 
348  | 0  |         return aosRetList.StealList();  | 
349  | 0  | }  | 
350  |  |  | 
351  |  | /************************************************************************/  | 
352  |  | /*                          CSVFindNextLine()                           */  | 
353  |  | /*                                                                      */  | 
354  |  | /*      Find the start of the next line, while at the same time zero    */  | 
355  |  | /*      terminating this line.  Take into account that there may be     */  | 
356  |  | /*      newline indicators within quoted strings, and that quotes       */  | 
357  |  | /*      can be escaped with a backslash.                                */  | 
358  |  | /************************************************************************/  | 
359  |  |  | 
/* Terminate the line starting at pszThisLine and locate the following one.
 *
 * The first CR or LF found outside of a double-quoted section ends the line;
 * that character and every CR/LF immediately following it are overwritten
 * with NUL.  A double quote preceded by a backslash does not open or close a
 * quoted section.
 *
 * pszThisLine: NUL terminated, writable buffer.
 * Returns a pointer to the first character after the line terminator(s), or
 * nullptr when nothing follows. */
static char *CSVFindNextLine(char *pszThisLine)

{
    // size_t rather than int: the buffer may be a whole ingested file, and an
    // int offset would overflow on very large content.
    size_t i = 0;  // i is used after the for loop.

    // Only the parity of the number of quotes seen matters, so track it as a
    // flag instead of a counter that could overflow.
    bool bInQuotes = false;
    for (; pszThisLine[i] != '\0'; i++)
    {
        if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))
            bInQuotes = !bInQuotes;

        if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) && !bInQuotes)
            break;
    }

    while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
        pszThisLine[i++] = '\0';

    if (pszThisLine[i] == '\0')
        return nullptr;

    return pszThisLine + i;
}
383  |  |  | 
384  |  | /************************************************************************/  | 
385  |  | /*                             CSVIngest()                              */  | 
386  |  | /*                                                                      */  | 
387  |  | /*      Load entire file into memory and setup index if possible.       */  | 
388  |  | /************************************************************************/  | 
389  |  |  | 
390  |  | // TODO(schwehr): Clean up all the casting in CSVIngest.  | 
391  |  | static void CSVIngest(CSVTable *psTable)  | 
392  |  |  | 
393  | 0  | { | 
394  | 0  |     if (psTable->pszRawData != nullptr)  | 
395  | 0  |         return;  | 
396  |  |  | 
397  |  |     /* -------------------------------------------------------------------- */  | 
398  |  |     /*      Ingest whole file.                                              */  | 
399  |  |     /* -------------------------------------------------------------------- */  | 
400  | 0  |     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)  | 
401  | 0  |     { | 
402  | 0  |         CPLError(CE_Failure, CPLE_FileIO,  | 
403  | 0  |                  "Failed using seek end and tell to get file length: %s",  | 
404  | 0  |                  psTable->pszFilename);  | 
405  | 0  |         return;  | 
406  | 0  |     }  | 
407  | 0  |     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);  | 
408  | 0  |     if (static_cast<long>(nFileLen) == -1)  | 
409  | 0  |     { | 
410  | 0  |         CPLError(CE_Failure, CPLE_FileIO,  | 
411  | 0  |                  "Failed using seek end and tell to get file length: %s",  | 
412  | 0  |                  psTable->pszFilename);  | 
413  | 0  |         return;  | 
414  | 0  |     }  | 
415  | 0  |     VSIRewindL(psTable->fp);  | 
416  |  | 
  | 
417  | 0  |     psTable->pszRawData = static_cast<char *>(  | 
418  | 0  |         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));  | 
419  | 0  |     if (psTable->pszRawData == nullptr)  | 
420  | 0  |         return;  | 
421  | 0  |     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),  | 
422  | 0  |                   psTable->fp) != static_cast<size_t>(nFileLen))  | 
423  | 0  |     { | 
424  | 0  |         CPLFree(psTable->pszRawData);  | 
425  | 0  |         psTable->pszRawData = nullptr;  | 
426  |  | 
  | 
427  | 0  |         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",  | 
428  | 0  |                  psTable->pszFilename);  | 
429  | 0  |         return;  | 
430  | 0  |     }  | 
431  |  |  | 
432  | 0  |     psTable->pszRawData[nFileLen] = '\0';  | 
433  |  |  | 
434  |  |     /* -------------------------------------------------------------------- */  | 
435  |  |     /*      Get count of newlines so we can allocate line array.            */  | 
436  |  |     /* -------------------------------------------------------------------- */  | 
437  | 0  |     int nMaxLineCount = 0;  | 
438  | 0  |     for (int i = 0; i < static_cast<int>(nFileLen); i++)  | 
439  | 0  |     { | 
440  | 0  |         if (psTable->pszRawData[i] == 10)  | 
441  | 0  |             nMaxLineCount++;  | 
442  | 0  |     }  | 
443  |  | 
  | 
444  | 0  |     psTable->papszLines =  | 
445  | 0  |         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));  | 
446  | 0  |     if (psTable->papszLines == nullptr)  | 
447  | 0  |         return;  | 
448  |  |  | 
449  |  |     /* -------------------------------------------------------------------- */  | 
450  |  |     /*      Build a list of record pointers into the raw data buffer        */  | 
451  |  |     /*      based on line terminators.  Zero terminate the line             */  | 
452  |  |     /*      strings.                                                        */  | 
453  |  |     /* -------------------------------------------------------------------- */  | 
454  |  |     /* skip header line */  | 
455  | 0  |     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);  | 
456  |  | 
  | 
457  | 0  |     int iLine = 0;  | 
458  | 0  |     while (pszThisLine != nullptr && iLine < nMaxLineCount)  | 
459  | 0  |     { | 
460  | 0  |         if (pszThisLine[0] != '#')  | 
461  | 0  |             psTable->papszLines[iLine++] = pszThisLine;  | 
462  | 0  |         pszThisLine = CSVFindNextLine(pszThisLine);  | 
463  | 0  |     }  | 
464  |  | 
  | 
465  | 0  |     psTable->nLineCount = iLine;  | 
466  |  |  | 
467  |  |     /* -------------------------------------------------------------------- */  | 
468  |  |     /*      Allocate and populate index array.  Ensure they are in          */  | 
469  |  |     /*      ascending order so that binary searches can be done on the      */  | 
470  |  |     /*      array.                                                          */  | 
471  |  |     /* -------------------------------------------------------------------- */  | 
472  | 0  |     psTable->panLineIndex = static_cast<int *>(  | 
473  | 0  |         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));  | 
474  | 0  |     if (psTable->panLineIndex == nullptr)  | 
475  | 0  |         return;  | 
476  |  |  | 
477  | 0  |     for (int i = 0; i < psTable->nLineCount; i++)  | 
478  | 0  |     { | 
479  | 0  |         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);  | 
480  |  | 
  | 
481  | 0  |         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])  | 
482  | 0  |         { | 
483  | 0  |             CPLFree(psTable->panLineIndex);  | 
484  | 0  |             psTable->panLineIndex = nullptr;  | 
485  | 0  |             break;  | 
486  | 0  |         }  | 
487  | 0  |     }  | 
488  |  | 
  | 
489  | 0  |     psTable->iLastLine = -1;  | 
490  |  |  | 
491  |  |     /* -------------------------------------------------------------------- */  | 
492  |  |     /*      We should never need the file handle against, so close it.      */  | 
493  |  |     /* -------------------------------------------------------------------- */  | 
494  | 0  |     VSIFCloseL(psTable->fp);  | 
495  | 0  |     psTable->fp = nullptr;  | 
496  | 0  | }  | 
497  |  |  | 
498  |  | static void CSVIngest(const char *pszFilename)  | 
499  |  |  | 
500  | 0  | { | 
501  | 0  |     CSVTable *psTable = CSVAccess(pszFilename);  | 
502  | 0  |     if (psTable == nullptr)  | 
503  | 0  |     { | 
504  | 0  |         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",  | 
505  | 0  |                  pszFilename);  | 
506  | 0  |         return;  | 
507  | 0  |     }  | 
508  | 0  |     CSVIngest(psTable);  | 
509  | 0  | }  | 
510  |  |  | 
511  |  | /************************************************************************/  | 
512  |  | /*                        CSVDetectSeperator()                          */  | 
513  |  | /************************************************************************/  | 
514  |  |  | 
515  |  | /** Detect which field separator is used.  | 
516  |  |  *  | 
517  |  |  * Currently, it can detect comma, semicolon, space, tabulation or pipe.  | 
518  |  |  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the  | 
519  |  |  * most occurrences will be selected (and a warning emitted).  | 
520  |  |  * If no separator found, comma will be considered as the separator.  | 
521  |  |  *  | 
522  |  |  * @return ',', ';', ' ', tabulation character or '|'.  | 
523  |  |  */  | 
524  |  | char CSVDetectSeperator(const char *pszLine)  | 
525  | 0  | { | 
526  | 0  |     bool bInString = false;  | 
527  | 0  |     int nCountComma = 0;  | 
528  | 0  |     int nCountSemicolon = 0;  | 
529  | 0  |     int nCountTab = 0;  | 
530  | 0  |     int nCountPipe = 0;  | 
531  | 0  |     int nCountSpace = 0;  | 
532  |  | 
  | 
533  | 0  |     for (; *pszLine != '\0'; pszLine++)  | 
534  | 0  |     { | 
535  | 0  |         if (!bInString && *pszLine == ',')  | 
536  | 0  |         { | 
537  | 0  |             nCountComma++;  | 
538  | 0  |         }  | 
539  | 0  |         else if (!bInString && *pszLine == ';')  | 
540  | 0  |         { | 
541  | 0  |             nCountSemicolon++;  | 
542  | 0  |         }  | 
543  | 0  |         else if (!bInString && *pszLine == '\t')  | 
544  | 0  |         { | 
545  | 0  |             nCountTab++;  | 
546  | 0  |         }  | 
547  | 0  |         else if (!bInString && *pszLine == '|')  | 
548  | 0  |         { | 
549  | 0  |             nCountPipe++;  | 
550  | 0  |         }  | 
551  | 0  |         else if (!bInString && *pszLine == ' ')  | 
552  | 0  |         { | 
553  | 0  |             nCountSpace++;  | 
554  | 0  |         }  | 
555  | 0  |         else if (*pszLine == '"')  | 
556  | 0  |         { | 
557  | 0  |             if (!bInString || pszLine[1] != '"')  | 
558  | 0  |             { | 
559  | 0  |                 bInString = !bInString;  | 
560  | 0  |                 continue;  | 
561  | 0  |             }  | 
562  | 0  |             else /* doubled quotes in string resolve to one quote */  | 
563  | 0  |             { | 
564  | 0  |                 pszLine++;  | 
565  | 0  |             }  | 
566  | 0  |         }  | 
567  | 0  |     }  | 
568  |  | 
  | 
569  | 0  |     const int nMaxCountExceptSpace =  | 
570  | 0  |         std::max(std::max(nCountComma, nCountSemicolon),  | 
571  | 0  |                  std::max(nCountTab, nCountPipe));  | 
572  | 0  |     char chDelimiter = ',';  | 
573  | 0  |     if (nMaxCountExceptSpace == 0)  | 
574  | 0  |     { | 
575  | 0  |         if (nCountSpace > 0)  | 
576  | 0  |             chDelimiter = ' ';  | 
577  | 0  |     }  | 
578  | 0  |     else  | 
579  | 0  |     { | 
580  | 0  |         bool bWarn = false;  | 
581  | 0  |         if (nCountComma == nMaxCountExceptSpace)  | 
582  | 0  |         { | 
583  | 0  |             chDelimiter = ',';  | 
584  | 0  |             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);  | 
585  | 0  |         }  | 
586  | 0  |         else if (nCountSemicolon == nMaxCountExceptSpace)  | 
587  | 0  |         { | 
588  | 0  |             chDelimiter = ';';  | 
589  | 0  |             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);  | 
590  | 0  |         }  | 
591  | 0  |         else if (nCountTab == nMaxCountExceptSpace)  | 
592  | 0  |         { | 
593  | 0  |             chDelimiter = '\t';  | 
594  | 0  |             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);  | 
595  | 0  |         }  | 
596  | 0  |         else /* if( nCountPipe == nMaxCountExceptSpace ) */  | 
597  | 0  |         { | 
598  | 0  |             chDelimiter = '|';  | 
599  | 0  |             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);  | 
600  | 0  |         }  | 
601  | 0  |         if (bWarn)  | 
602  | 0  |         { | 
603  | 0  |             CPLError(CE_Warning, CPLE_AppDefined,  | 
604  | 0  |                      "Selecting '%c' as CSV field separator, but "  | 
605  | 0  |                      "other candidate separator(s) have been found.",  | 
606  | 0  |                      chDelimiter);  | 
607  | 0  |         }  | 
608  | 0  |     }  | 
609  |  | 
  | 
610  | 0  |     return chDelimiter;  | 
611  | 0  | }  | 
612  |  |  | 
613  |  | /************************************************************************/  | 
614  |  | /*                    CSVReadParseLineGeneric()                         */  | 
615  |  | /*                                                                      */  | 
616  |  | /*      Read one line, and return split into fields.  The return        */  | 
617  |  | /*      result is a stringlist, in the sense of the CSL functions.      */  | 
618  |  | /************************************************************************/  | 
619  |  |  | 
620  |  | static char **  | 
621  |  | CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),  | 
622  |  |                         size_t nMaxLineSize, const char *pszDelimiter,  | 
623  |  |                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,  | 
624  |  |                         bool bMergeDelimiter, bool bSkipBOM)  | 
625  | 0  | { | 
626  | 0  |     const char *pszLine = pfnReadLine(fp, nMaxLineSize);  | 
627  | 0  |     if (pszLine == nullptr)  | 
628  | 0  |         return nullptr;  | 
629  |  |  | 
630  | 0  |     if (bSkipBOM)  | 
631  | 0  |     { | 
632  |  |         // Skip BOM.  | 
633  | 0  |         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);  | 
634  | 0  |         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)  | 
635  | 0  |             pszLine += 3;  | 
636  | 0  |     }  | 
637  |  |  | 
638  |  |     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.  | 
639  | 0  |     if (!bHonourStrings)  | 
640  | 0  |     { | 
641  | 0  |         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);  | 
642  | 0  |     }  | 
643  |  |  | 
644  |  |     // If there are no quotes, then this is the simple case.  | 
645  |  |     // Parse, and return tokens.  | 
646  | 0  |     if (strchr(pszLine, '\"') == nullptr)  | 
647  | 0  |         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,  | 
648  | 0  |                             bMergeDelimiter);  | 
649  |  |  | 
650  | 0  |     const size_t nDelimiterLength = strlen(pszDelimiter);  | 
651  | 0  |     bool bInString = false;           // keep in that scope !  | 
652  | 0  |     std::string osWorkLine(pszLine);  // keep in that scope !  | 
653  | 0  |     size_t i = 0;                     // keep in that scope !  | 
654  |  | 
  | 
655  | 0  |     try  | 
656  | 0  |     { | 
657  | 0  |         while (true)  | 
658  | 0  |         { | 
659  | 0  |             for (; i < osWorkLine.size(); ++i)  | 
660  | 0  |             { | 
661  | 0  |                 if (osWorkLine[i] == '\"')  | 
662  | 0  |                 { | 
663  | 0  |                     if (!bInString)  | 
664  | 0  |                     { | 
665  |  |                         // Only consider " as the start of a quoted string  | 
666  |  |                         // if it is the first character of the line, or  | 
667  |  |                         // if it is immediately after the field delimiter.  | 
668  | 0  |                         if (i == 0 ||  | 
669  | 0  |                             (i >= nDelimiterLength &&  | 
670  | 0  |                              osWorkLine.compare(i - nDelimiterLength,  | 
671  | 0  |                                                 nDelimiterLength, pszDelimiter,  | 
672  | 0  |                                                 nDelimiterLength) == 0))  | 
673  | 0  |                         { | 
674  | 0  |                             bInString = true;  | 
675  | 0  |                         }  | 
676  | 0  |                     }  | 
677  | 0  |                     else if (i + 1 < osWorkLine.size() &&  | 
678  | 0  |                              osWorkLine[i + 1] == '"')  | 
679  | 0  |                     { | 
680  |  |                         // Escaped double quote in a quoted string  | 
681  | 0  |                         ++i;  | 
682  | 0  |                     }  | 
683  | 0  |                     else  | 
684  | 0  |                     { | 
685  | 0  |                         bInString = false;  | 
686  | 0  |                     }  | 
687  | 0  |                 }  | 
688  | 0  |             }  | 
689  |  | 
  | 
690  | 0  |             if (!bInString)  | 
691  | 0  |             { | 
692  | 0  |                 return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,  | 
693  | 0  |                                     bKeepLeadingAndClosingQuotes,  | 
694  | 0  |                                     bMergeDelimiter);  | 
695  | 0  |             }  | 
696  |  |  | 
697  | 0  |             const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);  | 
698  | 0  |             if (pszNewLine == nullptr)  | 
699  | 0  |                 break;  | 
700  |  |  | 
701  | 0  |             osWorkLine.append("\n"); | 
702  | 0  |             osWorkLine.append(pszNewLine);  | 
703  | 0  |         }  | 
704  | 0  |     }  | 
705  | 0  |     catch (const std::exception &e)  | 
706  | 0  |     { | 
707  | 0  |         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());  | 
708  | 0  |     }  | 
709  |  |  | 
710  | 0  |     if (bInString)  | 
711  | 0  |     { | 
712  | 0  |         CPLError(CE_Failure, CPLE_AppDefined,  | 
713  | 0  |                  "CSV file has unbalanced number of double-quotes. Corrupted "  | 
714  | 0  |                  "data will likely be returned");  | 
715  | 0  |     }  | 
716  |  | 
  | 
717  | 0  |     return nullptr;  | 
718  | 0  | }  | 
719  |  |  | 
720  |  | /************************************************************************/  | 
721  |  | /*                          CSVReadParseLine()                          */  | 
722  |  | /*                                                                      */  | 
723  |  | /*      Read one line, and return split into fields.  The return        */  | 
724  |  | /*      result is a stringlist, in the sense of the CSL functions.      */  | 
725  |  | /*                                                                      */  | 
726  |  | /*      Deprecated.  Replaced by CSVReadParseLineL().                   */  | 
727  |  | /************************************************************************/  | 
728  |  |  | 
729  |  | char **CSVReadParseLine(FILE *fp)  | 
730  | 0  | { | 
731  | 0  |     return CSVReadParseLine2(fp, ',');  | 
732  | 0  | }  | 
733  |  |  | 
734  |  | static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)  | 
735  | 0  | { | 
736  | 0  |     return CPLReadLine(static_cast<FILE *>(fp));  | 
737  | 0  | }  | 
738  |  |  | 
739  |  | char **CSVReadParseLine2(FILE *fp, char chDelimiter)  | 
740  | 0  | { | 
741  | 0  |     CPLAssert(fp != nullptr);  | 
742  | 0  |     if (fp == nullptr)  | 
743  | 0  |         return nullptr;  | 
744  |  |  | 
745  | 0  |     char szDelimiter[2] = {chDelimiter, 0}; | 
746  | 0  |     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,  | 
747  | 0  |                                    0,  // nMaxLineSize,  | 
748  | 0  |                                    szDelimiter,  | 
749  | 0  |                                    true,   // bHonourStrings  | 
750  | 0  |                                    false,  // bKeepLeadingAndClosingQuotes  | 
751  | 0  |                                    false,  // bMergeDelimiter  | 
752  | 0  |                                    true /* bSkipBOM */);  | 
753  | 0  | }  | 
754  |  |  | 
755  |  | /************************************************************************/  | 
756  |  | /*                          CSVReadParseLineL()                         */  | 
757  |  | /*                                                                      */  | 
758  |  | /*      Read one line, and return split into fields.  The return        */  | 
759  |  | /*      result is a stringlist, in the sense of the CSL functions.      */  | 
760  |  | /*                                                                      */  | 
761  |  | /*      Replaces CSVReadParseLine().  These functions use the VSI       */  | 
762  |  | /*      layer to allow reading from other file containers.              */  | 
763  |  | /************************************************************************/  | 
764  |  |  | 
765  |  | char **CSVReadParseLineL(VSILFILE *fp)  | 
766  | 0  | { | 
767  | 0  |     return CSVReadParseLine2L(fp, ',');  | 
768  | 0  | }  | 
769  |  |  | 
770  |  | char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)  | 
771  |  |  | 
772  | 0  | { | 
773  | 0  |     CPLAssert(fp != nullptr);  | 
774  | 0  |     if (fp == nullptr)  | 
775  | 0  |         return nullptr;  | 
776  |  |  | 
777  | 0  |     char szDelimiter[2] = {chDelimiter, 0}; | 
778  | 0  |     return CSVReadParseLine3L(fp,  | 
779  | 0  |                               0,  // nMaxLineSize  | 
780  | 0  |                               szDelimiter,  | 
781  | 0  |                               true,   // bHonourStrings  | 
782  | 0  |                               false,  // bKeepLeadingAndClosingQuotes  | 
783  | 0  |                               false,  // bMergeDelimiter  | 
784  | 0  |                               true /* bSkipBOM */);  | 
785  | 0  | }  | 
786  |  |  | 
787  |  | /************************************************************************/  | 
788  |  | /*                      ReadLineLargeFile()                             */  | 
789  |  | /************************************************************************/  | 
790  |  |  | 
791  |  | static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)  | 
792  | 0  | { | 
793  | 0  |     int nBufLength = 0;  | 
794  | 0  |     return CPLReadLine3L(static_cast<VSILFILE *>(fp),  | 
795  | 0  |                          nMaxLineSize == 0 ? -1  | 
796  | 0  |                                            : static_cast<int>(nMaxLineSize),  | 
797  | 0  |                          &nBufLength, nullptr);  | 
798  | 0  | }  | 
799  |  |  | 
800  |  | /************************************************************************/  | 
801  |  | /*                      CSVReadParseLine3L()                            */  | 
802  |  | /*                                                                      */  | 
803  |  | /*      Read one line, and return split into fields.  The return        */  | 
804  |  | /*      result is a stringlist, in the sense of the CSL functions.      */  | 
805  |  | /************************************************************************/  | 
806  |  |  | 
807  |  | /** Read one line, and return split into fields.  | 
808  |  |  * The return result is a stringlist, in the sense of the CSL functions.  | 
809  |  |  *  | 
810  |  |  * @param fp File handle. Must not be NULL  | 
811  |  |  * @param nMaxLineSize Maximum line size, or 0 for unlimited.  | 
812  |  |  * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)  | 
813  |  |  * @param bHonourStrings Should be true, unless double quotes should not be  | 
814  |  |  *                       considered when separating fields.  | 
815  |  |  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double  | 
816  |  |  *                                     quote characters should be kept.  | 
817  |  |  * @param bMergeDelimiter Whether consecutive delimiters should be considered  | 
818  |  |  *                        as a single one. Should generally be set to false.  | 
819  |  |  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.  | 
820  |  |  */  | 
821  |  | char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,  | 
822  |  |                           const char *pszDelimiter, bool bHonourStrings,  | 
823  |  |                           bool bKeepLeadingAndClosingQuotes,  | 
824  |  |                           bool bMergeDelimiter, bool bSkipBOM)  | 
825  |  |  | 
826  | 0  | { | 
827  | 0  |     return CSVReadParseLineGeneric(  | 
828  | 0  |         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,  | 
829  | 0  |         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);  | 
830  | 0  | }  | 
831  |  |  | 
832  |  | /************************************************************************/  | 
833  |  | /*                             CSVCompare()                             */  | 
834  |  | /*                                                                      */  | 
835  |  | /*      Compare a field to a search value using a particular            */  | 
836  |  | /*      criteria.                                                       */  | 
837  |  | /************************************************************************/  | 
838  |  |  | 
839  |  | static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,  | 
840  |  |                        CSVCompareCriteria eCriteria)  | 
841  |  |  | 
842  | 0  | { | 
843  | 0  |     if (eCriteria == CC_ExactString)  | 
844  | 0  |     { | 
845  | 0  |         return (strcmp(pszFieldValue, pszTarget) == 0);  | 
846  | 0  |     }  | 
847  | 0  |     else if (eCriteria == CC_ApproxString)  | 
848  | 0  |     { | 
849  | 0  |         return EQUAL(pszFieldValue, pszTarget);  | 
850  | 0  |     }  | 
851  | 0  |     else if (eCriteria == CC_Integer)  | 
852  | 0  |     { | 
853  | 0  |         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&  | 
854  | 0  |                 atoi(pszFieldValue) == atoi(pszTarget));  | 
855  | 0  |     }  | 
856  |  |  | 
857  | 0  |     return false;  | 
858  | 0  | }  | 
859  |  |  | 
860  |  | /************************************************************************/  | 
861  |  | /*                            CSVScanLines()                            */  | 
862  |  | /*                                                                      */  | 
863  |  | /*      Read the file scanline for lines where the key field equals     */  | 
864  |  | /*      the indicated value with the suggested comparison criteria.     */  | 
865  |  | /*      Return the first matching line split into fields.               */  | 
866  |  | /*                                                                      */  | 
867  |  | /*      Deprecated.  Replaced by CSVScanLinesL().                       */  | 
868  |  | /************************************************************************/  | 
869  |  |  | 
870  |  | char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,  | 
871  |  |                     CSVCompareCriteria eCriteria)  | 
872  |  |  | 
873  | 0  | { | 
874  | 0  |     CPLAssert(pszValue != nullptr);  | 
875  | 0  |     CPLAssert(iKeyField >= 0);  | 
876  | 0  |     CPLAssert(fp != nullptr);  | 
877  |  |  | 
878  | 0  |     bool bSelected = false;  | 
879  | 0  |     const int nTestValue = atoi(pszValue);  | 
880  | 0  |     char **papszFields = nullptr;  | 
881  |  | 
  | 
882  | 0  |     while (!bSelected)  | 
883  | 0  |     { | 
884  | 0  |         papszFields = CSVReadParseLine(fp);  | 
885  | 0  |         if (papszFields == nullptr)  | 
886  | 0  |             return nullptr;  | 
887  |  |  | 
888  | 0  |         if (CSLCount(papszFields) < iKeyField + 1)  | 
889  | 0  |         { | 
890  |  |             /* not selected */  | 
891  | 0  |         }  | 
892  | 0  |         else if (eCriteria == CC_Integer &&  | 
893  | 0  |                  atoi(papszFields[iKeyField]) == nTestValue)  | 
894  | 0  |         { | 
895  | 0  |             bSelected = true;  | 
896  | 0  |         }  | 
897  | 0  |         else  | 
898  | 0  |         { | 
899  | 0  |             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);  | 
900  | 0  |         }  | 
901  |  | 
  | 
902  | 0  |         if (!bSelected)  | 
903  | 0  |         { | 
904  | 0  |             CSLDestroy(papszFields);  | 
905  | 0  |             papszFields = nullptr;  | 
906  | 0  |         }  | 
907  | 0  |     }  | 
908  |  |  | 
909  | 0  |     return papszFields;  | 
910  | 0  | }  | 
911  |  |  | 
912  |  | /************************************************************************/  | 
913  |  | /*                            CSVScanLinesL()                           */  | 
914  |  | /*                                                                      */  | 
915  |  | /*      Read the file scanline for lines where the key field equals     */  | 
916  |  | /*      the indicated value with the suggested comparison criteria.     */  | 
917  |  | /*      Return the first matching line split into fields.               */  | 
918  |  | /************************************************************************/  | 
919  |  |  | 
920  |  | char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,  | 
921  |  |                      CSVCompareCriteria eCriteria)  | 
922  |  |  | 
923  | 0  | { | 
924  | 0  |     CPLAssert(pszValue != nullptr);  | 
925  | 0  |     CPLAssert(iKeyField >= 0);  | 
926  | 0  |     CPLAssert(fp != nullptr);  | 
927  |  |  | 
928  | 0  |     bool bSelected = false;  | 
929  | 0  |     const int nTestValue = atoi(pszValue);  | 
930  | 0  |     char **papszFields = nullptr;  | 
931  |  | 
  | 
932  | 0  |     while (!bSelected)  | 
933  | 0  |     { | 
934  | 0  |         papszFields = CSVReadParseLineL(fp);  | 
935  | 0  |         if (papszFields == nullptr)  | 
936  | 0  |             return nullptr;  | 
937  |  |  | 
938  | 0  |         if (CSLCount(papszFields) < iKeyField + 1)  | 
939  | 0  |         { | 
940  |  |             /* not selected */  | 
941  | 0  |         }  | 
942  | 0  |         else if (eCriteria == CC_Integer &&  | 
943  | 0  |                  atoi(papszFields[iKeyField]) == nTestValue)  | 
944  | 0  |         { | 
945  | 0  |             bSelected = true;  | 
946  | 0  |         }  | 
947  | 0  |         else  | 
948  | 0  |         { | 
949  | 0  |             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);  | 
950  | 0  |         }  | 
951  |  | 
  | 
952  | 0  |         if (!bSelected)  | 
953  | 0  |         { | 
954  | 0  |             CSLDestroy(papszFields);  | 
955  | 0  |             papszFields = nullptr;  | 
956  | 0  |         }  | 
957  | 0  |     }  | 
958  |  |  | 
959  | 0  |     return papszFields;  | 
960  | 0  | }  | 
961  |  |  | 
962  |  | /************************************************************************/  | 
963  |  | /*                        CSVScanLinesIndexed()                         */  | 
964  |  | /*                                                                      */  | 
965  |  | /*      Read the file scanline for lines where the key field equals     */  | 
966  |  | /*      the indicated value with the suggested comparison criteria.     */  | 
967  |  | /*      Return the first matching line split into fields.               */  | 
968  |  | /************************************************************************/  | 
969  |  |  | 
970  |  | static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)  | 
971  |  |  | 
972  | 0  | { | 
973  | 0  |     CPLAssert(psTable->panLineIndex != nullptr);  | 
974  |  |  | 
975  |  |     /* -------------------------------------------------------------------- */  | 
976  |  |     /*      Find target record with binary search.                          */  | 
977  |  |     /* -------------------------------------------------------------------- */  | 
978  | 0  |     int iTop = psTable->nLineCount - 1;  | 
979  | 0  |     int iBottom = 0;  | 
980  | 0  |     int iResult = -1;  | 
981  |  | 
  | 
982  | 0  |     while (iTop >= iBottom)  | 
983  | 0  |     { | 
984  | 0  |         const int iMiddle = (iTop + iBottom) / 2;  | 
985  | 0  |         if (psTable->panLineIndex[iMiddle] > nKeyValue)  | 
986  | 0  |             iTop = iMiddle - 1;  | 
987  | 0  |         else if (psTable->panLineIndex[iMiddle] < nKeyValue)  | 
988  | 0  |             iBottom = iMiddle + 1;  | 
989  | 0  |         else  | 
990  | 0  |         { | 
991  | 0  |             iResult = iMiddle;  | 
992  |  |             // if a key is not unique, select the first instance of it.  | 
993  | 0  |             while (iResult > 0 &&  | 
994  | 0  |                    psTable->panLineIndex[iResult - 1] == nKeyValue)  | 
995  | 0  |             { | 
996  | 0  |                 psTable->bNonUniqueKey = true;  | 
997  | 0  |                 iResult--;  | 
998  | 0  |             }  | 
999  | 0  |             break;  | 
1000  | 0  |         }  | 
1001  | 0  |     }  | 
1002  |  | 
  | 
1003  | 0  |     if (iResult == -1)  | 
1004  | 0  |         return nullptr;  | 
1005  |  |  | 
1006  |  |     /* -------------------------------------------------------------------- */  | 
1007  |  |     /*      Parse target line, and update iLastLine indicator.              */  | 
1008  |  |     /* -------------------------------------------------------------------- */  | 
1009  | 0  |     psTable->iLastLine = iResult;  | 
1010  |  | 
  | 
1011  | 0  |     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);  | 
1012  | 0  | }  | 
1013  |  |  | 
1014  |  | /************************************************************************/  | 
1015  |  | /*                        CSVScanLinesIngested()                        */  | 
1016  |  | /*                                                                      */  | 
1017  |  | /*      Read the file scanline for lines where the key field equals     */  | 
1018  |  | /*      the indicated value with the suggested comparison criteria.     */  | 
1019  |  | /*      Return the first matching line split into fields.               */  | 
1020  |  | /************************************************************************/  | 
1021  |  |  | 
1022  |  | static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,  | 
1023  |  |                                    const char *pszValue,  | 
1024  |  |                                    CSVCompareCriteria eCriteria)  | 
1025  |  |  | 
1026  | 0  | { | 
1027  | 0  |     CPLAssert(pszValue != nullptr);  | 
1028  | 0  |     CPLAssert(iKeyField >= 0);  | 
1029  |  |  | 
1030  | 0  |     const int nTestValue = atoi(pszValue);  | 
1031  |  |  | 
1032  |  |     /* -------------------------------------------------------------------- */  | 
1033  |  |     /*      Short cut for indexed files.                                    */  | 
1034  |  |     /* -------------------------------------------------------------------- */  | 
1035  | 0  |     if (iKeyField == 0 && eCriteria == CC_Integer &&  | 
1036  | 0  |         psTable->panLineIndex != nullptr)  | 
1037  | 0  |         return CSVScanLinesIndexed(psTable, nTestValue);  | 
1038  |  |  | 
1039  |  |     /* -------------------------------------------------------------------- */  | 
1040  |  |     /*      Scan from in-core lines.                                        */  | 
1041  |  |     /* -------------------------------------------------------------------- */  | 
1042  | 0  |     char **papszFields = nullptr;  | 
1043  | 0  |     bool bSelected = false;  | 
1044  |  | 
  | 
1045  | 0  |     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)  | 
1046  | 0  |     { | 
1047  | 0  |         psTable->iLastLine++;  | 
1048  | 0  |         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",  | 
1049  | 0  |                                    false, false);  | 
1050  |  | 
  | 
1051  | 0  |         if (CSLCount(papszFields) < iKeyField + 1)  | 
1052  | 0  |         { | 
1053  |  |             /* not selected */  | 
1054  | 0  |         }  | 
1055  | 0  |         else if (eCriteria == CC_Integer &&  | 
1056  | 0  |                  atoi(papszFields[iKeyField]) == nTestValue)  | 
1057  | 0  |         { | 
1058  | 0  |             bSelected = true;  | 
1059  | 0  |         }  | 
1060  | 0  |         else  | 
1061  | 0  |         { | 
1062  | 0  |             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);  | 
1063  | 0  |         }  | 
1064  |  | 
  | 
1065  | 0  |         if (!bSelected)  | 
1066  | 0  |         { | 
1067  | 0  |             CSLDestroy(papszFields);  | 
1068  | 0  |             papszFields = nullptr;  | 
1069  | 0  |         }  | 
1070  | 0  |     }  | 
1071  |  | 
  | 
1072  | 0  |     return papszFields;  | 
1073  | 0  | }  | 
1074  |  |  | 
1075  |  | /************************************************************************/  | 
1076  |  | /*                            CSVRewind()                               */  | 
1077  |  | /*                                                                      */  | 
1078  |  | /*      Rewind a CSV file based on a passed in filename.                */  | 
1079  |  | /*      This is aimed at being used with CSVGetNextLine().              */  | 
1080  |  | /************************************************************************/  | 
1081  |  |  | 
1082  |  | void CSVRewind(const char *pszFilename)  | 
1083  |  |  | 
1084  | 0  | { | 
1085  |  |     /* -------------------------------------------------------------------- */  | 
1086  |  |     /*      Get access to the table.                                        */  | 
1087  |  |     /* -------------------------------------------------------------------- */  | 
1088  | 0  |     CPLAssert(pszFilename != nullptr);  | 
1089  |  |  | 
1090  | 0  |     CSVTable *const psTable = CSVAccess(pszFilename);  | 
1091  | 0  |     if (psTable != nullptr)  | 
1092  | 0  |         psTable->iLastLine = -1;  | 
1093  | 0  | }  | 
1094  |  |  | 
1095  |  | /************************************************************************/  | 
1096  |  | /*                           CSVGetNextLine()                           */  | 
1097  |  | /*                                                                      */  | 
1098  |  | /*      Fetch the next line of a CSV file based on a passed in          */  | 
1099  |  | /*      filename.  Returns NULL at end of file, or if file is not       */  | 
1100  |  | /*      really established.                                             */  | 
1101  |  | /*      This ingests the whole file into memory if not already done.    */  | 
1102  |  | /*      When reaching end of file, CSVRewind() may be used to read      */  | 
1103  |  | /*      again from the beginning.                                       */  | 
1104  |  | /************************************************************************/  | 
1105  |  |  | 
1106  |  | char **CSVGetNextLine(const char *pszFilename)  | 
1107  |  |  | 
1108  | 0  | { | 
1109  |  |  | 
1110  |  |     /* -------------------------------------------------------------------- */  | 
1111  |  |     /*      Get access to the table.                                        */  | 
1112  |  |     /* -------------------------------------------------------------------- */  | 
1113  | 0  |     CPLAssert(pszFilename != nullptr);  | 
1114  |  |  | 
1115  | 0  |     CSVTable *const psTable = CSVAccess(pszFilename);  | 
1116  | 0  |     if (psTable == nullptr)  | 
1117  | 0  |         return nullptr;  | 
1118  |  |  | 
1119  | 0  |     CSVIngest(psTable->pszFilename);  | 
1120  |  |  | 
1121  |  |     /* -------------------------------------------------------------------- */  | 
1122  |  |     /*      If we use CSVGetNextLine() we can pretty much assume we have    */  | 
1123  |  |     /*      a non-unique key.                                               */  | 
1124  |  |     /* -------------------------------------------------------------------- */  | 
1125  | 0  |     psTable->bNonUniqueKey = true;  | 
1126  |  |  | 
1127  |  |     /* -------------------------------------------------------------------- */  | 
1128  |  |     /*      Do we have a next line available?  This only works for          */  | 
1129  |  |     /*      ingested tables I believe.                                      */  | 
1130  |  |     /* -------------------------------------------------------------------- */  | 
1131  | 0  |     if (psTable->iLastLine + 1 >= psTable->nLineCount)  | 
1132  | 0  |         return nullptr;  | 
1133  |  |  | 
1134  | 0  |     psTable->iLastLine++;  | 
1135  | 0  |     CSLDestroy(psTable->papszRecFields);  | 
1136  | 0  |     psTable->papszRecFields = CSVSplitLine(  | 
1137  | 0  |         psTable->papszLines[psTable->iLastLine], ",", false, false);  | 
1138  |  | 
  | 
1139  | 0  |     return psTable->papszRecFields;  | 
1140  | 0  | }  | 
1141  |  |  | 
1142  |  | /************************************************************************/  | 
1143  |  | /*                            CSVScanFile()                             */  | 
1144  |  | /*                                                                      */  | 
1145  |  | /*      Scan a whole file using criteria similar to above, but also     */  | 
1146  |  | /*      taking care of file opening and closing.                        */  | 
1147  |  | /************************************************************************/  | 
1148  |  |  | 
1149  |  | static char **CSVScanFile(CSVTable *const psTable, int iKeyField,  | 
1150  |  |                           const char *pszValue, CSVCompareCriteria eCriteria)  | 
1151  | 0  | { | 
1152  | 0  |     CSVIngest(psTable->pszFilename);  | 
1153  |  |  | 
1154  |  |     /* -------------------------------------------------------------------- */  | 
1155  |  |     /*      Does the current record match the criteria?  If so, just        */  | 
1156  |  |     /*      return it again.                                                */  | 
1157  |  |     /* -------------------------------------------------------------------- */  | 
1158  | 0  |     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&  | 
1159  | 0  |         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&  | 
1160  | 0  |         !psTable->bNonUniqueKey)  | 
1161  | 0  |     { | 
1162  | 0  |         return psTable->papszRecFields;  | 
1163  | 0  |     }  | 
1164  |  |  | 
1165  |  |     /* -------------------------------------------------------------------- */  | 
1166  |  |     /*      Scan the file from the beginning, replacing the ``current       */  | 
1167  |  |     /*      record'' in our structure with the one that is found.           */  | 
1168  |  |     /* -------------------------------------------------------------------- */  | 
1169  | 0  |     psTable->iLastLine = -1;  | 
1170  | 0  |     CSLDestroy(psTable->papszRecFields);  | 
1171  |  | 
  | 
1172  | 0  |     if (psTable->pszRawData != nullptr)  | 
1173  | 0  |         psTable->papszRecFields =  | 
1174  | 0  |             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);  | 
1175  | 0  |     else  | 
1176  | 0  |     { | 
1177  | 0  |         VSIRewindL(psTable->fp);  | 
1178  | 0  |         CPLReadLineL(psTable->fp); /* throw away the header line */  | 
1179  |  | 
  | 
1180  | 0  |         psTable->papszRecFields =  | 
1181  | 0  |             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);  | 
1182  | 0  |     }  | 
1183  |  | 
  | 
1184  | 0  |     return psTable->papszRecFields;  | 
1185  | 0  | }  | 
1186  |  |  | 
1187  |  | char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,  | 
1188  |  |                    CSVCompareCriteria eCriteria)  | 
1189  |  |  | 
1190  | 0  | { | 
1191  |  |     /* -------------------------------------------------------------------- */  | 
1192  |  |     /*      Get access to the table.                                        */  | 
1193  |  |     /* -------------------------------------------------------------------- */  | 
1194  | 0  |     CPLAssert(pszFilename != nullptr);  | 
1195  |  |  | 
1196  | 0  |     if (iKeyField < 0)  | 
1197  | 0  |         return nullptr;  | 
1198  |  |  | 
1199  | 0  |     CSVTable *const psTable = CSVAccess(pszFilename);  | 
1200  | 0  |     if (psTable == nullptr)  | 
1201  | 0  |         return nullptr;  | 
1202  |  |  | 
1203  | 0  |     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);  | 
1204  | 0  | }  | 
1205  |  |  | 
1206  |  | /************************************************************************/  | 
1207  |  | /*                           CSVGetFieldId()                            */  | 
1208  |  | /*                                                                      */  | 
1209  |  | /*      Read the first record of a CSV file (rewinding to be sure),     */  | 
1210  |  | /*      and find the field with the indicated name.  Returns -1 if      */  | 
1211  |  | /*      it fails to find the field name.  Comparison is case            */  | 
1212  |  | /*      insensitive, but otherwise exact.  After this function has      */  | 
1213  |  | /*      been called the file pointer will be positioned just after      */  | 
1214  |  | /*      the first record.                                               */  | 
1215  |  | /*                                                                      */  | 
1216  |  | /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */  | 
1217  |  | /************************************************************************/  | 
1218  |  |  | 
1219  |  | int CSVGetFieldId(FILE *fp, const char *pszFieldName)  | 
1220  |  |  | 
1221  | 0  | { | 
1222  | 0  |     CPLAssert(fp != nullptr && pszFieldName != nullptr);  | 
1223  |  |  | 
1224  | 0  |     VSIRewind(fp);  | 
1225  |  | 
  | 
1226  | 0  |     char **papszFields = CSVReadParseLine(fp);  | 
1227  | 0  |     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)  | 
1228  | 0  |     { | 
1229  | 0  |         if (EQUAL(papszFields[i], pszFieldName))  | 
1230  | 0  |         { | 
1231  | 0  |             CSLDestroy(papszFields);  | 
1232  | 0  |             return i;  | 
1233  | 0  |         }  | 
1234  | 0  |     }  | 
1235  |  |  | 
1236  | 0  |     CSLDestroy(papszFields);  | 
1237  |  | 
  | 
1238  | 0  |     return -1;  | 
1239  | 0  | }  | 
1240  |  |  | 
1241  |  | /************************************************************************/  | 
1242  |  | /*                           CSVGetFieldIdL()                           */  | 
1243  |  | /*                                                                      */  | 
1244  |  | /*      Read the first record of a CSV file (rewinding to be sure),     */  | 
1245  |  | /*      and find the field with the indicated name.  Returns -1 if      */  | 
1246  |  | /*      it fails to find the field name.  Comparison is case            */  | 
1247  |  | /*      insensitive, but otherwise exact.  After this function has      */  | 
1248  |  | /*      been called the file pointer will be positioned just after      */  | 
1249  |  | /*      the first record.                                               */  | 
1250  |  | /************************************************************************/  | 
1251  |  |  | 
1252  |  | int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)  | 
1253  |  |  | 
1254  | 0  | { | 
1255  | 0  |     CPLAssert(fp != nullptr && pszFieldName != nullptr);  | 
1256  |  |  | 
1257  | 0  |     VSIRewindL(fp);  | 
1258  |  | 
  | 
1259  | 0  |     char **papszFields = CSVReadParseLineL(fp);  | 
1260  | 0  |     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)  | 
1261  | 0  |     { | 
1262  | 0  |         if (EQUAL(papszFields[i], pszFieldName))  | 
1263  | 0  |         { | 
1264  | 0  |             CSLDestroy(papszFields);  | 
1265  | 0  |             return i;  | 
1266  | 0  |         }  | 
1267  | 0  |     }  | 
1268  |  |  | 
1269  | 0  |     CSLDestroy(papszFields);  | 
1270  |  | 
  | 
1271  | 0  |     return -1;  | 
1272  | 0  | }  | 
1273  |  |  | 
1274  |  | /************************************************************************/  | 
1275  |  | /*                         CSVGetFileFieldId()                          */  | 
1276  |  | /*                                                                      */  | 
1277  |  | /*      Same as CSVGetFieldId(), except that we get the file based      */  | 
1278  |  | /*      on filename, rather than having an existing handle.             */  | 
1279  |  | /************************************************************************/  | 
1280  |  |  | 
1281  |  | static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)  | 
1282  |  |  | 
1283  | 0  | { | 
1284  |  |     /* -------------------------------------------------------------------- */  | 
1285  |  |     /*      Find the requested field.                                       */  | 
1286  |  |     /* -------------------------------------------------------------------- */  | 
1287  | 0  |     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));  | 
1288  | 0  |     for (int i = 0; psTable->papszFieldNames != nullptr &&  | 
1289  | 0  |                     psTable->papszFieldNames[i] != nullptr;  | 
1290  | 0  |          i++)  | 
1291  | 0  |     { | 
1292  | 0  |         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&  | 
1293  | 0  |             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))  | 
1294  | 0  |         { | 
1295  | 0  |             return i;  | 
1296  | 0  |         }  | 
1297  | 0  |     }  | 
1298  |  |  | 
1299  | 0  |     return -1;  | 
1300  | 0  | }  | 
1301  |  |  | 
1302  |  | int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)  | 
1303  |  |  | 
1304  | 0  | { | 
1305  |  |     /* -------------------------------------------------------------------- */  | 
1306  |  |     /*      Get access to the table.                                        */  | 
1307  |  |     /* -------------------------------------------------------------------- */  | 
1308  | 0  |     CPLAssert(pszFilename != nullptr);  | 
1309  |  |  | 
1310  | 0  |     CSVTable *const psTable = CSVAccess(pszFilename);  | 
1311  | 0  |     if (psTable == nullptr)  | 
1312  | 0  |         return -1;  | 
1313  | 0  |     return CSVGetFileFieldId(psTable, pszFieldName);  | 
1314  | 0  | }  | 
1315  |  |  | 
1316  |  | /************************************************************************/  | 
1317  |  | /*                         CSVScanFileByName()                          */  | 
1318  |  | /*                                                                      */  | 
1319  |  | /*      Same as CSVScanFile(), but using a field name instead of a      */  | 
1320  |  | /*      field number.                                                   */  | 
1321  |  | /************************************************************************/  | 
1322  |  |  | 
1323  |  | char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,  | 
1324  |  |                          const char *pszValue, CSVCompareCriteria eCriteria)  | 
1325  |  |  | 
1326  | 0  | { | 
1327  | 0  |     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);  | 
1328  | 0  |     if (iKeyField == -1)  | 
1329  | 0  |         return nullptr;  | 
1330  |  |  | 
1331  | 0  |     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);  | 
1332  | 0  | }  | 
1333  |  |  | 
1334  |  | /************************************************************************/  | 
1335  |  | /*                            CSVGetField()                             */  | 
1336  |  | /*                                                                      */  | 
1337  |  | /*      The all-in-one function to fetch a particular field value       */  | 
1338  |  | /*      from a CSV file.  Note this function will return an empty       */  | 
1339  |  | /*      string, rather than NULL if it fails to find the desired        */  | 
1340  |  | /*      value for some reason.  The caller can't establish that the     */  | 
1341  |  | /*      fetch failed.                                                   */  | 
1342  |  | /************************************************************************/  | 
1343  |  |  | 
1344  |  | const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,  | 
1345  |  |                         const char *pszKeyFieldValue,  | 
1346  |  |                         CSVCompareCriteria eCriteria,  | 
1347  |  |                         const char *pszTargetField)  | 
1348  |  |  | 
1349  | 0  | { | 
1350  |  |     /* -------------------------------------------------------------------- */  | 
1351  |  |     /*      Find the table.                                                 */  | 
1352  |  |     /* -------------------------------------------------------------------- */  | 
1353  | 0  |     CSVTable *const psTable = CSVAccess(pszFilename);  | 
1354  | 0  |     if (psTable == nullptr)  | 
1355  | 0  |         return "";  | 
1356  |  |  | 
1357  | 0  |     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);  | 
1358  | 0  |     if (iKeyField == -1)  | 
1359  | 0  |         return "";  | 
1360  |  |  | 
1361  |  |     /* -------------------------------------------------------------------- */  | 
1362  |  |     /*      Find the correct record.                                        */  | 
1363  |  |     /* -------------------------------------------------------------------- */  | 
1364  | 0  |     char **papszRecord =  | 
1365  | 0  |         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);  | 
1366  | 0  |     if (papszRecord == nullptr)  | 
1367  | 0  |         return "";  | 
1368  |  |  | 
1369  |  |     /* -------------------------------------------------------------------- */  | 
1370  |  |     /*      Figure out which field we want out of this.                     */  | 
1371  |  |     /* -------------------------------------------------------------------- */  | 
1372  | 0  |     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);  | 
1373  | 0  |     if (iTargetField < 0)  | 
1374  | 0  |         return "";  | 
1375  |  |  | 
1376  | 0  |     for (int i = 0; papszRecord[i] != nullptr; ++i)  | 
1377  | 0  |     { | 
1378  | 0  |         if (i == iTargetField)  | 
1379  | 0  |             return papszRecord[iTargetField];  | 
1380  | 0  |     }  | 
1381  | 0  |     return "";  | 
1382  | 0  | }  | 
1383  |  |  | 
1384  |  | /************************************************************************/  | 
1385  |  | /*                       GDALDefaultCSVFilename()                       */  | 
1386  |  | /************************************************************************/  | 
1387  |  |  | 
/* State kept by GDALDefaultCSVFilename() in the CTLS_CSVDEFAULTFILENAME */
/* thread-local slot.                                                    */
typedef struct
{
    /* Receives a copy of the basename when the file cannot be located. */
    char szPath[512];
    /* Set once the GDAL_DATA configuration option has been consulted. */
    bool bCSVFinderInitialized;
} DefaultCSVFileNameTLS;
1393  |  |  | 
1394  |  | const char *GDALDefaultCSVFilename(const char *pszBasename)  | 
1395  |  |  | 
1396  | 0  | { | 
1397  |  |     /* -------------------------------------------------------------------- */  | 
1398  |  |     /*      Do we already have this file accessed?  If so, just return      */  | 
1399  |  |     /*      the existing path without any further probing.                  */  | 
1400  |  |     /* -------------------------------------------------------------------- */  | 
1401  | 0  |     int bMemoryError = FALSE;  | 
1402  | 0  |     CSVTable **ppsCSVTableList =  | 
1403  | 0  |         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));  | 
1404  | 0  |     if (ppsCSVTableList != nullptr)  | 
1405  | 0  |     { | 
1406  | 0  |         const size_t nBasenameLen = strlen(pszBasename);  | 
1407  |  | 
  | 
1408  | 0  |         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;  | 
1409  | 0  |              psTable = psTable->psNext)  | 
1410  | 0  |         { | 
1411  | 0  |             const size_t nFullLen = strlen(psTable->pszFilename);  | 
1412  |  | 
  | 
1413  | 0  |             if (nFullLen > nBasenameLen &&  | 
1414  | 0  |                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,  | 
1415  | 0  |                        pszBasename) == 0 &&  | 
1416  | 0  |                 strchr("/\\", | 
1417  | 0  |                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=  | 
1418  | 0  |                     nullptr)  | 
1419  | 0  |             { | 
1420  | 0  |                 return psTable->pszFilename;  | 
1421  | 0  |             }  | 
1422  | 0  |         }  | 
1423  | 0  |     }  | 
1424  |  |  | 
1425  |  |     /* -------------------------------------------------------------------- */  | 
1426  |  |     /*      Otherwise we need to look harder for it.                        */  | 
1427  |  |     /* -------------------------------------------------------------------- */  | 
1428  | 0  |     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(  | 
1429  | 0  |         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));  | 
1430  | 0  |     if (pTLSData == nullptr && !bMemoryError)  | 
1431  | 0  |     { | 
1432  | 0  |         pTLSData = static_cast<DefaultCSVFileNameTLS *>(  | 
1433  | 0  |             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));  | 
1434  | 0  |         if (pTLSData)  | 
1435  | 0  |             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);  | 
1436  | 0  |     }  | 
1437  | 0  |     if (pTLSData == nullptr)  | 
1438  | 0  |         return "/not_existing_dir/not_existing_path";  | 
1439  |  |  | 
1440  | 0  |     const char *pszResult = CPLFindFile("gdal", pszBasename); | 
1441  |  | 
  | 
1442  | 0  |     if (pszResult != nullptr)  | 
1443  | 0  |         return pszResult;  | 
1444  |  |  | 
1445  | 0  |     if (!pTLSData->bCSVFinderInitialized)  | 
1446  | 0  |     { | 
1447  | 0  |         pTLSData->bCSVFinderInitialized = true;  | 
1448  |  | 
  | 
1449  | 0  |         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr) | 
1450  | 0  |             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr)); | 
1451  |  | 
  | 
1452  | 0  |         pszResult = CPLFindFile("gdal", pszBasename); | 
1453  |  | 
  | 
1454  | 0  |         if (pszResult != nullptr)  | 
1455  | 0  |             return pszResult;  | 
1456  | 0  |     }  | 
1457  |  |  | 
1458  |  |     // For systems like sandboxes that do not allow other checks.  | 
1459  | 0  |     CPLDebug("CPL_CSV", | 
1460  | 0  |              "Failed to find file in GDALDefaultCSVFilename.  "  | 
1461  | 0  |              "Returning original basename: %s",  | 
1462  | 0  |              pszBasename);  | 
1463  | 0  |     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));  | 
1464  | 0  |     return pTLSData->szPath;  | 
1465  | 0  | }  | 
1466  |  |  | 
1467  |  | /************************************************************************/  | 
1468  |  | /*                            CSVFilename()                             */  | 
1469  |  | /*                                                                      */  | 
1470  |  | /*      Return the full path to a particular CSV file.  Applications    */  | 
1471  |  | /*      can override the lookup through SetCSVFilenameHook().           */  | 
1472  |  | /************************************************************************/  | 
1473  |  |  | 
1474  |  | CPL_C_START  | 
1475  |  | static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;  | 
1476  |  | CPL_C_END  | 
1477  |  |  | 
1478  |  | const char *CSVFilename(const char *pszBasename)  | 
1479  |  |  | 
1480  | 0  | { | 
1481  | 0  |     if (pfnCSVFilenameHook == nullptr)  | 
1482  | 0  |         return GDALDefaultCSVFilename(pszBasename);  | 
1483  |  |  | 
1484  | 0  |     return pfnCSVFilenameHook(pszBasename);  | 
1485  | 0  | }  | 
1486  |  |  | 
1487  |  | /************************************************************************/  | 
1488  |  | /*                         SetCSVFilenameHook()                         */  | 
1489  |  | /*                                                                      */  | 
1490  |  | /*      Applications can use this to set a function that will           */  | 
1491  |  | /*      massage CSV filenames.                                          */  | 
1492  |  | /************************************************************************/  | 
1493  |  |  | 
1494  |  | /**  | 
1495  |  |  * Override CSV file search method.  | 
1496  |  |  *  | 
1497  |  |  * @param pfnNewHook The pointer to a function which will return the  | 
1498  |  |  * full path for a given filename.  | 
1499  |  |  *  | 
1500  |  |  | 
1501  |  | This function allows an application to override how the GTIFGetDefn()  | 
1502  |  | and related functions find the CSV (Comma Separated Value) files they  | 
1503  |  | require. The pfnNewHook argument should be a pointer to a function that  | 
1504  |  | will take in a CSV filename and return a full path to the file. The  | 
1505  |  | returned string should point to an internal static buffer so that the  | 
1506  |  | caller doesn't have to free the result.  | 
1507  |  |  | 
1508  |  | Example:  | 
1509  |  |  | 
1510  |  | The listgeo utility uses the following override function if the user  | 
1511  |  | specified a CSV file directory with the -t commandline switch (argument  | 
1512  |  | put into CSVDirName).  | 
1513  |  |  | 
1514  |  | \code{.cpp} | 
1515  |  |  | 
1516  |  |     ...  | 
1517  |  |     SetCSVFilenameHook( CSVFileOverride );  | 
1518  |  |     ...  | 
1519  |  |  | 
1520  |  | static const char *CSVFileOverride( const char * pszInput )  | 
1521  |  |  | 
1522  |  | { | 
1523  |  |     static char szPath[1024] = {}; | 
1524  |  |  | 
1525  |  |     sprintf( szPath, "%s/%s", CSVDirName, pszInput );  | 
1526  |  |  | 
1527  |  |     return szPath;  | 
1528  |  | }  | 
1529  |  | \endcode  | 
1530  |  |  | 
1531  |  | */  | 
1532  |  |  | 
1533  |  | CPL_C_START  | 
1534  |  | void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))  | 
1535  |  |  | 
1536  | 0  | { | 
1537  | 0  |     pfnCSVFilenameHook = pfnNewHook;  | 
1538  | 0  | }  | 
1539  |  |  | 
1540  |  | CPL_C_END  |