/src/MapServer/src/mapstring.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * $Id$ |
3 | | * |
4 | | * Project: MapServer |
5 | | * Purpose: Various string handling functions. |
6 | | * Author: Steve Lime and the MapServer team. |
7 | | * |
8 | | * Notes: A couple of string handling functions (strrstr, strlcat) were taken |
9 | | *from other sources. Copyright notices accompany those functions below. |
10 | | * |
11 | | ****************************************************************************** |
12 | | * Copyright (c) 1996-2005 Regents of the University of Minnesota. |
13 | | * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> |
14 | | * |
15 | | * Permission is hereby granted, free of charge, to any person obtaining a |
16 | | * copy of this software and associated documentation files (the "Software"), |
17 | | * to deal in the Software without restriction, including without limitation |
18 | | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
19 | | * and/or sell copies of the Software, and to permit persons to whom the |
20 | | * Software is furnished to do so, subject to the following conditions: |
21 | | * |
22 | | * The above copyright notice and this permission notice shall be included in |
23 | | * all copies of this Software or works derived from this Software. |
24 | | * |
25 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
26 | | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
27 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
28 | | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
29 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
30 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
31 | | * DEALINGS IN THE SOFTWARE. |
32 | | ****************************************************************************/ |
33 | | |
34 | | #include "mapserver.h" |
35 | | #include "mapthread.h" |
36 | | |
37 | | #include "cpl_vsi.h" |
38 | | |
39 | | #include <ctype.h> |
40 | | #include <float.h> |
41 | | #include <string.h> |
42 | | #include <errno.h> |
43 | | |
44 | | #include <algorithm> |
45 | | #include <cctype> |
46 | | |
47 | | /* |
48 | | * Find the first occurrence of find in s, ignore case. |
49 | | */ |
50 | | |
51 | | #ifdef USE_FRIBIDI |
52 | | #if (defined(_WIN32) && !defined(__CYGWIN__)) || defined(HAVE_FRIBIDI2) |
53 | | #include "fribidi.h" |
54 | | #else |
55 | | #include <fribidi/fribidi.h> |
56 | | #endif |
57 | | #define MAX_STR_LEN 65000 |
58 | | #endif |
59 | | |
60 | | #ifdef USE_ICONV |
61 | | #include "mapiconv.h" |
62 | | #include <iconv.h> |
63 | | #include <wchar.h> |
64 | | #endif |
65 | | |
66 | | #include "mapentities.h" |
67 | | |
68 | | #ifndef HAVE_STRRSTR |
69 | | /* |
70 | | ** Copyright (c) 2000-2004 University of Illinois Board of Trustees |
71 | | ** Copyright (c) 2000-2005 Mark D. Roth |
72 | | ** All rights reserved. |
73 | | ** |
74 | | ** Developed by: Campus Information Technologies and Educational Services, |
75 | | ** University of Illinois at Urbana-Champaign |
76 | | ** |
77 | | ** Permission is hereby granted, free of charge, to any person obtaining |
78 | | ** a copy of this software and associated documentation files (the |
79 | | ** ``Software''), to deal with the Software without restriction, including |
80 | | ** without limitation the rights to use, copy, modify, merge, publish, |
81 | | ** distribute, sublicense, and/or sell copies of the Software, and to |
82 | | ** permit persons to whom the Software is furnished to do so, subject to |
83 | | ** the following conditions: |
84 | | ** |
85 | | ** * Redistributions of source code must retain the above copyright |
86 | | ** notice, this list of conditions and the following disclaimers. |
87 | | ** |
88 | | ** * Redistributions in binary form must reproduce the above copyright |
89 | | ** notice, this list of conditions and the following disclaimers in the |
90 | | ** documentation and/or other materials provided with the distribution. |
91 | | ** |
92 | | ** * Neither the names of Campus Information Technologies and Educational |
93 | | ** Services, University of Illinois at Urbana-Champaign, nor the names |
94 | | ** of its contributors may be used to endorse or promote products derived |
95 | | ** from this Software without specific prior written permission. |
96 | | ** |
97 | | ** THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, |
98 | | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
99 | | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
100 | | ** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR |
101 | | ** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
102 | | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE |
103 | | ** OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. |
104 | | */ |
/*
 * Locate the LAST occurrence of find inside string.
 *
 * Returns a pointer to the start of the match, or NULL when find is longer
 * than string or does not occur in it. An empty find matches at the
 * terminating NUL of string.
 */
char *strrstr(const char *string, const char *find) {
  const size_t findlen = strlen(find);
  const size_t stringlen = strlen(string);
  size_t pos;

  if (findlen > stringlen)
    return NULL;

  /* Try candidate start offsets from the rightmost one down to 0. An index
     is used instead of a decrementing pointer so that a failed search never
     forms a pointer located before the start of string. */
  for (pos = stringlen - findlen + 1; pos-- > 0;) {
    if (strncmp(string + pos, find, findlen) == 0)
      return (char *)(string + pos);
  }

  return NULL;
}
120 | | #endif |
121 | | |
122 | | #ifndef HAVE_STRLCAT |
123 | | /* |
124 | | * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> |
125 | | * |
126 | | * Permission to use, copy, modify, and distribute this software for any |
127 | | * purpose with or without fee is hereby granted, provided that the above |
128 | | * copyright notice and this permission notice appear in all copies. |
129 | | * |
130 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
131 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
132 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
133 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
134 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
135 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
136 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
137 | | */ |
138 | | |
139 | | /* |
140 | | * Appends src to string dst of size siz (unlike strncat, siz is the |
141 | | * full size of dst, not space left). At most siz-1 characters |
142 | | * will be copied. Always NUL terminates (unless siz <= strlen(dst)). |
143 | | * Returns strlen(src) + MS_MIN(siz, strlen(initial dst)). |
144 | | * If retval >= siz, truncation occurred. |
145 | | */ |
/*
 * Bounded string append: siz is the total size of dst. Returns the length
 * the combined string would have had (initial dst length, capped at siz,
 * plus strlen(src)); a result >= siz means the output was truncated.
 */
size_t strlcat(char *dst, const char *src, size_t siz) {
  size_t used = 0;

  /* Measure dst, but never look beyond the siz bytes we were given. */
  while (used < siz && dst[used] != '\0')
    used++;

  /* No terminator inside dst: nothing can be appended. */
  if (used == siz)
    return used + strlen(src);

  size_t room = siz - used - 1; /* bytes available, NUL excluded */
  size_t srclen = 0;
  char *out = dst + used;

  /* Copy what fits while still walking all of src to learn its length. */
  for (; src[srclen] != '\0'; srclen++) {
    if (room > 0) {
      *out++ = src[srclen];
      room--;
    }
  }
  *out = '\0';

  return used + srclen; /* count does not include NUL */
}
171 | | #endif |
172 | | |
173 | | #ifndef HAVE_STRLCPY |
174 | | /* |
175 | | * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> |
176 | | * All rights reserved. |
177 | | * |
178 | | * Redistribution and use in source and binary forms, with or without |
179 | | * modification, are permitted provided that the following conditions |
180 | | * are met: |
181 | | * 1. Redistributions of source code must retain the above copyright |
182 | | * notice, this list of conditions and the following disclaimer. |
183 | | * 2. Redistributions in binary form must reproduce the above copyright |
184 | | * notice, this list of conditions and the following disclaimer in the |
185 | | * documentation and/or other materials provided with the distribution. |
186 | | * 3. The name of the author may not be used to endorse or promote products |
187 | | * derived from this software without specific prior written permission. |
188 | | * |
189 | | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
190 | | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
191 | | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
192 | | * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
193 | | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
194 | | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
195 | | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
196 | | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
197 | | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
198 | | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
199 | | */ |
200 | | |
201 | | /* |
202 | | * Copy src to string dst of size siz. At most siz-1 characters |
203 | | * will be copied. Always NUL terminates (unless siz == 0). |
204 | | * Returns strlen(src); if retval >= siz, truncation occurred. |
205 | | */ |
/*
 * Bounded string copy: writes at most siz-1 characters of src into dst and
 * NUL-terminates unless siz is 0. Always returns strlen(src), so a result
 * >= siz tells the caller the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t siz) {
  size_t i = 0;

  if (siz != 0) {
    const size_t limit = siz - 1; /* last slot is reserved for the NUL */

    while (i < limit && src[i] != '\0') {
      dst[i] = src[i];
      i++;
    }
    dst[i] = '\0';
  }

  /* Keep scanning the part of src that did not fit to obtain its length. */
  while (src[i] != '\0')
    i++;

  return i; /* count does not include NUL */
}
229 | | #endif |
230 | | |
231 | | #ifndef HAVE_STRCASESTR |
232 | | /*- |
233 | | * Copyright (c) 1990, 1993 |
234 | | * The Regents of the University of California. All rights reserved. |
235 | | * |
236 | | * This code is derived from software contributed to Berkeley by |
237 | | * Chris Torek. |
238 | | * |
239 | | * Redistribution and use in source and binary forms, with or without |
240 | | * modification, are permitted provided that the following conditions |
241 | | * are met: |
242 | | * 1. Redistributions of source code must retain the above copyright |
243 | | * notice, this list of conditions and the following disclaimer. |
244 | | * 2. Redistributions in binary form must reproduce the above copyright |
245 | | * notice, this list of conditions and the following disclaimer in the |
246 | | * documentation and/or other materials provided with the distribution. |
247 | | * 3. Neither the name of the University nor the names of its contributors |
248 | | * may be used to endorse or promote products derived from this software |
249 | | * without specific prior written permission. |
250 | | * |
251 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
252 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
253 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
254 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
255 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
256 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
257 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
258 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
259 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
260 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
261 | | * SUCH DAMAGE. |
262 | | */ |
263 | | char *strcasestr(const char *s, const char *find) { |
264 | | char c, sc; |
265 | | size_t len; |
266 | | |
267 | | if ((c = *find++) != 0) { |
268 | | c = tolower((unsigned char)c); |
269 | | len = strlen(find); |
270 | | do { |
271 | | do { |
272 | | if ((sc = *s++) == 0) |
273 | | return (NULL); |
274 | | } while ((char)tolower((unsigned char)sc) != c); |
275 | | } while (strncasecmp(s, find, len) != 0); |
276 | | s--; |
277 | | } |
278 | | return ((char *)s); |
279 | | } |
280 | | #endif |
281 | | |
282 | | #ifndef HAVE_STRNCASECMP |
/*
 * Case-insensitive comparison of at most len characters of s1 and s2.
 *
 * Returns 0 when the compared prefixes are equal (including len == 0 and
 * two empty strings), the difference of the upper-cased bytes at the first
 * mismatch, or +1 / -1 when one string ends before the other within len.
 */
int strncasecmp(const char *s1, const char *s2, size_t len) {
  /* Work on unsigned char: the <ctype.h> functions are only defined for
     values representable as unsigned char (or EOF). */
  const unsigned char *cp1 = (const unsigned char *)s1;
  const unsigned char *cp2 = (const unsigned char *)s2;

  while (len != 0 && *cp1 != '\0' && *cp2 != '\0') {
    const int cmp = toupper(*cp1) - toupper(*cp2);
    if (cmp != 0)
      return cmp;
    cp1++;
    cp2++;
    len--;
  }

  /* Requested prefix fully compared without a difference. */
  if (len == 0)
    return 0;

  /* At least one string ended early; the longer one sorts last. */
  if (*cp1 != '\0')
    return 1;
  if (*cp2 != '\0')
    return -1;

  return 0; /* both ended together (covers two empty strings) */
}
317 | | #endif |
318 | | |
319 | | #ifndef HAVE_STRCASECMP |
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 *
 * Returns 0 when equal, the difference of the upper-cased bytes at the first
 * mismatch, or +1 / -1 when one string is a proper prefix of the other.
 * A NULL argument yields 0 (behaviour kept from the original implementation).
 */
int strcasecmp(const char *s1, const char *s2) {
  if (s1 == NULL || s2 == NULL)
    return 0;

  /* Compare as unsigned char so bytes >= 0x80 are valid <ctype.h> arguments
     and order after ASCII instead of before it. */
  const unsigned char *cp1 = (const unsigned char *)s1;
  const unsigned char *cp2 = (const unsigned char *)s2;

  while (*cp1 != '\0' && *cp2 != '\0') {
    const int cmp = toupper(*cp1) - toupper(*cp2);
    if (cmp != 0)
      return cmp;
    cp1++;
    cp2++;
  }

  if (*cp1 != '\0')
    return 1;
  if (*cp2 != '\0')
    return -1;

  return 0;
}
344 | | #endif |
345 | | |
/*
** msStringToInt() and msStringToDouble() are helper functions to convert
** strings to numbers. They return MS_FAILURE if the input string is NULL or
** if the entire string did not convert successfully.
*/
352 | 0 | int msStringToInt(const char *str, int *value, int base) { |
353 | 0 | char *parse_check = NULL; |
354 | |
|
355 | 0 | if (!str) |
356 | 0 | return MS_FAILURE; |
357 | | |
358 | 0 | *value = (int)strtol(str, &parse_check, base); |
359 | 0 | if (*parse_check != '\0') |
360 | 0 | return MS_FAILURE; |
361 | | |
362 | 0 | return MS_SUCCESS; |
363 | 0 | } |
364 | | |
365 | 0 | int msStringToDouble(const char *str, double *value) { |
366 | 0 | char *parse_check = NULL; |
367 | |
|
368 | 0 | if (!str) |
369 | 0 | return MS_FAILURE; |
370 | | |
371 | 0 | *value = strtod(str, &parse_check); |
372 | 0 | if (*parse_check != '\0') |
373 | 0 | return MS_FAILURE; |
374 | | |
375 | 0 | return MS_SUCCESS; |
376 | 0 | } |
377 | | |
378 | 0 | char *msLongToString(long value) { |
379 | 0 | size_t bufferSize = 256; |
380 | 0 | char *buffer = (char *)msSmallMalloc(bufferSize); |
381 | |
|
382 | 0 | snprintf(buffer, bufferSize, "%ld", value); |
383 | 0 | return (buffer); |
384 | 0 | } |
385 | | |
386 | 0 | char *msDoubleToString(double value, int force_f) { |
387 | 0 | size_t bufferSize = 256; |
388 | 0 | char *buffer = (char *)msSmallMalloc(bufferSize); |
389 | |
|
390 | 0 | if (force_f == MS_TRUE) |
391 | 0 | snprintf(buffer, bufferSize, "%f", value); |
392 | 0 | else |
393 | 0 | snprintf(buffer, bufferSize, "%g", value); |
394 | 0 | return (buffer); |
395 | 0 | } |
396 | | |
397 | 0 | char *msIntToString(int value) { |
398 | 0 | size_t bufferSize = 256; |
399 | 0 | char *buffer = (char *)msSmallMalloc(bufferSize); |
400 | |
|
401 | 0 | snprintf(buffer, bufferSize, "%i", value); |
402 | 0 | return (buffer); |
403 | 0 | } |
404 | | |
/*
 * Upper-case string in place using toupper(). A NULL pointer is ignored.
 */
void msStringToUpper(char *string) {
  if (string == NULL)
    return;

  for (char *p = string; *p != '\0'; ++p) {
    /* Go through unsigned char: toupper() is undefined for negative values
       other than EOF, which plain char may produce for bytes >= 0x80. */
    *p = (char)toupper((unsigned char)*p);
  }
}
415 | | |
/*
 * Lower-case string in place using tolower(). A NULL pointer is ignored.
 */
void msStringToLower(char *string) {
  if (string == NULL)
    return;

  for (char *p = string; *p != '\0'; ++p) {
    /* Go through unsigned char: tolower() is undefined for negative values
       other than EOF, which plain char may produce for bytes >= 0x80. */
    *p = (char)tolower((unsigned char)*p);
  }
}
426 | | |
/*
 * Return a lower-cased copy of s; the argument itself is not modified.
 * Each byte is passed to std::tolower() as an unsigned char.
 */
std::string msStringToLower(const std::string &s) {
  std::string lowered;
  lowered.reserve(s.size());

  for (const char ch : s) {
    const unsigned char byte = static_cast<unsigned char>(ch);
    lowered.push_back(static_cast<char>(std::tolower(byte)));
  }

  return lowered;
}
433 | | |
434 | | /** |
435 | | * Force the first character to uppercase and the rest of the characters to |
436 | | * lower case for EACH word in the string. |
437 | | */ |
/*
 * Capitalise every word of string in place: the first character after each
 * run of spaces (and the very first character) is upper-cased, all other
 * non-space characters are lower-cased. Only ' ' separates words.
 * A NULL pointer is ignored.
 */
void msStringInitCap(char *string) {
  int start = 1; /* non-zero while the next non-space char begins a word */

  if (string == NULL)
    return;

  /* Walk to the terminator directly instead of re-running strlen() on every
     iteration. */
  for (char *p = string; *p != '\0'; ++p) {
    if (*p == ' ') {
      start = 1;
    } else if (start) {
      /* unsigned char cast: ctype functions are undefined for negative
         char values */
      *p = (char)toupper((unsigned char)*p);
      start = 0;
    } else {
      *p = (char)tolower((unsigned char)*p);
    }
  }
}
454 | | |
455 | | /** |
456 | | * Force the first character to uppercase for the FIRST word in the string |
457 | | * and the rest of the characters to lower case. |
458 | | */ |
/*
 * Upper-case the first non-space character of string and lower-case every
 * other non-space character, in place. Spaces are left untouched.
 * A NULL pointer is ignored.
 */
void msStringFirstCap(char *string) {
  int start = 1; /* non-zero until the first non-space char has been seen */

  if (string == NULL)
    return;

  /* Walk to the terminator directly instead of re-running strlen() on every
     iteration. */
  for (char *p = string; *p != '\0'; ++p) {
    if (*p == ' ')
      continue;

    if (start) {
      /* unsigned char cast: ctype functions are undefined for negative
         char values */
      *p = (char)toupper((unsigned char)*p);
      start = 0;
    } else {
      *p = (char)tolower((unsigned char)*p);
    }
  }
}
474 | | |
/*
 * Remove the last character of string in place (no-op on an empty string)
 * and return the same pointer. string must not be NULL.
 */
char *msStringChop(char *string) {
  const int length = strlen(string);

  if (length <= 0)
    return string;

  string[length - 1] = '\0';
  return string;
}
484 | | |
485 | | /* |
486 | | ** Trim leading and trailing white space. |
487 | | */ |
/*
 * Strip leading and trailing ' ' characters from str in place.
 * Only the space character is treated as blank. NULL is ignored.
 */
void msStringTrim(char *str) {
  if (str == NULL)
    return;

  /* Drop the leading blanks by shifting the remainder (and its NUL) left. */
  const int lead = strspn(str, " ");
  if (lead != 0)
    memmove(str, str + lead, strlen(str) - lead + 1);

  /* Find the last non-blank character and cut the string right after it.
     An empty string leaves last at -1 and is not touched. */
  int last = (int)strlen(str) - 1;
  while (last >= 0 && str[last] == ' ')
    last--;

  if (last >= 0)
    str[last + 1] = '\0';
}
513 | | |
514 | 0 | void msStringTrim(std::string &string) { |
515 | 0 | const size_t npos = string.find_first_not_of(' '); |
516 | 0 | if (npos != std::string::npos) |
517 | 0 | string.erase(0, npos); |
518 | 0 | msStringTrimBlanks(string); |
519 | 0 | } |
520 | | |
521 | | /* |
522 | | ** Remove leading white spaces and shift everything to the left. |
523 | | */ |
/*
 * Remove leading whitespace (as classified by isspace()) from string in
 * place by shifting the remaining characters to the front.
 * Returns string itself; NULL and empty strings are returned unchanged.
 */
char *msStringTrimLeft(char *string) {
  if (string == NULL || *string == '\0')
    return string;

  /* Locate the first non-whitespace character. The unsigned char cast keeps
     isspace() defined for bytes >= 0x80 when plain char is signed. */
  const char *first = string;
  while (*first != '\0' && isspace((unsigned char)*first))
    first++;

  /* Shift the tail, including its terminating NUL, over the skipped part. */
  if (first != string)
    memmove(string, first, strlen(first) + 1);

  return string;
}
551 | | |
/*
 * Remove leading whitespace (as classified by isspace()) from string in
 * place. A string made only of whitespace becomes empty.
 */
void msStringTrimLeft(std::string &string) {
  size_t first = 0;

  /* Count the leading whitespace characters. The unsigned char cast keeps
     isspace() defined for bytes >= 0x80 when plain char is signed. */
  while (first < string.length() &&
         isspace(static_cast<unsigned char>(string[first]))) {
    first++;
  }

  /* Erase exactly `first` characters; the previous `first - 1` left one
     whitespace character behind. */
  string.erase(0, first);
}
564 | | |
565 | | /* ------------------------------------------------------------------------------- |
566 | | */ |
567 | | /* Trims trailing blanks from a string */ |
568 | | /* ------------------------------------------------------------------------------- |
569 | | */ |
/*
 * Cut trailing ' ' characters off string in place.
 * NOTE: a string that contains nothing but blanks (or is empty) is left
 * unchanged, because the terminator is only written after a non-blank
 * character has been found. string must not be NULL.
 */
void msStringTrimBlanks(char *string) {
  int last = (int)strlen(string) - 1;

  /* Step backwards over the trailing blanks. */
  while (last >= 0 && string[last] == ' ')
    last--;

  if (last >= 0)
    string[last + 1] = '\0';
}
581 | | |
/*
 * Cut trailing ' ' characters off string in place.
 * NOTE: a string that contains nothing but blanks (or is empty) is left
 * unchanged, since no non-blank character is found to truncate after.
 */
void msStringTrimBlanks(std::string &string) {
  const std::string::size_type last = string.find_last_not_of(' ');

  if (last == std::string::npos)
    return;

  string.erase(last + 1);
}
587 | | |
588 | | /* ------------------------------------------------------------------------------- |
589 | | */ |
590 | | /* Trims end-of-line marker from a string */ |
591 | | /* Useful in conjunction with fgets() calls */ |
592 | | /* ------------------------------------------------------------------------------- |
593 | | */ |
/*
 * Truncate string in place at its first '\n', if it has one.
 * Everything from the newline onwards is dropped; a string without a
 * newline is left unchanged. string must not be NULL.
 */
void msStringTrimEOL(char *string) {
  char *newline = strchr(string, '\n');

  if (newline != NULL)
    *newline = '\0'; /* Terminate the string at the newline */
}
604 | | |
605 | | /* ------------------------------------------------------------------------------- |
606 | | */ |
607 | | /* Replace all occurrences of old with new in str. */ |
608 | | /* It is assumed that str was dynamically created using malloc. */ |
609 | | /* ------------------------------------------------------------------------------- |
610 | | */ |
611 | 0 | char *msReplaceSubstring(char *str, const char *old, const char *newstr) { |
612 | 0 | size_t str_len, old_len, new_len, tmp_offset; |
613 | 0 | char *tmp_ptr; |
614 | |
|
615 | 0 | if (newstr == NULL) |
616 | 0 | newstr = ""; |
617 | | |
618 | | /* |
619 | | ** If old is not found then leave str alone |
620 | | */ |
621 | 0 | if ((tmp_ptr = strstr(str, old)) == NULL) |
622 | 0 | return (str); |
623 | | |
624 | | /* |
625 | | ** Grab some info about incoming strings |
626 | | */ |
627 | 0 | str_len = strlen(str); |
628 | 0 | old_len = strlen(old); |
629 | 0 | new_len = strlen(newstr); |
630 | | |
631 | | /* |
632 | | ** Now loop until old is NOT found in new |
633 | | */ |
634 | 0 | while (tmp_ptr != NULL) { |
635 | | |
636 | | /* |
637 | | ** re-allocate memory for buf assuming 1 replacement of old with new |
638 | | ** don't bother reallocating if old is larger than new) |
639 | | */ |
640 | 0 | if (old_len < new_len) { |
641 | 0 | tmp_offset = tmp_ptr - str; |
642 | 0 | str_len = str_len - old_len + new_len; |
643 | 0 | str = (char *)msSmallRealloc( |
644 | 0 | str, (str_len + 1)); /* make new space for a copy */ |
645 | 0 | tmp_ptr = str + tmp_offset; |
646 | 0 | } |
647 | | |
648 | | /* |
649 | | ** Move the trailing part of str to make some room unless old_len == new_len |
650 | | */ |
651 | 0 | if (old_len != new_len) { |
652 | 0 | memmove(tmp_ptr + new_len, tmp_ptr + old_len, |
653 | 0 | strlen(tmp_ptr) - old_len + 1); |
654 | 0 | } |
655 | | |
656 | | /* |
657 | | ** Now copy new over old |
658 | | */ |
659 | 0 | memcpy(tmp_ptr, newstr, new_len); |
660 | | |
661 | | /* |
662 | | ** And look for more matches in the rest of the string |
663 | | */ |
664 | 0 | tmp_ptr = strstr(tmp_ptr + new_len, old); |
665 | 0 | } |
666 | |
|
667 | 0 | return (str); |
668 | 0 | } |
669 | | |
/*
 * same goal as msReplaceSubstring, but for the known case
 * when we won't have to do reallocs etc
 * used to replace the wrap character by a newline for labels
 */
/*
 * Replace every occurrence of the character old in str with newstr, in
 * place. The terminating NUL is never examined or modified. NULL is ignored.
 */
void msReplaceChar(char *str, char old, char newstr) {
  if (str == NULL)
    return;

  /* Test the current character BEFORE advancing: the previous
     `while (*(str++))` form skipped the first character and inspected the
     terminator instead. */
  for (; *str != '\0'; ++str) {
    if (*str == old)
      *str = newstr;
  }
}
680 | | |
681 | | /* |
682 | | ** how many times does ch occur in str |
683 | | */ |
/*
 * Count the occurrences of ch in str. The terminating NUL is not part of
 * the scan, so ch == '\0' yields 0. str must not be NULL.
 */
int msCountChars(char *str, char ch) {
  int hits = 0;

  for (const char *p = str; *p != '\0'; p++) {
    if (*p == ch)
      hits++;
  }

  return hits;
}
694 | | |
695 | | /* ------------------------------------------------------------------------------- |
696 | | */ |
697 | | /* Strip filename from a full path */ |
698 | | /* ------------------------------------------------------------------------------- |
699 | | */ |
/*
 * Return a pointer to the file-name part of fn, i.e. the character after
 * the last '/' or '\\' (whichever occurs later). When fn contains neither
 * separator, fn itself is returned. The result points into fn; nothing is
 * allocated or modified.
 */
char *msStripPath(char *fn) {
  char *lastSlash = strrchr(fn, '/');
  char *lastBackslash = strrchr(fn, '\\');
  char *separator;

  if (lastSlash != NULL && lastBackslash != NULL) {
    /* Both kinds present: the one further to the right wins. */
    separator = (lastSlash < lastBackslash) ? lastBackslash : lastSlash;
  } else {
    separator = (lastSlash != NULL) ? lastSlash : lastBackslash;
  }

  return (separator != NULL) ? separator + 1 : fn;
}
720 | | |
/*
** Returns the *path* portion of the filename fn. Memory is allocated using
** malloc.
*/
725 | 20.5k | char *msGetPath(const char *fn) { |
726 | 20.5k | char *str; |
727 | 20.5k | int i, length; |
728 | | |
729 | 20.5k | length = strlen(fn); |
730 | 20.5k | if ((str = msStrdup(fn)) == NULL) |
731 | 0 | return (NULL); |
732 | | |
733 | 287k | for (i = length - 1; i >= 0; i--) { /* step backwards through the string */ |
734 | 287k | if ((str[i] == '/') || (str[i] == '\\')) { |
735 | 20.5k | str[i + 1] = '\0'; |
736 | 20.5k | break; |
737 | 20.5k | } |
738 | 287k | } |
739 | | |
740 | 20.5k | if (strcmp(str, fn) == 0) { |
741 | 323 | msFree(str); |
742 | | #if defined(_WIN32) && !defined(__CYGWIN__) |
743 | | str = msStrdup(".\\"); |
744 | | #else |
745 | 323 | str = msStrdup("./"); |
746 | 323 | #endif |
747 | 323 | } |
748 | | |
749 | 20.5k | return (str); |
750 | 20.5k | } |
751 | | |
752 | | /* |
753 | | ** Returns a *path* built from abs_path and path. |
754 | | ** The pszReturnPath must be declared by the caller function as an array |
755 | | ** of MS_MAXPATHLEN char |
756 | | */ |
757 | 24.5k | char *msBuildPath(char *pszReturnPath, const char *abs_path, const char *path) { |
758 | 24.5k | int abslen = 0; |
759 | 24.5k | int pathlen = 0; |
760 | | |
761 | 24.5k | if (path == NULL) { |
762 | 0 | msSetError(MS_IOERR, NULL, "msBuildPath"); |
763 | 0 | return NULL; |
764 | 0 | } |
765 | | |
766 | 24.5k | pathlen = strlen(path); |
767 | 24.5k | if (abs_path) |
768 | 22.3k | abslen = strlen(abs_path); |
769 | | |
770 | 24.5k | if ((pathlen + abslen + 2) > MS_MAXPATHLEN) { |
771 | 20 | msSetError(MS_IOERR, "Path is too long. Check server logs.", |
772 | 20 | "msBuildPath()"); |
773 | 20 | msDebug("msBuildPath(): (%s%s): path is too long.\n", abs_path, path); |
774 | 20 | return NULL; |
775 | 20 | } |
776 | | |
777 | | /* Check if path is absolute */ |
778 | 24.5k | if ((abs_path == NULL) || (abslen == 0) || (path[0] == '\\') || |
779 | 22.2k | (path[0] == '/') || (pathlen > 1 && (path[1] == ':'))) { |
780 | 3.72k | strlcpy(pszReturnPath, path, MS_MAXPATHLEN); |
781 | 3.72k | return (pszReturnPath); |
782 | 3.72k | } |
783 | | |
784 | | /* else return abs_path/path */ |
785 | 20.8k | if ((abs_path[abslen - 1] == '/') || (abs_path[abslen - 1] == '\\')) { |
786 | 1.47k | snprintf(pszReturnPath, MS_MAXPATHLEN, "%s%s", abs_path, path); |
787 | 19.3k | } else { |
788 | 19.3k | snprintf(pszReturnPath, MS_MAXPATHLEN, "%s/%s", abs_path, path); |
789 | 19.3k | } |
790 | | |
791 | 20.8k | return (pszReturnPath); |
792 | 24.5k | } |
793 | | |
794 | | /* |
795 | | ** Returns a *path* built from abs_path, path1 and path2. |
796 | | ** abs_path/path1/path2 |
797 | | ** The pszReturnPath must be declared by the caller function as an array |
798 | | ** of MS_MAXPATHLEN char |
799 | | */ |
800 | | char *msBuildPath3(char *pszReturnPath, const char *abs_path, const char *path1, |
801 | 0 | const char *path2) { |
802 | 0 | char szPath[MS_MAXPATHLEN]; |
803 | |
|
804 | 0 | return msBuildPath(pszReturnPath, abs_path, |
805 | 0 | msBuildPath(szPath, path1, path2)); |
806 | 0 | } |
807 | | |
808 | | /* |
809 | | ** Similar to msBuildPath(), but the input path is only qualified by the |
810 | | ** absolute path if this will result in it pointing to a readable file. |
811 | | ** |
812 | | ** Returns NULL if the resulting path doesn't point to a readable file. |
813 | | */ |
814 | | |
815 | | char *msTryBuildPath(char *szReturnPath, const char *abs_path, const char *path) |
816 | | |
817 | 0 | { |
818 | 0 | VSILFILE *fp; |
819 | |
|
820 | 0 | if (msBuildPath(szReturnPath, abs_path, path) == NULL) |
821 | 0 | return NULL; |
822 | | |
823 | 0 | fp = VSIFOpenL(szReturnPath, "r"); |
824 | 0 | if (fp == NULL) { |
825 | 0 | strlcpy(szReturnPath, path, MS_MAXPATHLEN); |
826 | 0 | return NULL; |
827 | 0 | } else |
828 | 0 | VSIFCloseL(fp); |
829 | | |
830 | 0 | return szReturnPath; |
831 | 0 | } |
832 | | |
833 | | /* |
834 | | ** Similar to msBuildPath3(), but the input path is only qualified by the |
835 | | ** absolute path if this will result in it pointing to a readable file. |
836 | | ** |
837 | | ** Returns NULL if the resulting path doesn't point to a readable file. |
838 | | */ |
839 | | |
840 | | char *msTryBuildPath3(char *szReturnPath, const char *abs_path, |
841 | | const char *path1, const char *path2) |
842 | | |
843 | 0 | { |
844 | 0 | VSILFILE *fp; |
845 | |
|
846 | 0 | if (msBuildPath3(szReturnPath, abs_path, path1, path2) == NULL) |
847 | 0 | return NULL; |
848 | | |
849 | 0 | fp = VSIFOpenL(szReturnPath, "r"); |
850 | 0 | if (fp == NULL) { |
851 | 0 | strlcpy(szReturnPath, path2, MS_MAXPATHLEN); |
852 | 0 | return NULL; |
853 | 0 | } else |
854 | 0 | VSIFCloseL(fp); |
855 | | |
856 | 0 | return szReturnPath; |
857 | 0 | } |
858 | | |
/*
** Splits a string into multiple strings based on ch. Consecutive ch's are
** ignored.
*/
863 | 11.6k | char **msStringSplit(const char *string, char ch, int *num_tokens) { |
864 | 11.6k | int i, j, k; |
865 | 11.6k | int length, n; |
866 | 11.6k | char **token; |
867 | 11.6k | char last_ch = '\0'; |
868 | | |
869 | 11.6k | n = 1; /* always at least 1 token, the string itself */ |
870 | 11.6k | length = strlen(string); |
871 | 4.98M | for (i = 0; i < length; i++) { |
872 | 4.96M | if (string[i] == ch && last_ch != ch) |
873 | 42.7k | n++; |
874 | 4.96M | last_ch = string[i]; |
875 | 4.96M | } |
876 | | |
877 | 11.6k | token = (char **)msSmallMalloc(sizeof(char *) * n); |
878 | | |
879 | 11.6k | k = 0; |
880 | 11.6k | token[k] = (char *)msSmallMalloc(sizeof(char) * (length + 1)); |
881 | | |
882 | 11.6k | j = 0; |
883 | 11.6k | last_ch = '\0'; |
884 | 4.98M | for (i = 0; i < length; i++) { |
885 | 4.96M | if (string[i] == ch) { |
886 | | |
887 | 46.3k | if (last_ch == ch) |
888 | 3.63k | continue; |
889 | | |
890 | 42.7k | token[k][j] = '\0'; /* terminate current token */ |
891 | | |
892 | 42.7k | k++; |
893 | 42.7k | token[k] = (char *)msSmallMalloc(sizeof(char) * (length + 1)); |
894 | | |
895 | 42.7k | j = 0; |
896 | 4.92M | } else { |
897 | 4.92M | token[k][j] = string[i]; |
898 | 4.92M | j++; |
899 | 4.92M | } |
900 | | |
901 | 4.96M | last_ch = string[i]; |
902 | 4.96M | } |
903 | | |
904 | 11.6k | token[k][j] = '\0'; /* terminate last token */ |
905 | | |
906 | 11.6k | *num_tokens = n; |
907 | | |
908 | 11.6k | return (token); |
909 | 11.6k | } |
910 | | |
911 | 0 | std::vector<std::string> msStringSplit(const char *string, char ch) { |
912 | 0 | int num_tokens = 0; |
913 | 0 | char **tmp = msStringSplit(string, ch, &num_tokens); |
914 | 0 | std::vector<std::string> res; |
915 | 0 | res.reserve(num_tokens); |
916 | 0 | for (int i = 0; i < num_tokens; i++) |
917 | 0 | res.push_back(tmp[i]); |
918 | 0 | msFreeCharArray(tmp, num_tokens); |
919 | 0 | return res; |
920 | 0 | } |
921 | | |
922 | | /* |
923 | | This function is a copy of CSLTokenizeString2() function of the CPL component. |
924 | | See the port/cpl_string.cpp file in gdal source for the complete documentation. |
925 | | Available Flags: |
926 | | * - MS_ALLOWEMPTYTOKENS: allow the return of empty tokens when two |
927 | | * delimiters in a row occur with no other text between them. If not set, |
928 | | * empty tokens will be discarded; |
929 | | * - MS_STRIPLEADSPACES: strip leading space characters from the token (as |
930 | | * reported by isspace()); |
931 | | * - MS_STRIPENDSPACES: strip ending space characters from the token (as |
932 | | * reported by isspace()); |
933 | | * - MS_HONOURSTRINGS: double quotes can be used to hold values that should |
934 | | * not be broken into multiple tokens; |
935 | | * - MS_PRESERVEQUOTES: string quotes are carried into the tokens when this |
936 | | * is set, otherwise they are removed; |
937 | | * - MS_PRESERVEESCAPES: if set backslash escapes (for backslash itself, |
938 | | * and for literal double quotes) will be preserved in the tokens, otherwise |
939 | | * the backslashes will be removed in processing. |
940 | | */ |
941 | | char **msStringSplitComplex(const char *pszString, const char *pszDelimiters, |
942 | | int *num_tokens, int nFlags) |
943 | | |
944 | 2.01k | { |
945 | 2.01k | char **papszRetList = NULL; |
946 | 2.01k | int nRetMax = 0, nRetLen = 0; |
947 | 2.01k | char *pszToken; |
948 | 2.01k | int nTokenMax, nTokenLen; |
949 | 2.01k | int bHonourStrings = (nFlags & MS_HONOURSTRINGS); |
950 | 2.01k | int bAllowEmptyTokens = (nFlags & MS_ALLOWEMPTYTOKENS); |
951 | 2.01k | int bStripLeadSpaces = (nFlags & MS_STRIPLEADSPACES); |
952 | 2.01k | int bStripEndSpaces = (nFlags & MS_STRIPENDSPACES); |
953 | | |
954 | 2.01k | pszToken = (char *)msSmallMalloc(sizeof(char) * 10); |
955 | 2.01k | ; |
956 | 2.01k | nTokenMax = 10; |
957 | | |
958 | 10.2k | while (pszString != NULL && *pszString != '\0') { |
959 | 8.28k | int bInString = MS_FALSE; |
960 | 8.28k | int bStartString = MS_TRUE; |
961 | | |
962 | 8.28k | nTokenLen = 0; |
963 | | |
964 | | /* Try to find the next delimiter, marking end of token */ |
965 | 24.0k | for (; *pszString != '\0'; pszString++) { |
966 | | |
967 | | /* End if this is a delimiter skip it and break. */ |
968 | 23.0k | if (!bInString && strchr(pszDelimiters, *pszString) != NULL) { |
969 | 7.31k | pszString++; |
970 | 7.31k | break; |
971 | 7.31k | } |
972 | | |
973 | | /* If this is a quote, and we are honouring constant |
974 | | strings, then process the constant strings, with out delim |
975 | | but don't copy over the quotes */ |
976 | 15.7k | if (bHonourStrings && *pszString == '"') { |
977 | 0 | if (nFlags & MS_PRESERVEQUOTES) { |
978 | 0 | pszToken[nTokenLen] = *pszString; |
979 | 0 | nTokenLen++; |
980 | 0 | } |
981 | |
|
982 | 0 | if (bInString) { |
983 | 0 | bInString = MS_FALSE; |
984 | 0 | continue; |
985 | 0 | } else { |
986 | 0 | bInString = MS_TRUE; |
987 | 0 | continue; |
988 | 0 | } |
989 | 0 | } |
990 | | |
991 | | /* |
992 | | * Within string constants we allow for escaped quotes, but in |
993 | | * processing them we will unescape the quotes and \\ sequence |
994 | | * reduces to \ |
995 | | */ |
996 | 15.7k | if (bInString && pszString[0] == '\\') { |
997 | 0 | if (pszString[1] == '"' || pszString[1] == '\\') { |
998 | 0 | if (nFlags & MS_PRESERVEESCAPES) { |
999 | 0 | pszToken[nTokenLen] = *pszString; |
1000 | 0 | nTokenLen++; |
1001 | 0 | } |
1002 | |
|
1003 | 0 | pszString++; |
1004 | 0 | } |
1005 | 0 | } |
1006 | | |
1007 | | /* |
1008 | | * Strip spaces at the token start if requested. |
1009 | | */ |
1010 | 15.7k | if (!bInString && bStripLeadSpaces && bStartString && |
1011 | 0 | isspace((unsigned char)*pszString)) |
1012 | 0 | continue; |
1013 | | |
1014 | 15.7k | bStartString = MS_FALSE; |
1015 | | |
1016 | | /* |
1017 | | * Extend token buffer if we are running close to its end. |
1018 | | */ |
1019 | 15.7k | if (nTokenLen >= nTokenMax - 3) { |
1020 | 467 | nTokenMax = nTokenMax * 2 + 10; |
1021 | 467 | pszToken = (char *)msSmallRealloc(pszToken, sizeof(char) * nTokenMax); |
1022 | 467 | } |
1023 | | |
1024 | 15.7k | pszToken[nTokenLen] = *pszString; |
1025 | 15.7k | nTokenLen++; |
1026 | 15.7k | } |
1027 | | |
1028 | | /* |
1029 | | * Strip spaces at the token end if requested. |
1030 | | */ |
1031 | 8.28k | if (!bInString && bStripEndSpaces) { |
1032 | 0 | while (nTokenLen && isspace((unsigned char)pszToken[nTokenLen - 1])) |
1033 | 0 | nTokenLen--; |
1034 | 0 | } |
1035 | | |
1036 | 8.28k | pszToken[nTokenLen] = '\0'; |
1037 | | |
1038 | | /* |
1039 | | * Add the token. |
1040 | | */ |
1041 | 8.28k | if (pszToken[0] != '\0' || bAllowEmptyTokens) { |
1042 | 8.28k | if (nRetLen >= nRetMax - 1) { |
1043 | 1.54k | nRetMax = nRetMax * 2 + 10; |
1044 | 1.54k | papszRetList = |
1045 | 1.54k | (char **)msSmallRealloc(papszRetList, sizeof(char *) * nRetMax); |
1046 | 1.54k | } |
1047 | | |
1048 | 8.28k | papszRetList[nRetLen++] = msStrdup(pszToken); |
1049 | 8.28k | papszRetList[nRetLen] = NULL; |
1050 | 8.28k | } |
1051 | 8.28k | } |
1052 | | |
1053 | | /* |
1054 | | * If the last token was empty, then we need to capture |
1055 | | * it now, as the loop would skip it. |
1056 | | */ |
1057 | 2.01k | if (pszString != NULL && *pszString == '\0' && bAllowEmptyTokens && |
1058 | 2.01k | nRetLen > 0 && strchr(pszDelimiters, *(pszString - 1)) != NULL) { |
1059 | 263 | if (nRetLen >= nRetMax - 1) { |
1060 | 121 | nRetMax = nRetMax * 2 + 10; |
1061 | 121 | papszRetList = |
1062 | 121 | (char **)msSmallRealloc(papszRetList, sizeof(char *) * nRetMax); |
1063 | 121 | } |
1064 | | |
1065 | 263 | papszRetList[nRetLen++] = msStrdup(""); |
1066 | 263 | papszRetList[nRetLen] = NULL; |
1067 | 263 | } |
1068 | | |
1069 | 2.01k | if (papszRetList == NULL) |
1070 | 772 | papszRetList = (char **)msSmallMalloc(sizeof(char *) * 1); |
1071 | | |
1072 | 2.01k | *num_tokens = nRetLen; |
1073 | 2.01k | free(pszToken); |
1074 | | |
1075 | 2.01k | return papszRetList; |
1076 | 2.01k | } |
1077 | | |
1078 | | /* This method is similar to msStringSplit but support quoted strings. |
1079 | | It also support multi-characters delimiter and allows to preserve quotes */ |
1080 | | char **msStringTokenize(const char *pszLine, const char *pszDelim, |
1081 | 0 | int *num_tokens, int preserve_quote) { |
1082 | 0 | char **papszResult = NULL; |
1083 | 0 | int n = 1, iChar, nLength = strlen(pszLine), iTokenChar = 0, |
1084 | 0 | bInQuotes = MS_FALSE; |
1085 | 0 | char *pszToken = (char *)msSmallMalloc(sizeof(char) * (nLength + 1)); |
1086 | 0 | int nDelimLen = strlen(pszDelim); |
1087 | | |
1088 | | /* Compute the number of tokens */ |
1089 | 0 | for (iChar = 0; pszLine[iChar] != '\0'; iChar++) { |
1090 | 0 | if (bInQuotes && pszLine[iChar] == '"' && pszLine[iChar + 1] == '"') { |
1091 | 0 | iChar++; |
1092 | 0 | } else if (pszLine[iChar] == '"') { |
1093 | 0 | bInQuotes = !bInQuotes; |
1094 | 0 | } else if (!bInQuotes && |
1095 | 0 | strncmp(pszLine + iChar, pszDelim, nDelimLen) == 0) { |
1096 | 0 | iChar += nDelimLen - 1; |
1097 | 0 | n++; |
1098 | 0 | } |
1099 | 0 | } |
1100 | |
|
1101 | 0 | papszResult = (char **)msSmallMalloc(sizeof(char *) * n); |
1102 | 0 | n = iTokenChar = bInQuotes = 0; |
1103 | 0 | for (iChar = 0; pszLine[iChar] != '\0'; iChar++) { |
1104 | 0 | if (bInQuotes && pszLine[iChar] == '"' && pszLine[iChar + 1] == '"') { |
1105 | 0 | if (preserve_quote == MS_TRUE) |
1106 | 0 | pszToken[iTokenChar++] = '"'; |
1107 | 0 | pszToken[iTokenChar++] = '"'; |
1108 | 0 | iChar++; |
1109 | 0 | } else if (pszLine[iChar] == '"') { |
1110 | 0 | if (preserve_quote == MS_TRUE) |
1111 | 0 | pszToken[iTokenChar++] = '"'; |
1112 | 0 | bInQuotes = !bInQuotes; |
1113 | 0 | } else if (!bInQuotes && |
1114 | 0 | strncmp(pszLine + iChar, pszDelim, nDelimLen) == 0) { |
1115 | 0 | pszToken[iTokenChar++] = '\0'; |
1116 | 0 | papszResult[n] = pszToken; |
1117 | 0 | pszToken = (char *)msSmallMalloc(sizeof(char) * (nLength + 1)); |
1118 | 0 | iChar += nDelimLen - 1; |
1119 | 0 | iTokenChar = 0; |
1120 | 0 | n++; |
1121 | 0 | } else { |
1122 | 0 | pszToken[iTokenChar++] = pszLine[iChar]; |
1123 | 0 | } |
1124 | 0 | } |
1125 | |
|
1126 | 0 | pszToken[iTokenChar++] = '\0'; |
1127 | 0 | papszResult[n] = pszToken; |
1128 | |
|
1129 | 0 | *num_tokens = n + 1; |
1130 | |
|
1131 | 0 | return papszResult; |
1132 | 0 | } |
1133 | | |
/**********************************************************************
 *                       msEncodeChar()
 *
 * Return 0 if the character may appear unescaped in a URL and 1 if it
 * should be percent-encoded. The characters left unescaped are the
 * ASCII letters and digits plus  - _ . ! ~ * ' ( )
 * (character map taken from http://www.ietf.org/rfc/rfc2396.txt).
 *
 **********************************************************************/

int msEncodeChar(const char c) {
  const int isLowerLetter = (c >= 0x61 && c <= 0x7A); /* a-z */
  const int isUpperLetter = (c >= 0x41 && c <= 0x5A); /* A-Z */
  const int isDigit = (c >= 0x30 && c <= 0x39);       /* 0-9 */

  if (isLowerLetter || isUpperLetter || isDigit)
    return (0);

  switch (c) {
  case 0x21: /* ! */
  case 0x27: /* ' */
  case 0x28: /* ( */
  case 0x29: /* ) */
  case 0x2A: /* * */
  case 0x2D: /* - */
  case 0x2E: /* . */
  case 0x5F: /* _ */
  case 0x7E: /* ~ */
    return (0);
  default:
    return (1);
  }
}
1157 | | |
1158 | 0 | char *msEncodeUrl(const char *data) { |
1159 | | /* |
1160 | | * Delegate to msEncodeUrlExcept, with a null second argument |
1161 | | * to render the except handling moot. |
1162 | | */ |
1163 | 0 | return (msEncodeUrlExcept(data, '\0')); |
1164 | 0 | } |
1165 | | |
1166 | | /********************************************************************** |
1167 | | * msEncodeCharExcept() |
1168 | | * |
1169 | | * URL encoding, applies RFP2396 encoding to all characters |
1170 | | * except the one exception character. An exception character |
1171 | | * of '\0' implies no exception handling. |
1172 | | * |
1173 | | **********************************************************************/ |
1174 | | |
1175 | 0 | char *msEncodeUrlExcept(const char *data, const char except) { |
1176 | 0 | static const char *hex = "0123456789ABCDEF"; |
1177 | 0 | const char *i; |
1178 | 0 | char *j, *code; |
1179 | 0 | int inc; |
1180 | 0 | unsigned char ch; |
1181 | |
|
1182 | 0 | for (inc = 0, i = data; *i != '\0'; i++) |
1183 | 0 | if (msEncodeChar(*i)) |
1184 | 0 | inc += 2; |
1185 | |
|
1186 | 0 | code = (char *)msSmallMalloc(strlen(data) + inc + 1); |
1187 | |
|
1188 | 0 | for (j = code, i = data; *i != '\0'; i++, j++) { |
1189 | 0 | if (except != '\0' && *i == except) { |
1190 | 0 | *j = except; |
1191 | 0 | } else if (msEncodeChar(*i)) { |
1192 | 0 | ch = *i; |
1193 | 0 | *j++ = '%'; |
1194 | 0 | *j++ = hex[ch / 16]; |
1195 | 0 | *j = hex[ch % 16]; |
1196 | 0 | } else |
1197 | 0 | *j = *i; |
1198 | 0 | } |
1199 | 0 | *j = '\0'; |
1200 | |
|
1201 | 0 | return code; |
1202 | 0 | } |
1203 | | |
1204 | | /************************************************************************/ |
1205 | | /* msEscapeJSonString() */ |
1206 | | /************************************************************************/ |
1207 | | |
1208 | | /* The input (and output) string are not supposed to start/end with double */ |
1209 | | /* quote characters. It is the responsibility of the caller to do that. */ |
1210 | 0 | char *msEscapeJSonString(const char *pszJSonString) { |
1211 | | /* Worst case is one character to become \uABCD so 6 characters */ |
1212 | 0 | char *pszRet; |
1213 | 0 | int i = 0, j = 0; |
1214 | 0 | static const char *pszHex = "0123456789ABCDEF"; |
1215 | |
|
1216 | 0 | pszRet = (char *)msSmallMalloc(strlen(pszJSonString) * 6 + 1); |
1217 | | /* From http://www.json.org/ */ |
1218 | 0 | for (i = 0; pszJSonString[i] != '\0'; i++) { |
1219 | 0 | unsigned char ch = pszJSonString[i]; |
1220 | 0 | if (ch == '\b') { |
1221 | 0 | pszRet[j++] = '\\'; |
1222 | 0 | pszRet[j++] = 'b'; |
1223 | 0 | } else if (ch == '\f') { |
1224 | 0 | pszRet[j++] = '\\'; |
1225 | 0 | pszRet[j++] = 'f'; |
1226 | 0 | } else if (ch == '\n') { |
1227 | 0 | pszRet[j++] = '\\'; |
1228 | 0 | pszRet[j++] = 'n'; |
1229 | 0 | } else if (ch == '\r') { |
1230 | 0 | pszRet[j++] = '\\'; |
1231 | 0 | pszRet[j++] = 'r'; |
1232 | 0 | } else if (ch == '\t') { |
1233 | 0 | pszRet[j++] = '\\'; |
1234 | 0 | pszRet[j++] = 't'; |
1235 | 0 | } else if (ch < 32) { |
1236 | 0 | pszRet[j++] = '\\'; |
1237 | 0 | pszRet[j++] = 'u'; |
1238 | 0 | pszRet[j++] = '0'; |
1239 | 0 | pszRet[j++] = '0'; |
1240 | 0 | pszRet[j++] = pszHex[ch / 16]; |
1241 | 0 | pszRet[j++] = pszHex[ch % 16]; |
1242 | 0 | } else if (ch == '"') { |
1243 | 0 | pszRet[j++] = '\\'; |
1244 | 0 | pszRet[j++] = '"'; |
1245 | 0 | } else if (ch == '\\') { |
1246 | 0 | pszRet[j++] = '\\'; |
1247 | 0 | pszRet[j++] = '\\'; |
1248 | 0 | } else { |
1249 | 0 | pszRet[j++] = ch; |
1250 | 0 | } |
1251 | 0 | } |
1252 | 0 | pszRet[j] = '\0'; |
1253 | 0 | return pszRet; |
1254 | 0 | } |
1255 | | |
1256 | | /* msEncodeHTMLEntities() |
1257 | | ** |
1258 | | ** Return a copy of string after replacing some problematic chars with their |
1259 | | ** HTML entity equivalents. |
1260 | | ** |
1261 | | ** The replacements performed are: |
1262 | | ** '&' -> "&", '"' -> """, '<' -> "<" and '>' -> ">" |
1263 | | **/ |
1264 | 0 | char *msEncodeHTMLEntities(const char *string) { |
1265 | 0 | int buflen, i; |
1266 | 0 | char *newstring; |
1267 | 0 | const char *c; |
1268 | |
|
1269 | 0 | if (string == NULL) |
1270 | 0 | return NULL; |
1271 | | |
1272 | | /* Start with 100 extra chars for replacements... */ |
1273 | | /* should be good enough for most cases */ |
1274 | 0 | buflen = strlen(string) + 100; |
1275 | 0 | newstring = (char *)malloc(buflen + 1); |
1276 | 0 | MS_CHECK_ALLOC(newstring, buflen + 1, NULL); |
1277 | |
|
1278 | 0 | for (i = 0, c = string; *c != '\0'; c++) { |
1279 | | /* Need to realloc buffer? */ |
1280 | 0 | if (i + 6 > buflen) { |
1281 | | /* If we had to realloc then this string must contain several */ |
1282 | | /* entities... so let's go with twice the previous buffer size */ |
1283 | 0 | buflen *= 2; |
1284 | | /* cppcheck-suppress memleakOnRealloc */ |
1285 | 0 | newstring = (char *)realloc(newstring, buflen + 1); |
1286 | 0 | MS_CHECK_ALLOC(newstring, buflen + 1, NULL); |
1287 | 0 | } |
1288 | | |
1289 | 0 | switch (*c) { |
1290 | 0 | case '&': |
1291 | 0 | strcpy(newstring + i, "&"); |
1292 | 0 | i += 5; |
1293 | 0 | break; |
1294 | 0 | case '<': |
1295 | 0 | strcpy(newstring + i, "<"); |
1296 | 0 | i += 4; |
1297 | 0 | break; |
1298 | 0 | case '>': |
1299 | 0 | strcpy(newstring + i, ">"); |
1300 | 0 | i += 4; |
1301 | 0 | break; |
1302 | 0 | case '"': |
1303 | 0 | strcpy(newstring + i, """); |
1304 | 0 | i += 6; |
1305 | 0 | break; |
1306 | 0 | case '\'': |
1307 | 0 | strcpy(newstring + i, |
1308 | 0 | "'"); /* changed from ' and i += 6 (bug 1040) */ |
1309 | 0 | i += 5; |
1310 | 0 | break; |
1311 | 0 | default: |
1312 | 0 | newstring[i++] = *c; |
1313 | 0 | } |
1314 | 0 | } |
1315 | | |
1316 | 0 | newstring[i++] = '\0'; |
1317 | |
|
1318 | 0 | return newstring; |
1319 | 0 | } |
1320 | | |
/*
** Returns non-zero when text starts with entity, compared case-insensitively.
** Never reads past the terminating NUL of text.
*/
static int msStartsWithEntity(const char *text, const char *entity) {
  for (; *entity != '\0'; text++, entity++) {
    if (tolower((unsigned char)*text) != tolower((unsigned char)*entity))
      return 0;
  }
  return 1;
}

/* msDecodeHTMLEntities()
**
** Modify the string in place to replace encoded characters by their true
** value. Although the parameter is declared const, the buffer it points to
** is written to, so it must be writable. A NULL string is ignored.
**
** The replacements performed (entity names are matched case-insensitively)
** are:
**  "&amp;" -> '&', "&quot;" -> '"', "&lt;" -> '<', "&gt;" -> '>',
**  "&#39;" -> '\'' and "&apos;" -> '\''
**
** Anything else, including unknown entities, is left untouched. The output
** of a replacement is not scanned again, so "&amp;lt;" becomes "&lt;".
**/
void msDecodeHTMLEntities(const char *string) {
  static const char *const entityNames[] = {"&amp;", "&lt;",  "&gt;",
                                            "&quot;", "&#39;", "&apos;"};
  static const char entityValues[] = {'&', '<', '>', '"', '\'', '\''};
  const size_t entityCount = sizeof(entityValues) / sizeof(entityValues[0]);

  if (string == NULL)
    return;

  /* The decoded text is never longer than the input, so it can be written
     over the input while reading ahead of the write position. */
  char *in = (char *)string;
  char *out = in;

  while (*in != '\0') {
    size_t match = entityCount;

    if (*in == '&') {
      for (size_t k = 0; k < entityCount; k++) {
        if (msStartsWithEntity(in, entityNames[k])) {
          match = k;
          break;
        }
      }
    }

    if (match < entityCount) {
      *out++ = entityValues[match];
      in += strlen(entityNames[match]);
    } else {
      *out++ = *in++;
    }
  }

  *out = '\0';
}
1387 | | |
1388 | | /* |
1389 | | ** msIsXMLValid |
1390 | | ** |
1391 | | ** Check if the string is an XML valid string. It should contains only |
1392 | | ** A-Z, a-z, 0-9, '_', '-', '.', and ':' |
1393 | | ** Return MS_TRUE or MS_FALSE |
1394 | | */ |
1395 | 0 | int msIsXMLTagValid(const char *string) { |
1396 | 0 | int i, nLen; |
1397 | |
|
1398 | 0 | nLen = strlen(string); |
1399 | |
|
1400 | 0 | for (i = 0; i < nLen; i++) { |
1401 | 0 | if (!(string[i] >= 'A' && string[i] <= 'Z') && |
1402 | 0 | !(string[i] >= 'a' && string[i] <= 'z') && |
1403 | 0 | !(string[i] >= '0' && string[i] <= '9') && string[i] != '-' && |
1404 | 0 | string[i] != '.' && string[i] != ':' && string[i] != '_') |
1405 | 0 | return MS_FALSE; |
1406 | 0 | } |
1407 | | |
1408 | 0 | return MS_TRUE; |
1409 | 0 | } |
1410 | | |
1411 | | /* |
1412 | | * Concatenate pszSrc to pszDest and reallocate memory if necessary. |
1413 | | */ |
1414 | 0 | char *msStringConcatenate(char *pszDest, const char *pszSrc) { |
1415 | 0 | int nLen; |
1416 | |
|
1417 | 0 | if (pszSrc == NULL) |
1418 | 0 | return pszDest; |
1419 | | |
1420 | | /* if destination is null, allocate memory */ |
1421 | 0 | if (pszDest == NULL) { |
1422 | 0 | pszDest = msStrdup(pszSrc); |
1423 | 0 | } else { /* if dest is not null, reallocate memory */ |
1424 | 0 | char *pszTemp; |
1425 | |
|
1426 | 0 | nLen = strlen(pszDest) + strlen(pszSrc); |
1427 | |
|
1428 | 0 | pszTemp = (char *)realloc(pszDest, nLen + 1); |
1429 | 0 | if (pszTemp) { |
1430 | 0 | pszDest = pszTemp; |
1431 | 0 | strcat(pszDest, pszSrc); |
1432 | 0 | pszDest[nLen] = '\0'; |
1433 | 0 | } else { |
1434 | 0 | msSetError(MS_MEMERR, "Error while reallocating memory.", |
1435 | 0 | "msStringConcatenate()"); |
1436 | 0 | return NULL; |
1437 | 0 | } |
1438 | 0 | } |
1439 | | |
1440 | 0 | return pszDest; |
1441 | 0 | } |
1442 | | |
1443 | 0 | char *msJoinStrings(char **array, int arrayLength, const char *delimiter) { |
1444 | 0 | char *string; |
1445 | 0 | int stringLength = 0; |
1446 | 0 | int delimiterLength; |
1447 | 0 | int i; |
1448 | |
|
1449 | 0 | if (!array || arrayLength <= 0 || !delimiter) |
1450 | 0 | return NULL; |
1451 | | |
1452 | 0 | delimiterLength = strlen(delimiter); |
1453 | |
|
1454 | 0 | for (i = 0; i < arrayLength; i++) |
1455 | 0 | stringLength += strlen(array[i]) + delimiterLength; |
1456 | |
|
1457 | 0 | string = (char *)calloc(stringLength + 1, sizeof(char)); |
1458 | 0 | MS_CHECK_ALLOC(string, (stringLength + 1) * sizeof(char), NULL); |
1459 | 0 | string[0] = '\0'; |
1460 | |
|
1461 | 0 | for (i = 0; i < arrayLength - 1; i++) { |
1462 | 0 | strlcat(string, array[i], stringLength); |
1463 | 0 | strlcat(string, delimiter, stringLength); |
1464 | 0 | } |
1465 | 0 | strlcat(string, array[i], stringLength); /* add last element, no delimiter */ |
1466 | |
|
1467 | 0 | return string; |
1468 | 0 | } |
1469 | | |
1470 | 0 | #define HASH_SIZE 16 |
1471 | | /* |
1472 | | * Return a hashed string for a given input string. |
1473 | | * The caller should free the return value. |
1474 | | */ |
1475 | 0 | char *msHashString(const char *pszStr) { |
1476 | 0 | unsigned char sums[HASH_SIZE] = {0, 0, 0, 0, 0, 0, 0, 0, |
1477 | 0 | 0, 0, 0, 0, 0, 0, 0, 0}; |
1478 | 0 | char *pszOutBuf = NULL; |
1479 | 0 | size_t bufferSize = 0; |
1480 | 0 | int i = 0; |
1481 | |
|
1482 | 0 | bufferSize = HASH_SIZE * 2 + 1; |
1483 | 0 | pszOutBuf = (char *)msSmallMalloc(bufferSize); |
1484 | |
|
1485 | 0 | for (i = 0; pszStr && pszStr[i]; i++) { |
1486 | 0 | sums[i % HASH_SIZE] += (unsigned char)(pszStr[i]); |
1487 | 0 | } |
1488 | |
|
1489 | 0 | for (i = 0; i < HASH_SIZE; i++) { |
1490 | 0 | snprintf(pszOutBuf + i * 2, bufferSize - (i * 2), "%02x", sums[i]); |
1491 | 0 | } |
1492 | |
|
1493 | 0 | return pszOutBuf; |
1494 | 0 | } |
1495 | | |
1496 | 0 | char *msCommifyString(char *str) { |
1497 | 0 | int i, j, old_length, new_length; |
1498 | 0 | int num_commas = 0, num_decimal_points = 0; |
1499 | 0 | int add_commas; |
1500 | |
|
1501 | 0 | char comma = ',', decimal_point = '.'; |
1502 | |
|
1503 | 0 | if (!str) |
1504 | 0 | return NULL; |
1505 | | |
1506 | 0 | num_decimal_points = msCountChars(str, decimal_point); |
1507 | 0 | if (num_decimal_points > 1) |
1508 | 0 | return str; |
1509 | | |
1510 | 0 | old_length = strlen(str); |
1511 | 0 | if (num_decimal_points == 0) { |
1512 | 0 | num_commas = floor((old_length - 1) / 3); |
1513 | 0 | add_commas = 1; /* add commas right away */ |
1514 | 0 | } else { |
1515 | 0 | num_commas = |
1516 | 0 | floor(((old_length - strlen(strchr(str, decimal_point))) - 1) / 3); |
1517 | 0 | add_commas = 0; /* wait until after the decimal point */ |
1518 | 0 | } |
1519 | |
|
1520 | 0 | if (num_commas < 1) |
1521 | 0 | return str; /* nothing to add */ |
1522 | | |
1523 | 0 | new_length = old_length + num_commas; |
1524 | 0 | str = (char *)msSmallRealloc(str, new_length + 1); |
1525 | 0 | str[new_length] = '\0'; |
1526 | |
|
1527 | 0 | j = 0; |
1528 | 0 | for (i = new_length - 1; i >= 0; |
1529 | 0 | i--) { /* step backwards through the string */ |
1530 | |
|
1531 | 0 | if (num_decimal_points == 1 && |
1532 | 0 | add_commas == 0) { /* to the right of the decimal point, no commas */ |
1533 | 0 | str[i] = str[i - num_commas]; |
1534 | 0 | if (str[i] == decimal_point) |
1535 | 0 | add_commas = 1; |
1536 | 0 | } else if (add_commas == 1 && j > 2) { /* need a comma */ |
1537 | 0 | str[i] = comma; |
1538 | 0 | num_commas--; /* need one fewer now */ |
1539 | 0 | j = 0; /* reset */ |
1540 | 0 | } else { |
1541 | 0 | str[i] = str[i - num_commas]; /* shift to the right */ |
1542 | 0 | j++; |
1543 | 0 | } |
1544 | |
|
1545 | 0 | if (num_commas == 0) |
1546 | 0 | break; /* done, rest of string is ok "as is" */ |
1547 | 0 | } |
1548 | |
|
1549 | 0 | return str; |
1550 | 0 | } |
1551 | | |
1552 | | /************************************************************************/ |
1553 | | /* msToString() */ |
1554 | | /************************************************************************/ |
1555 | | |
1556 | 0 | char *msToString(const char *format, double value) { |
1557 | 0 | bool pctAlreadyFound = false; |
1558 | | // Validate that the formatting string is OK for a single input double value |
1559 | 0 | int extra_size = 0; |
1560 | 0 | for (const char *ptr = format; *ptr; ++ptr) { |
1561 | 0 | if (*ptr == '%' && ptr[1] == '%') { |
1562 | 0 | ++ptr; |
1563 | 0 | } else if (*ptr == '%') { |
1564 | 0 | if (pctAlreadyFound) { |
1565 | 0 | msSetError(MS_MISCERR, "More than one conversion specifier", |
1566 | 0 | "msToString()"); |
1567 | 0 | return nullptr; |
1568 | 0 | } |
1569 | 0 | pctAlreadyFound = true; |
1570 | 0 | ++ptr; |
1571 | | // Skip flag characters |
1572 | 0 | while (*ptr == '+' || *ptr == '-' || *ptr == ' ' || *ptr == '\'' || |
1573 | 0 | *ptr == '0') { |
1574 | 0 | ++ptr; |
1575 | 0 | } |
1576 | | // Skip width |
1577 | 0 | if (*ptr >= '1' && *ptr <= '9') { |
1578 | 0 | extra_size = atoi(ptr); |
1579 | 0 | do { |
1580 | 0 | ++ptr; |
1581 | 0 | } while (*ptr >= '0' && *ptr <= '9'); |
1582 | 0 | if (extra_size > 1024) { |
1583 | | // To avoid arbitrary memory allocatin |
1584 | 0 | msSetError(MS_MISCERR, "Too large width", "msToString()"); |
1585 | 0 | return nullptr; |
1586 | 0 | } |
1587 | 0 | } |
1588 | | // maximum double value is of the order of ~1e308 |
1589 | 0 | if (extra_size < DBL_MAX_10_EXP) |
1590 | 0 | extra_size = DBL_MAX_10_EXP; |
1591 | 0 | extra_size += 32; // extra margin |
1592 | | |
1593 | | // Skip precision |
1594 | 0 | if (*ptr == '.') { |
1595 | 0 | ++ptr; |
1596 | 0 | while (*ptr >= '0' && *ptr <= '9') |
1597 | 0 | ++ptr; |
1598 | 0 | } |
1599 | | // Check conversion specifier |
1600 | 0 | if (!(*ptr == 'e' || *ptr == 'E' || *ptr == 'f' || *ptr == 'F' || |
1601 | 0 | *ptr == 'g' || *ptr == 'G')) { |
1602 | 0 | msSetError(MS_MISCERR, "Invalid conversion specifier", "msToString()"); |
1603 | 0 | return nullptr; |
1604 | 0 | } |
1605 | 0 | } |
1606 | 0 | } |
1607 | | // extra_size / 3 if thousands' grouping characters is used |
1608 | 0 | const size_t nBufferSize = strlen(format) + extra_size + (extra_size / 3) + 1; |
1609 | 0 | char *ret = static_cast<char *>(msSmallMalloc(nBufferSize)); |
1610 | 0 | snprintf(ret, nBufferSize, format, value); |
1611 | 0 | return ret; |
1612 | 0 | } |
1613 | | |
1614 | | /* ------------------------------------------------------------------------------- |
1615 | | */ |
1616 | | /* Replace all occurrences of old with new in str. */ |
1617 | | /* It is assumed that str was dynamically created using malloc. */ |
1618 | | /* Same function as msReplaceSubstring but this is case insensitive */ |
1619 | | /* ------------------------------------------------------------------------------- |
1620 | | */ |
1621 | 0 | char *msCaseReplaceSubstring(char *str, const char *old, const char *newstr) { |
1622 | 0 | size_t str_len, old_len, new_len, tmp_offset; |
1623 | 0 | char *tmp_ptr; |
1624 | | |
1625 | | /* |
1626 | | ** If old is not found then leave str alone |
1627 | | */ |
1628 | 0 | if ((tmp_ptr = (char *)strcasestr(str, old)) == NULL) |
1629 | 0 | return (str); |
1630 | | |
1631 | 0 | if (newstr == NULL) |
1632 | 0 | newstr = ""; |
1633 | | |
1634 | | /* |
1635 | | ** Grab some info about incoming strings |
1636 | | */ |
1637 | 0 | str_len = strlen(str); |
1638 | 0 | old_len = strlen(old); |
1639 | 0 | new_len = strlen(newstr); |
1640 | | |
1641 | | /* |
1642 | | ** Now loop until old is NOT found in new |
1643 | | */ |
1644 | 0 | while (tmp_ptr != NULL) { |
1645 | | |
1646 | | /* |
1647 | | ** re-allocate memory for buf assuming 1 replacement of old with new |
1648 | | ** don't bother reallocating if old is larger than new) |
1649 | | */ |
1650 | 0 | if (old_len < new_len) { |
1651 | 0 | tmp_offset = tmp_ptr - str; |
1652 | 0 | str_len = str_len - old_len + new_len; |
1653 | 0 | str = (char *)msSmallRealloc( |
1654 | 0 | str, (str_len + 1)); /* make new space for a copy */ |
1655 | 0 | tmp_ptr = str + tmp_offset; |
1656 | 0 | } |
1657 | | |
1658 | | /* |
1659 | | ** Move the trailing part of str to make some room unless old_len == new_len |
1660 | | */ |
1661 | 0 | if (old_len != new_len) { |
1662 | 0 | memmove(tmp_ptr + new_len, tmp_ptr + old_len, |
1663 | 0 | strlen(tmp_ptr) - old_len + 1); |
1664 | 0 | } |
1665 | | |
1666 | | /* |
1667 | | ** Now copy new over old |
1668 | | */ |
1669 | 0 | memcpy(tmp_ptr, newstr, new_len); |
1670 | | |
1671 | | /* |
1672 | | ** And look for more matches in the rest of the string |
1673 | | */ |
1674 | 0 | tmp_ptr = (char *)strcasestr(tmp_ptr + new_len, old); |
1675 | 0 | } |
1676 | |
|
1677 | 0 | return (str); |
1678 | 0 | } |
1679 | | |
/*
** Converts a 2 character hexadecimal string to an integer.
**
** Only hex[0] and hex[1] are read. A character >= 'A' is treated as a
** letter digit (bit 0x20 is cleared, so upper and lower case give the same
** value), anything below 'A' as a decimal digit. The input is not validated.
*/
int msHexToInt(char *hex) {
  int number = 0;

  for (int pos = 0; pos < 2; pos++) {
    const char digit = hex[pos];
    int nibble;

    if (digit >= 'A')
      nibble = ((digit & 0xdf) - 'A') + 10;
    else
      nibble = digit - '0';

    number = number * 16 + nibble;
  }

  return (number);
}
1692 | | |
/*
** Reorder a logical-order string into visual order with FriBidi and return
** the result encoded as UTF-8.
**
** string   - NUL-terminated input text in the charset named by encoding.
** encoding - charset name; it must be known to FriBidi (or to iconv when
**            FRIBIDI_NO_CHARSETS is defined).
**
** Returns a newly allocated string that must be freed by the caller, or NULL
** after msSetError() when the encoding is not supported, the input is too
** long for the work buffer, the input contains a newline, or FriBidi fails.
*/
#ifdef USE_FRIBIDI
char *msGetFriBidiEncodedString(const char *string, const char *encoding) {
  FriBidiChar logical[MAX_STR_LEN];
  const size_t max_chars = sizeof(logical) / sizeof(logical[0]);
  /* fribidi_log2vis() reads the requested base direction, so it must be
     initialised; the neutral value lets FriBidi resolve the paragraph
     direction from the text itself. */
  FriBidiParType base = FRIBIDI_PAR_ON;
  FriBidiChar *visual = NULL;
  char *outstring = NULL;
  char *result = NULL;
  size_t out_size = 0;
  size_t len = strlen(string);

#ifdef FRIBIDI_NO_CHARSETS
  iconv_t to_ucs4 = (iconv_t)(-1);
  iconv_t from_ucs4 = (iconv_t)(-1);
#else
  FriBidiCharSet to_char_set_num;
  FriBidiCharSet from_char_set_num;
#endif

  /* Every input byte can decode to one code point, so anything that does not
     fit in logical[] would overflow the stack buffer. */
  if (len >= max_chars) {
    msSetError(MS_IDENTERR, "Input string is too long to be bidi-encoded.",
               "msGetFriBidiEncodedString()");
    return NULL;
  }

#ifdef FRIBIDI_NO_CHARSETS
  to_ucs4 = iconv_open("WCHAR_T", encoding);
  from_ucs4 = iconv_open("UTF-8", "WCHAR_T");
  if (to_ucs4 == (iconv_t)(-1) || from_ucs4 == (iconv_t)(-1))
#else
  to_char_set_num = fribidi_parse_charset((char *)encoding);
  from_char_set_num = fribidi_parse_charset("UTF-8");
  if (!to_char_set_num || !from_char_set_num)
#endif
  {
    msSetError(MS_IDENTERR, "Encoding not supported (%s).",
               "msGetFriBidiEncodedString()", encoding);
    goto cleanup;
  }

  /* Decode the input into code points; len becomes the code point count. */
#ifdef FRIBIDI_NO_CHARSETS
  {
    char *in_cursor = (char *)string;
    char *out_cursor = (char *)logical;
    size_t in_left = len;
    size_t out_left = sizeof logical;
    /* Best effort, as before: whatever was converted is used. */
    iconv(to_ucs4, &in_cursor, &in_left, &out_cursor, &out_left);
    len = (FriBidiChar *)out_cursor - logical;
  }
#else
  len =
      fribidi_charset_to_unicode(to_char_set_num, (char *)string, len, logical);
#endif

  // fribidi_log2vis() doesn't support multi-line paragraphs.
  // See:
  // https://lists.freedesktop.org/archives/fribidi/2008-January/000515.html
  for (size_t i = 0; i < len; i++) {
    if (logical[i] == '\n') {
      msSetError(
          MS_IDENTERR,
          "Input string is a multi-line paragraph, which is not supported.",
          "msGetFriBidiEncodedString()");
      goto cleanup;
    }
  }

  visual = (FriBidiChar *)msSmallMalloc(sizeof(FriBidiChar) * (len + 1));

  /* Create a bidi string; the position maps and levels are not needed. */
  if (!fribidi_log2vis(logical, len, &base,
                       /* output */
                       visual, NULL, NULL, NULL)) {
    msSetError(MS_IDENTERR, "Failed to create bidi string.",
               "msGetFriBidiEncodedString()");
    goto cleanup;
  }

  /* Convert it to utf-8 for display. The buffer is sized for the worst case
     (6 bytes per code point, matching the other converters in this file)
     instead of a fixed MAX_STR_LEN bytes. */
  out_size = len * 6 + 1;
  outstring = (char *)msSmallMalloc(out_size);

#ifdef FRIBIDI_NO_CHARSETS
  {
    char *in_cursor = (char *)visual;
    char *out_cursor = outstring;
    size_t in_left = len * sizeof visual[0];
    size_t out_left = out_size - 1; /* keep one byte for the terminator */
    iconv(from_ucs4, &in_cursor, &in_left, &out_cursor, &out_left);
    *out_cursor = '\0';
  }
#else
  {
    const FriBidiStrIndex new_len =
        fribidi_unicode_to_charset(from_char_set_num, visual, len, outstring);
    FriBidiStrIndex src = 0;
    FriBidiStrIndex dst = 0;

    /* Compress out every FRIBIDI_CHAR_FILL (U+FEFF, EF BB BF in UTF-8),
       including several in a row. */
    while (src < new_len) {
      if (src + 2 < new_len && outstring[src] == '\xef' &&
          outstring[src + 1] == '\xbb' && outstring[src + 2] == '\xbf') {
        src += 3;
        continue;
      }
      outstring[dst++] = outstring[src++];
    }
    outstring[dst] = '\0';
  }
#endif

  /* Hand back a tightly sized copy. */
  result = msStrdup(outstring);

cleanup:
  msFree(outstring);
  msFree(visual);
#ifdef FRIBIDI_NO_CHARSETS
  if (to_ucs4 != (iconv_t)(-1))
    iconv_close(to_ucs4);
  if (from_ucs4 != (iconv_t)(-1))
    iconv_close(from_ucs4);
#endif
  return result;
}
#endif
1818 | | |
1819 | | /* |
1820 | | ** Simple charset converter. Converts string from specified encoding to UTF-8. |
1821 | | ** The return value must be freed by the caller. |
1822 | | */ |
1823 | 0 | char *msGetEncodedString(const char *string, const char *encoding) { |
1824 | 0 | #ifdef USE_ICONV |
1825 | 0 | iconv_t cd = NULL; |
1826 | 0 | const char *inp; |
1827 | 0 | char *outp, *out = NULL; |
1828 | 0 | size_t len, bufsize, bufleft; |
1829 | 0 | assert(encoding); |
1830 | | |
1831 | | #ifdef USE_FRIBIDI |
1832 | | msAcquireLock(TLOCK_FRIBIDI); |
1833 | | if (fribidi_parse_charset((char *)encoding)) { |
1834 | | char *ret = msGetFriBidiEncodedString(string, encoding); |
1835 | | msReleaseLock(TLOCK_FRIBIDI); |
1836 | | return ret; |
1837 | | } |
1838 | | msReleaseLock(TLOCK_FRIBIDI); |
1839 | | #endif |
1840 | 0 | len = strlen(string); |
1841 | |
|
1842 | 0 | if (len == 0 || strcasecmp(encoding, "UTF-8") == 0) |
1843 | 0 | return msStrdup(string); /* Nothing to do: string already in UTF-8 */ |
1844 | | |
1845 | 0 | cd = iconv_open("UTF-8", encoding); |
1846 | 0 | if (cd == (iconv_t)-1) { |
1847 | 0 | msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).", |
1848 | 0 | "msGetEncodedString()", encoding); |
1849 | 0 | return NULL; |
1850 | 0 | } |
1851 | | |
1852 | 0 | bufsize = len * 6 + 1; /* Each UTF-8 char can be up to 6 bytes */ |
1853 | 0 | inp = string; |
1854 | 0 | out = (char *)malloc(bufsize); |
1855 | 0 | if (out == NULL) { |
1856 | 0 | msSetError(MS_MEMERR, NULL, "msGetEncodedString()"); |
1857 | 0 | iconv_close(cd); |
1858 | 0 | return NULL; |
1859 | 0 | } |
1860 | 0 | strlcpy(out, string, bufsize); |
1861 | 0 | outp = out; |
1862 | |
|
1863 | 0 | bufleft = bufsize; |
1864 | |
|
1865 | 0 | while (len > 0) { |
1866 | 0 | const size_t iconv_status = |
1867 | 0 | msIconv(cd, (char **)&inp, &len, &outp, &bufleft); |
1868 | 0 | if (iconv_status == static_cast<size_t>(-1)) { |
1869 | 0 | msFree(out); |
1870 | 0 | iconv_close(cd); |
1871 | 0 | return msStrdup(string); |
1872 | 0 | } |
1873 | 0 | } |
1874 | 0 | out[bufsize - bufleft] = '\0'; |
1875 | |
|
1876 | 0 | iconv_close(cd); |
1877 | |
|
1878 | 0 | return out; |
1879 | | #else |
1880 | | if (*string == '\0' || (encoding && strcasecmp(encoding, "UTF-8") == 0)) |
1881 | | return msStrdup(string); /* Nothing to do: string already in UTF-8 */ |
1882 | | |
1883 | | msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", |
1884 | | "msGetEncodedString()"); |
1885 | | return NULL; |
1886 | | #endif |
1887 | 0 | } |
1888 | | |
1889 | 0 | char *msConvertWideStringToUTF8(const wchar_t *string, const char *encoding) { |
1890 | 0 | #ifdef USE_ICONV |
1891 | |
|
1892 | 0 | char *output = NULL; |
1893 | 0 | const char *errormessage = NULL; |
1894 | 0 | iconv_t cd = NULL; |
1895 | 0 | size_t nStr; |
1896 | 0 | size_t nInSize; |
1897 | 0 | size_t nOutSize; |
1898 | 0 | size_t nBufferSize; |
1899 | |
|
1900 | 0 | char *pszUTF8 = NULL; |
1901 | 0 | const wchar_t *pwszWide = NULL; |
1902 | |
|
1903 | 0 | if (string != NULL) { |
1904 | 0 | nStr = wcslen(string); |
1905 | 0 | nBufferSize = ((nStr * 6) + 1); |
1906 | 0 | output = (char *)msSmallMalloc(nBufferSize); |
1907 | |
|
1908 | 0 | if (nStr == 0) { |
1909 | | /* return an empty 8 byte string */ |
1910 | 0 | output[0] = '\0'; |
1911 | 0 | return output; |
1912 | 0 | } |
1913 | | |
1914 | 0 | cd = iconv_open("UTF-8", encoding); |
1915 | |
|
1916 | 0 | nOutSize = nBufferSize; |
1917 | 0 | if ((iconv_t)-1 != cd) { |
1918 | 0 | nInSize = sizeof(wchar_t) * nStr; |
1919 | 0 | pszUTF8 = output; |
1920 | 0 | pwszWide = string; |
1921 | 0 | size_t iconv_status = |
1922 | 0 | msIconv(cd, (char **)&pwszWide, &nInSize, &pszUTF8, &nOutSize); |
1923 | 0 | if ((size_t)-1 == iconv_status) { |
1924 | 0 | switch (errno) { |
1925 | 0 | case E2BIG: |
1926 | 0 | errormessage = "There is not sufficient room in buffer"; |
1927 | 0 | break; |
1928 | 0 | case EILSEQ: |
1929 | 0 | errormessage = |
1930 | 0 | "An invalid multibyte sequence has been encountered in the input"; |
1931 | 0 | break; |
1932 | 0 | case EINVAL: |
1933 | 0 | errormessage = "An incomplete multibyte sequence has been " |
1934 | 0 | "encountered in the input"; |
1935 | 0 | break; |
1936 | 0 | default: |
1937 | 0 | errormessage = "Unknown"; |
1938 | 0 | break; |
1939 | 0 | } |
1940 | 0 | msSetError(MS_MISCERR, |
1941 | 0 | "Unable to convert string in encoding '%s' to UTF8 %s", |
1942 | 0 | "msConvertWideStringToUTF8()", encoding, errormessage); |
1943 | 0 | iconv_close(cd); |
1944 | 0 | msFree(output); |
1945 | 0 | return NULL; |
1946 | 0 | } |
1947 | 0 | iconv_close(cd); |
1948 | 0 | } else { |
1949 | 0 | msSetError(MS_MISCERR, "Encoding not supported by libiconv (%s).", |
1950 | 0 | "msConvertWideStringToUTF8()", encoding); |
1951 | 0 | msFree(output); |
1952 | 0 | return NULL; |
1953 | 0 | } |
1954 | |
|
1955 | 0 | } else { |
1956 | | /* we were given a NULL wide string, nothing we can do here */ |
1957 | 0 | return NULL; |
1958 | 0 | } |
1959 | | |
1960 | | /* NULL-terminate the output string */ |
1961 | 0 | output[nBufferSize - nOutSize] = '\0'; |
1962 | 0 | return output; |
1963 | | #else |
1964 | | msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", |
1965 | | "msConvertWideStringToUTF8()"); |
1966 | | return NULL; |
1967 | | #endif |
1968 | 0 | } |
1969 | | |
1970 | 0 | wchar_t *msConvertWideStringFromUTF8(const char *string, const char *encoding) { |
1971 | 0 | #ifdef USE_ICONV |
1972 | 0 | wchar_t *output = NULL; |
1973 | 0 | const char *errormessage = NULL; |
1974 | 0 | iconv_t cd = NULL; |
1975 | 0 | size_t nStr; |
1976 | 0 | size_t nInSize; |
1977 | 0 | size_t nOutSize; |
1978 | 0 | size_t nBufferSize; |
1979 | |
|
1980 | 0 | const char *pszUTF8 = NULL; |
1981 | 0 | wchar_t *pwszWide = NULL; |
1982 | |
|
1983 | 0 | if (string != NULL) { |
1984 | 0 | nStr = strlen(string); |
1985 | 0 | nBufferSize = ((nStr * 6) + 1); |
1986 | 0 | output = (wchar_t *)msSmallMalloc(nBufferSize); |
1987 | |
|
1988 | 0 | if (nStr == 0) { |
1989 | | /* return an empty 8 byte string */ |
1990 | 0 | output[0] = '\0'; |
1991 | 0 | return output; |
1992 | 0 | } |
1993 | | |
1994 | 0 | cd = iconv_open(encoding, "UTF-8"); |
1995 | |
|
1996 | 0 | nOutSize = nBufferSize; |
1997 | 0 | if ((iconv_t)-1 != cd) { |
1998 | 0 | nInSize = sizeof(char) * nStr; |
1999 | 0 | pszUTF8 = string; |
2000 | 0 | pwszWide = output; |
2001 | 0 | size_t iconv_status = msIconv(cd, (char **)&pszUTF8, &nInSize, |
2002 | 0 | (char **)&pwszWide, &nOutSize); |
2003 | 0 | if ((size_t)-1 == iconv_status) { |
2004 | 0 | switch (errno) { |
2005 | 0 | case E2BIG: |
2006 | 0 | errormessage = "There is not sufficient room in buffer"; |
2007 | 0 | break; |
2008 | 0 | case EILSEQ: |
2009 | 0 | errormessage = |
2010 | 0 | "An invalid multibyte sequence has been encountered in the input"; |
2011 | 0 | break; |
2012 | 0 | case EINVAL: |
2013 | 0 | errormessage = "An incomplete multibyte sequence has been " |
2014 | 0 | "encountered in the input"; |
2015 | 0 | break; |
2016 | 0 | default: |
2017 | 0 | errormessage = "Unknown"; |
2018 | 0 | break; |
2019 | 0 | } |
2020 | 0 | msSetError(MS_MISCERR, |
2021 | 0 | "Unable to convert string in UTF8 to encoding '%s' %s", |
2022 | 0 | "msConvertWideStringFromUTF8()", encoding, errormessage); |
2023 | 0 | iconv_close(cd); |
2024 | 0 | msFree(output); |
2025 | 0 | return NULL; |
2026 | 0 | } |
2027 | 0 | iconv_close(cd); |
2028 | 0 | } else { |
2029 | 0 | msSetError(MS_MISCERR, "Encoding not supported by libiconv (%s).", |
2030 | 0 | "msConvertWideStringFromUTF8()", encoding); |
2031 | 0 | msFree(output); |
2032 | 0 | return NULL; |
2033 | 0 | } |
2034 | 0 | } else { |
2035 | | /* we were given a NULL wide string, nothing we can do here */ |
2036 | 0 | return NULL; |
2037 | 0 | } |
2038 | | |
2039 | | /* NULL-terminate the output string */ |
2040 | 0 | if (nOutSize >= sizeof(wchar_t)) |
2041 | 0 | *((wchar_t *)pwszWide) = L'\0'; |
2042 | |
|
2043 | 0 | return output; |
2044 | | #else |
2045 | | msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", |
2046 | | "msConvertWideStringFromUTF8()"); |
2047 | | return NULL; |
2048 | | #endif |
2049 | 0 | } |
2050 | | |
2051 | | /* |
2052 | | ** Returns the next glyph in string and advances *in_ptr to the next |
2053 | | ** character. |
2054 | | ** |
2055 | | ** If out_string is not NULL then the character (bytes) is copied to this |
2056 | | ** buffer and null-terminated. out_string must be a pre-allocated buffer of |
2057 | | ** at least 11 bytes. |
2058 | | ** |
2059 | | ** The function returns the number of bytes in this glyph. |
2060 | | ** |
2061 | | ** This function treats 3 types of glyph encodings: |
2062 | | * - as an html entity, for example { , Ư , or é |
2063 | | * - as an utf8 encoded character |
2064 | | * - if utf8 decoding fails, as a raw character |
2065 | | * |
2066 | | ** This function mimics the character decoding function used in gdft.c of |
2067 | | * libGD. It is necessary to have the same behavior, as input strings must be |
2068 | | * split into the same glyphs as what gd does. |
2069 | | ** |
2070 | | ** In UTF-8, the number of leading 1 bits in the first byte specifies the |
2071 | | ** number of bytes in the entire sequence. |
2072 | | ** Source: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 |
2073 | | ** |
2074 | | ** U-00000000 U-0000007F: 0xxxxxxx |
2075 | | ** U-00000080 U-000007FF: 110xxxxx 10xxxxxx |
2076 | | ** U-00000800 U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx |
2077 | | ** U-00010000 U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
2078 | | ** U-00200000 U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
2079 | | ** U-04000000 U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
2080 | | */ |
2081 | 0 | int msGetNextGlyph(const char **in_ptr, char *out_string) { |
2082 | 0 | unsigned char in; |
2083 | 0 | int numbytes = 0; |
2084 | 0 | unsigned int unicode; |
2085 | 0 | int i; |
2086 | |
|
2087 | 0 | in = (unsigned char)**in_ptr; |
2088 | |
|
2089 | 0 | if (in == 0) |
2090 | 0 | return -1; /* Empty string */ |
2091 | 0 | if ((numbytes = msGetUnicodeEntity(*in_ptr, &unicode)) > 0) { |
2092 | 0 | if (out_string) { |
2093 | 0 | for (i = 0; i < numbytes; i++) { |
2094 | 0 | out_string[i] = (*in_ptr)[i]; |
2095 | 0 | } |
2096 | 0 | out_string[numbytes] = '\0'; |
2097 | 0 | } |
2098 | 0 | *in_ptr += numbytes; |
2099 | 0 | return numbytes; |
2100 | 0 | } |
2101 | 0 | if (in < 0xC0) { |
2102 | | /* |
2103 | | * Handles properly formed UTF-8 characters between |
2104 | | * 0x01 and 0x7F. Also treats \0 and naked trail |
2105 | | * bytes 0x80 to 0xBF as valid characters representing |
2106 | | * themselves. |
2107 | | */ |
2108 | | /*goto end of loop to return just the char*/ |
2109 | 0 | } else if (in < 0xE0) { |
2110 | 0 | if (((*in_ptr)[1] & 0xC0) == 0x80) { |
2111 | 0 | if (out_string) { |
2112 | 0 | out_string[0] = in; |
2113 | 0 | out_string[1] = (*in_ptr)[1]; |
2114 | 0 | out_string[2] = '\0'; |
2115 | 0 | } |
2116 | 0 | *in_ptr += 2; |
2117 | 0 | return 2; /*110xxxxx 10xxxxxx*/ |
2118 | 0 | } |
2119 | 0 | } else if (in < 0xF0) { |
2120 | 0 | if (((*in_ptr)[1] & 0xC0) == 0x80 && ((*in_ptr)[2] & 0xC0) == 0x80) { |
2121 | 0 | if (out_string) { |
2122 | 0 | out_string[0] = in; |
2123 | 0 | *in_ptr += numbytes; |
2124 | 0 | out_string[1] = (*in_ptr)[1]; |
2125 | 0 | out_string[2] = (*in_ptr)[2]; |
2126 | 0 | out_string[3] = '\0'; |
2127 | 0 | } |
2128 | 0 | *in_ptr += 3; |
2129 | 0 | return 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ |
2130 | 0 | } |
2131 | 0 | } else if (in < 0xF8) { |
2132 | 0 | if (((*in_ptr)[1] & 0xC0) == 0x80 && ((*in_ptr)[2] & 0xC0) == 0x80 && |
2133 | 0 | ((*in_ptr)[3] & 0xC0) == 0x80) { |
2134 | 0 | if (out_string) { |
2135 | 0 | out_string[0] = in; |
2136 | 0 | out_string[1] = (*in_ptr)[1]; |
2137 | 0 | out_string[2] = (*in_ptr)[2]; |
2138 | 0 | out_string[3] = (*in_ptr)[3]; |
2139 | 0 | out_string[4] = '\0'; |
2140 | 0 | } |
2141 | 0 | *in_ptr += 4; |
2142 | 0 | return 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
2143 | 0 | } |
2144 | 0 | } else if (in < 0xFC) { |
2145 | 0 | if (((*in_ptr)[1] & 0xC0) == 0x80 && ((*in_ptr)[2] & 0xC0) == 0x80 && |
2146 | 0 | ((*in_ptr)[3] & 0xC0) == 0x80 && ((*in_ptr)[4] & 0xC0) == 0x80) { |
2147 | 0 | if (out_string) { |
2148 | 0 | out_string[0] = in; |
2149 | 0 | out_string[1] = (*in_ptr)[1]; |
2150 | 0 | out_string[2] = (*in_ptr)[2]; |
2151 | 0 | out_string[3] = (*in_ptr)[3]; |
2152 | 0 | out_string[4] = (*in_ptr)[4]; |
2153 | 0 | out_string[5] = '\0'; |
2154 | 0 | } |
2155 | 0 | *in_ptr += 5; |
2156 | 0 | return 5; /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
2157 | 0 | } |
2158 | 0 | } else if (in < 0xFE) { |
2159 | 0 | if (((*in_ptr)[1] & 0xC0) == 0x80 && ((*in_ptr)[2] & 0xC0) == 0x80 && |
2160 | 0 | ((*in_ptr)[3] & 0xC0) == 0x80 && ((*in_ptr)[4] & 0xC0) == 0x80 && |
2161 | 0 | ((*in_ptr)[5] & 0xC0) == 0x80) { |
2162 | 0 | if (out_string) { |
2163 | 0 | out_string[0] = in; |
2164 | 0 | out_string[1] = (*in_ptr)[1]; |
2165 | 0 | out_string[2] = (*in_ptr)[2]; |
2166 | 0 | out_string[3] = (*in_ptr)[3]; |
2167 | 0 | out_string[4] = (*in_ptr)[4]; |
2168 | 0 | out_string[5] = (*in_ptr)[5]; |
2169 | 0 | out_string[6] = '\0'; |
2170 | 0 | } |
2171 | 0 | *in_ptr += 6; |
2172 | 0 | return 6; /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
2173 | 0 | } |
2174 | 0 | } |
2175 | | |
2176 | 0 | if (out_string) { |
2177 | 0 | out_string[0] = in; |
2178 | 0 | out_string[1] = '\0'; /* 0xxxxxxx */ |
2179 | 0 | } |
2180 | 0 | (*in_ptr)++; |
2181 | 0 | return 1; |
2182 | 0 | } |
2183 | | |
2184 | | /* |
2185 | | ** Returns the number of glyphs in string |
2186 | | */ |
2187 | 0 | int msGetNumGlyphs(const char *in_ptr) { |
2188 | 0 | int numchars = 0; |
2189 | |
|
2190 | 0 | while (msGetNextGlyph(&in_ptr, NULL) != -1) |
2191 | 0 | numchars++; |
2192 | |
|
2193 | 0 | return numchars; |
2194 | 0 | } |
2195 | | |
2196 | 0 | static int cmp_entities(const void *e1, const void *e2) { |
2197 | 0 | struct mapentities_s *en1 = (struct mapentities_s *)e1; |
2198 | 0 | struct mapentities_s *en2 = (struct mapentities_s *)e2; |
2199 | 0 | return strcmp(en1->name, en2->name); |
2200 | 0 | } |
2201 | | /* |
2202 | | * this function tests if the string pointed by inptr represents |
2203 | | * an HTML entity, in decimal form ( e.g. Å), in hexadecimal |
2204 | | * form ( e.g. 水 ), or from html 4.0 spec ( e.g. é ) |
2205 | | * - returns returns 0 if the string doesn't represent such an entity. |
2206 | | * - if the string does start with such entity,it returns the number of |
2207 | | * bytes occupied by said entity, and stores the unicode value in *unicode |
2208 | | */ |
2209 | 0 | int msGetUnicodeEntity(const char *inptr, unsigned int *unicode) { |
2210 | 0 | unsigned char *in = (unsigned char *)inptr; |
2211 | 0 | int l, val = 0; |
2212 | 0 | if (*in == '&') { |
2213 | 0 | in++; |
2214 | 0 | if (*in == '#') { |
2215 | 0 | in++; |
2216 | 0 | if (*in == 'x' || *in == 'X') { |
2217 | 0 | in++; |
2218 | 0 | for (l = 3; l < 8; l++) { |
2219 | 0 | char byte; |
2220 | 0 | if (*in >= '0' && *in <= '9') |
2221 | 0 | byte = *in - '0'; |
2222 | 0 | else if (*in >= 'a' && *in <= 'f') |
2223 | 0 | byte = *in - 'a' + 10; |
2224 | 0 | else if (*in >= 'A' && *in <= 'F') |
2225 | 0 | byte = *in - 'A' + 10; |
2226 | 0 | else |
2227 | 0 | break; |
2228 | 0 | in++; |
2229 | 0 | val = (val * 16) + byte; |
2230 | 0 | } |
2231 | 0 | if (*in == ';' && l > 3) { |
2232 | 0 | *unicode = val; |
2233 | 0 | return ++l; |
2234 | 0 | } |
2235 | 0 | } else { |
2236 | 0 | for (l = 2; l < 8; l++) { |
2237 | 0 | if (*in >= '0' && *in <= '9') { |
2238 | 0 | val = val * 10 + *in - '0'; |
2239 | 0 | in++; |
2240 | 0 | } else |
2241 | 0 | break; |
2242 | 0 | } |
2243 | 0 | if (*in == ';' && l > 2) { |
2244 | 0 | *unicode = val; |
2245 | 0 | return ++l; |
2246 | 0 | } |
2247 | 0 | } |
2248 | 0 | } else { |
2249 | 0 | char entity_name_buf[MAP_ENTITY_NAME_LENGTH_MAX + 1]; |
2250 | 0 | char *p; |
2251 | 0 | struct mapentities_s key; |
2252 | 0 | key.name = p = entity_name_buf; |
2253 | 0 | key.value = 0; // unused, but makes cppcheck happy |
2254 | 0 | for (l = 1; l <= MAP_ENTITY_NAME_LENGTH_MAX + 1; l++) { |
2255 | 0 | if (*in == '\0') /*end of string before possible entity: return*/ |
2256 | 0 | break; |
2257 | 0 | if (*in == ';') { /*possible end of entity: do a lookup*/ |
2258 | 0 | *p++ = '\0'; |
2259 | 0 | const struct mapentities_s *res = |
2260 | 0 | static_cast<const struct mapentities_s *>( |
2261 | 0 | bsearch(&key, mapentities, MAP_NR_OF_ENTITIES, |
2262 | 0 | sizeof(mapentities[0]), cmp_entities)); |
2263 | 0 | if (res) { |
2264 | 0 | *unicode = res->value; |
2265 | 0 | return ++l; |
2266 | 0 | } |
2267 | 0 | break; /*the string was of the form of an entity but didn't correspond |
2268 | | to an existing one: return*/ |
2269 | 0 | } |
2270 | 0 | *p++ = *in; |
2271 | 0 | in++; |
2272 | 0 | } |
2273 | 0 | } |
2274 | 0 | } |
2275 | 0 | return 0; |
2276 | 0 | } |
2277 | | |
2278 | | /** |
2279 | | * msStringIsInteger() |
2280 | | * |
2281 | | * determines whether a given string is an integer |
2282 | | * |
2283 | | * @param string the string to be tested |
2284 | | * |
2285 | | * @return MS_SUCCESS or MS_FAILURE |
2286 | | */ |
2287 | | |
2288 | 0 | int msStringIsInteger(const char *string) { |
2289 | 0 | int length, i; |
2290 | |
|
2291 | 0 | length = strlen(string); |
2292 | |
|
2293 | 0 | if (length == 0) |
2294 | 0 | return MS_FAILURE; |
2295 | | |
2296 | 0 | for (i = 0; i < length; i++) { |
2297 | 0 | if (!isdigit(string[i])) |
2298 | 0 | return MS_FAILURE; |
2299 | 0 | } |
2300 | | |
2301 | 0 | return MS_SUCCESS; |
2302 | 0 | } |
2303 | | |
2304 | | /************************************************************************/ |
2305 | | /* msStrdup() */ |
2306 | | /************************************************************************/ |
2307 | | |
/* Safe version of strdup(). This function is taken from gdal/cpl.
 *
 * Returns a malloc()ed copy of pszString that the caller must free. A NULL
 * input is treated as the empty string, so the result is never NULL. If the
 * allocation fails, a message is written to stderr and the process exits
 * with status 1.
 */

char *msStrdup(const char *pszString) {
  if (pszString == NULL)
    pszString = "";

  const size_t nBytes = strlen(pszString) + 1; /* null terminated byte */
  char *pszReturn = (char *)malloc(nBytes);

  if (pszReturn == NULL) {
    /* Name this function and the size that was actually requested. */
    fprintf(stderr, "msStrdup(): Out of memory allocating %lu bytes.\n",
            (unsigned long)nBytes);
    exit(1);
  }

  memcpy(pszReturn, pszString, nBytes);

  return pszReturn;
}
2330 | | |
2331 | | /************************************************************************/ |
2332 | | /* msStringEscape() */ |
2333 | | /************************************************************************/ |
2334 | | |
2335 | | /* Checks if a string contains single or double quotes and escape them. |
2336 | | NOTE: the user must free the returned char* if it is different than the |
2337 | | one passed in */ |
2338 | | |
2339 | 0 | char *msStringEscape(const char *pszString) { |
2340 | 0 | char *string_tmp, *string_ptr; |
2341 | 0 | int i, ncharstoescape = 0; |
2342 | |
|
2343 | 0 | if (pszString == NULL || strlen(pszString) == 0) |
2344 | 0 | return msStrdup(""); |
2345 | | |
2346 | 0 | for (i = 0; pszString[i]; i++) |
2347 | 0 | ncharstoescape += ((pszString[i] == '\"') || (pszString[i] == '\'')); |
2348 | |
|
2349 | 0 | if (!ncharstoescape) { |
2350 | 0 | return (char *)pszString; |
2351 | 0 | } |
2352 | | |
2353 | 0 | string_tmp = (char *)msSmallMalloc(strlen(pszString) + ncharstoescape + 1); |
2354 | 0 | for (string_ptr = (char *)pszString, i = 0; *string_ptr != '\0'; |
2355 | 0 | ++string_ptr, ++i) { |
2356 | 0 | if ((*string_ptr == '\"') || (*string_ptr == '\'')) { |
2357 | 0 | string_tmp[i] = '\\'; |
2358 | 0 | ++i; |
2359 | 0 | } |
2360 | 0 | string_tmp[i] = *string_ptr; |
2361 | 0 | } |
2362 | |
|
2363 | 0 | string_tmp[i] = '\0'; |
2364 | 0 | return string_tmp; |
2365 | 0 | } |
2366 | | |
2367 | 0 | std::string msStdStringEscape(const char *pszString) { |
2368 | 0 | char *tmp = msStringEscape(pszString); |
2369 | 0 | std::string ret(tmp); |
2370 | 0 | if (tmp != pszString) |
2371 | 0 | msFree(tmp); |
2372 | 0 | return ret; |
2373 | 0 | } |
2374 | | |
2375 | | /************************************************************************/ |
2376 | | /* msStringInArray() */ |
2377 | | /************************************************************************/ |
2378 | | |
2379 | | /* Check if a string is in a array */ |
2380 | 0 | int msStringInArray(const char *pszString, char **array, int numelements) { |
2381 | 0 | int i; |
2382 | 0 | for (i = 0; i < numelements; ++i) { |
2383 | 0 | if (strcasecmp(pszString, array[i]) == 0) |
2384 | 0 | return MS_TRUE; |
2385 | 0 | } |
2386 | 0 | return MS_FALSE; |
2387 | 0 | } |
2388 | | |
/* Case-insensitive membership test against a vector of strings. */
bool msStringInArray(const char *pszString,
                     const std::vector<std::string> &array) {
  for (size_t idx = 0; idx < array.size(); ++idx) {
    if (strcasecmp(pszString, array[idx].c_str()) == 0)
      return true;
  }
  return false;
}
2397 | | |
2398 | 0 | int msLayerEncodeShapeAttributes(layerObj *layer, shapeObj *shape) { |
2399 | |
|
2400 | 0 | #ifdef USE_ICONV |
2401 | 0 | iconv_t cd = NULL; |
2402 | 0 | const char *inp; |
2403 | 0 | char *outp, *out = NULL; |
2404 | 0 | size_t len, bufsize, bufleft; |
2405 | 0 | int i; |
2406 | |
|
2407 | 0 | if (!layer->encoding || !*layer->encoding || |
2408 | 0 | !strcasecmp(layer->encoding, "UTF-8")) |
2409 | 0 | return MS_SUCCESS; |
2410 | | |
2411 | 0 | cd = iconv_open("UTF-8", layer->encoding); |
2412 | 0 | if (cd == (iconv_t)-1) { |
2413 | 0 | msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).", |
2414 | 0 | "msGetEncodedString()", layer->encoding); |
2415 | 0 | return MS_FAILURE; |
2416 | 0 | } |
2417 | | |
2418 | 0 | for (i = 0; i < shape->numvalues; i++) { |
2419 | 0 | if (!shape->values[i] || (len = strlen(shape->values[i])) == 0) { |
2420 | 0 | continue; /* Nothing to do */ |
2421 | 0 | } |
2422 | | |
2423 | 0 | bufsize = len * 6 + 1; /* Each UTF-8 char can be up to 6 bytes */ |
2424 | 0 | inp = shape->values[i]; |
2425 | 0 | out = (char *)msSmallMalloc(bufsize); |
2426 | |
|
2427 | 0 | strlcpy(out, shape->values[i], bufsize); |
2428 | 0 | outp = out; |
2429 | |
|
2430 | 0 | bufleft = bufsize; |
2431 | |
|
2432 | 0 | bool failedIconv = false; |
2433 | 0 | while (len > 0) { |
2434 | 0 | const size_t iconv_status = |
2435 | 0 | msIconv(cd, (char **)&inp, &len, &outp, &bufleft); |
2436 | 0 | if (iconv_status == static_cast<size_t>(-1)) { |
2437 | 0 | failedIconv = true; |
2438 | 0 | break; |
2439 | 0 | } |
2440 | 0 | } |
2441 | 0 | if (failedIconv) { |
2442 | 0 | msFree(out); |
2443 | 0 | continue; /* silently ignore failed conversions */ |
2444 | 0 | } |
2445 | 0 | out[bufsize - bufleft] = '\0'; |
2446 | 0 | msFree(shape->values[i]); |
2447 | 0 | shape->values[i] = out; |
2448 | 0 | } |
2449 | 0 | iconv_close(cd); |
2450 | |
|
2451 | 0 | return MS_SUCCESS; |
2452 | | #else |
2453 | | if (!layer->encoding || !*layer->encoding || |
2454 | | !strcasecmp(layer->encoding, "UTF-8")) |
2455 | | return MS_SUCCESS; |
2456 | | msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", |
2457 | | "msGetEncodedString()"); |
2458 | | return MS_FAILURE; |
2459 | | #endif |
2460 | 0 | } |
2461 | | |
2462 | | /************************************************************************/ |
2463 | | /* msStringBuffer */ |
2464 | | /************************************************************************/ |
2465 | | |
/* Growable, NUL-terminated character buffer managed by the
   msStringBuffer*() functions in this file. */
2466 | | struct msStringBuffer { |
/* Number of bytes currently allocated for str (0 while str is NULL). */
2467 | | size_t alloc_size; |
/* Number of characters stored in str, not counting the trailing NUL. */
2468 | | size_t length; |
/* Heap buffer holding the text; grown with realloc() when appending. */
2469 | | char *str; |
2470 | | }; |
2471 | | |
2472 | | /************************************************************************/ |
2473 | | /* msStringBufferAlloc() */ |
2474 | | /************************************************************************/ |
2475 | | |
2476 | 0 | msStringBuffer *msStringBufferAlloc(void) { |
2477 | 0 | return (msStringBuffer *)msSmallCalloc(sizeof(msStringBuffer), 1); |
2478 | 0 | } |
2479 | | |
2480 | | /************************************************************************/ |
2481 | | /* msStringBufferFree() */ |
2482 | | /************************************************************************/ |
2483 | | |
2484 | 0 | void msStringBufferFree(msStringBuffer *sb) { |
2485 | 0 | if (sb) |
2486 | 0 | msFree(sb->str); |
2487 | 0 | msFree(sb); |
2488 | 0 | } |
2489 | | |
2490 | | /************************************************************************/ |
2491 | | /* msStringBufferGetString() */ |
2492 | | /************************************************************************/ |
2493 | | |
2494 | 0 | const char *msStringBufferGetString(msStringBuffer *sb) { return sb->str; } |
2495 | | |
2496 | | /************************************************************************/ |
2497 | | /* msStringBufferReleaseStringAndFree() */ |
2498 | | /************************************************************************/ |
2499 | | |
2500 | 0 | char *msStringBufferReleaseStringAndFree(msStringBuffer *sb) { |
2501 | 0 | char *str = sb->str; |
2502 | 0 | sb->str = NULL; |
2503 | 0 | sb->alloc_size = 0; |
2504 | 0 | sb->length = 0; |
2505 | 0 | msStringBufferFree(sb); |
2506 | 0 | return str; |
2507 | 0 | } |
2508 | | |
2509 | | /************************************************************************/ |
2510 | | /* msStringBufferAppend() */ |
2511 | | /************************************************************************/ |
2512 | | |
2513 | 0 | int msStringBufferAppend(msStringBuffer *sb, const char *pszAppendedString) { |
2514 | 0 | size_t nAppendLen = strlen(pszAppendedString); |
2515 | 0 | if (sb->length + nAppendLen >= sb->alloc_size) { |
2516 | 0 | size_t newAllocSize1 = sb->alloc_size + sb->alloc_size / 3; |
2517 | 0 | size_t newAllocSize2 = sb->length + nAppendLen + 1; |
2518 | 0 | size_t newAllocSize = MAX(newAllocSize1, newAllocSize2); |
2519 | 0 | void *newStr = realloc(sb->str, newAllocSize); |
2520 | 0 | if (newStr == NULL) { |
2521 | 0 | msSetError(MS_MEMERR, "Not enough memory", "msStringBufferAppend()"); |
2522 | 0 | return MS_FAILURE; |
2523 | 0 | } |
2524 | 0 | sb->alloc_size = newAllocSize; |
2525 | 0 | sb->str = (char *)newStr; |
2526 | 0 | } |
2527 | 0 | memcpy(sb->str + sb->length, pszAppendedString, nAppendLen + 1); |
2528 | 0 | sb->length += nAppendLen; |
2529 | 0 | return MS_SUCCESS; |
2530 | 0 | } |
2531 | | |
2532 | | /************************************************************************/ |
2533 | | /* msStringUnescape() */ |
2534 | | /************************************************************************/ |
2535 | | |
/** Collapse, in place, every pair of consecutive chEscapeChar in pszString
 * into a single one; all other characters are kept as they are.
 * Does the reverse of FLTEscapePropertyName()
 */
void msStringUnescape(char *pszString, char chEscapeChar) {
  const char *src = pszString;
  char *dst = pszString;

  while (*src != '\0') {
    const char current = *src;
    /* A doubled escape character consumes two input bytes, anything else
       consumes one; either way exactly one byte is written. */
    if (current == chEscapeChar && src[1] == chEscapeChar)
      src += 2;
    else
      src += 1;
    *dst++ = current;
  }
  *dst = '\0';
}