/src/gdal/netcdf-c-4.7.4/libdispatch/dstring.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2018, University Corporation for Atmospheric Research |
3 | | * See netcdf/COPYRIGHT file for copying and redistribution conditions. |
4 | | */ |
5 | | /* $Id: string.c,v 1.76 2010/05/26 21:43:33 dmh Exp $ */ |
6 | | |
7 | | #include "config.h" |
8 | | #include <stdlib.h> |
9 | | #include <stdio.h> |
10 | | #include <string.h> |
11 | | #include <ctype.h> |
12 | | #include <assert.h> |
13 | | #include "ncdispatch.h" |
14 | | #include "rnd.h" |
15 | | #include "ncutf8.h" |
16 | | |
17 | | /* There are 3 levels of UTF8 checking: 1=> (exact)validating 2=>relaxed |
18 | | and 3=>very relaxed |
19 | | */ |
20 | | /* Use semi-relaxed check */ |
21 | | #define UTF8_CHECK 2 |
22 | | |
23 | | /* |
24 | | * Free string, and, if needed, its values. |
25 | | * Formerly |
26 | | NC_free_string() |
27 | | */ |
28 | | void |
29 | | free_NC_string(NC_string *ncstrp) |
30 | 13.0k | { |
31 | 13.0k | if(ncstrp==NULL) |
32 | 0 | return; |
33 | 13.0k | free(ncstrp); |
34 | 13.0k | } |
35 | | |
36 | | |
/* Fall back to the semi-relaxed check when no level has been selected yet */
#ifndef UTF8_CHECK
#define UTF8_CHECK 2
#endif

/*
 * Return the number of bytes occupied by the character that starts at cp,
 * or -1 if the bytes at cp do not form an acceptable sequence.
 *
 * A byte <= 0x7F (including the terminating NUL) counts as one byte.
 * For multi-byte sequences the strictness is selected by UTF8_CHECK.
 * As regular expressions the levels are:
 *
 *   1. exact:
 *        [\xC2-\xDF][\x80-\xBF]
 *      | \xE0[\xA0-\xBF][\x80-\xBF]
 *      | [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
 *      | \xED[\x80-\x9F][\x80-\xBF]
 *      | [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
 *      | \xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]
 *      | [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
 *      | \xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]
 *
 *   2. partially relaxed:
 *        [\xC0-\xDF][\x80-\xBF]
 *      | [\xE0-\xEF][\x80-\xBF][\x80-\xBF]
 *      | [\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF]
 *
 *   3. most relaxed (not implemented here):
 *        ([\xC0-\xD6].)|([\xE0-\xEF]..)|([\xF0-\xF7]...)
 *
 * The tests are derived from the table at
 *   http://www.w3.org/2005/03/23-lex-U
 *
 * Every trailing byte is range-checked against a range that excludes NUL,
 * and the checks short-circuit left to right, so the scan never reads
 * beyond the string terminator.
 */
static int
nextUTF8(const char* cp)
{
/* True when byte c lies in the inclusive range [lo,hi] */
#define RANGE(c,lo,hi) (((unsigned char)(c)) >= (lo) && ((unsigned char)(c)) <= (hi))
/* True when byte c is a UTF-8 continuation byte */
#define RANGE0(c) RANGE(c,0x80,0xBF)

    int ch0 = (unsigned char)*cp;
    int skip = -1; /* assume failed */

    if(ch0 <= 0x7f)
        return 1; /* plain ascii */

#if UTF8_CHECK == 2
    /* Relaxed validation: lead byte fixes the length, trailers must be
       continuation bytes */
    if(RANGE(ch0,0xC0,0xDF)) { /* 2-byte sequence */
        if(RANGE0(cp[1]))
            skip = 2;
    } else if(RANGE(ch0,0xE0,0xEF)) { /* 3-byte sequence */
        if(RANGE0(cp[1]) && RANGE0(cp[2]))
            skip = 3;
    } else if(RANGE(ch0,0xF0,0xF7)) { /* 4-byte sequence */
        if(RANGE0(cp[1]) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    }
#elif UTF8_CHECK == 1
    /* Exact validation */
    if(RANGE(ch0,0xC2,0xDF)) { /* non-overlong 2-byte sequence */
        if(RANGE0(cp[1]))
            skip = 2;
    } else if(ch0 == 0xE0) { /* 3 bytes, excluding overlongs */
        if(RANGE(cp[1],0xA0,0xBF) && RANGE0(cp[2]))
            skip = 3;
    } else if(ch0 == 0xED) { /* 3 bytes, excluding surrogates */
        if(RANGE(cp[1],0x80,0x9F) && RANGE0(cp[2]))
            skip = 3;
    } else if(RANGE(ch0,0xE1,0xEC) || ch0 == 0xEE || ch0 == 0xEF) {
        if(RANGE0(cp[1]) && RANGE0(cp[2]))
            skip = 3;
    } else if(ch0 == 0xF0) { /* planes 1-3 */
        if(RANGE(cp[1],0x90,0xBF) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    } else if(RANGE(ch0,0xF1,0xF3)) { /* planes 4-15 */
        if(RANGE0(cp[1]) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    } else if(ch0 == 0xF4) { /* plane 16 */
        if(RANGE(cp[1],0x80,0x8F) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    }
#else
#error "Must Define UTF8_CHECK as 1 or 2"
#endif
    return skip;
}
151 | | |
152 | | |
153 | | /* |
154 | | * Verify that a name string is valid syntax. The allowed name |
155 | | * syntax (in RE form) is: |
156 | | * |
157 | | * ([a-zA-Z0-9_]|{UTF8})([^\x00-\x1F\x7F/]|{UTF8})* |
158 | | * |
159 | | * where UTF8 represents a multibyte UTF-8 encoding. Also, no |
160 | | * trailing spaces are permitted in names. This definition |
161 | | * must be consistent with the one in ncgen.l. We do not allow '/' |
162 | | * because HDF5 does not permit slashes in names as slash is used as a |
163 | | * group separator. If UTF-8 is supported, then a multi-byte UTF-8 |
164 | | * character can occur anywhere within an identifier. We later |
165 | | * normalize UTF-8 strings to NFC to facilitate matching and queries. |
166 | | */ |
167 | | int |
168 | | NC_check_name(const char *name) |
169 | 23.3k | { |
170 | 23.3k | int skip; |
171 | 23.3k | int ch; |
172 | 23.3k | const char *cp = name; |
173 | 23.3k | int stat; |
174 | | |
175 | 23.3k | assert(name != NULL); |
176 | | |
177 | 23.3k | if(*name == 0 /* empty names disallowed */ |
178 | 23.3k | || strchr(cp, '/')) /* '/' can't be in a name */ |
179 | 411 | goto fail; |
180 | | |
181 | | /* check validity of any UTF-8 */ |
182 | 22.8k | stat = nc_utf8_validate((const unsigned char *)name); |
183 | 22.8k | if (stat != NC_NOERR) |
184 | 451 | goto fail; |
185 | | |
186 | | /* First char must be [a-z][A-Z][0-9]_ | UTF8 */ |
187 | 22.4k | ch = (uchar)*cp; |
188 | 22.4k | if(ch <= 0x7f) { |
189 | 22.4k | if( !('A' <= ch && ch <= 'Z') |
190 | 22.4k | && !('a' <= ch && ch <= 'z') |
191 | 22.4k | && !('0' <= ch && ch <= '9') |
192 | 22.4k | && ch != '_' ) |
193 | 6.02k | goto fail; |
194 | 16.4k | cp++; |
195 | 16.4k | } else { |
196 | 0 | if((skip = nextUTF8(cp)) < 0) |
197 | 0 | goto fail; |
198 | 0 | cp += skip; |
199 | 0 | } |
200 | | |
201 | 100k | while(*cp != 0) { |
202 | 84.4k | ch = (uchar)*cp; |
203 | | /* handle simple 0x00-0x7f characters here */ |
204 | 84.4k | if(ch <= 0x7f) { |
205 | 84.4k | if( ch < ' ' || ch > 0x7E) /* control char or DEL */ |
206 | 100 | goto fail; |
207 | 84.3k | cp++; |
208 | 84.3k | } else { |
209 | 0 | if((skip = nextUTF8(cp)) < 0) goto fail; |
210 | 0 | cp += skip; |
211 | 0 | } |
212 | 84.3k | if(cp - name > NC_MAX_NAME) |
213 | 24 | return NC_EMAXNAME; |
214 | 84.3k | } |
215 | 16.2k | if(ch <= 0x7f && isspace(ch)) /* trailing spaces disallowed */ |
216 | 0 | goto fail; |
217 | 16.2k | return NC_NOERR; |
218 | 6.98k | fail: |
219 | 6.98k | return NC_EBADNAME; |
220 | 16.2k | } |
221 | | |
222 | | |
223 | | /* |
224 | | * Allocate a NC_string structure large enough |
225 | | * to hold slen characters. |
226 | | * Formerly |
227 | | NC_new_string(count, str) |
228 | | */ |
229 | | |
230 | | NC_string * |
231 | | new_NC_string(size_t slen, const char *str) |
232 | 13.0k | { |
233 | 13.0k | NC_string *ncstrp; |
234 | 13.0k | size_t sz = M_RNDUP(sizeof(NC_string)) + slen + 1; |
235 | | |
236 | | #if 0 |
237 | | sz = _RNDUP(sz, X_ALIGN); |
238 | | #endif |
239 | | |
240 | 13.0k | ncstrp = (NC_string *)malloc(sz); |
241 | 13.0k | if( ncstrp == NULL ) |
242 | 0 | return NULL; |
243 | 13.0k | (void) memset(ncstrp, 0, sz); |
244 | | |
245 | 13.0k | ncstrp->nchars = sz - M_RNDUP(sizeof(NC_string)) - 1; |
246 | 13.0k | assert(ncstrp->nchars + 1 > slen); |
247 | 13.0k | ncstrp->cp = (char *)ncstrp + M_RNDUP(sizeof(NC_string)); |
248 | | |
249 | 13.0k | if(str != NULL && *str != 0) |
250 | 6.62k | { |
251 | 6.62k | (void) strncpy(ncstrp->cp, str, ncstrp->nchars +1); |
252 | 6.62k | ncstrp->cp[ncstrp->nchars] = 0; |
253 | 6.62k | } |
254 | | |
255 | 13.0k | return(ncstrp); |
256 | 13.0k | } |
257 | | |
258 | | |
259 | | /* |
260 | | * If possible, change the value of an NC_string to 'str'. |
261 | | * |
262 | | * Formerly |
263 | | NC_re_string() |
264 | | */ |
265 | | |
266 | | int |
267 | | set_NC_string(NC_string *ncstrp, const char *str) |
268 | 0 | { |
269 | 0 | size_t slen; |
270 | |
|
271 | 0 | assert(str != NULL && *str != 0); |
272 | | |
273 | 0 | slen = strlen(str); |
274 | |
|
275 | 0 | if(ncstrp->nchars < slen) |
276 | 0 | return NC_ENOTINDEFINE; |
277 | | |
278 | 0 | strncpy(ncstrp->cp, str, ncstrp->nchars); |
279 | | /* Don't adjust ncstrp->nchars, it includes extra space in the |
280 | | * header for potential later expansion of string. */ |
281 | |
|
282 | 0 | return NC_NOERR; |
283 | 0 | } |
284 | | |
285 | | /**************************************************/ |
286 | | /* Provide local alternatives for unix functions |
287 | | not available on all machines. Place here so that |
288 | | all subsequent code modules can use them. |
289 | | */ |
290 | | |
#ifndef HAVE_STRDUP
/*
 * Fallback strdup() for platforms that lack one.
 *
 * Returns a newly malloc'd copy of s that the caller must free().
 * Returns NULL when s is NULL or when the allocation fails.
 */
char*
strdup(const char* s)
{
    char* copy;
    size_t len;

    if(s == NULL) return NULL;
    len = strlen(s) + 1; /* include the terminating NUL */
    copy = malloc(len);
    if(copy == NULL) return NULL; /* out of memory: do not write through NULL */
    memcpy(copy, s, len);
    return copy;
}
#endif
302 | | |
303 | | /**************************************************/ |
304 | | /* strlcat */ |
305 | | /* |
306 | | * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com> |
307 | | * |
308 | | * Permission to use, copy, modify, and distribute this software for any |
309 | | * purpose with or without fee is hereby granted, provided that the above |
310 | | * copyright notice and this permission notice appear in all copies. |
311 | | * |
312 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
313 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
314 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
315 | | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
316 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
317 | | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
318 | | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
319 | | */ |
320 | | |
321 | | #ifndef HAVE_STRLCAT |
322 | | #ifndef _WIN32 /* We will use strcat_s */ |
323 | | /* |
324 | | * Appends src to string dst of size dsize (unlike strncat, dsize is the |
325 | | * full size of dst, not space left). At most dsize-1 characters |
326 | | * will be copied. Always NUL terminates (unless dsize <= strlen(dst)). |
327 | | * Returns strlen(src) + MIN(dsize, strlen(initial dst)). |
328 | | * If retval >= dsize, truncation occurred. |
329 | | */ |
330 | | EXTERNL size_t |
331 | | strlcat(char* dst, const char* src, size_t dsize) |
332 | 0 | { |
333 | 0 | const char *odst = dst; |
334 | 0 | const char *osrc = src; |
335 | 0 | size_t n = dsize; |
336 | 0 | size_t dlen; |
337 | | |
338 | | /* Find the end of dst and adjust bytes left but don't go past end. */ |
339 | 0 | while (n-- != 0 && *dst != '\0') |
340 | 0 | dst++; |
341 | 0 | dlen = dst - odst; |
342 | 0 | n = dsize - dlen; |
343 | |
|
344 | 0 | if (n-- == 0) |
345 | 0 | return(dlen + strlen(src)); |
346 | 0 | while (*src != '\0') { |
347 | 0 | if (n != 0) { |
348 | 0 | *dst++ = *src; |
349 | 0 | n--; |
350 | 0 | } |
351 | 0 | src++; |
352 | 0 | } |
353 | 0 | *dst = '\0'; |
354 | |
|
355 | 0 | return(dlen + (src - osrc)); /* count does not include NUL */ |
356 | 0 | } |
357 | | #endif /*!_WIN32*/ |
358 | | #endif /*!HAVE_STRLCAT*/ |