/src/gdal/netcdf-c-4.7.4/libdispatch/dstring.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  *  Copyright 2018, University Corporation for Atmospheric Research  | 
3  |  |  *      See netcdf/COPYRIGHT file for copying and redistribution conditions.  | 
4  |  |  */  | 
5  |  | /* $Id: string.c,v 1.76 2010/05/26 21:43:33 dmh Exp $ */  | 
6  |  |  | 
7  |  | #include "config.h"  | 
8  |  | #include <stdlib.h>  | 
9  |  | #include <stdio.h>  | 
10  |  | #include <string.h>  | 
11  |  | #include <ctype.h>  | 
12  |  | #include <assert.h>  | 
13  |  | #include "ncdispatch.h"  | 
14  |  | #include "rnd.h"  | 
15  |  | #include "ncutf8.h"  | 
16  |  |  | 
17  |  | /* There are 3 levels of UTF8 checking: 1 => exact (validating), 2 => relaxed,  | 
18  |  |    and 3 => very relaxed. Only levels 1 and 2 are implemented by nextUTF8().  | 
19  |  | */  | 
20  |  | /* Use semi-relaxed check */  | 
21  |  | #define UTF8_CHECK 2  | 
22  |  |  | 
23  |  | /*  | 
24  |  |  * Free string, and, if needed, its values.  | 
25  |  |  * Formerly  | 
26  |  | NC_free_string()  | 
27  |  |  */  | 
28  |  | void  | 
29  |  | free_NC_string(NC_string *ncstrp)  | 
30  | 13.0k  | { | 
31  | 13.0k  |   if(ncstrp==NULL)  | 
32  | 0  |     return;  | 
33  | 13.0k  |   free(ncstrp);  | 
34  | 13.0k  | }  | 
35  |  |  | 
36  |  |  | 
/* Fall back to the semi-relaxed check if no level was selected earlier
   in this file. */
#ifndef UTF8_CHECK
#define UTF8_CHECK 2
#endif

/* Test whether byte c lies in the inclusive range [lo,hi].
   The byte is viewed as unsigned so values >= 0x80 compare correctly
   even when plain char is signed. */
#define RANGE(c,lo,hi) (((unsigned char)(c)) >= (lo) && ((unsigned char)(c)) <= (hi))
/* Test whether byte c is a UTF-8 continuation byte (0x80-0xBF) */
#define RANGE0(c) RANGE(c,0x80,0xBF)

/*
 * Return the number of bytes occupied by the character that starts at cp,
 * or -1 if the bytes at cp do not form an acceptable UTF-8 sequence.
 *
 * A byte <= 0x7f (this includes the terminating NUL) yields 1.
 *
 * Three tests of decreasing strictness can be defined (the least strict
 * one admits some sequences that are technically illegal UTF-8).
 * As regular expressions:
 *
 *  1. most correct:
 *      UTF8   ([\xC2-\xDF][\x80-\xBF])
 *           | (\xE0[\xA0-\xBF][\x80-\xBF])
 *           | ([\xE1-\xEC][\x80-\xBF][\x80-\xBF])
 *           | (\xED[\x80-\x9F][\x80-\xBF])
 *           | ([\xEE-\xEF][\x80-\xBF][\x80-\xBF])
 *           | (\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF])
 *           | ([\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF])
 *           | (\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF])
 *
 *  2. partially relaxed:
 *      UTF8   ([\xC0-\xDF][\x80-\xBF])
 *           | ([\xE0-\xEF][\x80-\xBF][\x80-\xBF])
 *           | ([\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])
 *
 *  3. most relaxed:
 *      UTF8   ([\xC0-\xD6].)|([\xE0-\xEF]..)|([\xF0-\xF7]...)
 *
 * UTF8_CHECK selects test 1 or test 2; test 3 is not implemented.
 * The tests are derived from the table at
 *     http://www.w3.org/2005/03/23-lex-U
 *
 * Every continuation test uses RANGE0, which is false for NUL, and the
 * tests are chained with &&, so evaluation stops at the string terminator
 * and never reads beyond it.
 */
static int
nextUTF8(const char* cp)
{
    int ch0 = (unsigned char)cp[0];
    int skip = -1; /* assume failed */

    if(ch0 <= 0x7f)
        return 1; /* remove ascii case */

#if UTF8_CHECK == 2
    /* Do relaxed validation check */
    if(RANGE(ch0,0xC0,0xDF)) { /* 2-bytes, but check */
        if(RANGE0(cp[1]))
            skip = 2;
    } else if(RANGE(ch0,0xE0,0xEF)) { /* 3-bytes, but check */
        if(RANGE0(cp[1]) && RANGE0(cp[2]))
            skip = 3;
    } else if(RANGE(ch0,0xF0,0xF7)) { /* 4-bytes, but check */
        if(RANGE0(cp[1]) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    }
#elif UTF8_CHECK == 1
    /* Do exact validation check */
    if(RANGE(ch0,0xC2,0xDF)) { /* non-overlong 2-bytes */
        if(RANGE0(cp[1]))
            skip = 2;
    } else if(ch0 == 0xE0) { /* 3-bytes, not overlong */
        if(RANGE(cp[1],0xA0,0xBF) && RANGE0(cp[2]))
            skip = 3;
    } else if(ch0 == 0xED) { /* 3-bytes minus surrogates */
        if(RANGE(cp[1],0x80,0x9F) && RANGE0(cp[2]))
            skip = 3;
    } else if(RANGE(ch0,0xE1,0xEC) || ch0 == 0xEE || ch0 == 0xEF) {
        if(RANGE0(cp[1]) && RANGE0(cp[2]))
            skip = 3;
    } else if(ch0 == 0xF0) { /* planes 1-3 */
        if(RANGE(cp[1],0x90,0xBF) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    } else if(ch0 == 0xF4) { /* plane 16 */
        if(RANGE(cp[1],0x80,0x8F) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    } else if(RANGE(ch0,0xF1,0xF3)) { /* planes 4-15 */
        if(RANGE0(cp[1]) && RANGE0(cp[2]) && RANGE0(cp[3]))
            skip = 4;
    }
#else
#error "Must Define UTF8_CHECK as 1 or 2"
#endif
    return skip;
}
151  |  |  | 
152  |  |  | 
153  |  | /*  | 
154  |  |  * Verify that a name string is valid syntax.  The allowed name  | 
155  |  |  * syntax (in RE form) is:  | 
156  |  |  *  | 
157  |  |  * ([a-zA-Z0-9_]|{UTF8})([^\x00-\x1F\x7F/]|{UTF8})* | 
158  |  |  *  | 
159  |  |  * where UTF8 represents a multibyte UTF-8 encoding.  Also, no  | 
160  |  |  * trailing spaces are permitted in names.  This definition  | 
161  |  |  * must be consistent with the one in ncgen.l.  We do not allow '/'  | 
162  |  |  * because HDF5 does not permit slashes in names as slash is used as a  | 
163  |  |  * group separator.  If UTF-8 is supported, then a multi-byte UTF-8  | 
164  |  |  * character can occur anywhere within an identifier.  We later  | 
165  |  |  * normalize UTF-8 strings to NFC to facilitate matching and queries.  | 
166  |  |  */  | 
167  |  | int  | 
168  |  | NC_check_name(const char *name)  | 
169  | 23.3k  | { | 
170  | 23.3k  |   int skip;  | 
171  | 23.3k  |   int ch;  | 
172  | 23.3k  |   const char *cp = name;  | 
173  | 23.3k  |   int stat;  | 
174  |  |  | 
175  | 23.3k  |   assert(name != NULL);  | 
176  |  |  | 
177  | 23.3k  |   if(*name == 0    /* empty names disallowed */  | 
178  | 23.3k  |      || strchr(cp, '/'))  /* '/' can't be in a name */  | 
179  | 411  |     goto fail;  | 
180  |  |  | 
181  |  |   /* check validity of any UTF-8 */  | 
182  | 22.8k  |   stat = nc_utf8_validate((const unsigned char *)name);  | 
183  | 22.8k  |   if (stat != NC_NOERR)  | 
184  | 451  |       goto fail;  | 
185  |  |  | 
186  |  |   /* First char must be [a-z][A-Z][0-9]_ | UTF8 */  | 
187  | 22.4k  |   ch = (uchar)*cp;  | 
188  | 22.4k  |   if(ch <= 0x7f) { | 
189  | 22.4k  |       if(   !('A' <= ch && ch <= 'Z') | 
190  | 22.4k  |          && !('a' <= ch && ch <= 'z') | 
191  | 22.4k  |          && !('0' <= ch && ch <= '9') | 
192  | 22.4k  |          && ch != '_' )  | 
193  | 6.02k  |     goto fail;  | 
194  | 16.4k  |       cp++;  | 
195  | 16.4k  |   } else { | 
196  | 0  |       if((skip = nextUTF8(cp)) < 0)  | 
197  | 0  |     goto fail;  | 
198  | 0  |       cp += skip;  | 
199  | 0  |   }  | 
200  |  |  | 
201  | 100k  |   while(*cp != 0) { | 
202  | 84.4k  |       ch = (uchar)*cp;  | 
203  |  |       /* handle simple 0x00-0x7f characters here */  | 
204  | 84.4k  |       if(ch <= 0x7f) { | 
205  | 84.4k  |                 if( ch < ' ' || ch > 0x7E) /* control char or DEL */  | 
206  | 100  |       goto fail;  | 
207  | 84.3k  |     cp++;  | 
208  | 84.3k  |       } else { | 
209  | 0  |     if((skip = nextUTF8(cp)) < 0) goto fail;  | 
210  | 0  |     cp += skip;  | 
211  | 0  |       }  | 
212  | 84.3k  |       if(cp - name > NC_MAX_NAME)  | 
213  | 24  |     return NC_EMAXNAME;  | 
214  | 84.3k  |   }  | 
215  | 16.2k  |   if(ch <= 0x7f && isspace(ch)) /* trailing spaces disallowed */  | 
216  | 0  |       goto fail;  | 
217  | 16.2k  |   return NC_NOERR;  | 
218  | 6.98k  | fail:  | 
219  | 6.98k  |         return NC_EBADNAME;  | 
220  | 16.2k  | }  | 
221  |  |  | 
222  |  |  | 
223  |  | /*  | 
224  |  |  * Allocate a NC_string structure large enough  | 
225  |  |  * to hold slen characters.  | 
226  |  |  * Formerly  | 
227  |  | NC_new_string(count, str)  | 
228  |  |  */  | 
229  |  |  | 
230  |  | NC_string *  | 
231  |  | new_NC_string(size_t slen, const char *str)  | 
232  | 13.0k  | { | 
233  | 13.0k  |   NC_string *ncstrp;  | 
234  | 13.0k  |   size_t sz = M_RNDUP(sizeof(NC_string)) + slen + 1;  | 
235  |  |  | 
236  |  | #if 0  | 
237  |  |   sz = _RNDUP(sz, X_ALIGN);  | 
238  |  | #endif  | 
239  |  |  | 
240  | 13.0k  |   ncstrp = (NC_string *)malloc(sz);  | 
241  | 13.0k  |   if( ncstrp == NULL )  | 
242  | 0  |     return NULL;  | 
243  | 13.0k  |   (void) memset(ncstrp, 0, sz);  | 
244  |  |  | 
245  | 13.0k  |   ncstrp->nchars = sz - M_RNDUP(sizeof(NC_string)) - 1;  | 
246  | 13.0k  |   assert(ncstrp->nchars + 1 > slen);  | 
247  | 13.0k  |   ncstrp->cp = (char *)ncstrp + M_RNDUP(sizeof(NC_string));  | 
248  |  |  | 
249  | 13.0k  |   if(str != NULL && *str != 0)  | 
250  | 6.62k  |   { | 
251  | 6.62k  |     (void) strncpy(ncstrp->cp, str, ncstrp->nchars +1);  | 
252  | 6.62k  |     ncstrp->cp[ncstrp->nchars] = 0;  | 
253  | 6.62k  |   }  | 
254  |  |  | 
255  | 13.0k  |   return(ncstrp);  | 
256  | 13.0k  | }  | 
257  |  |  | 
258  |  |  | 
259  |  | /*  | 
260  |  |  * If possible, change the value of an NC_string to 'str'.  | 
261  |  |  *  | 
262  |  |  * Formerly  | 
263  |  | NC_re_string()  | 
264  |  |  */  | 
265  |  |  | 
266  |  | int  | 
267  |  |    set_NC_string(NC_string *ncstrp, const char *str)  | 
268  | 0  |  { | 
269  | 0  |   size_t slen;  | 
270  |  | 
  | 
271  | 0  |   assert(str != NULL && *str != 0);  | 
272  |  |  | 
273  | 0  |   slen = strlen(str);  | 
274  |  | 
  | 
275  | 0  |   if(ncstrp->nchars < slen)  | 
276  | 0  |     return NC_ENOTINDEFINE;  | 
277  |  |  | 
278  | 0  |   strncpy(ncstrp->cp, str, ncstrp->nchars);  | 
279  |  |   /* Don't adjust ncstrp->nchars, it includes extra space in the  | 
280  |  |    * header for potential later expansion of string. */  | 
281  |  | 
  | 
282  | 0  |   return NC_NOERR;  | 
283  | 0  | }  | 
284  |  |  | 
285  |  | /**************************************************/  | 
286  |  | /* Provide local alternatives for unix functions  | 
287  |  |    not available on all machines. Place here so that  | 
288  |  |    all subsequent code modules can use them.  | 
289  |  | */  | 
290  |  |  | 
#ifndef HAVE_STRDUP
/*
 * Local replacement for strdup(): return a freshly malloc'ed copy of s.
 * Returns NULL when s is NULL or when the allocation fails.
 * The caller owns the result and must free() it.
 */
char*
strdup(const char* s)
{
    char* dup;
    size_t len;

    if(s == NULL) return NULL;
    len = strlen(s) + 1; /* include the terminating NUL */
    dup = malloc(len);
    if(dup == NULL) return NULL; /* out of memory: do not write through NULL */
    memcpy(dup, s, len);
    return dup;
}
#endif
302  |  |  | 
303  |  | /**************************************************/  | 
304  |  | /* strlcat */  | 
305  |  | /*  | 
306  |  |  * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>  | 
307  |  |  *  | 
308  |  |  * Permission to use, copy, modify, and distribute this software for any  | 
309  |  |  * purpose with or without fee is hereby granted, provided that the above  | 
310  |  |  * copyright notice and this permission notice appear in all copies.  | 
311  |  |  *  | 
312  |  |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES  | 
313  |  |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF  | 
314  |  |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  | 
315  |  |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES  | 
316  |  |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN  | 
317  |  |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  | 
318  |  |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  | 
319  |  |  */  | 
320  |  |  | 
321  |  | #ifndef HAVE_STRLCAT  | 
322  |  | #ifndef _WIN32 /* We will use strcat_s */  | 
323  |  | /*  | 
324  |  |  * Appends src to string dst of size dsize (unlike strncat, dsize is the  | 
325  |  |  * full size of dst, not space left).  At most dsize-1 characters  | 
326  |  |  * will be copied.  Always NUL terminates (unless dsize <= strlen(dst)).  | 
327  |  |  * Returns strlen(src) + MIN(dsize, strlen(initial dst)).  | 
328  |  |  * If retval >= dsize, truncation occurred.  | 
329  |  |  */  | 
330  |  | EXTERNL size_t  | 
331  |  | strlcat(char* dst, const char* src, size_t dsize)  | 
332  | 0  | { | 
333  | 0  |   const char *odst = dst;  | 
334  | 0  |   const char *osrc = src;  | 
335  | 0  |   size_t n = dsize;  | 
336  | 0  |   size_t dlen;  | 
337  |  |  | 
338  |  |   /* Find the end of dst and adjust bytes left but don't go past end. */  | 
339  | 0  |   while (n-- != 0 && *dst != '\0')  | 
340  | 0  |     dst++;  | 
341  | 0  |   dlen = dst - odst;  | 
342  | 0  |   n = dsize - dlen;  | 
343  |  | 
  | 
344  | 0  |   if (n-- == 0)  | 
345  | 0  |     return(dlen + strlen(src));  | 
346  | 0  |   while (*src != '\0') { | 
347  | 0  |     if (n != 0) { | 
348  | 0  |       *dst++ = *src;  | 
349  | 0  |       n--;  | 
350  | 0  |     }  | 
351  | 0  |     src++;  | 
352  | 0  |   }  | 
353  | 0  |   *dst = '\0';  | 
354  |  | 
  | 
355  | 0  |   return(dlen + (src - osrc));  /* count does not include NUL */  | 
356  | 0  | }  | 
357  |  | #endif /*!_WIN32*/  | 
358  |  | #endif /*!HAVE_STRLCAT*/  |