/src/ntp-dev/sntp/libopts/tokenize.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** \file tokenize.c |
2 | | * |
3 | | * Tokenize a string, accommodating quoted strings. |
4 | | * |
5 | | * @addtogroup autoopts |
6 | | * @{ |
7 | | */ |
8 | | /* |
9 | | * This file defines the string_tokenize interface |
10 | | * This file is part of AutoOpts, a companion to AutoGen. |
11 | | * AutoOpts is free software. |
12 | | * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved |
13 | | * |
14 | | * AutoOpts is available under any one of two licenses. The license |
15 | | * in use must be one of these two and the choice is under the control |
16 | | * of the user of the license. |
17 | | * |
18 | | * The GNU Lesser General Public License, version 3 or later |
19 | | * See the files "COPYING.lgplv3" and "COPYING.gplv3" |
20 | | * |
21 | | * The Modified Berkeley Software Distribution License |
22 | | * See the file "COPYING.mbsd" |
23 | | * |
24 | | * These files have the following sha256 sums: |
25 | | * |
26 | | * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 |
27 | | * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 |
28 | | * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd |
29 | | */ |
30 | | |
31 | | #include <errno.h> |
32 | | #include <stdlib.h> |
33 | | |
34 | 0 | #define cc_t const unsigned char |
35 | 0 | #define ch_t unsigned char |
36 | | |
37 | | /* = = = START-STATIC-FORWARD = = = */ |
38 | | static void |
39 | | copy_cooked(ch_t ** ppDest, char const ** ppSrc); |
40 | | |
41 | | static void |
42 | | copy_raw(ch_t ** ppDest, char const ** ppSrc); |
43 | | |
44 | | static token_list_t * |
45 | | alloc_token_list(char const * str); |
46 | | /* = = = END-STATIC-FORWARD = = = */ |
47 | | |
48 | | static void |
49 | | copy_cooked(ch_t ** ppDest, char const ** ppSrc) |
50 | 0 | { |
51 | 0 | ch_t * pDest = (ch_t *)*ppDest; |
52 | 0 | const ch_t * pSrc = (const ch_t *)(*ppSrc + 1); |
53 | |
|
54 | 0 | for (;;) { |
55 | 0 | ch_t ch = *(pSrc++); |
56 | 0 | switch (ch) { |
57 | 0 | case NUL: *ppSrc = NULL; return; |
58 | 0 | case '"': goto done; |
59 | 0 | case '\\': |
60 | 0 | pSrc += ao_string_cook_escape_char((char *)pSrc, (char *)&ch, 0x7F); |
61 | 0 | if (ch == 0x7F) |
62 | 0 | break; |
63 | | /* FALLTHROUGH */ |
64 | | |
65 | 0 | default: |
66 | 0 | *(pDest++) = ch; |
67 | 0 | } |
68 | 0 | } |
69 | | |
70 | 0 | done: |
71 | 0 | *ppDest = (ch_t *)pDest; /* next spot for storing character */ |
72 | 0 | *ppSrc = (char const *)pSrc; /* char following closing quote */ |
73 | 0 | } |
74 | | |
75 | | |
76 | | static void |
77 | | copy_raw(ch_t ** ppDest, char const ** ppSrc) |
78 | 0 | { |
79 | 0 | ch_t * pDest = *ppDest; |
80 | 0 | cc_t * pSrc = (cc_t *) (*ppSrc + 1); |
81 | |
|
82 | 0 | for (;;) { |
83 | 0 | ch_t ch = *(pSrc++); |
84 | 0 | switch (ch) { |
85 | 0 | case NUL: *ppSrc = NULL; return; |
86 | 0 | case '\'': goto done; |
87 | 0 | case '\\': |
88 | | /* |
89 | | * *Four* escapes are handled: newline removal, escape char |
90 | | * quoting and apostrophe quoting |
91 | | */ |
92 | 0 | switch (*pSrc) { |
93 | 0 | case NUL: *ppSrc = NULL; return; |
94 | 0 | case '\r': |
95 | 0 | if (*(++pSrc) == NL) |
96 | 0 | ++pSrc; |
97 | 0 | continue; |
98 | | |
99 | 0 | case NL: |
100 | 0 | ++pSrc; |
101 | 0 | continue; |
102 | | |
103 | 0 | case '\'': |
104 | 0 | ch = '\''; |
105 | | /* FALLTHROUGH */ |
106 | |
|
107 | 0 | case '\\': |
108 | 0 | ++pSrc; |
109 | 0 | break; |
110 | 0 | } |
111 | | /* FALLTHROUGH */ |
112 | | |
113 | 0 | default: |
114 | 0 | *(pDest++) = ch; |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | 0 | done: |
119 | 0 | *ppDest = pDest; /* next spot for storing character */ |
120 | 0 | *ppSrc = (char const *) pSrc; /* char following closing quote */ |
121 | 0 | } |
122 | | |
123 | | static token_list_t * |
124 | | alloc_token_list(char const * str) |
125 | 0 | { |
126 | 0 | token_list_t * res; |
127 | |
|
128 | 0 | int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */ |
129 | |
|
130 | 0 | if (str == NULL) goto enoent_res; |
131 | | |
132 | | /* |
133 | | * Trim leading white space. Use "ENOENT" and a NULL return to indicate |
134 | | * an empty string was passed. |
135 | | */ |
136 | 0 | str = SPN_WHITESPACE_CHARS(str); |
137 | 0 | if (*str == NUL) goto enoent_res; |
138 | | |
139 | | /* |
140 | | * Take an approximate count of tokens. If no quoted strings are used, |
141 | | * it will be accurate. If quoted strings are used, it will be a little |
142 | | * high and we'll squander the space for a few extra pointers. |
143 | | */ |
144 | 0 | { |
145 | 0 | char const * pz = str; |
146 | |
|
147 | 0 | do { |
148 | 0 | max_token_ct++; |
149 | 0 | pz = BRK_WHITESPACE_CHARS(pz+1); |
150 | 0 | pz = SPN_WHITESPACE_CHARS(pz); |
151 | 0 | } while (*pz != NUL); |
152 | |
|
153 | 0 | res = malloc(sizeof(*res) + (size_t)(pz - str) |
154 | 0 | + ((size_t)max_token_ct * sizeof(ch_t *))); |
155 | 0 | } |
156 | |
|
157 | 0 | if (res == NULL) |
158 | 0 | errno = ENOMEM; |
159 | 0 | else res->tkn_list[0] = (ch_t *)(res->tkn_list + (max_token_ct - 1)); |
160 | |
|
161 | 0 | return res; |
162 | | |
163 | 0 | enoent_res: |
164 | |
|
165 | 0 | errno = ENOENT; |
166 | 0 | return NULL; |
167 | 0 | } |
168 | | |
169 | | /*=export_func ao_string_tokenize |
170 | | * |
171 | | * what: tokenize an input string |
172 | | * |
173 | | * arg: + char const * + string + string to be tokenized + |
174 | | * |
175 | | * ret_type: token_list_t * |
176 | | * ret_desc: pointer to a structure that lists each token |
177 | | * |
178 | | * doc: |
179 | | * |
180 | | * This function will convert one input string into a list of strings. |
181 | | * The list of strings is derived by separating the input based on |
182 | | * white space separation. However, if the input contains either single |
183 | | * or double quote characters, then the text after that character up to |
184 | | * a matching quote will become the string in the list. |
185 | | * |
186 | | * The returned pointer should be deallocated with @code{free(3C)} when |
187 | | * are done using the data. The data are placed in a single block of |
188 | | * allocated memory. Do not deallocate individual token/strings. |
189 | | * |
190 | | * The structure pointed to will contain at least these two fields: |
191 | | * @table @samp |
192 | | * @item tkn_ct |
193 | | * The number of tokens found in the input string. |
194 | | * @item tok_list |
195 | | * An array of @code{tkn_ct + 1} pointers to substring tokens, with |
196 | | * the last pointer set to NULL. |
197 | | * @end table |
198 | | * |
199 | | * There are two types of quoted strings: single quoted (@code{'}) and |
200 | | * double quoted (@code{"}). Singly quoted strings are fairly raw in that |
201 | | * escape characters (@code{\\}) are simply another character, except when |
202 | | * preceding the following characters: |
203 | | * @example |
204 | | * @code{\\} double backslashes reduce to one |
205 | | * @code{'} incorporates the single quote into the string |
206 | | * @code{\n} suppresses both the backslash and newline character |
207 | | * @end example |
208 | | * |
209 | | * Double quote strings are formed according to the rules of string |
210 | | * constants in ANSI-C programs. |
211 | | * |
212 | | * example: |
213 | | * @example |
214 | | * #include <stdlib.h> |
215 | | * int ix; |
216 | | * token_list_t * ptl = ao_string_tokenize(some_string) |
217 | | * for (ix = 0; ix < ptl->tkn_ct; ix++) |
218 | | * do_something_with_tkn(ptl->tkn_list[ix]); |
219 | | * free(ptl); |
220 | | * @end example |
221 | | * Note that everything is freed with the one call to @code{free(3C)}. |
222 | | * |
223 | | * err: |
224 | | * NULL is returned and @code{errno} will be set to indicate the problem: |
225 | | * @itemize @bullet |
226 | | * @item |
227 | | * @code{EINVAL} - There was an unterminated quoted string. |
228 | | * @item |
229 | | * @code{ENOENT} - The input string was empty. |
230 | | * @item |
231 | | * @code{ENOMEM} - There is not enough memory. |
232 | | * @end itemize |
233 | | =*/ |
234 | | token_list_t * |
235 | | ao_string_tokenize(char const * str) |
236 | 0 | { |
237 | 0 | token_list_t * res = alloc_token_list(str); |
238 | 0 | ch_t * pzDest; |
239 | | |
240 | | /* |
241 | | * Now copy each token into the output buffer. |
242 | | */ |
243 | 0 | if (res == NULL) |
244 | 0 | return res; |
245 | | |
246 | 0 | pzDest = (ch_t *)(res->tkn_list[0]); |
247 | 0 | res->tkn_ct = 0; |
248 | |
|
249 | 0 | do { |
250 | 0 | res->tkn_list[ res->tkn_ct++ ] = pzDest; |
251 | 0 | for (;;) { |
252 | 0 | int ch = (ch_t)*str; |
253 | 0 | if (IS_WHITESPACE_CHAR(ch)) { |
254 | 0 | found_white_space: |
255 | 0 | str = SPN_WHITESPACE_CHARS(str+1); |
256 | 0 | break; |
257 | 0 | } |
258 | | |
259 | 0 | switch (ch) { |
260 | 0 | case '"': |
261 | 0 | copy_cooked(&pzDest, &str); |
262 | 0 | if (str == NULL) { |
263 | 0 | free(res); |
264 | 0 | errno = EINVAL; |
265 | 0 | return NULL; |
266 | 0 | } |
267 | 0 | if (IS_WHITESPACE_CHAR(*str)) |
268 | 0 | goto found_white_space; |
269 | 0 | break; |
270 | | |
271 | 0 | case '\'': |
272 | 0 | copy_raw(&pzDest, &str); |
273 | 0 | if (str == NULL) { |
274 | 0 | free(res); |
275 | 0 | errno = EINVAL; |
276 | 0 | return NULL; |
277 | 0 | } |
278 | 0 | if (IS_WHITESPACE_CHAR(*str)) |
279 | 0 | goto found_white_space; |
280 | 0 | break; |
281 | | |
282 | 0 | case NUL: |
283 | 0 | goto copy_done; |
284 | | |
285 | 0 | default: |
286 | 0 | str++; |
287 | 0 | *(pzDest++) = (unsigned char)ch; |
288 | 0 | } |
289 | 0 | } copy_done:; |
290 | | |
291 | | /* |
292 | | * NUL terminate the last token and see if we have any more tokens. |
293 | | */ |
294 | 0 | *(pzDest++) = NUL; |
295 | 0 | } while (*str != NUL); |
296 | | |
297 | 0 | res->tkn_list[ res->tkn_ct ] = NULL; |
298 | |
|
299 | 0 | return res; |
300 | 0 | } |
301 | | |
#ifdef TEST
#include <stdio.h>
#include <string.h>

/*
 * Stand-alone test driver: tokenize every command line argument and
 * print either the resulting tokens or the errno value describing why
 * tokenizing failed.
 *
 * Returns 1 (after printing a usage line) when no arguments are given,
 * otherwise 0.
 */
int
main(int argc, char ** argv)
{
    if (argc == 1) {
        printf("USAGE: %s arg [ ... ]\n", *argv);
        return 1;
    }

    while (--argc > 0) {
        char * arg = *(++argv);
        token_list_t * p = ao_string_tokenize(arg);

        if (p == NULL) {
            printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                   arg, errno, strerror(errno));
            continue;
        }

        {
            /*
             * Convert the count once so that the printf argument always
             * matches its conversion, whatever integer type tkn_ct is
             * declared with.
             * NOTE(review): public AutoOpts headers declare tkn_ct as
             * "unsigned long" -- confirm against this tree's options.h.
             */
            unsigned long const tkn_ct = (unsigned long)p->tkn_ct;
            unsigned long ix;

            printf("Parsed string ``%s''\ninto %lu tokens:\n", arg, tkn_ct);

            /* only the first tkn_ct slots hold tokens */
            for (ix = 0; ix < tkn_ct; ix++)
                printf(" %3lu: ``%s''\n", ix+1,
                       (char const *)p->tkn_list[ix]);
        }

        /* the list and all token text live in one allocation */
        free(p);
    }
    return 0;
}
#endif
331 | | |
332 | | /** @} |
333 | | * |
334 | | * Local Variables: |
335 | | * mode: C |
336 | | * c-file-style: "stroustrup" |
337 | | * indent-tabs-mode: nil |
338 | | * End: |
339 | | * end of autoopts/tokenize.c */ |