Coverage Report

Created: 2023-05-19 06:16

/src/ntp-dev/sntp/libopts/tokenize.c
Line
Count
Source (jump to first uncovered line)
1
/** \file tokenize.c
2
 *
3
 *  Tokenize a string, accommodating quoted strings.
4
 *
5
 * @addtogroup autoopts
6
 * @{
7
 */
8
/*
9
 *  This file defines the string_tokenize interface
10
 *  This file is part of AutoOpts, a companion to AutoGen.
11
 *  AutoOpts is free software.
12
 *  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
13
 *
14
 *  AutoOpts is available under any one of two licenses.  The license
15
 *  in use must be one of these two and the choice is under the control
16
 *  of the user of the license.
17
 *
18
 *   The GNU Lesser General Public License, version 3 or later
19
 *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
20
 *
21
 *   The Modified Berkeley Software Distribution License
22
 *      See the file "COPYING.mbsd"
23
 *
24
 *  These files have the following sha256 sums:
25
 *
26
 *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
27
 *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
28
 *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
29
 */
30
31
#include <errno.h>
32
#include <stdlib.h>
33
34
0
#define cc_t   const unsigned char
35
0
#define ch_t   unsigned char
36
37
/* = = = START-STATIC-FORWARD = = = */
38
static void
39
copy_cooked(ch_t ** ppDest, char const ** ppSrc);
40
41
static void
42
copy_raw(ch_t ** ppDest, char const ** ppSrc);
43
44
static token_list_t *
45
alloc_token_list(char const * str);
46
/* = = = END-STATIC-FORWARD = = = */
47
48
static void
49
copy_cooked(ch_t ** ppDest, char const ** ppSrc)
50
0
{
51
0
    ch_t * pDest = (ch_t *)*ppDest;
52
0
    const ch_t * pSrc  = (const ch_t *)(*ppSrc + 1);
53
54
0
    for (;;) {
55
0
        ch_t ch = *(pSrc++);
56
0
        switch (ch) {
57
0
        case NUL:   *ppSrc = NULL; return;
58
0
        case '"':   goto done;
59
0
        case '\\':
60
0
            pSrc += ao_string_cook_escape_char((char *)pSrc, (char *)&ch, 0x7F);
61
0
            if (ch == 0x7F)
62
0
                break;
63
            /* FALLTHROUGH */
64
65
0
        default:
66
0
            *(pDest++) = ch;
67
0
        }
68
0
    }
69
70
0
 done:
71
0
    *ppDest = (ch_t *)pDest; /* next spot for storing character */
72
0
    *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
73
0
}
74
75
76
static void
77
copy_raw(ch_t ** ppDest, char const ** ppSrc)
78
0
{
79
0
    ch_t * pDest = *ppDest;
80
0
    cc_t * pSrc  = (cc_t *) (*ppSrc + 1);
81
82
0
    for (;;) {
83
0
        ch_t ch = *(pSrc++);
84
0
        switch (ch) {
85
0
        case NUL:   *ppSrc = NULL; return;
86
0
        case '\'':  goto done;
87
0
        case '\\':
88
            /*
89
             *  *Four* escapes are handled:  newline removal, escape char
90
             *  quoting and apostrophe quoting
91
             */
92
0
            switch (*pSrc) {
93
0
            case NUL:   *ppSrc = NULL; return;
94
0
            case '\r':
95
0
                if (*(++pSrc) == NL)
96
0
                    ++pSrc;
97
0
                continue;
98
99
0
            case NL:
100
0
                ++pSrc;
101
0
                continue;
102
103
0
            case '\'':
104
0
                ch = '\'';
105
                /* FALLTHROUGH */
106
107
0
            case '\\':
108
0
                ++pSrc;
109
0
                break;
110
0
            }
111
            /* FALLTHROUGH */
112
113
0
        default:
114
0
            *(pDest++) = ch;
115
0
        }
116
0
    }
117
118
0
 done:
119
0
    *ppDest = pDest; /* next spot for storing character */
120
0
    *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
121
0
}
122
123
static token_list_t *
124
alloc_token_list(char const * str)
125
0
{
126
0
    token_list_t * res;
127
128
0
    int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
129
130
0
    if (str == NULL) goto enoent_res;
131
132
    /*
133
     *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
134
     *  an empty string was passed.
135
     */
136
0
    str = SPN_WHITESPACE_CHARS(str);
137
0
    if (*str == NUL)  goto enoent_res;
138
139
    /*
140
     *  Take an approximate count of tokens.  If no quoted strings are used,
141
     *  it will be accurate.  If quoted strings are used, it will be a little
142
     *  high and we'll squander the space for a few extra pointers.
143
     */
144
0
    {
145
0
        char const * pz = str;
146
147
0
        do {
148
0
            max_token_ct++;
149
0
            pz = BRK_WHITESPACE_CHARS(pz+1);
150
0
            pz = SPN_WHITESPACE_CHARS(pz);
151
0
        } while (*pz != NUL);
152
153
0
        res = malloc(sizeof(*res) + (size_t)(pz - str)
154
0
                     + ((size_t)max_token_ct * sizeof(ch_t *)));
155
0
    }
156
157
0
    if (res == NULL)
158
0
        errno = ENOMEM;
159
0
    else res->tkn_list[0] = (ch_t *)(res->tkn_list + (max_token_ct - 1));
160
161
0
    return res;
162
163
0
    enoent_res:
164
165
0
    errno = ENOENT;
166
0
    return NULL;
167
0
}
168
169
/*=export_func ao_string_tokenize
170
 *
171
 * what: tokenize an input string
172
 *
173
 * arg:  + char const * + string + string to be tokenized +
174
 *
175
 * ret_type:  token_list_t *
176
 * ret_desc:  pointer to a structure that lists each token
177
 *
178
 * doc:
179
 *
180
 * This function will convert one input string into a list of strings.
181
 * The list of strings is derived by separating the input based on
182
 * white space separation.  However, if the input contains either single
183
 * or double quote characters, then the text after that character up to
184
 * a matching quote will become the string in the list.
185
 *
186
 *  The returned pointer should be deallocated with @code{free(3C)} when
187
 *  you are done using the data.  The data are placed in a single block of
188
 *  allocated memory.  Do not deallocate individual token/strings.
189
 *
190
 *  The structure pointed to will contain at least these two fields:
191
 *  @table @samp
192
 *  @item tkn_ct
193
 *  The number of tokens found in the input string.
194
 *  @item tkn_list
195
 *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
196
 *  the last pointer set to NULL.
197
 *  @end table
198
 *
199
 * There are two types of quoted strings: single quoted (@code{'}) and
200
 * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
201
 * escape characters (@code{\\}) are simply another character, except when
202
 * preceding the following characters:
203
 * @example
204
 * @code{\\}  double backslashes reduce to one
205
 * @code{'}   incorporates the single quote into the string
206
 * @code{\n}  suppresses both the backslash and newline character
207
 * @end example
208
 *
209
 * Double quote strings are formed according to the rules of string
210
 * constants in ANSI-C programs.
211
 *
212
 * example:
213
 * @example
214
 *    #include <stdlib.h>
215
 *    int ix;
216
 *    token_list_t * ptl = ao_string_tokenize(some_string)
217
 *    for (ix = 0; ix < ptl->tkn_ct; ix++)
218
 *       do_something_with_tkn(ptl->tkn_list[ix]);
219
 *    free(ptl);
220
 * @end example
221
 * Note that everything is freed with the one call to @code{free(3C)}.
222
 *
223
 * err:
224
 *  NULL is returned and @code{errno} will be set to indicate the problem:
225
 *  @itemize @bullet
226
 *  @item
227
 *  @code{EINVAL} - There was an unterminated quoted string.
228
 *  @item
229
 *  @code{ENOENT} - The input string was empty.
230
 *  @item
231
 *  @code{ENOMEM} - There is not enough memory.
232
 *  @end itemize
233
=*/
234
token_list_t *
235
ao_string_tokenize(char const * str)
236
0
{
237
0
    token_list_t * res = alloc_token_list(str);
238
0
    ch_t * pzDest;
239
240
    /*
241
     *  Now copy each token into the output buffer.
242
     */
243
0
    if (res == NULL)
244
0
        return res;
245
246
0
    pzDest = (ch_t *)(res->tkn_list[0]);
247
0
    res->tkn_ct  = 0;
248
249
0
    do  {
250
0
        res->tkn_list[ res->tkn_ct++ ] = pzDest;
251
0
        for (;;) {
252
0
            int ch = (ch_t)*str;
253
0
            if (IS_WHITESPACE_CHAR(ch)) {
254
0
            found_white_space:
255
0
                str = SPN_WHITESPACE_CHARS(str+1);
256
0
                break;
257
0
            }
258
259
0
            switch (ch) {
260
0
            case '"':
261
0
                copy_cooked(&pzDest, &str);
262
0
                if (str == NULL) {
263
0
                    free(res);
264
0
                    errno = EINVAL;
265
0
                    return NULL;
266
0
                }
267
0
                if (IS_WHITESPACE_CHAR(*str))
268
0
                    goto found_white_space;
269
0
                break;
270
271
0
            case '\'':
272
0
                copy_raw(&pzDest, &str);
273
0
                if (str == NULL) {
274
0
                    free(res);
275
0
                    errno = EINVAL;
276
0
                    return NULL;
277
0
                }
278
0
                if (IS_WHITESPACE_CHAR(*str))
279
0
                    goto found_white_space;
280
0
                break;
281
282
0
            case NUL:
283
0
                goto copy_done;
284
285
0
            default:
286
0
                str++;
287
0
                *(pzDest++) = (unsigned char)ch;
288
0
            }
289
0
        } copy_done:;
290
291
        /*
292
         * NUL terminate the last token and see if we have any more tokens.
293
         */
294
0
        *(pzDest++) = NUL;
295
0
    } while (*str != NUL);
296
297
0
    res->tkn_list[ res->tkn_ct ] = NULL;
298
299
0
    return res;
300
0
}
301
302
#ifdef TEST
303
#include <stdio.h>
304
#include <string.h>
305
306
int
307
main(int argc, char ** argv)
308
{
309
    if (argc == 1) {
310
        printf("USAGE:  %s arg [ ... ]\n", *argv);
311
        return 1;
312
    }
313
    while (--argc > 0) {
314
        char * arg = *(++argv);
315
        token_list_t * p = ao_string_tokenize(arg);
316
        if (p == NULL) {
317
            printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
318
                   arg, errno, strerror(errno));
319
        } else {
320
            int ix = 0;
321
            printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct);
322
            do {
323
                printf(" %3d:  ``%s''\n", ix+1, p->tkn_list[ix]);
324
            } while (++ix < p->tkn_ct);
325
            free(p);
326
        }
327
    }
328
    return 0;
329
}
330
#endif
331
332
/** @}
333
 *
334
 * Local Variables:
335
 * mode: C
336
 * c-file-style: "stroustrup"
337
 * indent-tabs-mode: nil
338
 * End:
339
 * end of autoopts/tokenize.c */