Coverage Report

Created: 2022-04-16 11:23

/src/ghostpdl/base/gsargs.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2021 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
17
/* Command line argument list management */
18
#include "ctype_.h"
19
#include "stdio_.h"
20
#include "string_.h"
21
#include "gsexit.h"
22
#include "gsmemory.h"
23
#include "gsargs.h"
24
#include "gserrors.h"
25
#include "gp.h"
26
27
/* Encode the codepoint 'rune' as UTF-8 into cstr and return the number of
 * bytes written (1 to 6). No terminating NUL is stored; the caller must
 * provide room for up to 6 bytes. Any value below 0x80 (negative values
 * included) is stored as a single byte. */
int codepoint_to_utf8(char *cstr, int rune)
{
    int nbytes;
    int lead;
    int pos;

    if (rune < 0x80) {
        cstr[0] = rune;
        return 1;
    }

    /* Pick the sequence length and the marker bits of the leading byte. */
    if (rune < 0x800) {
        nbytes = 2;
        lead = 0xc0;
    } else if (rune < 0x10000) {
        nbytes = 3;
        lead = 0xe0;
    } else if (rune < 0x200000) {
        nbytes = 4;
        lead = 0xf0;
    } else if (rune < 0x4000000) {
        /* Shouldn't ever be required, but included for completeness */
        nbytes = 5;
        lead = 0xf8;
    } else {
        nbytes = 6;
        lead = 0xfc;
    }

    /* The leading byte carries the topmost payload bits... */
    cstr[0] = lead | (rune >> (6 * (nbytes - 1)));
    /* ...and every continuation byte carries the next 6 bits down. */
    for (pos = 1; pos < nbytes; pos++)
        cstr[pos] = 0x80 | ((rune >> (6 * (nbytes - 1 - pos))) & 0x3f);

    return nbytes;
}
61
62
/* Fetch the next raw byte, either from 'file' (when non-NULL) or from the
 * string *astr. For strings the pointer is advanced past the byte that is
 * returned, but is never moved beyond the terminating NUL: reaching the
 * terminator yields EOF and leaves *astr where it is. */
static int utf8_next_byte(gp_file *file, const char **astr)
{
    if (file)
        return gp_fgetc(file);
    if (**astr == 0)
        return EOF;
    return (int)(unsigned char)*(*astr)++;
}

/* Read one UTF-8 encoded codepoint from 'file', or from *astr if file is
 * NULL. Returns the codepoint, or EOF when the input is exhausted (also
 * when it runs out in the middle of a multi-byte sequence).
 *
 * Bytes that cannot start a sequence are skipped. A sequence that is cut
 * short by a plain (< 0x80) byte returns that byte; one cut short by any
 * other byte restarts decoding with that byte as a candidate leader.
 *
 * This code spots the BOM for utf8 and ignores it. Strictly speaking
 * this may be wrong, as we are only supposed to ignore it at the beginning
 * of the string, but anyone using ZWNBSP (zero width non breaking space)
 * in the middle of their strings will find it silently dropped. */
static int get_codepoint_utf8(gp_file *file, const char **astr)
{
    int byte;
    int value;
    int pending;

    for (;;) {
        byte = utf8_next_byte(file, astr);
        if (byte == EOF)
            return EOF;
        if (byte < 0x80)
            return byte;

resync: /* 'byte' is >= 0x80 here, presumably a leading byte */
        if (byte < 0xc0 || byte >= 0xfe)
            continue; /* Illegal - skip it */

        /* 'pending' is the number of continuation bytes still expected. */
        if (byte < 0xe0) {
            pending = 1;
            value = byte & 0x1f;
        } else if (byte < 0xf0) {
            pending = 2;
            value = byte & 0xf;
        } else if (byte < 0xf8) {
            pending = 3;
            value = byte & 7;
        } else if (byte < 0xfc) {
            pending = 4;
            value = byte & 3;
        } else {
            pending = 5;
            value = byte & 1;
        }

        do {
            byte = utf8_next_byte(file, astr);
            if (byte == EOF)
                return EOF;
            value = (value << 6) | (byte & 0x3f);
        } while ((byte & 0xC0) == 0x80 && --pending);

        if (pending != 0) {
            /* The rune we are collecting is improperly formed. */
            if (byte < 0x80)
                return byte; /* Just return the simple char we've ended on. */
            goto resync;     /* Start collecting again */
        }

        if (value != 0xFEFF)
            return value;
        /* BOM. Skip it and go round again. */
    }
}
117
118
/* Initialize an arg list. */
119
int
120
arg_init(arg_list     * pal,
121
         const char  **argv,
122
         int           argc,
123
         gp_file      *(*arg_fopen)(const char *fname, void *fopen_data),
124
         void         *fopen_data,
125
         int           (*get_codepoint)(gp_file *file, const char **astr),
126
         gs_memory_t  *memory)
127
683
{
128
683
    int code;
129
683
    const char *arg;
130
131
683
    pal->expand_ats = true;
132
683
    pal->arg_fopen = arg_fopen;
133
683
    pal->fopen_data = fopen_data;
134
683
    pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8);
135
683
    pal->memory = memory;
136
683
    pal->argp = argv;
137
683
    pal->argn = argc;
138
683
    pal->depth = 0;
139
683
    pal->sources[0].is_file = 0;
140
683
    pal->sources[0].u.s.memory = NULL;
141
683
    pal->sources[0].u.s.decoded = 0;
142
683
    pal->sources[0].u.s.parsed = 0;
143
144
    /* Stash the 0th one */
145
683
    code = arg_next(pal, &arg, memory);
146
683
    if (code < 0)
147
0
        return code;
148
683
    return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg);
149
683
}
150
151
/* Push a string onto an arg list. */
152
int
153
arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem)
154
0
{
155
0
    return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem);
156
0
}
157
158
int
159
arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem)
160
0
{
161
0
    arg_source *pas;
162
163
0
    if (pal->depth+1 == arg_depth_max) {
164
0
        lprintf("Too much nesting of @-files.\n");
165
0
        return 1;
166
0
    }
167
0
    pas = &pal->sources[++pal->depth];
168
0
    pas->is_file = false;
169
0
    pas->u.s.parsed = parsed;
170
0
    pas->u.s.decoded = decoded;
171
0
    pas->u.s.chars = str;
172
0
    pas->u.s.memory = mem;
173
0
    pas->u.s.str = str;
174
0
    return 0;
175
0
}
176
177
/* Clean up an arg list. */
178
void
179
arg_finit(arg_list * pal)
180
0
{
181
    /* No cleanup is required for level 0 */
182
0
    while (pal->depth) {
183
0
        arg_source *pas = &pal->sources[pal->depth--];
184
185
0
        if (pas->is_file)
186
0
            gp_fclose(pas->u.file);
187
0
        else if (pas->u.s.memory)
188
0
            gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit");
189
0
    }
190
0
}
191
192
/* Read the next codepoint from the source 'pas'.
 * Files and not-yet-decoded strings go through the decoder installed in
 * the list; strings flagged as decoded are always read as UTF-8. */
static int get_codepoint(arg_list *pal, arg_source *pas)
{
    if (pas->is_file)
        return pal->get_codepoint(pas->u.file, &pas->u.s.str);

    if (pas->u.s.decoded)
        return get_codepoint_utf8(NULL, &pas->u.s.str);

    return pal->get_codepoint(NULL, &pas->u.s.str);
}
199
200
/* Get the next arg from a list. */
201
/* Note that these are not copied to the heap. */
202
/* returns:
203
 * >0 - valid argument
204
 *  0 - arguments exhausted
205
 * <0 - error condition
206
 * *argstr is *always* set: to the arg string if it is valid,
207
 * or to NULL otherwise
208
 */
209
int
210
arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem)
211
12.9k
{
212
12.9k
    arg_source *pas;
213
12.9k
    char *cstr;
214
12.9k
    int c;
215
12.9k
    int i;
216
12.9k
    bool in_quote, eol;
217
218
12.9k
    *argstr = NULL;
219
220
    /* Loop over arguments, finding one to return. */
221
12.9k
    do {
222
12.9k
        pas = &pal->sources[pal->depth];
223
12.9k
        if (!pas->is_file && pas->u.s.parsed) {
224
            /* This string is a "pushed-back" argument (retrieved
225
             * by a preceding arg_next(), but not processed). No
226
             * decoding is required. */
227
            /* assert(pas->u.s.decoded); */
228
0
            if (strlen(pas->u.s.str) >= arg_str_max) {
229
0
                errprintf(errmem, "Command too long: %s\n", pas->u.s.str);
230
0
                return_error(gs_error_Fatal);
231
0
            }
232
0
            strcpy(pal->cstr, pas->u.s.str);
233
0
            *argstr = pal->cstr;
234
0
            if (pas->u.s.memory)
235
0
                gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next");
236
0
            pal->depth--;
237
12.9k
        } else {
238
            /* We need to decode the next argument */
239
12.9k
            if (pal->depth == 0) {
240
12.9k
                if (pal->argn <= 0)
241
674
                    return 0; /* all done */
242
                /* Move onto the next argument from the string. */
243
12.2k
                pal->argn--;
244
12.2k
                pas->u.s.str = *(pal->argp++);
245
12.2k
            }
246
            /* Skip a prefix of whitespace. */
247
12.2k
            do {
248
12.2k
                c = get_codepoint(pal, pas);
249
12.2k
            } while (c > 0 && c < 256 && isspace(c));
250
12.2k
            if (c == EOF) {
251
                /* EOF before any argument characters. */
252
0
                if (pas->is_file)
253
0
                    gp_fclose(pas->u.file);
254
0
                else if (pas->u.s.memory)
255
0
                    gs_free_object(pas->u.s.memory, pas->u.s.chars,
256
0
                                   "arg_next");
257
                /* If depth is 0, then we are reading from the simple
258
                 * argument list and we just hit an "empty" argument
259
                 * (such as -o ""). Return this. */
260
0
                if (pal->depth == 0)
261
0
                {
262
0
                    *argstr = pal->cstr;
263
0
                    pal->cstr[0] = 0;
264
0
                    break;
265
0
                }
266
                /* If depth > 0, then we're reading from a response
267
                 * file, and we've hit the end of the response file.
268
                 * Pop up one level and continue. */
269
0
                pal->depth--;
270
0
                continue; /* Next argument */
271
0
            }
272
152k
    #define is_eol(c) (c == '\r' || c == '\n')
273
            /* Convert from astr into pal->cstr, and return it as *argstr. */
274
12.2k
            *argstr = cstr = pal->cstr;
275
12.2k
            in_quote = false;
276
            /* We keep track of whether we have just read an "eol" or not,
277
             * in order to skip # characters at the start of a line
278
             * (possibly preceeded by whitespace). We do NOT want this to
279
             * apply to the start of arguments in the arg list, so only
280
             * set eol to be true, if we are in a file. */
281
12.2k
            eol = pal->depth > 0;
282
164k
            for (i = 0;;) {
283
164k
                if (c == EOF) {
284
12.2k
                    if (in_quote) {
285
0
                        cstr[i] = 0;
286
0
                        errprintf(errmem,
287
0
                                  "Unterminated quote in @-file: %s\n", cstr);
288
0
                        return_error(gs_error_Fatal);
289
0
                    }
290
12.2k
                    break; /* End of arg */
291
12.2k
                }
292
                /* c != 0 */
293
                /* If we aren't parsing from the arglist (i.e. depth > 0)
294
                 * then we break on whitespace (unless we're in quotes). */
295
152k
                if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c))
296
0
                    break; /* End of arg */
297
                /* c isn't leading or terminating whitespace. */
298
152k
                if (c == '#' && eol) {
299
                    /* Skip a comment. */
300
0
                    do {
301
0
                        c = get_codepoint(pal, pas);
302
0
                    } while (c != 0 && !is_eol(c) && c != EOF);
303
0
                    if (c == '\r')
304
0
                        c = get_codepoint(pal, pas);
305
0
                    if (c == '\n')
306
0
                        c = get_codepoint(pal, pas);
307
0
                    continue; /* Next char */
308
0
                }
309
152k
                if (c == '\\') {
310
                    /* Check for \ followed by newline. */
311
0
                    c = get_codepoint(pal, pas);
312
0
                    if (is_eol(c)) {
313
0
                        if (c == '\r')
314
0
                            c = get_codepoint(pal, pas);
315
0
                        if (c == '\n')
316
0
                            c = get_codepoint(pal, pas);
317
0
                        eol = true;
318
0
                        continue; /* Next char */
319
0
                    }
320
                    /* \ anywhere else is treated as a printing character. */
321
                    /* This is different from the Unix shells. */
322
0
                    if (i >= arg_str_max - 1) {
323
0
                        cstr[i] = 0;
324
0
                        errprintf(errmem, "Command too long: %s\n", cstr);
325
0
                        return_error(gs_error_Fatal);
326
0
                    }
327
0
                    cstr[i++] = '\\';
328
0
                    eol = false;
329
0
                    continue; /* Next char */
330
0
                }
331
                /* c will become part of the argument */
332
152k
                if (i >= arg_str_max - 1) {
333
0
                    cstr[i] = 0;
334
0
                    errprintf(errmem, "Command too long: %s\n", cstr);
335
0
                    return_error(gs_error_Fatal);
336
0
                }
337
                /* Now, some (slightly hairy) code to allow quotes to protect whitespace.
338
                 * We only allow for double-quote quoting within @files, as a) command-
339
                 * line args passed via argv are zero terminated so we should have no
340
                 * confusion with whitespace, and b) callers using the command line will
341
                 * have to have carefully quoted double-quotes to make them survive the
342
                 * shell anyway! */
343
152k
                if (c == '"' && pal->depth > 0) {
344
0
                    if (i == 0 && !in_quote)
345
0
                        in_quote = true;
346
0
                    else if (in_quote) {
347
                        /* Need to check the next char to see if we're closing at the end */
348
0
                        c = get_codepoint(pal, pas);
349
0
                        if (c > 0 && c < 256 && isspace(c)) {
350
                            /* Reading from an @file, we've hit a space char. That's good, this
351
                             * was a close quote. */
352
0
                            cstr[i] = 0;
353
0
                            break;
354
0
                        }
355
                        /* Not a close quote, just a literal quote. */
356
0
                        i += codepoint_to_utf8(&cstr[i], '"');
357
0
                        eol = false;
358
0
                        continue; /* Jump to the start of the loop without reading another char. */
359
0
                    } else
360
0
                        i += codepoint_to_utf8(&cstr[i], c);
361
0
                }
362
152k
                else
363
152k
                    i += codepoint_to_utf8(&cstr[i], c);
364
152k
                eol = is_eol(c);
365
152k
                c = get_codepoint(pal, pas);
366
152k
            }
367
12.2k
            cstr[i] = 0;
368
12.2k
        }
369
370
        /* At this point *argstr is full of utf8 encoded argument. */
371
        /* If it's an @filename argument, then deal with it, and never return
372
         * it to the caller. */
373
12.2k
        if (pal->expand_ats && **argstr == '@') {
374
0
            char *fname;
375
0
            gp_file *f;
376
0
            if (pal->depth+1 == arg_depth_max) {
377
0
                errprintf(errmem, "Too much nesting of @-files.\n");
378
0
                return_error(gs_error_Fatal);
379
0
            }
380
0
            fname = (char *)*argstr + 1; /* skip @ */
381
382
0
            if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0)
383
0
                return_error(gs_error_Fatal);
384
385
0
            f = (*pal->arg_fopen) (fname, pal->fopen_data);
386
0
            DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname));
387
0
            if (f == NULL) {
388
0
                errprintf(errmem, "Unable to open command line file %s\n", *argstr);
389
0
                return_error(gs_error_Fatal);
390
0
            }
391
0
            pas = &pal->sources[++pal->depth];
392
0
            pas->is_file = true;
393
0
            pas->u.file = f;
394
0
            *argstr = NULL; /* Empty the argument string so we don't return it. */
395
0
            continue; /* Loop back to parse the first arg from the file. */
396
0
        }
397
12.2k
    } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */
398
399
12.2k
    return 1;
400
12.9k
}
401
402
/* Copy an argument string to the heap. */
403
char *
404
arg_copy(const char *str, gs_memory_t * mem)
405
10.2k
{
406
10.2k
    char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy");
407
408
10.2k
    if (sstr == 0) {
409
0
        lprintf("Out of memory!\n");
410
0
        return NULL;
411
0
    }
412
10.2k
    strcpy(sstr, str);
413
10.2k
    return sstr;
414
10.2k
}
415
416
/* Free a previously arg_copy'd string */
417
void
418
arg_free(char *str, gs_memory_t * mem)
419
10.2k
{
420
10.2k
    gs_free_object(mem, str, "arg_copy");
421
10.2k
}
422
423
/* Compare the argument 'arg' against the plain string 'match'.
 * 'arg' is read codepoint by codepoint with the list's decoder, while
 * 'match' is read one char at a time. Returns 0 when both run out
 * together with no difference, otherwise the difference between the
 * first pair that disagrees (codepoint minus char). A NULL 'arg' or
 * 'match' yields 1. */
int arg_strcmp(arg_list *pal, const char *arg, const char *match)
{
    if (arg == NULL || match == NULL)
        return 1;

    for (;;) {
        int decoded = pal->get_codepoint(NULL, &arg);
        int expected;

        /* The end of the argument compares like a terminating NUL. */
        if (decoded == -1)
            decoded = 0;

        expected = *match++;
        if (decoded != expected)
            return decoded - expected;

        /* Equal here, so a zero means both strings ended together. */
        if (decoded == 0)
            return 0;
    }
}