Coverage Report

Created: 2022-10-31 07:00

/src/ghostpdl/base/gsargs.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2022 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13
   CA 94945, U.S.A., +1(415)492-9861, for further information.
14
*/
15
16
17
/* Command line argument list management */
18
#include "ctype_.h"
19
#include "stdio_.h"
20
#include "string_.h"
21
#include "gsexit.h"
22
#include "gsmemory.h"
23
#include "gsargs.h"
24
#include "gserrors.h"
25
#include "gp.h"
26
27
/* Encode the codepoint 'rune' as UTF-8 into cstr, returning the number
 * of bytes written (1 to 6). No terminating NUL is written.
 *
 * Values below 0x80 (including any negative value) are stored as a
 * single byte. Larger values use the classic (pre-RFC 3629) scheme, so
 * 5 and 6 byte forms can be produced for values >= 0x200000; these
 * shouldn't ever be required, but are included for completeness.
 * The caller must supply room for up to 6 bytes. */
int codepoint_to_utf8(char *cstr, int rune)
{
    int trailing;   /* Number of continuation bytes to emit */
    int lead;       /* Marker bits for the leading byte */
    int n = 0;

    /* Single byte case. */
    if (rune < 0x80) {
        cstr[n++] = rune;
        return n;
    }

    /* Pick the sequence length from the magnitude of the codepoint. */
    if (rune < 0x800)
        trailing = 1, lead = 0xc0;
    else if (rune < 0x10000)
        trailing = 2, lead = 0xe0;
    else if (rune < 0x200000)
        trailing = 3, lead = 0xf0;
    else if (rune < 0x4000000)
        trailing = 4, lead = 0xf8;
    else
        trailing = 5, lead = 0xfc;

    /* The leading byte carries the topmost payload bits... */
    cstr[n++] = lead | (rune >> (6 * trailing));
    /* ...and each continuation byte carries the next 6, high to low. */
    while (trailing-- > 0)
        cstr[n++] = 0x80 | ((rune >> (6 * trailing)) & 0x3f);

    return n;
}
61
62
/* Fetch one raw byte of input: from 'file' if it is non-NULL, otherwise
 * from the string at *astr (advancing *astr past the byte). Returns EOF
 * at the end of the string; the terminating NUL is never stepped over. */
static int utf8_next_byte(gp_file *file, const char **astr)
{
    if (file)
        return gp_fgetc(file);
    if (**astr == 0)
        return EOF;
    return (int)(unsigned char)*(*astr)++;
}

/* Read one UTF-8 encoded codepoint, either from 'file' (if non-NULL) or
 * from the string at *astr.
 *
 * Returns the decoded codepoint, or EOF when the input is exhausted
 * (including when it runs out part way through a sequence).
 *
 * Illegal leading bytes (0x80..0xBF, 0xFE, 0xFF) are skipped. If a
 * sequence is cut short by a byte that is not a continuation byte, then
 * that byte is returned directly if it is plain ASCII, or else decoding
 * restarts with it as the new leading byte.
 *
 * The BOM (U+FEFF) is spotted and ignored wherever it appears. Strictly
 * speaking this may be wrong, as it should only be ignored at the very
 * start of the data, but anyone using ZWNBSP (zero width non breaking
 * space) in the middle of their strings gets what they deserve. */
static int get_codepoint_utf8(gp_file *file, const char **astr)
{
    int c = utf8_next_byte(file, astr);

    for (;;) {
        int rune;
        int len;    /* Continuation bytes still expected */

        if (c == EOF)
            return EOF;
        if (c < 0x80)
            return c;   /* Plain ASCII */

        /* c is >= 0x80, presumably a leading byte. */
        if (c < 0xc0 || c >= 0xfe) {
            /* Illegal - skip it */
            c = utf8_next_byte(file, astr);
            continue;
        }
        if (c < 0xe0) {
            len = 1;
            rune = c & 0x1f;
        } else if (c < 0xf0) {
            len = 2;
            rune = c & 0xf;
        } else if (c < 0xf8) {
            len = 3;
            rune = c & 7;
        } else if (c < 0xfc) {
            len = 4;
            rune = c & 3;
        } else {
            len = 5;
            rune = c & 1;
        }

        /* Gather the continuation bytes. We stop early (leaving len
         * non-zero) as soon as we hit a byte that isn't one. */
        do {
            c = utf8_next_byte(file, astr);
            if (c == EOF)
                return EOF;
            rune = (rune << 6) | (c & 0x3f);
        } while ((c & 0xC0) == 0x80 && --len);

        if (len != 0) {
            /* The rune we were collecting is improperly formed. Go
             * around again with the byte we ended on: the checks at
             * the top of the loop return it if it is a simple char,
             * and otherwise start collecting again from it. */
            continue;
        }

        if (rune != 0xFEFF)
            return rune;

        /* BOM. Skip it */
        c = utf8_next_byte(file, astr);
    }
}
117
118
/* Initialize an arg list. */
119
int
120
arg_init(arg_list     * pal,
121
         const char  **argv,
122
         int           argc,
123
         gp_file      *(*arg_fopen)(const char *fname, void *fopen_data),
124
         void         *fopen_data,
125
         int           (*get_codepoint)(gp_file *file, const char **astr),
126
         gs_memory_t  *memory)
127
89.2k
{
128
89.2k
    int code;
129
89.2k
    const char *arg;
130
131
89.2k
    pal->expand_ats = true;
132
89.2k
    pal->arg_fopen = arg_fopen;
133
89.2k
    pal->fopen_data = fopen_data;
134
89.2k
    pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8);
135
89.2k
    pal->memory = memory;
136
89.2k
    pal->argp = argv;
137
89.2k
    pal->argn = argc;
138
89.2k
    pal->depth = 0;
139
89.2k
    pal->sources[0].is_file = 0;
140
89.2k
    pal->sources[0].u.s.memory = NULL;
141
89.2k
    pal->sources[0].u.s.decoded = 0;
142
89.2k
    pal->sources[0].u.s.parsed = 0;
143
144
    /* Stash the 0th one */
145
89.2k
    code = arg_next(pal, &arg, memory);
146
89.2k
    if (code < 0)
147
0
        return code;
148
89.2k
    return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg);
149
89.2k
}
150
151
/* Push a string onto an arg list. */
152
int
153
arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem)
154
0
{
155
0
    return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem);
156
0
}
157
158
int
159
arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem)
160
0
{
161
0
    arg_source *pas;
162
163
0
    if (pal->depth+1 == arg_depth_max) {
164
0
        lprintf("Too much nesting of @-files.\n");
165
0
        return 1;
166
0
    }
167
0
    pas = &pal->sources[++pal->depth];
168
0
    pas->is_file = false;
169
0
    pas->u.s.parsed = parsed;
170
0
    pas->u.s.decoded = decoded;
171
0
    pas->u.s.chars = str;
172
0
    pas->u.s.memory = mem;
173
0
    pas->u.s.str = str;
174
0
    return 0;
175
0
}
176
177
/* Clean up an arg list. */
178
void
179
arg_finit(arg_list * pal)
180
0
{
181
    /* No cleanup is required for level 0 */
182
0
    while (pal->depth) {
183
0
        arg_source *pas = &pal->sources[pal->depth--];
184
185
0
        if (pas->is_file)
186
0
            gp_fclose(pas->u.file);
187
0
        else if (pas->u.s.memory)
188
0
            gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit");
189
0
    }
190
0
}
191
192
/* Read the next codepoint from the source 'pas'.
 * Files, and strings that have not yet been decoded, go through the
 * decoder configured in the arg list; strings flagged as already
 * decoded are always read as UTF-8. */
static int get_codepoint(arg_list *pal, arg_source *pas)
{
    if (pas->is_file)
        return pal->get_codepoint(pas->u.file, &pas->u.s.str);
    if (pas->u.s.decoded)
        return get_codepoint_utf8(NULL, &pas->u.s.str);
    return pal->get_codepoint(NULL, &pas->u.s.str);
}
199
200
/* Get the next arg from a list. */
201
/* Note that these are not copied to the heap. */
202
/* returns:
203
 * >0 - valid argument
204
 *  0 - arguments exhausted
205
 * <0 - error condition
206
 * *argstr is *always* set: to the arg string if it is valid,
207
 * or to NULL otherwise
208
 */
209
int
210
arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem)
211
1.66M
{
212
1.66M
    arg_source *pas;
213
1.66M
    char *cstr;
214
1.66M
    int c;
215
1.66M
    int i;
216
1.66M
    bool in_quote, eol;
217
1.66M
    int prev_c_was_equals = 0;
218
219
1.66M
    *argstr = NULL;
220
221
    /* Loop over arguments, finding one to return. */
222
1.66M
    do {
223
1.66M
        pas = &pal->sources[pal->depth];
224
1.66M
        if (!pas->is_file && pas->u.s.parsed) {
225
            /* This string is a "pushed-back" argument (retrieved
226
             * by a preceding arg_next(), but not processed). No
227
             * decoding is required. */
228
            /* assert(pas->u.s.decoded); */
229
0
            if (strlen(pas->u.s.str) >= arg_str_max) {
230
0
                errprintf(errmem, "Command too long: %s\n", pas->u.s.str);
231
0
                return_error(gs_error_Fatal);
232
0
            }
233
0
            strcpy(pal->cstr, pas->u.s.str);
234
0
            *argstr = pal->cstr;
235
0
            if (pas->u.s.memory)
236
0
                gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next");
237
0
            pal->depth--;
238
1.66M
        } else {
239
            /* We need to decode the next argument */
240
1.66M
            if (pal->depth == 0) {
241
1.66M
                if (pal->argn <= 0)
242
54.1k
                    return 0; /* all done */
243
                /* Move onto the next argument from the string. */
244
1.60M
                pal->argn--;
245
1.60M
                pas->u.s.str = *(pal->argp++);
246
1.60M
            }
247
            /* Skip a prefix of whitespace. */
248
1.60M
            do {
249
1.60M
                c = get_codepoint(pal, pas);
250
1.60M
            } while (c > 0 && c < 256 && isspace(c));
251
1.60M
            if (c == EOF) {
252
                /* EOF before any argument characters. */
253
0
                if (pas->is_file)
254
0
                    gp_fclose(pas->u.file);
255
0
                else if (pas->u.s.memory)
256
0
                    gs_free_object(pas->u.s.memory, pas->u.s.chars,
257
0
                                   "arg_next");
258
                /* If depth is 0, then we are reading from the simple
259
                 * argument list and we just hit an "empty" argument
260
                 * (such as -o ""). Return this. */
261
0
                if (pal->depth == 0)
262
0
                {
263
0
                    *argstr = pal->cstr;
264
0
                    pal->cstr[0] = 0;
265
0
                    break;
266
0
                }
267
                /* If depth > 0, then we're reading from a response
268
                 * file, and we've hit the end of the response file.
269
                 * Pop up one level and continue. */
270
0
                pal->depth--;
271
0
                continue; /* Next argument */
272
0
            }
273
20.1M
    #define is_eol(c) (c == '\r' || c == '\n')
274
            /* Convert from astr into pal->cstr, and return it as *argstr. */
275
1.60M
            *argstr = cstr = pal->cstr;
276
1.60M
            in_quote = false;
277
            /* We keep track of whether we have just read an "eol" or not,
278
             * in order to skip # characters at the start of a line
279
             * (possibly preceeded by whitespace). We do NOT want this to
280
             * apply to the start of arguments in the arg list, so only
281
             * set eol to be true, if we are in a file. */
282
1.60M
            eol = pal->depth > 0;
283
21.7M
            for (i = 0;;) {
284
21.7M
                if (c == EOF) {
285
1.60M
                    if (in_quote) {
286
0
                        cstr[i] = 0;
287
0
                        errprintf(errmem,
288
0
                                  "Unterminated quote in @-file: %s\n", cstr);
289
0
                        return_error(gs_error_Fatal);
290
0
                    }
291
1.60M
                    break; /* End of arg */
292
1.60M
                }
293
                /* c != 0 */
294
                /* If we aren't parsing from the arglist (i.e. depth > 0)
295
                 * then we break on whitespace (unless we're in quotes). */
296
20.1M
                if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c))
297
0
                    break; /* End of arg */
298
                /* c isn't leading or terminating whitespace. */
299
20.1M
                if (c == '#' && eol) {
300
                    /* Skip a comment. */
301
0
                    do {
302
0
                        c = get_codepoint(pal, pas);
303
0
                    } while (c != 0 && !is_eol(c) && c != EOF);
304
0
                    if (c == '\r')
305
0
                        c = get_codepoint(pal, pas);
306
0
                    if (c == '\n')
307
0
                        c = get_codepoint(pal, pas);
308
0
                    prev_c_was_equals = 0;
309
0
                    continue; /* Next char */
310
0
                }
311
20.1M
                if (c == '\\' && pal->depth > 0) {
312
                    /* Check for \ followed by newline. */
313
0
                    c = get_codepoint(pal, pas);
314
0
                    if (is_eol(c)) {
315
0
                        if (c == '\r')
316
0
                            c = get_codepoint(pal, pas);
317
0
                        if (c == '\n')
318
0
                            c = get_codepoint(pal, pas);
319
0
                        eol = true;
320
0
                        prev_c_was_equals = 0;
321
0
                        continue; /* Next char */
322
0
                    }
323
0
                    {
324
0
                        char what;
325
326
0
                        if (c == '"') {
327
                            /* currently \" is treated as literal ". No other literals yet.
328
                             * We may expand this in future. */
329
0
                            what = c;
330
0
                            c = get_codepoint(pal, pas);
331
0
                        } else {
332
                            /* \ anywhere else is treated as a printing character. */
333
                            /* This is different from the Unix shells. */
334
0
                            what = '\\';
335
0
                        }
336
337
0
                        if (i >= arg_str_max - 1) {
338
0
                            cstr[i] = 0;
339
0
                            errprintf(errmem, "Command too long: %s\n", cstr);
340
0
                            return_error(gs_error_Fatal);
341
0
                        }
342
0
                        cstr[i++] = what;
343
0
                        eol = false;
344
0
                        prev_c_was_equals = 0;
345
0
                        continue; /* Next char */
346
0
                    }
347
0
                }
348
                /* c will become part of the argument */
349
20.1M
                if (i >= arg_str_max - 1) {
350
0
                    cstr[i] = 0;
351
0
                    errprintf(errmem, "Command too long: %s\n", cstr);
352
0
                    return_error(gs_error_Fatal);
353
0
                }
354
                /* Now, some (slightly hairy) code to allow quotes to protect whitespace.
355
                 * We only allow for double-quote quoting within @files, as a) command-
356
                 * line args passed via argv are zero terminated so we should have no
357
                 * confusion with whitespace, and b) callers using the command line will
358
                 * have to have carefully quoted double-quotes to make them survive the
359
                 * shell anyway! */
360
20.1M
                if (c == '"' && pal->depth > 0) {
361
0
                    if ((i == 0 || prev_c_was_equals) && !in_quote)
362
0
                        in_quote = true;
363
0
                    else if (in_quote) {
364
                        /* Need to check the next char to see if we're closing at the end */
365
0
                        c = get_codepoint(pal, pas);
366
0
                        if (c > 0 && c < 256 && isspace(c)) {
367
                            /* Reading from an @file, we've hit a space char. That's good, this
368
                             * was a close quote. */
369
0
                            cstr[i] = 0;
370
0
                            break;
371
0
                        }
372
                        /* Not a close quote, just a literal quote. */
373
0
                        i += codepoint_to_utf8(&cstr[i], '"');
374
0
                        eol = false;
375
0
                        prev_c_was_equals = 0;
376
0
                        continue; /* Jump to the start of the loop without reading another char. */
377
0
                    } else
378
0
                        i += codepoint_to_utf8(&cstr[i], c);
379
0
                }
380
20.1M
                else
381
20.1M
                    i += codepoint_to_utf8(&cstr[i], c);
382
20.1M
                eol = is_eol(c);
383
20.1M
                prev_c_was_equals = (c == '=');
384
20.1M
                c = get_codepoint(pal, pas);
385
20.1M
            }
386
1.60M
            cstr[i] = 0;
387
1.60M
        }
388
389
        /* At this point *argstr is full of utf8 encoded argument. */
390
        /* If it's an @filename argument, then deal with it, and never return
391
         * it to the caller. */
392
1.60M
        if (pal->expand_ats && **argstr == '@') {
393
0
            char *fname;
394
0
            gp_file *f;
395
0
            if (pal->depth+1 == arg_depth_max) {
396
0
                errprintf(errmem, "Too much nesting of @-files.\n");
397
0
                return_error(gs_error_Fatal);
398
0
            }
399
0
            fname = (char *)*argstr + 1; /* skip @ */
400
401
0
            if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0)
402
0
                return_error(gs_error_Fatal);
403
404
0
            f = (*pal->arg_fopen) (fname, pal->fopen_data);
405
0
            DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname));
406
0
            if (f == NULL) {
407
0
                errprintf(errmem, "Unable to open command line file %s\n", *argstr);
408
0
                return_error(gs_error_Fatal);
409
0
            }
410
0
            pas = &pal->sources[++pal->depth];
411
0
            pas->is_file = true;
412
0
            pas->u.file = f;
413
0
            *argstr = NULL; /* Empty the argument string so we don't return it. */
414
0
            continue; /* Loop back to parse the first arg from the file. */
415
0
        }
416
1.60M
    } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */
417
418
1.60M
    return 1;
419
1.66M
}
420
421
/* Copy an argument string to the heap. */
422
char *
423
arg_copy(const char *str, gs_memory_t * mem)
424
1.33M
{
425
1.33M
    char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy");
426
427
1.33M
    if (sstr == 0) {
428
0
        lprintf("Out of memory!\n");
429
0
        return NULL;
430
0
    }
431
1.33M
    strcpy(sstr, str);
432
1.33M
    return sstr;
433
1.33M
}
434
435
/* Free a previously arg_copy'd string */
436
void
437
arg_free(char *str, gs_memory_t * mem)
438
1.33M
{
439
1.33M
    gs_free_object(mem, str, "arg_copy");
440
1.33M
}
441
442
/* Compare the argument 'arg' against the string 'match'.
 *
 * 'arg' is read a codepoint at a time using the arg list's decoder,
 * and each codepoint is compared against the next char of 'match'.
 *
 * Returns 0 if both strings end together without a mismatch, otherwise
 * the difference between the first mismatching codepoint and char (the
 * end of 'arg' counts as 0). Returns 1 if either string is NULL. */
int arg_strcmp(arg_list *pal, const char *arg, const char *match)
{
    if (arg == NULL || match == NULL)
        return 1;

    for (;;) {
        int rune = pal->get_codepoint(NULL, &arg);
        int c = *match++;

        /* Treat the end of the argument as a terminating 0. */
        if (rune == -1)
            rune = 0;
        if (rune != c)
            return rune - c;
        /* They are equal, so if we've hit the end, we're done. */
        if (rune == 0 || c == 0)
            return 0;
    }
}