Coverage Report

Created: 2025-06-10 06:59

/src/ghostpdl/base/gsargs.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2001-2023 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
17
/* Command line argument list management */
18
#include "ctype_.h"
19
#include "stdio_.h"
20
#include "string_.h"
21
#include "gsexit.h"
22
#include "gsmemory.h"
23
#include "gsargs.h"
24
#include "gserrors.h"
25
#include "gp.h"
26
27
/*
 * Encode the codepoint 'rune' as UTF-8 into 'cstr' and return the number
 * of bytes written (1 to 6). The output is NOT zero terminated, so the
 * caller must supply room for up to 6 bytes and add any terminator itself.
 *
 * The 5 and 6 byte forms (values of 0x200000 and above) lie outside
 * modern UTF-8, and are only produced for completeness.
 */
int codepoint_to_utf8(char *cstr, int rune)
{
    int trail;  /* Number of continuation bytes following the lead byte */
    int lead;   /* Marker bits for the lead byte */
    int n = 0;

    /* Anything below 0x80 is stored as a single byte, as is. */
    if (rune < 0x80) {
        cstr[0] = rune;
        return 1;
    }

    if (rune < 0x800)
        trail = 1, lead = 0xc0;
    else if (rune < 0x10000)
        trail = 2, lead = 0xe0;
    else if (rune < 0x200000)
        trail = 3, lead = 0xf0;
    else if (rune < 0x4000000)
        trail = 4, lead = 0xf8;
    else
        trail = 5, lead = 0xfc;

    /* The lead byte carries the topmost payload bits... */
    cstr[n++] = lead | (rune >> (6 * trail));
    /* ...and each continuation byte the next 6, most significant first. */
    while (trail-- > 0)
        cstr[n++] = 0x80 | ((rune >> (6 * trail)) & 0x3f);

    return n;
}
61
62
/*
 * Fetch the next raw byte, either from the stream 's' (when non-NULL) or
 * from the zero terminated string at *astr (which is advanced past the
 * byte). The end of the string is reported as EOF, without advancing.
 */
static int utf8_next_byte(stream *s, const char **astr)
{
    if (s)
        return spgetc(s);
    if (**astr == 0)
        return EOF;
    return (int)(unsigned char)*(*astr)++;
}

/*
 * Read one UTF-8 encoded codepoint from the stream 's' or, if 's' is
 * NULL, from the string at *astr. Returns the codepoint, or EOF when the
 * input runs out (including part way through a multi-byte sequence).
 *
 * Bytes that cannot start a sequence are silently dropped. A sequence
 * that is cut short by a byte below 0x80 yields that byte; one cut short
 * by any other byte restarts decoding with that byte as the new lead.
 *
 * This code spots the BOM for utf8 and ignores it. Strictly speaking
 * this may be wrong, as we are only supposed to ignore it at the
 * beginning of the string, but here U+FEFF is dropped wherever it
 * appears.
 */
static int get_codepoint_utf8(stream *s, const char **astr)
{
    int byte;
    int value;
    int pending;

    for (;;) {
        byte = utf8_next_byte(s, astr);
        if (byte == EOF)
            return EOF;
        if (byte < 0x80)
            return byte;

        /* byte is >= 0x80, so presumably the lead byte of a sequence.
         * This inner loop goes around again whenever a sequence is cut
         * short by something that might itself be a lead byte. */
        for (;;) {
            if (byte < 0xc0 || byte >= 0xfe)
                break; /* Illegal lead - skip it, and read afresh */

            if (byte < 0xe0) {
                pending = 1;
                value = byte & 0x1f;
            } else if (byte < 0xf0) {
                pending = 2;
                value = byte & 0xf;
            } else if (byte < 0xf8) {
                pending = 3;
                value = byte & 7;
            } else if (byte < 0xfc) {
                pending = 4;
                value = byte & 3;
            } else {
                pending = 5;
                value = byte & 1;
            }

            /* Gather continuation bytes until we have them all, or we
             * hit something that isn't a continuation byte. */
            do {
                byte = utf8_next_byte(s, astr);
                if (byte == EOF)
                    return EOF;
                value = (value << 6) | (byte & 0x3f);
            } while ((byte & 0xC0) == 0x80 && --pending);

            if (pending == 0) {
                /* A complete sequence */
                if (value == 0xFEFF)
                    break; /* BOM. Skip it, and read afresh */
                return value;
            }

            /* The rune we were collecting is improperly formed. */
            if (byte < 0x80)
                return byte; /* Just return the simple char we ended on. */
            /* Otherwise start collecting again from the offending byte. */
        }
    }
}
117
118
/* Initialize an arg list. */
119
int
120
arg_init(arg_list     * pal,
121
         const char  **argv,
122
         int           argc,
123
         stream      *(*arg_fopen)(const char *fname, void *fopen_data),
124
         void         *fopen_data,
125
         int           (*get_codepoint)(stream *s, const char **astr),
126
         gs_memory_t  *memory)
127
9.86k
{
128
9.86k
    int code;
129
9.86k
    const char *arg;
130
131
9.86k
    pal->expand_ats = true;
132
9.86k
    pal->arg_fopen = arg_fopen;
133
9.86k
    pal->fopen_data = fopen_data;
134
9.86k
    pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8);
135
9.86k
    pal->memory = memory;
136
9.86k
    pal->argp = argv;
137
9.86k
    pal->argn = argc;
138
9.86k
    pal->depth = 0;
139
9.86k
    pal->sources[0].is_file = 0;
140
9.86k
    pal->sources[0].u.s.memory = NULL;
141
9.86k
    pal->sources[0].u.s.decoded = 0;
142
9.86k
    pal->sources[0].u.s.parsed = 0;
143
144
    /* Stash the 0th one */
145
9.86k
    code = arg_next(pal, &arg, memory);
146
9.86k
    if (code < 0)
147
0
        return code;
148
9.86k
    return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg);
149
9.86k
}
150
151
/* Push a string onto an arg list. */
152
int
153
arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem)
154
0
{
155
0
    return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem);
156
0
}
157
158
int
159
arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem)
160
0
{
161
0
    arg_source *pas;
162
163
0
    if (pal->depth+1 == arg_depth_max) {
164
0
        lprintf("Too much nesting of @-files.\n");
165
0
        return 1;
166
0
    }
167
0
    pas = &pal->sources[++pal->depth];
168
0
    pas->is_file = false;
169
0
    pas->u.s.parsed = parsed;
170
0
    pas->u.s.decoded = decoded;
171
0
    pas->u.s.chars = str;
172
0
    pas->u.s.memory = mem;
173
0
    pas->u.s.str = str;
174
0
    return 0;
175
0
}
176
177
/* Clean up an arg list. */
178
void
179
arg_finit(arg_list * pal)
180
0
{
181
    /* No cleanup is required for level 0 */
182
0
    while (pal->depth) {
183
0
        arg_source *pas = &pal->sources[pal->depth--];
184
185
0
        if (pas->is_file)
186
0
            sclose(pas->u.strm);
187
0
        else if (pas->u.s.memory)
188
0
            gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit");
189
0
    }
190
0
}
191
192
/*
 * Read the next codepoint from the source 'pas'.
 * File sources, and string sources that are not yet decoded, go through
 * the decoder configured on the list. String sources flagged as decoded
 * are always read as UTF-8.
 */
static int get_codepoint(arg_list *pal, arg_source *pas)
{
    if (pas->is_file)
        return pal->get_codepoint(pas->u.strm, &pas->u.s.str);
    if (pas->u.s.decoded)
        return get_codepoint_utf8(NULL, &pas->u.s.str);
    return pal->get_codepoint(NULL, &pas->u.s.str);
}
199
200
/* Get the next arg from a list. */
201
/* Note that these are not copied to the heap. */
202
/* returns:
203
 * >0 - valid argument
204
 *  0 - arguments exhausted
205
 * <0 - error condition
206
 * *argstr is *always* set: to the arg string if it is valid,
207
 * or to NULL otherwise
208
 */
209
int
210
arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem)
211
184k
{
212
184k
    arg_source *pas;
213
184k
    char *cstr;
214
184k
    int c;
215
184k
    int i;
216
184k
    bool in_quote, eol;
217
184k
    int prev_c_was_equals = 0;
218
219
184k
    *argstr = NULL;
220
221
    /* Loop over arguments, finding one to return. */
222
184k
    do {
223
184k
        pas = &pal->sources[pal->depth];
224
184k
        if (!pas->is_file && pas->u.s.parsed) {
225
            /* This string is a "pushed-back" argument (retrieved
226
             * by a preceding arg_next(), but not processed). No
227
             * decoding is required. */
228
            /* assert(pas->u.s.decoded); */
229
0
            if (strlen(pas->u.s.str) >= arg_str_max) {
230
0
                errprintf(errmem, "Command too long: %s\n", pas->u.s.str);
231
0
                return_error(gs_error_Fatal);
232
0
            }
233
0
            strcpy(pal->cstr, pas->u.s.str);
234
0
            *argstr = pal->cstr;
235
0
            if (pas->u.s.memory)
236
0
                gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next");
237
0
            pal->depth--;
238
184k
        } else {
239
            /* We need to decode the next argument */
240
184k
            if (pal->depth == 0) {
241
184k
                if (pal->argn <= 0)
242
6.86k
                    return 0; /* all done */
243
                /* Move onto the next argument from the string. */
244
177k
                pal->argn--;
245
177k
                pas->u.s.str = *(pal->argp++);
246
177k
            }
247
            /* Skip a prefix of whitespace. */
248
177k
            do {
249
177k
                c = get_codepoint(pal, pas);
250
177k
            } while (c > 0 && c < 256 && isspace(c));
251
177k
            if (c == EOF) {
252
                /* EOF before any argument characters. */
253
0
                if (pas->is_file) {
254
0
                    sclose(pas->u.strm);
255
0
                    gs_free_object(pas->u.strm->memory, pas->u.strm, "arg stream");
256
0
                    pas->u.strm = NULL;
257
0
                }
258
0
                else if (pas->u.s.memory)
259
0
                    gs_free_object(pas->u.s.memory, pas->u.s.chars,
260
0
                                   "arg_next");
261
                /* If depth is 0, then we are reading from the simple
262
                 * argument list and we just hit an "empty" argument
263
                 * (such as -o ""). Return this. */
264
0
                if (pal->depth == 0)
265
0
                {
266
0
                    *argstr = pal->cstr;
267
0
                    pal->cstr[0] = 0;
268
0
                    break;
269
0
                }
270
                /* If depth > 0, then we're reading from a response
271
                 * file, and we've hit the end of the response file.
272
                 * Pop up one level and continue. */
273
0
                pal->depth--;
274
0
                continue; /* Next argument */
275
0
            }
276
2.21M
    #define is_eol(c) (c == '\r' || c == '\n')
277
            /* Convert from astr into pal->cstr, and return it as *argstr. */
278
177k
            *argstr = cstr = pal->cstr;
279
177k
            in_quote = false;
280
            /* We keep track of whether we have just read an "eol" or not,
281
             * in order to skip # characters at the start of a line
282
             * (possibly preceeded by whitespace). We do NOT want this to
283
             * apply to the start of arguments in the arg list, so only
284
             * set eol to be true, if we are in a file. */
285
177k
            eol = pal->depth > 0;
286
2.39M
            for (i = 0;;) {
287
2.39M
                if (c == EOF) {
288
177k
                    if (in_quote) {
289
0
                        cstr[i] = 0;
290
0
                        errprintf(errmem,
291
0
                                  "Unterminated quote in @-file: %s\n", cstr);
292
0
                        return_error(gs_error_Fatal);
293
0
                    }
294
177k
                    break; /* End of arg */
295
177k
                }
296
                /* c != 0 */
297
                /* If we aren't parsing from the arglist (i.e. depth > 0)
298
                 * then we break on whitespace (unless we're in quotes). */
299
2.21M
                if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c))
300
0
                    break; /* End of arg */
301
                /* c isn't leading or terminating whitespace. */
302
2.21M
                if (c == '#' && eol) {
303
                    /* Skip a comment. */
304
0
                    do {
305
0
                        c = get_codepoint(pal, pas);
306
0
                    } while (c != 0 && !is_eol(c) && c != EOF);
307
0
                    if (c == '\r')
308
0
                        c = get_codepoint(pal, pas);
309
0
                    if (c == '\n')
310
0
                        c = get_codepoint(pal, pas);
311
0
                    prev_c_was_equals = 0;
312
0
                    continue; /* Next char */
313
0
                }
314
2.21M
                if (c == '\\' && pal->depth > 0) {
315
                    /* Check for \ followed by newline. */
316
0
                    c = get_codepoint(pal, pas);
317
0
                    if (is_eol(c)) {
318
0
                        if (c == '\r')
319
0
                            c = get_codepoint(pal, pas);
320
0
                        if (c == '\n')
321
0
                            c = get_codepoint(pal, pas);
322
0
                        eol = true;
323
0
                        prev_c_was_equals = 0;
324
0
                        continue; /* Next char */
325
0
                    }
326
0
                    {
327
0
                        char what;
328
329
0
                        if (c == '"') {
330
                            /* currently \" is treated as literal ". No other literals yet.
331
                             * We may expand this in future. */
332
0
                            what = c;
333
0
                            c = get_codepoint(pal, pas);
334
0
                        } else {
335
                            /* \ anywhere else is treated as a printing character. */
336
                            /* This is different from the Unix shells. */
337
0
                            what = '\\';
338
0
                        }
339
340
0
                        if (i >= arg_str_max - 1) {
341
0
                            cstr[i] = 0;
342
0
                            errprintf(errmem, "Command too long: %s\n", cstr);
343
0
                            return_error(gs_error_Fatal);
344
0
                        }
345
0
                        cstr[i++] = what;
346
0
                        eol = false;
347
0
                        prev_c_was_equals = 0;
348
0
                        continue; /* Next char */
349
0
                    }
350
0
                }
351
                /* c will become part of the argument */
352
2.21M
                if (i >= arg_str_max - 1) {
353
0
                    cstr[i] = 0;
354
0
                    errprintf(errmem, "Command too long: %s\n", cstr);
355
0
                    return_error(gs_error_Fatal);
356
0
                }
357
                /* Now, some (slightly hairy) code to allow quotes to protect whitespace.
358
                 * We only allow for double-quote quoting within @files, as a) command-
359
                 * line args passed via argv are zero terminated so we should have no
360
                 * confusion with whitespace, and b) callers using the command line will
361
                 * have to have carefully quoted double-quotes to make them survive the
362
                 * shell anyway! */
363
2.21M
                if (c == '"' && pal->depth > 0) {
364
0
                    if ((i == 0 || prev_c_was_equals) && !in_quote)
365
0
                        in_quote = true;
366
0
                    else if (in_quote) {
367
                        /* Need to check the next char to see if we're closing at the end */
368
0
                        c = get_codepoint(pal, pas);
369
0
                        if (c > 0 && c < 256 && isspace(c)) {
370
                            /* Reading from an @file, we've hit a space char. That's good, this
371
                             * was a close quote. */
372
0
                            cstr[i] = 0;
373
0
                            break;
374
0
                        }
375
                        /* Not a close quote, just a literal quote. */
376
0
                        i += codepoint_to_utf8(&cstr[i], '"');
377
0
                        eol = false;
378
0
                        prev_c_was_equals = 0;
379
0
                        continue; /* Jump to the start of the loop without reading another char. */
380
0
                    } else
381
0
                        i += codepoint_to_utf8(&cstr[i], c);
382
0
                }
383
2.21M
                else
384
2.21M
                    i += codepoint_to_utf8(&cstr[i], c);
385
2.21M
                eol = is_eol(c);
386
2.21M
                prev_c_was_equals = (c == '=') || (c == '#');
387
2.21M
                c = get_codepoint(pal, pas);
388
2.21M
            }
389
177k
            cstr[i] = 0;
390
177k
        }
391
392
        /* At this point *argstr is full of utf8 encoded argument. */
393
        /* If it's an @filename argument, then deal with it, and never return
394
         * it to the caller. */
395
177k
        if (pal->expand_ats && **argstr == '@') {
396
0
            char *fname;
397
0
            stream *s;
398
0
            if (pal->depth+1 == arg_depth_max) {
399
0
                errprintf(errmem, "Too much nesting of @-files.\n");
400
0
                return_error(gs_error_Fatal);
401
0
            }
402
0
            fname = (char *)*argstr + 1; /* skip @ */
403
404
0
            if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0)
405
0
                return_error(gs_error_Fatal);
406
407
0
            s = (*pal->arg_fopen) (fname, pal->fopen_data);
408
0
            DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname));
409
0
            if (s == NULL) {
410
0
                errprintf(errmem, "Unable to open command line file %s\n", *argstr);
411
0
                return_error(gs_error_Fatal);
412
0
            }
413
0
            pas = &pal->sources[++pal->depth];
414
0
            pas->is_file = true;
415
0
            pas->u.strm = s;
416
0
            *argstr = NULL; /* Empty the argument string so we don't return it. */
417
0
            continue; /* Loop back to parse the first arg from the file. */
418
0
        }
419
177k
    } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */
420
421
177k
    return 1;
422
184k
}
423
424
/* Copy an argument string to the heap. */
425
char *
426
arg_copy(const char *str, gs_memory_t * mem)
427
147k
{
428
147k
    char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy");
429
430
147k
    if (sstr == 0) {
431
0
        lprintf("Out of memory!\n");
432
0
        return NULL;
433
0
    }
434
147k
    strcpy(sstr, str);
435
147k
    return sstr;
436
147k
}
437
438
/* Free a previously arg_copy'd string */
439
void
440
arg_free(char *str, gs_memory_t * mem)
441
147k
{
442
147k
    gs_free_object(mem, str, "arg_copy");
443
147k
}
444
445
/*
 * Compare the argument 'arg', read a codepoint at a time using the
 * list's decoder, against 'match', read a char at a time.
 *
 * Returns 0 if the two are equal all the way to their ends. Otherwise
 * returns the difference (codepoint from arg minus char from match) at
 * the first point where they differ, with the end of arg counting as 0.
 * Returns 1 if either string is NULL.
 */
int arg_strcmp(arg_list *pal, const char *arg, const char *match)
{
    if (arg == NULL || match == NULL)
        return 1;

    for (;;) {
        int got = pal->get_codepoint(NULL, &arg);
        int want;

        /* The decoder reports the end of the string as -1; treat it as
         * the terminator so it can match the one in 'match'. */
        if (got == -1)
            got = 0;
        want = *match++;

        if (got != want)
            return got - want;
        if (got == 0)
            return 0; /* Both strings ended together */
    }
}