/src/ghostpdl/base/gsargs.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Command line argument list management */ |
18 | | #include "ctype_.h" |
19 | | #include "stdio_.h" |
20 | | #include "string_.h" |
21 | | #include "gsexit.h" |
22 | | #include "gsmemory.h" |
23 | | #include "gsargs.h" |
24 | | #include "gserrors.h" |
25 | | #include "gp.h" |
26 | | |
/*
 * Encode the codepoint 'rune' into the buffer at 'cstr' using the
 * (original, up to 6 byte) UTF-8 scheme.
 *
 * Returns the number of bytes written (1 to 6). No terminator is
 * appended, and the caller must supply room for the bytes written.
 * Values below 0x80 (including negative values) are stored as a
 * single byte without further checks.
 */
int codepoint_to_utf8(char *cstr, int rune)
{
    int nbytes;     /* Total length of the encoded sequence */
    int marker;     /* High bits that tag the leading byte */
    int pos;

    /* Single byte case: the value is stored as is. */
    if (rune < 0x80) {
        cstr[0] = rune;
        return 1;
    }

    /* Pick the sequence length, and the matching lead byte marker. */
    if (rune < 0x800) {
        nbytes = 2;
        marker = 0xc0;
    } else if (rune < 0x10000) {
        nbytes = 3;
        marker = 0xe0;
    } else if (rune < 0x200000) {
        nbytes = 4;
        marker = 0xf0;
    } else if (rune < 0x4000000) {
        /* Shouldn't ever be required, but included for completeness */
        nbytes = 5;
        marker = 0xf8;
    } else {
        nbytes = 6;
        marker = 0xfc;
    }

    /* The leading byte carries the topmost bits of the value... */
    cstr[0] = marker | (rune >> (6 * (nbytes - 1)));
    /* ...and each continuation byte carries the next 6 bits down. */
    for (pos = 1; pos < nbytes; pos++)
        cstr[pos] = 0x80 | ((rune >> (6 * (nbytes - 1 - pos))) & 0x3f);

    return nbytes;
}
61 | | |
62 | | static int get_codepoint_utf8(stream *s, const char **astr) |
63 | 3.29M | { |
64 | 3.29M | int c; |
65 | 3.29M | int rune; |
66 | 3.29M | int len; |
67 | | |
68 | | /* This code spots the BOM for utf8 and ignores it. Strictly speaking |
69 | | * this may be wrong, as we are only supposed to ignore it at the beginning |
70 | | * of the string, but if anyone is stupid enough to use ZWNBSP (zero width |
71 | | * non breaking space) in the middle of their strings, then they deserve |
72 | | * what they get. */ |
73 | | |
74 | 3.29M | do { |
75 | 3.29M | c = (s ? spgetc(s) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
76 | 3.29M | if (c == EOF) |
77 | 156k | return EOF; |
78 | 3.13M | if (c < 0x80) |
79 | 3.13M | return c; |
80 | 0 | lead: /* We've just read a byte >= 0x80, presumably a leading byte */ |
81 | 0 | if (c < 0xc0) |
82 | 0 | continue; /* Illegal - skip it */ |
83 | 0 | else if (c < 0xe0) |
84 | 0 | len = 1, rune = c & 0x1f; |
85 | 0 | else if (c < 0xf0) |
86 | 0 | len = 2, rune = c & 0xf; |
87 | 0 | else if (c < 0xf8) |
88 | 0 | len = 3, rune = c & 7; |
89 | 0 | else if (c < 0xfc) |
90 | 0 | len = 4, rune = c & 3; |
91 | 0 | else if (c < 0xfe) |
92 | 0 | len = 5, rune = c & 1; |
93 | 0 | else |
94 | 0 | continue; /* Illegal - skip it */ |
95 | 0 | do { |
96 | 0 | c = (s ? spgetc(s) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
97 | 0 | if (c == EOF) |
98 | 0 | return EOF; |
99 | 0 | rune = (rune<<6) | (c & 0x3f); |
100 | 0 | } while (((c & 0xC0) == 0x80) && --len); |
101 | 0 | if (len) { |
102 | | /* The rune we are collecting is improperly formed. */ |
103 | 0 | if (c < 0x80) { |
104 | | /* Just return the simple char we've ended on. */ |
105 | 0 | return c; |
106 | 0 | } |
107 | | /* Start collecting again */ |
108 | 0 | goto lead; |
109 | 0 | } |
110 | 0 | if (rune == 0xFEFF) |
111 | 0 | continue; /* BOM. Skip it */ |
112 | 0 | break; |
113 | 0 | } while (1); |
114 | | |
115 | 0 | return rune; |
116 | 3.29M | } |
117 | | |
118 | | /* Initialize an arg list. */ |
119 | | int |
120 | | arg_init(arg_list * pal, |
121 | | const char **argv, |
122 | | int argc, |
123 | | stream *(*arg_fopen)(const char *fname, void *fopen_data), |
124 | | void *fopen_data, |
125 | | int (*get_codepoint)(stream *s, const char **astr), |
126 | | gs_memory_t *memory) |
127 | 8.71k | { |
128 | 8.71k | int code; |
129 | 8.71k | const char *arg; |
130 | | |
131 | 8.71k | pal->expand_ats = true; |
132 | 8.71k | pal->arg_fopen = arg_fopen; |
133 | 8.71k | pal->fopen_data = fopen_data; |
134 | 8.71k | pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8); |
135 | 8.71k | pal->memory = memory; |
136 | 8.71k | pal->argp = argv; |
137 | 8.71k | pal->argn = argc; |
138 | 8.71k | pal->depth = 0; |
139 | 8.71k | pal->sources[0].is_file = 0; |
140 | 8.71k | pal->sources[0].u.s.memory = NULL; |
141 | 8.71k | pal->sources[0].u.s.decoded = 0; |
142 | 8.71k | pal->sources[0].u.s.parsed = 0; |
143 | | |
144 | | /* Stash the 0th one */ |
145 | 8.71k | code = arg_next(pal, &arg, memory); |
146 | 8.71k | if (code < 0) |
147 | 0 | return code; |
148 | 8.71k | return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg); |
149 | 8.71k | } |
150 | | |
151 | | /* Push a string onto an arg list. */ |
152 | | int |
153 | | arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem) |
154 | 0 | { |
155 | 0 | return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem); |
156 | 0 | } |
157 | | |
158 | | int |
159 | | arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem) |
160 | 0 | { |
161 | 0 | arg_source *pas; |
162 | |
|
163 | 0 | if (pal->depth+1 == arg_depth_max) { |
164 | 0 | lprintf("Too much nesting of @-files.\n"); |
165 | 0 | return 1; |
166 | 0 | } |
167 | 0 | pas = &pal->sources[++pal->depth]; |
168 | 0 | pas->is_file = false; |
169 | 0 | pas->u.s.parsed = parsed; |
170 | 0 | pas->u.s.decoded = decoded; |
171 | 0 | pas->u.s.chars = str; |
172 | 0 | pas->u.s.memory = mem; |
173 | 0 | pas->u.s.str = str; |
174 | 0 | return 0; |
175 | 0 | } |
176 | | |
177 | | /* Clean up an arg list. */ |
178 | | void |
179 | | arg_finit(arg_list * pal) |
180 | 0 | { |
181 | | /* No cleanup is required for level 0 */ |
182 | 0 | while (pal->depth) { |
183 | 0 | arg_source *pas = &pal->sources[pal->depth--]; |
184 | |
|
185 | 0 | if (pas->is_file) |
186 | 0 | sclose(pas->u.strm); |
187 | 0 | else if (pas->u.s.memory) |
188 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit"); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | static int get_codepoint(arg_list *pal, arg_source *pas) |
193 | 2.10M | { |
194 | 2.10M | int (*fn)(stream *s, const char **str); |
195 | | |
196 | 2.10M | fn = (!pas->is_file && pas->u.s.decoded ? get_codepoint_utf8 : pal->get_codepoint); |
197 | 2.10M | return fn(pas->is_file ? pas->u.strm : NULL, &pas->u.s.str); |
198 | 2.10M | } |
199 | | |
200 | | /* Get the next arg from a list. */ |
201 | | /* Note that these are not copied to the heap. */ |
202 | | /* returns: |
203 | | * >0 - valid argument |
204 | | * 0 - arguments exhausted |
205 | | * <0 - error condition |
206 | | * *argstr is *always* set: to the arg string if it is valid, |
207 | | * or to NULL otherwise |
208 | | */ |
209 | | int |
210 | | arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem) |
211 | 162k | { |
212 | 162k | arg_source *pas; |
213 | 162k | char *cstr; |
214 | 162k | int c; |
215 | 162k | int i; |
216 | 162k | bool in_quote, eol; |
217 | 162k | int prev_c_was_equals = 0; |
218 | | |
219 | 162k | *argstr = NULL; |
220 | | |
221 | | /* Loop over arguments, finding one to return. */ |
222 | 162k | do { |
223 | 162k | pas = &pal->sources[pal->depth]; |
224 | 162k | if (!pas->is_file && pas->u.s.parsed) { |
225 | | /* This string is a "pushed-back" argument (retrieved |
226 | | * by a preceding arg_next(), but not processed). No |
227 | | * decoding is required. */ |
228 | | /* assert(pas->u.s.decoded); */ |
229 | 0 | if (strlen(pas->u.s.str) >= arg_str_max) { |
230 | 0 | errprintf(errmem, "Command too long: %s\n", pas->u.s.str); |
231 | 0 | return_error(gs_error_Fatal); |
232 | 0 | } |
233 | 0 | strcpy(pal->cstr, pas->u.s.str); |
234 | 0 | *argstr = pal->cstr; |
235 | 0 | if (pas->u.s.memory) |
236 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next"); |
237 | 0 | pal->depth--; |
238 | 162k | } else { |
239 | | /* We need to decode the next argument */ |
240 | 162k | if (pal->depth == 0) { |
241 | 162k | if (pal->argn <= 0) |
242 | 5.45k | return 0; /* all done */ |
243 | | /* Move onto the next argument from the string. */ |
244 | 156k | pal->argn--; |
245 | 156k | pas->u.s.str = *(pal->argp++); |
246 | 156k | } |
247 | | /* Skip a prefix of whitespace. */ |
248 | 156k | do { |
249 | 156k | c = get_codepoint(pal, pas); |
250 | 156k | } while (c > 0 && c < 256 && isspace(c)); |
251 | 156k | if (c == EOF) { |
252 | | /* EOF before any argument characters. */ |
253 | 0 | if (pas->is_file) { |
254 | 0 | sclose(pas->u.strm); |
255 | 0 | gs_free_object(pas->u.strm->memory, pas->u.strm, "arg stream"); |
256 | 0 | pas->u.strm = NULL; |
257 | 0 | } |
258 | 0 | else if (pas->u.s.memory) |
259 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, |
260 | 0 | "arg_next"); |
261 | | /* If depth is 0, then we are reading from the simple |
262 | | * argument list and we just hit an "empty" argument |
263 | | * (such as -o ""). Return this. */ |
264 | 0 | if (pal->depth == 0) |
265 | 0 | { |
266 | 0 | *argstr = pal->cstr; |
267 | 0 | pal->cstr[0] = 0; |
268 | 0 | break; |
269 | 0 | } |
270 | | /* If depth > 0, then we're reading from a response |
271 | | * file, and we've hit the end of the response file. |
272 | | * Pop up one level and continue. */ |
273 | 0 | pal->depth--; |
274 | 0 | continue; /* Next argument */ |
275 | 0 | } |
276 | 1.95M | #define is_eol(c) (c == '\r' || c == '\n') |
277 | | /* Convert from astr into pal->cstr, and return it as *argstr. */ |
278 | 156k | *argstr = cstr = pal->cstr; |
279 | 156k | in_quote = false; |
280 | | /* We keep track of whether we have just read an "eol" or not, |
281 | | * in order to skip # characters at the start of a line |
282 | | * (possibly preceeded by whitespace). We do NOT want this to |
283 | | * apply to the start of arguments in the arg list, so only |
284 | | * set eol to be true, if we are in a file. */ |
285 | 156k | eol = pal->depth > 0; |
286 | 2.10M | for (i = 0;;) { |
287 | 2.10M | if (c == EOF) { |
288 | 156k | if (in_quote) { |
289 | 0 | cstr[i] = 0; |
290 | 0 | errprintf(errmem, |
291 | 0 | "Unterminated quote in @-file: %s\n", cstr); |
292 | 0 | return_error(gs_error_Fatal); |
293 | 0 | } |
294 | 156k | break; /* End of arg */ |
295 | 156k | } |
296 | | /* c != 0 */ |
297 | | /* If we aren't parsing from the arglist (i.e. depth > 0) |
298 | | * then we break on whitespace (unless we're in quotes). */ |
299 | 1.95M | if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c)) |
300 | 0 | break; /* End of arg */ |
301 | | /* c isn't leading or terminating whitespace. */ |
302 | 1.95M | if (c == '#' && eol) { |
303 | | /* Skip a comment. */ |
304 | 0 | do { |
305 | 0 | c = get_codepoint(pal, pas); |
306 | 0 | } while (c != 0 && !is_eol(c) && c != EOF); |
307 | 0 | if (c == '\r') |
308 | 0 | c = get_codepoint(pal, pas); |
309 | 0 | if (c == '\n') |
310 | 0 | c = get_codepoint(pal, pas); |
311 | 0 | prev_c_was_equals = 0; |
312 | 0 | continue; /* Next char */ |
313 | 0 | } |
314 | 1.95M | if (c == '\\' && pal->depth > 0) { |
315 | | /* Check for \ followed by newline. */ |
316 | 0 | c = get_codepoint(pal, pas); |
317 | 0 | if (is_eol(c)) { |
318 | 0 | if (c == '\r') |
319 | 0 | c = get_codepoint(pal, pas); |
320 | 0 | if (c == '\n') |
321 | 0 | c = get_codepoint(pal, pas); |
322 | 0 | eol = true; |
323 | 0 | prev_c_was_equals = 0; |
324 | 0 | continue; /* Next char */ |
325 | 0 | } |
326 | 0 | { |
327 | 0 | char what; |
328 | |
|
329 | 0 | if (c == '"') { |
330 | | /* currently \" is treated as literal ". No other literals yet. |
331 | | * We may expand this in future. */ |
332 | 0 | what = c; |
333 | 0 | c = get_codepoint(pal, pas); |
334 | 0 | } else { |
335 | | /* \ anywhere else is treated as a printing character. */ |
336 | | /* This is different from the Unix shells. */ |
337 | 0 | what = '\\'; |
338 | 0 | } |
339 | |
|
340 | 0 | if (i >= arg_str_max - 1) { |
341 | 0 | cstr[i] = 0; |
342 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
343 | 0 | return_error(gs_error_Fatal); |
344 | 0 | } |
345 | 0 | cstr[i++] = what; |
346 | 0 | eol = false; |
347 | 0 | prev_c_was_equals = 0; |
348 | 0 | continue; /* Next char */ |
349 | 0 | } |
350 | 0 | } |
351 | | /* c will become part of the argument */ |
352 | 1.95M | if (i >= arg_str_max - 1) { |
353 | 0 | cstr[i] = 0; |
354 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
355 | 0 | return_error(gs_error_Fatal); |
356 | 0 | } |
357 | | /* Now, some (slightly hairy) code to allow quotes to protect whitespace. |
358 | | * We only allow for double-quote quoting within @files, as a) command- |
359 | | * line args passed via argv are zero terminated so we should have no |
360 | | * confusion with whitespace, and b) callers using the command line will |
361 | | * have to have carefully quoted double-quotes to make them survive the |
362 | | * shell anyway! */ |
363 | 1.95M | if (c == '"' && pal->depth > 0) { |
364 | 0 | if ((i == 0 || prev_c_was_equals) && !in_quote) |
365 | 0 | in_quote = true; |
366 | 0 | else if (in_quote) { |
367 | | /* Need to check the next char to see if we're closing at the end */ |
368 | 0 | c = get_codepoint(pal, pas); |
369 | 0 | if (c > 0 && c < 256 && isspace(c)) { |
370 | | /* Reading from an @file, we've hit a space char. That's good, this |
371 | | * was a close quote. */ |
372 | 0 | cstr[i] = 0; |
373 | 0 | break; |
374 | 0 | } |
375 | | /* Not a close quote, just a literal quote. */ |
376 | 0 | i += codepoint_to_utf8(&cstr[i], '"'); |
377 | 0 | eol = false; |
378 | 0 | prev_c_was_equals = 0; |
379 | 0 | continue; /* Jump to the start of the loop without reading another char. */ |
380 | 0 | } else |
381 | 0 | i += codepoint_to_utf8(&cstr[i], c); |
382 | 0 | } |
383 | 1.95M | else |
384 | 1.95M | i += codepoint_to_utf8(&cstr[i], c); |
385 | 1.95M | eol = is_eol(c); |
386 | 1.95M | prev_c_was_equals = (c == '=') || (c == '#'); |
387 | 1.95M | c = get_codepoint(pal, pas); |
388 | 1.95M | } |
389 | 156k | cstr[i] = 0; |
390 | 156k | } |
391 | | |
392 | | /* At this point *argstr is full of utf8 encoded argument. */ |
393 | | /* If it's an @filename argument, then deal with it, and never return |
394 | | * it to the caller. */ |
395 | 156k | if (pal->expand_ats && **argstr == '@') { |
396 | 0 | char *fname; |
397 | 0 | stream *s; |
398 | 0 | if (pal->depth+1 == arg_depth_max) { |
399 | 0 | errprintf(errmem, "Too much nesting of @-files.\n"); |
400 | 0 | return_error(gs_error_Fatal); |
401 | 0 | } |
402 | 0 | fname = (char *)*argstr + 1; /* skip @ */ |
403 | |
|
404 | 0 | if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0) |
405 | 0 | return_error(gs_error_Fatal); |
406 | | |
407 | 0 | s = (*pal->arg_fopen) (fname, pal->fopen_data); |
408 | 0 | DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname)); |
409 | 0 | if (s == NULL) { |
410 | 0 | errprintf(errmem, "Unable to open command line file %s\n", *argstr); |
411 | 0 | return_error(gs_error_Fatal); |
412 | 0 | } |
413 | 0 | pas = &pal->sources[++pal->depth]; |
414 | 0 | pas->is_file = true; |
415 | 0 | pas->u.strm = s; |
416 | 0 | *argstr = NULL; /* Empty the argument string so we don't return it. */ |
417 | 0 | continue; /* Loop back to parse the first arg from the file. */ |
418 | 0 | } |
419 | 156k | } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */ |
420 | | |
421 | 156k | return 1; |
422 | 162k | } |
423 | | |
424 | | /* Copy an argument string to the heap. */ |
425 | | char * |
426 | | arg_copy(const char *str, gs_memory_t * mem) |
427 | 130k | { |
428 | 130k | char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy"); |
429 | | |
430 | 130k | if (sstr == 0) { |
431 | 0 | lprintf("Out of memory!\n"); |
432 | 0 | return NULL; |
433 | 0 | } |
434 | 130k | strcpy(sstr, str); |
435 | 130k | return sstr; |
436 | 130k | } |
437 | | |
438 | | /* Free a previously arg_copy'd string */ |
439 | | void |
440 | | arg_free(char *str, gs_memory_t * mem) |
441 | 130k | { |
442 | 130k | gs_free_object(mem, str, "arg_copy"); |
443 | 130k | } |
444 | | |
445 | | int arg_strcmp(arg_list *pal, const char *arg, const char *match) |
446 | 592k | { |
447 | 592k | int rune, c; |
448 | | |
449 | 592k | if (!arg || !match) |
450 | 0 | return 1; |
451 | 1.18M | do { |
452 | 1.18M | rune = pal->get_codepoint(NULL, &arg); |
453 | 1.18M | if (rune == -1) |
454 | 0 | rune = 0; |
455 | 1.18M | c = *match++; |
456 | 1.18M | if (rune != c) |
457 | 592k | return rune - c; |
458 | 1.18M | } while (rune && c); |
459 | 0 | return 0; |
460 | 592k | } |