/src/ghostpdl/base/gsargs.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2022 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Command line argument list management */ |
18 | | #include "ctype_.h" |
19 | | #include "stdio_.h" |
20 | | #include "string_.h" |
21 | | #include "gsexit.h" |
22 | | #include "gsmemory.h" |
23 | | #include "gsargs.h" |
24 | | #include "gserrors.h" |
25 | | #include "gp.h" |
26 | | |
/*
 * Encode the code point 'rune' into the buffer at 'cstr' using UTF-8 style
 * multi-byte sequences, and return the number of bytes written (1 to 6).
 *
 * Values below 0x80 (including negative values) are stored as a single byte.
 * The 5 and 6 byte forms are not expected to be needed, but are produced for
 * completeness when 'rune' is 0x200000 or above.
 *
 * No terminator is written; the caller must supply room for up to 6 bytes.
 */
int codepoint_to_utf8(char *cstr, int rune)
{
    int trailing;   /* Number of continuation bytes after the lead byte */
    int lead_tag;   /* Marker bits of the lead byte for that length */
    int n = 0;

    if (rune < 0x80) {
        cstr[0] = rune;
        return 1;
    }

    if (rune < 0x800)
        trailing = 1, lead_tag = 0xc0;
    else if (rune < 0x10000)
        trailing = 2, lead_tag = 0xe0;
    else if (rune < 0x200000)
        trailing = 3, lead_tag = 0xf0;
    else if (rune < 0x4000000)
        trailing = 4, lead_tag = 0xf8;
    else
        trailing = 5, lead_tag = 0xfc;

    /* The lead byte carries the bits above all the continuation bytes. */
    cstr[n++] = lead_tag | (rune >> (6 * trailing));

    /* Each continuation byte carries the next 6 bits, most significant first. */
    while (trailing-- > 0)
        cstr[n++] = 0x80 | ((rune >> (6 * trailing)) & 0x3f);

    return n;
}
61 | | |
62 | | static int get_codepoint_utf8(gp_file *file, const char **astr) |
63 | 33.8M | { |
64 | 33.8M | int c; |
65 | 33.8M | int rune; |
66 | 33.8M | int len; |
67 | | |
68 | | /* This code spots the BOM for utf8 and ignores it. Strictly speaking |
69 | | * this may be wrong, as we are only supposed to ignore it at the beginning |
70 | | * of the string, but if anyone is stupid enough to use ZWNBSP (zero width |
71 | | * non breaking space) in the middle of their strings, then they deserve |
72 | | * what they get. */ |
73 | | |
74 | 33.8M | do { |
75 | 33.8M | c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
76 | 33.8M | if (c == EOF) |
77 | 1.60M | return EOF; |
78 | 32.2M | if (c < 0x80) |
79 | 32.2M | return c; |
80 | 0 | lead: /* We've just read a byte >= 0x80, presumably a leading byte */ |
81 | 0 | if (c < 0xc0) |
82 | 0 | continue; /* Illegal - skip it */ |
83 | 0 | else if (c < 0xe0) |
84 | 0 | len = 1, rune = c & 0x1f; |
85 | 0 | else if (c < 0xf0) |
86 | 0 | len = 2, rune = c & 0xf; |
87 | 0 | else if (c < 0xf8) |
88 | 0 | len = 3, rune = c & 7; |
89 | 0 | else if (c < 0xfc) |
90 | 0 | len = 4, rune = c & 3; |
91 | 0 | else if (c < 0xfe) |
92 | 0 | len = 5, rune = c & 1; |
93 | 0 | else |
94 | 0 | continue; /* Illegal - skip it */ |
95 | 0 | do { |
96 | 0 | c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
97 | 0 | if (c == EOF) |
98 | 0 | return EOF; |
99 | 0 | rune = (rune<<6) | (c & 0x3f); |
100 | 0 | } while (((c & 0xC0) == 0x80) && --len); |
101 | 0 | if (len) { |
102 | | /* The rune we are collecting is improperly formed. */ |
103 | 0 | if (c < 0x80) { |
104 | | /* Just return the simple char we've ended on. */ |
105 | 0 | return c; |
106 | 0 | } |
107 | | /* Start collecting again */ |
108 | 0 | goto lead; |
109 | 0 | } |
110 | 0 | if (rune == 0xFEFF) |
111 | 0 | continue; /* BOM. Skip it */ |
112 | 0 | break; |
113 | 0 | } while (1); |
114 | | |
115 | 0 | return rune; |
116 | 33.8M | } |
117 | | |
118 | | /* Initialize an arg list. */ |
119 | | int |
120 | | arg_init(arg_list * pal, |
121 | | const char **argv, |
122 | | int argc, |
123 | | gp_file *(*arg_fopen)(const char *fname, void *fopen_data), |
124 | | void *fopen_data, |
125 | | int (*get_codepoint)(gp_file *file, const char **astr), |
126 | | gs_memory_t *memory) |
127 | 89.2k | { |
128 | 89.2k | int code; |
129 | 89.2k | const char *arg; |
130 | | |
131 | 89.2k | pal->expand_ats = true; |
132 | 89.2k | pal->arg_fopen = arg_fopen; |
133 | 89.2k | pal->fopen_data = fopen_data; |
134 | 89.2k | pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8); |
135 | 89.2k | pal->memory = memory; |
136 | 89.2k | pal->argp = argv; |
137 | 89.2k | pal->argn = argc; |
138 | 89.2k | pal->depth = 0; |
139 | 89.2k | pal->sources[0].is_file = 0; |
140 | 89.2k | pal->sources[0].u.s.memory = NULL; |
141 | 89.2k | pal->sources[0].u.s.decoded = 0; |
142 | 89.2k | pal->sources[0].u.s.parsed = 0; |
143 | | |
144 | | /* Stash the 0th one */ |
145 | 89.2k | code = arg_next(pal, &arg, memory); |
146 | 89.2k | if (code < 0) |
147 | 0 | return code; |
148 | 89.2k | return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg); |
149 | 89.2k | } |
150 | | |
/* Push a string onto an arg list. */
/* This simply forwards to arg_push_decoded_memory_string(), passing the
 * 'parsed' flag for both its 'parsed' and 'decoded' parameters, and returns
 * that function's result. */
int
arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem)
{
    return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem);
}
157 | | |
158 | | int |
159 | | arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem) |
160 | 0 | { |
161 | 0 | arg_source *pas; |
162 | |
|
163 | 0 | if (pal->depth+1 == arg_depth_max) { |
164 | 0 | lprintf("Too much nesting of @-files.\n"); |
165 | 0 | return 1; |
166 | 0 | } |
167 | 0 | pas = &pal->sources[++pal->depth]; |
168 | 0 | pas->is_file = false; |
169 | 0 | pas->u.s.parsed = parsed; |
170 | 0 | pas->u.s.decoded = decoded; |
171 | 0 | pas->u.s.chars = str; |
172 | 0 | pas->u.s.memory = mem; |
173 | 0 | pas->u.s.str = str; |
174 | 0 | return 0; |
175 | 0 | } |
176 | | |
177 | | /* Clean up an arg list. */ |
178 | | void |
179 | | arg_finit(arg_list * pal) |
180 | 0 | { |
181 | | /* No cleanup is required for level 0 */ |
182 | 0 | while (pal->depth) { |
183 | 0 | arg_source *pas = &pal->sources[pal->depth--]; |
184 | |
|
185 | 0 | if (pas->is_file) |
186 | 0 | gp_fclose(pas->u.file); |
187 | 0 | else if (pas->u.s.memory) |
188 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit"); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | static int get_codepoint(arg_list *pal, arg_source *pas) |
193 | 21.7M | { |
194 | 21.7M | int (*fn)(gp_file *file, const char **str); |
195 | | |
196 | 21.7M | fn = (!pas->is_file && pas->u.s.decoded ? get_codepoint_utf8 : pal->get_codepoint); |
197 | 21.7M | return fn(pas->is_file ? pas->u.file : NULL, &pas->u.s.str); |
198 | 21.7M | } |
199 | | |
200 | | /* Get the next arg from a list. */ |
201 | | /* Note that these are not copied to the heap. */ |
202 | | /* returns: |
203 | | * >0 - valid argument |
204 | | * 0 - arguments exhausted |
205 | | * <0 - error condition |
206 | | * *argstr is *always* set: to the arg string if it is valid, |
207 | | * or to NULL otherwise |
208 | | */ |
209 | | int |
210 | | arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem) |
211 | 1.66M | { |
212 | 1.66M | arg_source *pas; |
213 | 1.66M | char *cstr; |
214 | 1.66M | int c; |
215 | 1.66M | int i; |
216 | 1.66M | bool in_quote, eol; |
217 | 1.66M | int prev_c_was_equals = 0; |
218 | | |
219 | 1.66M | *argstr = NULL; |
220 | | |
221 | | /* Loop over arguments, finding one to return. */ |
222 | 1.66M | do { |
223 | 1.66M | pas = &pal->sources[pal->depth]; |
224 | 1.66M | if (!pas->is_file && pas->u.s.parsed) { |
225 | | /* This string is a "pushed-back" argument (retrieved |
226 | | * by a preceding arg_next(), but not processed). No |
227 | | * decoding is required. */ |
228 | | /* assert(pas->u.s.decoded); */ |
229 | 0 | if (strlen(pas->u.s.str) >= arg_str_max) { |
230 | 0 | errprintf(errmem, "Command too long: %s\n", pas->u.s.str); |
231 | 0 | return_error(gs_error_Fatal); |
232 | 0 | } |
233 | 0 | strcpy(pal->cstr, pas->u.s.str); |
234 | 0 | *argstr = pal->cstr; |
235 | 0 | if (pas->u.s.memory) |
236 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next"); |
237 | 0 | pal->depth--; |
238 | 1.66M | } else { |
239 | | /* We need to decode the next argument */ |
240 | 1.66M | if (pal->depth == 0) { |
241 | 1.66M | if (pal->argn <= 0) |
242 | 54.1k | return 0; /* all done */ |
243 | | /* Move onto the next argument from the string. */ |
244 | 1.60M | pal->argn--; |
245 | 1.60M | pas->u.s.str = *(pal->argp++); |
246 | 1.60M | } |
247 | | /* Skip a prefix of whitespace. */ |
248 | 1.60M | do { |
249 | 1.60M | c = get_codepoint(pal, pas); |
250 | 1.60M | } while (c > 0 && c < 256 && isspace(c)); |
251 | 1.60M | if (c == EOF) { |
252 | | /* EOF before any argument characters. */ |
253 | 0 | if (pas->is_file) |
254 | 0 | gp_fclose(pas->u.file); |
255 | 0 | else if (pas->u.s.memory) |
256 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, |
257 | 0 | "arg_next"); |
258 | | /* If depth is 0, then we are reading from the simple |
259 | | * argument list and we just hit an "empty" argument |
260 | | * (such as -o ""). Return this. */ |
261 | 0 | if (pal->depth == 0) |
262 | 0 | { |
263 | 0 | *argstr = pal->cstr; |
264 | 0 | pal->cstr[0] = 0; |
265 | 0 | break; |
266 | 0 | } |
267 | | /* If depth > 0, then we're reading from a response |
268 | | * file, and we've hit the end of the response file. |
269 | | * Pop up one level and continue. */ |
270 | 0 | pal->depth--; |
271 | 0 | continue; /* Next argument */ |
272 | 0 | } |
273 | 20.1M | #define is_eol(c) (c == '\r' || c == '\n') |
274 | | /* Convert from astr into pal->cstr, and return it as *argstr. */ |
275 | 1.60M | *argstr = cstr = pal->cstr; |
276 | 1.60M | in_quote = false; |
277 | | /* We keep track of whether we have just read an "eol" or not, |
278 | | * in order to skip # characters at the start of a line |
279 | | * (possibly preceeded by whitespace). We do NOT want this to |
280 | | * apply to the start of arguments in the arg list, so only |
281 | | * set eol to be true, if we are in a file. */ |
282 | 1.60M | eol = pal->depth > 0; |
283 | 21.7M | for (i = 0;;) { |
284 | 21.7M | if (c == EOF) { |
285 | 1.60M | if (in_quote) { |
286 | 0 | cstr[i] = 0; |
287 | 0 | errprintf(errmem, |
288 | 0 | "Unterminated quote in @-file: %s\n", cstr); |
289 | 0 | return_error(gs_error_Fatal); |
290 | 0 | } |
291 | 1.60M | break; /* End of arg */ |
292 | 1.60M | } |
293 | | /* c != 0 */ |
294 | | /* If we aren't parsing from the arglist (i.e. depth > 0) |
295 | | * then we break on whitespace (unless we're in quotes). */ |
296 | 20.1M | if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c)) |
297 | 0 | break; /* End of arg */ |
298 | | /* c isn't leading or terminating whitespace. */ |
299 | 20.1M | if (c == '#' && eol) { |
300 | | /* Skip a comment. */ |
301 | 0 | do { |
302 | 0 | c = get_codepoint(pal, pas); |
303 | 0 | } while (c != 0 && !is_eol(c) && c != EOF); |
304 | 0 | if (c == '\r') |
305 | 0 | c = get_codepoint(pal, pas); |
306 | 0 | if (c == '\n') |
307 | 0 | c = get_codepoint(pal, pas); |
308 | 0 | prev_c_was_equals = 0; |
309 | 0 | continue; /* Next char */ |
310 | 0 | } |
311 | 20.1M | if (c == '\\' && pal->depth > 0) { |
312 | | /* Check for \ followed by newline. */ |
313 | 0 | c = get_codepoint(pal, pas); |
314 | 0 | if (is_eol(c)) { |
315 | 0 | if (c == '\r') |
316 | 0 | c = get_codepoint(pal, pas); |
317 | 0 | if (c == '\n') |
318 | 0 | c = get_codepoint(pal, pas); |
319 | 0 | eol = true; |
320 | 0 | prev_c_was_equals = 0; |
321 | 0 | continue; /* Next char */ |
322 | 0 | } |
323 | 0 | { |
324 | 0 | char what; |
325 | |
|
326 | 0 | if (c == '"') { |
327 | | /* currently \" is treated as literal ". No other literals yet. |
328 | | * We may expand this in future. */ |
329 | 0 | what = c; |
330 | 0 | c = get_codepoint(pal, pas); |
331 | 0 | } else { |
332 | | /* \ anywhere else is treated as a printing character. */ |
333 | | /* This is different from the Unix shells. */ |
334 | 0 | what = '\\'; |
335 | 0 | } |
336 | |
|
337 | 0 | if (i >= arg_str_max - 1) { |
338 | 0 | cstr[i] = 0; |
339 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
340 | 0 | return_error(gs_error_Fatal); |
341 | 0 | } |
342 | 0 | cstr[i++] = what; |
343 | 0 | eol = false; |
344 | 0 | prev_c_was_equals = 0; |
345 | 0 | continue; /* Next char */ |
346 | 0 | } |
347 | 0 | } |
348 | | /* c will become part of the argument */ |
349 | 20.1M | if (i >= arg_str_max - 1) { |
350 | 0 | cstr[i] = 0; |
351 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
352 | 0 | return_error(gs_error_Fatal); |
353 | 0 | } |
354 | | /* Now, some (slightly hairy) code to allow quotes to protect whitespace. |
355 | | * We only allow for double-quote quoting within @files, as a) command- |
356 | | * line args passed via argv are zero terminated so we should have no |
357 | | * confusion with whitespace, and b) callers using the command line will |
358 | | * have to have carefully quoted double-quotes to make them survive the |
359 | | * shell anyway! */ |
360 | 20.1M | if (c == '"' && pal->depth > 0) { |
361 | 0 | if ((i == 0 || prev_c_was_equals) && !in_quote) |
362 | 0 | in_quote = true; |
363 | 0 | else if (in_quote) { |
364 | | /* Need to check the next char to see if we're closing at the end */ |
365 | 0 | c = get_codepoint(pal, pas); |
366 | 0 | if (c > 0 && c < 256 && isspace(c)) { |
367 | | /* Reading from an @file, we've hit a space char. That's good, this |
368 | | * was a close quote. */ |
369 | 0 | cstr[i] = 0; |
370 | 0 | break; |
371 | 0 | } |
372 | | /* Not a close quote, just a literal quote. */ |
373 | 0 | i += codepoint_to_utf8(&cstr[i], '"'); |
374 | 0 | eol = false; |
375 | 0 | prev_c_was_equals = 0; |
376 | 0 | continue; /* Jump to the start of the loop without reading another char. */ |
377 | 0 | } else |
378 | 0 | i += codepoint_to_utf8(&cstr[i], c); |
379 | 0 | } |
380 | 20.1M | else |
381 | 20.1M | i += codepoint_to_utf8(&cstr[i], c); |
382 | 20.1M | eol = is_eol(c); |
383 | 20.1M | prev_c_was_equals = (c == '='); |
384 | 20.1M | c = get_codepoint(pal, pas); |
385 | 20.1M | } |
386 | 1.60M | cstr[i] = 0; |
387 | 1.60M | } |
388 | | |
389 | | /* At this point *argstr is full of utf8 encoded argument. */ |
390 | | /* If it's an @filename argument, then deal with it, and never return |
391 | | * it to the caller. */ |
392 | 1.60M | if (pal->expand_ats && **argstr == '@') { |
393 | 0 | char *fname; |
394 | 0 | gp_file *f; |
395 | 0 | if (pal->depth+1 == arg_depth_max) { |
396 | 0 | errprintf(errmem, "Too much nesting of @-files.\n"); |
397 | 0 | return_error(gs_error_Fatal); |
398 | 0 | } |
399 | 0 | fname = (char *)*argstr + 1; /* skip @ */ |
400 | |
|
401 | 0 | if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0) |
402 | 0 | return_error(gs_error_Fatal); |
403 | | |
404 | 0 | f = (*pal->arg_fopen) (fname, pal->fopen_data); |
405 | 0 | DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname)); |
406 | 0 | if (f == NULL) { |
407 | 0 | errprintf(errmem, "Unable to open command line file %s\n", *argstr); |
408 | 0 | return_error(gs_error_Fatal); |
409 | 0 | } |
410 | 0 | pas = &pal->sources[++pal->depth]; |
411 | 0 | pas->is_file = true; |
412 | 0 | pas->u.file = f; |
413 | 0 | *argstr = NULL; /* Empty the argument string so we don't return it. */ |
414 | 0 | continue; /* Loop back to parse the first arg from the file. */ |
415 | 0 | } |
416 | 1.60M | } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */ |
417 | | |
418 | 1.60M | return 1; |
419 | 1.66M | } |
420 | | |
421 | | /* Copy an argument string to the heap. */ |
422 | | char * |
423 | | arg_copy(const char *str, gs_memory_t * mem) |
424 | 1.33M | { |
425 | 1.33M | char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy"); |
426 | | |
427 | 1.33M | if (sstr == 0) { |
428 | 0 | lprintf("Out of memory!\n"); |
429 | 0 | return NULL; |
430 | 0 | } |
431 | 1.33M | strcpy(sstr, str); |
432 | 1.33M | return sstr; |
433 | 1.33M | } |
434 | | |
/* Free a previously arg_copy'd string */
/* 'str' is released through 'mem' with gs_free_object(); 'mem' should be the
 * allocator that was given to arg_copy(). The client name passed to the
 * allocator is "arg_copy", the same tag used when allocating. */
void
arg_free(char *str, gs_memory_t * mem)
{
    gs_free_object(mem, str, "arg_copy");
}
441 | | |
442 | | int arg_strcmp(arg_list *pal, const char *arg, const char *match) |
443 | 6.06M | { |
444 | 6.06M | int rune, c; |
445 | | |
446 | 6.06M | if (!arg || !match) |
447 | 0 | return 1; |
448 | 12.1M | do { |
449 | 12.1M | rune = pal->get_codepoint(NULL, &arg); |
450 | 12.1M | if (rune == -1) |
451 | 0 | rune = 0; |
452 | 12.1M | c = *match++; |
453 | 12.1M | if (rune != c) |
454 | 6.06M | return rune - c; |
455 | 12.1M | } while (rune && c); |
456 | 0 | return 0; |
457 | 6.06M | } |