/src/ghostpdl/base/gsargs.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2021 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Command line argument list management */ |
18 | | #include "ctype_.h" |
19 | | #include "stdio_.h" |
20 | | #include "string_.h" |
21 | | #include "gsexit.h" |
22 | | #include "gsmemory.h" |
23 | | #include "gsargs.h" |
24 | | #include "gserrors.h" |
25 | | #include "gp.h" |
26 | | |
/*
 * Encode the code point 'rune' into 'cstr' using the (original, up to
 * 6 byte) UTF-8 scheme.  No terminating NUL is written.
 *
 * cstr: destination; the caller must provide room for up to 6 bytes.
 * rune: code point to encode.  Any value below 0x80 (negative values
 *       included) is stored as a single byte, truncated to char.
 *
 * Returns the number of bytes written (1..6).
 */
int codepoint_to_utf8(char *cstr, int rune)
{
    int trail;      /* number of continuation bytes that follow the lead */
    int lead_mark;  /* high-bit pattern identifying the lead byte */
    int shift;
    int n = 0;

    if (rune < 0x80) {
        cstr[0] = rune;
        return 1;
    }

    if (rune < 0x800) {
        trail = 1;
        lead_mark = 0xc0;
    } else if (rune < 0x10000) {
        trail = 2;
        lead_mark = 0xe0;
    } else if (rune < 0x200000) {
        trail = 3;
        lead_mark = 0xf0;
    } else if (rune < 0x4000000) {
        /* Shouldn't ever be required, but included for completeness */
        trail = 4;
        lead_mark = 0xf8;
    } else {
        trail = 5;
        lead_mark = 0xfc;
    }

    /* The lead byte carries the bits above the 6*trail low-order bits. */
    shift = 6 * trail;
    cstr[n++] = lead_mark | (rune >> shift);

    /* Each continuation byte carries the next 6 bits, most significant first. */
    while (shift > 0) {
        shift -= 6;
        cstr[n++] = 0x80 | ((rune >> shift) & 0x3f);
    }

    return n;
}
61 | | |
62 | | static int get_codepoint_utf8(gp_file *file, const char **astr) |
63 | 257k | { |
64 | 257k | int c; |
65 | 257k | int rune; |
66 | 257k | int len; |
67 | | |
68 | | /* This code spots the BOM for utf8 and ignores it. Strictly speaking |
69 | | * this may be wrong, as we are only supposed to ignore it at the beginning |
70 | | * of the string, but if anyone is stupid enough to use ZWNBSP (zero width |
71 | | * non breaking space) in the middle of their strings, then they deserve |
72 | | * what they get. */ |
73 | | |
74 | 257k | do { |
75 | 257k | c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
76 | 257k | if (c == EOF) |
77 | 12.2k | return EOF; |
78 | 245k | if (c < 0x80) |
79 | 245k | return c; |
80 | 0 | lead: /* We've just read a byte >= 0x80, presumably a leading byte */ |
81 | 0 | if (c < 0xc0) |
82 | 0 | continue; /* Illegal - skip it */ |
83 | 0 | else if (c < 0xe0) |
84 | 0 | len = 1, rune = c & 0x1f; |
85 | 0 | else if (c < 0xf0) |
86 | 0 | len = 2, rune = c & 0xf; |
87 | 0 | else if (c < 0xf8) |
88 | 0 | len = 3, rune = c & 7; |
89 | 0 | else if (c < 0xfc) |
90 | 0 | len = 4, rune = c & 3; |
91 | 0 | else if (c < 0xfe) |
92 | 0 | len = 5, rune = c & 1; |
93 | 0 | else |
94 | 0 | continue; /* Illegal - skip it */ |
95 | 0 | do { |
96 | 0 | c = (file ? gp_fgetc(file) : (**astr ? (int)(unsigned char)*(*astr)++ : EOF)); |
97 | 0 | if (c == EOF) |
98 | 0 | return EOF; |
99 | 0 | rune = (rune<<6) | (c & 0x3f); |
100 | 0 | } while (((c & 0xC0) == 0x80) && --len); |
101 | 0 | if (len) { |
102 | | /* The rune we are collecting is improperly formed. */ |
103 | 0 | if (c < 0x80) { |
104 | | /* Just return the simple char we've ended on. */ |
105 | 0 | return c; |
106 | 0 | } |
107 | | /* Start collecting again */ |
108 | 0 | goto lead; |
109 | 0 | } |
110 | 0 | if (rune == 0xFEFF) |
111 | 0 | continue; /* BOM. Skip it */ |
112 | 0 | break; |
113 | 0 | } while (1); |
114 | | |
115 | 0 | return rune; |
116 | 257k | } |
117 | | |
118 | | /* Initialize an arg list. */ |
119 | | int |
120 | | arg_init(arg_list * pal, |
121 | | const char **argv, |
122 | | int argc, |
123 | | gp_file *(*arg_fopen)(const char *fname, void *fopen_data), |
124 | | void *fopen_data, |
125 | | int (*get_codepoint)(gp_file *file, const char **astr), |
126 | | gs_memory_t *memory) |
127 | 683 | { |
128 | 683 | int code; |
129 | 683 | const char *arg; |
130 | | |
131 | 683 | pal->expand_ats = true; |
132 | 683 | pal->arg_fopen = arg_fopen; |
133 | 683 | pal->fopen_data = fopen_data; |
134 | 683 | pal->get_codepoint = (get_codepoint ? get_codepoint : get_codepoint_utf8); |
135 | 683 | pal->memory = memory; |
136 | 683 | pal->argp = argv; |
137 | 683 | pal->argn = argc; |
138 | 683 | pal->depth = 0; |
139 | 683 | pal->sources[0].is_file = 0; |
140 | 683 | pal->sources[0].u.s.memory = NULL; |
141 | 683 | pal->sources[0].u.s.decoded = 0; |
142 | 683 | pal->sources[0].u.s.parsed = 0; |
143 | | |
144 | | /* Stash the 0th one */ |
145 | 683 | code = arg_next(pal, &arg, memory); |
146 | 683 | if (code < 0) |
147 | 0 | return code; |
148 | 683 | return gs_lib_ctx_stash_exe(memory->gs_lib_ctx, arg); |
149 | 683 | } |
150 | | |
151 | | /* Push a string onto an arg list. */ |
152 | | int |
153 | | arg_push_memory_string(arg_list * pal, char *str, bool parsed, gs_memory_t * mem) |
154 | 0 | { |
155 | 0 | return arg_push_decoded_memory_string(pal, str, parsed, parsed, mem); |
156 | 0 | } |
157 | | |
158 | | int |
159 | | arg_push_decoded_memory_string(arg_list * pal, char *str, bool parsed, bool decoded, gs_memory_t * mem) |
160 | 0 | { |
161 | 0 | arg_source *pas; |
162 | |
|
163 | 0 | if (pal->depth+1 == arg_depth_max) { |
164 | 0 | lprintf("Too much nesting of @-files.\n"); |
165 | 0 | return 1; |
166 | 0 | } |
167 | 0 | pas = &pal->sources[++pal->depth]; |
168 | 0 | pas->is_file = false; |
169 | 0 | pas->u.s.parsed = parsed; |
170 | 0 | pas->u.s.decoded = decoded; |
171 | 0 | pas->u.s.chars = str; |
172 | 0 | pas->u.s.memory = mem; |
173 | 0 | pas->u.s.str = str; |
174 | 0 | return 0; |
175 | 0 | } |
176 | | |
177 | | /* Clean up an arg list. */ |
178 | | void |
179 | | arg_finit(arg_list * pal) |
180 | 0 | { |
181 | | /* No cleanup is required for level 0 */ |
182 | 0 | while (pal->depth) { |
183 | 0 | arg_source *pas = &pal->sources[pal->depth--]; |
184 | |
|
185 | 0 | if (pas->is_file) |
186 | 0 | gp_fclose(pas->u.file); |
187 | 0 | else if (pas->u.s.memory) |
188 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_finit"); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | static int get_codepoint(arg_list *pal, arg_source *pas) |
193 | 164k | { |
194 | 164k | int (*fn)(gp_file *file, const char **str); |
195 | | |
196 | 164k | fn = (!pas->is_file && pas->u.s.decoded ? get_codepoint_utf8 : pal->get_codepoint); |
197 | 164k | return fn(pas->is_file ? pas->u.file : NULL, &pas->u.s.str); |
198 | 164k | } |
199 | | |
200 | | /* Get the next arg from a list. */ |
201 | | /* Note that these are not copied to the heap. */ |
202 | | /* returns: |
203 | | * >0 - valid argument |
204 | | * 0 - arguments exhausted |
205 | | * <0 - error condition |
206 | | * *argstr is *always* set: to the arg string if it is valid, |
207 | | * or to NULL otherwise |
208 | | */ |
209 | | int |
210 | | arg_next(arg_list * pal, const char **argstr, const gs_memory_t *errmem) |
211 | 12.9k | { |
212 | 12.9k | arg_source *pas; |
213 | 12.9k | char *cstr; |
214 | 12.9k | int c; |
215 | 12.9k | int i; |
216 | 12.9k | bool in_quote, eol; |
217 | | |
218 | 12.9k | *argstr = NULL; |
219 | | |
220 | | /* Loop over arguments, finding one to return. */ |
221 | 12.9k | do { |
222 | 12.9k | pas = &pal->sources[pal->depth]; |
223 | 12.9k | if (!pas->is_file && pas->u.s.parsed) { |
224 | | /* This string is a "pushed-back" argument (retrieved |
225 | | * by a preceding arg_next(), but not processed). No |
226 | | * decoding is required. */ |
227 | | /* assert(pas->u.s.decoded); */ |
228 | 0 | if (strlen(pas->u.s.str) >= arg_str_max) { |
229 | 0 | errprintf(errmem, "Command too long: %s\n", pas->u.s.str); |
230 | 0 | return_error(gs_error_Fatal); |
231 | 0 | } |
232 | 0 | strcpy(pal->cstr, pas->u.s.str); |
233 | 0 | *argstr = pal->cstr; |
234 | 0 | if (pas->u.s.memory) |
235 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, "arg_next"); |
236 | 0 | pal->depth--; |
237 | 12.9k | } else { |
238 | | /* We need to decode the next argument */ |
239 | 12.9k | if (pal->depth == 0) { |
240 | 12.9k | if (pal->argn <= 0) |
241 | 674 | return 0; /* all done */ |
242 | | /* Move onto the next argument from the string. */ |
243 | 12.2k | pal->argn--; |
244 | 12.2k | pas->u.s.str = *(pal->argp++); |
245 | 12.2k | } |
246 | | /* Skip a prefix of whitespace. */ |
247 | 12.2k | do { |
248 | 12.2k | c = get_codepoint(pal, pas); |
249 | 12.2k | } while (c > 0 && c < 256 && isspace(c)); |
250 | 12.2k | if (c == EOF) { |
251 | | /* EOF before any argument characters. */ |
252 | 0 | if (pas->is_file) |
253 | 0 | gp_fclose(pas->u.file); |
254 | 0 | else if (pas->u.s.memory) |
255 | 0 | gs_free_object(pas->u.s.memory, pas->u.s.chars, |
256 | 0 | "arg_next"); |
257 | | /* If depth is 0, then we are reading from the simple |
258 | | * argument list and we just hit an "empty" argument |
259 | | * (such as -o ""). Return this. */ |
260 | 0 | if (pal->depth == 0) |
261 | 0 | { |
262 | 0 | *argstr = pal->cstr; |
263 | 0 | pal->cstr[0] = 0; |
264 | 0 | break; |
265 | 0 | } |
266 | | /* If depth > 0, then we're reading from a response |
267 | | * file, and we've hit the end of the response file. |
268 | | * Pop up one level and continue. */ |
269 | 0 | pal->depth--; |
270 | 0 | continue; /* Next argument */ |
271 | 0 | } |
272 | 152k | #define is_eol(c) (c == '\r' || c == '\n') |
273 | | /* Convert from astr into pal->cstr, and return it as *argstr. */ |
274 | 12.2k | *argstr = cstr = pal->cstr; |
275 | 12.2k | in_quote = false; |
276 | | /* We keep track of whether we have just read an "eol" or not, |
277 | | * in order to skip # characters at the start of a line |
278 | | * (possibly preceeded by whitespace). We do NOT want this to |
279 | | * apply to the start of arguments in the arg list, so only |
280 | | * set eol to be true, if we are in a file. */ |
281 | 12.2k | eol = pal->depth > 0; |
282 | 164k | for (i = 0;;) { |
283 | 164k | if (c == EOF) { |
284 | 12.2k | if (in_quote) { |
285 | 0 | cstr[i] = 0; |
286 | 0 | errprintf(errmem, |
287 | 0 | "Unterminated quote in @-file: %s\n", cstr); |
288 | 0 | return_error(gs_error_Fatal); |
289 | 0 | } |
290 | 12.2k | break; /* End of arg */ |
291 | 12.2k | } |
292 | | /* c != 0 */ |
293 | | /* If we aren't parsing from the arglist (i.e. depth > 0) |
294 | | * then we break on whitespace (unless we're in quotes). */ |
295 | 152k | if (pal->depth > 0 && !in_quote && c > 0 && c < 256 && isspace(c)) |
296 | 0 | break; /* End of arg */ |
297 | | /* c isn't leading or terminating whitespace. */ |
298 | 152k | if (c == '#' && eol) { |
299 | | /* Skip a comment. */ |
300 | 0 | do { |
301 | 0 | c = get_codepoint(pal, pas); |
302 | 0 | } while (c != 0 && !is_eol(c) && c != EOF); |
303 | 0 | if (c == '\r') |
304 | 0 | c = get_codepoint(pal, pas); |
305 | 0 | if (c == '\n') |
306 | 0 | c = get_codepoint(pal, pas); |
307 | 0 | continue; /* Next char */ |
308 | 0 | } |
309 | 152k | if (c == '\\') { |
310 | | /* Check for \ followed by newline. */ |
311 | 0 | c = get_codepoint(pal, pas); |
312 | 0 | if (is_eol(c)) { |
313 | 0 | if (c == '\r') |
314 | 0 | c = get_codepoint(pal, pas); |
315 | 0 | if (c == '\n') |
316 | 0 | c = get_codepoint(pal, pas); |
317 | 0 | eol = true; |
318 | 0 | continue; /* Next char */ |
319 | 0 | } |
320 | | /* \ anywhere else is treated as a printing character. */ |
321 | | /* This is different from the Unix shells. */ |
322 | 0 | if (i >= arg_str_max - 1) { |
323 | 0 | cstr[i] = 0; |
324 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
325 | 0 | return_error(gs_error_Fatal); |
326 | 0 | } |
327 | 0 | cstr[i++] = '\\'; |
328 | 0 | eol = false; |
329 | 0 | continue; /* Next char */ |
330 | 0 | } |
331 | | /* c will become part of the argument */ |
332 | 152k | if (i >= arg_str_max - 1) { |
333 | 0 | cstr[i] = 0; |
334 | 0 | errprintf(errmem, "Command too long: %s\n", cstr); |
335 | 0 | return_error(gs_error_Fatal); |
336 | 0 | } |
337 | | /* Now, some (slightly hairy) code to allow quotes to protect whitespace. |
338 | | * We only allow for double-quote quoting within @files, as a) command- |
339 | | * line args passed via argv are zero terminated so we should have no |
340 | | * confusion with whitespace, and b) callers using the command line will |
341 | | * have to have carefully quoted double-quotes to make them survive the |
342 | | * shell anyway! */ |
343 | 152k | if (c == '"' && pal->depth > 0) { |
344 | 0 | if (i == 0 && !in_quote) |
345 | 0 | in_quote = true; |
346 | 0 | else if (in_quote) { |
347 | | /* Need to check the next char to see if we're closing at the end */ |
348 | 0 | c = get_codepoint(pal, pas); |
349 | 0 | if (c > 0 && c < 256 && isspace(c)) { |
350 | | /* Reading from an @file, we've hit a space char. That's good, this |
351 | | * was a close quote. */ |
352 | 0 | cstr[i] = 0; |
353 | 0 | break; |
354 | 0 | } |
355 | | /* Not a close quote, just a literal quote. */ |
356 | 0 | i += codepoint_to_utf8(&cstr[i], '"'); |
357 | 0 | eol = false; |
358 | 0 | continue; /* Jump to the start of the loop without reading another char. */ |
359 | 0 | } else |
360 | 0 | i += codepoint_to_utf8(&cstr[i], c); |
361 | 0 | } |
362 | 152k | else |
363 | 152k | i += codepoint_to_utf8(&cstr[i], c); |
364 | 152k | eol = is_eol(c); |
365 | 152k | c = get_codepoint(pal, pas); |
366 | 152k | } |
367 | 12.2k | cstr[i] = 0; |
368 | 12.2k | } |
369 | | |
370 | | /* At this point *argstr is full of utf8 encoded argument. */ |
371 | | /* If it's an @filename argument, then deal with it, and never return |
372 | | * it to the caller. */ |
373 | 12.2k | if (pal->expand_ats && **argstr == '@') { |
374 | 0 | char *fname; |
375 | 0 | gp_file *f; |
376 | 0 | if (pal->depth+1 == arg_depth_max) { |
377 | 0 | errprintf(errmem, "Too much nesting of @-files.\n"); |
378 | 0 | return_error(gs_error_Fatal); |
379 | 0 | } |
380 | 0 | fname = (char *)*argstr + 1; /* skip @ */ |
381 | |
|
382 | 0 | if (gs_add_control_path(pal->memory, gs_permit_file_reading, fname) < 0) |
383 | 0 | return_error(gs_error_Fatal); |
384 | | |
385 | 0 | f = (*pal->arg_fopen) (fname, pal->fopen_data); |
386 | 0 | DISCARD(gs_remove_control_path(pal->memory, gs_permit_file_reading, fname)); |
387 | 0 | if (f == NULL) { |
388 | 0 | errprintf(errmem, "Unable to open command line file %s\n", *argstr); |
389 | 0 | return_error(gs_error_Fatal); |
390 | 0 | } |
391 | 0 | pas = &pal->sources[++pal->depth]; |
392 | 0 | pas->is_file = true; |
393 | 0 | pas->u.file = f; |
394 | 0 | *argstr = NULL; /* Empty the argument string so we don't return it. */ |
395 | 0 | continue; /* Loop back to parse the first arg from the file. */ |
396 | 0 | } |
397 | 12.2k | } while (*argstr == NULL || **argstr == 0); /* Until we get a non-empty arg */ |
398 | | |
399 | 12.2k | return 1; |
400 | 12.9k | } |
401 | | |
402 | | /* Copy an argument string to the heap. */ |
403 | | char * |
404 | | arg_copy(const char *str, gs_memory_t * mem) |
405 | 10.2k | { |
406 | 10.2k | char *sstr = (char *)gs_alloc_bytes(mem, strlen(str) + 1, "arg_copy"); |
407 | | |
408 | 10.2k | if (sstr == 0) { |
409 | 0 | lprintf("Out of memory!\n"); |
410 | 0 | return NULL; |
411 | 0 | } |
412 | 10.2k | strcpy(sstr, str); |
413 | 10.2k | return sstr; |
414 | 10.2k | } |
415 | | |
416 | | /* Free a previously arg_copy'd string */ |
417 | | void |
418 | | arg_free(char *str, gs_memory_t * mem) |
419 | 10.2k | { |
420 | 10.2k | gs_free_object(mem, str, "arg_copy"); |
421 | 10.2k | } |
422 | | |
423 | | int arg_strcmp(arg_list *pal, const char *arg, const char *match) |
424 | 46.4k | { |
425 | 46.4k | int rune, c; |
426 | | |
427 | 46.4k | if (!arg || !match) |
428 | 0 | return 1; |
429 | 92.8k | do { |
430 | 92.8k | rune = pal->get_codepoint(NULL, &arg); |
431 | 92.8k | if (rune == -1) |
432 | 0 | rune = 0; |
433 | 92.8k | c = *match++; |
434 | 92.8k | if (rune != c) |
435 | 46.4k | return rune - c; |
436 | 92.8k | } while (rune && c); |
437 | 0 | return 0; |
438 | 46.4k | } |