Line | Count | Source (jump to first uncovered line) |
1 | | /* quotearg.c - quote arguments for output |
2 | | |
3 | | Copyright (C) 1998-2002, 2004-2024 Free Software Foundation, Inc. |
4 | | |
5 | | This program is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation, either version 3 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
17 | | |
18 | | /* Written by Paul Eggert <eggert@twinsun.com> */ |
19 | | |
20 | | /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that |
21 | | the quoting_options_from_style function might be candidate for |
22 | | attribute 'pure' */ |
23 | | #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ |
24 | | # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" |
25 | | #endif |
26 | | |
27 | | #include <config.h> |
28 | | |
29 | | #include "quotearg.h" |
30 | | #include "quote.h" |
31 | | |
32 | | #include "attribute.h" |
33 | | #include "minmax.h" |
34 | | #include "xalloc.h" |
35 | | #include "c-strcaseeq.h" |
36 | | #include "localcharset.h" |
37 | | |
38 | | #include <ctype.h> |
39 | | #include <errno.h> |
40 | | #include <limits.h> |
41 | | #include <stdint.h> |
42 | | #include <stdlib.h> |
43 | | #include <string.h> |
44 | | #include <uchar.h> |
45 | | #include <wchar.h> |
46 | | |
47 | | #include "gettext.h" |
/* Translate MSGID through gettext.  */
#define _(msgid) gettext (msgid)
/* Expand to MSGID unchanged.  Conventionally this marks a string for
   message extraction when the translation is done elsewhere (here,
   in gettext_quote).  */
#define N_(msgid) msgid

/* Fallback for environments whose headers do not define SIZE_MAX.  */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in an 'int' object.  Used to size and index the
   quote_these_too bit vector.  Note that the value has type size_t.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
56 | | |
/* A complete description of how to quote an argument: a base style
   plus the modifiers below.  Functions in this file accept a null
   pointer to mean the file-level default options.  */
struct quoting_options
{
  /* Basic quoting style.  */
  enum quoting_style style;

  /* Additional flags.  Bitwise combination of enum quoting_flags.  */
  int flags;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     The bit for unsigned char value UC is bit UC % INT_BITS of
     element UC / INT_BITS (see set_char_quoting).  */
  unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];

  /* The left quote for custom_quoting_style.  */
  char const *left_quote;

  /* The right quote for custom_quoting_style.  */
  char const *right_quote;
};
75 | | |
/* Names of quoting styles.  The list is terminated by a null pointer.
   Entry I here is the name of the style stored at index I of
   quoting_style_vals, so the two arrays must be kept in the same
   order.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "shell-escape",
  "shell-escape-always",
  "c",
  "c-maybe",
  "escape",
  "locale",
  "clocale",
  0
};
91 | | |
/* Correspondences to quoting style names: entry I is the style named
   by quoting_style_args[I].  Unlike that array, this one has no
   terminating entry.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  shell_escape_quoting_style,
  shell_escape_always_quoting_style,
  c_quoting_style,
  c_maybe_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};
106 | | |
/* The default quoting options.  The accessors in this file operate on
   this object whenever they are handed a null options pointer.
   Being a static object, it starts out zero-initialized.  */
static struct quoting_options default_quoting_options;
109 | | |
110 | | /* Allocate a new set of quoting options, with contents initially identical |
111 | | to O if O is not null, or to the default if O is null. |
112 | | It is the caller's responsibility to free the result. */ |
113 | | struct quoting_options * |
114 | | clone_quoting_options (struct quoting_options *o) |
115 | 0 | { |
116 | 0 | int e = errno; |
117 | 0 | struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, |
118 | 0 | sizeof *o); |
119 | 0 | errno = e; |
120 | 0 | return p; |
121 | 0 | } |
122 | | |
123 | | /* Get the value of O's quoting style. If O is null, use the default. */ |
124 | | enum quoting_style |
125 | | get_quoting_style (struct quoting_options const *o) |
126 | 0 | { |
127 | 0 | return (o ? o : &default_quoting_options)->style; |
128 | 0 | } |
129 | | |
130 | | /* In O (or in the default if O is null), |
131 | | set the value of the quoting style to S. */ |
132 | | void |
133 | | set_quoting_style (struct quoting_options *o, enum quoting_style s) |
134 | 0 | { |
135 | 0 | (o ? o : &default_quoting_options)->style = s; |
136 | 0 | } |
137 | | |
138 | | /* In O (or in the default if O is null), |
139 | | set the value of the quoting options for character C to I. |
140 | | Return the old value. Currently, the only values defined for I are |
141 | | 0 (the default) and 1 (which means to quote the character even if |
142 | | it would not otherwise be quoted). */ |
143 | | int |
144 | | set_char_quoting (struct quoting_options *o, char c, int i) |
145 | 0 | { |
146 | 0 | unsigned char uc = c; |
147 | 0 | unsigned int *p = |
148 | 0 | (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; |
149 | 0 | int shift = uc % INT_BITS; |
150 | 0 | int r = (*p >> shift) & 1; |
151 | 0 | *p ^= ((i & 1) ^ r) << shift; |
152 | 0 | return r; |
153 | 0 | } |
154 | | |
155 | | /* In O (or in the default if O is null), |
156 | | set the value of the quoting options flag to I, which can be a |
157 | | bitwise combination of enum quoting_flags, or 0 for default |
158 | | behavior. Return the old value. */ |
159 | | int |
160 | | set_quoting_flags (struct quoting_options *o, int i) |
161 | 0 | { |
162 | 0 | int r; |
163 | 0 | if (!o) |
164 | 0 | o = &default_quoting_options; |
165 | 0 | r = o->flags; |
166 | 0 | o->flags = i; |
167 | 0 | return r; |
168 | 0 | } |
169 | | |
170 | | void |
171 | | set_custom_quoting (struct quoting_options *o, |
172 | | char const *left_quote, char const *right_quote) |
173 | 0 | { |
174 | 0 | if (!o) |
175 | 0 | o = &default_quoting_options; |
176 | 0 | o->style = custom_quoting_style; |
177 | 0 | if (!left_quote || !right_quote) |
178 | 0 | abort (); |
179 | 0 | o->left_quote = left_quote; |
180 | 0 | o->right_quote = right_quote; |
181 | 0 | } |
182 | | |
183 | | /* Return quoting options for STYLE, with no extra quoting. */ |
184 | | static struct quoting_options /* NOT PURE!! */ |
185 | | quoting_options_from_style (enum quoting_style style) |
186 | 3.50k | { |
187 | 3.50k | struct quoting_options o = { literal_quoting_style, 0, { 0 }, NULL, NULL }; |
188 | 3.50k | if (style == custom_quoting_style) |
189 | 0 | abort (); |
190 | 3.50k | o.style = style; |
191 | 3.50k | return o; |
192 | 3.50k | } |
193 | | |
194 | | /* MSGID approximates a quotation mark. Return its translation if it |
195 | | has one; otherwise, return either it or "\"", depending on S. |
196 | | |
197 | | S is either clocale_quoting_style or locale_quoting_style. */ |
198 | | static char const * |
199 | | gettext_quote (char const *msgid, enum quoting_style s) |
200 | 7.81k | { |
201 | 7.81k | char const *translation = _(msgid); |
202 | 7.81k | char const *locale_code; |
203 | | |
204 | 7.81k | if (translation != msgid) |
205 | 0 | return translation; |
206 | | |
207 | | /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019. |
208 | | Here is a list of other locales that include U+2018 and U+2019: |
209 | | |
210 | | ISO-8859-7 0xA1 KOI8-T 0x91 |
211 | | CP869 0x8B CP874 0x91 |
212 | | CP932 0x81 0x65 CP936 0xA1 0xAE |
213 | | CP949 0xA1 0xAE CP950 0xA1 0xA5 |
214 | | CP1250 0x91 CP1251 0x91 |
215 | | CP1252 0x91 CP1253 0x91 |
216 | | CP1254 0x91 CP1255 0x91 |
217 | | CP1256 0x91 CP1257 0x91 |
218 | | EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE |
219 | | EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5 |
220 | | BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE |
221 | | GBK 0xA1 0xAE Georgian-PS 0x91 |
222 | | PT154 0x91 |
223 | | |
224 | | None of these is still in wide use; using iconv is overkill. */ |
225 | 7.81k | locale_code = locale_charset (); |
226 | 7.81k | if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0)) |
227 | 0 | return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99"; |
228 | 7.81k | if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0)) |
229 | 0 | return msgid[0] == '`' ? "\xa1\ae": "\xa1\xaf"; |
230 | | |
231 | 7.81k | return (s == clocale_quoting_style ? "\"" : "'"); |
232 | 7.81k | } |
233 | | |
/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
   QUOTE_THESE_TOO to control quoting.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough;
   in that case nothing is stored at or beyond BUFFER[BUFFERSIZE], and
   the terminating null is stored only if it fits.
   If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.

   QUOTE_THESE_TOO may be null, meaning no extra characters to quote.
   LEFT_QUOTE and RIGHT_QUOTE are used as given only for
   custom_quoting_style; for locale_quoting_style and
   clocale_quoting_style they are replaced by the results of
   gettext_quote.

   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
   ARGSIZE, O), except it breaks O into its component pieces and is
   not careful about errno.  */

static size_t
quotearg_buffer_restyled (char *buffer, size_t buffersize,
                          char const *arg, size_t argsize,
                          enum quoting_style quoting_style, int flags,
                          unsigned int const *quote_these_too,
                          char const *left_quote,
                          char const *right_quote)
{
  size_t i;
  /* Number of output bytes generated so far, whether or not they all
     fit in BUFFER.  */
  size_t len = 0;
  /* Holds the caller's BUFFERSIZE while BUFFERSIZE is temporarily
     zeroed for a size-only scan (see the '\'' case below); otherwise
     zero.  */
  size_t orig_buffersize = 0;
  /* The closing quote string, which is also the string that must be
     escaped when it occurs within ARG.  Stays null for styles that
     have no outer quotes.  */
  char const *quote_string = 0;
  size_t quote_string_len = 0;
  /* Whether problematic characters are written as backslash escapes.  */
  bool backslash_escapes = false;
  bool unibyte_locale = MB_CUR_MAX == 1;
  /* Whether to try omitting the outer quotes.  When something is found
     that needs them after all, control jumps to
     force_outer_quoting_style, which starts over with them enabled.  */
  bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
  bool encountered_single_quote = false;
  /* Cleared as soon as one character is processed whose
     c_and_shell_quote_compat flag is false.  */
  bool all_c_and_shell_quote_compat = true;

  /* Append the byte C to the output: count it always, but store it
     only if it fits in BUFFER.  */
#define STORE(c) \
    do \
      { \
        if (len < buffersize) \
          buffer[len] = (c); \
        len++; \
      } \
    while (0)

  /* Begin a backslash escape.  If outer quotes were being elided,
     restart with them forced on instead.  In the shell styles, first
     close the current '...' section and open a $'...' section unless
     one is already open.  */
#define START_ESC() \
    do \
      { \
        if (elide_outer_quotes) \
          goto force_outer_quoting_style; \
        escaping = true; \
        if (quoting_style == shell_always_quoting_style \
            && ! pending_shell_escape_end) \
          { \
            STORE ('\''); \
            STORE ('$'); \
            STORE ('\''); \
            pending_shell_escape_end = true; \
          } \
        STORE ('\\'); \
      } \
    while (0)

  /* If a $'...' section is open and the current character is not
     being escaped, close that section and reopen a plain '...'
     section.  */
#define END_ESC() \
    do \
      { \
        if (pending_shell_escape_end && ! escaping) \
          { \
            STORE ('\''); \
            STORE ('\''); \
            pending_shell_escape_end = false; \
          } \
      } \
    while (0)

  /* Re-entry point used after a size-only scan, to generate the
     output for real.  */
 process_input: ;
  bool pending_shell_escape_end = false;

  /* Set up the per-style state and emit the opening quote, if any.  */
  switch (quoting_style)
    {
    case c_maybe_quoting_style:
      quoting_style = c_quoting_style;
      elide_outer_quotes = true;
      FALLTHROUGH;
    case c_quoting_style:
      if (!elide_outer_quotes)
        STORE ('"');
      backslash_escapes = true;
      quote_string = "\"";
      quote_string_len = 1;
      break;

    case escape_quoting_style:
      backslash_escapes = true;
      elide_outer_quotes = false;
      break;

    case locale_quoting_style:
    case clocale_quoting_style:
    case custom_quoting_style:
      {
        if (quoting_style != custom_quoting_style)
          {
            /* TRANSLATORS:
               Get translations for open and closing quotation marks.
               The message catalog should translate "`" to a left
               quotation mark suitable for the locale, and similarly for
               "'".  For example, a French Unicode locale should translate
               these to U+00AB (LEFT-POINTING DOUBLE ANGLE
               QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE
               QUOTATION MARK), respectively.

               If the catalog has no translation, we will try to
               use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and
               Unicode U+2019 (RIGHT SINGLE QUOTATION MARK).  If the
               current locale is not Unicode, locale_quoting_style
               will quote 'like this', and clocale_quoting_style will
               quote "like this".  You should always include translations
               for "`" and "'" even if U+2018 and U+2019 are appropriate
               for your locale.

               If you don't know what to put here, please see
               <https://en.wikipedia.org/wiki/Quotation_marks_in_other_languages>
               and use glyphs suitable for your language.  */
            left_quote = gettext_quote (N_("`"), quoting_style);
            right_quote = gettext_quote (N_("'"), quoting_style);
          }
        /* Emit the opening quote; quote_string serves as a scratch
           cursor here before being set to the closing quote.  */
        if (!elide_outer_quotes)
          for (quote_string = left_quote; *quote_string; quote_string++)
            STORE (*quote_string);
        backslash_escapes = true;
        quote_string = right_quote;
        quote_string_len = strlen (quote_string);
      }
      break;

    /* The four shell styles share one implementation: they differ only
       in whether outer quotes may be elided and whether backslash
       escapes are used, and all continue as
       shell_always_quoting_style.  */
    case shell_escape_quoting_style:
      backslash_escapes = true;
      FALLTHROUGH;
    case shell_quoting_style:
      elide_outer_quotes = true;
      FALLTHROUGH;
    case shell_escape_always_quoting_style:
      if (!elide_outer_quotes)
        backslash_escapes = true;
      FALLTHROUGH;
    case shell_always_quoting_style:
      quoting_style = shell_always_quoting_style;
      if (!elide_outer_quotes)
        STORE ('\'');
      quote_string = "'";
      quote_string_len = 1;
      break;

    case literal_quoting_style:
      elide_outer_quotes = false;
      break;

    default:
      abort ();
    }

  /* Process ARG one character (or one multibyte sequence) at a time.  */
  for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
    {
      unsigned char c;
      unsigned char esc;
      /* Whether the closing quote string occurs at this position.  */
      bool is_right_quote = false;
      /* Whether this character is emitted as a backslash escape.  */
      bool escaping = false;
      bool c_and_shell_quote_compat = false;

      if (backslash_escapes
          && quoting_style != shell_always_quoting_style
          && quote_string_len
          && (i + quote_string_len
              <= (argsize == SIZE_MAX && 1 < quote_string_len
                  /* Use strlen only if we must: when argsize is SIZE_MAX,
                     and when the quote string is more than 1 byte long.
                     If we do call strlen, save the result.  */
                  ? (argsize = strlen (arg)) : argsize))
          && memcmp (arg + i, quote_string, quote_string_len) == 0)
        {
          if (elide_outer_quotes)
            goto force_outer_quoting_style;
          is_right_quote = true;
        }

      c = arg[i];
      switch (c)
        {
        case '\0':
          if (backslash_escapes)
            {
              START_ESC ();
              /* If quote_string were to begin with digits, we'd need to
                 test for the end of the arg as well.  However, it's
                 hard to imagine any locale that would use digits in
                 quotes, and set_custom_quoting is documented not to
                 accept them.  Use only a single \0 with shell-escape
                 as currently digits are not printed within $'...' */
              if (quoting_style != shell_always_quoting_style
                  && i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
                {
                  STORE ('0');
                  STORE ('0');
                }
              c = '0';
              /* We don't have to worry that this last '0' will be
                 backslash-escaped because, again, quote_string should
                 not start with it and because quote_these_too is
                 documented as not accepting it.  */
            }
          else if (flags & QA_ELIDE_NULL_BYTES)
            continue;
          break;

        case '?':
          switch (quoting_style)
            {
            case shell_always_quoting_style:
              if (elide_outer_quotes)
                goto force_outer_quoting_style;
              break;

            case c_quoting_style:
              if ((flags & QA_SPLIT_TRIGRAPHS)
                  && i + 2 < argsize && arg[i + 1] == '?')
                switch (arg[i + 2])
                  {
                  case '!': case '\'':
                  case '(': case ')': case '-': case '/':
                  case '<': case '=': case '>':
                    /* Escape the second '?' in what would otherwise be
                       a trigraph.  */
                    if (elide_outer_quotes)
                      goto force_outer_quoting_style;
                    c = arg[i + 2];
                    i += 2;
                    STORE ('?');
                    STORE ('"');
                    STORE ('"');
                    STORE ('?');
                    break;

                  default:
                    break;
                  }
              break;

            default:
              break;
            }
          break;

        case '\a': esc = 'a'; goto c_escape;
        case '\b': esc = 'b'; goto c_escape;
        case '\f': esc = 'f'; goto c_escape;
        case '\n': esc = 'n'; goto c_and_shell_escape;
        case '\r': esc = 'r'; goto c_and_shell_escape;
        case '\t': esc = 't'; goto c_and_shell_escape;
        case '\v': esc = 'v'; goto c_escape;
        case '\\': esc = c;
          /* Never need to escape '\' in shell case.  */
          if (quoting_style == shell_always_quoting_style)
            {
              if (elide_outer_quotes)
                goto force_outer_quoting_style;
              goto store_c;
            }

          /* No need to escape the escape if we are trying to elide
             outer quotes and nothing else is problematic.  */
          if (backslash_escapes && elide_outer_quotes && quote_string_len)
            goto store_c;

        c_and_shell_escape:
          if (quoting_style == shell_always_quoting_style
              && elide_outer_quotes)
            goto force_outer_quoting_style;
          /* fall through */
        c_escape:
          /* Emit the character as backslash followed by ESC, if this
             style uses backslash escapes; otherwise handle it like any
             other character.  */
          if (backslash_escapes)
            {
              c = esc;
              goto store_escape;
            }
          break;

        case '{': case '}': /* sometimes special if isolated */
          if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
            break;
          FALLTHROUGH;
        case '#': case '~':
          /* These are treated as shell-special only at the start of
             ARG.  */
          if (i != 0)
            break;
          FALLTHROUGH;
        case ' ':
          c_and_shell_quote_compat = true;
          FALLTHROUGH;
        case '!': /* special in bash */
        case '"': case '$': case '&':
        case '(': case ')': case '*': case ';':
        case '<':
        case '=': /* sometimes special in 0th or (with "set -k") later args */
        case '>': case '[':
        case '^': /* special in old /bin/sh, e.g., Solaris 10 */
        case '`': case '|':
          /* A shell special character.  */
          if (quoting_style == shell_always_quoting_style
              && elide_outer_quotes)
            goto force_outer_quoting_style;
          break;

        case '\'':
          encountered_single_quote = true;
          c_and_shell_quote_compat = true;
          if (quoting_style == shell_always_quoting_style)
            {
              if (elide_outer_quotes)
                goto force_outer_quoting_style;

              if (buffersize && ! orig_buffersize)
                {
                  /* Just scan string to see if supports a more concise
                     representation, rather than writing a longer string
                     but returning the length of the more concise form.  */
                  orig_buffersize = buffersize;
                  buffersize = 0;
                }

              /* Close the quoted section, emit an escaped quote, and
                 leave a new quoted section open once C itself is
                 stored below.  */
              STORE ('\'');
              STORE ('\\');
              STORE ('\'');
              pending_shell_escape_end = false;
            }
          break;

        case '%': case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9': case ':':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
        case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
        case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
        case 'o': case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
          /* These characters don't cause problems, no matter what the
             quoting style is.  They cannot start multibyte sequences.
             A digit or a special letter would cause trouble if it
             appeared at the beginning of quote_string because we'd then
             escape by prepending a backslash.  However, it's hard to
             imagine any locale that would use digits or letters as
             quotes, and set_custom_quoting is documented not to accept
             them.  Also, a digit or a special letter would cause
             trouble if it appeared in quote_these_too, but that's also
             documented as not accepting them.  */
          c_and_shell_quote_compat = true;
          break;

        default:
          /* If we have a multibyte sequence, copy it until we reach
             its end, find an error, or come back to the initial shift
             state.  For C-like styles, if the sequence has
             unprintable characters, escape the whole sequence, since
             we can't easily escape single characters within it.  */
          {
            /* Length of multibyte sequence found so far.  */
            size_t m;

            bool printable;

            if (unibyte_locale)
              {
                m = 1;
                printable = isprint (c) != 0;
              }
            else
              {
                mbstate_t mbstate;
                mbszero (&mbstate);

                m = 0;
                printable = true;
                /* The conversion below needs a byte count, so the
                   string length must be known from here on.  */
                if (argsize == SIZE_MAX)
                  argsize = strlen (arg);

                for (;;)
                  {
                    char32_t w;
                    size_t bytes = mbrtoc32 (&w, &arg[i + m],
                                             argsize - (i + m), &mbstate);
                    if (bytes == 0)
                      break;
                    else if (bytes == (size_t) -1)
                      {
                        /* Invalid byte sequence.  */
                        printable = false;
                        break;
                      }
                    else if (bytes == (size_t) -2)
                      {
                        /* Incomplete sequence: take in the remaining
                           bytes up to the end of ARG or the next null
                           byte.  */
                        printable = false;
                        while (i + m < argsize && arg[i + m])
                          m++;
                        break;
                      }
                    else
                      {
                        #if !GNULIB_MBRTOC32_REGULAR
                        if (bytes == (size_t) -3)
                          bytes = 0;
                        #endif
                        /* Work around a bug with older shells that "see" a '\'
                           that is really the 2nd byte of a multibyte character.
                           In practice the problem is limited to ASCII
                           chars >= '@' that are shell special chars.  */
                        if ('[' == 0x5b && elide_outer_quotes
                            && quoting_style == shell_always_quoting_style)
                          {
                            size_t j;
                            for (j = 1; j < bytes; j++)
                              switch (arg[i + m + j])
                                {
                                case '[': case '\\': case '^':
                                case '`': case '|':
                                  goto force_outer_quoting_style;

                                default:
                                  break;
                                }
                          }

                        if (! c32isprint (w))
                          printable = false;
                        m += bytes;
                      }
                    #if !GNULIB_MBRTOC32_REGULAR
                    if (mbsinit (&mbstate))
                    #endif
                      break;
                  }
              }

            c_and_shell_quote_compat = printable;

            if (1 < m || (backslash_escapes && ! printable))
              {
                /* Output a multibyte sequence, or an escaped
                   unprintable unibyte character.  */
                size_t ilim = i + m;

                for (;;)
                  {
                    if (backslash_escapes && ! printable)
                      {
                        /* Emit the byte as a three-digit octal escape;
                           the last digit is left in C for store_c.  */
                        START_ESC ();
                        STORE ('0' + (c >> 6));
                        STORE ('0' + ((c >> 3) & 7));
                        c = '0' + (c & 7);
                      }
                    else if (is_right_quote)
                      {
                        STORE ('\\');
                        is_right_quote = false;
                      }
                    if (ilim <= i + 1)
                      break;
                    END_ESC ();
                    STORE (c);
                    c = arg[++i];
                  }

                goto store_c;
              }
          }
        }

      /* Store C unescaped unless it is selected by quote_these_too
         (consulted only when backslash escapes are in use outside the
         shell styles, or when outer quotes are being elided) or it
         begins the closing quote string.  */
      if (! (((backslash_escapes && quoting_style != shell_always_quoting_style)
              || elide_outer_quotes)
             && quote_these_too
             && quote_these_too[c / INT_BITS] >> (c % INT_BITS) & 1)
          && !is_right_quote)
        goto store_c;

    store_escape:
      START_ESC ();

    store_c:
      END_ESC ();
      STORE (c);

      if (! c_and_shell_quote_compat)
        all_c_and_shell_quote_compat = false;
    }

  /* An empty result with elided quotes in a shell style: start over
     with the outer quotes forced on.  */
  if (len == 0 && quoting_style == shell_always_quoting_style
      && elide_outer_quotes)
    goto force_outer_quoting_style;

  /* Single shell quotes (') are commonly enough used as an apostrophe,
     that we attempt to minimize the quoting in this case.  Note itʼs
     better to use the apostrophe modifier "\u02BC" if possible, as that
     renders better and works with the word match regex \W+ etc.  */
  if (quoting_style == shell_always_quoting_style && ! elide_outer_quotes
      && encountered_single_quote)
    {
      if (all_c_and_shell_quote_compat)
        return quotearg_buffer_restyled (buffer, orig_buffersize, arg, argsize,
                                         c_quoting_style,
                                         flags, quote_these_too,
                                         left_quote, right_quote);
      else if (! buffersize && orig_buffersize)
        {
          /* Disable read-only scan, and reprocess to write quoted string.  */
          buffersize = orig_buffersize;
          len = 0;
          goto process_input;
        }
    }

  /* Emit the closing quote, if any.  */
  if (quote_string && !elide_outer_quotes)
    for (; *quote_string; quote_string++)
      STORE (*quote_string);

  if (len < buffersize)
    buffer[len] = '\0';
  return len;

 force_outer_quoting_style:
  /* Don't reuse quote_these_too, since the addition of outer quotes
     sufficiently quotes the specified characters.  */
  if (quoting_style == shell_always_quoting_style && backslash_escapes)
    quoting_style = shell_escape_always_quoting_style;
  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
                                   quoting_style,
                                   flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
                                   left_quote, right_quote);
}
769 | | |
770 | | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of |
771 | | argument ARG (of size ARGSIZE), using O to control quoting. |
772 | | If O is null, use the default. |
773 | | Terminate the output with a null character, and return the written |
774 | | size of the output, not counting the terminating null. |
775 | | If BUFFERSIZE is too small to store the output string, return the |
776 | | value that would have been returned had BUFFERSIZE been large enough. |
777 | | If ARGSIZE is SIZE_MAX, use the string length of the argument for |
778 | | ARGSIZE. */ |
779 | | size_t |
780 | | quotearg_buffer (char *buffer, size_t buffersize, |
781 | | char const *arg, size_t argsize, |
782 | | struct quoting_options const *o) |
783 | 0 | { |
784 | 0 | struct quoting_options const *p = o ? o : &default_quoting_options; |
785 | 0 | int e = errno; |
786 | 0 | size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, |
787 | 0 | p->style, p->flags, p->quote_these_too, |
788 | 0 | p->left_quote, p->right_quote); |
789 | 0 | errno = e; |
790 | 0 | return r; |
791 | 0 | } |
792 | | |
/* Return newly allocated storage holding the quoted form of ARG
   (ARGSIZE bytes long), as directed by O.  This is quotearg_alloc_mem
   with no place to report the result's size.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  char *quoted = quotearg_alloc_mem (arg, argsize, NULL, o);
  return quoted;
}
799 | | |
800 | | /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly |
801 | | allocated storage containing the quoted string, and store the |
802 | | resulting size into *SIZE, if non-NULL. The result can contain |
803 | | embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not |
804 | | NULL, and set_quoting_flags has not set the null byte elision |
805 | | flag. */ |
806 | | char * |
807 | | quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size, |
808 | | struct quoting_options const *o) |
809 | 0 | { |
810 | 0 | struct quoting_options const *p = o ? o : &default_quoting_options; |
811 | 0 | int e = errno; |
812 | | /* Elide embedded null bytes if we can't return a size. */ |
813 | 0 | int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES); |
814 | 0 | size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style, |
815 | 0 | flags, p->quote_these_too, |
816 | 0 | p->left_quote, |
817 | 0 | p->right_quote) + 1; |
818 | 0 | char *buf = xcharalloc (bufsize); |
819 | 0 | quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, |
820 | 0 | p->quote_these_too, |
821 | 0 | p->left_quote, p->right_quote); |
822 | 0 | errno = e; |
823 | 0 | if (size) |
824 | 0 | *size = bufsize - 1; |
825 | 0 | return buf; |
826 | 0 | } |
827 | | |
/* A storage slot with size and pointer to a value.  */
struct slotvec
{
  /* Number of bytes available at VAL.  */
  size_t size;
  /* The slot's buffer.  For every slot except the preallocated
     slot 0 buffer, quotearg_free releases it with 'free'.  */
  char *val;
};
834 | | |
/* Preallocate a slot 0 buffer, so that the caller can always quote
   one small component of a "memory exhausted" message in slot 0.  */
static char slot0[256];
/* Number of entries in the vector that SLOTVEC points to.  */
static int nslots = 1;
/* The initial one-entry slot vector, whose only slot is SLOT0.  */
static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The current slot vector: either &slotvec0 or a heap-allocated
   replacement.  */
static struct slotvec *slotvec = &slotvec0;
841 | | |
842 | | void |
843 | | quotearg_free (void) |
844 | 0 | { |
845 | 0 | struct slotvec *sv = slotvec; |
846 | 0 | int i; |
847 | 0 | for (i = 1; i < nslots; i++) |
848 | 0 | free (sv[i].val); |
849 | 0 | if (sv[0].val != slot0) |
850 | 0 | { |
851 | 0 | free (sv[0].val); |
852 | 0 | slotvec0.size = sizeof slot0; |
853 | 0 | slotvec0.val = slot0; |
854 | 0 | } |
855 | 0 | if (sv != &slotvec0) |
856 | 0 | { |
857 | 0 | free (sv); |
858 | 0 | slotvec = &slotvec0; |
859 | 0 | } |
860 | 0 | nslots = 1; |
861 | 0 | } |
862 | | |
863 | | /* Use storage slot N to return a quoted version of argument ARG. |
864 | | ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a |
865 | | null-terminated string. |
866 | | OPTIONS specifies the quoting options. |
867 | | The returned value points to static storage that can be |
868 | | reused by the next call to this function with the same value of N. |
869 | | N must be nonnegative; it is typically small, and must be |
870 | | less than MIN (INT_MAX, IDX_MAX). The type of N is signed |
871 | | to allow for future extensions (using negative values). */ |
872 | | static char * |
873 | | quotearg_n_options (int n, char const *arg, size_t argsize, |
874 | | struct quoting_options const *options) |
875 | 7.36k | { |
876 | 7.36k | int e = errno; |
877 | | |
878 | 7.36k | struct slotvec *sv = slotvec; |
879 | | |
880 | 7.36k | int nslots_max = MIN (INT_MAX, IDX_MAX); |
881 | 7.36k | if (! (0 <= n && n < nslots_max)) |
882 | 0 | abort (); |
883 | | |
884 | 7.36k | if (nslots <= n) |
885 | 0 | { |
886 | 0 | bool preallocated = (sv == &slotvec0); |
887 | 0 | idx_t new_nslots = nslots; |
888 | |
|
889 | 0 | slotvec = sv = xpalloc (preallocated ? NULL : sv, &new_nslots, |
890 | 0 | n - nslots + 1, nslots_max, sizeof *sv); |
891 | 0 | if (preallocated) |
892 | 0 | *sv = slotvec0; |
893 | 0 | memset (sv + nslots, 0, (new_nslots - nslots) * sizeof *sv); |
894 | 0 | nslots = new_nslots; |
895 | 0 | } |
896 | | |
897 | 7.36k | { |
898 | 7.36k | size_t size = sv[n].size; |
899 | 7.36k | char *val = sv[n].val; |
900 | | /* Elide embedded null bytes since we don't return a size. */ |
901 | 7.36k | int flags = options->flags | QA_ELIDE_NULL_BYTES; |
902 | 7.36k | size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize, |
903 | 7.36k | options->style, flags, |
904 | 7.36k | options->quote_these_too, |
905 | 7.36k | options->left_quote, |
906 | 7.36k | options->right_quote); |
907 | | |
908 | 7.36k | if (size <= qsize) |
909 | 96 | { |
910 | 96 | sv[n].size = size = qsize + 1; |
911 | 96 | if (val != slot0) |
912 | 94 | free (val); |
913 | 96 | sv[n].val = val = xcharalloc (size); |
914 | 96 | quotearg_buffer_restyled (val, size, arg, argsize, options->style, |
915 | 96 | flags, options->quote_these_too, |
916 | 96 | options->left_quote, |
917 | 96 | options->right_quote); |
918 | 96 | } |
919 | | |
920 | 7.36k | errno = e; |
921 | 7.36k | return val; |
922 | 7.36k | } |
923 | 7.36k | } |
924 | | |
925 | | char * |
926 | | quotearg_n (int n, char const *arg) |
927 | 0 | { |
928 | 0 | return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); |
929 | 0 | } |
930 | | |
931 | | char * |
932 | | quotearg_n_mem (int n, char const *arg, size_t argsize) |
933 | 0 | { |
934 | 0 | return quotearg_n_options (n, arg, argsize, &default_quoting_options); |
935 | 0 | } |
936 | | |
937 | | char * |
938 | | quotearg (char const *arg) |
939 | 0 | { |
940 | 0 | return quotearg_n (0, arg); |
941 | 0 | } |
942 | | |
943 | | char * |
944 | | quotearg_mem (char const *arg, size_t argsize) |
945 | 0 | { |
946 | 0 | return quotearg_n_mem (0, arg, argsize); |
947 | 0 | } |
948 | | |
949 | | char * |
950 | | quotearg_n_style (int n, enum quoting_style s, char const *arg) |
951 | 3.50k | { |
952 | 3.50k | struct quoting_options const o = quoting_options_from_style (s); |
953 | 3.50k | return quotearg_n_options (n, arg, SIZE_MAX, &o); |
954 | 3.50k | } |
955 | | |
956 | | char * |
957 | | quotearg_n_style_mem (int n, enum quoting_style s, |
958 | | char const *arg, size_t argsize) |
959 | 0 | { |
960 | 0 | struct quoting_options const o = quoting_options_from_style (s); |
961 | 0 | return quotearg_n_options (n, arg, argsize, &o); |
962 | 0 | } |
963 | | |
964 | | char * |
965 | | quotearg_style (enum quoting_style s, char const *arg) |
966 | 3.50k | { |
967 | 3.50k | return quotearg_n_style (0, s, arg); |
968 | 3.50k | } |
969 | | |
970 | | char * |
971 | | quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize) |
972 | 0 | { |
973 | 0 | return quotearg_n_style_mem (0, s, arg, argsize); |
974 | 0 | } |
975 | | |
976 | | char * |
977 | | quotearg_char_mem (char const *arg, size_t argsize, char ch) |
978 | 0 | { |
979 | 0 | struct quoting_options options; |
980 | 0 | options = default_quoting_options; |
981 | 0 | set_char_quoting (&options, ch, 1); |
982 | 0 | return quotearg_n_options (0, arg, argsize, &options); |
983 | 0 | } |
984 | | |
985 | | char * |
986 | | quotearg_char (char const *arg, char ch) |
987 | 0 | { |
988 | 0 | return quotearg_char_mem (arg, SIZE_MAX, ch); |
989 | 0 | } |
990 | | |
/* Quote the null-terminated string ARG into slot 0 with the default
   quoting options, additionally enabling quoting for ':'.  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char_mem (arg, SIZE_MAX, ':');
}
996 | | |
997 | | char * |
998 | | quotearg_colon_mem (char const *arg, size_t argsize) |
999 | 0 | { |
1000 | 0 | return quotearg_char_mem (arg, argsize, ':'); |
1001 | 0 | } |
1002 | | |
1003 | | char * |
1004 | | quotearg_n_style_colon (int n, enum quoting_style s, char const *arg) |
1005 | 0 | { |
1006 | 0 | struct quoting_options options; |
1007 | 0 | options = quoting_options_from_style (s); |
1008 | 0 | set_char_quoting (&options, ':', 1); |
1009 | 0 | return quotearg_n_options (n, arg, SIZE_MAX, &options); |
1010 | 0 | } |
1011 | | |
1012 | | char * |
1013 | | quotearg_n_custom (int n, char const *left_quote, |
1014 | | char const *right_quote, char const *arg) |
1015 | 0 | { |
1016 | 0 | return quotearg_n_custom_mem (n, left_quote, right_quote, arg, |
1017 | 0 | SIZE_MAX); |
1018 | 0 | } |
1019 | | |
1020 | | char * |
1021 | | quotearg_n_custom_mem (int n, char const *left_quote, |
1022 | | char const *right_quote, |
1023 | | char const *arg, size_t argsize) |
1024 | 0 | { |
1025 | 0 | struct quoting_options o = default_quoting_options; |
1026 | 0 | set_custom_quoting (&o, left_quote, right_quote); |
1027 | 0 | return quotearg_n_options (n, arg, argsize, &o); |
1028 | 0 | } |
1029 | | |
/* Like quotearg_n_custom, but always using slot 0.  */
char *
quotearg_custom (char const *left_quote, char const *right_quote,
                 char const *arg)
{
  return quotearg_n_custom_mem (0, left_quote, right_quote, arg, SIZE_MAX);
}
1036 | | |
1037 | | char * |
1038 | | quotearg_custom_mem (char const *left_quote, char const *right_quote, |
1039 | | char const *arg, size_t argsize) |
1040 | 0 | { |
1041 | 0 | return quotearg_n_custom_mem (0, left_quote, right_quote, arg, |
1042 | 0 | argsize); |
1043 | 0 | } |
1044 | | |
1045 | | |
1046 | | /* The quoting option used by the functions of quote.h. */ |
1047 | | struct quoting_options quote_quoting_options = |
1048 | | { |
1049 | | locale_quoting_style, |
1050 | | 0, |
1051 | | { 0 }, |
1052 | | NULL, NULL |
1053 | | }; |
1054 | | |
1055 | | char const * |
1056 | | quote_n_mem (int n, char const *arg, size_t argsize) |
1057 | 3.85k | { |
1058 | 3.85k | return quotearg_n_options (n, arg, argsize, "e_quoting_options); |
1059 | 3.85k | } |
1060 | | |
/* Quote ARG, of size ARGSIZE, into slot 0; see quote_n_mem.  */
char const *
quote_mem (char const *arg, size_t argsize)
{
  char const *quoted = quote_n_mem (0, arg, argsize);
  return quoted;
}
1066 | | |
/* Quote the null-terminated string ARG into slot N; see
   quote_n_mem.  */
char const *
quote_n (int n, char const *arg)
{
  char const *quoted = quote_n_mem (n, arg, SIZE_MAX);
  return quoted;
}
1072 | | |
/* Quote the null-terminated string ARG into slot 0; see
   quote_n_mem.  */
char const *
quote (char const *arg)
{
  return quote_n_mem (0, arg, SIZE_MAX);
}
1078 | | |
1079 | | /* |
1080 | | * Hey Emacs! |
1081 | | * Local Variables: |
1082 | | * coding: utf-8 |
1083 | | * End: |
1084 | | */ |