/src/systemd/src/basic/escape.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* SPDX-License-Identifier: LGPL-2.1+ */ |
2 | | |
3 | | #include <errno.h> |
4 | | #include <stdlib.h> |
5 | | #include <string.h> |
6 | | |
7 | | #include "alloc-util.h" |
8 | | #include "escape.h" |
9 | | #include "hexdecoct.h" |
10 | | #include "macro.h" |
11 | | #include "utf8.h" |
12 | | |
13 | 30.7M | int cescape_char(char c, char *buf) { |
14 | 30.7M | char *buf_old = buf; |
15 | 30.7M | |
16 | 30.7M | /* Needs space for 4 characters in the buffer */ |
17 | 30.7M | |
18 | 30.7M | switch (c) { |
19 | 30.7M | |
20 | 30.7M | case '\a': |
21 | 293 | *(buf++) = '\\'; |
22 | 293 | *(buf++) = 'a'; |
23 | 293 | break; |
24 | 30.7M | case '\b': |
25 | 278 | *(buf++) = '\\'; |
26 | 278 | *(buf++) = 'b'; |
27 | 278 | break; |
28 | 30.7M | case '\f': |
29 | 348 | *(buf++) = '\\'; |
30 | 348 | *(buf++) = 'f'; |
31 | 348 | break; |
32 | 30.7M | case '\n': |
33 | 371 | *(buf++) = '\\'; |
34 | 371 | *(buf++) = 'n'; |
35 | 371 | break; |
36 | 30.7M | case '\r': |
37 | 385 | *(buf++) = '\\'; |
38 | 385 | *(buf++) = 'r'; |
39 | 385 | break; |
40 | 30.7M | case '\t': |
41 | 566 | *(buf++) = '\\'; |
42 | 566 | *(buf++) = 't'; |
43 | 566 | break; |
44 | 30.7M | case '\v': |
45 | 304 | *(buf++) = '\\'; |
46 | 304 | *(buf++) = 'v'; |
47 | 304 | break; |
48 | 30.7M | case '\\': |
49 | 268 | *(buf++) = '\\'; |
50 | 268 | *(buf++) = '\\'; |
51 | 268 | break; |
52 | 30.7M | case '"': |
53 | 285 | *(buf++) = '\\'; |
54 | 285 | *(buf++) = '"'; |
55 | 285 | break; |
56 | 30.7M | case '\'': |
57 | 309 | *(buf++) = '\\'; |
58 | 309 | *(buf++) = '\''; |
59 | 309 | break; |
60 | 30.7M | |
61 | 30.7M | default: |
62 | 30.7M | /* For special chars we prefer octal over |
63 | 30.7M | * hexadecimal encoding, simply because glib's |
64 | 30.7M | * g_strescape() does the same */ |
65 | 30.7M | if ((c < ' ') || (c >= 127)) { |
66 | 2.45k | *(buf++) = '\\'; |
67 | 2.45k | *(buf++) = octchar((unsigned char) c >> 6); |
68 | 2.45k | *(buf++) = octchar((unsigned char) c >> 3); |
69 | 2.45k | *(buf++) = octchar((unsigned char) c); |
70 | 2.45k | } else |
71 | 30.7M | *(buf++) = c; |
72 | 30.7M | break; |
73 | 30.7M | } |
74 | 30.7M | |
75 | 30.7M | return buf - buf_old; |
76 | 30.7M | } |
77 | | |
78 | 333 | char *cescape_length(const char *s, size_t n) { |
79 | 333 | const char *f; |
80 | 333 | char *r, *t; |
81 | 333 | |
82 | 333 | assert(s || n == 0); |
83 | 333 | |
84 | 333 | /* Does C style string escaping. May be reversed with |
85 | 333 | * cunescape(). */ |
86 | 333 | |
87 | 333 | r = new(char, n*4 + 1); |
88 | 333 | if (!r) |
89 | 0 | return NULL; |
90 | 333 | |
91 | 7.38k | for (f = s, t = r; f < s + n; f++) |
92 | 7.05k | t += cescape_char(*f, t); |
93 | 333 | |
94 | 333 | *t = 0; |
95 | 333 | |
96 | 333 | return r; |
97 | 333 | } |
98 | | |
99 | 172 | char *cescape(const char *s) { |
100 | 172 | assert(s); |
101 | 172 | |
102 | 172 | return cescape_length(s, strlen(s)); |
103 | 172 | } |
104 | | |
105 | 501k | int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) { |
106 | 501k | int r = 1; |
107 | 501k | |
108 | 501k | assert(p); |
109 | 501k | assert(ret); |
110 | 501k | |
111 | 501k | /* Unescapes C style. Returns the unescaped character in ret. |
112 | 501k | * Sets *eight_bit to true if the escaped sequence either fits in |
113 | 501k | * one byte in UTF-8 or is a non-unicode literal byte and should |
114 | 501k | * instead be copied directly. |
115 | 501k | */ |
116 | 501k | |
117 | 501k | if (length != (size_t) -1 && length < 1) |
118 | 0 | return -EINVAL; |
119 | 501k | |
120 | 501k | switch (p[0]) { |
121 | 501k | |
122 | 501k | case 'a': |
123 | 526 | *ret = '\a'; |
124 | 526 | break; |
125 | 501k | case 'b': |
126 | 27.1k | *ret = '\b'; |
127 | 27.1k | break; |
128 | 501k | case 'f': |
129 | 1.24k | *ret = '\f'; |
130 | 1.24k | break; |
131 | 501k | case 'n': |
132 | 46.4k | *ret = '\n'; |
133 | 46.4k | break; |
134 | 501k | case 'r': |
135 | 15.0k | *ret = '\r'; |
136 | 15.0k | break; |
137 | 501k | case 't': |
138 | 636 | *ret = '\t'; |
139 | 636 | break; |
140 | 501k | case 'v': |
141 | 907 | *ret = '\v'; |
142 | 907 | break; |
143 | 501k | case '\\': |
144 | 53.4k | *ret = '\\'; |
145 | 53.4k | break; |
146 | 501k | case '"': |
147 | 394 | *ret = '"'; |
148 | 394 | break; |
149 | 501k | case '\'': |
150 | 412 | *ret = '\''; |
151 | 412 | break; |
152 | 501k | |
153 | 501k | case 's': |
154 | 9.00k | /* This is an extension of the XDG syntax files */ |
155 | 9.00k | *ret = ' '; |
156 | 9.00k | break; |
157 | 501k | |
158 | 501k | case 'x': { |
159 | 163k | /* hexadecimal encoding */ |
160 | 163k | int a, b; |
161 | 163k | |
162 | 163k | if (length != (size_t) -1 && length < 3) |
163 | 69 | return -EINVAL; |
164 | 163k | |
165 | 163k | a = unhexchar(p[1]); |
166 | 163k | if (a < 0) |
167 | 25.4k | return -EINVAL; |
168 | 137k | |
169 | 137k | b = unhexchar(p[2]); |
170 | 137k | if (b < 0) |
171 | 112k | return -EINVAL; |
172 | 25.5k | |
173 | 25.5k | /* Don't allow NUL bytes */ |
174 | 25.5k | if (a == 0 && b == 0) |
175 | 1.14k | return -EINVAL; |
176 | 24.3k | |
177 | 24.3k | *ret = (a << 4U) | b; |
178 | 24.3k | *eight_bit = true; |
179 | 24.3k | r = 3; |
180 | 24.3k | break; |
181 | 24.3k | } |
182 | 24.3k | |
183 | 31.8k | case 'u': { |
184 | 31.8k | /* C++11 style 16bit unicode */ |
185 | 31.8k | |
186 | 31.8k | int a[4]; |
187 | 31.8k | size_t i; |
188 | 31.8k | uint32_t c; |
189 | 31.8k | |
190 | 31.8k | if (length != (size_t) -1 && length < 5) |
191 | 204 | return -EINVAL; |
192 | 31.6k | |
193 | 49.4k | for (i = 0; i < 4; i++) { |
194 | 45.5k | a[i] = unhexchar(p[1 + i]); |
195 | 45.5k | if (a[i] < 0) |
196 | 27.7k | return a[i]; |
197 | 45.5k | } |
198 | 31.6k | |
199 | 31.6k | c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3]; |
200 | 3.93k | |
201 | 3.93k | /* Don't allow 0 chars */ |
202 | 3.93k | if (c == 0) |
203 | 531 | return -EINVAL; |
204 | 3.40k | |
205 | 3.40k | *ret = c; |
206 | 3.40k | r = 5; |
207 | 3.40k | break; |
208 | 3.40k | } |
209 | 3.40k | |
210 | 25.3k | case 'U': { |
211 | 25.3k | /* C++11 style 32bit unicode */ |
212 | 25.3k | |
213 | 25.3k | int a[8]; |
214 | 25.3k | size_t i; |
215 | 25.3k | char32_t c; |
216 | 25.3k | |
217 | 25.3k | if (length != (size_t) -1 && length < 9) |
218 | 208 | return -EINVAL; |
219 | 25.1k | |
220 | 155k | for (i = 0; i < 8; i++) { |
221 | 142k | a[i] = unhexchar(p[1 + i]); |
222 | 142k | if (a[i] < 0) |
223 | 11.9k | return a[i]; |
224 | 142k | } |
225 | 25.1k | |
226 | 25.1k | c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) | |
227 | 13.1k | ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7]; |
228 | 13.1k | |
229 | 13.1k | /* Don't allow 0 chars */ |
230 | 13.1k | if (c == 0) |
231 | 1.79k | return -EINVAL; |
232 | 11.3k | |
233 | 11.3k | /* Don't allow invalid code points */ |
234 | 11.3k | if (!unichar_is_valid(c)) |
235 | 9.42k | return -EINVAL; |
236 | 1.92k | |
237 | 1.92k | *ret = c; |
238 | 1.92k | r = 9; |
239 | 1.92k | break; |
240 | 1.92k | } |
241 | 1.92k | |
242 | 53.8k | case '0': |
243 | 53.8k | case '1': |
244 | 53.8k | case '2': |
245 | 53.8k | case '3': |
246 | 53.8k | case '4': |
247 | 53.8k | case '5': |
248 | 53.8k | case '6': |
249 | 53.8k | case '7': { |
250 | 53.8k | /* octal encoding */ |
251 | 53.8k | int a, b, c; |
252 | 53.8k | char32_t m; |
253 | 53.8k | |
254 | 53.8k | if (length != (size_t) -1 && length < 3) |
255 | 147 | return -EINVAL; |
256 | 53.7k | |
257 | 53.7k | a = unoctchar(p[0]); |
258 | 53.7k | if (a < 0) |
259 | 0 | return -EINVAL; |
260 | 53.7k | |
261 | 53.7k | b = unoctchar(p[1]); |
262 | 53.7k | if (b < 0) |
263 | 30.8k | return -EINVAL; |
264 | 22.8k | |
265 | 22.8k | c = unoctchar(p[2]); |
266 | 22.8k | if (c < 0) |
267 | 17.3k | return -EINVAL; |
268 | 5.45k | |
269 | 5.45k | /* don't allow NUL bytes */ |
270 | 5.45k | if (a == 0 && b == 0 && c == 0) |
271 | 565 | return -EINVAL; |
272 | 4.88k | |
273 | 4.88k | /* Don't allow bytes above 255 */ |
274 | 4.88k | m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c; |
275 | 4.88k | if (m > 255) |
276 | 614 | return -EINVAL; |
277 | 4.27k | |
278 | 4.27k | *ret = m; |
279 | 4.27k | *eight_bit = true; |
280 | 4.27k | r = 3; |
281 | 4.27k | break; |
282 | 4.27k | } |
283 | 4.27k | |
284 | 71.7k | default: |
285 | 71.7k | return -EINVAL; |
286 | 189k | } |
287 | 189k | |
288 | 189k | return r; |
289 | 189k | } |
290 | | |
291 | 4.39k | int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) { |
292 | 4.39k | char *r, *t; |
293 | 4.39k | const char *f; |
294 | 4.39k | size_t pl; |
295 | 4.39k | |
296 | 4.39k | assert(s); |
297 | 4.39k | assert(ret); |
298 | 4.39k | |
299 | 4.39k | /* Undoes C style string escaping, and optionally prefixes it. */ |
300 | 4.39k | |
301 | 4.39k | pl = strlen_ptr(prefix); |
302 | 4.39k | |
303 | 4.39k | r = new(char, pl+length+1); |
304 | 4.39k | if (!r) |
305 | 0 | return -ENOMEM; |
306 | 4.39k | |
307 | 4.39k | if (prefix) |
308 | 3.91k | memcpy(r, prefix, pl); |
309 | 4.39k | |
310 | 38.9M | for (f = s, t = r + pl; f < s + length; f++) { |
311 | 38.9M | size_t remaining; |
312 | 38.9M | bool eight_bit = false; |
313 | 38.9M | char32_t u; |
314 | 38.9M | int k; |
315 | 38.9M | |
316 | 38.9M | remaining = s + length - f; |
317 | 38.9M | assert(remaining > 0); |
318 | 38.9M | |
319 | 38.9M | if (*f != '\\') { |
320 | 38.5M | /* A literal, copy verbatim */ |
321 | 38.5M | *(t++) = *f; |
322 | 38.5M | continue; |
323 | 38.5M | } |
324 | 480k | |
325 | 480k | if (remaining == 1) { |
326 | 82 | if (flags & UNESCAPE_RELAX) { |
327 | 81 | /* A trailing backslash, copy verbatim */ |
328 | 81 | *(t++) = *f; |
329 | 81 | continue; |
330 | 81 | } |
331 | 1 | |
332 | 1 | free(r); |
333 | 1 | return -EINVAL; |
334 | 1 | } |
335 | 480k | |
336 | 480k | k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit); |
337 | 480k | if (k < 0) { |
338 | 300k | if (flags & UNESCAPE_RELAX) { |
339 | 300k | /* Invalid escape code, let's take it literal then */ |
340 | 300k | *(t++) = '\\'; |
341 | 300k | continue; |
342 | 300k | } |
343 | 58 | |
344 | 58 | free(r); |
345 | 58 | return k; |
346 | 58 | } |
347 | 180k | |
348 | 180k | f += k; |
349 | 180k | if (eight_bit) |
350 | 25.9k | /* One byte? Set directly as specified */ |
351 | 25.9k | *(t++) = u; |
352 | 154k | else |
353 | 154k | /* Otherwise encode as multi-byte UTF-8 */ |
354 | 154k | t += utf8_encode_unichar(t, u); |
355 | 180k | } |
356 | 4.39k | |
357 | 4.39k | *t = 0; |
358 | 4.33k | |
359 | 4.33k | *ret = r; |
360 | 4.33k | return t - r; |
361 | 4.39k | } |
362 | | |
363 | 483 | int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) { |
364 | 483 | return cunescape_length_with_prefix(s, length, NULL, flags, ret); |
365 | 483 | } |
366 | | |
367 | 483 | int cunescape(const char *s, UnescapeFlags flags, char **ret) { |
368 | 483 | return cunescape_length(s, strlen(s), flags, ret); |
369 | 483 | } |
370 | | |
371 | 0 | char *xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) { |
372 | 0 | char *ans, *t, *prev, *prev2; |
373 | 0 | const char *f; |
374 | 0 |
|
375 | 0 | /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be |
376 | 0 | * reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged. |
377 | 0 | * This corresponds to non-ASCII printable characters in pre-unicode encodings. |
378 | 0 | * |
379 | 0 | * If console_width is reached, output is truncated and "..." is appended. */ |
380 | 0 |
|
381 | 0 | if (console_width == 0) |
382 | 0 | return strdup(""); |
383 | 0 | |
384 | 0 | ans = new(char, MIN(strlen(s), console_width) * 4 + 1); |
385 | 0 | if (!ans) |
386 | 0 | return NULL; |
387 | 0 | |
388 | 0 | memset(ans, '_', MIN(strlen(s), console_width) * 4); |
389 | 0 | ans[MIN(strlen(s), console_width) * 4] = 0; |
390 | 0 |
|
391 | 0 | for (f = s, t = prev = prev2 = ans; ; f++) { |
392 | 0 | char *tmp_t = t; |
393 | 0 |
|
394 | 0 | if (!*f) { |
395 | 0 | *t = 0; |
396 | 0 | return ans; |
397 | 0 | } |
398 | 0 | |
399 | 0 | if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) || |
400 | 0 | *f == '\\' || strchr(bad, *f)) { |
401 | 0 | if ((size_t) (t - ans) + 4 > console_width) |
402 | 0 | break; |
403 | 0 | |
404 | 0 | *(t++) = '\\'; |
405 | 0 | *(t++) = 'x'; |
406 | 0 | *(t++) = hexchar(*f >> 4); |
407 | 0 | *(t++) = hexchar(*f); |
408 | 0 | } else { |
409 | 0 | if ((size_t) (t - ans) + 1 > console_width) |
410 | 0 | break; |
411 | 0 | |
412 | 0 | *(t++) = *f; |
413 | 0 | } |
414 | 0 |
|
415 | 0 | /* We might need to go back two cycles to fit three dots, so remember two positions */ |
416 | 0 | prev2 = prev; |
417 | 0 | prev = tmp_t; |
418 | 0 | } |
419 | 0 |
|
420 | 0 | /* We can just write where we want, since chars are one-byte */ |
421 | 0 | size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */ |
422 | 0 | size_t off; |
423 | 0 | if (console_width - c >= (size_t) (t - ans)) |
424 | 0 | off = (size_t) (t - ans); |
425 | 0 | else if (console_width - c >= (size_t) (prev - ans)) |
426 | 0 | off = (size_t) (prev - ans); |
427 | 0 | else if (console_width - c >= (size_t) (prev2 - ans)) |
428 | 0 | off = (size_t) (prev2 - ans); |
429 | 0 | else |
430 | 0 | off = console_width - c; |
431 | 0 | assert(off <= (size_t) (t - ans)); |
432 | 0 |
|
433 | 0 | memcpy(ans + off, "...", c); |
434 | 0 | ans[off + c] = '\0'; |
435 | 0 | return ans; |
436 | 0 | } |
437 | | |
438 | 23.8k | char *escape_non_printable_full(const char *str, size_t console_width, bool eight_bit) { |
439 | 23.8k | if (eight_bit) |
440 | 0 | return xescape_full(str, "", console_width, true); |
441 | 23.8k | else |
442 | 23.8k | return utf8_escape_non_printable_full(str, console_width); |
443 | 23.8k | } |
444 | | |
445 | 0 | char *octescape(const char *s, size_t len) { |
446 | 0 | char *r, *t; |
447 | 0 | const char *f; |
448 | 0 |
|
449 | 0 | /* Escapes all chars in bad, in addition to \ and " chars, |
450 | 0 | * in \nnn style escaping. */ |
451 | 0 |
|
452 | 0 | r = new(char, len * 4 + 1); |
453 | 0 | if (!r) |
454 | 0 | return NULL; |
455 | 0 | |
456 | 0 | for (f = s, t = r; f < s + len; f++) { |
457 | 0 |
|
458 | 0 | if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) { |
459 | 0 | *(t++) = '\\'; |
460 | 0 | *(t++) = '0' + (*f >> 6); |
461 | 0 | *(t++) = '0' + ((*f >> 3) & 8); |
462 | 0 | *(t++) = '0' + (*f & 8); |
463 | 0 | } else |
464 | 0 | *(t++) = *f; |
465 | 0 | } |
466 | 0 |
|
467 | 0 | *t = 0; |
468 | 0 |
|
469 | 0 | return r; |
470 | 0 |
|
471 | 0 | } |
472 | | |
473 | 0 | static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) { |
474 | 0 | assert(bad); |
475 | 0 |
|
476 | 0 | for (; *s; s++) { |
477 | 0 | if (escape_tab_nl && IN_SET(*s, '\n', '\t')) { |
478 | 0 | *(t++) = '\\'; |
479 | 0 | *(t++) = *s == '\n' ? 'n' : 't'; |
480 | 0 | continue; |
481 | 0 | } |
482 | 0 |
|
483 | 0 | if (*s == '\\' || strchr(bad, *s)) |
484 | 0 | *(t++) = '\\'; |
485 | 0 |
|
486 | 0 | *(t++) = *s; |
487 | 0 | } |
488 | 0 |
|
489 | 0 | return t; |
490 | 0 | } |
491 | | |
492 | 0 | char *shell_escape(const char *s, const char *bad) { |
493 | 0 | char *r, *t; |
494 | 0 |
|
495 | 0 | r = new(char, strlen(s)*2+1); |
496 | 0 | if (!r) |
497 | 0 | return NULL; |
498 | 0 | |
499 | 0 | t = strcpy_backslash_escaped(r, s, bad, false); |
500 | 0 | *t = 0; |
501 | 0 |
|
502 | 0 | return r; |
503 | 0 | } |
504 | | |
505 | 0 | char* shell_maybe_quote(const char *s, EscapeStyle style) { |
506 | 0 | const char *p; |
507 | 0 | char *r, *t; |
508 | 0 |
|
509 | 0 | assert(s); |
510 | 0 |
|
511 | 0 | /* Encloses a string in quotes if necessary to make it OK as a shell |
512 | 0 | * string. Note that we treat benign UTF-8 characters as needing |
513 | 0 | * escaping too, but that should be OK. */ |
514 | 0 |
|
515 | 0 | for (p = s; *p; p++) |
516 | 0 | if (*p <= ' ' || |
517 | 0 | *p >= 127 || |
518 | 0 | strchr(SHELL_NEED_QUOTES, *p)) |
519 | 0 | break; |
520 | 0 |
|
521 | 0 | if (!*p) |
522 | 0 | return strdup(s); |
523 | 0 | |
524 | 0 | r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1); |
525 | 0 | if (!r) |
526 | 0 | return NULL; |
527 | 0 | |
528 | 0 | t = r; |
529 | 0 | if (style == ESCAPE_BACKSLASH) |
530 | 0 | *(t++) = '"'; |
531 | 0 | else if (style == ESCAPE_POSIX) { |
532 | 0 | *(t++) = '$'; |
533 | 0 | *(t++) = '\''; |
534 | 0 | } else |
535 | 0 | assert_not_reached("Bad EscapeStyle"); |
536 | 0 |
|
537 | 0 | t = mempcpy(t, s, p - s); |
538 | 0 |
|
539 | 0 | if (style == ESCAPE_BACKSLASH) |
540 | 0 | t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false); |
541 | 0 | else |
542 | 0 | t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true); |
543 | 0 |
|
544 | 0 | if (style == ESCAPE_BACKSLASH) |
545 | 0 | *(t++) = '"'; |
546 | 0 | else |
547 | 0 | *(t++) = '\''; |
548 | 0 | *t = 0; |
549 | 0 |
|
550 | 0 | return r; |
551 | 0 | } |