Coverage Report

Created: 2019-06-19 13:33

/src/systemd/src/basic/escape.c
Line
Count
Source (jump to first uncovered line)
1
/* SPDX-License-Identifier: LGPL-2.1+ */
2
3
#include <errno.h>
4
#include <stdlib.h>
5
#include <string.h>
6
7
#include "alloc-util.h"
8
#include "escape.h"
9
#include "hexdecoct.h"
10
#include "macro.h"
11
#include "utf8.h"
12
13
30.7M
int cescape_char(char c, char *buf) {
        char letter = 0;

        /* Writes the C-style escaped representation of c to buf and returns the number of bytes
         * written (1, 2 or 4). No NUL terminator is appended.
         *
         * Needs space for 4 characters in the buffer */

        /* Characters that have a dedicated two-character backslash sequence */
        switch (c) {
        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
                letter = '\\';
                break;
        case '"':
                letter = '"';
                break;
        case '\'':
                letter = '\'';
                break;
        default:
                break;
        }

        if (letter != 0) {
                buf[0] = '\\';
                buf[1] = letter;
                return 2;
        }

        /* For special chars we prefer octal over hexadecimal encoding, simply because glib's
         * g_strescape() does the same */
        if ((c < ' ') || (c >= 127)) {
                buf[0] = '\\';
                buf[1] = octchar((unsigned char) c >> 6);
                buf[2] = octchar((unsigned char) c >> 3);
                buf[3] = octchar((unsigned char) c);
                return 4;
        }

        /* Plain printable character, copied through unchanged */
        buf[0] = c;
        return 1;
}
77
78
333
char *cescape_length(const char *s, size_t n) {
79
333
        const char *f;
80
333
        char *r, *t;
81
333
82
333
        assert(s || n == 0);
83
333
84
333
        /* Does C style string escaping. May be reversed with
85
333
         * cunescape(). */
86
333
87
333
        r = new(char, n*4 + 1);
88
333
        if (!r)
89
0
                return NULL;
90
333
91
7.38k
        for (f = s, t = r; f < s + n; f++)
92
7.05k
                t += cescape_char(*f, t);
93
333
94
333
        *t = 0;
95
333
96
333
        return r;
97
333
}
98
99
172
char *cescape(const char *s) {
        size_t len;

        /* C-style escapes the whole NUL-terminated string s; see cescape_length(). */

        assert(s);

        len = strlen(s);
        return cescape_length(s, len);
}
104
105
501k
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        /* A length of (size_t) -1 disables all length checks below */
        const bool bounded = length != (size_t) -1;

        assert(p);
        assert(ret);

        /* Unescapes C style. p points at the character following the backslash. Returns the
         * unescaped character in ret and, as return value, the number of characters consumed
         * from p (1, 3, 5 or 9), or a negative errno-style value on failure.
         *
         * Sets *eight_bit to true if the escaped sequence either fits in one byte in UTF-8 or is
         * a non-unicode literal byte and should instead be copied directly. *eight_bit is only
         * ever set to true here (for \x and octal sequences), never reset. */

        if (bounded && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Simple one-letter escapes */
        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x': {
                /* hexadecimal encoding: exactly two hex digits */
                int hi, lo;

                if (bounded && length < 3)
                        return -EINVAL;

                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                return 3;
        }

        case 'u': {
                /* C++11 style 16bit unicode: exactly four hex digits */
                uint32_t code = 0;
                size_t i;

                if (bounded && length < 5)
                        return -EINVAL;

                for (i = 1; i <= 4; i++) {
                        int digit = unhexchar(p[i]);

                        /* Propagate whatever error unhexchar() reported */
                        if (digit < 0)
                                return digit;

                        code = (code << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (code == 0)
                        return -EINVAL;

                *ret = code;
                return 5;
        }

        case 'U': {
                /* C++11 style 32bit unicode: exactly eight hex digits */
                uint32_t code = 0;
                char32_t c;
                size_t i;

                if (bounded && length < 9)
                        return -EINVAL;

                for (i = 1; i <= 8; i++) {
                        int digit = unhexchar(p[i]);

                        /* Propagate whatever error unhexchar() reported */
                        if (digit < 0)
                                return digit;

                        code = (code << 4U) | (uint32_t) digit;
                }

                c = code;

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                return 9;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding: exactly three octal digits, the first being p[0] itself */
                int d0, d1, d2;
                char32_t value;

                if (bounded && length < 3)
                        return -EINVAL;

                d0 = unoctchar(p[0]);
                if (d0 < 0)
                        return -EINVAL;

                d1 = unoctchar(p[1]);
                if (d1 < 0)
                        return -EINVAL;

                d2 = unoctchar(p[2]);
                if (d2 < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (d0 == 0 && d1 == 0 && d2 == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                value = ((uint32_t) d0 << 6U) | ((uint32_t) d1 << 3U) | (uint32_t) d2;
                if (value > 255)
                        return -EINVAL;

                *ret = value;
                *eight_bit = true;
                return 3;
        }

        default:
                /* Unknown escape character */
                return -EINVAL;
        }
}
290
291
4.39k
int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        /* Undoes C style string escaping of the first length bytes of s, and optionally prefixes
         * the result with prefix (which is copied verbatim, not unescaped).
         *
         * On success stores the newly allocated, NUL-terminated result in *ret and returns its
         * length, prefix included. On failure returns a negative errno-style value and leaves
         * *ret untouched. With UNESCAPE_RELAX set, a trailing backslash or an invalid escape
         * sequence is copied through literally instead of failing. */

        prefix_len = strlen_ptr(prefix);

        buf = new(char, prefix_len + length + 1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        out = buf + prefix_len;
        end = s + length;

        for (in = s; in < end; in++) {
                bool eight_bit = false;
                size_t remaining = end - in;
                char32_t u;
                int k;

                assert(remaining > 0);

                if (*in != '\\') {
                        /* A literal, copy verbatim */
                        *(out++) = *in;
                        continue;
                }

                if (remaining == 1) {
                        /* A trailing backslash: an error unless we are in relaxed mode */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return -EINVAL;
                        }

                        *(out++) = *in;
                        continue;
                }

                k = cunescape_one(in + 1, remaining - 1, &u, &eight_bit);
                if (k < 0) {
                        /* Invalid escape code: an error unless we are in relaxed mode */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return k;
                        }

                        /* Take the backslash literally; what follows it is handled by the
                         * next iterations as regular input */
                        *(out++) = '\\';
                        continue;
                }

                /* Skip the characters making up the escape sequence */
                in += k;

                if (eight_bit)
                        /* One byte? Set directly as specified */
                        *(out++) = u;
                else
                        /* Otherwise encode as multi-byte UTF-8 */
                        out += utf8_encode_unichar(out, u);
        }

        *out = 0;

        *ret = buf;
        return out - buf;
}
362
363
483
int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        const char *no_prefix = NULL;

        /* Same as cunescape_length_with_prefix(), but without prepending any prefix. */

        return cunescape_length_with_prefix(s, length, no_prefix, flags, ret);
}
366
367
483
int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        size_t len;

        /* Unescapes the whole NUL-terminated string s; see cunescape_length(). */

        len = strlen(s);
        return cunescape_length(s, len, flags, ret);
}
370
371
0
char *xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) {
372
0
        char *ans, *t, *prev, *prev2;
373
0
        const char *f;
374
0
375
0
        /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be
376
0
         * reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged.
377
0
         * This corresponds to non-ASCII printable characters in pre-unicode encodings.
378
0
         *
379
0
         * If console_width is reached, output is truncated and "..." is appended. */
380
0
381
0
        if (console_width == 0)
382
0
                return strdup("");
383
0
384
0
        ans = new(char, MIN(strlen(s), console_width) * 4 + 1);
385
0
        if (!ans)
386
0
                return NULL;
387
0
388
0
        memset(ans, '_', MIN(strlen(s), console_width) * 4);
389
0
        ans[MIN(strlen(s), console_width) * 4] = 0;
390
0
391
0
        for (f = s, t = prev = prev2 = ans; ; f++) {
392
0
                char *tmp_t = t;
393
0
394
0
                if (!*f) {
395
0
                        *t = 0;
396
0
                        return ans;
397
0
                }
398
0
399
0
                if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) ||
400
0
                    *f == '\\' || strchr(bad, *f)) {
401
0
                        if ((size_t) (t - ans) + 4 > console_width)
402
0
                                break;
403
0
404
0
                        *(t++) = '\\';
405
0
                        *(t++) = 'x';
406
0
                        *(t++) = hexchar(*f >> 4);
407
0
                        *(t++) = hexchar(*f);
408
0
                } else {
409
0
                        if ((size_t) (t - ans) + 1 > console_width)
410
0
                                break;
411
0
412
0
                        *(t++) = *f;
413
0
                }
414
0
415
0
                /* We might need to go back two cycles to fit three dots, so remember two positions */
416
0
                prev2 = prev;
417
0
                prev = tmp_t;
418
0
        }
419
0
420
0
        /* We can just write where we want, since chars are one-byte */
421
0
        size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */
422
0
        size_t off;
423
0
        if (console_width - c >= (size_t) (t - ans))
424
0
                off = (size_t) (t - ans);
425
0
        else if (console_width - c >= (size_t) (prev - ans))
426
0
                off = (size_t) (prev - ans);
427
0
        else if (console_width - c >= (size_t) (prev2 - ans))
428
0
                off = (size_t) (prev2 - ans);
429
0
        else
430
0
                off = console_width - c;
431
0
        assert(off <= (size_t) (t - ans));
432
0
433
0
        memcpy(ans + off, "...", c);
434
0
        ans[off + c] = '\0';
435
0
        return ans;
436
0
}
437
438
23.8k
char *escape_non_printable_full(const char *str, size_t console_width, bool eight_bit) {
        /* Escapes str for display, limited to console_width. For eight_bit input the \xFF style
         * escaper is used with no extra "bad" characters and bytes >= 127 let through; otherwise
         * the UTF-8 aware escaper is used. */

        if (!eight_bit)
                return utf8_escape_non_printable_full(str, console_width);

        return xescape_full(str, "", console_width, true);
}
444
445
0
char *octescape(const char *s, size_t len) {
        char *r, *t;
        size_t i;

        /* Escapes control characters (< ' '), bytes >= 127, as well as \ and " chars, in \nnn
         * style escaping (always three octal digits). All other bytes are copied verbatim.
         *
         * Processes exactly len bytes of s. Returns a newly allocated, NUL-terminated string, or
         * NULL if the allocation fails or len is too large for the worst-case output size to be
         * represented. */

        /* Every input byte may expand to 4 output bytes, plus the trailing NUL; refuse lengths
         * for which len * 4 + 1 would wrap around. */
        if (len > ((size_t) -1 - 1) / 4)
                return NULL;

        /* sizeof(char) is 1, so the size computed above is used as is */
        r = malloc(len * 4 + 1);
        if (!r)
                return NULL;

        for (i = 0, t = r; i < len; i++) {
                /* Work on the unsigned byte value: shifting a negative plain char would produce
                 * bogus digits for bytes >= 0x80 */
                unsigned char u = (unsigned char) s[i];

                if (u < ' ' || u >= 127 || u == '\\' || u == '"') {
                        *(t++) = '\\';
                        *(t++) = '0' + (u >> 6);        /* top two bits: 0..3 */
                        *(t++) = '0' + ((u >> 3) & 7);  /* each octal digit is three bits wide */
                        *(t++) = '0' + (u & 7);
                } else
                        *(t++) = (char) u;
        }

        *t = 0;

        return r;
}
472
473
0
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
        const char *in;
        char *out = t;

        /* Copies the NUL-terminated string s to t, prefixing every backslash and every character
         * contained in bad with a backslash. If escape_tab_nl is true, newline and tab are
         * written as the two-character sequences \n and \t instead.
         *
         * Does not NUL-terminate the output; returns a pointer just past the last byte written.
         * The caller must provide room for up to two output bytes per input byte. */

        assert(bad);

        for (in = s; *in != 0; in++) {
                const char ch = *in;

                if (escape_tab_nl && (ch == '\n' || ch == '\t')) {
                        *(out++) = '\\';
                        *(out++) = ch == '\n' ? 'n' : 't';
                        continue;
                }

                if (ch == '\\' || strchr(bad, ch) != NULL)
                        *(out++) = '\\';

                *(out++) = ch;
        }

        return out;
}
491
492
0
char *shell_escape(const char *s, const char *bad) {
493
0
        char *r, *t;
494
0
495
0
        r = new(char, strlen(s)*2+1);
496
0
        if (!r)
497
0
                return NULL;
498
0
499
0
        t = strcpy_backslash_escaped(r, s, bad, false);
500
0
        *t = 0;
501
0
502
0
        return r;
503
0
}
504
505
0
char* shell_maybe_quote(const char *s, EscapeStyle style) {
        const char *p;
        char *r, *t;

        assert(s);

        /* Encloses a string in quotes if necessary to make it OK as a shell
         * string. Note that we treat benign UTF-8 characters as needing
         * escaping too, but that should be OK.
         *
         * With ESCAPE_BACKSLASH the result is wrapped in "...", with ESCAPE_POSIX in $'...'.
         * Returns a newly allocated string, or NULL if allocation fails. */

        /* Find the first character that requires quoting, if there is any */
        p = s;
        while (*p &&
               !(*p <= ' ' ||
                 *p >= 127 ||
                 strchr(SHELL_NEED_QUOTES, *p)))
                p++;

        /* Nothing problematic found, return a plain copy */
        if (!*p)
                return strdup(s);

        /* Opening quote (two chars for POSIX style), every char possibly escaped, closing quote, NUL */
        r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
        if (!r)
                return NULL;

        t = r;
        switch (style) {
        case ESCAPE_BACKSLASH:
                *(t++) = '"';
                break;
        case ESCAPE_POSIX:
                *(t++) = '$';
                *(t++) = '\'';
                break;
        default:
                assert_not_reached("Bad EscapeStyle");
        }

        /* Everything before the first problematic character can be copied as is */
        t = mempcpy(t, s, p - s);

        if (style == ESCAPE_BACKSLASH) {
                t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
                *(t++) = '"';
        } else {
                t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
                *(t++) = '\'';
        }

        *t = 0;

        return r;
}