Coverage Report

Created: 2025-10-12 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/util-linux/lib/mbsalign.c
Line
Count
Source
1
/*
2
 * SPDX-License-Identifier: LGPL-2.1-or-later
3
 *
4
 * Align/Truncate a string in a given screen width
5
 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6
 *
7
 * This program is free software: you can redistribute it and/or modify it
8
 * under the terms of the GNU Lesser General Public License as published by the
9
 * Free Software Foundation, either version 2.1 of the License, or (at your
10
 * option) any later version.
11
 *
12
 * Written by Pádraig Brady.
13
 */
14
#include <stdlib.h>
15
#include <string.h>
16
#include <stdio.h>
17
#include <stdbool.h>
18
#include <limits.h>
19
#include <ctype.h>
20
21
#include "c.h"
22
#include "mbsalign.h"
23
#include "strutils.h"
24
#include "widechar.h"
25
26
/*
 * Counts number of cells in multibyte string. All control and
 * non-printable chars are ignored; a control char that is followed by
 * "[...m" (e.g. the "\e[x;ym" colour sequence) is skipped as a whole.
 *
 * At most @bufsz bytes of @buf are examined and scanning also stops at
 * the first NUL byte.
 *
 * Returns: number of cells (0 for NULL @buf or zero @bufsz).
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
	const char *p = buf;
	const char *end;		/* one past the last usable byte */
	size_t width = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* nothing to measure; never touch the first byte of an empty range */
	if (!buf || !bufsz)
		return 0;
	end = buf + bufsz;

	while (p < end && *p) {
		if (iscntrl((unsigned char) *p)) {
			p++;

			/* try detect "\e[x;ym" and skip on success */
			if (p < end && *p == '[') {
				const char *e = p;

				while (e + 1 < end && *e && *e != 'm')
					e++;
				if (*e == 'm')
					p = e + 1;
			}
			continue;
		}
#ifdef HAVE_WIDECHAR
		{
			wchar_t wc;
			size_t left = (size_t) (end - p);
			/* never let mbrtowc() look behind the caller's limit */
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;
			if (len == (size_t) -1 || len == (size_t) -2) {
				/*
				 * Invalid or incomplete sequence: wc has not
				 * been set, so it must not be inspected. Skip
				 * one byte and restart from the initial state.
				 */
				memset(&st, 0, sizeof(st));
				len = 1;
			} else if (iswprint(wc)) {
				int x = wcwidth(wc);

				if (x > 0)
					width += x;
			}
			p += len;
		}
#else
		if (isprint((unsigned char) *p))
			width++;
		p++;
#endif
	}

	return width;
}
80
81
/*
 * Counts number of cells in the NUL-terminated string @s; see
 * mbs_nwidth(). NULL and empty strings occupy no cells.
 */
size_t mbs_width(const char *s)
{
	return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
87
88
/*
 * Counts number of cells in multibyte string. For all control and
 * non-printable chars is the result width enlarged to store \x?? hex
 * sequence. See mbs_safe_encode().
 *
 * At most @bufsz bytes of @buf are examined and scanning also stops at
 * the first NUL byte.
 *
 * Returns: number of cells, @sz (optional) returns number of bytes.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
	const char *p = buf;
	const char *end;		/* one past the last usable byte */
	size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	/* an empty range (bufsz == 0) must not examine any byte */
	end = (buf && bufsz) ? buf + bufsz : buf;

	while (p && p < end && *p) {
		if ((p + 1 < end && *p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t left = (size_t) (end - p);
			/* never let mbrtowc() look behind the caller's limit */
			size_t len = mbrtowc(&wc, p,
					left < MB_CUR_MAX ? left : MB_CUR_MAX,
					&st);

			if (len == 0)
				break;

			if (len == (size_t) -1 || len == (size_t) -2) {
				/* broken sequence: account for a single byte */
				memset(&st, 0, sizeof(st));
				len = 1;
				if (isprint((unsigned char) *p))
					width += 1, bytes += 1;
				else
					width += 4, bytes += 4;

			} else if (!iswprint(wc)) {
				width += len * 4;	/* hex encode whole sequence */
				bytes += len * 4;
			} else {
				int x = wcwidth(wc);	/* number of cells */

				if (x > 0)	/* do not wrap size_t on -1 */
					width += x;
				bytes += len;		/* number of bytes */
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			width += 4, bytes += 4;		/* *p encoded to \x?? */
			p++;
		} else {
			width++, bytes++;
			p++;
		}
#endif
	}

	if (sz)
		*sz = bytes;
	return width;
}
152
153
/*
 * Counts number of cells the NUL-terminated string @s needs once it is
 * hex-encoded; see mbs_safe_nwidth(). NULL and empty strings need none.
 */
size_t mbs_safe_width(const char *s)
{
	return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
159
160
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. The @width returns number of cells. The @safechars
 * are not encoded (and are not counted in @width).
 *
 * The @buf should be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes. If it is smaller, the output is truncated at the last item that
 * fits completely; nothing is ever written behind @buf + @bufsiz and the
 * result is always NUL terminated.
 *
 * Returns: @buf, or NULL if @s is NULL/empty, @buf is NULL or @bufsiz is 0.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, size_t bufsiz, const char *safechars)
{
	const char *p = s;
	char *r;
	size_t rsz;			/* bytes left in @buf, terminator included */
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf || !bufsiz)
		return NULL;

	r = buf;
	rsz = bufsiz;
	*width = 0;

	while (*p) {
		if (safechars && strchr(safechars, *p)) {
			if (rsz < 2)
				break;
			*r++ = *p++;
			rsz--;
			continue;
		}

		if ((*p == '\\' && *(p + 1) == 'x')
		    || iscntrl((unsigned char) *p)) {
			/*
			 * snprintf() returns the untruncated length, so its
			 * result cannot detect a short buffer; check the room
			 * for "\x??" + NUL before writing.
			 */
			if (rsz < 5)
				break;
			snprintf(r, rsz, "\\x%02x", (unsigned char) *p);
			r += 4;
			rsz -= 4;
			*width += 4;
			p++;
		}
#ifdef HAVE_WIDECHAR
		else {
			wchar_t wc;
			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

			if (len == 0)
				break;		/* end of string */

			if (len == (size_t) -1 || len == (size_t) -2) {
				len = 1;
				/*
				 * Not valid multibyte sequence -- maybe it's
				 * printable char according to the current locales.
				 */
				if (!isprint((unsigned char) *p)) {
					if (rsz < 5)
						break;
					snprintf(r, rsz, "\\x%02x", (unsigned char) *p);
					r += 4;
					rsz -= 4;
					*width += 4;
				} else {
					if (rsz < 2)
						break;
					(*width)++;
					*r++ = *p;
					rsz--;
				}
			} else if (!iswprint(wc)) {
				size_t i;

				/* hex encode every byte of the sequence */
				for (i = 0; i < len; i++) {
					if (rsz < 5)
						break;
					snprintf(r, rsz, "\\x%02x", (unsigned char) p[i]);
					r += 4;
					rsz -= 4;
					*width += 4;
				}
				if (i < len)
					break;
			} else {
				if (rsz < len + 1)
					break;
				memcpy(r, p, len);
				r += len;
				rsz -= len;
				*width += wcwidth(wc);
			}
			p += len;
		}
#else
		else if (!isprint((unsigned char) *p)) {
			if (rsz < 5)
				break;
			snprintf(r, rsz, "\\x%02x", (unsigned char) *p);
			p++;
			r += 4;
			rsz -= 4;
			*width += 4;
		} else {
			if (rsz < 2)
				break;
			*r++ = *p++;
			rsz--;
			(*width)++;
		}
#endif
	}

	/* rsz >= 1 is an invariant of the loop, the terminator always fits */
	*r = '\0';
	return buf;
}
273
274
/*
 * Copy @s to @buf and replace broken sequences to \x?? hex sequence. A
 * backslash that starts a literal "\x" is encoded too, so the output
 * stays unambiguous. The @width returns number of cells.
 *
 * The @buf should be big enough to store mbs_safe_encode_size(strlen(s))
 * bytes. If it is smaller, the output is truncated at the last item that
 * fits completely; nothing is ever written behind @buf + @bufsiz and the
 * result is always NUL terminated.
 *
 * Returns: @buf, or NULL if @s is NULL/empty, @buf is NULL or @bufsiz is 0.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf, size_t bufsiz)
{
	const char *p = s;
	char *r;
	size_t rsz;			/* bytes left in @buf, terminator included */
	size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
	mbstate_t st;
	memset(&st, 0, sizeof(st));
#endif
	if (!sz || !buf || !bufsiz)
		return NULL;

	r = buf;
	rsz = bufsiz;
	*width = 0;

	while (*p) {
#ifdef HAVE_WIDECHAR
		wchar_t wc;
		size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
		size_t len = 1;
#endif

		if (len == 0)
			break;		/* end of string */

		if (len == (size_t) -1 || len == (size_t) -2) {
			len = 1;
			/*
			 * Not valid multibyte sequence -- maybe it's
			 * printable char according to the current locales.
			 */
			if (!isprint((unsigned char) *p)) {
				/*
				 * snprintf() returns the untruncated length,
				 * so check the room for "\x??" + NUL first.
				 */
				if (rsz < 5)
					break;
				snprintf(r, rsz, "\\x%02x", (unsigned char) *p);
				r += 4;
				rsz -= 4;
				*width += 4;
			} else {
				if (rsz < 2)
					break;
				(*width)++;
				*r++ = *p;
				rsz--;
			}
		} else if (*p == '\\' && *(p + 1) == 'x') {
			if (rsz < 5)
				break;
			snprintf(r, rsz, "\\x%02x", (unsigned char) *p);
			r += 4;
			rsz -= 4;
			*width += 4;
		} else {
			if (rsz < len + 1)
				break;
			memcpy(r, p, len);
			r += len;
			rsz -= len;
#ifdef HAVE_WIDECHAR
			{
				/* wcwidth() is -1 for control chars; don't wrap size_t */
				int cells = wcwidth(wc);

				if (cells > 0)
					*width += cells;
			}
#else
			(*width)++;	/* one byte, one cell */
#endif
		}
		p += len;
	}

	/* rsz >= 1 is an invariant of the loop, the terminator always fits */
	*r = '\0';
	return buf;
}
348
349
/*
 * Guess size: worst case is every input byte expanded to a four-byte
 * \x?? sequence, plus the terminating NUL.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
	const size_t hexseq = 4;	/* strlen("\\x??") */
	const size_t nul = 1;

	return bytes * hexseq + nul;
}
356
357
/*
 * Count size of the original string in bytes (count \x?? as one byte).
 * Only a backslash followed by 'x' and two hex digits is taken as an
 * encoded byte; anything else counts byte by byte. NULL counts as 0.
 */
size_t mbs_safe_decode_size(const char *p)
{
	size_t bytes = 0;

	while (p && *p) {
		/*
		 * The && chain stops at the first mismatch, so the terminating
		 * NUL is never stepped over. <ctype.h> functions need an
		 * unsigned char value, plain char may be negative.
		 */
		if (p[0] == '\\' && p[1] == 'x' &&
		    isxdigit((unsigned char) p[2]) &&
		    isxdigit((unsigned char) p[3]))
			p += 4;
		else
			p++;
		bytes++;
	}
	return bytes;
}
374
375
/*
 * Returns allocated string where all control and non-printable chars are
 * replaced with \x?? hex sequence. The @width returns number of cells.
 *
 * Returns NULL for NULL/empty @s or if the allocation fails; the caller
 * frees the result.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
	size_t len = s ? strlen(s) : 0;
	size_t need;
	char *out, *res;

	if (len == 0)
		return NULL;

	need = mbs_safe_encode_size(len);
	out = malloc(need);
	if (out == NULL)
		return NULL;

	res = mbs_safe_encode_to_buffer(s, width, out, need, NULL);
	if (res == NULL)
		free(out);	/* encoder refused the input, drop the buffer */
	return res;
}
395
396
/*
 * Returns allocated string where all broken widechars chars are
 * replaced with \x?? hex sequence. The @width returns number of cells.
 *
 * Returns NULL for NULL/empty @s or if the allocation fails; the caller
 * frees the result.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
	size_t len = s ? strlen(s) : 0;
	size_t need;
	char *out, *res;

	if (len == 0)
		return NULL;

	need = mbs_safe_encode_size(len);
	out = malloc(need);
	if (out == NULL)
		return NULL;

	res = mbs_invalid_encode_to_buffer(s, width, out, need);
	if (res == NULL)
		free(out);	/* encoder refused the input, drop the buffer */
	return res;
}
416
417
#ifdef HAVE_WIDECHAR
418
419
/* Replace every non-printable wide char in WCHARS (in place) with
   U+FFFD.  Returns true if at least one char has been replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;
      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }
  return changed;
}
435
436
/* Truncate wchar string to width cells.  Chars for which wcwidth()
   reports -1 are replaced with U+FFFD and counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }
      if (used + w > width)
        break;
      used += w;
    }

  /* cut the string at the first char that does not fit */
  *wc = L'\0';
  return used;
}
462
463
/* Sum the wcwidth() of at most N wide chars of S, stopping at NUL.
   Returns -1 if a char is not printable or if the sum would not fit
   into an int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;

  for (; n > 0 && *s != L'\0'; n--, s++)
    {
      int w = wcwidth (*s);

      if (w == -1)                 /* non printable */
        return -1;
      if (total > (INT_MAX - w))   /* overflow */
        return -1;
      total += w;
    }

  return total;
}
480
#endif /* HAVE_WIDECHAR */
481
482
/* Truncate multi-byte string @str (in place) to @width cells and
 * return the number of bytes of the new string. With wide-char support
 * @width is updated to the number of cells actually used; if @str cannot
 * be converted to wide chars it is left as it is.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
	ssize_t nbytes = strlen(str);
#ifdef HAVE_WIDECHAR
	wchar_t *wide = NULL;
	ssize_t nchars = mbstowcs(NULL, str, 0);

	if (nchars != (ssize_t) -1) {
		wide = calloc(1, (nchars + 1) * sizeof(wchar_t));
		if (wide && mbstowcs(wide, str, nchars)) {
			*width = wc_truncate(wide, *width);
			/* the shortened text always fits, write it back */
			nbytes = wcstombs(str, wide, nbytes);
		}
	}
	free(wide);
#else
	/* one byte is one cell */
	if (nbytes >= 0 && *width < (size_t) nbytes)
		nbytes = *width;
#endif
	if (nbytes >= 0)
		str[nbytes] = '\0';
	return nbytes;
}
515
516
/* Write N_SPACES copies of PADCHAR to DEST while ensuring
   nothing is written beyond DEST_END. A terminating NUL
   is always added to DEST (DEST_END itself may receive it).
   A pointer to the terminating NUL is returned.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }
  *dest = '\0';
  return dest;
}
529
530
size_t
531
mbsalign (const char *src, char *dest, size_t dest_size,
532
          size_t *width, mbs_align_t align, int flags)
533
0
{
534
0
  return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
535
0
}
536
537
/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
538
   characters; write the result into the DEST_SIZE-byte buffer, DEST.
539
   ALIGNMENT specifies whether to left- or right-justify or to center.
540
   If SRC requires more than *WIDTH columns, truncate it to fit.
541
   When centering, the number of trailing spaces may be one less than the
542
   number of leading spaces. The FLAGS parameter is unused at present.
543
   Return the length in bytes required for the final result, not counting
544
   the trailing NUL.  A return value of DEST_SIZE or larger means there
545
   wasn't enough space.  DEST will be NUL terminated in any case.
546
   Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
547
   or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
548
   Update *WIDTH to indicate how many columns were used before padding.  */
549
550
size_t
551
mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
552
                 size_t *width, mbs_align_t align,
553
#ifdef HAVE_WIDECHAR
554
           int flags,
555
#else
556
           int flags __attribute__((__unused__)),
557
#endif
558
           int padchar)
559
0
{
560
0
  size_t ret = -1;
561
0
  size_t src_size = strlen (src) + 1;
562
0
  char *newstr = NULL;
563
0
  wchar_t *str_wc = NULL;
564
0
  const char *str_to_print = src;
565
0
  size_t n_cols = src_size - 1;
566
0
  size_t n_used_bytes = n_cols; /* Not including NUL */
567
0
  size_t n_spaces = 0, space_left;
568
569
0
#ifdef HAVE_WIDECHAR
570
0
  bool conversion = false;
571
0
  bool wc_enabled = false;
572
573
  /* In multi-byte locales convert to wide characters
574
     to allow easy truncation. Also determine number
575
     of screen columns used.  */
576
0
  if (MB_CUR_MAX > 1)
577
0
    {
578
0
      size_t src_chars = mbstowcs (NULL, src, 0);
579
0
      if (src_chars == (size_t) -1)
580
0
        {
581
0
          if (flags & MBA_UNIBYTE_FALLBACK)
582
0
            goto mbsalign_unibyte;
583
0
          else
584
0
            goto mbsalign_cleanup;
585
0
        }
586
0
      src_chars += 1; /* make space for NUL */
587
0
      str_wc = malloc (src_chars * sizeof (wchar_t));
588
0
      if (str_wc == NULL)
589
0
        {
590
0
          if (flags & MBA_UNIBYTE_FALLBACK)
591
0
            goto mbsalign_unibyte;
592
0
          else
593
0
            goto mbsalign_cleanup;
594
0
        }
595
0
      if (mbstowcs (str_wc, src, src_chars) != 0)
596
0
        {
597
0
          str_wc[src_chars - 1] = L'\0';
598
0
          wc_enabled = true;
599
0
          conversion = wc_ensure_printable (str_wc);
600
0
          n_cols = rpl_wcswidth (str_wc, src_chars);
601
0
        }
602
0
    }
603
604
  /* If we transformed or need to truncate the source string
605
     then create a modified copy of it.  */
606
0
  if (wc_enabled && (conversion || (n_cols > *width)))
607
0
    {
608
0
        if (conversion)
609
0
          {
610
             /* May have increased the size by converting
611
                \t to \uFFFD for example.  */
612
0
            src_size = wcstombs(NULL, str_wc, 0) + 1;
613
0
          }
614
0
        newstr = malloc (src_size);
615
0
        if (newstr == NULL)
616
0
        {
617
0
          if (flags & MBA_UNIBYTE_FALLBACK)
618
0
            goto mbsalign_unibyte;
619
0
          else
620
0
            goto mbsalign_cleanup;
621
0
        }
622
0
        str_to_print = newstr;
623
0
        n_cols = wc_truncate (str_wc, *width);
624
0
        n_used_bytes = wcstombs (newstr, str_wc, src_size);
625
0
    }
626
627
0
mbsalign_unibyte:
628
0
#endif
629
630
0
  if (n_cols > *width) /* Unibyte truncation required.  */
631
0
    {
632
0
      n_cols = *width;
633
0
      n_used_bytes = n_cols;
634
0
    }
635
636
0
  if (*width > n_cols) /* Padding required.  */
637
0
    n_spaces = *width - n_cols;
638
639
  /* indicate to caller how many cells needed (not including padding).  */
640
0
  *width = n_cols;
641
642
  /* indicate to caller how many bytes needed (not including NUL).  */
643
0
  ret = n_used_bytes + (n_spaces * 1);
644
645
  /* Write as much NUL terminated output to DEST as possible.  */
646
0
  if (dest_size != 0)
647
0
    {
648
0
      char *dest_end = dest + dest_size - 1;
649
0
      size_t start_spaces;
650
0
      size_t end_spaces;
651
652
0
      switch (align)
653
0
        {
654
0
        case MBS_ALIGN_CENTER:
655
0
          start_spaces = n_spaces / 2 + n_spaces % 2;
656
0
          end_spaces = n_spaces / 2;
657
0
          break;
658
0
        case MBS_ALIGN_LEFT:
659
0
          start_spaces = 0;
660
0
          end_spaces = n_spaces;
661
0
          break;
662
0
        case MBS_ALIGN_RIGHT:
663
0
          start_spaces = n_spaces;
664
0
          end_spaces = 0;
665
0
          break;
666
0
  default:
667
0
    abort();
668
0
        }
669
670
0
      dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
671
0
      space_left = dest_end - dest;
672
0
      dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
673
0
      mbs_align_pad (dest, dest_end, end_spaces, padchar);
674
0
    }
675
0
#ifdef HAVE_WIDECHAR
676
0
mbsalign_cleanup:
677
0
#endif
678
0
  free (str_wc);
679
0
  free (newstr);
680
681
0
  return ret;
682
0
}