Coverage Report

Created: 2026-04-11 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/util-linux/lib/mbsalign.c
Line
Count
Source
1
/*
2
 * SPDX-License-Identifier: LGPL-2.1-or-later
3
 *
4
 * Align/Truncate a string in a given screen width
5
 * Copyright (C) 2009-2010 Free Software Foundation, Inc.
6
 *
7
 * This program is free software: you can redistribute it and/or modify it
8
 * under the terms of the GNU Lesser General Public License as published by the
9
 * Free Software Foundation, either version 2.1 of the License, or (at your
10
 * option) any later version.
11
 *
12
 * Written by Pádraig Brady.
13
 */
14
#include <stdlib.h>
15
#include <string.h>
16
#include <stdio.h>
17
#include <stdbool.h>
18
#include <limits.h>
19
#include <ctype.h>
20
21
#include "c.h"
22
#include "mbsalign.h"
23
#include "strutils.h"
24
#include "widechar.h"
25
26
/*
 * Counts the number of screen cells used by at most @bufsz bytes of the
 * multibyte string @buf; counting also stops at the first NUL byte.
 *
 * Control and non-printable characters are ignored, as are "\e[...m"
 * sequences and the SCS sequences "\e(X", "\e)X", "\e*X" and "\e+X".
 *
 * Returns: number of cells; 0 when @buf is NULL or @bufsz is zero.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
  const char *p = buf, *end;
  size_t width = 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  if (!buf || !bufsz)
    return 0;

  end = buf + bufsz;   /* one past the last byte we are allowed to read */

  while (p < end && *p) {
    if (*p == '\033') {
      p++;
      if (p >= end)
        break;   /* ESC was the last byte of the buffer */

      /* try to detect "\e[x;ym" and skip it on success */
      if (*p == '[') {
        const char *e = p;

        while (e + 1 < end && *e && *e != 'm')
          e++;
        if (*e == 'm')
          p = e + 1;
      }
      /* try to detect SCS sequences "\e(X", "\e)X", "\e*X", "\e+X";
       * never step over a NUL or past the end of the buffer */
      else if (p + 1 < end && p[1]
               && (*p == '(' || *p == ')' || *p == '*' || *p == '+'))
        p += 2;
      continue;
    }
    if (iscntrl((unsigned char) *p)) {
      p++;
      continue;
    }
#ifdef HAVE_WIDECHAR
    {
      wchar_t wc;
      size_t avail = (size_t) (end - p);
      size_t len = mbrtowc(&wc, p, avail < MB_CUR_MAX ? avail : MB_CUR_MAX, &st);

      if (len == 0)
        break;
      if (len == (size_t) -1 || len == (size_t) -2) {
        /* Invalid or incomplete sequence: wc was not stored, so it must
         * not be inspected.  Restart the decoder and skip one byte. */
        memset(&st, 0, sizeof(st));
        len = 1;
      } else if (iswprint(wc)) {
        int x = wcwidth(wc);

        if (x > 0)
          width += x;
      }
      p += len;
    }
#else
    if (isprint((unsigned char) *p))
      width++;
    p++;
#endif
  }

  return width;
}
87
88
/*
 * Width in cells of the whole NUL-terminated string @s, as computed by
 * mbs_nwidth().  NULL and the empty string yield 0.
 */
size_t mbs_width(const char *s)
{
  return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
94
95
/*
 * Counts the number of cells needed for at most @bufsz bytes of the
 * multibyte string @buf once it has been encoded the way
 * mbs_safe_encode() does it: every control or non-printable byte, and
 * the backslash of a literal "\x", is accounted for as a four character
 * \x?? hex sequence.
 *
 * Returns: number of cells; if @sz is not NULL it receives the number of
 * bytes.  Both are 0 when @buf is NULL or @bufsz is zero.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
  const char *p = buf;
  const char *end = (buf && bufsz) ? buf + bufsz : buf;
  size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif

  while (p && p < end && *p) {
    if ((p + 1 < end && *p == '\\' && *(p + 1) == 'x')
        || iscntrl((unsigned char) *p)) {
      width += 4, bytes += 4;   /* *p encoded to \x?? */
      p++;
    }
#ifdef HAVE_WIDECHAR
    else {
      wchar_t wc;
      size_t avail = (size_t) (end - p);
      size_t len = mbrtowc(&wc, p, avail < MB_CUR_MAX ? avail : MB_CUR_MAX, &st);

      if (len == 0)
        break;

      if (len == (size_t) -1 || len == (size_t) -2) {
        /* broken sequence: restart the decoder, treat it as one byte */
        memset(&st, 0, sizeof(st));
        len = 1;
        if (isprint((unsigned char) *p))
          width += 1, bytes += 1;
        else
          width += 4, bytes += 4;

      } else if (!iswprint(wc)) {
        width += len * 4; /* hex encode whole sequence */
        bytes += len * 4;
      } else {
        int cells = wcwidth(wc);

        if (cells > 0)    /* never add a negative value to a size_t */
          width += cells;
        bytes += len;   /* number of bytes */
      }
      p += len;
    }
#else
    else if (!isprint((unsigned char) *p)) {
      width += 4, bytes += 4;   /* *p encoded to \x?? */
      p++;
    } else {
      width++, bytes++;
      p++;
    }
#endif
  }

  if (sz)
    *sz = bytes;
  return width;
}
159
160
/*
 * Width in cells of the whole NUL-terminated string @s after safe
 * encoding, as computed by mbs_safe_nwidth().  NULL and "" yield 0.
 */
size_t mbs_safe_width(const char *s)
{
  return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
166
167
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequences.  Bytes listed in @safechars (may be NULL) are
 * copied verbatim and are not added to the cell count.
 *
 * If @width is not NULL it receives the number of cells written.
 *
 * At most @bufsiz bytes, including the terminating NUL, are written.
 * When the buffer is too small the output is cut before the first item
 * that does not fit; a buffer of mbs_safe_encode_size(strlen(s)) bytes
 * is always big enough.
 *
 * Returns: @buf, or NULL if @s is NULL or empty, @buf is NULL or
 * @bufsiz is zero.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, size_t bufsiz, const char *safechars)
{
  const char *p = s;
  char *r;
  size_t left;        /* free bytes in @buf, the final NUL included */
  size_t cells = 0;
  size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  if (!sz || !buf || !bufsiz)
    return NULL;

  r = buf;
  left = bufsiz;

  while (*p) {
    if (safechars && strchr(safechars, *p)) {
      if (left < 2)
        break;
      *r++ = *p++;
      left--;
      continue;
    }

    if ((*p == '\\' && *(p + 1) == 'x')
        || iscntrl((unsigned char) *p)) {
      /* snprintf() reports the untruncated length, so the space has
       * to be checked up front: 4 chars + NUL */
      if (left < 5)
        break;
      snprintf(r, left, "\\x%02x", (unsigned char) *p);
      r += 4;
      left -= 4;
      cells += 4;
      p++;
    }
#ifdef HAVE_WIDECHAR
    else {
      wchar_t wc;
      size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

      if (len == 0)
        break;   /* end of string */

      if (len == (size_t) -1 || len == (size_t) -2) {
        memset(&st, 0, sizeof(st));   /* restart the decoder */
        len = 1;
        /*
         * Not valid multibyte sequence -- maybe it's
         * printable char according to the current locales.
         */
        if (!isprint((unsigned char) *p)) {
          if (left < 5)
            break;
          snprintf(r, left, "\\x%02x", (unsigned char) *p);
          r += 4;
          left -= 4;
          cells += 4;
        } else {
          if (left < 2)
            break;
          cells++;
          *r++ = *p;
          left--;
        }
      } else if (!iswprint(wc)) {
        size_t i;

        /* hex encode every byte of the sequence */
        for (i = 0; i < len; i++) {
          if (left < 5)
            break;
          snprintf(r, left, "\\x%02x", (unsigned char) p[i]);
          r += 4;
          left -= 4;
          cells += 4;
        }
        if (i < len)
          break;
      } else {
        int x;

        if (left < len + 1)
          break;
        memcpy(r, p, len);
        r += len;
        left -= len;
        x = wcwidth(wc);
        if (x > 0)
          cells += x;
      }
      p += len;
    }
#else
    else if (!isprint((unsigned char) *p)) {
      if (left < 5)
        break;
      snprintf(r, left, "\\x%02x", (unsigned char) *p);
      p++;
      r += 4;
      left -= 4;
      cells += 4;
    } else {
      if (left < 2)
        break;
      *r++ = *p++;
      left--;
      cells++;
    }
#endif
  }

  *r = '\0';   /* always in bounds: left >= 1 holds throughout */
  if (width)
    *width = cells;
  return buf;
}
280
281
/*
 * Copy @s to @buf and replace broken multibyte sequences, and the
 * backslash of a literal "\x", with \x?? hex sequences.  Everything
 * else is copied unchanged.
 *
 * If @width is not NULL it receives the number of cells written; copied
 * characters for which wcwidth() reports no positive width add nothing.
 *
 * At most @bufsiz bytes, including the terminating NUL, are written.
 * When the buffer is too small the output is cut before the first item
 * that does not fit; a buffer of mbs_safe_encode_size(strlen(s)) bytes
 * is always big enough.
 *
 * Returns: @buf, or NULL if @s is NULL or empty, @buf is NULL or
 * @bufsiz is zero.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf, size_t bufsiz)
{
  const char *p = s;
  char *r;
  size_t left;        /* free bytes in @buf, the final NUL included */
  size_t cells = 0;
  size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  if (!sz || !buf || !bufsiz)
    return NULL;

  r = buf;
  left = bufsiz;

  while (*p) {
#ifdef HAVE_WIDECHAR
    wchar_t wc;
    size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
    size_t len = 1;
#endif

    if (len == 0)
      break;   /* end of string */

    if (len == (size_t) -1 || len == (size_t) -2) {
#ifdef HAVE_WIDECHAR
      memset(&st, 0, sizeof(st));   /* restart the decoder */
#endif
      len = 1;
      /*
       * Not valid multibyte sequence -- maybe it's
       * printable char according to the current locales.
       */
      if (!isprint((unsigned char) *p)) {
        /* snprintf() reports the untruncated length, so check the
         * space up front: 4 chars + NUL */
        if (left < 5)
          break;
        snprintf(r, left, "\\x%02x", (unsigned char) *p);
        r += 4;
        left -= 4;
        cells += 4;
      } else {
        if (left < 2)
          break;
        cells++;
        *r++ = *p;
        left--;
      }
    } else if (*p == '\\' && *(p + 1) == 'x') {
      if (left < 5)
        break;
      snprintf(r, left, "\\x%02x", (unsigned char) *p);
      r += 4;
      left -= 4;
      cells += 4;
    } else {
      if (left < len + 1)
        break;
      memcpy(r, p, len);
      r += len;
      left -= len;
#ifdef HAVE_WIDECHAR
      {
        int x = wcwidth(wc);

        /* wcwidth() is -1 for e.g. control chars; do not let that
         * wrap the unsigned counter */
        if (x > 0)
          cells += x;
      }
#else
      cells++;
#endif
    }
    p += len;
  }

  *r = '\0';   /* always in bounds: left >= 1 holds throughout */
  if (width)
    *width = cells;
  return buf;
}
355
356
/*
 * Upper bound for the buffer needed to safe-encode @bytes input bytes:
 * four output characters per input byte plus the terminating NUL.
 *
 * If that does not fit into size_t the largest size_t value is returned
 * rather than a wrapped-around small number, so an allocation of the
 * result fails instead of producing an undersized buffer.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
  const size_t limit = (size_t) -1;

  if (bytes > (limit - 1) / 4)
    return limit;
  return (bytes * 4) + 1;
}
363
364
/*
 * Count size of the original string in bytes: every "\x" followed by
 * two hex digits counts as one byte, any other byte counts as itself.
 *
 * Returns: number of bytes; 0 for NULL.
 */
size_t mbs_safe_decode_size(const char *p)
{
  size_t bytes = 0;

  while (p && *p) {
    /* <ctype.h> functions need an unsigned char value; plain char may
     * be negative for bytes >= 0x80.  The && chain stops at the first
     * NUL, so p[2] and p[3] are only read when they exist. */
    if (*p == '\\' && *(p + 1) == 'x' &&
        isxdigit((unsigned char) *(p + 2)) &&
        isxdigit((unsigned char) *(p + 3)))
      p += 4;
    else
      p++;
    bytes++;
  }
  return bytes;
}
381
382
/*
 * Returns a newly allocated copy of @s in which control and
 * non-printable chars are replaced with \x?? hex sequences (see
 * mbs_safe_encode_to_buffer()); @width is handed through to it.
 *
 * Returns NULL if @s is NULL or empty, or if the allocation or the
 * encoding fails.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
  size_t bufsz;
  char *buf, *encoded;

  if (!s || !*s)
    return NULL;

  bufsz = mbs_safe_encode_size(strlen(s));
  buf = malloc(bufsz);
  if (!buf)
    return NULL;

  encoded = mbs_safe_encode_to_buffer(s, width, buf, bufsz, NULL);
  if (!encoded)
    free(buf);
  return encoded;
}
402
403
/*
 * Returns a newly allocated copy of @s in which broken multibyte
 * sequences are replaced with \x?? hex sequences (see
 * mbs_invalid_encode_to_buffer()); @width is handed through to it.
 *
 * Returns NULL if @s is NULL or empty, or if the allocation or the
 * encoding fails.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
  size_t bufsz;
  char *buf, *encoded;

  if (!s || !*s)
    return NULL;

  bufsz = mbs_safe_encode_size(strlen(s));
  buf = malloc(bufsz);
  if (!buf)
    return NULL;

  encoded = mbs_invalid_encode_to_buffer(s, width, buf, bufsz);
  if (!encoded)
    free(buf);
  return encoded;
}
423
424
#ifdef HAVE_WIDECHAR
425
426
/* Replace every wide character of WCHARS that iswprint() rejects with
   U+FFFD (the replacement character), in place.
   Returns true if at least one character was replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur != L'\0'; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }
  return changed;
}
442
443
/* Cut the wide string WC, in place, so that it needs at most WIDTH
   cells.  A character for which wcwidth() returns -1 is replaced by
   U+FFFD and counted as one cell.
   Returns the number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc != L'\0'; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }
      if (used + w > width)
        break;

      used += w;
    }
  *wc = L'\0';
  return used;
}
469
470
/* Sum of the wcwidth() values of at most N leading wide characters of
   S, stopping at the terminating NUL.
   Returns -1 if a character is non printable (wcwidth() gives -1) or
   if the sum would exceed INT_MAX.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1)               /* non printable */
        return -1;
      if (total > INT_MAX - w)   /* overflow */
        return -1;
      total += w;
    }

  return total;
}
487
#endif /* HAVE_WIDECHAR */
488
489
/* Truncate the multi-byte string @str, in place, to at most *@width
 * cells.
 *
 * Returns the number of bytes of the truncated @str and stores the
 * number of cells it uses in *@width.  Without wide character support
 * one byte is taken to be one cell.
 *
 * If @str cannot be converted to wide characters, or memory runs out,
 * @str and *@width are left untouched and the current length of @str is
 * returned.  If the conversion back to multi-byte fails, (size_t) -1 is
 * returned.  Returns 0 when @str or @width is NULL.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
  size_t bytes;

  if (!str || !width)
    return 0;

  bytes = strlen(str);
#ifdef HAVE_WIDECHAR
  {
    size_t sz = mbstowcs(NULL, str, 0);
    size_t converted;
    wchar_t *wcs = NULL;

    if (sz == (size_t) -1)
      goto done;

    /* calloc() checks the multiplication and zero-fills, so the wide
     * string is terminated even though only sz characters are stored */
    wcs = calloc(sz + 1, sizeof(wchar_t));
    if (!wcs)
      goto done;

    converted = mbstowcs(wcs, str, sz);
    if (converted == 0 || converted == (size_t) -1)
      goto done;
    *width = wc_truncate(wcs, *width);
    bytes = wcstombs(str, wcs, bytes);
done:
    free(wcs);
  }
#else
  if (*width < bytes)
    bytes = *width;
  *width = bytes;   /* report the cells really used, as documented */
#endif
  if (bytes != (size_t) -1)
    str[bytes] = '\0';
  return bytes;
}
522
523
/* Store up to N_SPACES copies of PADCHAR at DEST, never writing a pad
   character at or beyond DEST_END, then terminate the output with a
   NUL.  Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces > 0 && dest < dest_end)
    {
      *dest++ = padchar;
      n_spaces--;
    }
  *dest = '\0';
  return dest;
}
536
537
size_t
538
mbsalign (const char *src, char *dest, size_t dest_size,
539
          size_t *width, mbs_align_t align, int flags)
540
0
{
541
0
  return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
542
0
}
543
544
/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
545
   characters; write the result into the DEST_SIZE-byte buffer, DEST.
546
   ALIGNMENT specifies whether to left- or right-justify or to center.
547
   If SRC requires more than *WIDTH columns, truncate it to fit.
548
   When centering, the number of trailing spaces may be one less than the
549
   number of leading spaces. The FLAGS parameter is unused at present.
550
   Return the length in bytes required for the final result, not counting
551
   the trailing NUL.  A return value of DEST_SIZE or larger means there
552
   wasn't enough space.  DEST will be NUL terminated in any case.
553
   Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
554
   or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
555
   Update *WIDTH to indicate how many columns were used before padding.  */
556
557
size_t
558
mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
559
                 size_t *width, mbs_align_t align,
560
#ifdef HAVE_WIDECHAR
561
           int flags,
562
#else
563
           int flags __attribute__((__unused__)),
564
#endif
565
           int padchar)
566
0
{
567
0
  size_t ret = -1;
568
0
  size_t src_size = strlen (src) + 1;
569
0
  char *newstr = NULL;
570
0
  wchar_t *str_wc = NULL;
571
0
  const char *str_to_print = src;
572
0
  size_t n_cols = src_size - 1;
573
0
  size_t n_used_bytes = n_cols; /* Not including NUL */
574
0
  size_t n_spaces = 0, space_left;
575
576
0
#ifdef HAVE_WIDECHAR
577
0
  bool conversion = false;
578
0
  bool wc_enabled = false;
579
580
  /* In multi-byte locales convert to wide characters
581
     to allow easy truncation. Also determine number
582
     of screen columns used.  */
583
0
  if (MB_CUR_MAX > 1)
584
0
    {
585
0
      size_t src_chars = mbstowcs (NULL, src, 0);
586
0
      if (src_chars == (size_t) -1)
587
0
        {
588
0
          if (flags & MBA_UNIBYTE_FALLBACK)
589
0
            goto mbsalign_unibyte;
590
0
          else
591
0
            goto mbsalign_cleanup;
592
0
        }
593
0
      src_chars += 1; /* make space for NUL */
594
0
      str_wc = malloc (src_chars * sizeof (wchar_t));
595
0
      if (str_wc == NULL)
596
0
        {
597
0
          if (flags & MBA_UNIBYTE_FALLBACK)
598
0
            goto mbsalign_unibyte;
599
0
          else
600
0
            goto mbsalign_cleanup;
601
0
        }
602
0
      if (mbstowcs (str_wc, src, src_chars) != 0)
603
0
        {
604
0
          str_wc[src_chars - 1] = L'\0';
605
0
          wc_enabled = true;
606
0
          conversion = wc_ensure_printable (str_wc);
607
0
          n_cols = rpl_wcswidth (str_wc, src_chars);
608
0
        }
609
0
    }
610
611
  /* If we transformed or need to truncate the source string
612
     then create a modified copy of it.  */
613
0
  if (wc_enabled && (conversion || (n_cols > *width)))
614
0
    {
615
0
        if (conversion)
616
0
          {
617
             /* May have increased the size by converting
618
                \t to \uFFFD for example.  */
619
0
            src_size = wcstombs(NULL, str_wc, 0) + 1;
620
0
          }
621
0
        newstr = malloc (src_size);
622
0
        if (newstr == NULL)
623
0
        {
624
0
          if (flags & MBA_UNIBYTE_FALLBACK)
625
0
            goto mbsalign_unibyte;
626
0
          else
627
0
            goto mbsalign_cleanup;
628
0
        }
629
0
        str_to_print = newstr;
630
0
        n_cols = wc_truncate (str_wc, *width);
631
0
        n_used_bytes = wcstombs (newstr, str_wc, src_size);
632
0
    }
633
634
0
mbsalign_unibyte:
635
0
#endif
636
637
0
  if (n_cols > *width) /* Unibyte truncation required.  */
638
0
    {
639
0
      n_cols = *width;
640
0
      n_used_bytes = n_cols;
641
0
    }
642
643
0
  if (*width > n_cols) /* Padding required.  */
644
0
    n_spaces = *width - n_cols;
645
646
  /* indicate to caller how many cells needed (not including padding).  */
647
0
  *width = n_cols;
648
649
  /* indicate to caller how many bytes needed (not including NUL).  */
650
0
  ret = n_used_bytes + (n_spaces * 1);
651
652
  /* Write as much NUL terminated output to DEST as possible.  */
653
0
  if (dest_size != 0)
654
0
    {
655
0
      char *dest_end = dest + dest_size - 1;
656
0
      size_t start_spaces;
657
0
      size_t end_spaces;
658
659
0
      switch (align)
660
0
        {
661
0
        case MBS_ALIGN_CENTER:
662
0
          start_spaces = n_spaces / 2 + n_spaces % 2;
663
0
          end_spaces = n_spaces / 2;
664
0
          break;
665
0
        case MBS_ALIGN_LEFT:
666
0
          start_spaces = 0;
667
0
          end_spaces = n_spaces;
668
0
          break;
669
0
        case MBS_ALIGN_RIGHT:
670
0
          start_spaces = n_spaces;
671
0
          end_spaces = 0;
672
0
          break;
673
0
  default:
674
0
    abort();
675
0
        }
676
677
0
      dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
678
0
      space_left = dest_end - dest;
679
0
      dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
680
0
      mbs_align_pad (dest, dest_end, end_spaces, padchar);
681
0
    }
682
0
#ifdef HAVE_WIDECHAR
683
0
mbsalign_cleanup:
684
0
#endif
685
0
  free (str_wc);
686
0
  free (newstr);
687
688
0
  return ret;
689
0
}