Coverage Report

Created: 2023-06-07 06:09

/src/util-linux/lib/mbsalign.c
Line
Count
Source (jump to first uncovered line)
1
/* Align/Truncate a string in a given screen width
2
   Copyright (C) 2009-2010 Free Software Foundation, Inc.
3
4
   This program is free software: you can redistribute it and/or modify
5
   it under the terms of the GNU Lesser General Public License as published by
6
   the Free Software Foundation, either version 2.1 of the License, or
7
   (at your option) any later version.
8
9
   This program is distributed in the hope that it will be useful,
10
   but WITHOUT ANY WARRANTY; without even the implied warranty of
11
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
   GNU General Public License for more details.
13
14
   You should have received a copy of the GNU General Public License
15
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17
/* Written by Pádraig Brady.  */
18
19
#include <stdlib.h>
20
#include <string.h>
21
#include <stdio.h>
22
#include <stdbool.h>
23
#include <limits.h>
24
#include <ctype.h>
25
26
#include "c.h"
27
#include "mbsalign.h"
28
#include "strutils.h"
29
#include "widechar.h"
30
31
/*
 * Counts the number of screen cells used by at most @bufsz bytes of the
 * multibyte string @buf. Control and non-printable chars are ignored. A
 * control char directly followed by '[' is treated as the start of a
 * "\e[x;ym" terminal sequence and skipped up to and including the 'm'.
 *
 * Counting stops at the first NUL byte or after @bufsz bytes, whichever
 * comes first. A NULL @buf or a zero @bufsz yields 0.
 *
 * Returns: number of cells.
 */
size_t mbs_nwidth(const char *buf, size_t bufsz)
{
  const char *p = buf, *last;
  size_t width = 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  if (!buf || !bufsz)
    return 0;

  last = buf + (bufsz - 1);

  /* check the bound first so that nothing behind @last is dereferenced */
  while (p <= last && *p) {
    if (iscntrl((unsigned char) *p)) {
      p++;

      /* try detect "\e[x;ym" and skip on success */
      if (p <= last && *p == '[') {
        const char *e = p;
        while (*e && e < last && *e != 'm')
          e++;
        if (*e == 'm')
          p = e + 1;
      }
      continue;
    }
#ifdef HAVE_WIDECHAR
    wchar_t wc;
    /* never let mbrtowc() consume bytes behind @last */
    size_t left = (size_t) (last - p) + 1;
    size_t len = mbrtowc(&wc, p, left < MB_CUR_MAX ? left : MB_CUR_MAX, &st);

    if (len == 0)
      break;
    if (len == (size_t) -1 || len == (size_t) -2) {
      /*
       * Invalid or incomplete sequence: @wc is not set, so it must
       * not be inspected. Skip one byte and restart from the initial
       * conversion state.
       */
      memset(&st, 0, sizeof(st));
      len = 1;
    } else if (iswprint(wc)) {
      int x = wcwidth(wc);
      if (x > 0)
        width += x;
    }
    p += len;
#else
    if (isprint((unsigned char) *p))
      width++;
    p++;
#endif
  }

  return width;
}
85
86
/*
 * Counts number of cells in the whole NUL-terminated multibyte string @s,
 * see mbs_nwidth(). NULL and empty strings have zero width.
 */
size_t mbs_width(const char *s)
{
  return (s && *s) ? mbs_nwidth(s, strlen(s)) : 0;
}
92
93
/*
 * Counts number of cells in at most @bufsz bytes of the multibyte string
 * @buf. For all control and non-printable chars is the result width
 * enlarged to store \x?? hex sequence; a literal "\x" in the input is
 * counted as encoded too. See mbs_safe_encode().
 *
 * Counting stops at the first NUL byte or after @bufsz bytes, whichever
 * comes first. A NULL @buf or a zero @bufsz yields 0 cells and 0 bytes.
 *
 * Returns: number of cells, @sz (optional, may be NULL) returns number of
 * bytes.
 */
size_t mbs_safe_nwidth(const char *buf, size_t bufsz, size_t *sz)
{
  const char *p = buf, *last;
  size_t width = 0, bytes = 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  if (!buf || !bufsz) {
    if (sz)
      *sz = 0;
    return 0;
  }

  last = buf + (bufsz - 1);

  /* check the bound first so that nothing behind @last is dereferenced */
  while (p <= last && *p) {
    if ((p < last && *p == '\\' && *(p + 1) == 'x')
        || iscntrl((unsigned char) *p)) {
      width += 4, bytes += 4;   /* *p encoded to \x?? */
      p++;
    }
#ifdef HAVE_WIDECHAR
    else {
      wchar_t wc;
      /* never let mbrtowc() consume bytes behind @last */
      size_t left = (size_t) (last - p) + 1;
      size_t len = mbrtowc(&wc, p, left < MB_CUR_MAX ? left : MB_CUR_MAX, &st);

      if (len == 0)
        break;

      if (len == (size_t) -1 || len == (size_t) -2) {
        /* broken sequence; restart from the initial state */
        memset(&st, 0, sizeof(st));
        len = 1;
        if (isprint((unsigned char) *p))
          width += 1, bytes += 1;
        else
          width += 4, bytes += 4;

      } else if (!iswprint(wc)) {
        width += len * 4; /* hex encode whole sequence */
        bytes += len * 4;
      } else {
        int cells = wcwidth(wc);

        /* a negative result must not wrap the unsigned counter */
        if (cells > 0)
          width += cells; /* number of cells */
        bytes += len;   /* number of bytes */
      }
      p += len;
    }
#else
    else if (!isprint((unsigned char) *p)) {
      width += 4, bytes += 4;   /* *p encoded to \x?? */
      p++;
    } else {
      width++, bytes++;
      p++;
    }
#endif
  }

  if (sz)
    *sz = bytes;
  return width;
}
157
158
/*
 * Counts number of cells the whole NUL-terminated string @s needs after
 * \x?? encoding, see mbs_safe_nwidth(). NULL and empty strings have zero
 * width.
 */
size_t mbs_safe_width(const char *s)
{
  return (s && *s) ? mbs_safe_nwidth(s, strlen(s), NULL) : 0;
}
164
165
/*
 * Copy @s to @buf and replace control and non-printable chars with
 * \x?? hex sequence. A backslash that starts a literal "\x" in @s is
 * encoded too. The @width (optional, may be NULL) returns number of cells.
 * The @safechars are copied as they are and do not contribute to @width.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL; @width is
 * set to zero in that case.
 */
char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars)
{
  const char *p = s;
  char *r;
  size_t cells = 0;
  size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  /* never leave the caller's counter uninitialized */
  if (width)
    *width = 0;
  if (!sz || !buf)
    return NULL;

  r = buf;

  while (*p) {
    if (safechars && strchr(safechars, *p)) {
      *r++ = *p++;
      continue;
    }

    if ((*p == '\\' && *(p + 1) == 'x')
        || iscntrl((unsigned char) *p)) {
      sprintf(r, "\\x%02x", (unsigned char) *p);
      r += 4;
      cells += 4;
      p++;
    }
#ifdef HAVE_WIDECHAR
    else {
      wchar_t wc;
      size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);

      if (len == 0)
        break;   /* end of string */

      if (len == (size_t) -1 || len == (size_t) -2) {
        len = 1;
        /* broken sequence; restart from the initial state */
        memset(&st, 0, sizeof(st));
        /*
         * Not valid multibyte sequence -- maybe it's
         * printable char according to the current locales.
         */
        if (!isprint((unsigned char) *p)) {
          sprintf(r, "\\x%02x", (unsigned char) *p);
          r += 4;
          cells += 4;
        } else {
          cells++;
          *r++ = *p;
        }
      } else if (!iswprint(wc)) {
        size_t i;
        for (i = 0; i < len; i++) {
          sprintf(r, "\\x%02x", (unsigned char) p[i]);
          r += 4;
          cells += 4;
        }
      } else {
        int x = wcwidth(wc);

        memcpy(r, p, len);
        r += len;
        /* a negative result must not wrap the unsigned counter */
        if (x > 0)
          cells += x;
      }
      p += len;
    }
#else
    else if (!isprint((unsigned char) *p)) {
      sprintf(r, "\\x%02x", (unsigned char) *p);
      p++;
      r += 4;
      cells += 4;
    } else {
      *r++ = *p++;
      cells++;
    }
#endif
  }

  *r = '\0';
  if (width)
    *width = cells;
  return buf;
}
254
255
/*
 * Copy @s to @buf and replace broken multibyte sequences with \x?? hex
 * sequence. A backslash that starts a literal "\x" in @s is encoded too.
 * Valid characters (including control chars) are copied unchanged. The
 * @width (optional, may be NULL) returns number of cells.
 *
 * The @buf has to be big enough to store mbs_safe_encode_size(strlen(s)))
 * bytes.
 *
 * Returns: @buf, or NULL if @s is NULL or empty or @buf is NULL; @width is
 * set to zero in that case.
 */
char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
{
  const char *p = s;
  char *r;
  size_t cells = 0;
  size_t sz = s ? strlen(s) : 0;

#ifdef HAVE_WIDECHAR
  mbstate_t st;
  memset(&st, 0, sizeof(st));
#endif
  /* never leave the caller's counter uninitialized */
  if (width)
    *width = 0;
  if (!sz || !buf)
    return NULL;

  r = buf;

  while (*p) {
#ifdef HAVE_WIDECHAR
    wchar_t wc;
    size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
#else
    size_t len = 1;
#endif

    if (len == 0)
      break;   /* end of string */

    if (len == (size_t) -1 || len == (size_t) -2) {
      len = 1;
#ifdef HAVE_WIDECHAR
      /* broken sequence; restart from the initial state */
      memset(&st, 0, sizeof(st));
#endif
      /*
       * Not valid multibyte sequence -- maybe it's
       * printable char according to the current locales.
       */
      if (!isprint((unsigned char) *p)) {
        sprintf(r, "\\x%02x", (unsigned char) *p);
        r += 4;
        cells += 4;
      } else {
        cells++;
        *r++ = *p;
      }
    } else if (*p == '\\' && *(p + 1) == 'x') {
      sprintf(r, "\\x%02x", (unsigned char) *p);
      r += 4;
      cells += 4;
    } else {
      memcpy(r, p, len);
      r += len;
#ifdef HAVE_WIDECHAR
      {
        /*
         * wcwidth() is negative for non-printable chars (e.g. a tab);
         * adding that to an unsigned counter would wrap it around.
         */
        int x = wcwidth(wc);
        if (x > 0)
          cells += x;
      }
#else
      cells++;
#endif
    }
    p += len;
  }

  *r = '\0';
  if (width)
    *width = cells;
  return buf;
}
317
318
/*
 * Returns the buffer size necessary to store the \x?? encoded form of a
 * string of @bytes bytes: four output bytes for every input byte plus the
 * terminating NUL.
 *
 * If the result does not fit into size_t, the maximal size_t value is
 * returned rather than a wrapped-around small number, so that a following
 * allocation fails instead of being silently too short.
 */
size_t mbs_safe_encode_size(size_t bytes)
{
  const size_t max = (size_t) -1;

  if (bytes > (max - 1) / 4)
    return max;
  return (bytes * 4) + 1;
}
322
323
/*
 * Returns allocated string where all control and non-printable chars are
 * replaced with \x?? hex sequence. The caller has to free() the result.
 *
 * The @width (optional, may be NULL) returns number of cells; it is set
 * to zero when NULL is returned.
 *
 * Returns: NULL if @s is NULL or empty, or on allocation failure.
 */
char *mbs_safe_encode(const char *s, size_t *width)
{
  size_t sz = s ? strlen(s) : 0;
  size_t unused;
  char *buf, *ret;

  /* give the callee a valid counter and the caller a defined value */
  if (!width)
    width = &unused;
  *width = 0;

  if (!sz)
    return NULL;

  buf = malloc(mbs_safe_encode_size(sz));
  if (!buf)
    return NULL;

  ret = mbs_safe_encode_to_buffer(s, width, buf, NULL);
  if (!ret)
    free(buf);
  return ret;
}
341
342
/*
 * Returns allocated string where all broken widechars chars are
 * replaced with \x?? hex sequence. The caller has to free() the result.
 *
 * The @width (optional, may be NULL) returns number of cells; it is set
 * to zero when NULL is returned.
 *
 * Returns: NULL if @s is NULL or empty, or on allocation failure.
 */
char *mbs_invalid_encode(const char *s, size_t *width)
{
  size_t sz = s ? strlen(s) : 0;
  size_t unused;
  char *buf, *ret;

  /* give the callee a valid counter and the caller a defined value */
  if (!width)
    width = &unused;
  *width = 0;

  if (!sz)
    return NULL;

  buf = malloc(mbs_safe_encode_size(sz));
  if (!buf)
    return NULL;

  ret = mbs_invalid_encode_to_buffer(s, width, buf);
  if (!ret)
    free(buf);
  return ret;
}
360
361
#ifdef HAVE_WIDECHAR
362
363
/* Replace every non-printable wide character in the NUL-terminated
   string WCHARS with U+FFFD (replacement char), in place.
   Return true if at least one character has been replaced.  */

static bool
wc_ensure_printable (wchar_t *wchars)
{
  bool changed = false;
  wchar_t *cur;

  for (cur = wchars; *cur; cur++)
    {
      if (iswprint ((wint_t) *cur))
        continue;

      *cur = 0xFFFD; /* L'\uFFFD' (replacement char) */
      changed = true;
    }
  return changed;
}
379
380
/* Cut the wide string WC (in place) so that it occupies at most WIDTH
   screen cells.  A character for which wcwidth() reports -1 is replaced
   with U+FFFD and counted as one cell.
   Returns number of cells used.  */

static size_t
wc_truncate (wchar_t *wc, size_t width)
{
  size_t used = 0;

  for (; *wc; wc++)
    {
      int w = wcwidth (*wc);

      if (w == -1) /* non printable */
        {
          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
          w = 1;
        }
      if (used + w > width)
        break;

      used += w;
    }
  *wc = L'\0';
  return used;
}
406
407
/* Sum up the number of screen cells needed for at most N wide characters
   of S, stopping at the terminating NUL.
   Returns -1 if a character is not printable according to wcwidth() or
   if the sum would not fit into int.  */

static int
rpl_wcswidth (const wchar_t *s, size_t n)
{
  int total = 0;
  size_t i;

  for (i = 0; i < n && s[i] != L'\0'; i++)
    {
      int w = wcwidth (s[i]);

      if (w == -1)               /* non printable */
        return -1;
      if (total > (INT_MAX - w)) /* overflow */
        return -1;
      total += w;
    }

  return total;
}
424
#endif /* HAVE_WIDECHAR */
425
426
/* Truncate multi-byte string @str (in place) to at most @width cells.
 *
 * Returns number of bytes of the new string @str, and in @width returns
 * number of cells really used. If @str is not a valid multi-byte string
 * (or memory cannot be allocated), @str and @width are left untouched and
 * the original length in bytes is returned. (size_t) -1 is returned when
 * the truncated wide string cannot be converted back to multi-byte form.
 */
size_t
mbs_truncate(char *str, size_t *width)
{
  size_t bytes = strlen(str);
#ifdef HAVE_WIDECHAR
  size_t sz = mbstowcs(NULL, str, 0);
  wchar_t *wcs = NULL;

  if (sz == (size_t) -1)
    goto done;
  if (sz == 0) {
    /* empty string occupies no cells */
    *width = 0;
    goto done;
  }

  /* calloc() checks the count * size multiplication for overflow */
  wcs = calloc(sz + 1, sizeof(wchar_t));
  if (!wcs)
    goto done;

  if (!mbstowcs(wcs, str, sz))
    goto done;
  *width = wc_truncate(wcs, *width);
  /* limited to the original length, so @str cannot be overrun */
  bytes = wcstombs(str, wcs, bytes);
done:
  free(wcs);
#else
  /* one byte is one cell */
  if (*width < bytes)
    bytes = *width;
  else
    *width = bytes;
#endif
  if (bytes != (size_t) -1)
    str[bytes] = '\0';
  return bytes;
}
459
460
/* Fill DEST with up to N_SPACES copies of PADCHAR, but never write
   a padding character at or beyond DEST_END.  The output is always
   terminated by NUL.
   Returns a pointer to that terminating NUL.  */

static char*
mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces, int padchar)
{
  while (n_spaces && (dest < dest_end))
    {
      *dest = padchar;
      dest++;
      n_spaces--;
    }
  *dest = '\0';
  return dest;
}
473
474
size_t
475
mbsalign (const char *src, char *dest, size_t dest_size,
476
          size_t *width, mbs_align_t align, int flags)
477
0
{
478
0
  return mbsalign_with_padding(src, dest, dest_size, width, align, flags, ' ');
479
0
}
480
481
/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
482
   characters; write the result into the DEST_SIZE-byte buffer, DEST.
483
   ALIGNMENT specifies whether to left- or right-justify or to center.
484
   If SRC requires more than *WIDTH columns, truncate it to fit.
485
   When centering, the number of trailing spaces may be one less than the
486
   number of leading spaces. The FLAGS parameter is unused at present.
487
   Return the length in bytes required for the final result, not counting
488
   the trailing NUL.  A return value of DEST_SIZE or larger means there
489
   wasn't enough space.  DEST will be NUL terminated in any case.
490
   Return (size_t) -1 upon error (invalid multi-byte sequence in SRC,
491
   or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
492
   Update *WIDTH to indicate how many columns were used before padding.  */
493
494
size_t
495
mbsalign_with_padding (const char *src, char *dest, size_t dest_size,
496
                 size_t *width, mbs_align_t align,
497
#ifdef HAVE_WIDECHAR
498
           int flags,
499
#else
500
           int flags __attribute__((__unused__)),
501
#endif
502
           int padchar)
503
0
{
504
0
  size_t ret = -1;
505
0
  size_t src_size = strlen (src) + 1;
506
0
  char *newstr = NULL;
507
0
  wchar_t *str_wc = NULL;
508
0
  const char *str_to_print = src;
509
0
  size_t n_cols = src_size - 1;
510
0
  size_t n_used_bytes = n_cols; /* Not including NUL */
511
0
  size_t n_spaces = 0, space_left;
512
513
0
#ifdef HAVE_WIDECHAR
514
0
  bool conversion = false;
515
0
  bool wc_enabled = false;
516
517
  /* In multi-byte locales convert to wide characters
518
     to allow easy truncation. Also determine number
519
     of screen columns used.  */
520
0
  if (MB_CUR_MAX > 1)
521
0
    {
522
0
      size_t src_chars = mbstowcs (NULL, src, 0);
523
0
      if (src_chars == (size_t) -1)
524
0
        {
525
0
          if (flags & MBA_UNIBYTE_FALLBACK)
526
0
            goto mbsalign_unibyte;
527
0
          else
528
0
            goto mbsalign_cleanup;
529
0
        }
530
0
      src_chars += 1; /* make space for NUL */
531
0
      str_wc = malloc (src_chars * sizeof (wchar_t));
532
0
      if (str_wc == NULL)
533
0
        {
534
0
          if (flags & MBA_UNIBYTE_FALLBACK)
535
0
            goto mbsalign_unibyte;
536
0
          else
537
0
            goto mbsalign_cleanup;
538
0
        }
539
0
      if (mbstowcs (str_wc, src, src_chars) != 0)
540
0
        {
541
0
          str_wc[src_chars - 1] = L'\0';
542
0
          wc_enabled = true;
543
0
          conversion = wc_ensure_printable (str_wc);
544
0
          n_cols = rpl_wcswidth (str_wc, src_chars);
545
0
        }
546
0
    }
547
548
  /* If we transformed or need to truncate the source string
549
     then create a modified copy of it.  */
550
0
  if (wc_enabled && (conversion || (n_cols > *width)))
551
0
    {
552
0
        if (conversion)
553
0
          {
554
             /* May have increased the size by converting
555
                \t to \uFFFD for example.  */
556
0
            src_size = wcstombs(NULL, str_wc, 0) + 1;
557
0
          }
558
0
        newstr = malloc (src_size);
559
0
        if (newstr == NULL)
560
0
        {
561
0
          if (flags & MBA_UNIBYTE_FALLBACK)
562
0
            goto mbsalign_unibyte;
563
0
          else
564
0
            goto mbsalign_cleanup;
565
0
        }
566
0
        str_to_print = newstr;
567
0
        n_cols = wc_truncate (str_wc, *width);
568
0
        n_used_bytes = wcstombs (newstr, str_wc, src_size);
569
0
    }
570
571
0
mbsalign_unibyte:
572
0
#endif
573
574
0
  if (n_cols > *width) /* Unibyte truncation required.  */
575
0
    {
576
0
      n_cols = *width;
577
0
      n_used_bytes = n_cols;
578
0
    }
579
580
0
  if (*width > n_cols) /* Padding required.  */
581
0
    n_spaces = *width - n_cols;
582
583
  /* indicate to caller how many cells needed (not including padding).  */
584
0
  *width = n_cols;
585
586
  /* indicate to caller how many bytes needed (not including NUL).  */
587
0
  ret = n_used_bytes + (n_spaces * 1);
588
589
  /* Write as much NUL terminated output to DEST as possible.  */
590
0
  if (dest_size != 0)
591
0
    {
592
0
      char *dest_end = dest + dest_size - 1;
593
0
      size_t start_spaces;
594
0
      size_t end_spaces;
595
596
0
      switch (align)
597
0
        {
598
0
        case MBS_ALIGN_CENTER:
599
0
          start_spaces = n_spaces / 2 + n_spaces % 2;
600
0
          end_spaces = n_spaces / 2;
601
0
          break;
602
0
        case MBS_ALIGN_LEFT:
603
0
          start_spaces = 0;
604
0
          end_spaces = n_spaces;
605
0
          break;
606
0
        case MBS_ALIGN_RIGHT:
607
0
          start_spaces = n_spaces;
608
0
          end_spaces = 0;
609
0
          break;
610
0
  default:
611
0
    abort();
612
0
        }
613
614
0
      dest = mbs_align_pad (dest, dest_end, start_spaces, padchar);
615
0
      space_left = dest_end - dest;
616
0
      dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left));
617
0
      mbs_align_pad (dest, dest_end, end_spaces, padchar);
618
0
    }
619
0
#ifdef HAVE_WIDECHAR
620
0
mbsalign_cleanup:
621
0
#endif
622
0
  free (str_wc);
623
0
  free (newstr);
624
625
0
  return ret;
626
0
}