Coverage Report

Created: 2026-01-25 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gettext-0.26/gettext-tools/libgettextpo/uniwidth/width.c
Line
Count
Source
1
/* Determine display width of Unicode character.
2
   Copyright (C) 2001-2002, 2006-2025 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2002.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "uniwidth.h"
22
23
#include "cjk.h"
24
25
/* The non-spacing attribute table consists of:
26
   * Non-spacing characters; generated from PropList.txt or
27
     "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
28
   * Format control characters; generated from
29
     "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
30
   * Zero width characters; generated from
31
     "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
32
   * Hangul Jamo characters that have conjoining behaviour:
33
       - jungseong = syllable-middle vowels
34
       - jongseong = syllable-final consonants
35
     Rationale:
36
     1) These characters act like combining characters. They have no
37
     equivalent in legacy character sets. Therefore the EastAsianWidth.txt
38
     file does not really matter for them; UAX #11 East Asian Width
39
     <https://www.unicode.org/reports/tr11/> makes it clear that its focus
40
     is on compatibility with traditional Japanese layout.
41
     By contrast, the same glyphs without conjoining behaviour are available
42
     in the U+3130..U+318F block, and these characters are mapped to legacy
43
     character sets, and traditional Japanese layout matters for them.
44
     2) glibc does the same thing, see
45
     <https://sourceware.org/bugzilla/show_bug.cgi?id=21750>
46
     <https://sourceware.org/bugzilla/show_bug.cgi?id=26120>
47
 */
48
#include "uniwidth/width0.h"
49
50
#include "uniwidth/width2.h"
51
#include "unictype/bitmap.h"
52
53
4.33M
/* Number of elements of the array A.  A must be a true array, not a
   pointer; the argument is parenthesized so any expression is safe.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
54
55
56
/* Determine number of column positions required for UC.  */
57
int
58
uc_width (ucs4_t uc, const char *encoding)
59
4.33M
{
60
  /* Test for non-spacing or control character.  */
61
4.33M
  if ((uc >> 9) < SIZEOF (nonspacing_table_ind))
62
4.33M
    {
63
4.33M
      int ind = nonspacing_table_ind[uc >> 9];
64
4.33M
      if (ind >= 0)
65
3.97M
        if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1)
66
2.99M
          {
67
2.99M
            if (uc > 0 && uc < 0xa0)
68
181k
              return -1;
69
2.80M
            else
70
2.80M
              return 0;
71
2.99M
          }
72
4.33M
    }
73
0
  else if ((uc >> 9) == (0xe0000 >> 9))
74
0
    {
75
0
      if (uc >= 0xe0100)
76
0
        {
77
0
          if (uc <= 0xe01ef)
78
0
            return 0;
79
0
        }
80
0
      else
81
0
        {
82
0
          if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001)
83
0
            return 0;
84
0
        }
85
0
    }
86
  /* Test for double-width character.  */
87
1.34M
  if (bitmap_lookup (&u_width2, uc))
88
368k
    return 2;
89
  /* In ancient CJK encodings, Cyrillic and most other characters are
90
     double-width as well.  */
91
977k
  if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
92
12.1k
      && is_cjk_encoding (encoding))
93
12.1k
    return 2;
94
965k
  return 1;
95
977k
}