Coverage Report

Created: 2026-01-06 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libunistring/lib/uninorm/decomposition.c
Line
Count
Source
1
/* Decomposition of Unicode characters.
2
   Copyright (C) 2009-2026 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation, either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "uninorm.h"
30
31
#include "uninorm/decomposition-table.h"
32
33
int
34
uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
35
28.6M
{
36
28.6M
  if (uc >= 0xAC00 && uc < 0xD7A4)
37
4.47k
    {
38
      /* Hangul syllable.  See Unicode standard, chapter 3, section
39
         "Hangul Syllable Decomposition",  See also the clarification at
40
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
41
         "Clarification of Hangul Jamo Handling".  */
42
4.47k
      uc -= 0xAC00;
43
4.47k
      unsigned int t = uc % 28;
44
45
4.47k
      *decomp_tag = UC_DECOMP_CANONICAL;
46
4.47k
      if (t == 0)
47
2.25k
        {
48
2.25k
          uc = uc / 28;
49
2.25k
          unsigned int v = uc % 21;
50
2.25k
          unsigned int l = uc / 21;
51
52
2.25k
          decomposition[0] = 0x1100 + l;
53
2.25k
          decomposition[1] = 0x1161 + v;
54
2.25k
          return 2;
55
2.25k
        }
56
2.21k
      else
57
2.21k
        {
58
2.21k
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
59
2.21k
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
60
2.21k
          decomposition[1] = 0x11A7 + t;
61
2.21k
          return 2;
62
#else
63
          uc = uc / 28;
64
          unsigned int v = uc % 21;
65
          unsigned int l = uc / 21;
66
67
          decomposition[0] = 0x1100 + l;
68
          decomposition[1] = 0x1161 + v;
69
          decomposition[2] = 0x11A7 + t;
70
          return 3;
71
#endif
72
2.21k
        }
73
4.47k
    }
74
28.6M
  else if (uc < 0x110000)
75
28.6M
    {
76
28.6M
      unsigned short entry = decomp_index (uc);
77
28.6M
      if (entry != (unsigned short)(-1))
78
3.30M
        {
79
3.30M
          const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
80
3.30M
          unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2];
81
          /* The first element has 5 bits for the decomposition type.  */
82
3.30M
          *decomp_tag = (element >> 18) & 0x1f;
83
3.30M
          unsigned int length = 1;
84
3.30M
          for (;;)
85
9.58M
            {
86
              /* Every element has an 18 bits wide Unicode code point.  */
87
9.58M
              *decomposition = element & 0x3ffff;
88
              /* Bit 23 tells whether there are more elements,  */
89
9.58M
              if ((element & (1 << 23)) == 0)
90
3.30M
                break;
91
6.28M
              p += 3;
92
6.28M
              element = (p[0] << 16) | (p[1] << 8) | p[2];
93
6.28M
              decomposition++;
94
6.28M
              length++;
95
6.28M
            }
96
3.30M
          return length;
97
3.30M
        }
98
28.6M
    }
99
25.3M
  return -1;
100
28.6M
}