Coverage Report

Created: 2026-03-31 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libunistring/lib/uninorm/decomposition.c
Line
Count
Source
1
/* Decomposition of Unicode characters.
2
   Copyright (C) 2009-2026 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation, either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "uninorm.h"
30
31
#include "uninorm/decomposition-table.h"
32
33
int
34
uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
35
0
{
36
0
  if (uc >= 0xAC00 && uc < 0xD7A4)
37
0
    {
38
      /* Hangul syllable.  See Unicode standard, chapter 3, section
39
         "Hangul Syllable Decomposition",  See also the clarification at
40
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
41
         "Clarification of Hangul Jamo Handling".  */
42
0
      uc -= 0xAC00;
43
0
      unsigned int t = uc % 28;
44
45
0
      *decomp_tag = UC_DECOMP_CANONICAL;
46
0
      if (t == 0)
47
0
        {
48
0
          uc = uc / 28;
49
0
          unsigned int v = uc % 21;
50
0
          unsigned int l = uc / 21;
51
52
0
          decomposition[0] = 0x1100 + l;
53
0
          decomposition[1] = 0x1161 + v;
54
0
          return 2;
55
0
        }
56
0
      else
57
0
        {
58
0
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
59
0
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
60
0
          decomposition[1] = 0x11A7 + t;
61
0
          return 2;
62
#else
63
          uc = uc / 28;
64
          unsigned int v = uc % 21;
65
          unsigned int l = uc / 21;
66
67
          decomposition[0] = 0x1100 + l;
68
          decomposition[1] = 0x1161 + v;
69
          decomposition[2] = 0x11A7 + t;
70
          return 3;
71
#endif
72
0
        }
73
0
    }
74
0
  else if (uc < 0x110000)
75
0
    {
76
0
      unsigned short entry = decomp_index (uc);
77
0
      if (entry != (unsigned short)(-1))
78
0
        {
79
0
          const unsigned char *p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
80
0
          unsigned int element = (p[0] << 16) | (p[1] << 8) | p[2];
81
          /* The first element has 5 bits for the decomposition type.  */
82
0
          *decomp_tag = (element >> 18) & 0x1f;
83
0
          unsigned int length = 1;
84
0
          for (;;)
85
0
            {
86
              /* Every element has an 18 bits wide Unicode code point.  */
87
0
              *decomposition = element & 0x3ffff;
88
              /* Bit 23 tells whether there are more elements,  */
89
0
              if ((element & (1 << 23)) == 0)
90
0
                break;
91
0
              p += 3;
92
0
              element = (p[0] << 16) | (p[1] << 8) | p[2];
93
0
              decomposition++;
94
0
              length++;
95
0
            }
96
0
          return length;
97
0
        }
98
0
    }
99
0
  return -1;
100
0
}