Coverage Report

Created: 2025-07-11 06:23

/src/libunistring/lib/uninorm/decomposition.c
Line
Count
Source
1
/* Decomposition of Unicode characters.
2
   Copyright (C) 2009-2025 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation, either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "uninorm.h"
30
31
#include "uninorm/decomposition-table.h"
32
33
int
34
uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
35
23.6M
{
36
23.6M
  if (uc >= 0xAC00 && uc < 0xD7A4)
37
4.26k
    {
38
      /* Hangul syllable.  See Unicode standard, chapter 3, section
39
         "Hangul Syllable Decomposition",  See also the clarification at
40
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
41
         "Clarification of Hangul Jamo Handling".  */
42
4.26k
      unsigned int t;
43
44
4.26k
      uc -= 0xAC00;
45
4.26k
      t = uc % 28;
46
47
4.26k
      *decomp_tag = UC_DECOMP_CANONICAL;
48
4.26k
      if (t == 0)
49
2.13k
        {
50
2.13k
          unsigned int v, l;
51
52
2.13k
          uc = uc / 28;
53
2.13k
          v = uc % 21;
54
2.13k
          l = uc / 21;
55
56
2.13k
          decomposition[0] = 0x1100 + l;
57
2.13k
          decomposition[1] = 0x1161 + v;
58
2.13k
          return 2;
59
2.13k
        }
60
2.13k
      else
61
2.13k
        {
62
2.13k
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
63
2.13k
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
64
2.13k
          decomposition[1] = 0x11A7 + t;
65
2.13k
          return 2;
66
#else
67
          unsigned int v, l;
68
69
          uc = uc / 28;
70
          v = uc % 21;
71
          l = uc / 21;
72
73
          decomposition[0] = 0x1100 + l;
74
          decomposition[1] = 0x1161 + v;
75
          decomposition[2] = 0x11A7 + t;
76
          return 3;
77
#endif
78
2.13k
        }
79
4.26k
    }
80
23.6M
  else if (uc < 0x110000)
81
23.6M
    {
82
23.6M
      unsigned short entry = decomp_index (uc);
83
23.6M
      if (entry != (unsigned short)(-1))
84
2.86M
        {
85
2.86M
          const unsigned char *p;
86
2.86M
          unsigned int element;
87
2.86M
          unsigned int length;
88
89
2.86M
          p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
90
2.86M
          element = (p[0] << 16) | (p[1] << 8) | p[2];
91
          /* The first element has 5 bits for the decomposition type.  */
92
2.86M
          *decomp_tag = (element >> 18) & 0x1f;
93
2.86M
          length = 1;
94
2.86M
          for (;;)
95
8.09M
            {
96
              /* Every element has an 18 bits wide Unicode code point.  */
97
8.09M
              *decomposition = element & 0x3ffff;
98
              /* Bit 23 tells whether there are more elements,  */
99
8.09M
              if ((element & (1 << 23)) == 0)
100
2.86M
                break;
101
5.22M
              p += 3;
102
5.22M
              element = (p[0] << 16) | (p[1] << 8) | p[2];
103
5.22M
              decomposition++;
104
5.22M
              length++;
105
5.22M
            }
106
2.86M
          return length;
107
2.86M
        }
108
23.6M
    }
109
20.7M
  return -1;
110
23.6M
}