Coverage Report

Created: 2025-10-13 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/unistring/uninorm/canonical-decomposition.c
Line
Count
Source
1
/* Canonical decomposition of Unicode characters.
2
   Copyright (C) 2009-2025 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "uninorm.h"
22
23
#include <stdlib.h>
24
25
#include "uninorm/decomposition-table.h"
26
27
int
28
uc_canonical_decomposition (ucs4_t uc, ucs4_t *decomposition)
29
797k
{
30
797k
  if (uc >= 0xAC00 && uc < 0xD7A4)
31
6.43k
    {
32
      /* Hangul syllable.  See Unicode standard, chapter 3, section
33
         "Hangul Syllable Decomposition".  See also the clarification at
34
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
35
         "Clarification of Hangul Jamo Handling".  */
36
6.43k
      unsigned int t;
37
38
6.43k
      uc -= 0xAC00;
39
6.43k
      t = uc % 28;
40
41
6.43k
      if (t == 0)
42
3.68k
        {
43
3.68k
          unsigned int v, l;
44
45
3.68k
          uc = uc / 28;
46
3.68k
          v = uc % 21;
47
3.68k
          l = uc / 21;
48
49
3.68k
          decomposition[0] = 0x1100 + l;
50
3.68k
          decomposition[1] = 0x1161 + v;
51
3.68k
          return 2;
52
3.68k
        }
53
2.75k
      else
54
2.75k
        {
55
2.75k
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
56
2.75k
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
57
2.75k
          decomposition[1] = 0x11A7 + t;
58
2.75k
          return 2;
59
#else
60
          unsigned int v, l;
61
62
          uc = uc / 28;
63
          v = uc % 21;
64
          l = uc / 21;
65
66
          decomposition[0] = 0x1100 + l;
67
          decomposition[1] = 0x1161 + v;
68
          decomposition[2] = 0x11A7 + t;
69
          return 3;
70
#endif
71
2.75k
        }
72
6.43k
    }
73
790k
  else if (uc < 0x110000)
74
790k
    {
75
790k
      unsigned short entry = decomp_index (uc);
76
      /* An entry of (unsigned short)(-1) denotes an absent entry.
77
         Otherwise, bit 15 of the entry tells whether the decomposition
78
         is a canonical one (bit clear) or not (bit set).  */
79
790k
      if (entry < 0x8000)
80
28.4k
        {
81
28.4k
          const unsigned char *p;
82
28.4k
          unsigned int element;
83
28.4k
          unsigned int length;
84
85
28.4k
          p = &gl_uninorm_decomp_chars_table[3 * entry];
86
28.4k
          element = (p[0] << 16) | (p[1] << 8) | p[2];
87
          /* The first element has 5 bits for the decomposition type.  */
88
28.4k
          if (((element >> 18) & 0x1f) != UC_DECOMP_CANONICAL)
89
0
            abort ();
90
28.4k
          length = 1;
91
28.4k
          for (;;)
92
56.2k
            {
93
              /* Every element has an 18 bits wide Unicode code point.  */
94
56.2k
              *decomposition = element & 0x3ffff;
95
              /* Bit 23 tells whether there are more elements.  */
96
56.2k
              if ((element & (1 << 23)) == 0)
97
28.4k
                break;
98
27.7k
              p += 3;
99
27.7k
              element = (p[0] << 16) | (p[1] << 8) | p[2];
100
27.7k
              decomposition++;
101
27.7k
              length++;
102
27.7k
            }
103
28.4k
          return length;
104
28.4k
        }
105
790k
    }
106
762k
  return -1;
107
797k
}