Coverage Report

Created: 2025-03-18 06:55

/src/gnutls/lib/unistring/uninorm/decomposition.c
Line
Count
Source (jump to first uncovered line)
1
/* Decomposition of Unicode characters.
2
   Copyright (C) 2009-2025 Free Software Foundation, Inc.
3
   Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5
   This file is free software.
6
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7
   You can redistribute it and/or modify it under either
8
     - the terms of the GNU Lesser General Public License as published
9
       by the Free Software Foundation, either version 3, or (at your
10
       option) any later version, or
11
     - the terms of the GNU General Public License as published by the
12
       Free Software Foundation; either version 2, or (at your option)
13
       any later version, or
14
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16
   This file is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
   Lesser General Public License and the GNU General Public License
20
   for more details.
21
22
   You should have received a copy of the GNU Lesser General Public
23
   License and of the GNU General Public License along with this
24
   program.  If not, see <https://www.gnu.org/licenses/>.  */
25
26
#include <config.h>
27
28
/* Specification.  */
29
#include "uninorm.h"
30
31
#include "uninorm/decomposition-table.h"
32
33
int
34
uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition)
35
0
{
36
0
  if (uc >= 0xAC00 && uc < 0xD7A4)
37
0
    {
38
      /* Hangul syllable.  See Unicode standard, chapter 3, section
39
         "Hangul Syllable Decomposition",  See also the clarification at
40
         <https://www.unicode.org/versions/Unicode5.1.0/>, section
41
         "Clarification of Hangul Jamo Handling".  */
42
0
      unsigned int t;
43
44
0
      uc -= 0xAC00;
45
0
      t = uc % 28;
46
47
0
      *decomp_tag = UC_DECOMP_CANONICAL;
48
0
      if (t == 0)
49
0
        {
50
0
          unsigned int v, l;
51
52
0
          uc = uc / 28;
53
0
          v = uc % 21;
54
0
          l = uc / 21;
55
56
0
          decomposition[0] = 0x1100 + l;
57
0
          decomposition[1] = 0x1161 + v;
58
0
          return 2;
59
0
        }
60
0
      else
61
0
        {
62
0
#if 1 /* Return the pairwise decomposition, not the full decomposition.  */
63
0
          decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */
64
0
          decomposition[1] = 0x11A7 + t;
65
0
          return 2;
66
#else
67
          unsigned int v, l;
68
69
          uc = uc / 28;
70
          v = uc % 21;
71
          l = uc / 21;
72
73
          decomposition[0] = 0x1100 + l;
74
          decomposition[1] = 0x1161 + v;
75
          decomposition[2] = 0x11A7 + t;
76
          return 3;
77
#endif
78
0
        }
79
0
    }
80
0
  else if (uc < 0x110000)
81
0
    {
82
0
      unsigned short entry = decomp_index (uc);
83
0
      if (entry != (unsigned short)(-1))
84
0
        {
85
0
          const unsigned char *p;
86
0
          unsigned int element;
87
0
          unsigned int length;
88
89
0
          p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)];
90
0
          element = (p[0] << 16) | (p[1] << 8) | p[2];
91
          /* The first element has 5 bits for the decomposition type.  */
92
0
          *decomp_tag = (element >> 18) & 0x1f;
93
0
          length = 1;
94
0
          for (;;)
95
0
            {
96
              /* Every element has an 18 bits wide Unicode code point.  */
97
0
              *decomposition = element & 0x3ffff;
98
              /* Bit 23 tells whether there are more elements,  */
99
0
              if ((element & (1 << 23)) == 0)
100
0
                break;
101
0
              p += 3;
102
0
              element = (p[0] << 16) | (p[1] << 8) | p[2];
103
0
              decomposition++;
104
0
              length++;
105
0
            }
106
0
          return length;
107
0
        }
108
0
    }
109
0
  return -1;
110
0
}