/src/gnutls/lib/unistring/uninorm/decomposition.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Decomposition of Unicode characters. |
2 | | Copyright (C) 2009-2025 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2009. |
4 | | |
5 | | This file is free software. |
6 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
7 | | You can redistribute it and/or modify it under either |
8 | | - the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 3, or (at your |
10 | | option) any later version, or |
11 | | - the terms of the GNU General Public License as published by the |
12 | | Free Software Foundation; either version 2, or (at your option) |
13 | | any later version, or |
14 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
15 | | |
16 | | This file is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | | Lesser General Public License and the GNU General Public License |
20 | | for more details. |
21 | | |
22 | | You should have received a copy of the GNU Lesser General Public |
23 | | License and of the GNU General Public License along with this |
24 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
25 | | |
26 | | #include <config.h> |
27 | | |
28 | | /* Specification. */ |
29 | | #include "uninorm.h" |
30 | | |
31 | | #include "uninorm/decomposition-table.h" |
32 | | |
33 | | int |
34 | | uc_decomposition (ucs4_t uc, int *decomp_tag, ucs4_t *decomposition) |
35 | 0 | { |
36 | 0 | if (uc >= 0xAC00 && uc < 0xD7A4) |
37 | 0 | { |
38 | | /* Hangul syllable. See Unicode standard, chapter 3, section |
39 | | "Hangul Syllable Decomposition", See also the clarification at |
40 | | <https://www.unicode.org/versions/Unicode5.1.0/>, section |
41 | | "Clarification of Hangul Jamo Handling". */ |
42 | 0 | unsigned int t; |
43 | |
|
44 | 0 | uc -= 0xAC00; |
45 | 0 | t = uc % 28; |
46 | |
|
47 | 0 | *decomp_tag = UC_DECOMP_CANONICAL; |
48 | 0 | if (t == 0) |
49 | 0 | { |
50 | 0 | unsigned int v, l; |
51 | |
|
52 | 0 | uc = uc / 28; |
53 | 0 | v = uc % 21; |
54 | 0 | l = uc / 21; |
55 | |
|
56 | 0 | decomposition[0] = 0x1100 + l; |
57 | 0 | decomposition[1] = 0x1161 + v; |
58 | 0 | return 2; |
59 | 0 | } |
60 | 0 | else |
61 | 0 | { |
62 | 0 | #if 1 /* Return the pairwise decomposition, not the full decomposition. */ |
63 | 0 | decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */ |
64 | 0 | decomposition[1] = 0x11A7 + t; |
65 | 0 | return 2; |
66 | | #else |
67 | | unsigned int v, l; |
68 | | |
69 | | uc = uc / 28; |
70 | | v = uc % 21; |
71 | | l = uc / 21; |
72 | | |
73 | | decomposition[0] = 0x1100 + l; |
74 | | decomposition[1] = 0x1161 + v; |
75 | | decomposition[2] = 0x11A7 + t; |
76 | | return 3; |
77 | | #endif |
78 | 0 | } |
79 | 0 | } |
80 | 0 | else if (uc < 0x110000) |
81 | 0 | { |
82 | 0 | unsigned short entry = decomp_index (uc); |
83 | 0 | if (entry != (unsigned short)(-1)) |
84 | 0 | { |
85 | 0 | const unsigned char *p; |
86 | 0 | unsigned int element; |
87 | 0 | unsigned int length; |
88 | |
|
89 | 0 | p = &gl_uninorm_decomp_chars_table[3 * (entry & 0x7FFF)]; |
90 | 0 | element = (p[0] << 16) | (p[1] << 8) | p[2]; |
91 | | /* The first element has 5 bits for the decomposition type. */ |
92 | 0 | *decomp_tag = (element >> 18) & 0x1f; |
93 | 0 | length = 1; |
94 | 0 | for (;;) |
95 | 0 | { |
96 | | /* Every element has an 18 bits wide Unicode code point. */ |
97 | 0 | *decomposition = element & 0x3ffff; |
98 | | /* Bit 23 tells whether there are more elements, */ |
99 | 0 | if ((element & (1 << 23)) == 0) |
100 | 0 | break; |
101 | 0 | p += 3; |
102 | 0 | element = (p[0] << 16) | (p[1] << 8) | p[2]; |
103 | 0 | decomposition++; |
104 | 0 | length++; |
105 | 0 | } |
106 | 0 | return length; |
107 | 0 | } |
108 | 0 | } |
109 | 0 | return -1; |
110 | 0 | } |