/src/gettext-0.26/gettext-tools/libgettextpo/unilbrk/lbrktables.h
Line | Count | Source |
1 | | /* Line breaking auxiliary tables. |
2 | | Copyright (C) 2001-2003, 2006-2025 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible <bruno@clisp.org>, 2001. |
4 | | |
5 | | This file is free software. |
6 | | It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". |
7 | | You can redistribute it and/or modify it under either |
8 | | - the terms of the GNU Lesser General Public License as published |
9 | | by the Free Software Foundation, either version 3, or (at your |
10 | | option) any later version, or |
11 | | - the terms of the GNU General Public License as published by the |
12 | | Free Software Foundation; either version 2, or (at your option) |
13 | | any later version, or |
14 | | - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". |
15 | | |
16 | | This file is distributed in the hope that it will be useful, |
17 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | | Lesser General Public License and the GNU General Public License |
20 | | for more details. |
21 | | |
22 | | You should have received a copy of the GNU Lesser General Public |
23 | | License and of the GNU General Public License along with this |
24 | | program. If not, see <https://www.gnu.org/licenses/>. */ |
25 | | |
26 | | #include "unitypes.h" |
27 | | |
28 | | #ifdef __cplusplus |
29 | | extern "C" { |
30 | | #endif |
31 | | |
32 | | |
33 | | /* Line breaking classification. Each enumerator names one line breaking class (see the per-enumerator comments). The numeric values fall into three ranges: 0..40, 41..50 (resolved at run time) and 100..101 (artificial, run time only). */ |
34 | | |
35 | | enum |
36 | | { |
37 | | /* Values >= 41 are resolved at run time. NOTE(review): the values below 41 presumably serve as the row/column indices of unilbrk_table[41][41] declared further down in this header -- confirm in lbrktables.c. */ |
38 | | LBP_BK = 41, /* mandatory break */ |
39 | | LBP_CR = 42, /* carriage return */ |
40 | | LBP_LF = 43, /* line feed */ |
41 | | LBP_CM = 44, /* attached characters and combining marks */ |
42 | | /*LBP_NL, next line - not used here because it's equivalent to LBP_BK */ |
43 | | /*LBP_SG, surrogates - not used here because they are not characters */ |
44 | | LBP_WJ = 0, /* word joiner */ |
45 | | LBP_ZW = 45, /* zero width space */ |
46 | | LBP_GL = 1, /* non-breaking (glue) */ |
47 | | LBP_SP = 46, /* space */ |
48 | | LBP_B2 = 2, /* break opportunity before and after */ |
49 | | LBP_BA = 3, /* break opportunity after */ |
50 | | LBP_BB = 4, /* break opportunity before */ |
51 | | LBP_HY = 5, /* hyphen */ |
52 | | LBP_CB = 47, /* contingent break opportunity */ |
53 | | LBP_CL = 6, /* closing punctuation */ |
54 | | LBP_CP1 = 7, /* closing parenthesis, non-EastAsian character */ |
55 | | LBP_CP2 = 8, /* closing parenthesis, EastAsian character */ |
56 | | LBP_EX = 9, /* exclamation/interrogation */ |
57 | | LBP_IN = 10, /* inseparable */ |
58 | | LBP_NS = 11, /* non starter */ |
59 | | LBP_OP1 = 12, /* opening punctuation, non-EastAsian character */ |
60 | | LBP_OP2 = 13, /* opening punctuation, EastAsian character */ |
61 | | LBP_QU1 = 14, /* ambiguous quotation, neither initial nor final punctuation */ |
62 | | LBP_QU2 = 15, /* ambiguous quotation, initial punctuation */ |
63 | | LBP_QU3 = 16, /* ambiguous quotation, final punctuation */ |
64 | | LBP_IS = 17, /* infix separator (numeric) */ |
65 | | LBP_NU = 18, /* numeric */ |
66 | | LBP_PO = 19, /* postfix (numeric) */ |
67 | | LBP_PR = 20, /* prefix (numeric) */ |
68 | | LBP_SY = 21, /* symbols allowing breaks */ |
69 | | LBP_AI = 48, /* ambiguous (alphabetic or ideograph) */ |
70 | | LBP_AL1 = 22, /* ordinary alphabetic and symbol characters, != U+25CC */ |
71 | | LBP_AL2 = 23, /* ordinary alphabetic and symbol characters, == U+25CC */ |
72 | | /*LBP_CJ, conditional Japanese starter, resolved to NS */ |
73 | | LBP_H2 = 24, /* Hangul LV syllable */ |
74 | | LBP_H3 = 25, /* Hangul LVT syllable */ |
75 | | LBP_HL = 31, /* Hebrew letter */ |
76 | | LBP_ID1 = 26, /* ideographic */ |
77 | | LBP_ID2 = 27, /* ideographic and potential future emoji */ |
78 | | LBP_JL = 28, /* Hangul L Jamo */ |
79 | | LBP_JV = 29, /* Hangul V Jamo */ |
80 | | LBP_JT = 30, /* Hangul T Jamo */ |
81 | | LBP_AP = 32, /* Brahmic scripts: pre-base repha */ |
82 | | LBP_AK = 33, /* Brahmic scripts: consonants */ |
83 | | LBP_AS = 34, /* Brahmic scripts: independent vowels */ |
84 | | LBP_VI = 35, /* Brahmic scripts: conjoining viramas */ |
85 | | LBP_VF = 36, /* Brahmic scripts: viramas for final consonants */ |
86 | | LBP_RI = 37, /* regional indicator */ |
87 | | LBP_SA = 49, /* complex context (South East Asian) */ |
88 | | LBP_ZWJ = 38, /* zero width joiner */ |
89 | | LBP_EB = 39, /* emoji base */ |
90 | | LBP_EM = 40, /* emoji modifier */ |
91 | | LBP_XX = 50, /* unknown */ |
92 | | /* Artificial values that exist only at runtime, not in the tables. */ |
93 | | LBP_AKLS_VI = 100, /* NOTE(review): name suggests a combined state that ends in a VI (LBP_VI) character -- confirm against the code that uses it */ |
94 | | LBP_HL_BA = 101 /* NOTE(review): name suggests a combined state for HL (LBP_HL) followed by BA (LBP_BA) -- confirm against the code that uses it */ |
95 | | }; |
96 | | |
97 | | #include "lbrkprop1.h" |
98 | | |
99 | | /* Combining prop and ea to a table entry: prop is shifted left by one bit and ea is OR-ed into bit 0. Each argument is evaluated exactly once. */ |
100 | 0 | #define PROP_EA(prop,ea) (((prop) << 1) | (ea)) |
101 | | |
102 | | /* Splitting a table entry into prop and ea: PROP shifts bit 0 away to recover prop, EA masks everything except bit 0. These undo PROP_EA when ea is 0 or 1. */ |
103 | 0 | #define PROP(entry) ((entry) >> 1) |
104 | 0 | #define EA(entry) ((entry) & 1) |
105 | | |
106 | | /* Returns (prop << 1) | ea, where |
107 | | - prop is the line breaking property, |
108 | | - ea is the EastAsian property (1 bit) |
109 | | of UC. The value is read from the three-level table unilbrkprop (level1, level2, level3). If UC lies beyond level1, or its level1 or level2 entry is negative, the result is PROP_EA (LBP_XX, 0). Use PROP() and EA() to split the result. */ |
110 | | static inline unsigned char |
111 | | unilbrkprop_lookup (ucs4_t uc) |
112 | 0 | { |
113 | 0 | unsigned int index1 = uc >> lbrkprop_header_0; /* level-1 index: UC shifted right by lbrkprop_header_0 */ |
114 | 0 | if (index1 < lbrkprop_header_1) /* guard before indexing level1 */ |
115 | 0 | { |
116 | 0 | int lookup1 = unilbrkprop.level1[index1]; /* base offset into level2; negative means no entry */ |
117 | 0 | if (lookup1 >= 0) |
118 | 0 | { |
119 | 0 | unsigned int index2 = (uc >> lbrkprop_header_2) & lbrkprop_header_3; /* level-2 index: shifted UC masked with lbrkprop_header_3 */ |
120 | 0 | int lookup2 = unilbrkprop.level2[lookup1 + index2]; /* base offset into level3; negative means no entry */ |
121 | 0 | if (lookup2 >= 0) |
122 | 0 | { |
123 | 0 | unsigned int index3 = uc & lbrkprop_header_4; /* level-3 index: UC masked with lbrkprop_header_4 */ |
124 | 0 | return unilbrkprop.level3[lookup2 + index3]; /* the stored (prop << 1) | ea entry */ |
125 | 0 | } |
126 | 0 | } |
127 | 0 | } |
128 | 0 | return PROP_EA (LBP_XX, 0); /* fallback: unknown class, ea bit 0 */ |
129 | 0 | } Unexecuted instantiation: ulc-width-linebreaks.c:unilbrkprop_lookup Unexecuted instantiation: u8-width-linebreaks.c:unilbrkprop_lookup Unexecuted instantiation: u8-possible-linebreaks.c:unilbrkprop_lookup Unexecuted instantiation: lbrktables.c:unilbrkprop_lookup |
130 | | |
131 | | |
132 | | /* Table indexed by two line breaking classifications. D, I and P name the three kinds of table cell listed below. NOTE(review): these one-letter macros are not #undef'd in this header, so they stay defined for every file that includes it. */ |
133 | 0 | #define D 1 /* direct break opportunity, empty in table 7.3 of UTR #14 */ |
134 | 0 | #define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */ |
135 | 0 | #define P 3 /* prohibited break, '^' in table 7.3 of UTR #14 */ |
136 | | |
137 | | extern const unsigned char unilbrk_table[41][41]; /* NOTE(review): 41 presumably matches the classification values 0..40, i.e. those not resolved at run time -- confirm in lbrktables.c */ |
138 | | |
139 | | /* We don't support line breaking of complex-context dependent characters |
140 | | (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */ |
141 | | |
142 | | |
143 | | #ifdef __cplusplus |
144 | | } |
145 | | #endif |