Coverage Report

Created: 2025-07-07 10:01

/work/workdir/UnpackedTarball/harfbuzz/src/hb-icu.cc
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright © 2009  Red Hat, Inc.
3
 * Copyright © 2009  Keith Stribley
4
 * Copyright © 2011  Google, Inc.
5
 *
6
 *  This is part of HarfBuzz, a text shaping library.
7
 *
8
 * Permission is hereby granted, without written agreement and without
9
 * license or royalty fees, to use, copy, modify, and distribute this
10
 * software and its documentation for any purpose, provided that the
11
 * above copyright notice and the following two paragraphs appear in
12
 * all copies of this software.
13
 *
14
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18
 * DAMAGE.
19
 *
20
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
23
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25
 *
26
 * Red Hat Author(s): Behdad Esfahbod
27
 * Google Author(s): Behdad Esfahbod
28
 */
29
30
#include "hb.hh"
31
32
#ifdef HAVE_ICU
33
34
#pragma GCC diagnostic push
35
36
// https://github.com/harfbuzz/harfbuzz/issues/4915
37
#pragma GCC diagnostic ignored "-Wredundant-decls"
38
39
#include "hb-icu.h"
40
41
#include "hb-machinery.hh"
42
43
#include <unicode/uchar.h>
44
#include <unicode/unorm2.h>
45
#include <unicode/ustring.h>
46
#include <unicode/utf16.h>
47
#include <unicode/uversion.h>
48
49
/* ICU extra semicolon, fixed since 65, https://github.com/unicode-org/icu/commit/480bec3 */
50
#if U_ICU_VERSION_MAJOR_NUM < 65 && (defined(__GNUC__) || defined(__clang__))
51
#define HB_ICU_EXTRA_SEMI_IGNORED
52
#pragma GCC diagnostic ignored "-Wextra-semi-stmt"
53
#endif
54
55
/**
56
 * SECTION:hb-icu
57
 * @title: hb-icu
58
 * @short_description: ICU integration
59
 * @include: hb-icu.h
60
 *
61
 * Functions for using HarfBuzz with the International Components for Unicode
62
 * (ICU) library. HarfBuzz supports using ICU to provide Unicode data, by attaching
63
 * ICU functions to the virtual methods in a #hb_unicode_funcs_t function
64
 * structure.
65
 **/
66
67
/**
68
 * hb_icu_script_to_script:
69
 * @script: The UScriptCode identifier to query
70
 *
71
 * Fetches the #hb_script_t script that corresponds to the
72
 * specified UScriptCode identifier.
73
 *
74
 * Return value: the #hb_script_t script found
75
 *
76
 **/
77
78
hb_script_t
79
hb_icu_script_to_script (UScriptCode script)
80
32.5M
{
81
32.5M
  if (unlikely (script == USCRIPT_INVALID_CODE))
82
0
    return HB_SCRIPT_INVALID;
83
84
32.5M
  return hb_script_from_string (uscript_getShortName (script), -1);
85
32.5M
}
86
87
/**
88
 * hb_icu_script_from_script:
89
 * @script: The #hb_script_t script to query
90
 *
91
 * Fetches the UScriptCode identifier that corresponds to the
92
 * specified #hb_script_t script.
93
 *
94
 * Return value: the UScriptCode identifier found
95
 *
96
 **/
97
UScriptCode
98
hb_icu_script_from_script (hb_script_t script)
99
0
{
100
0
  UScriptCode out = USCRIPT_INVALID_CODE;
101
102
0
  if (unlikely (script == HB_SCRIPT_INVALID))
103
0
    return out;
104
105
0
  UErrorCode icu_err = U_ZERO_ERROR;
106
0
  const unsigned char buf[5] = {HB_UNTAG (script), 0};
107
0
  uscript_getCode ((const char *) buf, &out, 1, &icu_err);
108
109
0
  return out;
110
0
}
111
112
113
/* Combining-class callback: forwards @unicode to ICU's
 * u_getCombiningClass() and returns the value as a
 * #hb_unicode_combining_class_t.  @ufuncs and @user_data are unused. */
static hb_unicode_combining_class_t
hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                                hb_codepoint_t      unicode,
                                void               *user_data HB_UNUSED)
{
  unsigned int icu_class = u_getCombiningClass (unicode);
  return (hb_unicode_combining_class_t) icu_class;
}
121
122
/* General-category callback: reads ICU's UCHAR_GENERAL_CATEGORY
 * property for @unicode and translates it into the corresponding
 * #hb_unicode_general_category_t value.  Any value not listed below
 * maps to HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED.  @ufuncs and
 * @user_data are unused. */
static hb_unicode_general_category_t
hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                                 hb_codepoint_t      unicode,
                                 void               *user_data HB_UNUSED)
{
  /* Fallback for values the switch does not recognise. */
  hb_unicode_general_category_t category = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;

  switch (u_getIntPropertyValue (unicode, UCHAR_GENERAL_CATEGORY))
  {
  case U_UNASSIGNED:             category = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;          break;

  /* Letters */
  case U_UPPERCASE_LETTER:       category = HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;    break;
  case U_LOWERCASE_LETTER:       category = HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;    break;
  case U_TITLECASE_LETTER:       category = HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;    break;
  case U_MODIFIER_LETTER:        category = HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;     break;
  case U_OTHER_LETTER:           category = HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;        break;

  /* Marks */
  case U_NON_SPACING_MARK:       category = HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;    break;
  case U_ENCLOSING_MARK:         category = HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;      break;
  case U_COMBINING_SPACING_MARK: category = HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK;        break;

  /* Numbers */
  case U_DECIMAL_DIGIT_NUMBER:   category = HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;      break;
  case U_LETTER_NUMBER:          category = HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;       break;
  case U_OTHER_NUMBER:           category = HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;        break;

  /* Separators */
  case U_SPACE_SEPARATOR:        category = HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;     break;
  case U_LINE_SEPARATOR:         category = HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;      break;
  case U_PARAGRAPH_SEPARATOR:    category = HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR; break;

  /* Other */
  case U_CONTROL_CHAR:           category = HB_UNICODE_GENERAL_CATEGORY_CONTROL;             break;
  case U_FORMAT_CHAR:            category = HB_UNICODE_GENERAL_CATEGORY_FORMAT;              break;
  case U_PRIVATE_USE_CHAR:       category = HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;         break;
  case U_SURROGATE:              category = HB_UNICODE_GENERAL_CATEGORY_SURROGATE;           break;

  /* Punctuation */
  case U_DASH_PUNCTUATION:       category = HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;    break;
  case U_START_PUNCTUATION:      category = HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;    break;
  case U_END_PUNCTUATION:        category = HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;   break;
  case U_CONNECTOR_PUNCTUATION:  category = HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION; break;
  case U_OTHER_PUNCTUATION:      category = HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;   break;

  /* Symbols */
  case U_MATH_SYMBOL:            category = HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;         break;
  case U_CURRENCY_SYMBOL:        category = HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;     break;
  case U_MODIFIER_SYMBOL:        category = HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;     break;
  case U_OTHER_SYMBOL:           category = HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;        break;

  /* Quotation punctuation */
  case U_INITIAL_PUNCTUATION:    category = HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION; break;
  case U_FINAL_PUNCTUATION:      category = HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;   break;
  }

  return category;
}
172
173
/* Mirroring callback: returns whatever ICU's u_charMirror() yields
 * for @unicode.  @ufuncs and @user_data are unused. */
static hb_codepoint_t
hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                          hb_codepoint_t      unicode,
                          void               *user_data HB_UNUSED)
{
  hb_codepoint_t mirrored = u_charMirror (unicode);
  return mirrored;
}
180
181
/* Script callback: asks ICU for the script of @unicode and converts
 * the answer with hb_icu_script_to_script().  Returns
 * HB_SCRIPT_UNKNOWN when ICU reports a failure.  @ufuncs and
 * @user_data are unused. */
static hb_script_t
hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                       hb_codepoint_t      unicode,
                       void               *user_data HB_UNUSED)
{
  UErrorCode icu_status = U_ZERO_ERROR;
  UScriptCode icu_script = uscript_getScript (unicode, &icu_status);

  return unlikely (U_FAILURE (icu_status))
         ? HB_SCRIPT_UNKNOWN
         : hb_icu_script_to_script (icu_script);
}
194
195
/* Compose callback: tries to combine @a and @b into one code point
 * using the UNormalizer2 passed in through @user_data.
 *
 * On success stores the composed code point in *@ab and returns true.
 * A negative result from unorm2_composePair() is treated as "no
 * composition": *@ab is left untouched and false is returned. */
static hb_bool_t
hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                        hb_codepoint_t      a,
                        hb_codepoint_t      b,
                        hb_codepoint_t     *ab,
                        void               *user_data)
{
  UChar32 composed = unorm2_composePair ((const UNormalizer2 *) user_data, a, b);

  if (composed >= 0)
  {
    *ab = composed;
    return true;
  }

  return false;
}
208
209
/* Decompose callback: splits @ab into at most two code points using
 * the raw decomposition of the UNormalizer2 passed in through
 * @user_data.
 *
 * Returns true and fills *@a / *@b when a decomposition is found:
 *  - one code point:  *@a receives it and *@b is set to 0; the result
 *    is true only if *@a actually differs from @ab;
 *  - two code points: *@a and *@b receive them in order.
 *
 * Returns false when ICU reports an error, when there is no
 * decomposition (negative length), or when the decomposition has any
 * other number of code points.  In that last case *@a and *@b are not
 * written, so reporting success would make the caller trust outputs
 * this function never produced. */
static hb_bool_t
hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
                          hb_codepoint_t      ab,
                          hb_codepoint_t     *a,
                          hb_codepoint_t     *b,
                          void               *user_data)
{
  const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data;
  UChar decomposed[4]; /* UTF-16 code units written by ICU */
  UErrorCode icu_err = U_ZERO_ERROR;

  int len = unorm2_getRawDecomposition (normalizer, ab, decomposed,
                                        ARRAY_LENGTH (decomposed), &icu_err);
  if (U_FAILURE (icu_err) || len < 0) return false;

  /* From here on count code points rather than UTF-16 code units. */
  int count = u_countChar32 (decomposed, len);

  if (count == 1)
  {
    U16_GET_UNSAFE (decomposed, 0, *a);
    *b = 0;
    /* A mapping to itself is not a decomposition. */
    return *a != ab;
  }

  if (count == 2)
  {
    int offset = 0; /* UTF-16 cursor advanced by U16_NEXT_UNSAFE */
    U16_NEXT_UNSAFE (decomposed, offset, *a);
    U16_NEXT_UNSAFE (decomposed, offset, *b);
    return true;
  }

  /* Zero or more than two code points cannot be expressed through
   * (*a, *b); nothing was written, so do not claim success. */
  return false;
}
239
240
241
static inline void free_static_icu_funcs ();
242
243
static struct hb_icu_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_icu_unicode_funcs_lazy_loader_t>
244
{
245
  static hb_unicode_funcs_t *create ()
246
0
  {
247
0
    void *user_data = nullptr;
248
0
    UErrorCode icu_err = U_ZERO_ERROR;
249
0
    user_data = (void *) unorm2_getNFCInstance (&icu_err);
250
0
    assert (user_data);
251
252
0
    hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
253
254
0
    hb_unicode_funcs_set_combining_class_func (funcs, hb_icu_unicode_combining_class, nullptr, nullptr);
255
0
    hb_unicode_funcs_set_general_category_func (funcs, hb_icu_unicode_general_category, nullptr, nullptr);
256
0
    hb_unicode_funcs_set_mirroring_func (funcs, hb_icu_unicode_mirroring, nullptr, nullptr);
257
0
    hb_unicode_funcs_set_script_func (funcs, hb_icu_unicode_script, nullptr, nullptr);
258
0
    hb_unicode_funcs_set_compose_func (funcs, hb_icu_unicode_compose, user_data, nullptr);
259
0
    hb_unicode_funcs_set_decompose_func (funcs, hb_icu_unicode_decompose, user_data, nullptr);
260
261
0
    hb_unicode_funcs_make_immutable (funcs);
262
263
0
    hb_atexit (free_static_icu_funcs);
264
265
0
    return funcs;
266
0
  }
267
} static_icu_funcs;
268
269
static inline
270
void free_static_icu_funcs ()
271
0
{
272
0
  static_icu_funcs.free_instance ();
273
0
}
274
275
/**
276
 * hb_icu_get_unicode_funcs:
277
 *
278
 * Fetches a Unicode-functions structure that is populated
279
 * with the appropriate ICU function for each method.
280
 *
281
 * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
282
 *
283
 * Since: 0.9.38
284
 **/
285
hb_unicode_funcs_t *
286
hb_icu_get_unicode_funcs ()
287
0
{
288
0
  return static_icu_funcs.get_unconst ();
289
0
}
290
291
#pragma GCC diagnostic pop
292
293
#endif