/src/mupdf/thirdparty/harfbuzz/src/hb-buffer-verify.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright © 2022 Behdad Esfahbod |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #include "hb.hh" |
28 | | |
29 | | #ifndef HB_NO_BUFFER_VERIFY |
30 | | |
31 | | #include "hb-buffer.hh" |
32 | | |
33 | | |
34 | 0 | #define BUFFER_VERIFY_ERROR "buffer verify error: " |
35 | | static inline void |
36 | | buffer_verify_error (hb_buffer_t *buffer, |
37 | | hb_font_t *font, |
38 | | const char *fmt, |
39 | | ...) HB_PRINTF_FUNC(3, 4); |
40 | | |
41 | | static inline void |
42 | | buffer_verify_error (hb_buffer_t *buffer, |
43 | | hb_font_t *font, |
44 | | const char *fmt, |
45 | | ...) |
46 | 0 | { |
47 | 0 | va_list ap; |
48 | 0 | va_start (ap, fmt); |
49 | 0 | if (buffer->messaging ()) |
50 | 0 | { |
51 | 0 | buffer->message_impl (font, fmt, ap); |
52 | 0 | } |
53 | 0 | else |
54 | 0 | { |
55 | 0 | fprintf (stderr, "harfbuzz "); |
56 | 0 | vfprintf (stderr, fmt, ap); |
57 | 0 | fprintf (stderr, "\n"); |
58 | 0 | } |
59 | 0 | va_end (ap); |
60 | 0 | } |
61 | | |
62 | | static bool |
63 | | buffer_verify_monotone (hb_buffer_t *buffer, |
64 | | hb_font_t *font) |
65 | 0 | { |
66 | | /* Check that clusters are monotone. */ |
67 | 0 | if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES || |
68 | 0 | buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
69 | 0 | { |
70 | 0 | bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
71 | |
|
72 | 0 | unsigned int num_glyphs; |
73 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
74 | |
|
75 | 0 | for (unsigned int i = 1; i < num_glyphs; i++) |
76 | 0 | if (info[i-1].cluster != info[i].cluster && |
77 | 0 | (info[i-1].cluster < info[i].cluster) != is_forward) |
78 | 0 | { |
79 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); |
80 | 0 | return false; |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | 0 | return true; |
85 | 0 | } |
86 | | |
87 | | static bool |
88 | | buffer_verify_unsafe_to_break (hb_buffer_t *buffer, |
89 | | hb_buffer_t *text_buffer, |
90 | | hb_font_t *font, |
91 | | const hb_feature_t *features, |
92 | | unsigned int num_features, |
93 | | const char * const *shapers) |
94 | 0 | { |
95 | 0 | if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && |
96 | 0 | buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
97 | 0 | { |
98 | | /* Cannot perform this check without monotone clusters. */ |
99 | 0 | return true; |
100 | 0 | } |
101 | | |
102 | | /* Check that breaking up shaping at safe-to-break is indeed safe. */ |
103 | | |
104 | 0 | hb_buffer_t *fragment = hb_buffer_create_similar (buffer); |
105 | 0 | hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); |
106 | 0 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
107 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
108 | |
|
109 | 0 | unsigned int num_glyphs; |
110 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
111 | |
|
112 | 0 | unsigned int num_chars; |
113 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
114 | | |
115 | | /* Chop text and shape fragments. */ |
116 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
117 | 0 | unsigned int start = 0; |
118 | 0 | unsigned int text_start = forward ? 0 : num_chars; |
119 | 0 | unsigned int text_end = text_start; |
120 | 0 | for (unsigned int end = 1; end < num_glyphs + 1; end++) |
121 | 0 | { |
122 | 0 | if (end < num_glyphs && |
123 | 0 | (info[end].cluster == info[end-1].cluster || |
124 | 0 | info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) |
125 | 0 | continue; |
126 | | |
127 | | /* Shape segment corresponding to glyphs start..end. */ |
128 | 0 | if (end == num_glyphs) |
129 | 0 | { |
130 | 0 | if (forward) |
131 | 0 | text_end = num_chars; |
132 | 0 | else |
133 | 0 | text_start = 0; |
134 | 0 | } |
135 | 0 | else |
136 | 0 | { |
137 | 0 | if (forward) |
138 | 0 | { |
139 | 0 | unsigned int cluster = info[end].cluster; |
140 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
141 | 0 | text_end++; |
142 | 0 | } |
143 | 0 | else |
144 | 0 | { |
145 | 0 | unsigned int cluster = info[end - 1].cluster; |
146 | 0 | while (text_start && text[text_start - 1].cluster >= cluster) |
147 | 0 | text_start--; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | assert (text_start < text_end); |
151 | | |
152 | 0 | if (0) |
153 | 0 | printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); |
154 | |
|
155 | 0 | hb_buffer_clear_contents (fragment); |
156 | |
|
157 | 0 | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
158 | 0 | if (0 < text_start) |
159 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
160 | 0 | if (text_end < num_chars) |
161 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
162 | 0 | hb_buffer_set_flags (fragment, flags); |
163 | |
|
164 | 0 | hb_buffer_append (fragment, text_buffer, text_start, text_end); |
165 | 0 | if (!hb_shape_full (font, fragment, features, num_features, shapers)) |
166 | 0 | { |
167 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); |
168 | 0 | hb_buffer_destroy (reconstruction); |
169 | 0 | hb_buffer_destroy (fragment); |
170 | 0 | return false; |
171 | 0 | } |
172 | 0 | else if (!fragment->successful || fragment->shaping_failed) |
173 | 0 | { |
174 | 0 | hb_buffer_destroy (reconstruction); |
175 | 0 | hb_buffer_destroy (fragment); |
176 | 0 | return true; |
177 | 0 | } |
178 | 0 | hb_buffer_append (reconstruction, fragment, 0, -1); |
179 | |
|
180 | 0 | start = end; |
181 | 0 | if (forward) |
182 | 0 | text_start = text_end; |
183 | 0 | else |
184 | 0 | text_end = text_start; |
185 | 0 | } |
186 | | |
187 | 0 | bool ret = true; |
188 | 0 | hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
189 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
190 | 0 | { |
191 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed."); |
192 | 0 | ret = false; |
193 | | |
194 | | /* Return the reconstructed result instead so it can be inspected. */ |
195 | 0 | hb_buffer_set_length (buffer, 0); |
196 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
197 | 0 | } |
198 | |
|
199 | 0 | hb_buffer_destroy (reconstruction); |
200 | 0 | hb_buffer_destroy (fragment); |
201 | |
|
202 | 0 | return ret; |
203 | 0 | } |
204 | | |
205 | | static bool |
206 | | buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, |
207 | | hb_buffer_t *text_buffer, |
208 | | hb_font_t *font, |
209 | | const hb_feature_t *features, |
210 | | unsigned int num_features, |
211 | | const char * const *shapers) |
212 | 0 | { |
213 | 0 | if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && |
214 | 0 | buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
215 | 0 | { |
216 | | /* Cannot perform this check without monotone clusters. */ |
217 | 0 | return true; |
218 | 0 | } |
219 | | |
220 | | /* Check that shuffling up text before shaping at safe-to-concat points |
221 | | * is indeed safe. */ |
222 | | |
223 | | /* This is what we do: |
224 | | * |
225 | | * 1. We shape text once. Then segment the text at all the safe-to-concat |
226 | | * points; |
227 | | * |
228 | | * 2. Then we create two buffers, one containing all the even segments and |
229 | | * one all the odd segments. |
230 | | * |
231 | | * 3. Because all these segments were safe-to-concat at both ends, we |
232 | | * expect that concatenating them and shaping should NOT change the |
233 | | * shaping results of each segment. As such, we expect that after |
234 | | * shaping the two buffers, we still get cluster boundaries at the |
235 | | * segment boundaries, and that those all are safe-to-concat points. |
236 | | * Moreover, that there are NOT any safe-to-concat points within the |
237 | | * segments. |
238 | | * |
239 | | * 4. Finally, we reconstruct the shaping results of the original text by |
240 | | * simply interleaving the shaping results of the segments from the two |
241 | | * buffers, and assert that the total shaping results is the same as |
242 | | * the one from original buffer in step 1. |
243 | | */ |
244 | | |
245 | 0 | hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), |
246 | 0 | hb_buffer_create_similar (buffer)}; |
247 | 0 | hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); |
248 | 0 | hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); |
249 | 0 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
250 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
251 | 0 | hb_segment_properties_t props; |
252 | 0 | hb_buffer_get_segment_properties (buffer, &props); |
253 | 0 | hb_buffer_set_segment_properties (fragments[0], &props); |
254 | 0 | hb_buffer_set_segment_properties (fragments[1], &props); |
255 | 0 | hb_buffer_set_segment_properties (reconstruction, &props); |
256 | |
|
257 | 0 | unsigned num_glyphs; |
258 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
259 | |
|
260 | 0 | unsigned num_chars; |
261 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
262 | |
|
263 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
264 | |
|
265 | 0 | if (!forward) |
266 | 0 | hb_buffer_reverse (buffer); |
267 | | |
268 | | /* |
269 | | * Split text into segments and collect into to fragment streams. |
270 | | */ |
271 | 0 | { |
272 | 0 | unsigned fragment_idx = 0; |
273 | 0 | unsigned start = 0; |
274 | 0 | unsigned text_start = 0; |
275 | 0 | unsigned text_end = 0; |
276 | 0 | for (unsigned end = 1; end < num_glyphs + 1; end++) |
277 | 0 | { |
278 | 0 | if (end < num_glyphs && |
279 | 0 | (info[end].cluster == info[end-1].cluster || |
280 | 0 | info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
281 | 0 | continue; |
282 | | |
283 | | /* Accumulate segment corresponding to glyphs start..end. */ |
284 | 0 | if (end == num_glyphs) |
285 | 0 | text_end = num_chars; |
286 | 0 | else |
287 | 0 | { |
288 | 0 | unsigned cluster = info[end].cluster; |
289 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
290 | 0 | text_end++; |
291 | 0 | } |
292 | 0 | assert (text_start < text_end); |
293 | | |
294 | 0 | if (0) |
295 | 0 | printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); |
296 | |
|
297 | | #if 0 |
298 | | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
299 | | if (0 < text_start) |
300 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
301 | | if (text_end < num_chars) |
302 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
303 | | hb_buffer_set_flags (fragment, flags); |
304 | | #endif |
305 | |
|
306 | 0 | hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); |
307 | |
|
308 | 0 | start = end; |
309 | 0 | text_start = text_end; |
310 | 0 | fragment_idx = 1 - fragment_idx; |
311 | 0 | } |
312 | 0 | } |
313 | | |
314 | 0 | bool ret = true; |
315 | 0 | hb_buffer_diff_flags_t diff; |
316 | | /* |
317 | | * Shape the two fragment streams. |
318 | | */ |
319 | 0 | if (!hb_shape_full (font, fragments[0], features, num_features, shapers)) |
320 | 0 | { |
321 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); |
322 | 0 | ret = false; |
323 | 0 | goto out; |
324 | 0 | } |
325 | 0 | else if (!fragments[0]->successful || fragments[0]->shaping_failed) |
326 | 0 | { |
327 | 0 | ret = true; |
328 | 0 | goto out; |
329 | 0 | } |
330 | 0 | if (!hb_shape_full (font, fragments[1], features, num_features, shapers)) |
331 | 0 | { |
332 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); |
333 | 0 | ret = false; |
334 | 0 | goto out; |
335 | 0 | } |
336 | 0 | else if (!fragments[1]->successful || fragments[1]->shaping_failed) |
337 | 0 | { |
338 | 0 | ret = true; |
339 | 0 | goto out; |
340 | 0 | } |
341 | | |
342 | 0 | if (!forward) |
343 | 0 | { |
344 | 0 | hb_buffer_reverse (fragments[0]); |
345 | 0 | hb_buffer_reverse (fragments[1]); |
346 | 0 | } |
347 | | |
348 | | /* |
349 | | * Reconstruct results. |
350 | | */ |
351 | 0 | { |
352 | 0 | unsigned fragment_idx = 0; |
353 | 0 | unsigned fragment_start[2] {0, 0}; |
354 | 0 | unsigned fragment_num_glyphs[2]; |
355 | 0 | hb_glyph_info_t *fragment_info[2]; |
356 | 0 | for (unsigned i = 0; i < 2; i++) |
357 | 0 | fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); |
358 | 0 | while (fragment_start[0] < fragment_num_glyphs[0] || |
359 | 0 | fragment_start[1] < fragment_num_glyphs[1]) |
360 | 0 | { |
361 | 0 | unsigned fragment_end = fragment_start[fragment_idx] + 1; |
362 | 0 | while (fragment_end < fragment_num_glyphs[fragment_idx] && |
363 | 0 | (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || |
364 | 0 | fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
365 | 0 | fragment_end++; |
366 | |
|
367 | 0 | hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); |
368 | |
|
369 | 0 | fragment_start[fragment_idx] = fragment_end; |
370 | 0 | fragment_idx = 1 - fragment_idx; |
371 | 0 | } |
372 | 0 | } |
373 | |
|
374 | 0 | if (!forward) |
375 | 0 | { |
376 | 0 | hb_buffer_reverse (buffer); |
377 | 0 | hb_buffer_reverse (reconstruction); |
378 | 0 | } |
379 | | |
380 | | /* |
381 | | * Diff results. |
382 | | */ |
383 | 0 | diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
384 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
385 | 0 | { |
386 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed."); |
387 | 0 | ret = false; |
388 | | |
389 | | /* Return the reconstructed result instead so it can be inspected. */ |
390 | 0 | hb_buffer_set_length (buffer, 0); |
391 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
392 | 0 | } |
393 | | |
394 | |
|
395 | 0 | out: |
396 | 0 | hb_buffer_destroy (reconstruction); |
397 | 0 | hb_buffer_destroy (fragments[0]); |
398 | 0 | hb_buffer_destroy (fragments[1]); |
399 | |
|
400 | 0 | return ret; |
401 | 0 | } |
402 | | |
403 | | bool |
404 | | hb_buffer_t::verify (hb_buffer_t *text_buffer, |
405 | | hb_font_t *font, |
406 | | const hb_feature_t *features, |
407 | | unsigned int num_features, |
408 | | const char * const *shapers) |
409 | 0 | { |
410 | 0 | bool ret = true; |
411 | 0 | if (!buffer_verify_monotone (this, font)) |
412 | 0 | ret = false; |
413 | 0 | if (!buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) |
414 | 0 | ret = false; |
415 | 0 | if ((flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && |
416 | 0 | !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) |
417 | 0 | ret = false; |
418 | 0 | if (!ret) |
419 | 0 | { |
420 | 0 | #ifndef HB_NO_BUFFER_SERIALIZE |
421 | 0 | unsigned len = text_buffer->len; |
422 | 0 | hb_vector_t<char> bytes; |
423 | 0 | if (likely (bytes.resize (len * 10 + 16))) |
424 | 0 | { |
425 | 0 | hb_buffer_serialize_unicode (text_buffer, |
426 | 0 | 0, len, |
427 | 0 | bytes.arrayZ, bytes.length, |
428 | 0 | &len, |
429 | 0 | HB_BUFFER_SERIALIZE_FORMAT_TEXT, |
430 | 0 | HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); |
431 | 0 | buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s.", bytes.arrayZ); |
432 | 0 | } |
433 | 0 | #endif |
434 | 0 | } |
435 | 0 | return ret; |
436 | 0 | } |
437 | | |
438 | | |
439 | | #endif |