/work/workdir/UnpackedTarball/harfbuzz/src/hb-buffer-verify.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright © 2022 Behdad Esfahbod |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #include "hb.hh" |
28 | | |
29 | | #ifndef HB_NO_BUFFER_VERIFY |
30 | | |
31 | | #include "hb-buffer.hh" |
32 | | |
33 | | |
34 | 0 | #define BUFFER_VERIFY_ERROR "buffer verify error: " |
35 | | static inline void |
36 | | buffer_verify_error (hb_buffer_t *buffer, |
37 | | hb_font_t *font, |
38 | | const char *fmt, |
39 | | ...) HB_PRINTF_FUNC(3, 4); |
40 | | |
41 | | static inline void |
42 | | buffer_verify_error (hb_buffer_t *buffer, |
43 | | hb_font_t *font, |
44 | | const char *fmt, |
45 | | ...) |
46 | 0 | { |
47 | 0 | va_list ap; |
48 | 0 | va_start (ap, fmt); |
49 | 0 | if (buffer->messaging ()) |
50 | 0 | { |
51 | 0 | buffer->message_impl (font, fmt, ap); |
52 | 0 | } |
53 | 0 | else |
54 | 0 | { |
55 | 0 | fprintf (stderr, "harfbuzz "); |
56 | 0 | vfprintf (stderr, fmt, ap); |
57 | 0 | fprintf (stderr, "\n"); |
58 | 0 | } |
59 | 0 | va_end (ap); |
60 | 0 | } |
61 | | |
62 | | static bool |
63 | | buffer_verify_monotone (hb_buffer_t *buffer, |
64 | | hb_font_t *font) |
65 | 0 | { |
66 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
67 | 0 | { |
68 | | /* Cannot perform this check without monotone clusters. */ |
69 | 0 | return true; |
70 | 0 | } |
71 | | |
72 | 0 | bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
73 | |
|
74 | 0 | unsigned int num_glyphs; |
75 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
76 | |
|
77 | 0 | for (unsigned int i = 1; i < num_glyphs; i++) |
78 | 0 | if (info[i-1].cluster != info[i].cluster && |
79 | 0 | (info[i-1].cluster < info[i].cluster) != is_forward) |
80 | 0 | { |
81 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); |
82 | 0 | return false; |
83 | 0 | } |
84 | | |
85 | 0 | return true; |
86 | 0 | } |
87 | | |
88 | | static bool |
89 | | buffer_verify_unsafe_to_break (hb_buffer_t *buffer, |
90 | | hb_buffer_t *text_buffer, |
91 | | hb_font_t *font, |
92 | | const hb_feature_t *features, |
93 | | unsigned int num_features, |
94 | | const char * const *shapers) |
95 | 0 | { |
96 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
97 | 0 | { |
98 | | /* Cannot perform this check without monotone clusters. */ |
99 | 0 | return true; |
100 | 0 | } |
101 | | |
102 | | /* Check that breaking up shaping at safe-to-break is indeed safe. */ |
103 | | |
104 | 0 | hb_buffer_t *fragment = hb_buffer_create_similar (buffer); |
105 | 0 | hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); |
106 | 0 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
107 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
108 | |
|
109 | 0 | unsigned int num_glyphs; |
110 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
111 | |
|
112 | 0 | unsigned int num_chars; |
113 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
114 | | |
115 | | /* Chop text and shape fragments. */ |
116 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
117 | 0 | unsigned int start = 0; |
118 | 0 | unsigned int text_start = forward ? 0 : num_chars; |
119 | 0 | unsigned int text_end = text_start; |
120 | 0 | for (unsigned int end = 1; end < num_glyphs + 1; end++) |
121 | 0 | { |
122 | 0 | if (end < num_glyphs && |
123 | 0 | (info[end].cluster == info[end-1].cluster || |
124 | 0 | info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) |
125 | 0 | continue; |
126 | | |
127 | | /* Shape segment corresponding to glyphs start..end. */ |
128 | 0 | if (end == num_glyphs) |
129 | 0 | { |
130 | 0 | if (forward) |
131 | 0 | text_end = num_chars; |
132 | 0 | else |
133 | 0 | text_start = 0; |
134 | 0 | } |
135 | 0 | else |
136 | 0 | { |
137 | 0 | if (forward) |
138 | 0 | { |
139 | 0 | unsigned int cluster = info[end].cluster; |
140 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
141 | 0 | text_end++; |
142 | 0 | } |
143 | 0 | else |
144 | 0 | { |
145 | 0 | unsigned int cluster = info[end - 1].cluster; |
146 | 0 | while (text_start && text[text_start - 1].cluster >= cluster) |
147 | 0 | text_start--; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | assert (text_start < text_end); |
151 | | |
152 | 0 | if (false) |
153 | 0 | printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); |
154 | |
|
155 | 0 | hb_buffer_clear_contents (fragment); |
156 | |
|
157 | 0 | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
158 | 0 | if (0 < text_start) |
159 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
160 | 0 | if (text_end < num_chars) |
161 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
162 | 0 | hb_buffer_set_flags (fragment, flags); |
163 | |
|
164 | 0 | hb_buffer_append (fragment, text_buffer, text_start, text_end); |
165 | 0 | if (!hb_shape_full (font, fragment, features, num_features, shapers) || |
166 | 0 | fragment->successful || fragment->shaping_failed) |
167 | 0 | { |
168 | 0 | hb_buffer_destroy (reconstruction); |
169 | 0 | hb_buffer_destroy (fragment); |
170 | 0 | return true; |
171 | 0 | } |
172 | 0 | hb_buffer_append (reconstruction, fragment, 0, -1); |
173 | |
|
174 | 0 | start = end; |
175 | 0 | if (forward) |
176 | 0 | text_start = text_end; |
177 | 0 | else |
178 | 0 | text_end = text_start; |
179 | 0 | } |
180 | | |
181 | 0 | bool ret = true; |
182 | 0 | if (likely (reconstruction->successful)) |
183 | 0 | { |
184 | 0 | hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
185 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
186 | 0 | { |
187 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed."); |
188 | 0 | ret = false; |
189 | | |
190 | | /* Return the reconstructed result instead so it can be inspected. */ |
191 | 0 | hb_buffer_set_length (buffer, 0); |
192 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
193 | 0 | } |
194 | 0 | } |
195 | |
|
196 | 0 | hb_buffer_destroy (reconstruction); |
197 | 0 | hb_buffer_destroy (fragment); |
198 | |
|
199 | 0 | return ret; |
200 | 0 | } |
201 | | |
202 | | static bool |
203 | | buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, |
204 | | hb_buffer_t *text_buffer, |
205 | | hb_font_t *font, |
206 | | const hb_feature_t *features, |
207 | | unsigned int num_features, |
208 | | const char * const *shapers) |
209 | 0 | { |
210 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
211 | 0 | { |
212 | | /* Cannot perform this check without monotone clusters. */ |
213 | 0 | return true; |
214 | 0 | } |
215 | | |
216 | | /* Check that shuffling up text before shaping at safe-to-concat points |
217 | | * is indeed safe. */ |
218 | | |
219 | | /* This is what we do: |
220 | | * |
221 | | * 1. We shape text once. Then segment the text at all the safe-to-concat |
222 | | * points; |
223 | | * |
224 | | * 2. Then we create two buffers, one containing all the even segments and |
225 | | * one all the odd segments. |
226 | | * |
227 | | * 3. Because all these segments were safe-to-concat at both ends, we |
228 | | * expect that concatenating them and shaping should NOT change the |
229 | | * shaping results of each segment. As such, we expect that after |
230 | | * shaping the two buffers, we still get cluster boundaries at the |
231 | | * segment boundaries, and that those all are safe-to-concat points. |
232 | | * Moreover, that there are NOT any safe-to-concat points within the |
233 | | * segments. |
234 | | * |
235 | | * 4. Finally, we reconstruct the shaping results of the original text by |
236 | | * simply interleaving the shaping results of the segments from the two |
237 | | * buffers, and assert that the total shaping results is the same as |
238 | | * the one from original buffer in step 1. |
239 | | */ |
240 | | |
241 | 0 | hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), |
242 | 0 | hb_buffer_create_similar (buffer)}; |
243 | 0 | hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); |
244 | 0 | hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); |
245 | 0 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
246 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
247 | 0 | hb_segment_properties_t props; |
248 | 0 | hb_buffer_get_segment_properties (buffer, &props); |
249 | 0 | hb_buffer_set_segment_properties (fragments[0], &props); |
250 | 0 | hb_buffer_set_segment_properties (fragments[1], &props); |
251 | 0 | hb_buffer_set_segment_properties (reconstruction, &props); |
252 | |
|
253 | 0 | unsigned num_glyphs; |
254 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
255 | |
|
256 | 0 | unsigned num_chars; |
257 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
258 | |
|
259 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
260 | |
|
261 | 0 | if (!forward) |
262 | 0 | hb_buffer_reverse (buffer); |
263 | | |
264 | | /* |
265 | | * Split text into segments and collect into to fragment streams. |
266 | | */ |
267 | 0 | { |
268 | 0 | unsigned fragment_idx = 0; |
269 | 0 | unsigned start = 0; |
270 | 0 | unsigned text_start = 0; |
271 | 0 | unsigned text_end = 0; |
272 | 0 | for (unsigned end = 1; end < num_glyphs + 1; end++) |
273 | 0 | { |
274 | 0 | if (end < num_glyphs && |
275 | 0 | (info[end].cluster == info[end-1].cluster || |
276 | 0 | info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
277 | 0 | continue; |
278 | | |
279 | | /* Accumulate segment corresponding to glyphs start..end. */ |
280 | 0 | if (end == num_glyphs) |
281 | 0 | text_end = num_chars; |
282 | 0 | else |
283 | 0 | { |
284 | 0 | unsigned cluster = info[end].cluster; |
285 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
286 | 0 | text_end++; |
287 | 0 | } |
288 | 0 | assert (text_start < text_end); |
289 | | |
290 | 0 | if (false) |
291 | 0 | printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); |
292 | |
|
293 | | #if 0 |
294 | | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
295 | | if (0 < text_start) |
296 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
297 | | if (text_end < num_chars) |
298 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
299 | | hb_buffer_set_flags (fragment, flags); |
300 | | #endif |
301 | |
|
302 | 0 | hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); |
303 | |
|
304 | 0 | start = end; |
305 | 0 | text_start = text_end; |
306 | 0 | fragment_idx = 1 - fragment_idx; |
307 | 0 | } |
308 | 0 | } |
309 | | |
310 | 0 | bool ret = true; |
311 | 0 | hb_buffer_diff_flags_t diff; |
312 | | /* |
313 | | * Shape the two fragment streams. |
314 | | */ |
315 | 0 | if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || |
316 | 0 | !fragments[0]->successful || fragments[0]->shaping_failed) |
317 | 0 | goto out; |
318 | | |
319 | 0 | if (!hb_shape_full (font, fragments[1], features, num_features, shapers) || |
320 | 0 | !fragments[1]->successful || fragments[1]->shaping_failed) |
321 | 0 | goto out; |
322 | | |
323 | 0 | if (!forward) |
324 | 0 | { |
325 | 0 | hb_buffer_reverse (fragments[0]); |
326 | 0 | hb_buffer_reverse (fragments[1]); |
327 | 0 | } |
328 | | |
329 | | /* |
330 | | * Reconstruct results. |
331 | | */ |
332 | 0 | { |
333 | 0 | unsigned fragment_idx = 0; |
334 | 0 | unsigned fragment_start[2] {0, 0}; |
335 | 0 | unsigned fragment_num_glyphs[2]; |
336 | 0 | hb_glyph_info_t *fragment_info[2]; |
337 | 0 | for (unsigned i = 0; i < 2; i++) |
338 | 0 | fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); |
339 | 0 | while (fragment_start[0] < fragment_num_glyphs[0] || |
340 | 0 | fragment_start[1] < fragment_num_glyphs[1]) |
341 | 0 | { |
342 | 0 | unsigned fragment_end = fragment_start[fragment_idx] + 1; |
343 | 0 | while (fragment_end < fragment_num_glyphs[fragment_idx] && |
344 | 0 | (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || |
345 | 0 | fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
346 | 0 | fragment_end++; |
347 | |
|
348 | 0 | hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); |
349 | |
|
350 | 0 | fragment_start[fragment_idx] = fragment_end; |
351 | 0 | fragment_idx = 1 - fragment_idx; |
352 | 0 | } |
353 | 0 | } |
354 | |
|
355 | 0 | if (!forward) |
356 | 0 | { |
357 | 0 | hb_buffer_reverse (buffer); |
358 | 0 | hb_buffer_reverse (reconstruction); |
359 | 0 | } |
360 | |
|
361 | 0 | if (likely (reconstruction->successful)) |
362 | 0 | { |
363 | | /* |
364 | | * Diff results. |
365 | | */ |
366 | 0 | diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
367 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
368 | 0 | { |
369 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed."); |
370 | 0 | ret = false; |
371 | | |
372 | | /* Return the reconstructed result instead so it can be inspected. */ |
373 | 0 | hb_buffer_set_length (buffer, 0); |
374 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
375 | 0 | } |
376 | 0 | } |
377 | |
|
378 | 0 | out: |
379 | 0 | hb_buffer_destroy (reconstruction); |
380 | 0 | hb_buffer_destroy (fragments[0]); |
381 | 0 | hb_buffer_destroy (fragments[1]); |
382 | |
|
383 | 0 | return ret; |
384 | 0 | } |
385 | | |
386 | | bool |
387 | | hb_buffer_t::verify (hb_buffer_t *text_buffer, |
388 | | hb_font_t *font, |
389 | | const hb_feature_t *features, |
390 | | unsigned int num_features, |
391 | | const char * const *shapers) |
392 | 0 | { |
393 | 0 | bool ret = true; |
394 | 0 | if (!buffer_verify_monotone (this, font)) |
395 | 0 | ret = false; |
396 | 0 | if (!buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) |
397 | 0 | ret = false; |
398 | 0 | if ((flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && |
399 | 0 | !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) |
400 | 0 | ret = false; |
401 | 0 | if (!ret) |
402 | 0 | { |
403 | 0 | #ifndef HB_NO_BUFFER_SERIALIZE |
404 | 0 | unsigned len = text_buffer->len; |
405 | 0 | hb_vector_t<char> bytes; |
406 | 0 | if (likely (bytes.resize (len * 10 + 16))) |
407 | 0 | { |
408 | 0 | hb_buffer_serialize_unicode (text_buffer, |
409 | 0 | 0, len, |
410 | 0 | bytes.arrayZ, bytes.length, |
411 | 0 | &len, |
412 | 0 | HB_BUFFER_SERIALIZE_FORMAT_TEXT, |
413 | 0 | HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); |
414 | 0 | buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s.", bytes.arrayZ ? bytes.arrayZ : ""); |
415 | 0 | } |
416 | 0 | #endif |
417 | 0 | } |
418 | 0 | return ret; |
419 | 0 | } |
420 | | |
421 | | |
422 | | #endif |