/work/workdir/UnpackedTarball/harfbuzz/src/hb-buffer-verify.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2022 Behdad Esfahbod |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #include "hb.hh" |
28 | | |
29 | | #ifndef HB_NO_BUFFER_VERIFY |
30 | | |
31 | | #include "hb-buffer.hh" |
32 | | |
33 | | |
34 | 0 | #define BUFFER_VERIFY_ERROR "buffer verify error: " |
35 | | static inline void |
36 | | buffer_verify_error (hb_buffer_t *buffer, |
37 | | hb_font_t *font, |
38 | | const char *fmt, |
39 | | ...) HB_PRINTF_FUNC(3, 4); |
40 | | |
41 | | static inline void |
42 | | buffer_verify_error (hb_buffer_t *buffer, |
43 | | hb_font_t *font, |
44 | | const char *fmt, |
45 | | ...) |
46 | 0 | { |
47 | 0 | va_list ap; |
48 | 0 | va_start (ap, fmt); |
49 | 0 | if (buffer->messaging ()) |
50 | 0 | { |
51 | 0 | buffer->message_impl (font, fmt, ap); |
52 | 0 | } |
53 | 0 | else |
54 | 0 | { |
55 | 0 | fprintf (stderr, "harfbuzz "); |
56 | 0 | vfprintf (stderr, fmt, ap); |
57 | 0 | fprintf (stderr, "\n"); |
58 | 0 | } |
59 | 0 | va_end (ap); |
60 | 0 | } |
61 | | |
62 | | static bool |
63 | | buffer_verify_monotone (hb_buffer_t *buffer, |
64 | | hb_font_t *font) |
65 | 0 | { |
66 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
67 | 0 | { |
68 | | /* Cannot perform this check without monotone clusters. */ |
69 | 0 | return true; |
70 | 0 | } |
71 | | |
72 | 0 | bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
73 | |
|
74 | 0 | unsigned int num_glyphs; |
75 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
76 | |
|
77 | 0 | for (unsigned int i = 1; i < num_glyphs; i++) |
78 | 0 | if (info[i-1].cluster != info[i].cluster && |
79 | 0 | (info[i-1].cluster < info[i].cluster) != is_forward) |
80 | 0 | { |
81 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); |
82 | 0 | return false; |
83 | 0 | } |
84 | | |
85 | 0 | return true; |
86 | 0 | } |
87 | | |
88 | | static bool |
89 | | buffer_verify_unsafe_to_break (hb_buffer_t *buffer, |
90 | | hb_buffer_t *text_buffer, |
91 | | hb_font_t *font, |
92 | | const hb_feature_t *features, |
93 | | unsigned int num_features, |
94 | | const char * const *shapers) |
95 | 0 | { |
96 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
97 | 0 | { |
98 | | /* Cannot perform this check without monotone clusters. */ |
99 | 0 | return true; |
100 | 0 | } |
101 | | |
102 | | /* Check that breaking up shaping at safe-to-break is indeed safe. */ |
103 | | |
104 | 0 | hb_unique_ptr_t<hb_buffer_t> fragment (hb_buffer_create_similar (buffer)); |
105 | 0 | hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); |
106 | 0 | hb_unique_ptr_t<hb_buffer_t> reconstruction (hb_buffer_create_similar (buffer)); |
107 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
108 | |
|
109 | 0 | unsigned int num_glyphs; |
110 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
111 | |
|
112 | 0 | unsigned int num_chars; |
113 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
114 | | |
115 | | /* Chop text and shape fragments. */ |
116 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
117 | 0 | unsigned int start = 0; |
118 | 0 | unsigned int text_start = forward ? 0 : num_chars; |
119 | 0 | unsigned int text_end = text_start; |
120 | 0 | for (unsigned int end = 1; end < num_glyphs + 1; end++) |
121 | 0 | { |
122 | 0 | if (end < num_glyphs && |
123 | 0 | (info[end].cluster == info[end-1].cluster || |
124 | 0 | info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) |
125 | 0 | continue; |
126 | | |
127 | | /* Shape segment corresponding to glyphs start..end. */ |
128 | 0 | if (end == num_glyphs) |
129 | 0 | { |
130 | 0 | if (forward) |
131 | 0 | text_end = num_chars; |
132 | 0 | else |
133 | 0 | text_start = 0; |
134 | 0 | } |
135 | 0 | else |
136 | 0 | { |
137 | 0 | if (forward) |
138 | 0 | { |
139 | 0 | unsigned int cluster = info[end].cluster; |
140 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
141 | 0 | text_end++; |
142 | 0 | } |
143 | 0 | else |
144 | 0 | { |
145 | 0 | unsigned int cluster = info[end - 1].cluster; |
146 | 0 | while (text_start && text[text_start - 1].cluster >= cluster) |
147 | 0 | text_start--; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | if (unlikely (text_start >= text_end)) |
151 | 0 | { |
152 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break text range is invalid."); |
153 | 0 | return false; |
154 | 0 | } |
155 | | |
156 | 0 | if (false) |
157 | 0 | printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); |
158 | |
|
159 | 0 | hb_buffer_clear_contents (fragment); |
160 | |
|
161 | 0 | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
162 | 0 | if (0 < text_start) |
163 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
164 | 0 | if (text_end < num_chars) |
165 | 0 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
166 | 0 | hb_buffer_set_flags (fragment, flags); |
167 | |
|
168 | 0 | hb_buffer_append (fragment, text_buffer, text_start, text_end); |
169 | 0 | if (!hb_shape_full (font, fragment, features, num_features, shapers) || |
170 | 0 | fragment->successful) |
171 | 0 | return true; |
172 | 0 | hb_buffer_append (reconstruction, fragment, 0, -1); |
173 | |
|
174 | 0 | start = end; |
175 | 0 | if (forward) |
176 | 0 | text_start = text_end; |
177 | 0 | else |
178 | 0 | text_end = text_start; |
179 | 0 | } |
180 | | |
181 | 0 | bool ret = true; |
182 | 0 | if (likely (reconstruction->successful)) |
183 | 0 | { |
184 | 0 | hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
185 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
186 | 0 | { |
187 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed."); |
188 | 0 | ret = false; |
189 | | |
190 | | /* Return the reconstructed result instead so it can be inspected. */ |
191 | 0 | hb_buffer_set_length (buffer, 0); |
192 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
193 | 0 | } |
194 | 0 | } |
195 | |
|
196 | 0 | return ret; |
197 | 0 | } |
198 | | |
199 | | static bool |
200 | | buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, |
201 | | hb_buffer_t *text_buffer, |
202 | | hb_font_t *font, |
203 | | const hb_feature_t *features, |
204 | | unsigned int num_features, |
205 | | const char * const *shapers) |
206 | 0 | { |
207 | 0 | if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) |
208 | 0 | { |
209 | | /* Cannot perform this check without monotone clusters. */ |
210 | 0 | return true; |
211 | 0 | } |
212 | | |
213 | | /* Check that shuffling up text before shaping at safe-to-concat points |
214 | | * is indeed safe. */ |
215 | | |
216 | | /* This is what we do: |
217 | | * |
218 | | * 1. We shape text once. Then segment the text at all the safe-to-concat |
219 | | * points; |
220 | | * |
221 | | * 2. Then we create two buffers, one containing all the even segments and |
222 | | * one all the odd segments. |
223 | | * |
224 | | * 3. Because all these segments were safe-to-concat at both ends, we |
225 | | * expect that concatenating them and shaping should NOT change the |
226 | | * shaping results of each segment. As such, we expect that after |
227 | | * shaping the two buffers, we still get cluster boundaries at the |
228 | | * segment boundaries, and that those all are safe-to-concat points. |
229 | | * Moreover, that there are NOT any safe-to-concat points within the |
230 | | * segments. |
231 | | * |
232 | | * 4. Finally, we reconstruct the shaping results of the original text by |
233 | | * simply interleaving the shaping results of the segments from the two |
234 | | * buffers, and assert that the total shaping results is the same as |
235 | | * the one from original buffer in step 1. |
236 | | */ |
237 | | |
238 | 0 | hb_unique_ptr_t<hb_buffer_t> fragments[2] { |
239 | 0 | hb_unique_ptr_t<hb_buffer_t> (hb_buffer_create_similar (buffer)), |
240 | 0 | hb_unique_ptr_t<hb_buffer_t> (hb_buffer_create_similar (buffer)), |
241 | 0 | }; |
242 | 0 | hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); |
243 | 0 | hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); |
244 | 0 | hb_unique_ptr_t<hb_buffer_t> reconstruction (hb_buffer_create_similar (buffer)); |
245 | 0 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
246 | 0 | hb_segment_properties_t props; |
247 | 0 | hb_buffer_get_segment_properties (buffer, &props); |
248 | 0 | hb_buffer_set_segment_properties (fragments[0], &props); |
249 | 0 | hb_buffer_set_segment_properties (fragments[1], &props); |
250 | 0 | hb_buffer_set_segment_properties (reconstruction, &props); |
251 | |
|
252 | 0 | unsigned num_glyphs; |
253 | 0 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
254 | |
|
255 | 0 | unsigned num_chars; |
256 | 0 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
257 | |
|
258 | 0 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
259 | |
|
260 | 0 | if (!forward) |
261 | 0 | hb_buffer_reverse (buffer); |
262 | | |
263 | | /* |
264 | | * Split text into segments and collect into to fragment streams. |
265 | | */ |
266 | 0 | { |
267 | 0 | unsigned fragment_idx = 0; |
268 | 0 | unsigned start = 0; |
269 | 0 | unsigned text_start = 0; |
270 | 0 | unsigned text_end = 0; |
271 | 0 | for (unsigned end = 1; end < num_glyphs + 1; end++) |
272 | 0 | { |
273 | 0 | if (end < num_glyphs && |
274 | 0 | (info[end].cluster == info[end-1].cluster || |
275 | 0 | info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
276 | 0 | continue; |
277 | | |
278 | | /* Accumulate segment corresponding to glyphs start..end. */ |
279 | 0 | if (end == num_glyphs) |
280 | 0 | text_end = num_chars; |
281 | 0 | else |
282 | 0 | { |
283 | 0 | unsigned cluster = info[end].cluster; |
284 | 0 | while (text_end < num_chars && text[text_end].cluster < cluster) |
285 | 0 | text_end++; |
286 | 0 | } |
287 | 0 | assert (text_start < text_end); |
288 | |
|
289 | 0 | if (false) |
290 | 0 | printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); |
291 | |
|
292 | | #if 0 |
293 | | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
294 | | if (0 < text_start) |
295 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
296 | | if (text_end < num_chars) |
297 | | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
298 | | hb_buffer_set_flags (fragment, flags); |
299 | | #endif |
300 | |
|
301 | 0 | hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); |
302 | |
|
303 | 0 | start = end; |
304 | 0 | text_start = text_end; |
305 | 0 | fragment_idx = 1 - fragment_idx; |
306 | 0 | } |
307 | 0 | } |
308 | |
|
309 | 0 | bool ret = true; |
310 | | /* |
311 | | * Shape the two fragment streams. |
312 | | */ |
313 | 0 | if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || |
314 | 0 | !fragments[0]->successful) |
315 | 0 | return ret; |
316 | | |
317 | 0 | if (!hb_shape_full (font, fragments[1], features, num_features, shapers) || |
318 | 0 | !fragments[1]->successful) |
319 | 0 | return ret; |
320 | | |
321 | 0 | if (!forward) |
322 | 0 | { |
323 | 0 | hb_buffer_reverse (fragments[0]); |
324 | 0 | hb_buffer_reverse (fragments[1]); |
325 | 0 | } |
326 | | |
327 | | /* |
328 | | * Reconstruct results. |
329 | | */ |
330 | 0 | { |
331 | 0 | unsigned fragment_idx = 0; |
332 | 0 | unsigned fragment_start[2] {0, 0}; |
333 | 0 | unsigned fragment_num_glyphs[2]; |
334 | 0 | hb_glyph_info_t *fragment_info[2]; |
335 | 0 | for (unsigned i = 0; i < 2; i++) |
336 | 0 | fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); |
337 | 0 | while (fragment_start[0] < fragment_num_glyphs[0] || |
338 | 0 | fragment_start[1] < fragment_num_glyphs[1]) |
339 | 0 | { |
340 | 0 | unsigned fragment_end = fragment_start[fragment_idx] + 1; |
341 | 0 | while (fragment_end < fragment_num_glyphs[fragment_idx] && |
342 | 0 | (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || |
343 | 0 | fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
344 | 0 | fragment_end++; |
345 | |
|
346 | 0 | hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); |
347 | |
|
348 | 0 | fragment_start[fragment_idx] = fragment_end; |
349 | 0 | fragment_idx = 1 - fragment_idx; |
350 | 0 | } |
351 | 0 | } |
352 | |
|
353 | 0 | if (!forward) |
354 | 0 | { |
355 | 0 | hb_buffer_reverse (buffer); |
356 | 0 | hb_buffer_reverse (reconstruction); |
357 | 0 | } |
358 | |
|
359 | 0 | if (likely (reconstruction->successful)) |
360 | 0 | { |
361 | | /* |
362 | | * Diff results. |
363 | | */ |
364 | 0 | hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
365 | 0 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
366 | 0 | { |
367 | 0 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed."); |
368 | 0 | ret = false; |
369 | | |
370 | | /* Return the reconstructed result instead so it can be inspected. */ |
371 | 0 | hb_buffer_set_length (buffer, 0); |
372 | 0 | hb_buffer_append (buffer, reconstruction, 0, -1); |
373 | 0 | } |
374 | 0 | } |
375 | |
|
376 | 0 | return ret; |
377 | 0 | } |
378 | | |
379 | | bool |
380 | | hb_buffer_t::verify (hb_buffer_t *text_buffer, |
381 | | hb_font_t *font, |
382 | | const hb_feature_t *features, |
383 | | unsigned int num_features, |
384 | | const char * const *shapers) |
385 | 0 | { |
386 | 0 | bool ret = true; |
387 | 0 | bool monotone = buffer_verify_monotone (this, font); |
388 | 0 | if (!monotone) |
389 | 0 | ret = false; |
390 | 0 | if (monotone && |
391 | 0 | !buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) |
392 | 0 | ret = false; |
393 | 0 | if (monotone && |
394 | 0 | (flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && |
395 | 0 | !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) |
396 | 0 | ret = false; |
397 | 0 | if (!ret) |
398 | 0 | { |
399 | 0 | #ifndef HB_NO_BUFFER_SERIALIZE |
400 | 0 | unsigned len = text_buffer->len; |
401 | 0 | hb_vector_t<char> bytes; |
402 | 0 | if (likely (bytes.resize (len * 10 + 16))) |
403 | 0 | { |
404 | 0 | hb_buffer_serialize_unicode (text_buffer, |
405 | 0 | 0, len, |
406 | 0 | bytes.arrayZ, bytes.length, |
407 | 0 | &len, |
408 | 0 | HB_BUFFER_SERIALIZE_FORMAT_TEXT, |
409 | 0 | HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); |
410 | 0 | buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s.", bytes.arrayZ ? bytes.arrayZ : ""); |
411 | 0 | } |
412 | 0 | #endif |
413 | 0 | } |
414 | 0 | return ret; |
415 | 0 | } |
416 | | |
417 | | |
418 | | #endif |