/src/mupdf/source/fitz/bidi.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Bidirectional text processing. |
3 | | * |
4 | | * Processes unicode text by arranging the characters into an order suitable |
5 | | * for display. E.g. Hebrew text will be arranged from right-to-left and |
6 | | * any English within the text will remain in the left-to-right order. |
7 | | * Characters such as parenthesis will be substituted for their mirrored |
8 | | * equivalents if they are part of text which must be reversed. |
9 | | * |
10 | | * This is an implementation of the unicode Bidirectional Algorithm which |
11 | | * can be found here: http://www.unicode.org/reports/tr9/ and is based |
12 | | * on the reference implementation of the algorithm found on that page. |
13 | | * |
14 | | * For a nice overview of how it works, read this... |
15 | | * http://www.w3.org/TR/REC-html40/struct/dirlang.html |
16 | | * |
17 | | * Extracted from the SmartOffice code, where it was modified by Ian |
18 | | * Beveridge. |
19 | | * |
20 | | * Copyright (C) Picsel, 2004. All Rights Reserved. |
21 | | */ |
22 | | |
23 | | /* |
24 | | * Original copyright notice from unicode reference implementation. |
25 | | * ---------------------------------------------------------------- |
26 | | * Written by: Asmus Freytag |
27 | | * C++ and Windows dependencies removed, and |
28 | | * command line interface added by: Rick McGowan |
29 | | * |
30 | | * Copyright (C) 1999, ASMUS, Inc. All Rights Reserved |
31 | | */ |
32 | | |
33 | | /* |
34 | | * Includes... |
35 | | */ |
36 | | |
37 | | #include "mupdf/fitz.h" |
38 | | #include "mupdf/ucdn.h" |
39 | | #include "bidi-imp.h" /* standard bidi code interface */ |
40 | | #include <assert.h> |
41 | | |
42 | | /* |
43 | | * Macros... |
44 | | */ |
45 | | |
46 | 0 | #define ODD(x) ((x) & 1) |
47 | | |
48 | | #define REPLACEABLE_TYPE(t) ( \ |
49 | | ((t)==BDI_ES) || ((t)==BDI_ET) || ((t)==BDI_CS) || \ |
50 | | ((t)==BDI_NSM) || ((t)==BDI_PDF) || ((t)==BDI_BN) || \ |
51 | | ((t)==BDI_S) || ((t)==BDI_WS) || ((t)==BDI_N) ) |
52 | | |
53 | | #ifdef DEBUG_BIDI_VERBOSE |
54 | | #define DBUGVF(params) do { fz_warn params; } while (0) |
55 | | #else |
56 | | #define DBUGVF(params) do {} while (0) |
57 | | #endif |
58 | | |
59 | | #ifdef DEBUG_BIDI_OUTLINE |
60 | | #define DBUGH(params) do { fz_warn params; } while (0) |
61 | | #else |
62 | 244 | #define DBUGH(params) do {} while (0) |
63 | | #endif |
64 | | |
65 | | #define UNICODE_EOS 0 |
66 | | #define UNICODE_DIGIT_ZERO 0x0030 |
67 | | #define UNICODE_DIGIT_NINE 0x0039 |
68 | | #define UNICODE_SUPERSCRIPT_TWO 0x00B2 |
69 | | #define UNICODE_SUPERSCRIPT_THREE 0x00B3 |
70 | | #define UNICODE_SUPERSCRIPT_ONE 0x00B9 |
71 | | #define UNICODE_RTL_START 0x0590 |
72 | | #define UNICODE_RTL_END 0x07BF |
73 | | #define UNICODE_ARABIC_INDIC_DIGIT_ZERO 0x0660 |
74 | | #define UNICODE_ARABIC_INDIC_DIGIT_NINE 0x0669 |
75 | | #define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO 0x06F0 |
76 | | #define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE 0x06F9 |
77 | | #define UNICODE_ZERO_WIDTH_NON_JOINER 0x200C |
78 | | #define UNICODE_SUPERSCRIPT_ZERO 0x2070 |
79 | | #define UNICODE_SUPERSCRIPT_FOUR 0x2074 |
80 | | #define UNICODE_SUPERSCRIPT_NINE 0x2079 |
81 | | #define UNICODE_SUBSCRIPT_ZERO 0x2080 |
82 | | #define UNICODE_SUBSCRIPT_NINE 0x2089 |
83 | | #define UNICODE_CIRCLED_DIGIT_ONE 0x2460 |
84 | | #define UNICODE_NUMBER_TWENTY_FULL_STOP 0x249B |
85 | | #define UNICODE_CIRCLED_DIGIT_ZERO 0x24EA |
86 | | #define UNICODE_FULLWIDTH_DIGIT_ZERO 0xFF10 |
87 | | #define UNICODE_FULLWIDTH_DIGIT_NINE 0xFF19 |
88 | | |
89 | | #ifndef TRUE |
90 | 12.5k | #define TRUE (1) |
91 | | #endif |
92 | | #ifndef FALSE |
93 | 1.22k | #define FALSE (0) |
94 | | #endif |
95 | | |
96 | | /* |
97 | | * Enumerations... |
98 | | */ |
99 | | |
100 | | #ifdef DEBUG_BIDI_VERBOSE |
101 | | /* display support: */ |
102 | | static const char char_from_types[] = |
103 | | { |
104 | | ' ', /* ON */ |
105 | | '>', /* L */ |
106 | | '<', /* R */ |
107 | | '9', /* AN */ |
108 | | '1', /* EN */ |
109 | | 'a', /* AL */ |
110 | | '@', /* NSM */ |
111 | | '.', /* CS */ |
112 | | ',', /* ES */ |
113 | | '$', /* ET */ |
114 | | ':', /* BN */ |
115 | | 'X', /* S */ |
116 | | '_', /* WS */ |
117 | | 'B', /* B */ |
118 | | '+', /* RLO */ |
119 | | '+', /* RLE */ |
120 | | '+', /* LRO */ |
121 | | '+', /* LRE */ |
122 | | '-', /* PDF */ |
123 | | '=' /* LS */ |
124 | | }; |
125 | | #endif |
126 | | |
127 | | /* |
128 | | * Functions and static functions... |
129 | | */ |
130 | | |
131 | | /* UCDN uses a different ordering than Bidi does. We cannot |
132 | | * change to the UCDN ordering, as the bidi-std.c code relies |
133 | | * on the exact ordering (at least that N = ON = 0). We |
134 | | * therefore map between the two using this small table. It |
135 | | * also takes care of fudging LRI, RLI, FSI and PDI, that this |
136 | | * code does not currently support. */ |
137 | | static const uint8_t ucdn_to_bidi[] = |
138 | | { |
139 | | BDI_L, /* UCDN_BIDI_CLASS_L = 0 */ |
140 | | BDI_LRE, /* UCDN_BIDI_CLASS_LRE = 1 */ |
141 | | BDI_LRO, /* UCDN_BIDI_CLASS_LRO = 2 */ |
142 | | BDI_R, /* UCDN_BIDI_CLASS_R = 3 */ |
143 | | BDI_AL, /* UCDN_BIDI_CLASS_AL = 4 */ |
144 | | BDI_RLE, /* UCDN_BIDI_CLASS_RLE = 5 */ |
145 | | BDI_RLO, /* UCDN_BIDI_CLASS_RLO = 6 */ |
146 | | BDI_PDF, /* UCDN_BIDI_CLASS_PDF = 7 */ |
147 | | BDI_EN, /* UCDN_BIDI_CLASS_EN = 8 */ |
148 | | BDI_ES, /* UCDN_BIDI_CLASS_ES = 9 */ |
149 | | BDI_ET, /* UCDN_BIDI_CLASS_ET = 10 */ |
150 | | BDI_AN, /* UCDN_BIDI_CLASS_AN = 11 */ |
151 | | BDI_CS, /* UCDN_BIDI_CLASS_CS = 12 */ |
152 | | BDI_NSM, /* UCDN_BIDI_CLASS_NSM = 13 */ |
153 | | BDI_BN, /* UCDN_BIDI_CLASS_BN = 14 */ |
154 | | BDI_B, /* UCDN_BIDI_CLASS_B = 15 */ |
155 | | BDI_S, /* UCDN_BIDI_CLASS_S = 16 */ |
156 | | BDI_WS, /* UCDN_BIDI_CLASS_WS = 17 */ |
157 | | BDI_ON, /* UCDN_BIDI_CLASS_ON = 18 */ |
158 | | BDI_LRE, /* UCDN_BIDI_CLASS_LRI = 19 */ |
159 | | BDI_RLE, /* UCDN_BIDI_CLASS_RLI = 20 */ |
160 | | BDI_N, /* UCDN_BIDI_CLASS_FSI = 21 */ |
161 | | BDI_N, /* UCDN_BIDI_CLASS_PDI = 22 */ |
162 | | }; |
163 | | |
164 | 30.4k | #define class_from_ch_ws(ch) (ucdn_to_bidi[ucdn_get_bidi_class(ch)]) |
165 | | |
166 | | /* Return a direction for white-space on the second pass of the algorithm. */ |
167 | | static fz_bidi_chartype class_from_ch_n(uint32_t ch) |
168 | 15.2k | { |
169 | 15.2k | fz_bidi_chartype from_ch_ws = class_from_ch_ws(ch); |
170 | 15.2k | if (from_ch_ws == BDI_S || from_ch_ws == BDI_WS) |
171 | 1.57k | return BDI_N; |
172 | 13.6k | return from_ch_ws; |
173 | 15.2k | } |
174 | | |
175 | | /* Split fragments into single scripts (or punctuation + single script) */ |
176 | | static void |
177 | | split_at_script(const uint32_t *fragment, |
178 | | size_t fragment_len, |
179 | | int level, |
180 | | void *arg, |
181 | | fz_bidi_fragment_fn *callback) |
182 | 244 | { |
183 | 244 | int script = UCDN_SCRIPT_COMMON; |
184 | 244 | size_t script_start, i; |
185 | | |
186 | 244 | script_start = 0; |
187 | 15.4k | for (i = 0; i < fragment_len; i++) |
188 | 15.2k | { |
189 | 15.2k | int s = ucdn_get_script(fragment[i]); |
190 | 15.2k | if (s == UCDN_SCRIPT_COMMON || s == UCDN_SCRIPT_INHERITED) |
191 | 3.19k | { |
192 | | /* Punctuation etc. This is fine. */ |
193 | 3.19k | } |
194 | 12.0k | else if (s == script) |
195 | 11.8k | { |
196 | | /* Same script. Still fine. */ |
197 | 11.8k | } |
198 | 216 | else if (script == UCDN_SCRIPT_COMMON || script == UCDN_SCRIPT_INHERITED) |
199 | 216 | { |
200 | | /* First non punctuation thing. Set the script. */ |
201 | 216 | script = s; |
202 | 216 | } |
203 | 0 | else |
204 | 0 | { |
205 | | /* Change of script. Break the fragment. */ |
206 | 0 | (*callback)(&fragment[script_start], i - script_start, level, script, arg); |
207 | 0 | script_start = i; |
208 | 0 | script = s; |
209 | 0 | } |
210 | 15.2k | } |
211 | 244 | if (script_start != fragment_len) |
212 | 244 | { |
213 | 244 | (*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg); |
214 | 244 | } |
215 | 244 | } |
216 | | |
217 | | /* Determines the character classes for all following |
218 | | * passes of the algorithm. A character class is basically the type of Bidi |
219 | | * behaviour that the character exhibits. |
220 | | */ |
221 | | static void |
222 | | classify_characters(const uint32_t *text, |
223 | | fz_bidi_chartype *types, |
224 | | size_t len, |
225 | | fz_bidi_flags flags) |
226 | 488 | { |
227 | 488 | size_t i; |
228 | | |
229 | 488 | if ((flags & FZ_BIDI_CLASSIFY_WHITE_SPACE)!=0) |
230 | 244 | { |
231 | 15.4k | for (i = 0; i < len; i++) |
232 | 15.2k | { |
233 | 15.2k | types[i] = class_from_ch_ws(text[i]); |
234 | 15.2k | } |
235 | 244 | } |
236 | 244 | else |
237 | 244 | { |
238 | | #ifdef DEBUG_BIDI_VERBOSE |
239 | | fprintf(stderr, "Text: "); |
240 | | for (i = 0; i < len; i++) |
241 | | { |
242 | | /* So that we can actually sort of read the debug string, any |
243 | | * non-ascii characters are replaced with a 1-digit hash |
244 | | * value from 0-9, making non-english characters appear |
245 | | * as numbers |
246 | | */ |
247 | | fprintf(stderr, "%c", (text[i] <= 127 && text[i] >= 32) ? |
248 | | text[i] : text[i] % 9 + '0'); |
249 | | } |
250 | | fprintf(stderr, "\nTypes: "); |
251 | | #endif |
252 | 15.4k | for (i = 0; i < len; i++) |
253 | 15.2k | { |
254 | 15.2k | types[i] = class_from_ch_n(text[i]); |
255 | | #ifdef DEBUG_BIDI_VERBOSE |
256 | | fprintf(stderr, "%c", char_from_types[(int)types[i]]); |
257 | | #endif |
258 | 15.2k | } |
259 | | #ifdef DEBUG_BIDI_VERBOSE |
260 | | fprintf(stderr, "\n"); |
261 | | #endif |
262 | 244 | } |
263 | 488 | } |
264 | | |
265 | | /* Determines the base level of the text. |
266 | | * Implements rule P2 of the Unicode Bidi Algorithm. |
267 | | * Note: Ignores explicit embeddings |
268 | | */ |
269 | | static fz_bidi_level base_level_from_text(fz_bidi_chartype *types, size_t len) |
270 | 0 | { |
271 | 0 | size_t i; |
272 | |
|
273 | 0 | for (i = 0; i < len; i++) |
274 | 0 | { |
275 | 0 | switch (types[i]) |
276 | 0 | { |
277 | | /* strong left */ |
278 | 0 | case BDI_L: |
279 | 0 | return FZ_BIDI_LTR; |
280 | | |
281 | | /* strong right */ |
282 | 0 | case BDI_R: |
283 | 0 | case BDI_AL: |
284 | 0 | return FZ_BIDI_RTL; |
285 | 0 | } |
286 | 0 | } |
287 | 0 | return FZ_BIDI_LTR; |
288 | 0 | } |
289 | | |
290 | | static fz_bidi_direction direction_from_type(fz_bidi_chartype type) |
291 | 15.2k | { |
292 | 15.2k | switch (type) |
293 | 15.2k | { |
294 | 12.0k | case BDI_L: |
295 | 12.5k | case BDI_EN: |
296 | 12.5k | return FZ_BIDI_LTR; |
297 | | |
298 | 0 | case BDI_R: |
299 | 0 | case BDI_AL: |
300 | 0 | return FZ_BIDI_RTL; |
301 | | |
302 | 2.64k | default: |
303 | 2.64k | return FZ_BIDI_NEUTRAL; |
304 | 15.2k | } |
305 | 15.2k | } |
306 | | |
307 | | static void |
308 | | classify_quoted_blocks(const uint32_t *text, |
309 | | fz_bidi_chartype *types, |
310 | | size_t len) |
311 | 244 | { |
312 | 244 | size_t i; |
313 | 244 | int inQuote = FALSE; |
314 | 244 | int pdfNeeded = FALSE; |
315 | 244 | int ltrFound = FALSE; |
316 | 244 | int rtlFound = FALSE; |
317 | | |
318 | | /* Only do anything special here if there is mixed content |
319 | | * (LTR *and* RTL) in the text. |
320 | | */ |
321 | 15.4k | for (i = 0; i < len; i++) |
322 | 15.2k | { |
323 | 15.2k | switch (direction_from_type(types[i])) |
324 | 15.2k | { |
325 | 12.5k | case FZ_BIDI_LTR: |
326 | 12.5k | ltrFound = TRUE; |
327 | 12.5k | break; |
328 | | |
329 | 0 | case FZ_BIDI_RTL: |
330 | 0 | rtlFound = TRUE; |
331 | 0 | break; |
332 | | |
333 | 2.64k | default: |
334 | 2.64k | break; |
335 | 15.2k | } |
336 | 15.2k | } |
337 | | |
338 | | /* Only make any changes if *both* LTR and RTL characters exist |
339 | | * in this text. |
340 | | */ |
341 | 244 | if (!ltrFound || !rtlFound) |
342 | 244 | { |
343 | 244 | return; |
344 | 244 | } |
345 | | |
346 | 0 | for (i = 0; i < len; i++) |
347 | 0 | { |
348 | 0 | if (text[i]=='"') |
349 | 0 | { |
350 | | /* If we're already in a quote then terminate it, |
351 | | * else start a new block. |
352 | | */ |
353 | 0 | if (inQuote) |
354 | 0 | { |
355 | 0 | inQuote = FALSE; |
356 | 0 | if (pdfNeeded) |
357 | 0 | { |
358 | 0 | pdfNeeded = FALSE; |
359 | 0 | types[i] = BDI_PDF; |
360 | 0 | } |
361 | 0 | } |
362 | 0 | else |
363 | 0 | { |
364 | 0 | size_t j; |
365 | 0 | int done = FALSE; |
366 | |
|
367 | 0 | inQuote = TRUE; |
368 | | |
369 | | /* Find the first strong right or left type and |
370 | | * use that to determine whether we should classify |
371 | | * the quote as LRE or RLE. Or neither, if we |
372 | | * hit another quote before any strongly-directional |
373 | | * character. |
374 | | */ |
375 | 0 | for (j = i + 1; !done && (j < len) && text[j] != '"'; ++j) |
376 | 0 | { |
377 | 0 | switch(types[j]) |
378 | 0 | { |
379 | 0 | case BDI_RLE: |
380 | 0 | case BDI_LRE: |
381 | 0 | done = TRUE; |
382 | 0 | break; |
383 | | |
384 | 0 | case BDI_L: |
385 | 0 | case BDI_EN: |
386 | 0 | types[i] = BDI_LRE; |
387 | 0 | pdfNeeded = TRUE; |
388 | 0 | done = TRUE; |
389 | 0 | break; |
390 | | |
391 | 0 | case BDI_R: |
392 | 0 | case BDI_AL: |
393 | 0 | types[i] = BDI_RLE; |
394 | 0 | pdfNeeded = TRUE; |
395 | 0 | done = TRUE; |
396 | 0 | break; |
397 | | |
398 | 0 | default: |
399 | 0 | break; |
400 | 0 | } |
401 | 0 | } |
402 | 0 | } |
403 | 0 | } |
404 | 0 | } |
405 | 0 | } |
406 | | |
407 | | /* Creates a buffer with an embedding level for every character in the |
408 | | * given text. Also determines the base level and returns it in |
409 | | * *baseDir if *baseDir does not initially contain a valid direction. |
410 | | */ |
411 | | static fz_bidi_level * |
412 | | create_levels(fz_context *ctx, |
413 | | const uint32_t *text, |
414 | | size_t len, |
415 | | fz_bidi_direction *baseDir, |
416 | | int resolveWhiteSpace, |
417 | | int flags) |
418 | 244 | { |
419 | 244 | fz_bidi_level *levels, *plevels; |
420 | 244 | fz_bidi_chartype *types = NULL; |
421 | 244 | fz_bidi_chartype *ptypes; |
422 | 244 | fz_bidi_level baseLevel; |
423 | 244 | const uint32_t *ptext; |
424 | 244 | size_t plen, remaining; |
425 | | |
426 | 244 | levels = Memento_label(fz_malloc(ctx, len * sizeof(*levels)), "bidi_levels"); |
427 | | |
428 | 244 | fz_var(types); |
429 | | |
430 | 488 | fz_try(ctx) |
431 | 488 | { |
432 | 244 | types = fz_malloc(ctx, len * sizeof(fz_bidi_chartype)); |
433 | | |
434 | 244 | classify_characters(text, types, len, flags); |
435 | | |
436 | 244 | if (*baseDir != FZ_BIDI_LTR && *baseDir != FZ_BIDI_RTL) |
437 | 0 | { |
438 | | /* Derive the base level from the text and |
439 | | * update *baseDir in case the caller wants to know. |
440 | | */ |
441 | 0 | baseLevel = base_level_from_text(types, len); |
442 | 0 | *baseDir = ODD(baseLevel)==1 ? FZ_BIDI_RTL : FZ_BIDI_LTR; |
443 | 0 | } |
444 | 244 | else |
445 | 244 | { |
446 | 244 | baseLevel = (fz_bidi_level)*baseDir; |
447 | 244 | } |
448 | | |
449 | 244 | { |
450 | | /* Replace tab with base direction, i.e. make tab appear as |
451 | | * 'strong left' if the base direction is left-to-right and |
452 | | * 'strong right' if base direction is right-to-left. This |
453 | | * allows Layout to implicitly treat tabs as 'segment separators'. |
454 | | */ |
455 | 244 | size_t i; |
456 | | |
457 | 15.4k | for (i = 0u; i < len; i++) |
458 | 15.2k | { |
459 | 15.2k | if (text[i]=='\t') |
460 | 0 | { |
461 | 0 | types[i] = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L; |
462 | 0 | } |
463 | 15.2k | } |
464 | 244 | } |
465 | | |
466 | | /* Look for quotation marks. Classify them as RLE or LRE |
467 | | * or leave them alone, depending on what follows them. |
468 | | */ |
469 | 244 | classify_quoted_blocks(text, types, len); |
470 | | |
471 | | /* Work one paragraph at a time. */ |
472 | 244 | plevels = levels; |
473 | 244 | ptypes = types; |
474 | 244 | ptext = text; |
475 | 244 | remaining = len; |
476 | 488 | while (remaining) |
477 | 244 | { |
478 | 244 | plen = fz_bidi_resolve_paragraphs(ptypes, remaining); |
479 | | |
480 | | /* Work out the levels and character types... */ |
481 | 244 | (void)fz_bidi_resolve_explicit(baseLevel, BDI_N, ptypes, plevels, plen, 0); |
482 | 244 | fz_bidi_resolve_weak(ctx, baseLevel, ptypes, plevels, plen); |
483 | 244 | fz_bidi_resolve_neutrals(baseLevel, ptypes, plevels, plen); |
484 | 244 | fz_bidi_resolve_implicit(ptypes, plevels, plen); |
485 | | |
486 | 244 | classify_characters(ptext, ptypes, plen, FZ_BIDI_CLASSIFY_WHITE_SPACE); |
487 | | |
488 | 244 | if (resolveWhiteSpace) |
489 | 0 | { |
490 | | /* resolve whitespace */ |
491 | 0 | fz_bidi_resolve_whitespace(baseLevel, ptypes, plevels, plen); |
492 | 0 | } |
493 | | |
494 | 244 | plevels += plen; |
495 | 244 | ptypes += plen; |
496 | 244 | ptext += plen; |
497 | 244 | remaining -= plen; |
498 | 244 | } |
499 | | |
500 | | /* The levels buffer now has odd and even numbers indicating |
501 | | * rtl or ltr characters, respectively. |
502 | | */ |
503 | | #ifdef DEBUG_BIDI_VERBOSE |
504 | | fprintf(stderr, "Levels: "); |
505 | | { |
506 | | size_t i; |
507 | | for (i = 0; i < len; i++) |
508 | | { |
509 | | fprintf(stderr, "%d", levels[i]>9?0:levels[i]); |
510 | | } |
511 | | fprintf(stderr, "\n"); |
512 | | } |
513 | | #endif |
514 | 244 | } |
515 | 488 | fz_always(ctx) |
516 | 244 | { |
517 | 244 | fz_free(ctx, types); |
518 | 244 | } |
519 | 244 | fz_catch(ctx) |
520 | 0 | { |
521 | 0 | fz_free(ctx, levels); |
522 | 0 | fz_rethrow(ctx); |
523 | 0 | } |
524 | 244 | return levels; |
525 | 244 | } |
526 | | |
527 | | /* Partitions the given character sequence into one or more unidirectional |
528 | | * fragments and invokes the given callback function for each fragment. |
529 | | */ |
530 | | void fz_bidi_fragment_text(fz_context *ctx, |
531 | | const uint32_t *text, |
532 | | size_t textlen, |
533 | | fz_bidi_direction *baseDir, |
534 | | fz_bidi_fragment_fn *callback, |
535 | | void *arg, |
536 | | int flags) |
537 | 244 | { |
538 | 244 | size_t startOfFragment; |
539 | 244 | size_t i; |
540 | 244 | fz_bidi_level *levels; |
541 | | |
542 | 244 | if (text == NULL || callback == NULL || textlen == 0) |
543 | 0 | return; |
544 | | |
545 | 244 | DBUGH(("fz_bidi_fragment_text('%S', len = %d)\n", text, textlen)); |
546 | | |
547 | 244 | levels = create_levels(ctx, text, textlen, baseDir, FALSE, flags); |
548 | | |
549 | | /* We now have an array with an embedding level |
550 | | * for each character in text. |
551 | | */ |
552 | 244 | assert(levels != NULL); |
553 | | |
554 | 488 | fz_try(ctx) |
555 | 488 | { |
556 | 244 | startOfFragment = 0; |
557 | 15.2k | for (i = 1; i < textlen; i++) |
558 | 14.9k | { |
559 | 14.9k | if (levels[i] != levels[i-1]) |
560 | 0 | { |
561 | | /* We've gone past the end of the fragment. |
562 | | * Create a text object for it, then start |
563 | | * a new fragment. |
564 | | */ |
565 | 0 | split_at_script(&text[startOfFragment], |
566 | 0 | i - startOfFragment, |
567 | 0 | levels[startOfFragment], |
568 | 0 | arg, |
569 | 0 | callback); |
570 | 0 | startOfFragment = i; |
571 | 0 | } |
572 | 14.9k | } |
573 | | /* Now i == textlen. Deal with the final (or maybe only) fragment. */ |
574 | | /* otherwise create 1 fragment */ |
575 | 244 | split_at_script(&text[startOfFragment], |
576 | 244 | i - startOfFragment, |
577 | 244 | levels[startOfFragment], |
578 | 244 | arg, |
579 | 244 | callback); |
580 | 244 | } |
581 | 488 | fz_always(ctx) |
582 | 244 | { |
583 | 244 | fz_free(ctx, levels); |
584 | 244 | } |
585 | 244 | fz_catch(ctx) |
586 | 0 | { |
587 | 0 | fz_rethrow(ctx); |
588 | 0 | } |
589 | 244 | } |