/src/mupdf/source/fitz/bidi-std.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Extracted from Bidi.cpp - version 26 |
2 | | |
3 | | // Reference implementation for Unicode Bidirectional Algorithm |
4 | | |
5 | | // Bidi include file |
6 | | #include "mupdf/fitz.h" |
7 | | #include "bidi-imp.h" |
8 | | |
9 | | #include <assert.h> |
10 | | |
11 | | #ifndef TRUE |
12 | | #define TRUE (1) |
13 | | #endif |
14 | | #ifndef FALSE |
15 | | #define FALSE (0) |
16 | | #endif |
17 | | |
18 | | /*------------------------------------------------------------------------ |
19 | | File: Bidi.Cpp |
20 | | |
21 | | Description |
22 | | ----------- |
23 | | |
24 | | Sample Implementation of the Unicode Bidirectional Algorithm as it |
25 | | was revised by Revision 5 of the Unicode Technical Report # 9 |
26 | | (1999-8-17) |
27 | | |
28 | | Verified for changes to the algorithm up through Unicode 5.2.0 (2009). |
29 | | |
30 | | This implementation is organized into several passes, each implemen- |
31 | | ting one or more of the rules of the Unicode Bidi Algorithm. The |
32 | | resolution of Weak Types and of Neutrals each use a state table |
33 | | approach. |
34 | | |
35 | | Both a printf based interface and a Windows DlgProc are provided for |
36 | | interactive testing. |
37 | | |
38 | | A stress harness comparing this implementation (v24) to a Java based |
39 | | implementation was used by Doug Felt to verify that the two |
40 | | implementations produce identical results for all strings up to six |
41 | | bidi classes and stochastic strings up to length 20. |
42 | | |
43 | | Version 26 was verified by the author against the Unicode 5.2.0 |
44 | | file BidiTest.txt, which provides an exhaustive text of strings of |
45 | | length 4 or less, but covers some important cases where the language |
46 | | in UAX#9 had been clarified. |
47 | | |
48 | | To see this code running in an actual Windows program, |
49 | | download the free Unibook utility from http://unicode.org/unibook |
50 | | The bidi demo is executed from the tools menu. It is build from |
51 | | this source file. |
52 | | |
53 | | Build Notes |
54 | | ----------- |
55 | | |
56 | | To compile the sample implementation please set the #define |
57 | | directives above so the correct headers get included. Not all the |
58 | | files are needed for all purposes. For the commandline version |
59 | | only bidi.h and bidi.cpp are needed. |
60 | | |
61 | | The Win32 version is provided as a dialog procedure. To use as a |
62 | | standalone program compile with the lbmain.cpp file. If all you |
63 | | need is the ability to run the code "as is", you can instead download |
64 | | the unibook utility from http://uincode.org/unibook/ which contains |
65 | | the bidi demo compiled from this source file. |
66 | | |
67 | | This code uses an extension to C++ that gives variables declared in |
68 | | a for() statement function the same scope as the for() statement. |
69 | | If your compiler does not support this extension, you may need to |
70 | | move the declaration, e.g. int ich = 0; in front of the for statement. |
71 | | |
72 | | Implementation Note |
73 | | ------------------- |
74 | | |
75 | | NOTE: The Unicode Bidirectional Algorithm removes all explicit |
76 | | formatting codes in rule X9, but states that this can be |
77 | | simulated by conformant implementations. This implementation |
78 | | attempts to demonstrate such a simulation |
79 | | |
80 | | To demonstrate this, the current implementation does the |
81 | | following: |
82 | | |
83 | | in resolveExplicit() |
84 | | - change LRE, LRO, RLE, RLO, PDF to BN |
85 | | - assign nested levels to BN |
86 | | |
87 | | in resolveWeak and resolveNeutrals |
88 | | - assign L and R to BN's where they exist in place of |
89 | | sor and eor by changing the last BN in front of a |
90 | | level change to a strong type |
91 | | - skip over BN's for the purpose of determining actions |
92 | | - include BN in the count of deferred runs |
93 | | which will resolve some of them to EN, AN and N |
94 | | |
95 | | in resolveWhiteSpace |
96 | | - set the level of any surviving BN to the base level, |
97 | | or the level of the preceding character |
98 | | - include LRE,LRO, RLE, RLO, PDF and BN in the count |
99 | | whitespace to be reset |
100 | | |
101 | | This will result in the same order for non-BN characters as |
102 | | if the BN characters had been removed. |
103 | | |
104 | | The clean() function can be used to remove boundary marks for |
105 | | verification purposes. |
106 | | |
107 | | Notation |
108 | | -------- |
109 | | Pointer variables generally start with the letter p |
110 | | Counter variables generally start with the letter c |
111 | | Index variables generally start with the letter i |
112 | | Boolean variables generally start with the letter f |
113 | | |
114 | | The enumerated bidirectional types have the same name as in the |
115 | | description for the Unicode Bidirectional Algorithm |
116 | | |
117 | | Using this code outside a demo context |
118 | | -------------------------------------- |
119 | | |
120 | | The way the functions are broken down in this demo code is based |
121 | | on the needs of the demo to show the evolution in internal state |
122 | | as the algorithm proceeds. This obscures how the algorithm would |
123 | | be used in practice. These are the steps: |
124 | | |
125 | | 1. Allocate a pair of arrays large enough to hold bidi class |
126 | | and calculated levels (one for each input character) |
127 | | |
128 | | 2. Provide your own function to assign directional types (bidi |
129 | | classes) corresponding to each character in the input, |
130 | | conflating ON, WS, S to N. Unlike the classify function in this |
131 | | demo, the input would be actual Unicode characters. |
132 | | |
133 | | 3. Process the first paragraph by calling BidiParagraph. That |
134 | | function changes B into BN and returns a length including the |
135 | | paragraph separator. (The iteration over multiple paragraphs |
136 | | is left as exercise for the reader). |
137 | | |
138 | | 4. Assign directional types again, but now assign specific types |
139 | | to whitespace characters. |
140 | | |
141 | | 5. Instead of reordering the input in place it is often desirable |
142 | | to calculate an array of offsets indicating the reordering. |
143 | | To that end, allocate such an array here and use it instead |
144 | | of the input array in the next step. |
145 | | |
146 | | 6. Resolve and reorder the lines by calling BidiLines. That |
147 | | function 'breaks' lines on LS characters. Provide an optional |
148 | | array of flags indicating the location of other line breaks as |
149 | | needed. |
150 | | |
151 | | Update History |
152 | | -------------- |
153 | | Version 24 is the initial published and verified version of this |
154 | | reference implementation. Version 25 and its updates fix various |
155 | | minor issues with the scaffolding used for demonstrating the |
156 | | algorithm using pseudo-alphabets from the command line or dialog |
157 | | box. No changes to the implementation of the actual bidi algorithm |
158 | | are made in any of the minor updates to version 25. Version 26 |
159 | | also makes no change to the actual algorithm but was verified |
160 | | against the official BidiTest.txt file for Unicode 5.2.0. |
161 | | |
162 | | - updated pseudo-alphabet |
163 | | |
164 | | - Last Revised 12-10-99 (25) |
165 | | |
166 | | - enable demo mode for release builds - no other changes |
167 | | |
168 | | - Last Revised 12-10-00 (25a) |
169 | | |
170 | | - fix regression in pseudo alphabet use for Windows UI |
171 | | |
172 | | - Last Revised 02-01-01 (25b) |
173 | | |
174 | | - fixed a few comments, renamed a variable |
175 | | |
176 | | - Last Revised 03-04-01 (25c) |
177 | | |
178 | | - make base level settable, enable mirror by default, |
179 | | fix dialog size |
180 | | |
181 | | - Last Revised 06-02-01 (25e) |
182 | | |
183 | | - fixed some comments |
184 | | |
185 | | - Last Revised 09-29-01 (25f) |
186 | | |
187 | | - fixed classification for LS,RLM,LRM in pseudo alphabet, |
188 | | focus issues in UI, regression fix to commandline from 25(e) |
189 | | fix DEMO switch |
190 | | |
191 | | - Last Revised 11-07-01 (25g) |
192 | | |
193 | | - fixed classification for plus/minus in pseudo alphabet |
194 | | to track changes made in Unicode 4.0.1 |
195 | | |
196 | | - Last Revised 12-03-04 (25h) |
197 | | |
198 | | - now compiles as dialog-only program for WINDOWS_UI==1 |
199 | | using new bidimain.cpp |
200 | | |
201 | | - Last Revised 12-02-07 (25i) |
202 | | |
203 | | - cleaned up whitespace and indenting in the source, |
204 | | fixed two comments (table headers) |
205 | | |
206 | | - Last Revised 15-03-07 (25j) |
207 | | |
208 | | - named enumerations |
209 | | |
210 | | - Last Revised 30-05-07 (25k) |
211 | | |
212 | | - added usage notes, minor edits to comments, indentation, etc |
213 | | throughout. Added the bidiParagraph function. Checked against |
214 | | changes in the Unicode Bidi Algorithm for Unicode 5.2.0. No |
215 | | changes needed to this implementation to match the values in |
216 | | the BidiTest.txt file in the Unicode Character Database. |
217 | | Minor fixes to dialog/windows proc, updated preprocessor directives. |
218 | | |
219 | | - Last Revised 03-08-09 (26) |
220 | | |
221 | | Credits: |
222 | | ------- |
223 | | Written by: Asmus Freytag |
224 | | Command line interface by: Rick McGowan |
225 | | Verification (v24): Doug Felt |
226 | | |
227 | | Disclaimer and legal rights: |
228 | | --------------------------- |
229 | | Copyright (C) 1999-2009, ASMUS, Inc. All Rights Reserved. |
230 | | Distributed under the Terms of Use in http://www.unicode.org/copyright.html. |
231 | | |
232 | | THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
233 | | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
234 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
235 | | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE |
236 | | BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, |
237 | | OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
238 | | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
239 | | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE SOFTWARE. |
240 | | |
241 | | The file bid.rc is included in the software covered by the above. |
242 | | ------------------------------------------------------------------------*/ |
243 | | |
244 | | /* === HELPER FUNCTIONS AND DECLARATIONS ================================= */ |
245 | | |
246 | 220k | #define odd(x) ((x) & 1) |
247 | | |
248 | | /*---------------------------------------------------------------------- |
249 | | The following array maps character codes to types for the purpose of |
250 | | this sample implementation. The legend string gives a human readable |
251 | | explanation of the pseudo alphabet. |
252 | | |
253 | | For simplicity, characters entered by buttons are given a 1:1 mapping |
254 | | between their type and pseudo character value. Pseudo characters that |
255 | | can be typed from the keyboard are explained in the legend string. |
256 | | |
257 | | Use the Unicode Character Database for the real values in real use. |
258 | | ---------------------------------------------------------------------*/ |
259 | | |
260 | | enum |
261 | | { |
262 | | chLS = 0x15 |
263 | | }; |
264 | | |
265 | | #if 0 |
266 | | static const fz_bidi_chartype types_from_char[] = |
267 | | { |
268 | | // 0 1 2 3 4 5 6 7 8 9 a b c d e f |
269 | | BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_S, BDI_B, BDI_S, BDI_WS, BDI_B, BDI_BN, BDI_BN, /*00-0f*/ |
270 | | BDI_LRO,BDI_LRE,BDI_PDF,BDI_RLO,BDI_RLE,BDI_WS, BDI_L, BDI_R, BDI_BN, BDI_BN, BDI_BN, BDI_BN, BDI_B, BDI_B, BDI_B, BDI_S, /*10-1f*/ |
271 | | BDI_WS, BDI_ON, BDI_ON, BDI_ET, BDI_ET, BDI_ET, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ES, BDI_CS, BDI_ES, BDI_CS, BDI_ES, /*20-2f*/ |
272 | | BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_EN, BDI_AN, BDI_AN, BDI_AN, BDI_AN, BDI_CS, BDI_ON, BDI_ON, BDI_ON, BDI_ON, BDI_ON, /*30-3f*/ |
273 | | BDI_ON, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_AL, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, /*40-4f*/ |
274 | | BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_R, BDI_LRE,BDI_ON, BDI_RLE,BDI_PDF,BDI_S, /*50-5f*/ |
275 | | BDI_NSM,BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, /*60-6f*/ |
276 | | BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_L, BDI_LRO,BDI_B, BDI_RLO,BDI_BN, BDI_ON, /*70-7f*/ |
277 | | }; |
278 | | #endif |
279 | | |
280 | | /*************************************** |
281 | | Reverse, human readable reference: |
282 | | |
283 | | LRM: 0x4 |
284 | | RLM: 0x5 |
285 | | L: 0x16,a-z |
286 | | LRE: 0x11,[ |
287 | | LRO: 0x10,{ |
288 | | R: 0x17,G-Z |
289 | | AL: A-F |
290 | | RLE: 0x14,] |
291 | | RLO: 0x13,} |
292 | | PDF: 0x12,^ |
293 | | EN: 0-5 |
294 | | ES: /,+,[hyphen] |
295 | | ET: #,$,% |
296 | | AN: 6-9 |
297 | | CS: [comma],.,: |
298 | | NSM: ` |
299 | | BN: 0x0-0x8,0xe,0xf,0x18-0x1b,~ |
300 | | B: 0xa,0xd,0x1c-0x1e,| |
301 | | S: 0x9,0xb,0x1f,_ |
302 | | WS: 0xc,0x15,[space] |
303 | | ON: !,",&,',(,),*,;,<,=,>,?,@,\,0x7f |
304 | | ****************************************/ |
305 | | |
306 | | // === HELPER FUNCTIONS ================================================ |
307 | | |
308 | | #ifdef BIDI_LINE_AT_A_TIME |
309 | | // reverse cch characters |
310 | | static |
311 | | void reverse(uint32_t *psz, int cch) |
312 | | { |
313 | | uint32_t ch_temp; |
314 | | int ich; |
315 | | |
316 | | for (ich = 0; ich < --cch; ich++) |
317 | | { |
318 | | ch_temp = psz[ich]; |
319 | | psz[ich] = psz[cch]; |
320 | | psz[cch] = ch_temp; |
321 | | } |
322 | | } |
323 | | #endif |
324 | | |
325 | | // Set a run of cval values at locations all prior to, but not including |
326 | | // iStart, to the new value nval. |
327 | | static |
328 | | void set_deferred_run(fz_bidi_chartype *pval, size_t cval, size_t iStart, fz_bidi_chartype nval) |
329 | 31.8k | { |
330 | 31.8k | size_t i; |
331 | | |
332 | 100k | for (i = iStart; i > iStart - cval; ) |
333 | 68.2k | { |
334 | 68.2k | pval[--i] = nval; |
335 | 68.2k | } |
336 | 31.8k | } |
337 | | |
338 | | static |
339 | | void set_deferred_level_run(fz_bidi_level *pval, size_t cval, size_t iStart, fz_bidi_level nval) |
340 | 0 | { |
341 | 0 | size_t i; |
342 | |
|
343 | 0 | for (i = iStart; i > iStart - cval; ) |
344 | 0 | { |
345 | 0 | pval[--i] = nval; |
346 | 0 | } |
347 | 0 | } |
348 | | |
349 | | // === ASSIGNING BIDI CLASSES ============================================ |
350 | | |
351 | | // === THE PARAGRAPH LEVEL =============================================== |
352 | | |
353 | | /*------------------------------------------------------------------------ |
354 | | Function: resolve_paragraphs |
355 | | |
356 | | Resolves the input strings into blocks over which the algorithm |
357 | | is then applied. |
358 | | |
359 | | Implements Rule P1 of the Unicode Bidi Algorithm |
360 | | |
361 | | Input: Text string |
362 | | Character count |
363 | | |
364 | | Output: revised character count |
365 | | |
366 | | Note: This is a very simplistic function. In effect it restricts |
367 | | the action of the algorithm to the first paragraph in the input |
368 | | where a paragraph ends at the end of the first block separator |
369 | | or at the end of the input text. |
370 | | |
371 | | ------------------------------------------------------------------------*/ |
372 | | size_t fz_bidi_resolve_paragraphs(fz_bidi_chartype *types, size_t cch) |
373 | 1.22k | { |
374 | 1.22k | size_t ich; |
375 | | |
376 | | // skip characters not of type B |
377 | 213k | for(ich = 0; ich < cch && types[ich] != BDI_B; ich++) |
378 | 212k | ; |
379 | | // stop after first B, make it a BN for use in the next steps |
380 | 1.22k | if (ich < cch && types[ich] == BDI_B) |
381 | 11 | types[ich++] = BDI_BN; |
382 | 1.22k | return ich; |
383 | 1.22k | } |
384 | | |
385 | | #if 0 |
386 | | /*------------------------------------------------------------------------ |
387 | | Function: base_level |
388 | | |
389 | | Determines the base level |
390 | | |
391 | | Implements rule P2 of the Unicode Bidi Algorithm. |
392 | | |
393 | | Input: Array of directional classes |
394 | | Character count |
395 | | |
396 | | Note: Ignores explicit embeddings |
397 | | ------------------------------------------------------------------------*/ |
398 | | static int base_level(const fz_bidi_chartype *pcls, int cch) |
399 | | { |
400 | | int ich; |
401 | | |
402 | | for (ich = 0; ich < cch; ich++) |
403 | | { |
404 | | switch (pcls[ich]) |
405 | | { |
406 | | // strong left |
407 | | case BDI_L: |
408 | | return 0; |
409 | | |
410 | | // strong right |
411 | | case BDI_R: |
412 | | case BDI_AL: |
413 | | return 1; |
414 | | } |
415 | | } |
416 | | return 0; |
417 | | } |
418 | | #endif |
419 | | |
420 | | //====== RESOLVE EXPLICIT ================================================ |
421 | | |
422 | | static fz_bidi_level greater_even(fz_bidi_level i) |
423 | 86 | { |
424 | 86 | return odd(i) ? i + 1 : i + 2; |
425 | 86 | } |
426 | | |
427 | | static fz_bidi_level greater_odd(fz_bidi_level i) |
428 | 98 | { |
429 | 98 | return odd(i) ? i + 2 : i + 1; |
430 | 98 | } |
431 | | |
432 | | static fz_bidi_chartype embedding_direction(fz_bidi_chartype level) |
433 | 5.11k | { |
434 | 5.11k | return odd(level) ? BDI_R : BDI_L; |
435 | 5.11k | } |
436 | | |
437 | | /*------------------------------------------------------------------------ |
438 | | Function: resolveExplicit |
439 | | |
440 | | Recursively resolves explicit embedding levels and overrides. |
441 | | Implements rules X1-X9, of the Unicode Bidirectional Algorithm. |
442 | | |
443 | | Input: Base embedding level and direction |
444 | | Character count |
445 | | |
446 | | Output: Array of embedding levels |
447 | | Caller must allocate (one level per input character) |
448 | | |
449 | | In/Out: Array of direction classes |
450 | | |
451 | | Note: The function uses two simple counters to keep track of |
452 | | matching explicit codes and PDF. Use the default argument for |
453 | | the outermost call. The nesting counter counts the recursion |
454 | | depth and not the embedding level. |
455 | | ------------------------------------------------------------------------*/ |
456 | | size_t fz_bidi_resolve_explicit(fz_bidi_level level, fz_bidi_chartype dir, fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch, |
457 | | fz_bidi_level n_nest) |
458 | 1.31k | { |
459 | 1.31k | size_t ich; |
460 | | |
461 | | // always called with a valid nesting level |
462 | | // nesting levels are != embedding levels |
463 | 1.31k | int nLastValid = n_nest; |
464 | | |
465 | | // check input values |
466 | 1.31k | assert(n_nest >= 0 && level >= 0 && level <= BIDI_LEVEL_MAX); |
467 | | |
468 | | // process the text |
469 | 213k | for (ich = 0; ich < cch; ich++) |
470 | 212k | { |
471 | 212k | fz_bidi_chartype cls = pcls[ich]; |
472 | 212k | switch (cls) |
473 | 212k | { |
474 | 42 | case BDI_LRO: |
475 | 43 | case BDI_LRE: |
476 | 43 | n_nest++; |
477 | 43 | if (greater_even(level) <= BIDI_LEVEL_MAX) |
478 | 43 | { |
479 | 43 | plevel[ich] = greater_even(level); |
480 | 43 | pcls[ich] = BDI_BN; |
481 | 43 | ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_LRE ? BDI_N : BDI_L), |
482 | 43 | &pcls[ich+1], &plevel[ich+1], |
483 | 43 | cch - (ich+1), n_nest); |
484 | 43 | n_nest--; |
485 | 43 | continue; |
486 | 43 | } |
487 | 0 | cls = pcls[ich] = BDI_BN; |
488 | 0 | break; |
489 | | |
490 | 0 | case BDI_RLO: |
491 | 49 | case BDI_RLE: |
492 | 49 | n_nest++; |
493 | 49 | if (greater_odd(level) <= BIDI_LEVEL_MAX) |
494 | 49 | { |
495 | 49 | plevel[ich] = greater_odd(level); |
496 | 49 | pcls[ich] = BDI_BN; |
497 | 49 | ich += fz_bidi_resolve_explicit(plevel[ich], (cls == BDI_RLE ? BDI_N : BDI_R), |
498 | 49 | &pcls[ich+1], &plevel[ich+1], |
499 | 49 | cch - (ich+1), n_nest); |
500 | 49 | n_nest--; |
501 | 49 | continue; |
502 | 49 | } |
503 | 0 | cls = pcls[ich] = BDI_BN; |
504 | 0 | break; |
505 | | |
506 | 1 | case BDI_PDF: |
507 | 1 | cls = pcls[ich] = BDI_BN; |
508 | 1 | if (n_nest) |
509 | 0 | { |
510 | 0 | if (nLastValid < n_nest) |
511 | 0 | { |
512 | 0 | n_nest--; |
513 | 0 | } |
514 | 0 | else |
515 | 0 | { |
516 | 0 | cch = ich; // break the loop, but complete body |
517 | 0 | } |
518 | 0 | } |
519 | 1 | break; |
520 | 212k | } |
521 | | |
522 | | // Apply the override |
523 | 212k | if (dir != BDI_N) |
524 | 16.7k | { |
525 | 16.7k | cls = dir; |
526 | 16.7k | } |
527 | 212k | plevel[ich] = level; |
528 | 212k | if (pcls[ich] != BDI_BN) |
529 | 212k | pcls[ich] = cls; |
530 | 212k | } |
531 | | |
532 | 1.31k | return ich; |
533 | 1.31k | } |
534 | | |
535 | | // === RESOLVE WEAK TYPES ================================================ |
536 | | |
537 | | enum bidi_state // possible states |
538 | | { |
539 | | xa, // arabic letter |
540 | | xr, // right letter |
541 | | xl, // left letter |
542 | | |
543 | | ao, // arabic lett. foll by ON |
544 | | ro, // right lett. foll by ON |
545 | | lo, // left lett. foll by ON |
546 | | |
547 | | rt, // ET following R |
548 | | lt, // ET following L |
549 | | |
550 | | cn, // EN, AN following AL |
551 | | ra, // arabic number foll R |
552 | | re, // european number foll R |
553 | | la, // arabic number foll L |
554 | | le, // european number foll L |
555 | | |
556 | | ac, // CS following cn |
557 | | rc, // CS following ra |
558 | | rs, // CS,ES following re |
559 | | lc, // CS following la |
560 | | ls, // CS,ES following le |
561 | | |
562 | | ret, // ET following re |
563 | | let // ET following le |
564 | | } ; |
565 | | |
566 | | static const unsigned char state_weak[][10] = |
567 | | { |
568 | | // N, L, R, AN, EN, AL,NSM, CS, ES, ET, |
569 | | /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* arabic letter */ |
570 | | /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */ |
571 | | /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */ |
572 | | |
573 | | /*ao*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* arabic lett. foll by ON*/ |
574 | | /*ro*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* right lett. foll by ON */ |
575 | | /*lo*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* left lett. foll by ON */ |
576 | | |
577 | | /*rt*/ { ro, xl, xr, ra, re, xa, rt, ro, ro, rt }, /* ET following R */ |
578 | | /*lt*/ { lo, xl, xr, la, le, xa, lt, lo, lo, lt }, /* ET following L */ |
579 | | |
580 | | /*cn*/ { ao, xl, xr, cn, cn, xa, cn, ac, ao, ao }, /* EN, AN following AL */ |
581 | | /*ra*/ { ro, xl, xr, ra, re, xa, ra, rc, ro, rt }, /* arabic number foll R */ |
582 | | /*re*/ { ro, xl, xr, ra, re, xa, re, rs, rs,ret }, /* european number foll R */ |
583 | | /*la*/ { lo, xl, xr, la, le, xa, la, lc, lo, lt }, /* arabic number foll L */ |
584 | | /*le*/ { lo, xl, xr, la, le, xa, le, ls, ls,let }, /* european number foll L */ |
585 | | |
586 | | /*ac*/ { ao, xl, xr, cn, cn, xa, ao, ao, ao, ao }, /* CS following cn */ |
587 | | /*rc*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS following ra */ |
588 | | /*rs*/ { ro, xl, xr, ra, re, xa, ro, ro, ro, rt }, /* CS,ES following re */ |
589 | | /*lc*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS following la */ |
590 | | /*ls*/ { lo, xl, xr, la, le, xa, lo, lo, lo, lt }, /* CS,ES following le */ |
591 | | |
592 | | /*ret*/ { ro, xl, xr, ra, re, xa,ret, ro, ro,ret }, /* ET following re */ |
593 | | /*let*/ { lo, xl, xr, la, le, xa,let, lo, lo,let } /* ET following le */ |
594 | | |
595 | | }; |
596 | | |
597 | | enum bidi_action // possible actions |
598 | | { |
599 | | // primitives |
600 | | IX = 0x100, // increment |
601 | | XX = 0xF, // no-op |
602 | | |
603 | | // actions |
604 | | xxx = (XX << 4) + XX, // no-op |
605 | | xIx = IX + xxx, // increment run |
606 | | xxN = (XX << 4) + BDI_ON, // set current to N |
607 | | xxE = (XX << 4) + BDI_EN, // set current to EN |
608 | | xxA = (XX << 4) + BDI_AN, // set current to AN |
609 | | xxR = (XX << 4) + BDI_R, // set current to R |
610 | | xxL = (XX << 4) + BDI_L, // set current to L |
611 | | Nxx = (BDI_ON << 4) + 0xF, // set run to neutral |
612 | | Axx = (BDI_AN << 4) + 0xF, // set run to AN |
613 | | ExE = (BDI_EN << 4) + BDI_EN, // set run to EN, set current to EN |
614 | | NIx = (BDI_ON << 4) + 0xF + IX, // set run to N, increment |
615 | | NxN = (BDI_ON << 4) + BDI_ON, // set run to N, set current to N |
616 | | NxR = (BDI_ON << 4) + BDI_R, // set run to N, set current to R |
617 | | NxE = (BDI_ON << 4) + BDI_EN, // set run to N, set current to EN |
618 | | |
619 | | AxA = (BDI_AN << 4) + BDI_AN, // set run to AN, set current to AN |
620 | | NxL = (BDI_ON << 4) + BDI_L, // set run to N, set current to L |
621 | | LxL = (BDI_L << 4) + BDI_L // set run to L, set current to L |
622 | | }; |
623 | | |
624 | | typedef uint16_t fz_bidi_action; |
625 | | |
626 | | static const fz_bidi_action action_weak[][10] = |
627 | | { |
628 | | // N,.. L, R, AN, EN, AL, NSM, CS,..ES, ET, |
629 | | /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* arabic letter */ |
630 | | /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */ |
631 | | /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */ |
632 | | |
633 | | /*ao*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN }, /* arabic lett. foll by ON */ |
634 | | /*ro*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx }, /* right lett. foll by ON */ |
635 | | /*lo*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx }, /* left lett. foll by ON */ |
636 | | |
637 | | /*rt*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx }, /* ET following R */ |
638 | | /*lt*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx }, /* ET following L */ |
639 | | |
640 | | /*cn*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN }, /* EN, AN following AL */ |
641 | | /*ra*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll R */ |
642 | | /*re*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE }, /* european number foll R */ |
643 | | /*la*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx }, /* arabic number foll L */ |
644 | | /*le*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL }, /* european number foll L */ |
645 | | |
646 | | /*ac*/ { Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN }, /* CS following cn */ |
647 | | /*rc*/ { Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx }, /* CS following ra */ |
648 | | /*rs*/ { Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following re */ |
649 | | /*lc*/ { Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx }, /* CS following la */ |
650 | | /*ls*/ { Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx }, /* CS,ES following le */ |
651 | | |
652 | | /*ret*/{ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE }, /* ET following re */ |
653 | | /*let*/{ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL } /* ET following le */ |
654 | | }; |
655 | | |
656 | | static |
657 | | fz_bidi_chartype get_deferred_type(fz_bidi_action action) |
658 | 213k | { |
659 | 213k | return (action >> 4) & 0xF; |
660 | 213k | } |
661 | | |
662 | | static |
663 | | fz_bidi_chartype get_resolved_type(fz_bidi_action action) |
664 | 212k | { |
665 | 212k | return action & 0xF; |
666 | 212k | } |
667 | | |
668 | | /* Note on action table: |
669 | | |
670 | | States can be of two kinds: |
671 | | - Immediate Resolution State, where each input token |
672 | | is resolved as soon as it is seen. These states have |
673 | | only single action codes (xxN) or the no-op (xxx) |
674 | | for static input tokens. |
675 | | - Deferred Resolution State, where input tokens either |
676 | | either extend the run (xIx) or resolve its Type (e.g. Nxx). |
677 | | |
678 | | Input classes are of three kinds |
679 | | - Static Input Token, where the class of the token remains |
680 | | unchanged on output (AN, L, N, R) |
681 | | - Replaced Input Token, where the class of the token is |
682 | | always replaced on output (AL, BDI_BN, NSM, CS, ES, ET) |
683 | | - Conditional Input Token, where the class of the token is |
684 | | changed on output in some but not all cases (EN) |
685 | | |
686 | | Where tokens are subject to change, a double action |
687 | | (e.g. NxA, or NxN) is _required_ after deferred states, |
688 | | resolving both the deferred state and changing the current token. |
689 | | |
690 | | These properties of the table are verified by assertions below. |
691 | | This code is needed only during debugging and maintenance |
692 | | */ |
693 | | |
694 | | /*------------------------------------------------------------------------ |
695 | | Function: resolveWeak |
696 | | |
697 | | Resolves the directionality of numeric and other weak character types |
698 | | |
699 | | Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. |
700 | | |
701 | | Input: Array of embedding levels |
702 | | Character count |
703 | | |
704 | | In/Out: Array of directional classes |
705 | | |
706 | | Note: On input only these directional classes are expected |
707 | | AL, HL, R, L, ON, BDI_BN, NSM, AN, EN, ES, ET, CS, |
708 | | ------------------------------------------------------------------------*/ |
709 | | void fz_bidi_resolve_weak(fz_context *ctx, fz_bidi_level baselevel, fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch) |
710 | 1.22k | { |
711 | 1.22k | int state = odd(baselevel) ? xr : xl; |
712 | 1.22k | fz_bidi_chartype cls; |
713 | 1.22k | size_t ich; |
714 | 1.22k | fz_bidi_action action; |
715 | 1.22k | fz_bidi_chartype cls_run; |
716 | 1.22k | fz_bidi_chartype cls_new; |
717 | | |
718 | 1.22k | fz_bidi_level level = baselevel; |
719 | | |
720 | 1.22k | size_t cch_run = 0; |
721 | | |
722 | 213k | for (ich = 0; ich < cch; ich++) |
723 | 212k | { |
724 | 212k | if (pcls[ich] > BDI_BN) { |
725 | 0 | fz_warn(ctx, "error: pcls[%zu] > BN (%d)\n", ich, pcls[ich]); |
726 | 0 | } |
727 | | |
728 | | // ignore boundary neutrals |
729 | 212k | if (pcls[ich] == BDI_BN) |
730 | 588 | { |
731 | | // must flatten levels unless at a level change; |
732 | 588 | plevel[ich] = level; |
733 | | |
734 | | // lookahead for level changes |
735 | 588 | if (ich + 1 == cch && level != baselevel) |
736 | 11 | { |
737 | | // have to fixup last BN before end of the loop, since |
738 | | // its fix-upped value will be needed below the assert |
739 | 11 | pcls[ich] = embedding_direction(level); |
740 | 11 | } |
741 | 577 | else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BDI_BN) |
742 | 92 | { |
743 | | // fixup LAST BN in front / after a level run to make |
744 | | // it act like the SOR/EOR in rule X10 |
745 | 92 | int newlevel = plevel[ich+1]; |
746 | 92 | if (level > newlevel) { |
747 | 0 | newlevel = level; |
748 | 0 | } |
749 | 92 | plevel[ich] = newlevel; |
750 | | |
751 | | // must match assigned level |
752 | 92 | pcls[ich] = embedding_direction(newlevel); |
753 | 92 | level = plevel[ich+1]; |
754 | 92 | } |
755 | 485 | else |
756 | 485 | { |
757 | | // don't interrupt runs |
758 | 485 | if (cch_run) |
759 | 382 | { |
760 | 382 | cch_run++; |
761 | 382 | } |
762 | 485 | continue; |
763 | 485 | } |
764 | 588 | } |
765 | | |
766 | 212k | assert(pcls[ich] <= BDI_BN); |
767 | 212k | cls = pcls[ich]; |
768 | | |
769 | 212k | action = action_weak[state][cls]; |
770 | | |
771 | | // resolve the directionality for deferred runs |
772 | 212k | cls_run = get_deferred_type(action); |
773 | 212k | if (cls_run != XX) |
774 | 2.36k | { |
775 | 2.36k | set_deferred_run(pcls, cch_run, ich, cls_run); |
776 | 2.36k | cch_run = 0; |
777 | 2.36k | } |
778 | | |
779 | | // resolve the directionality class at the current location |
780 | 212k | cls_new = get_resolved_type(action); |
781 | 212k | if (cls_new != XX) |
782 | 29.3k | pcls[ich] = cls_new; |
783 | | |
784 | | // increment a deferred run |
785 | 212k | if (IX & action) |
786 | 2.68k | cch_run++; |
787 | | |
788 | 212k | state = state_weak[state][cls]; |
789 | 212k | } |
790 | | |
791 | | // resolve any deferred runs |
792 | | // use the direction of the current level to emulate PDF |
793 | 1.22k | cls = embedding_direction(level); |
794 | | |
795 | | // resolve the directionality for deferred runs |
796 | 1.22k | cls_run = get_deferred_type(action_weak[state][cls]); |
797 | 1.22k | if (cls_run != XX) |
798 | 3 | set_deferred_run(pcls, cch_run, ich, cls_run); |
799 | 1.22k | } |
800 | | |
801 | | // === RESOLVE NEUTRAL TYPES ============================================== |
802 | | |
803 | | // action values |
804 | | enum neutral_action |
805 | | { |
806 | | // action to resolve previous input |
807 | | nL = BDI_L, // resolve EN to L |
808 | | En = 3 << 4, // resolve neutrals run to embedding level direction |
809 | | Rn = BDI_R << 4, // resolve neutrals run to strong right |
810 | | Ln = BDI_L << 4, // resolved neutrals run to strong left |
811 | | In = (1<<8), // increment count of deferred neutrals |
812 | | LnL = (1<<4)+BDI_L // set run and EN to L |
813 | | }; |
814 | | |
815 | | static fz_bidi_chartype |
816 | | get_deferred_neutrals(fz_bidi_action action, fz_bidi_level level) |
817 | 213k | { |
818 | 213k | action = (action >> 4) & 0xF; |
819 | 213k | if (action == (En >> 4)) |
820 | 2.57k | return embedding_direction(level); |
821 | 211k | else |
822 | 211k | return action; |
823 | 213k | } |
824 | | |
825 | | static fz_bidi_chartype get_resolved_neutrals(fz_bidi_action action) |
826 | 212k | { |
827 | 212k | action = action & 0xF; |
828 | | |
829 | | /* RJW: The original code does: |
830 | | * if (action == In) |
831 | | * return 0; |
832 | | * else |
833 | | * return action; |
834 | | * BUT, this can never be true, as In == 256. |
835 | | * This upsets coverity. We fix this here, but include this note |
836 | | * so that we understand what changed in case we ever update to |
837 | | * a newer release of the bidirectional code. |
838 | | */ |
839 | 212k | return action; |
840 | 212k | } |
841 | | |
842 | | // state values |
843 | | enum neutral_state |
844 | | { |
845 | | // new temporary class |
846 | | r, // R and characters resolved to R |
847 | | l, // L and characters resolved to L |
848 | | rn, // N preceded by right |
849 | | ln, // N preceded by left |
850 | | a, // AN preceded by left (the abbrev 'la' is used up above) |
851 | | na // N preceded by a |
852 | | } ; |
853 | | |
854 | | /*------------------------------------------------------------------------ |
855 | | Notes: |
856 | | |
857 | | By rule W7, whenever a EN is 'dominated' by an L (including start of |
858 | | run with embedding direction = L) it is resolved to, and further treated |
859 | | as L. |
860 | | |
861 | | This leads to the need for 'a' and 'na' states. |
862 | | ------------------------------------------------------------------------*/ |
863 | | |
864 | | static const int action_neutrals[][5] = |
865 | | { |
866 | | // N, L, R, AN, EN, = cls |
867 | | // state = |
868 | | {In, 0, 0, 0, 0}, // r right |
869 | | {In, 0, 0, 0, BDI_L}, // l left |
870 | | |
871 | | {In, En, Rn, Rn, Rn}, // rn N preceded by right |
872 | | {In, Ln, En, En, LnL}, // ln N preceded by left |
873 | | |
874 | | {In, 0, 0, 0, BDI_L}, // a AN preceded by left |
875 | | {In, En, Rn, Rn, En} // na N preceded by a |
876 | | } ; |
877 | | |
878 | | static const int state_neutrals[][5] = |
879 | | { |
880 | | // N, L, R, AN, EN = cls |
881 | | // state = |
882 | | {rn, l, r, r, r}, // r right |
883 | | {ln, l, r, a, l}, // l left |
884 | | |
885 | | {rn, l, r, r, r}, // rn N preceded by right |
886 | | {ln, l, r, a, l}, // ln N preceded by left |
887 | | |
888 | | {na, l, r, a, l}, // a AN preceded by left |
889 | | {na, l, r, a, l} // na N preceded by la |
890 | | } ; |
891 | | |
892 | | /*------------------------------------------------------------------------ |
893 | | Function: resolveNeutrals |
894 | | |
895 | | Resolves the directionality of neutral character types. |
896 | | |
897 | | Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm. |
898 | | |
899 | | Input: Array of embedding levels |
900 | | Character count |
901 | | Baselevel |
902 | | |
903 | | In/Out: Array of directional classes |
904 | | |
905 | | Note: On input only these directional classes are expected |
906 | | R, L, N, AN, EN and BN |
907 | | |
908 | | W8 resolves a number of ENs to L |
909 | | ------------------------------------------------------------------------*/ |
910 | | void fz_bidi_resolve_neutrals(fz_bidi_level baselevel, fz_bidi_chartype *pcls, const fz_bidi_level *plevel, size_t cch) |
911 | 1.22k | { |
912 | | // the state at the start of text depends on the base level |
913 | 1.22k | int state = odd(baselevel) ? r : l; |
914 | 1.22k | fz_bidi_chartype cls; |
915 | 1.22k | size_t ich; |
916 | 1.22k | fz_bidi_chartype cls_run; |
917 | | |
918 | 1.22k | size_t cch_run = 0; |
919 | 1.22k | fz_bidi_level level = baselevel; |
920 | | |
921 | 213k | for (ich = 0; ich < cch; ich++) |
922 | 212k | { |
923 | 212k | int action; |
924 | 212k | fz_bidi_chartype cls_new; |
925 | | |
926 | | // ignore boundary neutrals |
927 | 212k | if (pcls[ich] == BDI_BN) |
928 | 103 | { |
929 | | // include in the count for a deferred run |
930 | 103 | if (cch_run) |
931 | 18 | cch_run++; |
932 | | |
933 | | // skip any further processing |
934 | 103 | continue; |
935 | 103 | } |
936 | | |
937 | 212k | assert(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed" |
938 | 212k | cls = pcls[ich]; |
939 | | |
940 | 212k | action = action_neutrals[state][cls]; |
941 | | |
942 | | // resolve the directionality for deferred runs |
943 | 212k | cls_run = get_deferred_neutrals(action, level); |
944 | 212k | if (cls_run != BDI_N) |
945 | 29.1k | { |
946 | 29.1k | set_deferred_run(pcls, cch_run, ich, cls_run); |
947 | 29.1k | cch_run = 0; |
948 | 29.1k | } |
949 | | |
950 | | // resolve the directionality class at the current location |
951 | 212k | cls_new = get_resolved_neutrals(action); |
952 | 212k | if (cls_new != BDI_N) |
953 | 0 | pcls[ich] = cls_new; |
954 | | |
955 | 212k | if (In & action) |
956 | 65.1k | cch_run++; |
957 | | |
958 | 212k | state = state_neutrals[state][cls]; |
959 | 212k | level = plevel[ich]; |
960 | 212k | } |
961 | | |
962 | | // resolve any deferred runs |
963 | 1.22k | cls = embedding_direction(level); // eor has type of current level |
964 | | |
965 | | // resolve the directionality for deferred runs |
966 | 1.22k | cls_run = get_deferred_neutrals(action_neutrals[state][cls], level); |
967 | 1.22k | if (cls_run != BDI_N) |
968 | 319 | set_deferred_run(pcls, cch_run, ich, cls_run); |
969 | 1.22k | } |
970 | | |
971 | | // === RESOLVE IMPLICITLY ================================================= |
972 | | |
973 | | /*------------------------------------------------------------------------ |
974 | | Function: resolveImplicit |
975 | | |
976 | | Recursively resolves implicit embedding levels. |
977 | | Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. |
978 | | |
979 | | Input: Array of direction classes |
980 | | Character count |
981 | | Base level |
982 | | |
983 | | In/Out: Array of embedding levels |
984 | | |
985 | | Note: levels may exceed 15 on output. |
986 | | Accepted subset of direction classes |
987 | | R, L, AN, EN |
988 | | ------------------------------------------------------------------------*/ |
989 | | static const fz_bidi_level add_level[][4] = |
990 | | { |
991 | | // L, R, AN, EN = cls |
992 | | // level = |
993 | | /* even */ { 0, 1, 2, 2 }, // EVEN |
994 | | /* odd */ { 1, 0, 1, 1 } // ODD |
995 | | |
996 | | }; |
997 | | |
998 | | void fz_bidi_resolve_implicit(const fz_bidi_chartype *pcls, fz_bidi_level *plevel, size_t cch) |
999 | 1.22k | { |
1000 | 1.22k | size_t ich; |
1001 | | |
1002 | 213k | for (ich = 0; ich < cch; ich++) |
1003 | 212k | { |
1004 | | // cannot resolve bn here, since some bn were resolved to strong |
1005 | | // types in resolveWeak. To remove these we need the original |
1006 | | // types, which are available again in resolveWhiteSpace |
1007 | 212k | if (pcls[ich] == BDI_BN) |
1008 | 85 | { |
1009 | 85 | continue; |
1010 | 85 | } |
1011 | 212k | assert(pcls[ich] > 0); // "No Neutrals allowed to survive here." |
1012 | 212k | assert(pcls[ich] < 5); // "Out of range." |
1013 | 212k | plevel[ich] += add_level[odd(plevel[ich])][pcls[ich] - 1]; |
1014 | 212k | } |
1015 | 1.22k | } |
1016 | | |
1017 | | #if 0 |
1018 | | // === REORDER =========================================================== |
1019 | | /*------------------------------------------------------------------------ |
1020 | | Function: resolve_lines |
1021 | | |
1022 | | Breaks a paragraph into lines |
1023 | | |
1024 | | Input: Character count |
1025 | | Array of line break flags |
1026 | | In/Out: Array of characters |
1027 | | |
1028 | | Returns the count of characters on the first line |
1029 | | |
1030 | | Note: This function only breaks lines at hard line breaks. Other |
1031 | | line breaks can be passed in. If pbrk[n] is true, then a break |
1032 | | occurs after the character in psz_input[n]. Breaks before the first |
1033 | | character are not allowed. |
1034 | | ------------------------------------------------------------------------*/ |
1035 | | static int resolve_lines(uint32_t *psz_input, int *pbrk, int cch) |
1036 | | { |
1037 | | int ich; |
1038 | | |
1039 | | // skip characters not of type LS |
1040 | | for(ich = 0; ich < cch; ich++) |
1041 | | { |
1042 | | if (psz_input[ich] == chLS || (pbrk && pbrk[ich])) |
1043 | | { |
1044 | | ich++; |
1045 | | break; |
1046 | | } |
1047 | | } |
1048 | | |
1049 | | return ich; |
1050 | | } |
1051 | | #endif |
1052 | | |
1053 | | /*------------------------------------------------------------------------ |
1054 | | Function: fz_bidi_resolve_whitespace |
1055 | | |
1056 | | Resolves levels for WS and S |
1057 | | Implements rule L1 of the Unicode bidi Algorithm. |
1058 | | |
1059 | | Input: Base embedding level |
1060 | | Character count |
1061 | | Array of direction classes (for one line of text) |
1062 | | |
1063 | | In/Out: Array of embedding levels (for one line of text) |
1064 | | |
1065 | | Note: this should be applied a line at a time. The default driver |
1066 | | code supplied in this file assumes a single line of text; for |
1067 | | a real implementation, cch and the initial pointer values |
1068 | | would have to be adjusted. |
1069 | | ------------------------------------------------------------------------*/ |
1070 | | void fz_bidi_resolve_whitespace(fz_bidi_level baselevel, const fz_bidi_chartype *pcls, fz_bidi_level *plevel, |
1071 | | size_t cch) |
1072 | 0 | { |
1073 | 0 | size_t cchrun = 0; |
1074 | 0 | fz_bidi_level oldlevel = baselevel; |
1075 | 0 | size_t ich; |
1076 | |
|
1077 | 0 | for (ich = 0; ich < cch; ich++) |
1078 | 0 | { |
1079 | 0 | switch(pcls[ich]) |
1080 | 0 | { |
1081 | 0 | default: |
1082 | 0 | cchrun = 0; // any other character breaks the run |
1083 | 0 | break; |
1084 | 0 | case BDI_WS: |
1085 | 0 | cchrun++; |
1086 | 0 | break; |
1087 | | |
1088 | 0 | case BDI_RLE: |
1089 | 0 | case BDI_LRE: |
1090 | 0 | case BDI_LRO: |
1091 | 0 | case BDI_RLO: |
1092 | 0 | case BDI_PDF: |
1093 | 0 | case BDI_BN: |
1094 | 0 | plevel[ich] = oldlevel; |
1095 | 0 | cchrun++; |
1096 | 0 | break; |
1097 | | |
1098 | 0 | case BDI_S: |
1099 | 0 | case BDI_B: |
1100 | | // reset levels for WS before eot |
1101 | 0 | set_deferred_level_run(plevel, cchrun, ich, baselevel); |
1102 | 0 | cchrun = 0; |
1103 | 0 | plevel[ich] = baselevel; |
1104 | 0 | break; |
1105 | 0 | } |
1106 | 0 | oldlevel = plevel[ich]; |
1107 | 0 | } |
1108 | | // reset level before eot |
1109 | 0 | set_deferred_level_run(plevel, cchrun, ich, baselevel); |
1110 | 0 | } |
1111 | | |
1112 | | #ifdef BIDI_LINE_AT_A_TIME |
1113 | | /*------------------------------------------------------------------------ |
1114 | | Functions: reorder/reorderLevel |
1115 | | |
1116 | | Recursively reorders the display string |
1117 | | "From the highest level down, reverse all characters at that level and |
1118 | | higher, down to the lowest odd level" |
1119 | | |
1120 | | Implements rule L2 of the Unicode bidi Algorithm. |
1121 | | |
1122 | | Input: Array of embedding levels |
1123 | | Character count |
1124 | | Flag enabling reversal (set to false by initial caller) |
1125 | | |
1126 | | In/Out: Text to reorder |
1127 | | |
1128 | | Note: levels may exceed 15 resp. 61 on input. |
1129 | | |
1130 | | Rule L3 - reorder combining marks is not implemented here |
1131 | | Rule L4 - glyph mirroring is implemented as a display option below |
1132 | | |
1133 | | Note: this should be applied a line at a time |
1134 | | -------------------------------------------------------------------------*/ |
1135 | | static int reorderLevel(fz_bidi_level level, uint32_t *psz_text, const fz_bidi_level *plevel, int cch, |
1136 | | int f_reverse) |
1137 | | { |
1138 | | int ich; |
1139 | | |
1140 | | // true as soon as first odd level encountered |
1141 | | f_reverse = f_reverse || odd(level); |
1142 | | |
1143 | | for (ich = 0; ich < cch; ich++) |
1144 | | { |
1145 | | if (plevel[ich] < level) |
1146 | | { |
1147 | | break; |
1148 | | } |
1149 | | else if (plevel[ich] > level) |
1150 | | { |
1151 | | ich += reorderLevel(level + 1, psz_text + ich, plevel + ich, |
1152 | | cch - ich, f_reverse) - 1; |
1153 | | } |
1154 | | } |
1155 | | if (f_reverse) |
1156 | | { |
1157 | | reverse(psz_text, ich); |
1158 | | } |
1159 | | return ich; |
1160 | | } |
1161 | | |
1162 | | int Bidi_reorder(fz_bidi_level baselevel, uint32_t *psz_text, const fz_bidi_level *plevel, int cch) |
1163 | | { |
1164 | | int ich = 0; |
1165 | | |
1166 | | while (ich < cch) |
1167 | | { |
1168 | | ich += reorderLevel(baselevel, psz_text + ich, plevel + ich, |
1169 | | cch - ich, FALSE); |
1170 | | } |
1171 | | return ich; |
1172 | | } |
1173 | | #endif |