Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/ubidi.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1999-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*   file name:  ubidi.c
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 1999jul27
16
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
17
*
18
*/
19
20
#include "cmemory.h"
21
#include "unicode/utypes.h"
22
#include "unicode/ustring.h"
23
#include "unicode/uchar.h"
24
#include "unicode/ubidi.h"
25
#include "unicode/utf16.h"
26
#include "ubidi_props.h"
27
#include "ubidiimp.h"
28
#include "uassert.h"
29
30
/*
31
 * General implementation notes:
32
 *
33
 * Throughout the implementation, there are comments like (W2) that refer to
34
 * rules of the BiDi algorithm, in this example to the second rule of the
35
 * resolution of weak types.
36
 *
37
 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38
 * character according to UTF-16, the second UChar gets the directional property of
39
 * the entire character assigned, while the first one gets a BN, a boundary
40
 * neutral, type, which is ignored by most of the algorithm according to
41
 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42
 *
43
 * Later, adjustWSLevels() will set the level for each BN to that of the
44
 * following character (UChar), which results in surrogate pairs getting the
45
 * same level on each of their surrogates.
46
 *
47
 * In a UTF-8 implementation, the same thing could be done: the last byte of
48
 * a multi-byte sequence would get the "real" property, while all previous
49
 * bytes of that sequence would get BN.
50
 *
51
 * It is not possible to assign all those parts of a character the same real
52
 * property because this would fail in the resolution of weak types with rules
53
 * that look at immediately surrounding types.
54
 *
55
 * As a related topic, this implementation does not remove Boundary Neutral
56
 * types from the input, but ignores them wherever this is relevant.
57
 * For example, the loop for the resolution of the weak types reads
58
 * types until it finds a non-BN.
59
 * Also, explicit embedding codes are neither changed into BN nor removed.
60
 * They are only treated the same way real BNs are.
61
 * As stated before, adjustWSLevels() takes care of them at the end.
62
 * For the purpose of conformance, the levels of all these codes
63
 * do not matter.
64
 *
65
 * Note that this implementation modifies the dirProps
66
 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67
 * X6, N0 (replace paired brackets by L or R).
68
 *
69
 * In this implementation, the resolution of weak types (W1 to W6),
70
 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71
 * are all done in one single loop, in resolveImplicitLevels().
72
 * Changes of dirProp values are done on the fly, without writing
73
 * them back to the dirProps array.
74
 *
75
 *
76
 * This implementation contains code that allows bypassing steps of the
77
 * algorithm that are not needed on the specific paragraph
78
 * in order to speed up the most common cases considerably,
79
 * like text that is entirely LTR, or RTL text without numbers.
80
 *
81
 * Most of this is done by setting a bit for each directional property
82
 * in a flags variable and later checking for whether there are
83
 * any LTR characters or any RTL characters, or both, whether
84
 * there are any explicit embedding codes, etc.
85
 *
86
 * If the (Xn) steps are performed, then the flags are re-evaluated,
87
 * because they will then not contain the embedding codes any more
88
 * and will be adjusted for override codes, so that subsequently
89
 * more bypassing may be possible than what the initial flags suggested.
90
 *
91
 * If the text is not mixed-directional, then the
92
 * algorithm steps for the weak type resolution are not performed,
93
 * and all levels are set to the paragraph level.
94
 *
95
 * If there are no explicit embedding codes, then the (Xn) steps
96
 * are not performed.
97
 *
98
 * If embedding levels are supplied as a parameter, then all
99
 * explicit embedding codes are ignored, and the (Xn) steps
100
 * are not performed.
101
 *
102
 * White Space types could get the level of the run they belong to,
103
 * and are checked with a test of (flags&MASK_EMBEDDING) to
104
 * decide whether the paragraph direction should be included in
105
 * the flags variable.
106
 *
107
 * If there are no White Space types in the paragraph, then
108
 * (L1) is not necessary in adjustWSLevels().
109
 */
110
111
/*
 * To avoid some conditional statements, use tiny constant arrays.
 * Each table has two entries and is indexed by the low bit of a level
 * through the DIRPROP_FLAG_* macros below: entry 0 holds the flag of the
 * left-to-right variant (L, LRE, LRO), entry 1 the flag of the
 * right-to-left variant (R, RLE, RLO).
 */
112
static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113
static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114
static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116
97.3k
/* flag of L (low bit of level is 0) or of R (low bit is 1) */
#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117
20.7k
/* flag of LRE (low bit of level is 0) or of RLE (low bit is 1) */
#define DIRPROP_FLAG_E(level)  flagE[(level)&1]
118
2.63k
/* flag of LRO (low bit of level is 0) or of RLO (low bit is 1) */
#define DIRPROP_FLAG_O(level)  flagO[(level)&1]
119
120
1.21M
/* yields L when strong is L, and R for every other value */
#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122
4.75M
/* the level with the UBIDI_LEVEL_OVERRIDE bit(s) cleared */
#define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
123
124
/* UBiDi object management -------------------------------------------------- */
125
126
/*
 * Create a UBiDi object without any preallocated text or run storage.
 * This is ubidi_openSized(0, 0, ...) with a private error code; the
 * result of that call (possibly NULL) is returned unchanged.
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)
{
    UErrorCode status=U_ZERO_ERROR;
    UBiDi *created=ubidi_openSized(0, 0, &status);
    return created;
}
132
133
/*
 * Create a UBiDi object, optionally with preallocated storage.
 *
 * maxLength   >0: allocate the dirProps and levels arrays for that length now;
 *             0: set mayAllocateText instead.
 * maxRunCount 1: only record runsSize=sizeof(Run) (use simpleRuns[]);
 *             >1: allocate the runs array now;
 *             0: set mayAllocateRuns instead.
 * pErrorCode  must be non-NULL and must not indicate a failure on entry.
 *
 * Returns the new object, or NULL if pErrorCode is unusable, an argument is
 * negative (U_ILLEGAL_ARGUMENT_ERROR) or an allocation failed
 * (U_MEMORY_ALLOCATION_ERROR). On an allocation failure of one of the arrays
 * the partially built object is released with ubidi_close().
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    /* check the argument values */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(maxLength<0 || maxRunCount<0) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;    /* invalid arguments */
    }

    /* allocate memory for the object */
    UBiDi *bidi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    if(bidi==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
    uprv_memset(bidi, 0, sizeof(UBiDi));

    /* text-sized arrays: allocate as requested, or remember that we may do so later */
    if(maxLength<=0) {
        bidi->mayAllocateText=TRUE;
    } else if(!(getInitialDirPropsMemory(bidi, maxLength) &&
                getInitialLevelsMemory(bidi, maxLength))) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }

    /* runs array: same idea; a single run lives in simpleRuns[] */
    if(maxRunCount<=0) {
        bidi->mayAllocateRuns=TRUE;
    } else if(maxRunCount==1) {
        /* use simpleRuns[] */
        bidi->runsSize=sizeof(Run);
    } else if(!getInitialRunsMemory(bidi, maxRunCount)) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }

    if(U_FAILURE(*pErrorCode)) {
        ubidi_close(bidi);
        return NULL;
    }
    return bidi;
}
184
185
/*
186
 * We are allowed to allocate memory if memory==NULL or
187
 * mayAllocate==TRUE for each array that we need.
188
 * We also try to grow memory as needed if we
189
 * allocate it.
190
 *
191
 * Assume sizeNeeded>0.
192
 * If *pMemory!=NULL, then assume *pSize>0.
193
 *
194
 * ### this realloc() may unnecessarily copy the old data,
195
 * which we know we don't need any more;
196
 * is this the best way to do this??
197
 */
198
U_CFUNC UBool
199
87.9k
ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
200
87.9k
    void **pMemory = (void **)bidiMem;
201
    /* check for existing memory */
202
87.9k
    if(*pMemory==NULL) {
203
        /* we need to allocate memory */
204
40.5k
        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
205
40.5k
            *pSize=sizeNeeded;
206
40.5k
            return TRUE;
207
0
        } else {
208
0
            return FALSE;
209
0
        }
210
47.4k
    } else {
211
47.4k
        if(sizeNeeded<=*pSize) {
212
            /* there is already enough memory */
213
38.1k
            return TRUE;
214
38.1k
        }
215
9.21k
        else if(!mayAllocate) {
216
            /* not enough memory, and we must not allocate */
217
0
            return FALSE;
218
9.21k
        } else {
219
            /* we try to grow */
220
9.21k
            void *memory;
221
            /* in most cases, we do not need the copy-old-data part of
222
             * realloc, but it is needed when adding runs using getRunsMemory()
223
             * in setParaRunsOnly()
224
             */
225
9.21k
            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
226
9.21k
                *pMemory=memory;
227
9.21k
                *pSize=sizeNeeded;
228
9.21k
                return TRUE;
229
0
            } else {
230
                /* we failed to grow */
231
0
                return FALSE;
232
0
            }
233
9.21k
        }
234
47.4k
    }
235
87.9k
}
236
237
/*
 * Release a UBiDi object together with every buffer it owns.
 * A NULL argument is accepted and ignored.
 */
U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi) {
    if(pBiDi==NULL) {
        return;
    }

    pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */

    /* the separately allocated buffers, released in this order */
    void *const owned[]={
        pBiDi->dirPropsMemory,
        pBiDi->levelsMemory,
        pBiDi->openingsMemory,
        pBiDi->parasMemory,
        pBiDi->runsMemory,
        pBiDi->isolatesMemory,
        pBiDi->insertPoints.points
    };
    const int32_t ownedCount=(int32_t)(sizeof(owned)/sizeof(owned[0]));
    for(int32_t k=0; k<ownedCount; ++k) {
        if(owned[k]!=NULL) {
            uprv_free(owned[k]);
        }
    }

    uprv_free(pBiDi);
}
266
267
/* set to approximate "inverse BiDi" ---------------------------------------- */
268
269
/*
 * Switch the "inverse BiDi" flag on or off. The reordering mode is set
 * along with it: UBIDI_REORDER_INVERSE_NUMBERS_AS_L when isInverse is
 * true, UBIDI_REORDER_DEFAULT otherwise. A NULL pBiDi is ignored.
 */
U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    if(pBiDi==NULL) {
        return;
    }
    pBiDi->isInverse=isInverse;
    if(isInverse) {
        pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
    } else {
        pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
    }
}
277
278
/*
 * Report the current "inverse BiDi" flag of the object;
 * FALSE is returned for a NULL pBiDi.
 */
U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi *pBiDi) {
    if(pBiDi==NULL) {
        return FALSE;
    }
    return pBiDi->isInverse;
}
286
287
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
288
 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
289
 * concept of RUNS_ONLY which is a double operation.
290
 * It could be advantageous to divide this into 3 concepts:
291
 * a) Operation: direct / inverse / RUNS_ONLY
292
 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
293
 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
294
 * This would allow combinations not possible today like RUNS_ONLY with
295
 * NUMBERS_SPECIAL.
296
 * Also allow setting INSERT_MARKS for the direct step of RUNS_ONLY and
297
 * REMOVE_CONTROLS for the inverse step.
298
 * Not all combinations would be supported, and probably not all make sense.
299
 * This would need to document which ones are supported and what are the
300
 * fallbacks for unsupported combinations.
301
 */
302
/*
 * Select the reordering mode. The call is ignored when pBiDi is NULL or
 * when reorderingMode lies outside
 * [UBIDI_REORDER_DEFAULT, UBIDI_REORDER_COUNT).
 * The isInverse flag is kept in sync: it is true exactly for
 * UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
    if(pBiDi==NULL) {
        return;
    }
    if(reorderingMode < UBIDI_REORDER_DEFAULT || reorderingMode >= UBIDI_REORDER_COUNT) {
        return;     /* not a known mode */
    }
    pBiDi->reorderingMode = reorderingMode;
    pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
}
310
311
/*
 * Return the reordering mode stored in the object, or
 * UBIDI_REORDER_DEFAULT for a NULL pBiDi.
 */
U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi *pBiDi) {
    if(pBiDi==NULL) {
        return UBIDI_REORDER_DEFAULT;
    }
    return pBiDi->reorderingMode;
}
319
320
/*
 * Store the reordering option bits. If UBIDI_OPTION_REMOVE_CONTROLS is
 * requested, UBIDI_OPTION_INSERT_MARKS is cleared before storing.
 * A NULL pBiDi is ignored.
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    if(pBiDi==NULL) {
        return;
    }
    uint32_t effective=reorderingOptions;
    if((effective & UBIDI_OPTION_REMOVE_CONTROLS)!=0) {
        effective&=~UBIDI_OPTION_INSERT_MARKS;
    }
    pBiDi->reorderingOptions=effective;
}
329
330
/*
 * Return the reordering option bits stored in the object,
 * or 0 for a NULL pBiDi.
 */
U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi) {
    if(pBiDi==NULL) {
        return 0;
    }
    return pBiDi->reorderingOptions;
}
338
339
/*
 * Determine the base direction of a string from its first strong character.
 *
 * text    the UTF-16 string to inspect
 * length  number of UChars in text, or -1 if text is NUL-terminated
 *
 * Returns UBIDI_LTR if the first character whose class is
 * U_LEFT_TO_RIGHT, U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC is
 * left-to-right, UBIDI_RTL if it is one of the two right-to-left classes,
 * and UBIDI_NEUTRAL if there is no such character, if text is NULL or
 * if length is less than -1.
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar *text, int32_t length) {
    if(text==NULL || length<-1) {
        return UBIDI_NEUTRAL;
    }
    if(length==-1) {
        length=u_strlen(text);
    }

    int32_t pos=0;
    while(pos<length) {
        UChar32 c;
        /* pos is advanced past the code point by U16_NEXT */
        U16_NEXT(text, pos, length, c);
        switch(u_charDirection(c)) {
        case U_LEFT_TO_RIGHT:
            return UBIDI_LTR;
        case U_RIGHT_TO_LEFT:
        case U_RIGHT_TO_LEFT_ARABIC:
            return UBIDI_RTL;
        default:
            break;      /* not strong, keep looking */
        }
    }
    return UBIDI_NEUTRAL;
}
366
367
/* perform (P2)..(P3) ------------------------------------------------------- */
368
369
/**
370
 * Returns the directionality of the first strong character
371
 * after the last B in prologue, if any.
372
 * Requires prologue!=null.
373
 */
374
static DirProp
375
0
firstL_R_AL(UBiDi *pBiDi) {
376
0
    const UChar *text=pBiDi->prologue;
377
0
    int32_t length=pBiDi->proLength;
378
0
    int32_t i;
379
0
    UChar32 uchar;
380
0
    DirProp dirProp, result=ON;
381
0
    for(i=0; i<length; ) {
382
        /* i is incremented by U16_NEXT */
383
0
        U16_NEXT(text, i, length, uchar);
384
0
        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
385
0
        if(result==ON) {
386
0
            if(dirProp==L || dirProp==R || dirProp==AL) {
387
0
                result=dirProp;
388
0
            }
389
0
        } else {
390
0
            if(dirProp==B) {
391
0
                result=ON;
392
0
            }
393
0
        }
394
0
    }
395
0
    return result;
396
0
}
397
398
/*
399
 * Check that there are enough entries in the array pointed to by pBiDi->paras
400
 */
401
static UBool
402
9.71k
checkParaCount(UBiDi *pBiDi) {
403
9.71k
    int32_t count=pBiDi->paraCount;
404
9.71k
    if(pBiDi->paras==pBiDi->simpleParas) {
405
2.25k
        if(count<=SIMPLE_PARAS_COUNT)
406
2.11k
            return TRUE;
407
144
        if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
408
0
            return FALSE;
409
144
        pBiDi->paras=pBiDi->parasMemory;
410
144
        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
411
144
        return TRUE;
412
144
    }
413
7.45k
    if(!getInitialParasMemory(pBiDi, count * 2))
414
0
        return FALSE;
415
7.45k
    pBiDi->paras=pBiDi->parasMemory;
416
7.45k
    return TRUE;
417
7.45k
}
418
419
/*
420
 * Get the directional properties for the text, calculate the flags bit-set, and
421
 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
422
 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
423
 * When encountering an FSI, it is initially replaced with an LRI, which is the
424
 * default. Only if a strong R or AL is found within its scope will the LRI be
425
 * replaced by an RLI.
426
 */
427
static UBool
428
19.0k
getDirProps(UBiDi *pBiDi) {
429
19.0k
    const UChar *text=pBiDi->text;
430
19.0k
    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
431
432
19.0k
    int32_t i=0, originalLength=pBiDi->originalLength;
433
19.0k
    Flags flags=0;      /* collect all directionalities in the text */
434
19.0k
    UChar32 uchar;
435
19.0k
    DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
436
19.0k
    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
437
    /* for inverse BiDi, the default para level is set to RTL if there is a
438
       strong R or AL character at either end of the text                            */
439
19.0k
    UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
440
0
            (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
441
0
             pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
442
19.0k
    int32_t lastArabicPos=-1;
443
19.0k
    int32_t controlCount=0;
444
19.0k
    UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
445
19.0k
                                       UBIDI_OPTION_REMOVE_CONTROLS);
446
447
19.0k
    enum State {
448
19.0k
         NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
449
19.0k
         SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
450
19.0k
         SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
451
19.0k
         LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
452
19.0k
    };
453
19.0k
    State state;
454
19.0k
    DirProp lastStrong=ON;              /* for default level & inverse BiDi */
455
    /* The following stacks are used to manage isolate sequences. Those
456
       sequences may be nested, but obviously never more deeply than the
457
       maximum explicit embedding level.
458
       lastStack is the index of the last used entry in the stack. A value of -1
459
       means that there is no open isolate sequence.
460
       lastStack is reset to -1 on paragraph boundaries. */
461
    /* The following stack contains the position of the initiator of
462
       each open isolate sequence */
463
19.0k
    int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
464
    /* The following stack contains the last known state before
465
       encountering the initiator of an isolate sequence */
466
19.0k
    State  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
467
19.0k
    int32_t stackLast=-1;
468
469
19.0k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
470
0
        pBiDi->length=0;
471
19.0k
    defaultParaLevel=pBiDi->paraLevel&1;
472
19.0k
    if(isDefaultLevel) {
473
0
        pBiDi->paras[0].level=defaultParaLevel;
474
0
        lastStrong=defaultParaLevel;
475
0
        if(pBiDi->proLength>0 &&                    /* there is a prologue */
476
0
           (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
477
0
            if(dirProp==L)
478
0
                pBiDi->paras[0].level=0;    /* set the default para level */
479
0
            else
480
0
                pBiDi->paras[0].level=1;    /* set the default para level */
481
0
            state=NOT_SEEKING_STRONG;
482
0
        } else {
483
0
            state=SEEKING_STRONG_FOR_PARA;
484
0
        }
485
19.0k
    } else {
486
19.0k
        pBiDi->paras[0].level=pBiDi->paraLevel;
487
19.0k
        state=NOT_SEEKING_STRONG;
488
19.0k
    }
489
    /* count paragraphs and determine the paragraph level (P2..P3) */
490
    /*
491
     * see comment in ubidi.h:
492
     * the UBIDI_DEFAULT_XXX values are designed so that
493
     * their bit 0 alone yields the intended default
494
     */
495
5.59M
    for( /* i=0 above */ ; i<originalLength; ) {
496
        /* i is incremented by U16_NEXT */
497
5.57M
        U16_NEXT(text, i, originalLength, uchar);
498
5.57M
        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
499
5.57M
        dirProps[i-1]=dirProp;
500
5.57M
        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
501
10.8k
            flags|=DIRPROP_FLAG(BN);
502
10.8k
            dirProps[i-2]=BN;
503
10.8k
        }
504
5.57M
        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
505
0
            controlCount++;
506
5.57M
        if(dirProp==L) {
507
1.65M
            if(state==SEEKING_STRONG_FOR_PARA) {
508
0
                pBiDi->paras[pBiDi->paraCount-1].level=0;
509
0
                state=NOT_SEEKING_STRONG;
510
0
            }
511
1.65M
            else if(state==SEEKING_STRONG_FOR_FSI) {
512
3.11k
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
513
                    /* no need for next statement, already set by default */
514
                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
515
2.78k
                    flags|=DIRPROP_FLAG(LRI);
516
2.78k
                }
517
3.11k
                state=LOOKING_FOR_PDI;
518
3.11k
            }
519
1.65M
            lastStrong=L;
520
1.65M
            continue;
521
1.65M
        }
522
3.91M
        if(dirProp==R || dirProp==AL) {
523
42.2k
            if(state==SEEKING_STRONG_FOR_PARA) {
524
0
                pBiDi->paras[pBiDi->paraCount-1].level=1;
525
0
                state=NOT_SEEKING_STRONG;
526
0
            }
527
42.2k
            else if(state==SEEKING_STRONG_FOR_FSI) {
528
848
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
529
721
                    dirProps[isolateStartStack[stackLast]]=RLI;
530
721
                    flags|=DIRPROP_FLAG(RLI);
531
721
                }
532
848
                state=LOOKING_FOR_PDI;
533
848
            }
534
42.2k
            lastStrong=R;
535
42.2k
            if(dirProp==AL)
536
40.3k
                lastArabicPos=i-1;
537
42.2k
            continue;
538
42.2k
        }
539
3.87M
        if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
540
41.9k
            stackLast++;
541
41.9k
            if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
542
37.6k
                isolateStartStack[stackLast]=i-1;
543
37.6k
                previousStateStack[stackLast]=state;
544
37.6k
            }
545
41.9k
            if(dirProp==FSI) {
546
6.19k
                dirProps[i-1]=LRI;      /* default if no strong char */
547
6.19k
                state=SEEKING_STRONG_FOR_FSI;
548
6.19k
            }
549
35.7k
            else
550
35.7k
                state=LOOKING_FOR_PDI;
551
41.9k
            continue;
552
41.9k
        }
553
3.83M
        if(dirProp==PDI) {
554
4.27k
            if(state==SEEKING_STRONG_FOR_FSI) {
555
688
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
556
                    /* no need for next statement, already set by default */
557
                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
558
591
                    flags|=DIRPROP_FLAG(LRI);
559
591
                }
560
688
            }
561
4.27k
            if(stackLast>=0) {
562
3.76k
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
563
3.30k
                    state=previousStateStack[stackLast];
564
3.76k
                stackLast--;
565
3.76k
            }
566
4.27k
            continue;
567
4.27k
        }
568
3.82M
        if(dirProp==B) {
569
10.1k
            if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
570
411
                continue;
571
9.73k
            pBiDi->paras[pBiDi->paraCount-1].limit=i;
572
9.73k
            if(isDefaultLevelInverse && lastStrong==R)
573
0
                pBiDi->paras[pBiDi->paraCount-1].level=1;
574
9.73k
            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
575
                /* When streaming, we only process whole paragraphs
576
                   thus some updates are only done on paragraph boundaries */
577
0
                pBiDi->length=i;        /* i is index to next character */
578
0
                pBiDi->controlCount=controlCount;
579
0
            }
580
9.73k
            if(i<originalLength) {              /* B not last char in text */
581
9.71k
                pBiDi->paraCount++;
582
9.71k
                if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
583
0
                    return FALSE;
584
9.71k
                if(isDefaultLevel) {
585
0
                    pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
586
0
                    state=SEEKING_STRONG_FOR_PARA;
587
0
                    lastStrong=defaultParaLevel;
588
9.71k
                } else {
589
9.71k
                    pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
590
9.71k
                    state=NOT_SEEKING_STRONG;
591
9.71k
                }
592
9.71k
                stackLast=-1;
593
9.71k
            }
594
9.73k
            continue;
595
9.73k
        }
596
3.82M
    }
597
    /* Ignore still open isolate sequences with overflow */
598
19.0k
    if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
599
92
        stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
600
92
        state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
601
92
    }
602
    /* Resolve direction of still unresolved open FSI sequences */
603
39.3k
    while(stackLast>=0) {
604
20.4k
        if(state==SEEKING_STRONG_FOR_FSI) {
605
            /* no need for next statement, already set by default */
606
            /* dirProps[isolateStartStack[stackLast]]=LRI; */
607
176
            flags|=DIRPROP_FLAG(LRI);
608
176
            break;
609
176
        }
610
20.2k
        state=previousStateStack[stackLast];
611
20.2k
        stackLast--;
612
20.2k
    }
613
    /* When streaming, ignore text after the last paragraph separator */
614
19.0k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
615
0
        if(pBiDi->length<originalLength)
616
0
            pBiDi->paraCount--;
617
19.0k
    } else {
618
19.0k
        pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
619
19.0k
        pBiDi->controlCount=controlCount;
620
19.0k
    }
621
    /* For inverse bidi, default para direction is RTL if there is
622
       a strong R or AL at either end of the paragraph */
623
19.0k
    if(isDefaultLevelInverse && lastStrong==R) {
624
0
        pBiDi->paras[pBiDi->paraCount-1].level=1;
625
0
    }
626
19.0k
    if(isDefaultLevel) {
627
0
        pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
628
0
    }
629
    /* The following is needed to resolve the text direction for default level
630
       paragraphs containing no strong character */
631
47.9k
    for(i=0; i<pBiDi->paraCount; i++)
632
28.8k
        flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
633
634
19.0k
    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
635
0
        flags|=DIRPROP_FLAG(L);
636
0
    }
637
19.0k
    pBiDi->flags=flags;
638
19.0k
    pBiDi->lastArabicPos=lastArabicPos;
639
19.0k
    return TRUE;
640
19.0k
}
641
642
/* determine the paragraph level at position index */
643
U_CFUNC UBiDiLevel
644
0
ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
645
0
    int32_t i;
646
0
    for(i=0; i<pBiDi->paraCount; i++)
647
0
        if(pindex<pBiDi->paras[i].limit)
648
0
            break;
649
0
    if(i>=pBiDi->paraCount)
650
0
        i=pBiDi->paraCount-1;
651
0
    return (UBiDiLevel)(pBiDi->paras[i].level);
652
0
}
653
654
/* Functions for handling paired brackets ----------------------------------- */
655
656
/* In the isoRuns array, the first entry is used for text outside of any
657
   isolate sequence.  Higher entries are used for each more deeply nested
658
   isolate sequence. isoRunLast is the index of the last used entry.  The
659
   openings array is used to note the data of opening brackets not yet
660
   matched by a closing bracket, or matched but still susceptible to change
661
   level.
662
   Each isoRun entry contains the index of the first and
663
   one-after-last openings entries for pending opening brackets it
664
   contains.  The next openings entry to use is the one-after-last of the
665
   most deeply nested isoRun entry.
666
   isoRun entries also contain their current embedding level and the last
667
   encountered strong character, since these will be needed to resolve
668
   the level of paired brackets.  */
669
670
static void
671
11.1k
bracketInit(UBiDi *pBiDi, BracketData *bd) {
672
11.1k
    bd->pBiDi=pBiDi;
673
11.1k
    bd->isoRunLast=0;
674
11.1k
    bd->isoRuns[0].start=0;
675
11.1k
    bd->isoRuns[0].limit=0;
676
11.1k
    bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
677
11.1k
    UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
678
11.1k
    bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
679
11.1k
    bd->isoRuns[0].contextDir = (UBiDiDirection)t;
680
11.1k
    bd->isoRuns[0].contextPos=0;
681
11.1k
    if(pBiDi->openingsMemory) {
682
0
        bd->openings=pBiDi->openingsMemory;
683
0
        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
684
11.1k
    } else {
685
11.1k
        bd->openings=bd->simpleOpenings;
686
11.1k
        bd->openingsCount=SIMPLE_OPENINGS_COUNT;
687
11.1k
    }
688
11.1k
    bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
689
11.1k
                         bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
690
11.1k
}
691
692
/* paragraph boundary */
693
static void
694
6.91k
bracketProcessB(BracketData *bd, UBiDiLevel level) {
695
6.91k
    bd->isoRunLast=0;
696
6.91k
    bd->isoRuns[0].limit=0;
697
6.91k
    bd->isoRuns[0].level=level;
698
6.91k
    bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
699
6.91k
    bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
700
6.91k
    bd->isoRuns[0].contextPos=0;
701
6.91k
}
702
703
/* LRE, LRO, RLE, RLO, PDF */
704
static void
705
bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
706
30.1k
                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
707
30.1k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
708
30.1k
    DirProp *dirProps=bd->pBiDi->dirProps;
709
30.1k
    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
710
25.5k
        return;
711
4.64k
    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
712
4.43k
        contextLevel=embeddingLevel;
713
4.64k
    pLastIsoRun->limit=pLastIsoRun->start;
714
4.64k
    pLastIsoRun->level=embeddingLevel;
715
4.64k
    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
716
4.64k
    pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
717
4.64k
    pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
718
4.64k
}
719
720
/* LRI or RLI */
721
static void
722
25.6k
bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
723
25.6k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
724
25.6k
    int16_t lastLimit;
725
25.6k
    pLastIsoRun->lastBase=ON;
726
25.6k
    lastLimit=pLastIsoRun->limit;
727
25.6k
    bd->isoRunLast++;
728
25.6k
    pLastIsoRun++;
729
25.6k
    pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
730
25.6k
    pLastIsoRun->level=level;
731
25.6k
    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
732
25.6k
    pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
733
25.6k
    pLastIsoRun->contextPos=0;
734
25.6k
}
735
736
/* PDI */
737
static void
738
2.50k
bracketProcessPDI(BracketData *bd) {
739
2.50k
    IsoRun *pLastIsoRun;
740
2.50k
    bd->isoRunLast--;
741
2.50k
    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
742
2.50k
    pLastIsoRun->lastBase=ON;
743
2.50k
}
744
745
/* newly found opening bracket: create an openings entry */
746
static UBool                            /* return TRUE if success */
747
324k
bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
748
324k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
749
324k
    Opening *pOpening;
750
324k
    if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
751
3.32k
        UBiDi *pBiDi=bd->pBiDi;
752
3.32k
        if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
753
0
            return FALSE;
754
3.32k
        if(bd->openings==bd->simpleOpenings)
755
3.32k
            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
756
3.32k
                        SIMPLE_OPENINGS_COUNT * sizeof(Opening));
757
3.32k
        bd->openings=pBiDi->openingsMemory;     /* may have changed */
758
3.32k
        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
759
3.32k
    }
760
324k
    pOpening=&bd->openings[pLastIsoRun->limit];
761
324k
    pOpening->position=position;
762
324k
    pOpening->match=match;
763
324k
    pOpening->contextDir=pLastIsoRun->contextDir;
764
324k
    pOpening->contextPos=pLastIsoRun->contextPos;
765
324k
    pOpening->flags=0;
766
324k
    pLastIsoRun->limit++;
767
324k
    return TRUE;
768
324k
}
769
770
/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
771
static void
772
36.0k
fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
773
    /* This function calls itself recursively */
774
36.0k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
775
36.0k
    Opening *qOpening;
776
36.0k
    DirProp *dirProps=bd->pBiDi->dirProps;
777
36.0k
    int32_t k, openingPosition, closingPosition;
778
418k
    for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
779
393k
        if(qOpening->match>=0)      /* not an N0c match */
780
378k
            continue;
781
14.7k
        if(newPropPosition<qOpening->contextPos)
782
1.84k
            break;
783
12.9k
        if(newPropPosition>=qOpening->position)
784
584
            continue;
785
12.3k
        if(newProp==qOpening->contextDir)
786
9.11k
            break;
787
3.23k
        openingPosition=qOpening->position;
788
3.23k
        dirProps[openingPosition]=newProp;
789
3.23k
        closingPosition=-(qOpening->match);
790
3.23k
        dirProps[closingPosition]=newProp;
791
3.23k
        qOpening->match=0;                      /* prevent further changes */
792
3.23k
        fixN0c(bd, k, openingPosition, newProp);
793
3.23k
        fixN0c(bd, k, closingPosition, newProp);
794
3.23k
    }
795
36.0k
}
796
797
/* process closing bracket */
798
static DirProp              /* return L or R if N0b or N0c, ON if N0d */
799
34.3k
bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
800
34.3k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
801
34.3k
    Opening *pOpening, *qOpening;
802
34.3k
    UBiDiDirection direction;
803
34.3k
    UBool stable;
804
34.3k
    DirProp newProp;
805
34.3k
    pOpening=&bd->openings[openIdx];
806
34.3k
    direction=(UBiDiDirection)(pLastIsoRun->level&1);
807
34.3k
    stable=TRUE;            /* assume stable until proved otherwise */
808
809
    /* The stable flag is set when brackets are paired and their
810
       level is resolved and cannot be changed by what will be
811
       found later in the source string.
812
       An unstable match can occur only when applying N0c, where
813
       the resolved level depends on the preceding context, and
814
       this context may be affected by text occurring later.
815
       Example: RTL paragraph containing:  abc[(latin) HEBREW]
816
       When the closing parenthesis is encountered, it appears
817
       that N0c1 must be applied since 'abc' sets an opposite
818
       direction context and both parentheses receive level 2.
819
       However, when the closing square bracket is processed,
820
       N0b applies because of 'HEBREW' being included within the
821
       brackets, thus the square brackets are treated like R and
822
       receive level 1. However, this changes the preceding
823
       context of the opening parenthesis, and it now appears
824
       that N0c2 must be applied to the parentheses rather than
825
       N0c1. */
826
827
34.3k
    if((direction==0 && pOpening->flags&FOUND_L) ||
828
24.1k
       (direction==1 && pOpening->flags&FOUND_R)) {                         /* N0b */
829
11.3k
        newProp=static_cast<DirProp>(direction);
830
11.3k
    }
831
23.0k
    else if(pOpening->flags&(FOUND_L|FOUND_R)) {                            /* N0c */
832
        /* it is stable if there is no containing pair or in
833
           conditions too complicated and not worth checking */
834
18.1k
        stable=(openIdx==pLastIsoRun->start);
835
18.1k
        if(direction!=pOpening->contextDir)
836
15.4k
            newProp= static_cast<DirProp>(pOpening->contextDir);           /* N0c1 */
837
2.73k
        else
838
2.73k
            newProp= static_cast<DirProp>(direction);                      /* N0c2 */
839
4.85k
    } else {
840
        /* forget this and any brackets nested within this pair */
841
4.85k
        pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
842
4.85k
        return ON;                                                          /* N0d */
843
4.85k
    }
844
29.5k
    bd->pBiDi->dirProps[pOpening->position]=newProp;
845
29.5k
    bd->pBiDi->dirProps[position]=newProp;
846
    /* Update nested N0c pairs that may be affected */
847
29.5k
    fixN0c(bd, openIdx, pOpening->position, newProp);
848
29.5k
    if(stable) {
849
11.6k
        pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
850
        /* remove lower located synonyms if any */
851
12.0k
        while(pLastIsoRun->limit>pLastIsoRun->start &&
852
9.69k
              bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
853
337
            pLastIsoRun->limit--;
854
17.8k
    } else {
855
17.8k
        int32_t k;
856
17.8k
        pOpening->match=-position;
857
        /* neutralize lower located synonyms if any */
858
17.8k
        k=openIdx-1;
859
18.3k
        while(k>=pLastIsoRun->start &&
860
18.2k
              bd->openings[k].position==pOpening->position)
861
468
            bd->openings[k--].match=0;
862
        /* neutralize any unmatched opening between the current pair;
863
           this will also neutralize higher located synonyms if any */
864
380k
        for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
865
362k
            qOpening=&bd->openings[k];
866
362k
            if(qOpening->position>=position)
867
0
                break;
868
362k
            if(qOpening->match>0)
869
3.21k
                qOpening->match=0;
870
362k
        }
871
17.8k
    }
872
29.5k
    return newProp;
873
29.5k
}
874
875
/* handle strong characters, digits and candidates for closing brackets */
876
static UBool                            /* return TRUE if success */
877
3.96M
bracketProcessChar(BracketData *bd, int32_t position) {
878
3.96M
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
879
3.96M
    DirProp *dirProps, dirProp, newProp;
880
3.96M
    UBiDiLevel level;
881
3.96M
    dirProps=bd->pBiDi->dirProps;
882
3.96M
    dirProp=dirProps[position];
883
3.96M
    if(dirProp==ON) {
884
785k
        UChar c, match;
885
785k
        int32_t idx;
886
        /* First see if it is a matching closing bracket. Hopefully, this is
887
           more efficient than checking if it is a closing bracket at all */
888
785k
        c=bd->pBiDi->text[position];
889
127M
        for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
890
126M
            if(bd->openings[idx].match!=c)
891
126M
                continue;
892
            /* We have a match */
893
34.3k
            newProp=bracketProcessClosing(bd, idx, position);
894
34.3k
            if(newProp==ON) {           /* N0d */
895
4.85k
                c=0;        /* prevent handling as an opening */
896
4.85k
                break;
897
4.85k
            }
898
29.5k
            pLastIsoRun->lastBase=ON;
899
29.5k
            pLastIsoRun->contextDir=(UBiDiDirection)newProp;
900
29.5k
            pLastIsoRun->contextPos=position;
901
29.5k
            level=bd->pBiDi->levels[position];
902
29.5k
            if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
903
1.95k
                uint16_t flag;
904
1.95k
                int32_t i;
905
1.95k
                newProp=level&1;
906
1.95k
                pLastIsoRun->lastStrong=newProp;
907
1.95k
                flag=DIRPROP_FLAG(newProp);
908
69.7k
                for(i=pLastIsoRun->start; i<idx; i++)
909
67.7k
                    bd->openings[i].flags|=flag;
910
                /* matching brackets are not overridden by LRO/RLO */
911
1.95k
                bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
912
1.95k
            }
913
            /* matching brackets are not overridden by LRO/RLO */
914
29.5k
            bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
915
29.5k
            return TRUE;
916
29.5k
        }
917
        /* We get here only if the ON character is not a matching closing
918
           bracket or it is a case of N0d */
919
        /* Now see if it is an opening bracket */
920
755k
        if(c)
921
750k
            match= static_cast<UChar>(u_getBidiPairedBracket(c));    /* get the matching char */
922
4.85k
        else
923
4.85k
            match=0;
924
755k
        if(match!=c &&                  /* has a matching char */
925
368k
           ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
926
            /* special case: process synonyms
927
               create an opening entry for each synonym */
928
323k
            if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
929
852
                if(!bracketAddOpening(bd, 0x3009, position))
930
0
                    return FALSE;
931
322k
            }
932
322k
            else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
933
378
                if(!bracketAddOpening(bd, 0x232A, position))
934
0
                    return FALSE;
935
323k
            }
936
323k
            if(!bracketAddOpening(bd, match, position))
937
0
                return FALSE;
938
3.93M
        }
939
755k
    }
940
3.93M
    level=bd->pBiDi->levels[position];
941
3.93M
    if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
942
291k
        newProp=level&1;
943
291k
        if(dirProp!=S && dirProp!=WS && dirProp!=ON)
944
242k
            dirProps[position]=newProp;
945
291k
        pLastIsoRun->lastBase=newProp;
946
291k
        pLastIsoRun->lastStrong=newProp;
947
291k
        pLastIsoRun->contextDir=(UBiDiDirection)newProp;
948
291k
        pLastIsoRun->contextPos=position;
949
291k
    }
950
3.64M
    else if(dirProp<=R || dirProp==AL) {
951
1.21M
        newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
952
1.21M
        pLastIsoRun->lastBase=dirProp;
953
1.21M
        pLastIsoRun->lastStrong=dirProp;
954
1.21M
        pLastIsoRun->contextDir=(UBiDiDirection)newProp;
955
1.21M
        pLastIsoRun->contextPos=position;
956
1.21M
    }
957
2.43M
    else if(dirProp==EN) {
958
883k
        pLastIsoRun->lastBase=EN;
959
883k
        if(pLastIsoRun->lastStrong==L) {
960
657k
            newProp=L;                  /* W7 */
961
657k
            if(!bd->isNumbersSpecial)
962
657k
                dirProps[position]=ENL;
963
657k
            pLastIsoRun->contextDir=(UBiDiDirection)L;
964
657k
            pLastIsoRun->contextPos=position;
965
657k
        }
966
225k
        else {
967
225k
            newProp=R;                  /* N0 */
968
225k
            if(pLastIsoRun->lastStrong==AL)
969
175k
                dirProps[position]=AN;  /* W2 */
970
50.5k
            else
971
50.5k
                dirProps[position]=ENR;
972
225k
            pLastIsoRun->contextDir=(UBiDiDirection)R;
973
225k
            pLastIsoRun->contextPos=position;
974
225k
        }
975
883k
    }
976
1.55M
    else if(dirProp==AN) {
977
261
        newProp=R;                      /* N0 */
978
261
        pLastIsoRun->lastBase=AN;
979
261
        pLastIsoRun->contextDir=(UBiDiDirection)R;
980
261
        pLastIsoRun->contextPos=position;
981
261
    }
982
1.55M
    else if(dirProp==NSM) {
983
        /* if the last real char was ON, change NSM to ON so that it
984
           will stay ON even if the last real char is a bracket which
985
           may be changed to L or R */
986
175k
        newProp=pLastIsoRun->lastBase;
987
175k
        if(newProp==ON)
988
25.2k
            dirProps[position]=newProp;
989
175k
    }
990
1.37M
    else {
991
1.37M
        newProp=dirProp;
992
1.37M
        pLastIsoRun->lastBase=dirProp;
993
1.37M
    }
994
3.93M
    if(newProp<=R || newProp==AL) {
995
2.50M
        int32_t i;
996
2.50M
        uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
997
113M
        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
998
110M
            if(position>bd->openings[i].position)
999
110M
                bd->openings[i].flags|=flag;
1000
2.50M
    }
1001
3.93M
    return TRUE;
1002
3.93M
}
1003
1004
/* perform (X1)..(X9) ------------------------------------------------------- */
1005
1006
/* determine if the text is mixed-directional or single-directional */
1007
static UBiDiDirection
1008
20.9k
directionFromFlags(UBiDi *pBiDi) {
1009
20.9k
    Flags flags=pBiDi->flags;
1010
    /* if the text contains AN and neutrals, then some neutrals may become RTL */
1011
20.9k
    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1012
7.51k
        return UBIDI_LTR;
1013
13.4k
    } else if(!(flags&MASK_LTR)) {
1014
560
        return UBIDI_RTL;
1015
12.8k
    } else {
1016
12.8k
        return UBIDI_MIXED;
1017
12.8k
    }
1018
20.9k
}
1019
1020
/*
1021
 * Resolve the explicit levels as specified by explicit embedding codes.
1022
 * Recalculate the flags to have them reflect the real properties
1023
 * after taking the explicit embeddings into account.
1024
 *
1025
 * The BiDi algorithm is designed to result in the same behavior whether embedding
1026
 * levels are externally specified (from "styled text", supposedly the preferred
1027
 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1028
 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1029
 * However, in a real implementation, the removal of these codes and their index
1030
 * positions in the plain text is undesirable since it would result in
1031
 * reallocated, reindexed text.
1032
 * Instead, this implementation leaves the codes in there and just ignores them
1033
 * in the subsequent processing.
1034
 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1035
 * explicit embedding code just get the same level assigned as the last "real"
1036
 * character.
1037
 *
1038
 * Some implementations, not this one, then overwrite some of these
1039
 * directionality properties at "real" same-level-run boundaries by
1040
 * L or R codes so that the resolution of weak types can be performed on the
1041
 * entire paragraph at once instead of having to parse it once more and
1042
 * perform that resolution on same-level-runs.
1043
 * This limits the scope of the implicit rules in effectively
1044
 * the same way as the run limits.
1045
 *
1046
 * Instead, this implementation does not modify these codes, except for
1047
 * paired brackets whose properties (ON) may be replaced by L or R.
1048
 * On one hand, the paragraph has to be scanned for same-level-runs, but
1049
 * on the other hand, this saves another loop to reset these codes,
1050
 * or saves making and modifying a copy of dirProps[].
1051
 *
1052
 *
1053
 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1054
 *
1055
 *
1056
 * Handling the stack of explicit levels (Xn):
1057
 *
1058
 * With the BiDi stack of explicit levels, as pushed with each
1059
 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1060
 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1061
 *
1062
 * In order to have a correct push-pop semantics even in the case of overflows,
1063
 * overflow counters and a valid isolate counter are used as described in UAX#9
1064
 * section 3.3.2 "Explicit Levels and Directions".
1065
 *
1066
 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1067
 *
1068
 * Returns normally the direction; -1 if there was a memory shortage
1069
 *
1070
 */
1071
static UBiDiDirection
1072
19.0k
resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1073
19.0k
    DirProp *dirProps=pBiDi->dirProps;
1074
19.0k
    UBiDiLevel *levels=pBiDi->levels;
1075
19.0k
    const UChar *text=pBiDi->text;
1076
1077
19.0k
    int32_t i=0, length=pBiDi->length;
1078
19.0k
    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1079
19.0k
    DirProp dirProp;
1080
19.0k
    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1081
19.0k
    UBiDiDirection direction;
1082
19.0k
    pBiDi->isolateCount=0;
1083
1084
19.0k
    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1085
1086
    /* determine if the text is mixed-directional or single-directional */
1087
19.0k
    direction=directionFromFlags(pBiDi);
1088
1089
    /* we may not need to resolve any explicit levels */
1090
19.0k
    if((direction!=UBIDI_MIXED)) {
1091
        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1092
7.97k
        return direction;
1093
7.97k
    }
1094
11.1k
    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1095
        /* inverse BiDi: mixed, but all characters are at the same embedding level */
1096
        /* set all levels to the paragraph level */
1097
0
        int32_t paraIndex, start, limit;
1098
0
        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1099
0
            if(paraIndex==0)
1100
0
                start=0;
1101
0
            else
1102
0
                start=pBiDi->paras[paraIndex-1].limit;
1103
0
            limit=pBiDi->paras[paraIndex].limit;
1104
0
            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1105
0
            for(i=start; i<limit; i++)
1106
0
                levels[i]=level;
1107
0
        }
1108
0
        return direction;   /* no bracket matching for inverse BiDi */
1109
0
    }
1110
11.1k
    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1111
        /* no embeddings, set all levels to the paragraph level */
1112
        /* we still have to perform bracket matching */
1113
9.26k
        int32_t paraIndex, start, limit;
1114
9.26k
        BracketData bracketData;
1115
9.26k
        bracketInit(pBiDi, &bracketData);
1116
22.6k
        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1117
13.3k
            if(paraIndex==0)
1118
9.26k
                start=0;
1119
4.12k
            else
1120
4.12k
                start=pBiDi->paras[paraIndex-1].limit;
1121
13.3k
            limit=pBiDi->paras[paraIndex].limit;
1122
13.3k
            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1123
1.95M
            for(i=start; i<limit; i++) {
1124
1.94M
                levels[i]=level;
1125
1.94M
                dirProp=dirProps[i];
1126
1.94M
                if(dirProp==BN)
1127
226k
                    continue;
1128
1.71M
                if(dirProp==B) {
1129
4.32k
                    if((i+1)<length) {
1130
4.31k
                        if(text[i]==CR && text[i+1]==LF)
1131
195
                            continue;   /* skip CR when followed by LF */
1132
4.12k
                        bracketProcessB(&bracketData, level);
1133
4.12k
                    }
1134
4.13k
                    continue;
1135
1.71M
                }
1136
1.71M
                if(!bracketProcessChar(&bracketData, i)) {
1137
0
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1138
0
                    return UBIDI_LTR;
1139
0
                }
1140
1.71M
            }
1141
13.3k
        }
1142
9.26k
        return direction;
1143
1.85k
    }
1144
1.85k
    {
1145
        /* continue to perform (Xn) */
1146
1147
        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1148
        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1149
1.85k
        UBiDiLevel embeddingLevel=level, newLevel;
1150
1.85k
        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1151
1.85k
        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1152
1153
        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1154
           stackLast points to its current entry. */
1155
1.85k
        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1156
                                                        but we need one more entry as base */
1157
1.85k
        uint32_t stackLast=0;
1158
1.85k
        int32_t overflowIsolateCount=0;
1159
1.85k
        int32_t overflowEmbeddingCount=0;
1160
1.85k
        int32_t validIsolateCount=0;
1161
1.85k
        BracketData bracketData;
1162
1.85k
        bracketInit(pBiDi, &bracketData);
1163
1.85k
        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1164
1165
        /* recalculate the flags */
1166
1.85k
        flags=0;
1167
1168
2.39M
        for(i=0; i<length; ++i) {
1169
2.39M
            dirProp=dirProps[i];
1170
2.39M
            switch(dirProp) {
1171
1.78k
            case LRE:
1172
7.00k
            case RLE:
1173
8.88k
            case LRO:
1174
11.1k
            case RLO:
1175
                /* (X2, X3, X4, X5) */
1176
11.1k
                flags|=DIRPROP_FLAG(BN);
1177
11.1k
                levels[i]=previousLevel;
1178
11.1k
                if (dirProp==LRE || dirProp==LRO)
1179
                    /* least greater even level */
1180
3.66k
                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1181
7.52k
                else
1182
                    /* least greater odd level */
1183
7.52k
                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1184
11.1k
                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1185
9.28k
                                                         overflowEmbeddingCount==0) {
1186
9.08k
                    lastCcPos=i;
1187
9.08k
                    embeddingLevel=newLevel;
1188
9.08k
                    if(dirProp==LRO || dirProp==RLO)
1189
3.10k
                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1190
9.08k
                    stackLast++;
1191
9.08k
                    stack[stackLast]=embeddingLevel;
1192
                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1193
                       since this has already been done for newLevel which is
1194
                       the source for embeddingLevel.
1195
                     */
1196
2.11k
                } else {
1197
2.11k
                    if(overflowIsolateCount==0)
1198
865
                        overflowEmbeddingCount++;
1199
2.11k
                }
1200
11.1k
                break;
1201
1.01k
            case PDF:
1202
                /* (X7) */
1203
1.01k
                flags|=DIRPROP_FLAG(BN);
1204
1.01k
                levels[i]=previousLevel;
1205
                /* handle all the overflow cases first */
1206
1.01k
                if(overflowIsolateCount) {
1207
202
                    break;
1208
202
                }
1209
817
                if(overflowEmbeddingCount) {
1210
69
                    overflowEmbeddingCount--;
1211
69
                    break;
1212
69
                }
1213
748
                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1214
288
                    lastCcPos=i;
1215
288
                    stackLast--;
1216
288
                    embeddingLevel=(UBiDiLevel)stack[stackLast];
1217
288
                }
1218
748
                break;
1219
25.2k
            case LRI:
1220
39.3k
            case RLI:
1221
39.3k
                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1222
39.3k
                levels[i]=NO_OVERRIDE(embeddingLevel);
1223
39.3k
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1224
6.28k
                    bracketProcessBoundary(&bracketData, lastCcPos,
1225
6.28k
                                           previousLevel, embeddingLevel);
1226
6.28k
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1227
6.28k
                }
1228
39.3k
                previousLevel=embeddingLevel;
1229
                /* (X5a, X5b) */
1230
39.3k
                if(dirProp==LRI)
1231
                    /* least greater even level */
1232
25.2k
                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1233
14.0k
                else
1234
                    /* least greater odd level */
1235
14.0k
                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1236
39.3k
                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1237
25.7k
                                                         overflowEmbeddingCount==0) {
1238
25.6k
                    flags|=DIRPROP_FLAG(dirProp);
1239
25.6k
                    lastCcPos=i;
1240
25.6k
                    validIsolateCount++;
1241
25.6k
                    if(validIsolateCount>pBiDi->isolateCount)
1242
22.8k
                        pBiDi->isolateCount=validIsolateCount;
1243
25.6k
                    embeddingLevel=newLevel;
1244
                    /* we can increment stackLast without checking because newLevel
1245
                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1246
25.6k
                    stackLast++;
1247
25.6k
                    stack[stackLast]=embeddingLevel+ISOLATE;
1248
25.6k
                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1249
13.6k
                } else {
1250
                    /* make it WS so that it is handled by adjustWSLevels() */
1251
13.6k
                    dirProps[i]=WS;
1252
13.6k
                    overflowIsolateCount++;
1253
13.6k
                }
1254
39.3k
                break;
1255
4.17k
            case PDI:
1256
4.17k
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1257
508
                    bracketProcessBoundary(&bracketData, lastCcPos,
1258
508
                                           previousLevel, embeddingLevel);
1259
508
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1260
508
                }
1261
                /* (X6a) */
1262
4.17k
                if(overflowIsolateCount) {
1263
1.18k
                    overflowIsolateCount--;
1264
                    /* make it WS so that it is handled by adjustWSLevels() */
1265
1.18k
                    dirProps[i]=WS;
1266
1.18k
                }
1267
2.98k
                else if(validIsolateCount) {
1268
2.50k
                    flags|=DIRPROP_FLAG(PDI);
1269
2.50k
                    lastCcPos=i;
1270
2.50k
                    overflowEmbeddingCount=0;
1271
3.28k
                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1272
777
                        stackLast--;                /* until the last isolate entry */
1273
2.50k
                    stackLast--;                    /* pop also the last isolate entry */
1274
2.50k
                    validIsolateCount--;
1275
2.50k
                    bracketProcessPDI(&bracketData);
1276
2.50k
                } else
1277
                    /* make it WS so that it is handled by adjustWSLevels() */
1278
477
                    dirProps[i]=WS;
1279
4.17k
                embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1280
4.17k
                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1281
4.17k
                previousLevel=embeddingLevel;
1282
4.17k
                levels[i]=NO_OVERRIDE(embeddingLevel);
1283
4.17k
                break;
1284
3.00k
            case B:
1285
3.00k
                flags|=DIRPROP_FLAG(B);
1286
3.00k
                levels[i]=GET_PARALEVEL(pBiDi, i);
1287
3.00k
                if((i+1)<length) {
1288
2.99k
                    if(text[i]==CR && text[i+1]==LF)
1289
198
                        break;          /* skip CR when followed by LF */
1290
2.79k
                    overflowEmbeddingCount=overflowIsolateCount=0;
1291
2.79k
                    validIsolateCount=0;
1292
2.79k
                    stackLast=0;
1293
2.79k
                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1294
2.79k
                    stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1295
2.79k
                    bracketProcessB(&bracketData, embeddingLevel);
1296
2.79k
                }
1297
2.80k
                break;
1298
77.7k
            case BN:
1299
                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1300
                /* they will get their levels set correctly in adjustWSLevels() */
1301
77.7k
                levels[i]=previousLevel;
1302
77.7k
                flags|=DIRPROP_FLAG(BN);
1303
77.7k
                break;
1304
2.25M
            default:
1305
                /* all other types are normal characters and get the "real" level */
1306
2.25M
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1307
23.3k
                    bracketProcessBoundary(&bracketData, lastCcPos,
1308
23.3k
                                           previousLevel, embeddingLevel);
1309
23.3k
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1310
23.3k
                    if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1311
2.63k
                        flags|=DIRPROP_FLAG_O(embeddingLevel);
1312
23.3k
                    else
1313
20.7k
                        flags|=DIRPROP_FLAG_E(embeddingLevel);
1314
23.3k
                }
1315
2.25M
                previousLevel=embeddingLevel;
1316
2.25M
                levels[i]=embeddingLevel;
1317
2.25M
                if(!bracketProcessChar(&bracketData, i))
1318
0
                    return (UBiDiDirection)-1;
1319
                /* the dirProp may have been changed in bracketProcessChar() */
1320
2.25M
                flags|=DIRPROP_FLAG(dirProps[i]);
1321
2.25M
                break;
1322
2.39M
            }
1323
2.39M
        }
1324
1.85k
        if(flags&MASK_EMBEDDING)
1325
1.85k
            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1326
1.85k
        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1327
0
            flags|=DIRPROP_FLAG(L);
1328
        /* again, determine if the text is mixed-directional or single-directional */
1329
1.85k
        pBiDi->flags=flags;
1330
1.85k
        direction=directionFromFlags(pBiDi);
1331
1.85k
    }
1332
1.85k
    return direction;
1333
1.85k
}
1334
1335
/*
1336
 * Use a pre-specified embedding levels array:
1337
 *
1338
 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1339
 * ignore all explicit codes (X9),
1340
 * and check all the preset levels.
1341
 *
1342
 * Recalculate the flags to have them reflect the real properties
1343
 * after taking the explicit embeddings into account.
1344
 */
1345
static UBiDiDirection
1346
0
checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1347
0
    DirProp *dirProps=pBiDi->dirProps;
1348
0
    UBiDiLevel *levels=pBiDi->levels;
1349
0
    int32_t isolateCount=0;
1350
1351
0
    int32_t length=pBiDi->length;
1352
0
    Flags flags=0;  /* collect all directionalities in the text */
1353
0
    pBiDi->isolateCount=0;
1354
1355
0
    int32_t currentParaIndex = 0;
1356
0
    int32_t currentParaLimit = pBiDi->paras[0].limit;
1357
0
    int32_t currentParaLevel = pBiDi->paraLevel;
1358
1359
0
    for(int32_t i=0; i<length; ++i) {
1360
0
        UBiDiLevel level=levels[i];
1361
0
        DirProp dirProp=dirProps[i];
1362
0
        if(dirProp==LRI || dirProp==RLI) {
1363
0
            isolateCount++;
1364
0
            if(isolateCount>pBiDi->isolateCount)
1365
0
                pBiDi->isolateCount=isolateCount;
1366
0
        }
1367
0
        else if(dirProp==PDI)
1368
0
            isolateCount--;
1369
0
        else if(dirProp==B)
1370
0
            isolateCount=0;
1371
1372
        // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1373
0
        if (pBiDi->defaultParaLevel != 0 &&
1374
0
                i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1375
0
            currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1376
0
            currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1377
0
        }
1378
1379
0
        UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1380
0
        level &= ~UBIDI_LEVEL_OVERRIDE;
1381
0
        if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1382
0
            if (level == 0) {
1383
0
                if (dirProp == B) {
1384
                    // Paragraph separators are ok with explicit level 0.
1385
                    // Prevents reordering of paragraphs.
1386
0
                } else {
1387
                    // Treat explicit level 0 as a wildcard for the paragraph level.
1388
                    // Avoid making the caller guess what the paragraph level would be.
1389
0
                    level = (UBiDiLevel)currentParaLevel;
1390
0
                    levels[i] = level | overrideFlag;
1391
0
                }
1392
0
            } else {
1393
                // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1394
                /* level out of bounds */
1395
0
                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1396
0
                return UBIDI_LTR;
1397
0
            }
1398
0
        }
1399
0
        if (overrideFlag != 0) {
1400
            /* keep the override flag in levels[i] but adjust the flags */
1401
0
            flags|=DIRPROP_FLAG_O(level);
1402
0
        } else {
1403
            /* set the flags */
1404
0
            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1405
0
        }
1406
0
    }
1407
0
    if(flags&MASK_EMBEDDING)
1408
0
        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1409
    /* determine if the text is mixed-directional or single-directional */
1410
0
    pBiDi->flags=flags;
1411
0
    return directionFromFlags(pBiDi);
1412
0
}
1413
1414
/******************************************************************
1415
 The Properties state machine table
1416
*******************************************************************
1417
1418
 All table cells are 8 bits:
1419
      bits 0..4:  next state
1420
      bits 5..7:  action to perform (if > 0)
1421
1422
 Cells may be of format "n" where n represents the next state
1423
 (except for the rightmost column).
1424
 Cells may also be of format "s(x,y)" where x represents an action
1425
 to perform and y represents the next state.
1426
1427
*******************************************************************
1428
 Definitions and type for properties state table
1429
*******************************************************************
1430
*/
1431
1.16M
/* number of columns in a properties state table row, including the Res column */
#define IMPTABPROPS_COLUMNS 16
/* index of the rightmost (Res) column */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* bits 0..4 of a cell: next state */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* bits 5..7 of a cell: action to perform */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* build a cell from an action and a next state; the arguments are
 * parenthesized so that expression arguments are combined as intended */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
1436
1437
/*
 * Maps each directional property named in the header row below to a smaller
 * group number. The group numbers follow the column order documented in the
 * header row of impTabProps (L=0, R=1, EN=2, AN=3, ON=4, S=5, B=6, ES=7,
 * ET=8, CS=9, BN=10, NSM=11, AL=12, ENL=13, ENR=14): WS, FSI, LRI, RLI and
 * PDI share the ON group, and LRE, LRO, RLE, RLO and PDF share the BN group.
 * NOTE(review): presumably used as the column index into impTabProps --
 * confirm at the use site.
 */
static const uint8_t groupProp[] =          /* dirProp regrouped */
1438
{
1439
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1440
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1441
};
1442
enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1443
1444
/******************************************************************
1445
1446
      PROPERTIES  STATE  TABLE
1447
1448
 In table impTabProps,
1449
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1450
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1451
      - the Res column is the reduced property assigned to a run
1452
1453
 Action 1: process current run1, init new run1
1454
        2: init new run2
1455
        3: process run1, process run2, init new run1
1456
        4: process run1, set run1=run2, init new run2
1457
1458
 Notes:
1459
  1) This table is used in resolveImplicitLevels().
1460
  2) This table triggers actions when there is a change in the Bidi
1461
     property of incoming characters (action 1).
1462
  3) Most such property sequences are processed immediately (in
1463
     fact, passed to processPropertySeq()).
1464
  4) However, numbers are assembled as one sequence. This means
1465
     that undefined situations (like CS following digits, until
1466
     it is known if the next char will be a digit) are held until
1467
     following chars define them.
1468
     Example: digits followed by CS, then comes another CS or ON;
1469
              the digits will be processed, then the CS assigned
1470
              as the start of an ON sequence (action 3).
1471
  5) There are cases where more than one sequence must be
1472
     processed, for instance digits followed by CS followed by L:
1473
     the digits must be processed as one sequence, and the CS
1474
     must be processed as an ON sequence, all this before starting
1475
     assembling chars for the opening L sequence.
1476
1477
1478
*/
1479
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1480
{
1481
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1482
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1483
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1484
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1485
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1486
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1487
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1488
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1489
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1490
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1491
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1492
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1493
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1494
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1495
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1496
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1497
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1498
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1499
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1500
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1501
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1502
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1503
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1504
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1505
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1506
};
1507
1508
/*  we must undef macro s because the levels tables have a different
1509
 *  structure (4 bits for action and 4 bits for next state).
1510
 */
1511
#undef s
1512
1513
/******************************************************************
1514
 The levels state machine tables
1515
*******************************************************************
1516
1517
 All table cells are 8 bits:
1518
      bits 0..3:  next state
1519
      bits 4..7:  action to perform (if > 0)
1520
1521
 Cells may be of format "n" where n represents the next state
1522
 (except for the rightmost column).
1523
 Cells may also be of format "s(x,y)" where x represents an action
1524
 to perform and y represents the next state.
1525
1526
 This format limits each table to 16 states each and to 15 actions.
1527
1528
*******************************************************************
1529
 Definitions and type for levels state tables
1530
*******************************************************************
1531
*/
1532
997k
/* number of columns in a levels state table row: one per reduced dirProp
 * (DirProp_L..DirProp_B) plus the Res column */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* index of the rightmost (Res) column */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* bits 0..3 of a cell: next state */
#define GET_STATE(cell) ((cell)&0x0f)
/* bits 4..7 of a cell: action to perform */
#define GET_ACTION(cell) ((cell)>>4)
/* build a cell from an action and a next state; the arguments are
 * parenthesized so that expression arguments are combined as intended */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1537
1538
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1539
typedef uint8_t ImpAct[];
1540
1541
/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1542
 * instead of having a pair of ImpTab and a pair of ImpAct.
1543
 */
1544
/*
 * Bundles two levels state tables with their two action maps.
 * In the initializers in this file, slot [0] of pImpTab holds an impTabL_*
 * table and slot [1] an impTabR_* table (the DEFAULT tables are commented as
 * "Even paragraph level" and "Odd paragraph level" respectively);
 * pImpAct[n] is the impAct array listed alongside pImpTab[n].
 * The pointers are stored untyped (const void *).
 */
typedef struct ImpTabPair {
1545
    const void * pImpTab[2];
1546
    const void * pImpAct[2];
1547
} ImpTabPair;
1548
1549
/******************************************************************
1550
1551
      LEVELS  STATE  TABLES
1552
1553
 In all levels state tables,
1554
      - state 0 is the initial state
1555
      - the Res column is the increment to add to the text level
1556
        for this property sequence.
1557
1558
 The impAct arrays for each table of a pair map the local action
1559
 numbers of the table to the total list of actions. For instance,
1560
 action 2 in a given table corresponds to the action number which
1561
 appears in entry [2] of the impAct array for that table.
1562
 The first entry of all impAct arrays must be 0.
1563
1564
 Action 1: init conditional sequence
1565
        2: prepend conditional sequence to current sequence
1566
        3: set ON sequence to new level - 1
1567
        4: init EN/AN/ON sequence
1568
        5: fix EN/AN/ON sequence followed by R
1569
        6: set previous level sequence to level 2
1570
1571
 Notes:
1572
  1) These tables are used in processPropertySeq(). The input
1573
     is property sequences as determined by resolveImplicitLevels.
1574
  2) Most such property sequences are processed immediately
1575
     (levels are assigned).
1576
  3) However, some sequences cannot be assigned a final level till
1577
     one or more following sequences are received. For instance,
1578
     ON following an R sequence within an even-level paragraph.
1579
     If the following sequence is R, the ON sequence will be
1580
     assigned basic run level+1, and so will the R sequence.
1581
  4) S is generally handled like ON, since its level will be fixed
1582
     to paragraph level in adjustWSLevels().
1583
1584
*/
1585
1586
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1587
/*  In this table, conditional sequences receive the lower possible level
1588
    until proven otherwise.
1589
*/
1590
{
1591
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1592
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1593
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1594
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1595
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1596
/* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
1597
/* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
1598
};
1599
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1600
/*  In this table, conditional sequences receive the lower possible level
1601
    until proven otherwise.
1602
*/
1603
{
1604
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1605
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1606
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1607
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1608
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1609
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1610
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1611
};
1612
static const ImpAct impAct0 = {0,1,2,3,4};
1613
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1614
                                           &impTabR_DEFAULT},
1615
                                          {&impAct0, &impAct0}};
1616
1617
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1618
/*  In this table, conditional sequences receive the lower possible level
1619
    until proven otherwise.
1620
*/
1621
{
1622
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1623
/* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1624
/* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1625
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
1626
/* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
1627
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
1628
};
1629
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1630
                                                   &impTabR_DEFAULT},
1631
                                                  {&impAct0, &impAct0}};
1632
1633
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1634
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1635
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
1636
*/
1637
{
1638
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1639
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1640
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1641
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1642
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1643
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1644
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1645
};
1646
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1647
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1648
    until proven that there is L on both sides. AN is handled like EN.
1649
*/
1650
{
1651
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1652
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1653
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1654
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1655
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1656
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1657
};
1658
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1659
                        {&impTabL_GROUP_NUMBERS_WITH_R,
1660
                         &impTabR_GROUP_NUMBERS_WITH_R},
1661
                        {&impAct0, &impAct0}};
1662
1663
1664
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1665
/*  This table is identical to the Default LTR table except that EN and AN are
1666
    handled like L.
1667
*/
1668
{
1669
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1670
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1671
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1672
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1673
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1674
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1675
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1676
};
1677
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1678
/*  This table is identical to the Default RTL table except that EN and AN are
1679
    handled like L.
1680
*/
1681
{
1682
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1683
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1684
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1685
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1686
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1687
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1688
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1689
};
1690
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1691
                        {&impTabL_INVERSE_NUMBERS_AS_L,
1692
                         &impTabR_INVERSE_NUMBERS_AS_L},
1693
                        {&impAct0, &impAct0}};
1694
1695
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1696
/*  In this table, conditional sequences receive the lower possible level
1697
    until proven otherwise.
1698
*/
1699
{
1700
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1701
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1702
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1703
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1704
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1705
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1706
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1707
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1708
};
1709
static const ImpAct impAct1 = {0,1,13,14};
1710
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1711
 */
1712
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1713
                        {&impTabL_DEFAULT,
1714
                         &impTabR_INVERSE_LIKE_DIRECT},
1715
                        {&impAct0, &impAct1}};
1716
1717
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1718
/*  The case handled in this table is (visually):  R EN L
1719
*/
1720
{
1721
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1722
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1723
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1724
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1725
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1726
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1727
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1728
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1729
};
1730
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1731
/*  The cases handled in this table are (visually):  R EN L
1732
                                                     R L AN L
1733
*/
1734
{
1735
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1736
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1737
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1738
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1739
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1740
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1741
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1742
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1743
};
1744
static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1745
static const ImpAct impAct3 = {0,1,9,10,11,12};
1746
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1747
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1748
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1749
                        {&impAct2, &impAct3}};
1750
1751
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1752
                        {&impTabL_NUMBERS_SPECIAL,
1753
                         &impTabR_INVERSE_LIKE_DIRECT},
1754
                        {&impAct0, &impAct1}};
1755
1756
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1757
/*  The case handled in this table is (visually):  R EN L
1758
*/
1759
{
1760
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1761
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1762
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1763
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1764
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1765
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1766
};
1767
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1768
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1769
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1770
                        {&impAct2, &impAct3}};
1771
1772
#undef s
1773
1774
/*
 * Working state of the levels state machine.
 * processPropertySeq() looks up the next cell through pImpTab using the
 * current state, translates the cell's action through pImpAct, and stores
 * the new state back into this structure; it also reads and updates the
 * bookkeeping positions below (for example startON and startL2EN).
 */
typedef struct {
1775
    const ImpTab * pImpTab;             /* level table pointer          */
1776
    const ImpAct * pImpAct;             /* action map array             */
1777
    int32_t startON;                    /* start of ON sequence         */
1778
    int32_t startL2EN;                  /* start of level 2 sequence    */
1779
    int32_t lastStrongRTL;              /* index of last found R or AL  */
1780
    int32_t state;                      /* current state                */
1781
    int32_t runStart;                   /* start position of the run    */
1782
    UBiDiLevel runLevel;                /* run level before implicit solving */
1783
} LevState;
1784
1785
/*------------------------------------------------------------------------*/
1786
1787
static void
1788
addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1789
  /* param pos:     position where to insert
1790
     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1791
  */
1792
0
{
1793
0
#define FIRSTALLOC  10
1794
0
    Point point;
1795
0
    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1796
1797
0
    if (pInsertPoints->capacity == 0)
1798
0
    {
1799
0
        pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1800
0
        if (pInsertPoints->points == NULL)
1801
0
        {
1802
0
            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1803
0
            return;
1804
0
        }
1805
0
        pInsertPoints->capacity=FIRSTALLOC;
1806
0
    }
1807
0
    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1808
0
    {
1809
0
        Point * savePoints=pInsertPoints->points;
1810
0
        pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1811
0
                                           pInsertPoints->capacity*2*sizeof(Point)));
1812
0
        if (pInsertPoints->points == NULL)
1813
0
        {
1814
0
            pInsertPoints->points=savePoints;
1815
0
            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1816
0
            return;
1817
0
        }
1818
0
        else  pInsertPoints->capacity*=2;
1819
0
    }
1820
0
    point.pos=pos;
1821
0
    point.flag=flag;
1822
0
    pInsertPoints->points[pInsertPoints->size]=point;
1823
0
    pInsertPoints->size++;
1824
0
#undef FIRSTALLOC
1825
0
}
1826
1827
static void
1828
setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1829
47.1k
{
1830
47.1k
    DirProp *dirProps=pBiDi->dirProps, dirProp;
1831
47.1k
    UBiDiLevel *levels=pBiDi->levels;
1832
47.1k
    int32_t isolateCount=0, k;
1833
192k
    for(k=start; k<limit; k++) {
1834
144k
        dirProp=dirProps[k];
1835
144k
        if(dirProp==PDI)
1836
1.12k
            isolateCount--;
1837
144k
        if(isolateCount==0)
1838
128k
            levels[k]=level;
1839
144k
        if(dirProp==LRI || dirProp==RLI)
1840
1.12k
            isolateCount++;
1841
144k
    }
1842
47.1k
}
1843
1844
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1845
1846
/*
1847
 * This implementation of the (Wn) rules applies all rules in one pass.
1848
 * In order to do so, it needs a look-ahead of typically 1 character
1849
 * (except for W5: sequences of ET) and keeps track of changes
1850
 * in a rule Wp that affect a later Wq (p<q).
1851
 *
1852
 * The (Nn) and (In) rules are also performed in that same single loop,
1853
 * but effectively one iteration behind for white space.
1854
 *
1855
 * Since all implicit rules are performed in one step, it is not necessary
1856
 * to actually store the intermediate directional properties in dirProps[].
1857
 */
1858
1859
static void
1860
processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1861
997k
                   int32_t start, int32_t limit) {
1862
997k
    uint8_t cell, oldStateSeq, actionSeq;
1863
997k
    const ImpTab * pImpTab=pLevState->pImpTab;
1864
997k
    const ImpAct * pImpAct=pLevState->pImpAct;
1865
997k
    UBiDiLevel * levels=pBiDi->levels;
1866
997k
    UBiDiLevel level, addLevel;
1867
997k
    InsertPoints * pInsertPoints;
1868
997k
    int32_t start0, k;
1869
1870
997k
    start0=start;                           /* save original start position */
1871
997k
    oldStateSeq=(uint8_t)pLevState->state;
1872
997k
    cell=(*pImpTab)[oldStateSeq][_prop];
1873
997k
    pLevState->state=GET_STATE(cell);       /* isolate the new state */
1874
997k
    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1875
997k
    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1876
1877
997k
    if(actionSeq) {
1878
221k
        switch(actionSeq) {
1879
123k
        case 1:                         /* init ON seq */
1880
123k
            pLevState->startON=start0;
1881
123k
            break;
1882
1883
51.0k
        case 2:                         /* prepend ON seq to current seq */
1884
51.0k
            start=pLevState->startON;
1885
51.0k
            break;
1886
1887
46.8k
        case 3:                         /* EN/AN after R+ON */
1888
46.8k
            level=pLevState->runLevel+1;
1889
46.8k
            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1890
46.8k
            break;
1891
1892
0
        case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1893
0
            level=pLevState->runLevel+2;
1894
0
            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1895
0
            break;
1896
1897
0
        case 5:                         /* L or S after possible relevant EN/AN */
1898
            /* check if we had EN after R/AL */
1899
0
            if (pLevState->startL2EN >= 0) {
1900
0
                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1901
0
            }
1902
0
            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1903
            /* check if we had any relevant EN/AN after R/AL */
1904
0
            pInsertPoints=&(pBiDi->insertPoints);
1905
0
            if ((pInsertPoints->capacity == 0) ||
1906
0
                (pInsertPoints->size <= pInsertPoints->confirmed))
1907
0
            {
1908
                /* nothing, just clean up */
1909
0
                pLevState->lastStrongRTL=-1;
1910
                /* check if we have a pending conditional segment */
1911
0
                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1912
0
                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1913
0
                    start=pLevState->startON;   /* reset to basic run level */
1914
0
                }
1915
0
                if (_prop == DirProp_S)                /* add LRM before S */
1916
0
                {
1917
0
                    addPoint(pBiDi, start0, LRM_BEFORE);
1918
0
                    pInsertPoints->confirmed=pInsertPoints->size;
1919
0
                }
1920
0
                break;
1921
0
            }
1922
            /* reset previous RTL cont to level for LTR text */
1923
0
            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1924
0
            {
1925
                /* reset odd level, leave runLevel+2 as is */
1926
0
                levels[k]=(levels[k] - 2) & ~1;
1927
0
            }
1928
            /* mark insert points as confirmed */
1929
0
            pInsertPoints->confirmed=pInsertPoints->size;
1930
0
            pLevState->lastStrongRTL=-1;
1931
0
            if (_prop == DirProp_S)            /* add LRM before S */
1932
0
            {
1933
0
                addPoint(pBiDi, start0, LRM_BEFORE);
1934
0
                pInsertPoints->confirmed=pInsertPoints->size;
1935
0
            }
1936
0
            break;
1937
1938
0
        case 6:                         /* R/AL after possible relevant EN/AN */
1939
            /* just clean up */
1940
0
            pInsertPoints=&(pBiDi->insertPoints);
1941
0
            if (pInsertPoints->capacity > 0)
1942
                /* remove all non confirmed insert points */
1943
0
                pInsertPoints->size=pInsertPoints->confirmed;
1944
0
            pLevState->startON=-1;
1945
0
            pLevState->startL2EN=-1;
1946
0
            pLevState->lastStrongRTL=limit - 1;
1947
0
            break;
1948
1949
0
        case 7:                         /* EN/AN after R/AL + possible cont */
1950
            /* check for real AN */
1951
0
            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1952
0
                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1953
0
            {
1954
                /* real AN */
1955
0
                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1956
0
                {
1957
                    /* just note the righmost digit as a strong RTL */
1958
0
                    pLevState->lastStrongRTL=limit - 1;
1959
0
                    break;
1960
0
                }
1961
0
                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1962
0
                {
1963
0
                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1964
0
                    pLevState->startL2EN=-2;
1965
0
                }
1966
                /* note AN */
1967
0
                addPoint(pBiDi, start0, LRM_BEFORE);
1968
0
                break;
1969
0
            }
1970
            /* if first EN/AN after R/AL */
1971
0
            if (pLevState->startL2EN == -1) {
1972
0
                pLevState->startL2EN=start0;
1973
0
            }
1974
0
            break;
1975
1976
0
        case 8:                         /* note location of latest R/AL */
1977
0
            pLevState->lastStrongRTL=limit - 1;
1978
0
            pLevState->startON=-1;
1979
0
            break;
1980
1981
0
        case 9:                         /* L after R+ON/EN/AN */
1982
            /* include possible adjacent number on the left */
1983
0
            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1984
0
            if(k>=0) {
1985
0
                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1986
0
                pInsertPoints=&(pBiDi->insertPoints);
1987
0
                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1988
0
            }
1989
0
            pLevState->startON=start0;
1990
0
            break;
1991
1992
0
        case 10:                        /* AN after L */
1993
            /* AN numbers between L text on both sides may be trouble. */
1994
            /* tentatively bracket with LRMs; will be confirmed if followed by L */
1995
0
            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1996
0
            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1997
0
            break;
1998
1999
0
        case 11:                        /* R after L+ON/EN/AN */
2000
            /* false alert, infirm LRMs around previous AN */
2001
0
            pInsertPoints=&(pBiDi->insertPoints);
2002
0
            pInsertPoints->size=pInsertPoints->confirmed;
2003
0
            if (_prop == DirProp_S)            /* add RLM before S */
2004
0
            {
2005
0
                addPoint(pBiDi, start0, RLM_BEFORE);
2006
0
                pInsertPoints->confirmed=pInsertPoints->size;
2007
0
            }
2008
0
            break;
2009
2010
0
        case 12:                        /* L after L+ON/AN */
2011
0
            level=pLevState->runLevel + addLevel;
2012
0
            for(k=pLevState->startON; k<start0; k++) {
2013
0
                if (levels[k]<level)
2014
0
                    levels[k]=level;
2015
0
            }
2016
0
            pInsertPoints=&(pBiDi->insertPoints);
2017
0
            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
2018
0
            pLevState->startON=start0;
2019
0
            break;
2020
2021
0
        case 13:                        /* L after L+ON+EN/AN/ON */
2022
0
            level=pLevState->runLevel;
2023
0
            for(k=start0-1; k>=pLevState->startON; k--) {
2024
0
                if(levels[k]==level+3) {
2025
0
                    while(levels[k]==level+3) {
2026
0
                        levels[k--]-=2;
2027
0
                    }
2028
0
                    while(levels[k]==level) {
2029
0
                        k--;
2030
0
                    }
2031
0
                }
2032
0
                if(levels[k]==level+2) {
2033
0
                    levels[k]=level;
2034
0
                    continue;
2035
0
                }
2036
0
                levels[k]=level+1;
2037
0
            }
2038
0
            break;
2039
2040
0
        case 14:                        /* R after L+ON+EN/AN/ON */
2041
0
            level=pLevState->runLevel+1;
2042
0
            for(k=start0-1; k>=pLevState->startON; k--) {
2043
0
                if(levels[k]>level) {
2044
0
                    levels[k]-=2;
2045
0
                }
2046
0
            }
2047
0
            break;
2048
2049
0
        default:                        /* we should never get here */
2050
0
            UPRV_UNREACHABLE;
2051
997k
        }
2052
997k
    }
2053
997k
    if((addLevel) || (start < start0)) {
2054
197k
        level=pLevState->runLevel + addLevel;
2055
197k
        if(start>=pLevState->runStart) {
2056
1.38M
            for(k=start; k<limit; k++) {
2057
1.19M
                levels[k]=level;
2058
1.19M
            }
2059
274
        } else {
2060
274
            setLevelsOutsideIsolates(pBiDi, start, limit, level);
2061
274
        }
2062
197k
    }
2063
997k
}
2064
2065
/**
2066
 * Returns the directionality of the last strong character at the end of the prologue, if any.
2067
 * Requires prologue!=null.
2068
 */
2069
static DirProp
2070
0
lastL_R_AL(UBiDi *pBiDi) {
2071
0
    const UChar *text=pBiDi->prologue;
2072
0
    int32_t length=pBiDi->proLength;
2073
0
    int32_t i;
2074
0
    UChar32 uchar;
2075
0
    DirProp dirProp;
2076
0
    for(i=length; i>0; ) {
2077
        /* i is decremented by U16_PREV */
2078
0
        U16_PREV(text, 0, i, uchar);
2079
0
        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2080
0
        if(dirProp==L) {
2081
0
            return DirProp_L;
2082
0
        }
2083
0
        if(dirProp==R || dirProp==AL) {
2084
0
            return DirProp_R;
2085
0
        }
2086
0
        if(dirProp==B) {
2087
0
            return DirProp_ON;
2088
0
        }
2089
0
    }
2090
0
    return DirProp_ON;
2091
0
}
2092
2093
/**
2094
 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2095
 * Requires epilogue!=null.
2096
 */
2097
static DirProp
2098
0
firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2099
0
    const UChar *text=pBiDi->epilogue;
2100
0
    int32_t length=pBiDi->epiLength;
2101
0
    int32_t i;
2102
0
    UChar32 uchar;
2103
0
    DirProp dirProp;
2104
0
    for(i=0; i<length; ) {
2105
        /* i is incremented by U16_NEXT */
2106
0
        U16_NEXT(text, i, length, uchar);
2107
0
        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2108
0
        if(dirProp==L) {
2109
0
            return DirProp_L;
2110
0
        }
2111
0
        if(dirProp==R || dirProp==AL) {
2112
0
            return DirProp_R;
2113
0
        }
2114
0
        if(dirProp==EN) {
2115
0
            return DirProp_EN;
2116
0
        }
2117
0
        if(dirProp==AN) {
2118
0
            return DirProp_AN;
2119
0
        }
2120
0
    }
2121
0
    return DirProp_ON;
2122
0
}
2123
2124
static void
2125
resolveImplicitLevels(UBiDi *pBiDi,
2126
                      int32_t start, int32_t limit,
2127
45.2k
                      DirProp sor, DirProp eor) {
2128
45.2k
    const DirProp *dirProps=pBiDi->dirProps;
2129
45.2k
    DirProp dirProp;
2130
45.2k
    LevState levState;
2131
45.2k
    int32_t i, start1, start2;
2132
45.2k
    uint16_t oldStateImp, stateImp, actionImp;
2133
45.2k
    uint8_t gprop, resProp, cell;
2134
45.2k
    UBool inverseRTL;
2135
45.2k
    DirProp nextStrongProp=R;
2136
45.2k
    int32_t nextStrongPos=-1;
2137
2138
    /* check for RTL inverse BiDi mode */
2139
    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2140
     * loop on the text characters from end to start.
2141
     * This would need a different properties state table (at least different
2142
     * actions) and different levels state tables (maybe very similar to the
2143
     * LTR corresponding ones.
2144
     */
2145
45.2k
    inverseRTL=(UBool)
2146
45.2k
        ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2147
2.61k
         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2148
2.61k
          pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2149
2150
    /* initialize for property and levels state tables */
2151
45.2k
    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2152
45.2k
    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2153
45.2k
    levState.runStart=start;
2154
45.2k
    levState.runLevel=pBiDi->levels[start];
2155
45.2k
    levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2156
45.2k
    levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2157
45.2k
    if(start==0 && pBiDi->proLength>0) {
2158
0
        DirProp lastStrong=lastL_R_AL(pBiDi);
2159
0
        if(lastStrong!=DirProp_ON) {
2160
0
            sor=lastStrong;
2161
0
        }
2162
0
    }
2163
    /* The isolates[] entries contain enough information to
2164
       resume the bidi algorithm in the same state as it was
2165
       when it was interrupted by an isolate sequence. */
2166
45.2k
    if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
2167
2.00k
        levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2168
2.00k
        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2169
2.00k
        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2170
2.00k
        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2171
2.00k
        pBiDi->isolateCount--;
2172
43.2k
    } else {
2173
43.2k
        levState.startON=-1;
2174
43.2k
        start1=start;
2175
43.2k
        if(dirProps[start]==NSM)
2176
253
            stateImp = 1 + sor;
2177
42.9k
        else
2178
42.9k
            stateImp=0;
2179
43.2k
        levState.state=0;
2180
43.2k
        processPropertySeq(pBiDi, &levState, sor, start, start);
2181
43.2k
    }
2182
45.2k
    start2=start;                       /* to make Java compiler happy */
2183
2184
4.08M
    for(i=start; i<=limit; i++) {
2185
4.06M
        if(i>=limit) {
2186
45.2k
            int32_t k;
2187
299k
            for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2188
45.2k
            dirProp=dirProps[k];
2189
45.2k
            if(dirProp==LRI || dirProp==RLI)
2190
25.0k
                break;      /* no forced closing for sequence ending with LRI/RLI */
2191
20.1k
            gprop=eor;
2192
4.01M
        } else {
2193
4.01M
            DirProp prop, prop1;
2194
4.01M
            prop=dirProps[i];
2195
4.01M
            if(prop==B) {
2196
7.19k
                pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2197
7.19k
            }
2198
4.01M
            if(inverseRTL) {
2199
0
                if(prop==AL) {
2200
                    /* AL before EN does not make it AN */
2201
0
                    prop=R;
2202
0
                } else if(prop==EN) {
2203
0
                    if(nextStrongPos<=i) {
2204
                        /* look for next strong char (L/R/AL) */
2205
0
                        int32_t j;
2206
0
                        nextStrongProp=R;   /* set default */
2207
0
                        nextStrongPos=limit;
2208
0
                        for(j=i+1; j<limit; j++) {
2209
0
                            prop1=dirProps[j];
2210
0
                            if(prop1==L || prop1==R || prop1==AL) {
2211
0
                                nextStrongProp=prop1;
2212
0
                                nextStrongPos=j;
2213
0
                                break;
2214
0
                            }
2215
0
                        }
2216
0
                    }
2217
0
                    if(nextStrongProp==AL) {
2218
0
                        prop=AN;
2219
0
                    }
2220
0
                }
2221
0
            }
2222
4.01M
            gprop=groupProp[prop];
2223
4.01M
        }
2224
4.03M
        oldStateImp=stateImp;
2225
4.03M
        cell=impTabProps[oldStateImp][gprop];
2226
4.03M
        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2227
4.03M
        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2228
4.03M
        if((i==limit) && (actionImp==0)) {
2229
            /* there is an unprocessed sequence if its property == eor   */
2230
9.51k
            actionImp=1;                    /* process the last sequence */
2231
9.51k
        }
2232
4.03M
        if(actionImp) {
2233
1.16M
            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2234
1.16M
            switch(actionImp) {
2235
858k
            case 1:             /* process current seq1, init new seq1 */
2236
858k
                processPropertySeq(pBiDi, &levState, resProp, start1, i);
2237
858k
                start1=i;
2238
858k
                break;
2239
250k
            case 2:             /* init new seq2 */
2240
250k
                start2=i;
2241
250k
                break;
2242
17.1k
            case 3:             /* process seq1, process seq2, init new seq1 */
2243
17.1k
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2244
17.1k
                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2245
17.1k
                start1=i;
2246
17.1k
                break;
2247
41.5k
            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2248
41.5k
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2249
41.5k
                start1=start2;
2250
41.5k
                start2=i;
2251
41.5k
                break;
2252
0
            default:            /* we should never get here */
2253
0
                UPRV_UNREACHABLE;
2254
1.16M
            }
2255
1.16M
        }
2256
4.03M
    }
2257
2258
    /* flush possible pending sequence, e.g. ON */
2259
45.2k
    if(limit==pBiDi->length && pBiDi->epiLength>0) {
2260
0
        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2261
0
        if(firstStrong!=DirProp_ON) {
2262
0
            eor=firstStrong;
2263
0
        }
2264
0
    }
2265
2266
    /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2267
299k
    for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2268
45.2k
    dirProp=dirProps[i];
2269
45.2k
    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2270
24.9k
        pBiDi->isolateCount++;
2271
24.9k
        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2272
24.9k
        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2273
24.9k
        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2274
24.9k
        pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2275
24.9k
    }
2276
20.2k
    else
2277
20.2k
        processPropertySeq(pBiDi, &levState, eor, limit, limit);
2278
45.2k
}
2279
2280
/* perform (L1) and (X9) ---------------------------------------------------- */
2281
2282
/*
2283
 * Reset the embedding levels for some non-graphic characters (L1).
2284
 * This function also sets appropriate levels for BN, and
2285
 * explicit embedding types that are supposed to have been removed
2286
 * from the paragraph in (X9).
2287
 */
2288
static void
2289
11.0k
adjustWSLevels(UBiDi *pBiDi) {
2290
11.0k
    const DirProp *dirProps=pBiDi->dirProps;
2291
11.0k
    UBiDiLevel *levels=pBiDi->levels;
2292
11.0k
    int32_t i;
2293
2294
11.0k
    if(pBiDi->flags&MASK_WS) {
2295
6.67k
        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2296
6.67k
        Flags flag;
2297
2298
6.67k
        i=pBiDi->trailingWSStart;
2299
16.0k
        while(i>0) {
2300
            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2301
269k
            while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2302
260k
                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2303
0
                    levels[i]=0;
2304
260k
                } else {
2305
260k
                    levels[i]=GET_PARALEVEL(pBiDi, i);
2306
260k
                }
2307
260k
            }
2308
2309
            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2310
            /* here, i+1 is guaranteed to be <length */
2311
3.73M
            while(i>0) {
2312
3.72M
                flag=DIRPROP_FLAG(dirProps[--i]);
2313
3.72M
                if(flag&MASK_BN_EXPLICIT) {
2314
56.0k
                    levels[i]=levels[i+1];
2315
3.66M
                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2316
0
                    levels[i]=0;
2317
0
                    break;
2318
3.66M
                } else if(flag&MASK_B_S) {
2319
2.71k
                    levels[i]=GET_PARALEVEL(pBiDi, i);
2320
2.71k
                    break;
2321
2.71k
                }
2322
3.72M
            }
2323
9.34k
        }
2324
6.67k
    }
2325
11.0k
}
2326
2327
/*
 * Record the text surrounding the paragraph that will be analysed next.
 *
 * A length of -1 means the corresponding string is NUL-terminated and is
 * measured with u_strlen().  Only the pointers are stored; the strings are
 * not copied.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and changes nothing when pBiDi is NULL,
 * when a length is below -1, or when a NULL string comes with a non-zero
 * length.  Does nothing when pErrorCode is NULL or already a failure.
 */
U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
                 const UChar *prologue, int32_t proLength,
                 const UChar *epilogue, int32_t epiLength,
                 UErrorCode *pErrorCode) {
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);

    /* validate the arguments */
    const UBool badLength=(proLength<-1 || epiLength<-1);
    const UBool missingText=((prologue==NULL && proLength!=0) ||
                             (epilogue==NULL && epiLength!=0));
    if(pBiDi==NULL || badLength || missingText) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    pBiDi->proLength=(proLength==-1) ? u_strlen(prologue) : proLength;
    pBiDi->epiLength=(epiLength==-1) ? u_strlen(epilogue) : epiLength;
    pBiDi->prologue=prologue;
    pBiDi->epilogue=epilogue;
}
2353
2354
static void
2355
23.1k
setParaSuccess(UBiDi *pBiDi) {
2356
23.1k
    pBiDi->proLength=0;                 /* forget the last context */
2357
23.1k
    pBiDi->epiLength=0;
2358
23.1k
    pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2359
23.1k
}
2360
2361
0
/* smaller of two values; each argument may be evaluated twice */
#define BIDI_MIN(a, b)   ((b)<=(a) ? (b) : (a))
2362
0
/* absolute value; the argument may be evaluated twice */
#define BIDI_ABS(v)      ((v)<0 ? (-(v)) : (v))
2363
2364
static void
2365
setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2366
0
                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2367
0
    int32_t *runsOnlyMemory = NULL;
2368
0
    int32_t *visualMap;
2369
0
    UChar *visualText;
2370
0
    int32_t saveLength, saveTrailingWSStart;
2371
0
    const UBiDiLevel *levels;
2372
0
    UBiDiLevel *saveLevels;
2373
0
    UBiDiDirection saveDirection;
2374
0
    UBool saveMayAllocateText;
2375
0
    Run *runs;
2376
0
    int32_t visualLength, i, j, visualStart, logicalStart,
2377
0
            runCount, runLength, addedRuns, insertRemove,
2378
0
            start, limit, step, indexOddBit, logicalPos,
2379
0
            index0, index1;
2380
0
    uint32_t saveOptions;
2381
2382
0
    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2383
0
    if(length==0) {
2384
0
        ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2385
0
        goto cleanup3;
2386
0
    }
2387
    /* obtain memory for mapping table and visual text */
2388
0
    runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
2389
0
    if(runsOnlyMemory==NULL) {
2390
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2391
0
        goto cleanup3;
2392
0
    }
2393
0
    visualMap=runsOnlyMemory;
2394
0
    visualText=(UChar *)&visualMap[length];
2395
0
    saveLevels=(UBiDiLevel *)&visualText[length];
2396
0
    saveOptions=pBiDi->reorderingOptions;
2397
0
    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2398
0
        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2399
0
        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2400
0
    }
2401
0
    paraLevel&=1;                       /* accept only 0 or 1 */
2402
0
    ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2403
0
    if(U_FAILURE(*pErrorCode)) {
2404
0
        goto cleanup3;
2405
0
    }
2406
    /* we cannot access directly pBiDi->levels since it is not yet set if
2407
     * direction is not MIXED
2408
     */
2409
0
    levels=ubidi_getLevels(pBiDi, pErrorCode);
2410
0
    uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2411
0
    saveTrailingWSStart=pBiDi->trailingWSStart;
2412
0
    saveLength=pBiDi->length;
2413
0
    saveDirection=pBiDi->direction;
2414
2415
    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2416
     * the visual map and the dirProps array to drive the second call
2417
     * to ubidi_setPara (but must make provision for possible removal of
2418
     * BiDi controls.  Alternatively, only use the dirProps array via
2419
     * customized classifier callback.
2420
     */
2421
0
    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2422
0
                                      UBIDI_DO_MIRRORING, pErrorCode);
2423
0
    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2424
0
    if(U_FAILURE(*pErrorCode)) {
2425
0
        goto cleanup2;
2426
0
    }
2427
0
    pBiDi->reorderingOptions=saveOptions;
2428
2429
0
    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2430
0
    paraLevel^=1;
2431
    /* Because what we did with reorderingOptions, visualText may be shorter
2432
     * than the original text. But we don't want the levels memory to be
2433
     * reallocated shorter than the original length, since we need to restore
2434
     * the levels as after the first call to ubidi_setpara() before returning.
2435
     * We will force mayAllocateText to FALSE before the second call to
2436
     * ubidi_setpara(), and will restore it afterwards.
2437
     */
2438
0
    saveMayAllocateText=pBiDi->mayAllocateText;
2439
0
    pBiDi->mayAllocateText=FALSE;
2440
0
    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2441
0
    pBiDi->mayAllocateText=saveMayAllocateText;
2442
0
    ubidi_getRuns(pBiDi, pErrorCode);
2443
0
    if(U_FAILURE(*pErrorCode)) {
2444
0
        goto cleanup1;
2445
0
    }
2446
    /* check if some runs must be split, count how many splits */
2447
0
    addedRuns=0;
2448
0
    runCount=pBiDi->runCount;
2449
0
    runs=pBiDi->runs;
2450
0
    visualStart=0;
2451
0
    for(i=0; i<runCount; i++, visualStart+=runLength) {
2452
0
        runLength=runs[i].visualLimit-visualStart;
2453
0
        if(runLength<2) {
2454
0
            continue;
2455
0
        }
2456
0
        logicalStart=GET_INDEX(runs[i].logicalStart);
2457
0
        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2458
0
            index0=visualMap[j];
2459
0
            index1=visualMap[j-1];
2460
0
            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2461
0
                addedRuns++;
2462
0
            }
2463
0
        }
2464
0
    }
2465
0
    if(addedRuns) {
2466
0
        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2467
0
            if(runCount==1) {
2468
                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2469
0
                pBiDi->runsMemory[0]=runs[0];
2470
0
            }
2471
0
            runs=pBiDi->runs=pBiDi->runsMemory;
2472
0
            pBiDi->runCount+=addedRuns;
2473
0
        } else {
2474
0
            goto cleanup1;
2475
0
        }
2476
0
    }
2477
    /* split runs which are not consecutive in source text */
2478
0
    for(i=runCount-1; i>=0; i--) {
2479
0
        runLength= i==0 ? runs[0].visualLimit :
2480
0
                          runs[i].visualLimit-runs[i-1].visualLimit;
2481
0
        logicalStart=runs[i].logicalStart;
2482
0
        indexOddBit=GET_ODD_BIT(logicalStart);
2483
0
        logicalStart=GET_INDEX(logicalStart);
2484
0
        if(runLength<2) {
2485
0
            if(addedRuns) {
2486
0
                runs[i+addedRuns]=runs[i];
2487
0
            }
2488
0
            logicalPos=visualMap[logicalStart];
2489
0
            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2490
0
                                            saveLevels[logicalPos]^indexOddBit);
2491
0
            continue;
2492
0
        }
2493
0
        if(indexOddBit) {
2494
0
            start=logicalStart;
2495
0
            limit=logicalStart+runLength-1;
2496
0
            step=1;
2497
0
        } else {
2498
0
            start=logicalStart+runLength-1;
2499
0
            limit=logicalStart;
2500
0
            step=-1;
2501
0
        }
2502
0
        for(j=start; j!=limit; j+=step) {
2503
0
            index0=visualMap[j];
2504
0
            index1=visualMap[j+step];
2505
0
            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2506
0
                logicalPos=BIDI_MIN(visualMap[start], index0);
2507
0
                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2508
0
                                            saveLevels[logicalPos]^indexOddBit);
2509
0
                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2510
0
                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2511
0
                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2512
0
                runs[i+addedRuns].insertRemove=insertRemove;
2513
0
                runs[i].insertRemove&=~insertRemove;
2514
0
                start=j+step;
2515
0
                addedRuns--;
2516
0
            }
2517
0
        }
2518
0
        if(addedRuns) {
2519
0
            runs[i+addedRuns]=runs[i];
2520
0
        }
2521
0
        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2522
0
        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2523
0
                                            saveLevels[logicalPos]^indexOddBit);
2524
0
    }
2525
2526
0
  cleanup1:
2527
    /* restore initial paraLevel */
2528
0
    pBiDi->paraLevel^=1;
2529
0
  cleanup2:
2530
    /* restore real text */
2531
0
    pBiDi->text=text;
2532
0
    pBiDi->length=saveLength;
2533
0
    pBiDi->originalLength=length;
2534
0
    pBiDi->direction=saveDirection;
2535
    /* the saved levels should never excess levelsSize, but we check anyway */
2536
0
    if(saveLength>pBiDi->levelsSize) {
2537
0
        saveLength=pBiDi->levelsSize;
2538
0
    }
2539
0
    uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2540
0
    pBiDi->trailingWSStart=saveTrailingWSStart;
2541
0
    if(pBiDi->runCount>1) {
2542
0
        pBiDi->direction=UBIDI_MIXED;
2543
0
    }
2544
0
  cleanup3:
2545
    /* free memory for mapping table and visual text */
2546
0
    uprv_free(runsOnlyMemory);
2547
2548
0
    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2549
0
}
2550
2551
/* ubidi_setPara ------------------------------------------------------------ */
2552
2553
U_CAPI void U_EXPORT2
2554
ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2555
              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2556
23.1k
              UErrorCode *pErrorCode) {
2557
23.1k
    UBiDiDirection direction;
2558
23.1k
    DirProp *dirProps;
2559
2560
    /* check the argument values */
2561
23.1k
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2562
23.1k
    if(pBiDi==NULL || text==NULL || length<-1 ||
2563
23.1k
       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2564
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2565
0
        return;
2566
0
    }
2567
2568
23.1k
    if(length==-1) {
2569
0
        length=u_strlen(text);
2570
0
    }
2571
2572
    /* special treatment for RUNS_ONLY mode */
2573
23.1k
    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2574
0
        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2575
0
        return;
2576
0
    }
2577
2578
    /* initialize the UBiDi structure */
2579
23.1k
    pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2580
23.1k
    pBiDi->text=text;
2581
23.1k
    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2582
23.1k
    pBiDi->paraLevel=paraLevel;
2583
23.1k
    pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2584
23.1k
    pBiDi->paraCount=1;
2585
2586
23.1k
    pBiDi->dirProps=NULL;
2587
23.1k
    pBiDi->levels=NULL;
2588
23.1k
    pBiDi->runs=NULL;
2589
23.1k
    pBiDi->insertPoints.size=0;         /* clean up from last call */
2590
23.1k
    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2591
2592
    /*
2593
     * Save the original paraLevel if contextual; otherwise, set to 0.
2594
     */
2595
23.1k
    pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2596
2597
23.1k
    if(length==0) {
2598
        /*
2599
         * For an empty paragraph, create a UBiDi object with the paraLevel and
2600
         * the flags and the direction set but without allocating zero-length arrays.
2601
         * There is nothing more to do.
2602
         */
2603
4.09k
        if(IS_DEFAULT_LEVEL(paraLevel)) {
2604
0
            pBiDi->paraLevel&=1;
2605
0
            pBiDi->defaultParaLevel=0;
2606
0
        }
2607
4.09k
        pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2608
4.09k
        pBiDi->runCount=0;
2609
4.09k
        pBiDi->paraCount=0;
2610
4.09k
        setParaSuccess(pBiDi);          /* mark successful setPara */
2611
4.09k
        return;
2612
4.09k
    }
2613
2614
19.0k
    pBiDi->runCount=-1;
2615
2616
    /* allocate paras memory */
2617
19.0k
    if(pBiDi->parasMemory)
2618
0
        pBiDi->paras=pBiDi->parasMemory;
2619
19.0k
    else
2620
19.0k
        pBiDi->paras=pBiDi->simpleParas;
2621
2622
    /*
2623
     * Get the directional properties,
2624
     * the flags bit-set, and
2625
     * determine the paragraph level if necessary.
2626
     */
2627
19.0k
    if(getDirPropsMemory(pBiDi, length)) {
2628
19.0k
        pBiDi->dirProps=pBiDi->dirPropsMemory;
2629
19.0k
        if(!getDirProps(pBiDi)) {
2630
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2631
0
            return;
2632
0
        }
2633
0
    } else {
2634
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2635
0
        return;
2636
0
    }
2637
19.0k
    dirProps=pBiDi->dirProps;
2638
    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2639
19.0k
    length= pBiDi->length;
2640
19.0k
    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2641
2642
    /* are explicit levels specified? */
2643
19.0k
    if(embeddingLevels==NULL) {
2644
        /* no: determine explicit levels according to the (Xn) rules */\
2645
19.0k
        if(getLevelsMemory(pBiDi, length)) {
2646
19.0k
            pBiDi->levels=pBiDi->levelsMemory;
2647
19.0k
            direction=resolveExplicitLevels(pBiDi, pErrorCode);
2648
19.0k
            if(U_FAILURE(*pErrorCode)) {
2649
0
                return;
2650
0
            }
2651
0
        } else {
2652
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2653
0
            return;
2654
0
        }
2655
0
    } else {
2656
        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2657
0
        pBiDi->levels=embeddingLevels;
2658
0
        direction=checkExplicitLevels(pBiDi, pErrorCode);
2659
0
        if(U_FAILURE(*pErrorCode)) {
2660
0
            return;
2661
0
        }
2662
19.0k
    }
2663
2664
    /* allocate isolate memory */
2665
19.0k
    if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2666
18.4k
        pBiDi->isolates=pBiDi->simpleIsolates;
2667
635
    else
2668
635
        if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2669
0
            pBiDi->isolates=pBiDi->isolatesMemory;
2670
635
        else {
2671
635
            if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2672
635
                pBiDi->isolates=pBiDi->isolatesMemory;
2673
0
            } else {
2674
0
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2675
0
                return;
2676
0
            }
2677
19.0k
        }
2678
19.0k
    pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2679
2680
    /*
2681
     * The steps after (X9) in the UBiDi algorithm are performed only if
2682
     * the paragraph text has mixed directionality!
2683
     */
2684
19.0k
    pBiDi->direction=direction;
2685
19.0k
    switch(direction) {
2686
7.51k
    case UBIDI_LTR:
2687
        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2688
7.51k
        pBiDi->trailingWSStart=0;
2689
7.51k
        break;
2690
560
    case UBIDI_RTL:
2691
        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2692
560
        pBiDi->trailingWSStart=0;
2693
560
        break;
2694
11.0k
    default:
2695
        /*
2696
         *  Choose the right implicit state table
2697
         */
2698
11.0k
        switch(pBiDi->reorderingMode) {
2699
11.0k
        case UBIDI_REORDER_DEFAULT:
2700
11.0k
            pBiDi->pImpTabPair=&impTab_DEFAULT;
2701
11.0k
            break;
2702
0
        case UBIDI_REORDER_NUMBERS_SPECIAL:
2703
0
            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2704
0
            break;
2705
0
        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2706
0
            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2707
0
            break;
2708
0
        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2709
0
            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2710
0
            break;
2711
0
        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2712
0
            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2713
0
                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2714
0
            } else {
2715
0
                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2716
0
            }
2717
0
            break;
2718
0
        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2719
0
            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2720
0
                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2721
0
            } else {
2722
0
                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2723
0
            }
2724
0
            break;
2725
0
        default:
2726
            /* we should never get here */
2727
0
            UPRV_UNREACHABLE;
2728
11.0k
        }
2729
        /*
2730
         * If there are no external levels specified and there
2731
         * are no significant explicit level codes in the text,
2732
         * then we can treat the entire paragraph as one run.
2733
         * Otherwise, we need to perform the following rules on runs of
2734
         * the text with the same embedding levels. (X10)
2735
         * "Significant" explicit level codes are ones that actually
2736
         * affect non-BN characters.
2737
         * Examples for "insignificant" ones are empty embeddings
2738
         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2739
         */
2740
11.0k
        if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2741
10.6k
                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2742
9.06k
            resolveImplicitLevels(pBiDi, 0, length,
2743
9.06k
                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2744
9.06k
                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2745
1.95k
        } else {
2746
            /* sor, eor: start and end types of same-level-run */
2747
1.95k
            UBiDiLevel *levels=pBiDi->levels;
2748
1.95k
            int32_t start, limit=0;
2749
1.95k
            UBiDiLevel level, nextLevel;
2750
1.95k
            DirProp sor, eor;
2751
2752
            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2753
1.95k
            level=GET_PARALEVEL(pBiDi, 0);
2754
1.95k
            nextLevel=levels[0];
2755
1.95k
            if(level<nextLevel) {
2756
0
                eor=GET_LR_FROM_LEVEL(nextLevel);
2757
1.95k
            } else {
2758
1.95k
                eor=GET_LR_FROM_LEVEL(level);
2759
1.95k
            }
2760
2761
42.7k
            do {
2762
                /* determine start and limit of the run (end points just behind the run) */
2763
2764
                /* the values for this run's start are the same as for the previous run's end */
2765
42.7k
                start=limit;
2766
42.7k
                level=nextLevel;
2767
42.7k
                if((start>0) && (dirProps[start-1]==B)) {
2768
                    /* except if this is a new paragraph, then set sor = para level */
2769
0
                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2770
42.7k
                } else {
2771
42.7k
                    sor=eor;
2772
42.7k
                }
2773
2774
                /* search for the limit of this run */
2775
2.55M
                while((++limit<length) &&
2776
2.55M
                      ((levels[limit]==level) ||
2777
2.51M
                       (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2778
2779
                /* get the correct level of the next run */
2780
42.7k
                if(limit<length) {
2781
40.7k
                    nextLevel=levels[limit];
2782
1.95k
                } else {
2783
1.95k
                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
2784
1.95k
                }
2785
2786
                /* determine eor from max(level, nextLevel); sor is last run's eor */
2787
42.7k
                if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2788
29.1k
                    eor=GET_LR_FROM_LEVEL(nextLevel);
2789
13.5k
                } else {
2790
13.5k
                    eor=GET_LR_FROM_LEVEL(level);
2791
13.5k
                }
2792
2793
                /* if the run consists of overridden directional types, then there
2794
                   are no implicit types to be resolved */
2795
42.7k
                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2796
36.1k
                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2797
6.60k
                } else {
2798
                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
2799
288k
                    do {
2800
288k
                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2801
288k
                    } while(start<limit);
2802
6.60k
                }
2803
42.7k
            } while(limit<length);
2804
1.95k
        }
2805
        /* check if we got any memory shortage while adding insert points */
2806
11.0k
        if (U_FAILURE(pBiDi->insertPoints.errorCode))
2807
0
        {
2808
0
            *pErrorCode=pBiDi->insertPoints.errorCode;
2809
0
            return;
2810
0
        }
2811
        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2812
11.0k
        adjustWSLevels(pBiDi);
2813
11.0k
        break;
2814
19.0k
    }
2815
    /* add RLM for inverse Bidi with contextual orientation resolving
2816
     * to RTL which would not round-trip otherwise
2817
     */
2818
19.0k
    if((pBiDi->defaultParaLevel>0) &&
2819
0
       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2820
0
       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2821
0
        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2822
0
        int32_t i, j, start, last;
2823
0
        UBiDiLevel level;
2824
0
        DirProp dirProp;
2825
0
        for(i=0; i<pBiDi->paraCount; i++) {
2826
0
            last=(pBiDi->paras[i].limit)-1;
2827
0
            level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
2828
0
            if(level==0)
2829
0
                continue;           /* LTR paragraph */
2830
0
            start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2831
0
            for(j=last; j>=start; j--) {
2832
0
                dirProp=dirProps[j];
2833
0
                if(dirProp==L) {
2834
0
                    if(j<last) {
2835
0
                        while(dirProps[last]==B) {
2836
0
                            last--;
2837
0
                        }
2838
0
                    }
2839
0
                    addPoint(pBiDi, last, RLM_BEFORE);
2840
0
                    break;
2841
0
                }
2842
0
                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2843
0
                    break;
2844
0
                }
2845
0
            }
2846
0
        }
2847
0
    }
2848
2849
19.0k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2850
0
        pBiDi->resultLength -= pBiDi->controlCount;
2851
19.0k
    } else {
2852
19.0k
        pBiDi->resultLength += pBiDi->insertPoints.size;
2853
19.0k
    }
2854
19.0k
    setParaSuccess(pBiDi);              /* mark successful setPara */
2855
19.0k
}
2856
2857
/*
 * Store the orderParagraphsLTR setting in the UBiDi object.
 * A NULL pBiDi is silently ignored.
 */
U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
    if(pBiDi==NULL) {
        return;
    }
    pBiDi->orderParagraphsLTR=orderParagraphsLTR;
}
2863
2864
/*
 * Return the orderParagraphsLTR setting stored in the UBiDi object,
 * or FALSE when pBiDi is NULL.
 */
U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
    if(pBiDi==NULL) {
        return FALSE;
    }
    return pBiDi->orderParagraphsLTR;
}
2872
2873
/*
 * Return the direction stored in the object, or UBIDI_LTR when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return UBIDI_LTR;
    }
    return pBiDi->direction;
}
2881
2882
/*
 * Return the text pointer stored in the object, or NULL when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return NULL;
    }
    return pBiDi->text;
}
2890
2891
/*
 * Return the originalLength stored in the object, or 0 when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->originalLength;
}
2899
2900
/*
 * Return the (processed) length stored in the object, or 0 when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->length;
}
2908
2909
/*
 * Return the resultLength stored in the object, or 0 when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->resultLength;
}
2917
2918
/* paragraphs API functions ------------------------------------------------- */
2919
2920
/*
 * Return the paraLevel stored in the object, or 0 when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->paraLevel;
}
2928
2929
/*
 * Return the paraCount stored in the object, or 0 when the object
 * does not pass the IS_VALID_PARA_OR_LINE check.
 */
U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi *pBiDi) {
    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
        return pBiDi->paraCount;
    }
    return 0;
}
2937
2938
/*
 * Report start, limit and level of the paragraph with index paraIndex.
 *
 * pErrorCode, the object state and the range 0 <= paraIndex < paraCount are
 * validated through the RETURN_VOID_IF_* macros, which return early when a
 * check fails. Each of pParaStart, pParaLimit and pParaLevel may be NULL, in
 * which case that value is simply not reported.
 */
U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
                          int32_t *pParaStart, int32_t *pParaLimit,
                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
    int32_t paraStart;

    /* validate the arguments before touching the object */
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);

    /* a Line object delegates to its Para object */
    pBiDi=pBiDi->pParaBiDi;

    /* a paragraph starts where the previous one ends; the first starts at 0 */
    paraStart= paraIndex!=0 ? pBiDi->paras[paraIndex-1].limit : 0;

    if(pParaStart) {
        *pParaStart=paraStart;
    }
    if(pParaLimit) {
        *pParaLimit=pBiDi->paras[paraIndex].limit;
    }
    if(pParaLevel) {
        *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
    }
}
2965
2966
/*
 * Find the paragraph that contains charIndex, report its start, limit and
 * level through ubidi_getParagraphByIndex(), and return its index.
 *
 * The RETURN_IF_* validation macros are handed -1 as the value to return
 * when pErrorCode, the object state or the range 0 <= charIndex < length
 * is rejected.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
                          int32_t *pParaStart, int32_t *pParaLimit,
                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
    int32_t paraIndex=0;

    /* validate the arguments */
    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
    pBiDi=pBiDi->pParaBiDi;             /* a Line object delegates to its Para object */
    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);

    /* advance to the first paragraph whose limit lies beyond charIndex */
    while(charIndex>=pBiDi->paras[paraIndex].limit) {
        paraIndex++;
    }
    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
    return paraIndex;
}
2983
2984
/*
 * Install a new class callback and its context on the UBiDi object.
 *
 * When oldFn / oldContext are non-NULL, the previously installed callback
 * and context are written there before being replaced.
 * A NULL pBiDi sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR.
 */
U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
                       const void *newContext, UBiDiClassCallback **oldFn,
                       const void **oldContext, UErrorCode *pErrorCode)
{
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
    if(pBiDi==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* hand back the current settings before overwriting them */
    if(oldFn!=NULL) {
        *oldFn=pBiDi->fnClassCallback;
    }
    if(oldContext!=NULL) {
        *oldContext=pBiDi->coClassCallback;
    }

    pBiDi->fnClassCallback=newFn;
    pBiDi->coClassCallback=newContext;
}
3005
3006
/*
 * Report the class callback and context currently stored in the UBiDi
 * object. Each output pointer may be NULL, in which case that value is not
 * reported. A NULL pBiDi makes the call a no-op.
 */
U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
{
    if(pBiDi!=NULL) {
        if(fn!=NULL) {
            *fn=pBiDi->fnClassCallback;
        }
        if(context!=NULL) {
            *context=pBiDi->coClassCallback;
        }
    }
}
3021
3022
/*
 * Return the Bidi class to use for code point c.
 *
 * If a class callback is installed on pBiDi it is asked first; when there is
 * no callback, or the callback answers U_BIDI_CLASS_DEFAULT, the class comes
 * from ubidi_getClass(c). A NULL pBiDi is treated like "no callback
 * installed" instead of being dereferenced, in line with the NULL handling
 * of ubidi_getClassCallback().
 *
 * Any result that is >= U_CHAR_DIRECTION_COUNT is replaced by ON, so the
 * returned value is always below U_CHAR_DIRECTION_COUNT.
 */
U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
{
    UCharDirection dir;

    /* short-circuit order matters: the callback is only called when present */
    if( pBiDi == NULL || pBiDi->fnClassCallback == NULL ||
        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
    {
        dir = ubidi_getClass(c);
    }
    if(dir >= U_CHAR_DIRECTION_COUNT) {
        /* out-of-range answer: fall back to Other Neutral */
        dir = (UCharDirection)ON;
    }
    return dir;
}