Coverage Report

Created: 2026-06-07 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ubidi.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1999-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*   file name:  ubidi.c
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 1999jul27
16
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
17
*
18
*/
19
20
#include <limits>
21
22
#include "cmemory.h"
23
#include "unicode/utypes.h"
24
#include "unicode/ustring.h"
25
#include "unicode/uchar.h"
26
#include "unicode/ubidi.h"
27
#include "unicode/utf16.h"
28
#include "ubidi_props.h"
29
#include "ubidiimp.h"
30
#include "uassert.h"
31
32
/*
33
 * General implementation notes:
34
 *
35
 * Throughout the implementation, there are comments like (W2) that refer to
36
 * rules of the BiDi algorithm, in this example to the second rule of the
37
 * resolution of weak types.
38
 *
39
 * For handling surrogate pairs, where two char16_t's form one "abstract" (or UTF-32)
40
 * character according to UTF-16, the second char16_t gets the directional property of
41
 * the entire character assigned, while the first one gets a BN, a boundary
42
 * neutral, type, which is ignored by most of the algorithm according to
43
 * rule (X9) and the implementation suggestions of the BiDi algorithm.
44
 *
45
 * Later, adjustWSLevels() will set the level for each BN to that of the
46
 * following character (char16_t), which results in surrogate pairs getting the
47
 * same level on each of their surrogates.
48
 *
49
 * In a UTF-8 implementation, the same thing could be done: the last byte of
50
 * a multi-byte sequence would get the "real" property, while all previous
51
 * bytes of that sequence would get BN.
52
 *
53
 * It is not possible to assign all those parts of a character the same real
54
 * property because this would fail in the resolution of weak types with rules
55
 * that look at immediately surrounding types.
56
 *
57
 * As a related topic, this implementation does not remove Boundary Neutral
58
 * types from the input, but ignores them wherever this is relevant.
59
 * For example, the loop for the resolution of the weak types reads
60
 * types until it finds a non-BN.
61
 * Also, explicit embedding codes are neither changed into BN nor removed.
62
 * They are only treated the same way real BNs are.
63
 * As stated before, adjustWSLevels() takes care of them at the end.
64
 * For the purpose of conformance, the levels of all these codes
65
 * do not matter.
66
 *
67
 * Note that this implementation modifies the dirProps
68
 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
69
 * X6, N0 (replace paired brackets by L or R).
70
 *
71
 * In this implementation, the resolution of weak types (W1 to W6),
72
 * neutrals (N1 and N2), and the assignment of the resolved level (In)
73
 * are all done in one single loop, in resolveImplicitLevels().
74
 * Changes of dirProp values are done on the fly, without writing
75
 * them back to the dirProps array.
76
 *
77
 *
78
 * This implementation contains code that allows bypassing steps of the
79
 * algorithm that are not needed on the specific paragraph
80
 * in order to speed up the most common cases considerably,
81
 * like text that is entirely LTR, or RTL text without numbers.
82
 *
83
 * Most of this is done by setting a bit for each directional property
84
 * in a flags variable and later checking for whether there are
85
 * any LTR characters or any RTL characters, or both, whether
86
 * there are any explicit embedding codes, etc.
87
 *
88
 * If the (Xn) steps are performed, then the flags are re-evaluated,
89
 * because they will then not contain the embedding codes any more
90
 * and will be adjusted for override codes, so that subsequently
91
 * more bypassing may be possible than what the initial flags suggested.
92
 *
93
 * If the text is not mixed-directional, then the
94
 * algorithm steps for the weak type resolution are not performed,
95
 * and all levels are set to the paragraph level.
96
 *
97
 * If there are no explicit embedding codes, then the (Xn) steps
98
 * are not performed.
99
 *
100
 * If embedding levels are supplied as a parameter, then all
101
 * explicit embedding codes are ignored, and the (Xn) steps
102
 * are not performed.
103
 *
104
 * White Space types could get the level of the run they belong to,
105
 * and are checked with a test of (flags&MASK_EMBEDDING) to
106
 * consider if the paragraph direction should be considered in
107
 * the flags variable.
108
 *
109
 * If there are no White Space types in the paragraph, then
110
 * (L1) is not necessary in adjustWSLevels().
111
 */
112
113
/* to avoid some conditional statements, use tiny constant arrays */
114
static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
115
static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
116
static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
117
118
29.1k
#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
119
1.51k
#define DIRPROP_FLAG_E(level)  flagE[(level)&1]
120
550
#define DIRPROP_FLAG_O(level)  flagO[(level)&1]
121
122
9.29k
#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
123
124
105k
#define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
125
126
/* UBiDi object management -------------------------------------------------- */
127
128
/*
 * Create a UBiDi object without any preallocated arrays.
 * Equivalent to ubidi_openSized(0, 0, ...) with a private error code;
 * the result is whatever ubidi_openSized() returns (nullptr on failure).
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_open()
{
    UErrorCode status=U_ZERO_ERROR;
    UBiDi *pBiDi=ubidi_openSized(0, 0, &status);
    return pBiDi;
}
134
135
/*
 * Allocate and zero a UBiDi object, optionally preallocating its arrays.
 *
 * maxLength    >0: preallocate the dirProps and levels arrays for that length;
 *               0: allow them to be allocated later (mayAllocateText).
 * maxRunCount  >1: preallocate the runs array; 1: use the built-in single run;
 *               0: allow the runs array to be allocated later (mayAllocateRuns).
 * pErrorCode   must be non-null and not already a failure; it is set to
 *              U_ILLEGAL_ARGUMENT_ERROR for negative sizes and to
 *              U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 *
 * Returns the new object, or nullptr on any error.
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    /* validate the arguments before touching any memory */
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if(maxLength<0 || maxRunCount<0) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;    /* invalid arguments */
    }

    /* the object itself */
    UBiDi *pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    if(pBiDi==nullptr) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    /* start from a clean slate: all pointers nullptr, all flags false, all sizes 0 */
    uprv_memset(pBiDi, 0, sizeof(UBiDi));

    /* text-sized arrays: allocate now if a size was requested, otherwise on demand */
    if(maxLength<=0) {
        pBiDi->mayAllocateText=true;
    } else if(!getInitialDirPropsMemory(pBiDi, maxLength) ||
              !getInitialLevelsMemory(pBiDi, maxLength)) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }

    /* runs array: same policy; a single run needs no heap memory */
    if(maxRunCount<=0) {
        pBiDi->mayAllocateRuns=true;
    } else if(maxRunCount==1) {
        /* use simpleRuns[] */
        pBiDi->runsSize=sizeof(Run);
    } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }

    if(U_FAILURE(*pErrorCode)) {
        /* release whatever was allocated so far */
        ubidi_close(pBiDi);
        return nullptr;
    }
    return pBiDi;
}
186
187
/*
188
 * We are allowed to allocate memory if memory==nullptr or
189
 * mayAllocate==true for each array that we need.
190
 * We also try to grow memory as needed if we
191
 * allocate it.
192
 *
193
 * Assume sizeNeeded>0.
194
 * If *pMemory!=nullptr, then assume *pSize>0.
195
 *
196
 * ### this realloc() may unnecessarily copy the old data,
197
 * which we know we don't need any more;
198
 * is this the best way to do this??
199
 */
200
U_CFUNC UBool
201
12.6k
ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, size_t sizeNeeded) {
202
12.6k
    if (sizeNeeded > std::numeric_limits<int32_t>::max()) {
203
      // TODO(egg): Ugly guard for ICU-23397.  A cleaner fix would be to change the pSize to size_t*
204
      // and update callers throughout to use size_t for sizes (just like uprv_malloc already does).
205
      // Not that anyone should be running the UBA on text with that many runs…
206
0
      return false;
207
0
    }
208
12.6k
    void **pMemory = (void **)bidiMem;
209
    /* check for existing memory */
210
12.6k
    if(*pMemory==nullptr) {
211
        /* we need to allocate memory */
212
7.51k
        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=nullptr) {
213
7.51k
            *pSize=static_cast<int32_t>(sizeNeeded);
214
7.51k
            return true;
215
7.51k
        } else {
216
0
            return false;
217
0
        }
218
7.51k
    } else {
219
5.17k
        if(sizeNeeded<=static_cast<size_t>(*pSize)) {
220
            /* there is already enough memory */
221
0
            return true;
222
0
        }
223
5.17k
        else if(!mayAllocate) {
224
            /* not enough memory, and we must not allocate */
225
0
            return false;
226
5.17k
        } else {
227
            /* we try to grow */
228
5.17k
            void *memory;
229
            /* in most cases, we do not need the copy-old-data part of
230
             * realloc, but it is needed when adding runs using getRunsMemory()
231
             * in setParaRunsOnly()
232
             */
233
5.17k
            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=nullptr) {
234
5.17k
                *pMemory=memory;
235
5.17k
                *pSize=static_cast<int32_t>(sizeNeeded);
236
5.17k
                return true;
237
5.17k
            } else {
238
                /* we failed to grow */
239
0
                return false;
240
0
            }
241
5.17k
        }
242
5.17k
    }
243
12.6k
}
244
245
/*
 * Release a UBiDi object together with every buffer it owns.
 * Passing nullptr is allowed and does nothing.
 */
U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi) {
    if(pBiDi==nullptr) {
        return;
    }
    pBiDi->pParaBiDi=nullptr;          /* in case one tries to reuse this block */

    /* the buffers owned by the object, released in this order */
    void *const ownedBuffers[]={
        pBiDi->dirPropsMemory,
        pBiDi->levelsMemory,
        pBiDi->openingsMemory,
        pBiDi->parasMemory,
        pBiDi->runsMemory,
        pBiDi->isolatesMemory,
        pBiDi->insertPoints.points
    };
    for(void *buffer : ownedBuffers) {
        if(buffer!=nullptr) {
            uprv_free(buffer);
        }
    }

    uprv_free(pBiDi);
}
274
275
/* set to approximate "inverse BiDi" ---------------------------------------- */
276
277
/*
 * Switch the "inverse BiDi" approximation on or off.
 * Also selects the matching reordering mode: INVERSE_NUMBERS_AS_L when
 * turned on, the default mode when turned off. A nullptr object is ignored.
 */
U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    if(pBiDi==nullptr) {
        return;
    }
    pBiDi->isInverse=isInverse;
    if(isInverse) {
        pBiDi->reorderingMode = UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
    } else {
        pBiDi->reorderingMode = UBIDI_REORDER_DEFAULT;
    }
}
285
286
/*
 * Report the stored isInverse flag of the object;
 * false when pBiDi is nullptr.
 */
U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi *pBiDi) {
    if(pBiDi==nullptr) {
        return false;
    }
    return pBiDi->isInverse;
}
294
295
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
296
 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
297
 * concept of RUNS_ONLY which is a double operation.
298
 * It could be advantageous to divide this into 3 concepts:
299
 * a) Operation: direct / inverse / RUNS_ONLY
300
 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
301
 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
302
 * This would allow combinations not possible today like RUNS_ONLY with
303
 * NUMBERS_SPECIAL.
304
 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
305
 * REMOVE_CONTROLS for the inverse step.
306
 * Not all combinations would be supported, and probably not all do make sense.
307
 * This would need to document which ones are supported and what are the
308
 * fallbacks for unsupported combinations.
309
 */
310
/*
 * Store a new reordering mode and keep the isInverse flag in sync with it
 * (isInverse is true exactly for UBIDI_REORDER_INVERSE_NUMBERS_AS_L).
 * The call is ignored when pBiDi is nullptr or when the mode lies outside
 * [UBIDI_REORDER_DEFAULT, UBIDI_REORDER_COUNT).
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) UPRV_NO_SANITIZE_UNDEFINED {
    if(pBiDi==nullptr) {
        return;
    }
    if(reorderingMode < UBIDI_REORDER_DEFAULT || reorderingMode >= UBIDI_REORDER_COUNT) {
        return;     /* not a known mode */
    }
    pBiDi->reorderingMode = reorderingMode;
    pBiDi->isInverse = reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
}
318
319
/*
 * Return the reordering mode stored in the object;
 * UBIDI_REORDER_DEFAULT when pBiDi is nullptr.
 */
U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi *pBiDi) {
    if(pBiDi==nullptr) {
        return UBIDI_REORDER_DEFAULT;
    }
    return pBiDi->reorderingMode;
}
327
328
/*
 * Store the reordering option bits in the object.
 * When UBIDI_OPTION_REMOVE_CONTROLS is requested, the
 * UBIDI_OPTION_INSERT_MARKS bit is dropped before storing.
 * A nullptr object is ignored.
 */
U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    if(pBiDi==nullptr) {
        return;
    }
    if((reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS)!=0) {
        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
    }
    pBiDi->reorderingOptions=reorderingOptions;
}
337
338
/*
 * Return the reordering option bits stored in the object;
 * 0 when pBiDi is nullptr.
 */
U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi) {
    if(pBiDi==nullptr) {
        return 0;
    }
    return pBiDi->reorderingOptions;
}
346
347
/*
 * Determine the base direction of a piece of text from its first
 * character whose direction is L, R or AL.
 *
 * text    the UTF-16 text; nullptr yields UBIDI_NEUTRAL
 * length  number of code units, or -1 to measure the text with u_strlen();
 *         values below -1 yield UBIDI_NEUTRAL
 *
 * Returns UBIDI_LTR for a first strong L, UBIDI_RTL for a first strong
 * R or AL, and UBIDI_NEUTRAL when no such character is found.
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const char16_t *text,
int32_t length){
    if( text==nullptr || length<-1 ){
        return UBIDI_NEUTRAL;
    }
    if(length==-1) {
        length=u_strlen(text);
    }

    int32_t pos=0;
    while(pos<length) {
        UChar32 c;
        /* pos is advanced by U16_NEXT */
        U16_NEXT(text, pos, length, c);
        switch(u_charDirection(c)) {
        case U_LEFT_TO_RIGHT:
            return UBIDI_LTR;
        case U_RIGHT_TO_LEFT:
        case U_RIGHT_TO_LEFT_ARABIC:
            return UBIDI_RTL;
        default:
            break;      /* not strong: keep looking */
        }
    }
    return UBIDI_NEUTRAL;
}
374
375
/* perform (P2)..(P3) ------------------------------------------------------- */
376
377
/**
378
 * Returns the directionality of the first strong character
379
 * after the last B in prologue, if any.
380
 * Requires prologue!=null.
381
 */
382
static DirProp
383
0
firstL_R_AL(UBiDi *pBiDi) {
384
0
    const char16_t *text=pBiDi->prologue;
385
0
    int32_t length=pBiDi->proLength;
386
0
    int32_t i;
387
0
    UChar32 uchar;
388
0
    DirProp dirProp, result=ON;
389
0
    for(i=0; i<length; ) {
390
        /* i is incremented by U16_NEXT */
391
0
        U16_NEXT(text, i, length, uchar);
392
0
        dirProp = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, uchar));
393
0
        if(result==ON) {
394
0
            if(dirProp==L || dirProp==R || dirProp==AL) {
395
0
                result=dirProp;
396
0
            }
397
0
        } else {
398
0
            if(dirProp==B) {
399
0
                result=ON;
400
0
            }
401
0
        }
402
0
    }
403
0
    return result;
404
0
}
405
406
/*
407
 * Check that there are enough entries in the array pointed to by pBiDi->paras
408
 */
409
static UBool
410
7.67k
checkParaCount(UBiDi *pBiDi) {
411
7.67k
    int32_t count=pBiDi->paraCount;
412
7.67k
    if(pBiDi->paras==pBiDi->simpleParas) {
413
2.64k
        if(count<=SIMPLE_PARAS_COUNT)
414
2.48k
            return true;
415
159
        if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
416
0
            return false;
417
159
        pBiDi->paras=pBiDi->parasMemory;
418
159
        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
419
159
        return true;
420
159
    }
421
5.02k
    if(!getInitialParasMemory(pBiDi, count * 2))
422
0
        return false;
423
5.02k
    pBiDi->paras=pBiDi->parasMemory;
424
5.02k
    return true;
425
5.02k
}
426
427
/*
428
 * Get the directional properties for the text, calculate the flags bit-set, and
429
 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
430
 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
431
 * When encountering an FSI, it is initially replaced with an LRI, which is the
432
 * default. Only if a strong R or AL is found within its scope will the LRI be
433
 * replaced by an RLI.
434
 */
435
static UBool
436
2.84k
getDirProps(UBiDi *pBiDi) {
437
2.84k
    const char16_t *text=pBiDi->text;
438
2.84k
    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
439
440
2.84k
    int32_t i=0, originalLength=pBiDi->originalLength;
441
2.84k
    Flags flags=0;      /* collect all directionalities in the text */
442
2.84k
    UChar32 uchar;
443
2.84k
    DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
444
2.84k
    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
445
    /* for inverse BiDi, the default para level is set to RTL if there is a
446
       strong R or AL character at either end of the text                            */
447
2.84k
    UBool isDefaultLevelInverse = isDefaultLevel && static_cast<UBool>(
448
918
            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
449
918
            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
450
2.84k
    int32_t lastArabicPos=-1;
451
2.84k
    int32_t controlCount=0;
452
2.84k
    UBool removeBiDiControls =
453
2.84k
        static_cast<UBool>(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS);
454
455
2.84k
    enum State {
456
2.84k
         NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
457
2.84k
         SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
458
2.84k
         SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
459
2.84k
         LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
460
2.84k
    };
461
2.84k
    State state;
462
2.84k
    DirProp lastStrong=ON;              /* for default level & inverse BiDi */
463
    /* The following stacks are used to manage isolate sequences. Those
464
       sequences may be nested, but obviously never more deeply than the
465
       maximum explicit embedding level.
466
       lastStack is the index of the last used entry in the stack. A value of -1
467
       means that there is no open isolate sequence.
468
       lastStack is reset to -1 on paragraph boundaries. */
469
    /* The following stack contains the position of the initiator of
470
       each open isolate sequence */
471
2.84k
    int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
472
    /* The following stack contains the last known state before
473
       encountering the initiator of an isolate sequence */
474
2.84k
    State  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
475
2.84k
    int32_t stackLast=-1;
476
477
2.84k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
478
0
        pBiDi->length=0;
479
2.84k
    defaultParaLevel=pBiDi->paraLevel&1;
480
2.84k
    if(isDefaultLevel) {
481
918
        pBiDi->paras[0].level=defaultParaLevel;
482
918
        lastStrong=defaultParaLevel;
483
918
        if(pBiDi->proLength>0 &&                    /* there is a prologue */
484
0
           (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
485
0
            if(dirProp==L)
486
0
                pBiDi->paras[0].level=0;    /* set the default para level */
487
0
            else
488
0
                pBiDi->paras[0].level=1;    /* set the default para level */
489
0
            state=NOT_SEEKING_STRONG;
490
918
        } else {
491
918
            state=SEEKING_STRONG_FOR_PARA;
492
918
        }
493
1.92k
    } else {
494
1.92k
        pBiDi->paras[0].level=pBiDi->paraLevel;
495
1.92k
        state=NOT_SEEKING_STRONG;
496
1.92k
    }
497
    /* count paragraphs and determine the paragraph level (P2..P3) */
498
    /*
499
     * see comment in ubidi.h:
500
     * the UBIDI_DEFAULT_XXX values are designed so that
501
     * their bit 0 alone yields the intended default
502
     */
503
83.0k
    for( /* i=0 above */ ; i<originalLength; ) {
504
        /* i is incremented by U16_NEXT */
505
80.1k
        U16_NEXT(text, i, originalLength, uchar);
506
80.1k
        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
507
80.1k
        dirProps[i-1]=dirProp;
508
80.1k
        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
509
1.01k
            flags|=DIRPROP_FLAG(BN);
510
1.01k
            dirProps[i-2]=BN;
511
1.01k
        }
512
80.1k
        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
513
0
            controlCount++;
514
80.1k
        if(dirProp==L) {
515
13.8k
            if(state==SEEKING_STRONG_FOR_PARA) {
516
858
                pBiDi->paras[pBiDi->paraCount-1].level=0;
517
858
                state=NOT_SEEKING_STRONG;
518
858
            }
519
13.0k
            else if(state==SEEKING_STRONG_FOR_FSI) {
520
438
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
521
                    /* no need for next statement, already set by default */
522
                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
523
371
                    flags|=DIRPROP_FLAG(LRI);
524
371
                }
525
438
                state=LOOKING_FOR_PDI;
526
438
            }
527
13.8k
            lastStrong=L;
528
13.8k
            continue;
529
13.8k
        }
530
66.2k
        if(dirProp==R || dirProp==AL) {
531
4.09k
            if(state==SEEKING_STRONG_FOR_PARA) {
532
198
                pBiDi->paras[pBiDi->paraCount-1].level=1;
533
198
                state=NOT_SEEKING_STRONG;
534
198
            }
535
3.89k
            else if(state==SEEKING_STRONG_FOR_FSI) {
536
252
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
537
186
                    dirProps[isolateStartStack[stackLast]]=RLI;
538
186
                    flags|=DIRPROP_FLAG(RLI);
539
186
                }
540
252
                state=LOOKING_FOR_PDI;
541
252
            }
542
4.09k
            lastStrong=R;
543
4.09k
            if(dirProp==AL)
544
1.88k
                lastArabicPos=i-1;
545
4.09k
            continue;
546
4.09k
        }
547
62.2k
        if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
548
15.8k
            stackLast++;
549
15.8k
            if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
550
15.4k
                isolateStartStack[stackLast]=i-1;
551
15.4k
                previousStateStack[stackLast]=state;
552
15.4k
            }
553
15.8k
            if(dirProp==FSI) {
554
10.9k
                dirProps[i-1]=LRI;      /* default if no strong char */
555
10.9k
                state=SEEKING_STRONG_FOR_FSI;
556
10.9k
            }
557
4.87k
            else
558
4.87k
                state=LOOKING_FOR_PDI;
559
15.8k
            continue;
560
15.8k
        }
561
46.3k
        if(dirProp==PDI) {
562
1.69k
            if(state==SEEKING_STRONG_FOR_FSI) {
563
602
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
564
                    /* no need for next statement, already set by default */
565
                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
566
532
                    flags|=DIRPROP_FLAG(LRI);
567
532
                }
568
602
            }
569
1.69k
            if(stackLast>=0) {
570
1.07k
                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
571
1.00k
                    state=previousStateStack[stackLast];
572
1.07k
                stackLast--;
573
1.07k
            }
574
1.69k
            continue;
575
1.69k
        }
576
44.6k
        if(dirProp==B) {
577
8.02k
            if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
578
218
                continue;
579
7.80k
            pBiDi->paras[pBiDi->paraCount-1].limit=i;
580
7.80k
            if(isDefaultLevelInverse && lastStrong==R)
581
0
                pBiDi->paras[pBiDi->paraCount-1].level=1;
582
7.80k
            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
583
                /* When streaming, we only process whole paragraphs
584
                   thus some updates are only done on paragraph boundaries */
585
0
                pBiDi->length=i;        /* i is index to next character */
586
0
                pBiDi->controlCount=controlCount;
587
0
            }
588
7.80k
            if(i<originalLength) {              /* B not last char in text */
589
7.67k
                pBiDi->paraCount++;
590
7.67k
                if(checkParaCount(pBiDi)==false)    /* not enough memory for a new para entry */
591
0
                    return false;
592
7.67k
                if(isDefaultLevel) {
593
5.34k
                    pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
594
5.34k
                    state=SEEKING_STRONG_FOR_PARA;
595
5.34k
                    lastStrong=defaultParaLevel;
596
5.34k
                } else {
597
2.32k
                    pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
598
2.32k
                    state=NOT_SEEKING_STRONG;
599
2.32k
                }
600
7.67k
                stackLast=-1;
601
7.67k
            }
602
7.80k
            continue;
603
7.80k
        }
604
44.6k
    }
605
    /* Ignore still open isolate sequences with overflow */
606
2.84k
    if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
607
33
        stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
608
33
        state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
609
33
    }
610
    /* Resolve direction of still unresolved open FSI sequences */
611
5.52k
    while(stackLast>=0) {
612
3.05k
        if(state==SEEKING_STRONG_FOR_FSI) {
613
            /* no need for next statement, already set by default */
614
            /* dirProps[isolateStartStack[stackLast]]=LRI; */
615
375
            flags|=DIRPROP_FLAG(LRI);
616
375
            break;
617
375
        }
618
2.68k
        state=previousStateStack[stackLast];
619
2.68k
        stackLast--;
620
2.68k
    }
621
    /* When streaming, ignore text after the last paragraph separator */
622
2.84k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
623
0
        if(pBiDi->length<originalLength)
624
0
            pBiDi->paraCount--;
625
2.84k
    } else {
626
2.84k
        pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
627
2.84k
        pBiDi->controlCount=controlCount;
628
2.84k
    }
629
    /* For inverse bidi, default para direction is RTL if there is
630
       a strong R or AL at either end of the paragraph */
631
2.84k
    if(isDefaultLevelInverse && lastStrong==R) {
632
0
        pBiDi->paras[pBiDi->paraCount-1].level=1;
633
0
    }
634
2.84k
    if(isDefaultLevel) {
635
918
        pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
636
918
    }
637
    /* The following is needed to resolve the text direction for default level
638
       paragraphs containing no strong character */
639
13.3k
    for(i=0; i<pBiDi->paraCount; i++)
640
10.5k
        flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
641
642
2.84k
    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
643
0
        flags|=DIRPROP_FLAG(L);
644
0
    }
645
2.84k
    pBiDi->flags=flags;
646
2.84k
    pBiDi->lastArabicPos=lastArabicPos;
647
2.84k
    return true;
648
2.84k
}
649
650
/* determine the paragraph level at position index */
651
U_CFUNC UBiDiLevel
652
12.0k
ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
653
12.0k
    int32_t i;
654
340k
    for(i=0; i<pBiDi->paraCount; i++)
655
340k
        if(pindex<pBiDi->paras[i].limit)
656
12.0k
            break;
657
12.0k
    if(i>=pBiDi->paraCount)
658
0
        i=pBiDi->paraCount-1;
659
12.0k
    return (UBiDiLevel)(pBiDi->paras[i].level);
660
12.0k
}
661
662
/* Functions for handling paired brackets ----------------------------------- */
663
664
/* In the isoRuns array, the first entry is used for text outside of any
665
   isolate sequence.  Higher entries are used for each more deeply nested
666
   isolate sequence. isoRunLast is the index of the last used entry.  The
667
   openings array is used to note the data of opening brackets not yet
668
   matched by a closing bracket, or matched but still susceptible to change
669
   level.
670
   Each isoRun entry contains the index of the first and
671
   one-after-last openings entries for pending opening brackets it
672
   contains.  The next openings entry to use is the one-after-last of the
673
   most deeply nested isoRun entry.
674
   isoRun entries also contain their current embedding level and the last
675
   encountered strong character, since these will be needed to resolve
676
   the level of paired brackets.  */
677
678
static void
679
1.83k
bracketInit(UBiDi *pBiDi, BracketData *bd) {
680
1.83k
    bd->pBiDi=pBiDi;
681
1.83k
    bd->isoRunLast=0;
682
1.83k
    bd->isoRuns[0].start=0;
683
1.83k
    bd->isoRuns[0].limit=0;
684
1.83k
    bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
685
1.83k
    UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
686
1.83k
    bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
687
1.83k
    bd->isoRuns[0].contextDir = static_cast<UBiDiDirection>(t);
688
1.83k
    bd->isoRuns[0].contextPos=0;
689
1.83k
    if(pBiDi->openingsMemory) {
690
0
        bd->openings=pBiDi->openingsMemory;
691
0
        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
692
1.83k
    } else {
693
1.83k
        bd->openings=bd->simpleOpenings;
694
1.83k
        bd->openingsCount=SIMPLE_OPENINGS_COUNT;
695
1.83k
    }
696
1.83k
    bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
697
1.83k
                         bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
698
1.83k
}
699
700
/* paragraph boundary */
701
static void
702
3.50k
bracketProcessB(BracketData *bd, UBiDiLevel level) {
703
3.50k
    bd->isoRunLast=0;
704
3.50k
    bd->isoRuns[0].limit=0;
705
3.50k
    bd->isoRuns[0].level=level;
706
3.50k
    bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
707
3.50k
    bd->isoRuns[0].contextDir = static_cast<UBiDiDirection>(level & 1);
708
3.50k
    bd->isoRuns[0].contextPos=0;
709
3.50k
}
710
711
/* LRE, LRO, RLE, RLO, PDF */
712
static void
713
bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
714
9.09k
                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
715
9.09k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
716
9.09k
    DirProp *dirProps=bd->pBiDi->dirProps;
717
9.09k
    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
718
7.81k
        return;
719
1.28k
    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
720
1.03k
        contextLevel=embeddingLevel;
721
1.28k
    pLastIsoRun->limit=pLastIsoRun->start;
722
1.28k
    pLastIsoRun->level=embeddingLevel;
723
1.28k
    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
724
1.28k
    pLastIsoRun->contextDir = static_cast<UBiDiDirection>(contextLevel & 1);
725
1.28k
    pLastIsoRun->contextPos = static_cast<UBiDiDirection>(lastCcPos);
726
1.28k
}
727
728
/* LRI or RLI */
729
static void
730
8.25k
bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
731
8.25k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
732
8.25k
    int16_t lastLimit;
733
8.25k
    pLastIsoRun->lastBase=ON;
734
8.25k
    lastLimit=pLastIsoRun->limit;
735
8.25k
    bd->isoRunLast++;
736
8.25k
    pLastIsoRun++;
737
8.25k
    pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
738
8.25k
    pLastIsoRun->level=level;
739
8.25k
    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
740
8.25k
    pLastIsoRun->contextDir = static_cast<UBiDiDirection>(level & 1);
741
8.25k
    pLastIsoRun->contextPos=0;
742
8.25k
}
743
744
/* PDI */
745
static void
746
841
bracketProcessPDI(BracketData *bd) {
747
841
    IsoRun *pLastIsoRun;
748
841
    bd->isoRunLast--;
749
841
    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
750
841
    pLastIsoRun->lastBase=ON;
751
841
}
752
753
/* newly found opening bracket: create an openings entry */
754
static UBool                            /* return true if success */
755
13.4k
bracketAddOpening(BracketData *bd, char16_t match, int32_t position) {
756
13.4k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
757
13.4k
    Opening *pOpening;
758
13.4k
    if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
759
266
        UBiDi *pBiDi=bd->pBiDi;
760
266
        if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
761
0
            return false;
762
266
        if(bd->openings==bd->simpleOpenings)
763
266
            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
764
266
                        SIMPLE_OPENINGS_COUNT * sizeof(Opening));
765
266
        bd->openings=pBiDi->openingsMemory;     /* may have changed */
766
266
        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
767
266
    }
768
13.4k
    pOpening=&bd->openings[pLastIsoRun->limit];
769
13.4k
    pOpening->position=position;
770
13.4k
    pOpening->match=match;
771
13.4k
    pOpening->contextDir=pLastIsoRun->contextDir;
772
13.4k
    pOpening->contextPos=pLastIsoRun->contextPos;
773
13.4k
    pOpening->flags=0;
774
13.4k
    pLastIsoRun->limit++;
775
13.4k
    return true;
776
13.4k
}
777
778
/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
779
static void
780
4.65k
fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
781
    /* This function calls itself recursively */
782
4.65k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
783
4.65k
    Opening *qOpening;
784
4.65k
    DirProp *dirProps=bd->pBiDi->dirProps;
785
4.65k
    int32_t k, openingPosition, closingPosition;
786
120k
    for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
787
117k
        if(qOpening->match>=0)      /* not an N0c match */
788
114k
            continue;
789
3.28k
        if(newPropPosition<qOpening->contextPos)
790
388
            break;
791
2.89k
        if(newPropPosition>=qOpening->position)
792
283
            continue;
793
2.60k
        if(newProp==qOpening->contextDir)
794
1.46k
            break;
795
1.14k
        openingPosition=qOpening->position;
796
1.14k
        dirProps[openingPosition]=newProp;
797
1.14k
        closingPosition=-(qOpening->match);
798
1.14k
        dirProps[closingPosition]=newProp;
799
1.14k
        qOpening->match=0;                      /* prevent further changes */
800
1.14k
        fixN0c(bd, k, openingPosition, newProp);
801
1.14k
        fixN0c(bd, k, closingPosition, newProp);
802
1.14k
    }
803
4.65k
}
804
805
/* process closing bracket */
806
static DirProp              /* return L or R if N0b or N0c, ON if N0d */
807
2.79k
bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
808
2.79k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
809
2.79k
    Opening *pOpening, *qOpening;
810
2.79k
    UBiDiDirection direction;
811
2.79k
    UBool stable;
812
2.79k
    DirProp newProp;
813
2.79k
    pOpening=&bd->openings[openIdx];
814
2.79k
    direction = static_cast<UBiDiDirection>(pLastIsoRun->level & 1);
815
2.79k
    stable=true;            /* assume stable until proved otherwise */
816
817
    /* The stable flag is set when brackets are paired and their
818
       level is resolved and cannot be changed by what will be
819
       found later in the source string.
820
       An unstable match can occur only when applying N0c, where
821
       the resolved level depends on the preceding context, and
822
       this context may be affected by text occurring later.
823
       Example: RTL paragraph containing:  abc[(latin) HEBREW]
824
       When the closing parenthesis is encountered, it appears
825
       that N0c1 must be applied since 'abc' sets an opposite
826
       direction context and both parentheses receive level 2.
827
       However, when the closing square bracket is processed,
828
       N0b applies because of 'HEBREW' being included within the
829
       brackets, thus the square brackets are treated like R and
830
       receive level 1. However, this changes the preceding
831
       context of the opening parenthesis, and it now appears
832
       that N0c2 must be applied to the parentheses rather than
833
       N0c1. */
834
835
2.79k
    if((direction==0 && pOpening->flags&FOUND_L) ||
836
2.63k
       (direction==1 && pOpening->flags&FOUND_R)) {                         /* N0b */
837
552
        newProp=static_cast<DirProp>(direction);
838
552
    }
839
2.24k
    else if(pOpening->flags&(FOUND_L|FOUND_R)) {                            /* N0c */
840
        /* it is stable if there is no containing pair or in
841
           conditions too complicated and not worth checking */
842
1.81k
        stable=(openIdx==pLastIsoRun->start);
843
1.81k
        if(direction!=pOpening->contextDir)
844
1.31k
            newProp= static_cast<DirProp>(pOpening->contextDir);           /* N0c1 */
845
500
        else
846
500
            newProp= static_cast<DirProp>(direction);                      /* N0c2 */
847
1.81k
    } else {
848
        /* forget this and any brackets nested within this pair */
849
427
        pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
850
427
        return ON;                                                          /* N0d */
851
427
    }
852
2.37k
    bd->pBiDi->dirProps[pOpening->position]=newProp;
853
2.37k
    bd->pBiDi->dirProps[position]=newProp;
854
    /* Update nested N0c pairs that may be affected */
855
2.37k
    fixN0c(bd, openIdx, pOpening->position, newProp);
856
2.37k
    if(stable) {
857
572
        pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
858
        /* remove lower located synonyms if any */
859
938
        while(pLastIsoRun->limit>pLastIsoRun->start &&
860
700
              bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
861
366
            pLastIsoRun->limit--;
862
1.79k
    } else {
863
1.79k
        int32_t k;
864
1.79k
        pOpening->match=-position;
865
        /* neutralize lower located synonyms if any */
866
1.79k
        k=openIdx-1;
867
3.48k
        while(k>=pLastIsoRun->start &&
868
3.35k
              bd->openings[k].position==pOpening->position)
869
1.68k
            bd->openings[k--].match=0;
870
        /* neutralize any unmatched opening between the current pair;
871
           this will also neutralize higher located synonyms if any */
872
67.3k
        for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
873
65.5k
            qOpening=&bd->openings[k];
874
65.5k
            if(qOpening->position>=position)
875
0
                break;
876
65.5k
            if(qOpening->match>0)
877
355
                qOpening->match=0;
878
65.5k
        }
879
1.79k
    }
880
2.37k
    return newProp;
881
2.79k
}
882
883
/* handle strong characters, digits and candidates for closing brackets */
884
static UBool                            /* return true if success */
885
30.8k
bracketProcessChar(BracketData *bd, int32_t position) {
886
30.8k
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
887
30.8k
    DirProp *dirProps, dirProp, newProp;
888
30.8k
    UBiDiLevel level;
889
30.8k
    dirProps=bd->pBiDi->dirProps;
890
30.8k
    dirProp=dirProps[position];
891
30.8k
    if(dirProp==ON) {
892
13.5k
        char16_t c, match;
893
13.5k
        int32_t idx;
894
        /* First see if it is a matching closing bracket. Hopefully, this is
895
           more efficient than checking if it is a closing bracket at all */
896
13.5k
        c=bd->pBiDi->text[position];
897
557k
        for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
898
546k
            if(bd->openings[idx].match!=c)
899
543k
                continue;
900
            /* We have a match */
901
2.79k
            newProp=bracketProcessClosing(bd, idx, position);
902
2.79k
            if(newProp==ON) {           /* N0d */
903
427
                c=0;        /* prevent handling as an opening */
904
427
                break;
905
427
            }
906
2.37k
            pLastIsoRun->lastBase=ON;
907
2.37k
            pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
908
2.37k
            pLastIsoRun->contextPos=position;
909
2.37k
            level=bd->pBiDi->levels[position];
910
2.37k
            if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
911
282
                uint16_t flag;
912
282
                int32_t i;
913
282
                newProp=level&1;
914
282
                pLastIsoRun->lastStrong=newProp;
915
282
                flag=DIRPROP_FLAG(newProp);
916
3.49k
                for(i=pLastIsoRun->start; i<idx; i++)
917
3.21k
                    bd->openings[i].flags|=flag;
918
                /* matching brackets are not overridden by LRO/RLO */
919
282
                bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
920
282
            }
921
            /* matching brackets are not overridden by LRO/RLO */
922
2.37k
            bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
923
2.37k
            return true;
924
2.79k
        }
925
        /* We get here only if the ON character is not a matching closing
926
           bracket or it is a case of N0d */
927
        /* Now see if it is an opening bracket */
928
11.1k
        if(c)
929
10.7k
            match= static_cast<char16_t>(u_getBidiPairedBracket(c));    /* get the matching char */
930
427
        else
931
427
            match=0;
932
11.1k
        if(match!=c &&                  /* has a matching char */
933
7.26k
           ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
934
            /* special case: process synonyms
935
               create an opening entry for each synonym */
936
6.98k
            if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
937
5.45k
                if(!bracketAddOpening(bd, 0x3009, position))
938
0
                    return false;
939
5.45k
            }
940
1.53k
            else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
941
984
                if(!bracketAddOpening(bd, 0x232A, position))
942
0
                    return false;
943
984
            }
944
6.98k
            if(!bracketAddOpening(bd, match, position))
945
0
                return false;
946
6.98k
        }
947
11.1k
    }
948
28.4k
    level=bd->pBiDi->levels[position];
949
28.4k
    if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
950
5.95k
        newProp=level&1;
951
5.95k
        if(dirProp!=S && dirProp!=WS && dirProp!=ON)
952
2.57k
            dirProps[position]=newProp;
953
5.95k
        pLastIsoRun->lastBase=newProp;
954
5.95k
        pLastIsoRun->lastStrong=newProp;
955
5.95k
        pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
956
5.95k
        pLastIsoRun->contextPos=position;
957
5.95k
    }
958
22.4k
    else if(dirProp<=R || dirProp==AL) {
959
9.29k
        newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
960
9.29k
        pLastIsoRun->lastBase=dirProp;
961
9.29k
        pLastIsoRun->lastStrong=dirProp;
962
9.29k
        pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
963
9.29k
        pLastIsoRun->contextPos=position;
964
9.29k
    }
965
13.1k
    else if(dirProp==EN) {
966
980
        pLastIsoRun->lastBase=EN;
967
980
        if(pLastIsoRun->lastStrong==L) {
968
358
            newProp=L;                  /* W7 */
969
358
            if(!bd->isNumbersSpecial)
970
358
                dirProps[position]=ENL;
971
358
            pLastIsoRun->contextDir = static_cast<UBiDiDirection>(L);
972
358
            pLastIsoRun->contextPos=position;
973
358
        }
974
622
        else {
975
622
            newProp=R;                  /* N0 */
976
622
            if(pLastIsoRun->lastStrong==AL)
977
199
                dirProps[position]=AN;  /* W2 */
978
423
            else
979
423
                dirProps[position]=ENR;
980
622
            pLastIsoRun->contextDir = static_cast<UBiDiDirection>(R);
981
622
            pLastIsoRun->contextPos=position;
982
622
        }
983
980
    }
984
12.2k
    else if(dirProp==AN) {
985
1.38k
        newProp=R;                      /* N0 */
986
1.38k
        pLastIsoRun->lastBase=AN;
987
1.38k
        pLastIsoRun->contextDir = static_cast<UBiDiDirection>(R);
988
1.38k
        pLastIsoRun->contextPos=position;
989
1.38k
    }
990
10.8k
    else if(dirProp==NSM) {
991
        /* if the last real char was ON, change NSM to ON so that it
992
           will stay ON even if the last real char is a bracket which
993
           may be changed to L or R */
994
1.17k
        newProp=pLastIsoRun->lastBase;
995
1.17k
        if(newProp==ON)
996
237
            dirProps[position]=newProp;
997
1.17k
    }
998
9.64k
    else {
999
9.64k
        newProp=dirProp;
1000
9.64k
        pLastIsoRun->lastBase=dirProp;
1001
9.64k
    }
1002
28.4k
    if(newProp<=R || newProp==AL) {
1003
18.5k
        int32_t i;
1004
18.5k
        uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1005
166k
        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1006
147k
            if(position>bd->openings[i].position)
1007
144k
                bd->openings[i].flags|=flag;
1008
18.5k
    }
1009
28.4k
    return true;
1010
30.8k
}
1011
1012
/* perform (X1)..(X9) ------------------------------------------------------- */
1013
1014
/* determine if the text is mixed-directional or single-directional */
1015
static UBiDiDirection
1016
4.02k
directionFromFlags(UBiDi *pBiDi) {
1017
4.02k
    Flags flags=pBiDi->flags;
1018
    /* if the text contains AN and neutrals, then some neutrals may become RTL */
1019
4.02k
    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1020
413
        return UBIDI_LTR;
1021
3.61k
    } else if(!(flags&MASK_LTR)) {
1022
481
        return UBIDI_RTL;
1023
3.13k
    } else {
1024
3.13k
        return UBIDI_MIXED;
1025
3.13k
    }
1026
4.02k
}
1027
1028
/*
1029
 * Resolve the explicit levels as specified by explicit embedding codes.
1030
 * Recalculate the flags to have them reflect the real properties
1031
 * after taking the explicit embeddings into account.
1032
 *
1033
 * The BiDi algorithm is designed to result in the same behavior whether embedding
1034
 * levels are externally specified (from "styled text", supposedly the preferred
1035
 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1036
 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1037
 * However, in a real implementation, the removal of these codes and their index
1038
 * positions in the plain text is undesirable since it would result in
1039
 * reallocated, reindexed text.
1040
 * Instead, this implementation leaves the codes in there and just ignores them
1041
 * in the subsequent processing.
1042
 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1043
 * explicit embedding code just get the same level assigned as the last "real"
1044
 * character.
1045
 *
1046
 * Some implementations, not this one, then overwrite some of these
1047
 * directionality properties at "real" same-level-run boundaries by
1048
 * L or R codes so that the resolution of weak types can be performed on the
1049
 * entire paragraph at once instead of having to parse it once more and
1050
 * perform that resolution on same-level-runs.
1051
 * This limits the scope of the implicit rules in effectively
1052
 * the same way as the run limits.
1053
 *
1054
 * Instead, this implementation does not modify these codes, except for
1055
 * paired brackets whose properties (ON) may be replaced by L or R.
1056
 * On one hand, the paragraph has to be scanned for same-level-runs, but
1057
 * on the other hand, this saves another loop to reset these codes,
1058
 * or saves making and modifying a copy of dirProps[].
1059
 *
1060
 *
1061
 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1062
 *
1063
 *
1064
 * Handling the stack of explicit levels (Xn):
1065
 *
1066
 * With the BiDi stack of explicit levels, as pushed with each
1067
 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1068
 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1069
 *
1070
 * In order to have a correct push-pop semantics even in the case of overflows,
1071
 * overflow counters and a valid isolate counter are used as described in UAX#9
1072
 * section 3.3.2 "Explicit Levels and Directions".
1073
 *
1074
 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1075
 *
1076
 * Returns normally the direction; -1 if there was a memory shortage
1077
 *
1078
 */
1079
static UBiDiDirection
1080
2.84k
resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1081
2.84k
    DirProp *dirProps=pBiDi->dirProps;
1082
2.84k
    UBiDiLevel *levels=pBiDi->levels;
1083
2.84k
    const char16_t *text=pBiDi->text;
1084
1085
2.84k
    int32_t i=0, length=pBiDi->length;
1086
2.84k
    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1087
2.84k
    DirProp dirProp;
1088
2.84k
    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1089
2.84k
    UBiDiDirection direction;
1090
2.84k
    pBiDi->isolateCount=0;
1091
1092
2.84k
    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1093
1094
    /* determine if the text is mixed-directional or single-directional */
1095
2.84k
    direction=directionFromFlags(pBiDi);
1096
1097
    /* we may not need to resolve any explicit levels */
1098
2.84k
    if((direction!=UBIDI_MIXED)) {
1099
        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1100
614
        return direction;
1101
614
    }
1102
2.22k
    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1103
        /* inverse BiDi: mixed, but all characters are at the same embedding level */
1104
        /* set all levels to the paragraph level */
1105
390
        int32_t paraIndex, start, limit;
1106
4.12k
        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1107
3.73k
            if(paraIndex==0)
1108
390
                start=0;
1109
3.34k
            else
1110
3.34k
                start=pBiDi->paras[paraIndex-1].limit;
1111
3.73k
            limit=pBiDi->paras[paraIndex].limit;
1112
3.73k
            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1113
16.0k
            for(i=start; i<limit; i++)
1114
12.3k
                levels[i]=level;
1115
3.73k
        }
1116
390
        return direction;   /* no bracket matching for inverse BiDi */
1117
390
    }
1118
1.83k
    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1119
        /* no embeddings, set all levels to the paragraph level */
1120
        /* we still have to perform bracket matching */
1121
647
        int32_t paraIndex, start, limit;
1122
647
        BracketData bracketData;
1123
647
        bracketInit(pBiDi, &bracketData);
1124
2.85k
        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1125
2.20k
            if(paraIndex==0)
1126
647
                start=0;
1127
1.56k
            else
1128
1.56k
                start=pBiDi->paras[paraIndex-1].limit;
1129
2.20k
            limit=pBiDi->paras[paraIndex].limit;
1130
2.20k
            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1131
21.9k
            for(i=start; i<limit; i++) {
1132
19.7k
                levels[i]=level;
1133
19.7k
                dirProp=dirProps[i];
1134
19.7k
                if(dirProp==BN)
1135
2.13k
                    continue;
1136
17.5k
                if(dirProp==B) {
1137
1.63k
                    if((i+1)<length) {
1138
1.62k
                        if(text[i]==CR && text[i+1]==LF)
1139
67
                            continue;   /* skip CR when followed by LF */
1140
1.56k
                        bracketProcessB(&bracketData, level);
1141
1.56k
                    }
1142
1.56k
                    continue;
1143
1.63k
                }
1144
15.9k
                if(!bracketProcessChar(&bracketData, i)) {
1145
0
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1146
0
                    return UBIDI_LTR;
1147
0
                }
1148
15.9k
            }
1149
2.20k
        }
1150
647
        return direction;
1151
647
    }
1152
1.18k
    {
1153
        /* continue to perform (Xn) */
1154
1155
        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1156
        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1157
1.18k
        UBiDiLevel embeddingLevel=level, newLevel;
1158
1.18k
        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1159
1.18k
        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1160
1161
        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1162
           stackLast points to its current entry. */
1163
1.18k
        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1164
                                                        but we need one more entry as base */
1165
1.18k
        uint32_t stackLast=0;
1166
1.18k
        int32_t overflowIsolateCount=0;
1167
1.18k
        int32_t overflowEmbeddingCount=0;
1168
1.18k
        int32_t validIsolateCount=0;
1169
1.18k
        BracketData bracketData;
1170
1.18k
        bracketInit(pBiDi, &bracketData);
1171
1.18k
        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1172
1173
        /* recalculate the flags */
1174
1.18k
        flags=0;
1175
1176
41.4k
        for(i=0; i<length; ++i) {
1177
40.2k
            dirProp=dirProps[i];
1178
40.2k
            switch(dirProp) {
1179
810
            case LRE:
1180
2.32k
            case RLE:
1181
2.90k
            case LRO:
1182
4.37k
            case RLO:
1183
                /* (X2, X3, X4, X5) */
1184
4.37k
                flags|=DIRPROP_FLAG(BN);
1185
4.37k
                levels[i]=previousLevel;
1186
4.37k
                if (dirProp==LRE || dirProp==LRO)
1187
                    /* least greater even level */
1188
1.39k
                    newLevel = static_cast<UBiDiLevel>((embeddingLevel + 2) & ~(UBIDI_LEVEL_OVERRIDE | 1));
1189
2.97k
                else
1190
                    /* least greater odd level */
1191
2.97k
                    newLevel = static_cast<UBiDiLevel>((NO_OVERRIDE(embeddingLevel) + 1) | 1);
1192
4.37k
                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1193
2.98k
                                                         overflowEmbeddingCount==0) {
1194
2.64k
                    lastCcPos=i;
1195
2.64k
                    embeddingLevel=newLevel;
1196
2.64k
                    if(dirProp==LRO || dirProp==RLO)
1197
1.57k
                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1198
2.64k
                    stackLast++;
1199
2.64k
                    stack[stackLast]=embeddingLevel;
1200
                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1201
                       since this has already been done for newLevel which is
1202
                       the source for embeddingLevel.
1203
                     */
1204
2.64k
                } else {
1205
1.73k
                    if(overflowIsolateCount==0)
1206
1.46k
                        overflowEmbeddingCount++;
1207
1.73k
                }
1208
4.37k
                break;
1209
1.23k
            case PDF:
1210
                /* (X7) */
1211
1.23k
                flags|=DIRPROP_FLAG(BN);
1212
1.23k
                levels[i]=previousLevel;
1213
                /* handle all the overflow cases first */
1214
1.23k
                if(overflowIsolateCount) {
1215
205
                    break;
1216
205
                }
1217
1.03k
                if(overflowEmbeddingCount) {
1218
75
                    overflowEmbeddingCount--;
1219
75
                    break;
1220
75
                }
1221
956
                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1222
364
                    lastCcPos=i;
1223
364
                    stackLast--;
1224
364
                    embeddingLevel = static_cast<UBiDiLevel>(stack[stackLast]);
1225
364
                }
1226
956
                break;
1227
10.2k
            case LRI:
1228
13.0k
            case RLI:
1229
13.0k
                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1230
13.0k
                levels[i]=NO_OVERRIDE(embeddingLevel);
1231
13.0k
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1232
6.63k
                    bracketProcessBoundary(&bracketData, lastCcPos,
1233
6.63k
                                           previousLevel, embeddingLevel);
1234
6.63k
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1235
6.63k
                }
1236
13.0k
                previousLevel=embeddingLevel;
1237
                /* (X5a, X5b) */
1238
13.0k
                if(dirProp==LRI)
1239
                    /* least greater even level */
1240
10.2k
                    newLevel = static_cast<UBiDiLevel>((embeddingLevel + 2) & ~(UBIDI_LEVEL_OVERRIDE | 1));
1241
2.82k
                else
1242
                    /* least greater odd level */
1243
2.82k
                    newLevel = static_cast<UBiDiLevel>((NO_OVERRIDE(embeddingLevel) + 1) | 1);
1244
13.0k
                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1245
8.33k
                                                         overflowEmbeddingCount==0) {
1246
8.25k
                    flags|=DIRPROP_FLAG(dirProp);
1247
8.25k
                    lastCcPos=i;
1248
8.25k
                    validIsolateCount++;
1249
8.25k
                    if(validIsolateCount>pBiDi->isolateCount)
1250
7.18k
                        pBiDi->isolateCount=validIsolateCount;
1251
8.25k
                    embeddingLevel=newLevel;
1252
                    /* we can increment stackLast without checking because newLevel
1253
                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1254
8.25k
                    stackLast++;
1255
8.25k
                    stack[stackLast]=embeddingLevel+ISOLATE;
1256
8.25k
                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1257
8.25k
                } else {
1258
                    /* make it WS so that it is handled by adjustWSLevels() */
1259
4.82k
                    dirProps[i]=WS;
1260
4.82k
                    overflowIsolateCount++;
1261
4.82k
                }
1262
13.0k
                break;
1263
1.46k
            case PDI:
1264
1.46k
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1265
396
                    bracketProcessBoundary(&bracketData, lastCcPos,
1266
396
                                           previousLevel, embeddingLevel);
1267
396
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1268
396
                }
1269
                /* (X6a) */
1270
1.46k
                if(overflowIsolateCount) {
1271
159
                    overflowIsolateCount--;
1272
                    /* make it WS so that it is handled by adjustWSLevels() */
1273
159
                    dirProps[i]=WS;
1274
159
                }
1275
1.30k
                else if(validIsolateCount) {
1276
841
                    flags|=DIRPROP_FLAG(PDI);
1277
841
                    lastCcPos=i;
1278
841
                    overflowEmbeddingCount=0;
1279
1.05k
                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1280
214
                        stackLast--;                /* until the last isolate entry */
1281
841
                    stackLast--;                    /* pop also the last isolate entry */
1282
841
                    validIsolateCount--;
1283
841
                    bracketProcessPDI(&bracketData);
1284
841
                } else
1285
                    /* make it WS so that it is handled by adjustWSLevels() */
1286
463
                    dirProps[i]=WS;
1287
1.46k
                embeddingLevel = static_cast<UBiDiLevel>(stack[stackLast]) & ~ISOLATE;
1288
1.46k
                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1289
1.46k
                previousLevel=embeddingLevel;
1290
1.46k
                levels[i]=NO_OVERRIDE(embeddingLevel);
1291
1.46k
                break;
1292
2.07k
            case B:
1293
2.07k
                flags|=DIRPROP_FLAG(B);
1294
2.07k
                levels[i]=GET_PARALEVEL(pBiDi, i);
1295
2.07k
                if((i+1)<length) {
1296
2.03k
                    if(text[i]==CR && text[i+1]==LF)
1297
85
                        break;          /* skip CR when followed by LF */
1298
1.94k
                    overflowEmbeddingCount=overflowIsolateCount=0;
1299
1.94k
                    validIsolateCount=0;
1300
1.94k
                    stackLast=0;
1301
1.94k
                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1302
1.94k
                    stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1303
1.94k
                    bracketProcessB(&bracketData, embeddingLevel);
1304
1.94k
                }
1305
1.99k
                break;
1306
3.21k
            case BN:
1307
                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1308
                /* they will get their levels set correctly in adjustWSLevels() */
1309
3.21k
                levels[i]=previousLevel;
1310
3.21k
                flags|=DIRPROP_FLAG(BN);
1311
3.21k
                break;
1312
14.8k
            default:
1313
                /* all other types are normal characters and get the "real" level */
1314
14.8k
                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1315
2.06k
                    bracketProcessBoundary(&bracketData, lastCcPos,
1316
2.06k
                                           previousLevel, embeddingLevel);
1317
2.06k
                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1318
2.06k
                    if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1319
550
                        flags|=DIRPROP_FLAG_O(embeddingLevel);
1320
1.51k
                    else
1321
1.51k
                        flags|=DIRPROP_FLAG_E(embeddingLevel);
1322
2.06k
                }
1323
14.8k
                previousLevel=embeddingLevel;
1324
14.8k
                levels[i]=embeddingLevel;
1325
14.8k
                if(!bracketProcessChar(&bracketData, i))
1326
0
                    return static_cast<UBiDiDirection>(-1);
1327
                /* the dirProp may have been changed in bracketProcessChar() */
1328
14.8k
                flags|=DIRPROP_FLAG(dirProps[i]);
1329
14.8k
                break;
1330
40.2k
            }
1331
40.2k
        }
1332
1.18k
        if(flags&MASK_EMBEDDING)
1333
1.18k
            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1334
1.18k
        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1335
0
            flags|=DIRPROP_FLAG(L);
1336
        /* again, determine if the text is mixed-directional or single-directional */
1337
1.18k
        pBiDi->flags=flags;
1338
1.18k
        direction=directionFromFlags(pBiDi);
1339
1.18k
    }
1340
0
    return direction;
1341
1.18k
}
1342
1343
/*
1344
 * Use a pre-specified embedding levels array:
1345
 *
1346
 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1347
 * ignore all explicit codes (X9),
1348
 * and check all the preset levels.
1349
 *
1350
 * Recalculate the flags to have them reflect the real properties
1351
 * after taking the explicit embeddings into account.
1352
 */
1353
/*
 * Validate and normalize caller-supplied embedding levels.
 *
 * pBiDi:      reads dirProps, levels, length, paras, paraCount, paraLevel and
 *             defaultParaLevel; rewrites levels[i] where an explicit level 0
 *             is used as a wildcard; sets isolateCount (deepest isolate
 *             nesting seen) and flags.
 * pErrorCode: set to U_ILLEGAL_ARGUMENT_ERROR when a level is out of bounds.
 *
 * Returns directionFromFlags(pBiDi) on success. On an out-of-bounds level the
 * function stops at that position and returns UBIDI_LTR after setting
 * *pErrorCode; pBiDi->flags is not updated in that case.
 */
static UBiDiDirection
checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
    DirProp *dirProps=pBiDi->dirProps;
    UBiDiLevel *levels=pBiDi->levels;
    int32_t isolateCount=0;     /* current isolate nesting depth */

    int32_t length=pBiDi->length;
    Flags flags=0;  /* collect all directionalities in the text */
    pBiDi->isolateCount=0;      /* becomes the maximum nesting depth */

    int32_t currentParaIndex = 0;
    int32_t currentParaLimit = pBiDi->paras[0].limit;
    int32_t currentParaLevel = pBiDi->paraLevel;

    for(int32_t i=0; i<length; ++i) {
        UBiDiLevel level=levels[i];
        DirProp dirProp=dirProps[i];
        if(dirProp==LRI || dirProp==RLI) {
            isolateCount++;
            if(isolateCount>pBiDi->isolateCount)
                pBiDi->isolateCount=isolateCount;
        }
        else if(dirProp==PDI) {
            /* A PDI without a matching isolate initiator closes nothing.
             * Do not let the depth go negative, otherwise the isolate
             * initiators that follow would be under-counted in
             * pBiDi->isolateCount.
             */
            if(isolateCount>0)
                isolateCount--;
        }
        else if(dirProp==B)
            isolateCount=0;

        // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
        if (pBiDi->defaultParaLevel != 0 &&
                i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
            currentParaLevel = pBiDi->paras[++currentParaIndex].level;
            currentParaLimit = pBiDi->paras[currentParaIndex].limit;
        }

        /* split the stored value into the override bit and the bare level */
        UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
        level &= ~UBIDI_LEVEL_OVERRIDE;
        if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
            if (level == 0) {
                if (dirProp == B) {
                    // Paragraph separators are ok with explicit level 0.
                    // Prevents reordering of paragraphs.
                } else {
                    // Treat explicit level 0 as a wildcard for the paragraph level.
                    // Avoid making the caller guess what the paragraph level would be.
                    level = static_cast<UBiDiLevel>(currentParaLevel);
                    levels[i] = level | overrideFlag;
                }
            } else {
                // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
                /* level out of bounds */
                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
                return UBIDI_LTR;
            }
        }
        if (overrideFlag != 0) {
            /* keep the override flag in levels[i] but adjust the flags */
            flags|=DIRPROP_FLAG_O(level);
        } else {
            /* set the flags */
            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
        }
    }
    if(flags&MASK_EMBEDDING)
        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
    /* determine if the text is mixed-directional or single-directional */
    pBiDi->flags=flags;
    return directionFromFlags(pBiDi);
}
1421
1422
/******************************************************************
1423
 The Properties state machine table
1424
*******************************************************************
1425
1426
 All table cells are 8 bits:
1427
      bits 0..4:  next state
1428
      bits 5..7:  action to perform (if > 0)
1429
1430
 Cells may be of format "n" where n represents the next state
1431
 (except for the rightmost column).
1432
 Cells may also be of format "s(x,y)" where x represents an action
1433
 to perform and y represents the next state.
1434
1435
*******************************************************************
1436
 Definitions and type for properties state table
1437
*******************************************************************
1438
*/
1439
20.7k
/* Number of columns in a properties state table row; the last one is "Res". */
#define IMPTABPROPS_COLUMNS 16
/* Index of the rightmost ("Res") column. */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* Next state: bits 0..4 of a cell. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* Action to perform: bits 5..7 of a cell (0 means no action). */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Build a cell from an action and a next state.  Both arguments are
 * parenthesized so that expression arguments are not regrouped by operator
 * precedence inside the expansion.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
1444
1445
/* Maps a directional property (in the order given by the header comment
 * below) to its column index in impTabProps.  Several properties share a
 * column: 4 is the ON column, 10 is the BN column.
 */
static const uint8_t groupProp[] =          /* dirProp regrouped */
1446
{
1447
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1448
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1449
};
1450
/* Reduced property values.  They appear in the Res column of impTabProps and
 * DirProp_B also sizes the rows of the levels tables (IMPTABLEVELS_COLUMNS).
 */
enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1451
1452
/******************************************************************
1453
1454
      PROPERTIES  STATE  TABLE
1455
1456
 In table impTabProps,
1457
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1458
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1459
      - the Res column is the reduced property assigned to a run
1460
1461
 Action 1: process current run1, init new run1
1462
        2: init new run2
1463
        3: process run1, process run2, init new run1
1464
        4: process run1, set run1=run2, init new run2
1465
1466
 Notes:
1467
  1) This table is used in resolveImplicitLevels().
1468
  2) This table triggers actions when there is a change in the Bidi
1469
     property of incoming characters (action 1).
1470
  3) Most such property sequences are processed immediately (in
1471
     fact, passed to processPropertySeq()).
1472
  4) However, numbers are assembled as one sequence. This means
1473
     that undefined situations (like CS following digits, until
1474
     it is known if the next char will be a digit) are held until
1475
     following chars define them.
1476
     Example: digits followed by CS, then comes another CS or ON;
1477
              the digits will be processed, then the CS assigned
1478
              as the start of an ON sequence (action 3).
1479
  5) There are cases where more than one sequence must be
1480
     processed, for instance digits followed by CS followed by L:
1481
     the digits must be processed as one sequence, and the CS
1482
     must be processed as an ON sequence, all this before starting
1483
     assembling chars for the opening L sequence.
1484
1485
1486
*/
1487
/* Properties state table.  Each row is a state; columns 0..14 are the
 * regrouped properties (see groupProp) and the last column (IMPTABPROPS_RES)
 * is the reduced property assigned to a run in that state.  A plain number is
 * the next state; s(a,n) means "perform action a, then go to state n".
 */
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1488
{
1489
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1490
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1491
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1492
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1493
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1494
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1495
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1496
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1497
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1498
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1499
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1500
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1501
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1502
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1503
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1504
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1505
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1506
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1507
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1508
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1509
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1510
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1511
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1512
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1513
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1514
};
1515
1516
/*  we must undef macro s because the levels tables have a different
1517
 *  structure (4 bits for action and 4 bits for next state).
1518
 */
1519
#undef s
1520
1521
/******************************************************************
1522
 The levels state machine tables
1523
*******************************************************************
1524
1525
 All table cells are 8 bits:
1526
      bits 0..3:  next state
1527
      bits 4..7:  action to perform (if > 0)
1528
1529
 Cells may be of format "n" where n represents the next state
1530
 (except for the rightmost column).
1531
 Cells may also be of format "s(x,y)" where x represents an action
1532
 to perform and y represents the next state.
1533
1534
 This format limits each table to 16 states each and to 15 actions.
1535
1536
*******************************************************************
1537
 Definitions and type for levels state tables
1538
*******************************************************************
1539
*/
1540
36.3k
/* Number of columns in a levels state table row: one per reduced property
 * (DirProp_L..DirProp_B) plus the trailing "Res" column.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the rightmost ("Res") column. */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Next state: bits 0..3 of a cell. */
#define GET_STATE(cell) ((cell)&0x0f)
/* Table-local action number: bits 4..7 of a cell (0 means no action). */
#define GET_ACTION(cell) ((cell)>>4)
/* Build a cell from an action and a next state.  Both arguments are
 * parenthesized so that expression arguments are not regrouped by operator
 * precedence inside the expansion.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1545
1546
/* A levels state table: any number of rows of IMPTABLEVELS_COLUMNS cells. */
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1547
/* An action map: indexed by a table-local action number, yields the action
 * number in the overall list of actions.
 */
typedef uint8_t ImpAct[];
1548
1549
/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1550
 * instead of having a pair of ImpTab and a pair of ImpAct.
1551
 */
1552
/* A pair of levels state tables with their action maps.  In every
 * initializer in this file, slot 0 holds the "L" (even level) table and
 * slot 1 the "R" (odd level) table; pImpAct[n] is the action map that goes
 * with pImpTab[n].  The pointers are untyped; the initializers store
 * addresses of ImpTab and ImpAct objects.
 */
typedef struct ImpTabPair {
1553
    const void * pImpTab[2];
1554
    const void * pImpAct[2];
1555
} ImpTabPair;
1556
1557
/******************************************************************
1558
1559
      LEVELS  STATE  TABLES
1560
1561
 In all levels state tables,
1562
      - state 0 is the initial state
1563
      - the Res column is the increment to add to the text level
1564
        for this property sequence.
1565
1566
 The impAct arrays for each table of a pair map the local action
1567
 numbers of the table to the total list of actions. For instance,
1568
 action 2 in a given table corresponds to the action number which
1569
 appears in entry [2] of the impAct array for that table.
1570
 The first entry of all impAct arrays must be 0.
1571
1572
 Action 1: init conditional sequence
1573
        2: prepend conditional sequence to current sequence
1574
        3: set ON sequence to new level - 1
1575
        4: init EN/AN/ON sequence
1576
        5: fix EN/AN/ON sequence followed by R
1577
        6: set previous level sequence to level 2
1578
1579
 Notes:
1580
  1) These tables are used in processPropertySeq(). The input
1581
     is property sequences as determined by resolveImplicitLevels.
1582
  2) Most such property sequences are processed immediately
1583
     (levels are assigned).
1584
  3) However, some sequences cannot be assigned a final level till
1585
     one or more following sequences are received. For instance,
1586
     ON following an R sequence within an even-level paragraph.
1587
     If the following sequence is R, the ON sequence will be
1588
     assigned basic run level+1, and so will the R sequence.
1589
  4) S is generally handled like ON, since its level will be fixed
1590
     to paragraph level in adjustWSLevels().
1591
1592
*/
1593
1594
/* Even-level table of impTab_DEFAULT; also used as the even-level table of
 * impTab_INVERSE_LIKE_DIRECT.  Row = current state, columns L..B = reduced
 * property of the incoming sequence, Res = level increment for the state.
 */
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1595
/*  In this table, conditional sequences receive the lower possible level
1596
    until proven otherwise.
1597
*/
1598
{
1599
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1600
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1601
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1602
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1603
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1604
/* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
1605
/* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
1606
};
1607
/* Odd-level table shared by impTab_DEFAULT and impTab_NUMBERS_SPECIAL.
 * Row = current state, columns L..B = reduced property of the incoming
 * sequence, Res = level increment for the state.
 */
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1608
/*  In this table, conditional sequences receive the lower possible level
1609
    until proven otherwise.
1610
*/
1611
{
1612
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1613
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1614
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1615
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1616
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1617
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1618
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1619
};
1620
/* Identity action map: table-local action n is overall action n (n = 0..4). */
static const ImpAct impAct0 = {0,1,2,3,4};
1621
/* Default table pair: impTabL_DEFAULT / impTabR_DEFAULT, both with impAct0. */
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1622
                                           &impTabR_DEFAULT},
1623
                                          {&impAct0, &impAct0}};
1624
1625
/* Even-level table of impTab_NUMBERS_SPECIAL and of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL.  Same row/column layout as the other
 * levels tables (columns L..B, then Res).
 */
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1626
/*  In this table, conditional sequences receive the lower possible level
1627
    until proven otherwise.
1628
*/
1629
{
1630
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1631
/* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1632
/* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1633
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
1634
/* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
1635
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
1636
};
1637
/* Pair: impTabL_NUMBERS_SPECIAL for even levels, the default odd-level table
 * impTabR_DEFAULT for odd levels; both use the identity action map impAct0.
 */
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1638
                                                   &impTabR_DEFAULT},
1639
                                                  {&impAct0, &impAct0}};
1640
1641
/* Slot-0 ("L") table of impTab_GROUP_NUMBERS_WITH_R.  Same row/column layout
 * as the other levels tables (columns L..B, then Res).
 */
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1642
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1643
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
1644
*/
1645
{
1646
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1647
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1648
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1649
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1650
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1651
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1652
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1653
};
1654
/* Slot-1 ("R") table of impTab_GROUP_NUMBERS_WITH_R.  Same row/column layout
 * as the other levels tables (columns L..B, then Res).
 */
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1655
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1656
    until proven that there is L on both sides. AN is handled like EN.
1657
*/
1658
{
1659
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1660
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1661
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1662
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1663
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1664
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1665
};
1666
/* Pair of the two GROUP_NUMBERS_WITH_R tables; both use the identity action
 * map impAct0.
 */
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1667
                        {&impTabL_GROUP_NUMBERS_WITH_R,
1668
                         &impTabR_GROUP_NUMBERS_WITH_R},
1669
                        {&impAct0, &impAct0}};
1670
1671
1672
/* Slot-0 ("L") table of impTab_INVERSE_NUMBERS_AS_L.  Same row/column layout
 * as the other levels tables (columns L..B, then Res).
 */
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1673
/*  This table is identical to the Default LTR table except that EN and AN are
1674
    handled like L.
1675
*/
1676
{
1677
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1678
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1679
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1680
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1681
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1682
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1683
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1684
};
1685
/* Slot-1 ("R") table of impTab_INVERSE_NUMBERS_AS_L.  Same row/column layout
 * as the other levels tables (columns L..B, then Res).
 */
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1686
/*  This table is identical to the Default RTL table except that EN and AN are
1687
    handled like L.
1688
*/
1689
{
1690
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1691
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1692
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1693
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1694
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1695
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1696
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1697
};
1698
/* Pair of the two INVERSE_NUMBERS_AS_L tables; both use the identity action
 * map impAct0.
 */
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1699
                        {&impTabL_INVERSE_NUMBERS_AS_L,
1700
                         &impTabR_INVERSE_NUMBERS_AS_L},
1701
                        {&impAct0, &impAct0}};
1702
1703
/* Odd-level table of impTab_INVERSE_LIKE_DIRECT and of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL.  Its action numbers are table-local
 * and are translated through impAct1 in both pairs.
 */
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1704
/*  In this table, conditional sequences receive the lower possible level
1705
    until proven otherwise.
1706
*/
1707
{
1708
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1709
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1710
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1711
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1712
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1713
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1714
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1715
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1716
};
1717
/* Action map: table-local actions 0,1,2,3 are overall actions 0,1,13,14. */
static const ImpAct impAct1 = {0,1,13,14};
1718
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1719
 */
1720
/* Pair: default even-level table with the identity map impAct0, and
 * impTabR_INVERSE_LIKE_DIRECT with impAct1.
 */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1721
                        {&impTabL_DEFAULT,
1722
                         &impTabR_INVERSE_LIKE_DIRECT},
1723
                        {&impAct0, &impAct1}};
1724
1725
/* Slot-0 ("L") table of impTab_INVERSE_LIKE_DIRECT_WITH_MARKS.  Its action
 * numbers are table-local and are translated through impAct2 in that pair.
 */
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1726
/*  The case handled in this table is (visually):  R EN L
1727
*/
1728
{
1729
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1730
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1731
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1732
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1733
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1734
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1735
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1736
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1737
};
1738
/* Slot-1 ("R") table of impTab_INVERSE_LIKE_DIRECT_WITH_MARKS and of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS.  Its action numbers are
 * table-local and are translated through impAct3 in both pairs.
 */
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1739
/*  The cases handled in this table are (visually):  R EN L
1740
                                                     R L AN L
1741
*/
1742
{
1743
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1744
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1745
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1746
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1747
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1748
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1749
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1750
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1751
};
1752
/* Action map: table-local actions 0..6 are overall actions 0,1,2,5,6,7,8. */
static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1753
/* Action map: table-local actions 0..5 are overall actions 0,1,9,10,11,12. */
static const ImpAct impAct3 = {0,1,9,10,11,12};
1754
/* Pair of the two INVERSE_LIKE_DIRECT_WITH_MARKS tables: the L table uses
 * impAct2, the R table uses impAct3.
 */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1755
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1756
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1757
                        {&impAct2, &impAct3}};
1758
1759
/* Pair built only from tables defined earlier: impTabL_NUMBERS_SPECIAL with
 * the identity map impAct0, and impTabR_INVERSE_LIKE_DIRECT with impAct1.
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1760
                        {&impTabL_NUMBERS_SPECIAL,
1761
                         &impTabR_INVERSE_LIKE_DIRECT},
1762
                        {&impAct0, &impAct1}};
1763
1764
/* Slot-0 ("L") table of impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS.  Its
 * action numbers are table-local and are translated through impAct2 in that
 * pair.
 */
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1765
/*  The case handled in this table is (visually):  R EN L
1766
*/
1767
{
1768
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1769
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1770
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1771
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1772
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1773
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1774
};
1775
/* Pair: impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS with impAct2, and the
 * shared impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS with impAct3.
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1776
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1777
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1778
                        {&impAct2, &impAct3}};
1779
1780
#undef s
1781
1782
/* Working state of the levels state machine, passed to processPropertySeq()
 * for each property sequence.  processPropertySeq() reads pImpTab/pImpAct,
 * uses `state` as the row index into *pImpTab and stores the next state back
 * into it.
 */
typedef struct {
1783
    const ImpTab * pImpTab;             /* level table pointer          */
1784
    const ImpAct * pImpAct;             /* action map array             */
1785
    int32_t startON;                    /* start of ON sequence         */
1786
    int32_t startL2EN;                  /* start of level 2 sequence    */
1787
    int32_t lastStrongRTL;              /* index of last found R or AL  */
1788
    int32_t state;                      /* current state                */
1789
    int32_t runStart;                   /* start position of the run    */
1790
    UBiDiLevel runLevel;                /* run level before implicit solving */
1791
} LevState;
1792
1793
/*------------------------------------------------------------------------*/
1794
1795
static void
1796
addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1797
  /* param pos:     position where to insert
1798
     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1799
  */
1800
0
{
1801
0
#define FIRSTALLOC  10
1802
0
    Point point;
1803
0
    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1804
1805
0
    if (pInsertPoints->capacity == 0)
1806
0
    {
1807
0
        pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1808
0
        if (pInsertPoints->points == nullptr)
1809
0
        {
1810
0
            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1811
0
            return;
1812
0
        }
1813
0
        pInsertPoints->capacity=FIRSTALLOC;
1814
0
    }
1815
0
    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1816
0
    {
1817
0
        Point * savePoints=pInsertPoints->points;
1818
0
        pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1819
0
                                           pInsertPoints->capacity*2*sizeof(Point)));
1820
0
        if (pInsertPoints->points == nullptr)
1821
0
        {
1822
0
            pInsertPoints->points=savePoints;
1823
0
            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1824
0
            return;
1825
0
        }
1826
0
        else  pInsertPoints->capacity*=2;
1827
0
    }
1828
0
    point.pos=pos;
1829
0
    point.flag=flag;
1830
0
    pInsertPoints->points[pInsertPoints->size]=point;
1831
0
    pInsertPoints->size++;
1832
0
#undef FIRSTALLOC
1833
0
}
1834
1835
static void
1836
setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1837
553
{
1838
553
    DirProp *dirProps=pBiDi->dirProps, dirProp;
1839
553
    UBiDiLevel *levels=pBiDi->levels;
1840
553
    int32_t isolateCount=0, k;
1841
7.31k
    for(k=start; k<limit; k++) {
1842
6.75k
        dirProp=dirProps[k];
1843
6.75k
        if(dirProp==PDI)
1844
1.79k
            isolateCount--;
1845
6.75k
        if(isolateCount==0)
1846
1.50k
            levels[k]=level;
1847
6.75k
        if(dirProp==LRI || dirProp==RLI)
1848
1.79k
            isolateCount++;
1849
6.75k
    }
1850
553
}
1851
1852
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1853
1854
/*
1855
 * This implementation of the (Wn) rules applies all rules in one pass.
1856
 * In order to do so, it needs a look-ahead of typically 1 character
1857
 * (except for W5: sequences of ET) and keeps track of changes
1858
 * in a rule Wp that affect a later Wq (p<q).
1859
 *
1860
 * The (Nn) and (In) rules are also performed in that same single loop,
1861
 * but effectively one iteration behind for white space.
1862
 *
1863
 * Since all implicit rules are performed in one step, it is not necessary
1864
 * to actually store the intermediate directional properties in dirProps[].
1865
 */
1866
1867
static void
1868
processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1869
36.3k
                   int32_t start, int32_t limit) {
1870
36.3k
    uint8_t cell, oldStateSeq, actionSeq;
1871
36.3k
    const ImpTab * pImpTab=pLevState->pImpTab;
1872
36.3k
    const ImpAct * pImpAct=pLevState->pImpAct;
1873
36.3k
    UBiDiLevel * levels=pBiDi->levels;
1874
36.3k
    UBiDiLevel level, addLevel;
1875
36.3k
    InsertPoints * pInsertPoints;
1876
36.3k
    int32_t start0, k;
1877
1878
36.3k
    start0=start;                           /* save original start position */
1879
36.3k
    oldStateSeq = static_cast<uint8_t>(pLevState->state);
1880
36.3k
    cell=(*pImpTab)[oldStateSeq][_prop];
1881
36.3k
    pLevState->state=GET_STATE(cell);       /* isolate the new state */
1882
36.3k
    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1883
36.3k
    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1884
1885
36.3k
    if(actionSeq) {
1886
3.29k
        switch(actionSeq) {
1887
1.82k
        case 1:                         /* init ON seq */
1888
1.82k
            pLevState->startON=start0;
1889
1.82k
            break;
1890
1891
1.10k
        case 2:                         /* prepend ON seq to current seq */
1892
1.10k
            start=pLevState->startON;
1893
1.10k
            break;
1894
1895
363
        case 3:                         /* EN/AN after R+ON */
1896
363
            level=pLevState->runLevel+1;
1897
363
            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1898
363
            break;
1899
1900
0
        case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1901
0
            level=pLevState->runLevel+2;
1902
0
            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1903
0
            break;
1904
1905
0
        case 5:                         /* L or S after possible relevant EN/AN */
1906
            /* check if we had EN after R/AL */
1907
0
            if (pLevState->startL2EN >= 0) {
1908
0
                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1909
0
            }
1910
0
            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1911
            /* check if we had any relevant EN/AN after R/AL */
1912
0
            pInsertPoints=&(pBiDi->insertPoints);
1913
0
            if ((pInsertPoints->capacity == 0) ||
1914
0
                (pInsertPoints->size <= pInsertPoints->confirmed))
1915
0
            {
1916
                /* nothing, just clean up */
1917
0
                pLevState->lastStrongRTL=-1;
1918
                /* check if we have a pending conditional segment */
1919
0
                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1920
0
                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1921
0
                    start=pLevState->startON;   /* reset to basic run level */
1922
0
                }
1923
0
                if (_prop == DirProp_S)                /* add LRM before S */
1924
0
                {
1925
0
                    addPoint(pBiDi, start0, LRM_BEFORE);
1926
0
                    pInsertPoints->confirmed=pInsertPoints->size;
1927
0
                }
1928
0
                break;
1929
0
            }
1930
            /* reset previous RTL cont to level for LTR text */
1931
0
            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1932
0
            {
1933
                /* reset odd level, leave runLevel+2 as is */
1934
0
                levels[k]=(levels[k] - 2) & ~1;
1935
0
            }
1936
            /* mark insert points as confirmed */
1937
0
            pInsertPoints->confirmed=pInsertPoints->size;
1938
0
            pLevState->lastStrongRTL=-1;
1939
0
            if (_prop == DirProp_S)            /* add LRM before S */
1940
0
            {
1941
0
                addPoint(pBiDi, start0, LRM_BEFORE);
1942
0
                pInsertPoints->confirmed=pInsertPoints->size;
1943
0
            }
1944
0
            break;
1945
1946
0
        case 6:                         /* R/AL after possible relevant EN/AN */
1947
            /* just clean up */
1948
0
            pInsertPoints=&(pBiDi->insertPoints);
1949
0
            if (pInsertPoints->capacity > 0)
1950
                /* remove all non confirmed insert points */
1951
0
                pInsertPoints->size=pInsertPoints->confirmed;
1952
0
            pLevState->startON=-1;
1953
0
            pLevState->startL2EN=-1;
1954
0
            pLevState->lastStrongRTL=limit - 1;
1955
0
            break;
1956
1957
0
        case 7:                         /* EN/AN after R/AL + possible cont */
1958
            /* check for real AN */
1959
0
            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1960
0
                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1961
0
            {
1962
                /* real AN */
1963
0
                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1964
0
                {
1965
                    /* just note the righmost digit as a strong RTL */
1966
0
                    pLevState->lastStrongRTL=limit - 1;
1967
0
                    break;
1968
0
                }
1969
0
                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1970
0
                {
1971
0
                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1972
0
                    pLevState->startL2EN=-2;
1973
0
                }
1974
                /* note AN */
1975
0
                addPoint(pBiDi, start0, LRM_BEFORE);
1976
0
                break;
1977
0
            }
1978
            /* if first EN/AN after R/AL */
1979
0
            if (pLevState->startL2EN == -1) {
1980
0
                pLevState->startL2EN=start0;
1981
0
            }
1982
0
            break;
1983
1984
0
        case 8:                         /* note location of latest R/AL */
1985
0
            pLevState->lastStrongRTL=limit - 1;
1986
0
            pLevState->startON=-1;
1987
0
            break;
1988
1989
0
        case 9:                         /* L after R+ON/EN/AN */
1990
            /* include possible adjacent number on the left */
1991
0
            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1992
0
            if(k>=0) {
1993
0
                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1994
0
                pInsertPoints=&(pBiDi->insertPoints);
1995
0
                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1996
0
            }
1997
0
            pLevState->startON=start0;
1998
0
            break;
1999
2000
0
        case 10:                        /* AN after L */
2001
            /* AN numbers between L text on both sides may be trouble. */
2002
            /* tentatively bracket with LRMs; will be confirmed if followed by L */
2003
0
            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
2004
0
            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
2005
0
            break;
2006
2007
0
        case 11:                        /* R after L+ON/EN/AN */
2008
            /* false alert, infirm LRMs around previous AN */
2009
0
            pInsertPoints=&(pBiDi->insertPoints);
2010
0
            pInsertPoints->size=pInsertPoints->confirmed;
2011
0
            if (_prop == DirProp_S)            /* add RLM before S */
2012
0
            {
2013
0
                addPoint(pBiDi, start0, RLM_BEFORE);
2014
0
                pInsertPoints->confirmed=pInsertPoints->size;
2015
0
            }
2016
0
            break;
2017
2018
0
        case 12:                        /* L after L+ON/AN */
2019
0
            level=pLevState->runLevel + addLevel;
2020
0
            for(k=pLevState->startON; k<start0; k++) {
2021
0
                if (levels[k]<level)
2022
0
                    levels[k]=level;
2023
0
            }
2024
0
            pInsertPoints=&(pBiDi->insertPoints);
2025
0
            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
2026
0
            pLevState->startON=start0;
2027
0
            break;
2028
2029
0
        case 13:                        /* L after L+ON+EN/AN/ON */
2030
0
            level=pLevState->runLevel;
2031
0
            for(k=start0-1; k>=pLevState->startON; k--) {
2032
0
                if(levels[k]==level+3) {
2033
0
                    while(levels[k]==level+3) {
2034
0
                        levels[k--]-=2;
2035
0
                    }
2036
0
                    while(levels[k]==level) {
2037
0
                        k--;
2038
0
                    }
2039
0
                }
2040
0
                if(levels[k]==level+2) {
2041
0
                    levels[k]=level;
2042
0
                    continue;
2043
0
                }
2044
0
                levels[k]=level+1;
2045
0
            }
2046
0
            break;
2047
2048
0
        case 14:                        /* R after L+ON+EN/AN/ON */
2049
0
            level=pLevState->runLevel+1;
2050
0
            for(k=start0-1; k>=pLevState->startON; k--) {
2051
0
                if(levels[k]>level) {
2052
0
                    levels[k]-=2;
2053
0
                }
2054
0
            }
2055
0
            break;
2056
2057
0
        default:                        /* we should never get here */
2058
0
            UPRV_UNREACHABLE_EXIT;
2059
3.29k
        }
2060
3.29k
    }
2061
36.3k
    if((addLevel) || (start < start0)) {
2062
9.69k
        level=pLevState->runLevel + addLevel;
2063
9.69k
        if(start>=pLevState->runStart) {
2064
25.8k
            for(k=start; k<limit; k++) {
2065
16.3k
                levels[k]=level;
2066
16.3k
            }
2067
9.50k
        } else {
2068
190
            setLevelsOutsideIsolates(pBiDi, start, limit, level);
2069
190
        }
2070
9.69k
    }
2071
36.3k
}
2072
2073
/**
2074
 * Returns the directionality of the last strong character at the end of the prologue, if any.
2075
 * Requires prologue!=null.
2076
 */
2077
static DirProp
2078
0
lastL_R_AL(UBiDi *pBiDi) {
2079
0
    const char16_t *text=pBiDi->prologue;
2080
0
    int32_t length=pBiDi->proLength;
2081
0
    int32_t i;
2082
0
    UChar32 uchar;
2083
0
    DirProp dirProp;
2084
0
    for(i=length; i>0; ) {
2085
        /* i is decremented by U16_PREV */
2086
0
        U16_PREV(text, 0, i, uchar);
2087
0
        dirProp = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, uchar));
2088
0
        if(dirProp==L) {
2089
0
            return DirProp_L;
2090
0
        }
2091
0
        if(dirProp==R || dirProp==AL) {
2092
0
            return DirProp_R;
2093
0
        }
2094
0
        if(dirProp==B) {
2095
0
            return DirProp_ON;
2096
0
        }
2097
0
    }
2098
0
    return DirProp_ON;
2099
0
}
2100
2101
/**
2102
 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2103
 * Requires epilogue!=null.
2104
 */
2105
static DirProp
2106
0
firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2107
0
    const char16_t *text=pBiDi->epilogue;
2108
0
    int32_t length=pBiDi->epiLength;
2109
0
    int32_t i;
2110
0
    UChar32 uchar;
2111
0
    DirProp dirProp;
2112
0
    for(i=0; i<length; ) {
2113
        /* i is incremented by U16_NEXT */
2114
0
        U16_NEXT(text, i, length, uchar);
2115
0
        dirProp = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, uchar));
2116
0
        if(dirProp==L) {
2117
0
            return DirProp_L;
2118
0
        }
2119
0
        if(dirProp==R || dirProp==AL) {
2120
0
            return DirProp_R;
2121
0
        }
2122
0
        if(dirProp==EN) {
2123
0
            return DirProp_EN;
2124
0
        }
2125
0
        if(dirProp==AN) {
2126
0
            return DirProp_AN;
2127
0
        }
2128
0
    }
2129
0
    return DirProp_ON;
2130
0
}
2131
2132
static void
2133
resolveImplicitLevels(UBiDi *pBiDi,
2134
                      int32_t start, int32_t limit,
2135
12.0k
                      DirProp sor, DirProp eor) {
2136
12.0k
    const DirProp *dirProps=pBiDi->dirProps;
2137
12.0k
    DirProp dirProp;
2138
12.0k
    LevState levState;
2139
12.0k
    int32_t i, start1, start2;
2140
12.0k
    uint16_t oldStateImp, stateImp, actionImp;
2141
12.0k
    uint8_t gprop, resProp, cell;
2142
12.0k
    UBool inverseRTL;
2143
12.0k
    DirProp nextStrongProp=R;
2144
12.0k
    int32_t nextStrongPos=-1;
2145
2146
    /* check for RTL inverse BiDi mode */
2147
    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2148
     * loop on the text characters from end to start.
2149
     * This would need a different properties state table (at least different
2150
     * actions) and different levels state tables (maybe very similar to the
2151
     * LTR corresponding ones.
2152
     */
2153
12.0k
    inverseRTL =
2154
12.0k
        static_cast<UBool>((start < pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2155
1.33k
                           (pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2156
1.33k
                            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2157
2158
    /* initialize for property and levels state tables */
2159
12.0k
    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2160
12.0k
    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2161
12.0k
    levState.runStart=start;
2162
12.0k
    levState.runLevel=pBiDi->levels[start];
2163
12.0k
    levState.pImpTab = static_cast<const ImpTab*>(((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel & 1]);
2164
12.0k
    levState.pImpAct = static_cast<const ImpAct*>(((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel & 1]);
2165
12.0k
    if(start==0 && pBiDi->proLength>0) {
2166
0
        DirProp lastStrong=lastL_R_AL(pBiDi);
2167
0
        if(lastStrong!=DirProp_ON) {
2168
0
            sor=lastStrong;
2169
0
        }
2170
0
    }
2171
    /* The isolates[] entries contain enough information to
2172
       resume the bidi algorithm in the same state as it was
2173
       when it was interrupted by an isolate sequence. */
2174
12.0k
    if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
2175
627
        levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2176
627
        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2177
627
        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2178
627
        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2179
627
        pBiDi->isolateCount--;
2180
11.3k
    } else {
2181
11.3k
        levState.startON=-1;
2182
11.3k
        start1=start;
2183
11.3k
        if(dirProps[start]==NSM)
2184
128
            stateImp = 1 + sor;
2185
11.2k
        else
2186
11.2k
            stateImp=0;
2187
11.3k
        levState.state=0;
2188
11.3k
        processPropertySeq(pBiDi, &levState, sor, start, start);
2189
11.3k
    }
2190
12.0k
    start2=start;                       /* to make Java compiler happy */
2191
2192
75.2k
    for(i=start; i<=limit; i++) {
2193
71.0k
        if(i>=limit) {
2194
12.0k
            int32_t k;
2195
15.2k
            for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2196
12.0k
            dirProp=dirProps[k];
2197
12.0k
            if(dirProp==LRI || dirProp==RLI)
2198
7.82k
                break;      /* no forced closing for sequence ending with LRI/RLI */
2199
4.19k
            gprop=eor;
2200
59.0k
        } else {
2201
59.0k
            DirProp prop, prop1;
2202
59.0k
            prop=dirProps[i];
2203
59.0k
            if(prop==B) {
2204
6.83k
                pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2205
6.83k
            }
2206
59.0k
            if(inverseRTL) {
2207
0
                if(prop==AL) {
2208
                    /* AL before EN does not make it AN */
2209
0
                    prop=R;
2210
0
                } else if(prop==EN) {
2211
0
                    if(nextStrongPos<=i) {
2212
                        /* look for next strong char (L/R/AL) */
2213
0
                        int32_t j;
2214
0
                        nextStrongProp=R;   /* set default */
2215
0
                        nextStrongPos=limit;
2216
0
                        for(j=i+1; j<limit; j++) {
2217
0
                            prop1=dirProps[j];
2218
0
                            if(prop1==L || prop1==R || prop1==AL) {
2219
0
                                nextStrongProp=prop1;
2220
0
                                nextStrongPos=j;
2221
0
                                break;
2222
0
                            }
2223
0
                        }
2224
0
                    }
2225
0
                    if(nextStrongProp==AL) {
2226
0
                        prop=AN;
2227
0
                    }
2228
0
                }
2229
0
            }
2230
59.0k
            gprop=groupProp[prop];
2231
59.0k
        }
2232
63.2k
        oldStateImp=stateImp;
2233
63.2k
        cell=impTabProps[oldStateImp][gprop];
2234
63.2k
        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2235
63.2k
        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2236
63.2k
        if((i==limit) && (actionImp==0)) {
2237
            /* there is an unprocessed sequence if its property == eor   */
2238
1.09k
            actionImp=1;                    /* process the last sequence */
2239
1.09k
        }
2240
63.2k
        if(actionImp) {
2241
20.7k
            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2242
20.7k
            switch(actionImp) {
2243
20.1k
            case 1:             /* process current seq1, init new seq1 */
2244
20.1k
                processPropertySeq(pBiDi, &levState, resProp, start1, i);
2245
20.1k
                start1=i;
2246
20.1k
                break;
2247
259
            case 2:             /* init new seq2 */
2248
259
                start2=i;
2249
259
                break;
2250
125
            case 3:             /* process seq1, process seq2, init new seq1 */
2251
125
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2252
125
                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2253
125
                start1=i;
2254
125
                break;
2255
200
            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2256
200
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2257
200
                start1=start2;
2258
200
                start2=i;
2259
200
                break;
2260
0
            default:            /* we should never get here */
2261
0
                UPRV_UNREACHABLE_EXIT;
2262
20.7k
            }
2263
20.7k
        }
2264
63.2k
    }
2265
2266
    /* flush possible pending sequence, e.g. ON */
2267
12.0k
    if(limit==pBiDi->length && pBiDi->epiLength>0) {
2268
0
        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2269
0
        if(firstStrong!=DirProp_ON) {
2270
0
            eor=firstStrong;
2271
0
        }
2272
0
    }
2273
2274
    /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2275
15.2k
    for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2276
12.0k
    dirProp=dirProps[i];
2277
12.0k
    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2278
7.64k
        pBiDi->isolateCount++;
2279
7.64k
        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2280
7.64k
        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2281
7.64k
        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2282
7.64k
        pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2283
7.64k
    }
2284
4.37k
    else
2285
4.37k
        processPropertySeq(pBiDi, &levState, eor, limit, limit);
2286
12.0k
}
2287
2288
/* perform (L1) and (X9) ---------------------------------------------------- */
2289
2290
/*
2291
 * Reset the embedding levels for some non-graphic characters (L1).
2292
 * This function also sets appropriate levels for BN, and
2293
 * explicit embedding types that are supposed to have been removed
2294
 * from the paragraph in (X9).
2295
 */
2296
static void
2297
1.94k
adjustWSLevels(UBiDi *pBiDi) {
2298
1.94k
    const DirProp *dirProps=pBiDi->dirProps;
2299
1.94k
    UBiDiLevel *levels=pBiDi->levels;
2300
1.94k
    int32_t i;
2301
2302
1.94k
    if(pBiDi->flags&MASK_WS) {
2303
1.38k
        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2304
1.38k
        Flags flag;
2305
2306
1.38k
        i=pBiDi->trailingWSStart;
2307
6.86k
        while(i>0) {
2308
            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2309
16.9k
            while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2310
11.5k
                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2311
0
                    levels[i]=0;
2312
11.5k
                } else {
2313
11.5k
                    levels[i]=GET_PARALEVEL(pBiDi, i);
2314
11.5k
                }
2315
11.5k
            }
2316
2317
            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2318
            /* here, i+1 is guaranteed to be <length */
2319
35.2k
            while(i>0) {
2320
33.9k
                flag=DIRPROP_FLAG(dirProps[--i]);
2321
33.9k
                if(flag&MASK_BN_EXPLICIT) {
2322
5.96k
                    levels[i]=levels[i+1];
2323
27.9k
                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2324
0
                    levels[i]=0;
2325
0
                    break;
2326
27.9k
                } else if(flag&MASK_B_S) {
2327
4.19k
                    levels[i]=GET_PARALEVEL(pBiDi, i);
2328
4.19k
                    break;
2329
4.19k
                }
2330
33.9k
            }
2331
5.48k
        }
2332
1.38k
    }
2333
1.94k
}
2334
2335
/*
 * Stores the prologue and epilogue context strings in the UBiDi object.
 *
 * Returns immediately if pErrorCode is null or already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR if pBiDi is null, if a length is smaller
 * than -1, or if a string pointer is null while its length is not 0.
 * A length of -1 means the string is NUL-terminated; its length is then
 * obtained with u_strlen(). Only the pointers are stored, the text is not
 * copied.
 */
U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi *pBiDi,
                 const char16_t *prologue, int32_t proLength,
                 const char16_t *epilogue, int32_t epiLength,
                 UErrorCode *pErrorCode) {
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);

    /* a string argument is acceptable if its length is >=-1 and a null pointer has length 0 */
    const bool prologueOk = proLength>=-1 && (prologue!=nullptr || proLength==0);
    const bool epilogueOk = epiLength>=-1 && (epilogue!=nullptr || epiLength==0);
    if(pBiDi==nullptr || !prologueOk || !epilogueOk) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    pBiDi->proLength = (proLength==-1) ? u_strlen(prologue) : proLength;
    pBiDi->epiLength = (epiLength==-1) ? u_strlen(epilogue) : epiLength;
    pBiDi->prologue=prologue;
    pBiDi->epilogue=epilogue;
}
2361
2362
static void
2363
2.87k
setParaSuccess(UBiDi *pBiDi) {
2364
2.87k
    pBiDi->proLength=0;                 /* forget the last context */
2365
2.87k
    pBiDi->epiLength=0;
2366
2.87k
    pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2367
2.87k
}
2368
2369
0
/* smaller of two values; each argument may be evaluated more than once */
#define BIDI_MIN(x, y)   ((y)>(x) ? (x) : (y))
2370
0
/* absolute value; the argument may be evaluated more than once */
#define BIDI_ABS(x)      ((x)<0 ? (-(x)) : (x))
2371
2372
static void
2373
setParaRunsOnly(UBiDi *pBiDi, const char16_t *text, int32_t length,
2374
0
                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2375
0
    int32_t *runsOnlyMemory = nullptr;
2376
0
    int32_t *visualMap;
2377
0
    char16_t *visualText;
2378
0
    int32_t saveLength, saveTrailingWSStart;
2379
0
    const UBiDiLevel *levels;
2380
0
    UBiDiLevel *saveLevels;
2381
0
    UBiDiDirection saveDirection;
2382
0
    UBool saveMayAllocateText;
2383
0
    Run *runs;
2384
0
    int32_t visualLength, i, j, visualStart, logicalStart,
2385
0
            runCount, runLength, addedRuns, insertRemove,
2386
0
            start, limit, step, indexOddBit, logicalPos,
2387
0
            index0, index1;
2388
0
    uint32_t saveOptions;
2389
2390
0
    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2391
0
    if(length==0) {
2392
0
        ubidi_setPara(pBiDi, text, length, paraLevel, nullptr, pErrorCode);
2393
0
        goto cleanup3;
2394
0
    }
2395
    /* obtain memory for mapping table and visual text */
2396
0
    runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(char16_t)+sizeof(UBiDiLevel))));
2397
0
    if(runsOnlyMemory==nullptr) {
2398
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2399
0
        goto cleanup3;
2400
0
    }
2401
0
    visualMap=runsOnlyMemory;
2402
0
    visualText = reinterpret_cast<char16_t*>(&visualMap[length]);
2403
0
    saveLevels = reinterpret_cast<UBiDiLevel*>(&visualText[length]);
2404
0
    saveOptions=pBiDi->reorderingOptions;
2405
0
    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2406
0
        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2407
0
        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2408
0
    }
2409
0
    paraLevel&=1;                       /* accept only 0 or 1 */
2410
0
    ubidi_setPara(pBiDi, text, length, paraLevel, nullptr, pErrorCode);
2411
0
    if(U_FAILURE(*pErrorCode)) {
2412
0
        goto cleanup3;
2413
0
    }
2414
    /* we cannot access directly pBiDi->levels since it is not yet set if
2415
     * direction is not MIXED
2416
     */
2417
0
    levels=ubidi_getLevels(pBiDi, pErrorCode);
2418
0
    uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2419
0
    saveTrailingWSStart=pBiDi->trailingWSStart;
2420
0
    saveLength=pBiDi->length;
2421
0
    saveDirection=pBiDi->direction;
2422
2423
    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2424
     * the visual map and the dirProps array to drive the second call
2425
     * to ubidi_setPara (but must make provision for possible removal of
2426
     * BiDi controls.  Alternatively, only use the dirProps array via
2427
     * customized classifier callback.
2428
     */
2429
0
    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2430
0
                                      UBIDI_DO_MIRRORING, pErrorCode);
2431
0
    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2432
0
    if(U_FAILURE(*pErrorCode)) {
2433
0
        goto cleanup2;
2434
0
    }
2435
0
    pBiDi->reorderingOptions=saveOptions;
2436
2437
0
    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2438
0
    paraLevel^=1;
2439
    /* Because what we did with reorderingOptions, visualText may be shorter
2440
     * than the original text. But we don't want the levels memory to be
2441
     * reallocated shorter than the original length, since we need to restore
2442
     * the levels as after the first call to ubidi_setpara() before returning.
2443
     * We will force mayAllocateText to false before the second call to
2444
     * ubidi_setpara(), and will restore it afterwards.
2445
     */
2446
0
    saveMayAllocateText=pBiDi->mayAllocateText;
2447
0
    pBiDi->mayAllocateText=false;
2448
0
    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, nullptr, pErrorCode);
2449
0
    pBiDi->mayAllocateText=saveMayAllocateText;
2450
0
    ubidi_getRuns(pBiDi, pErrorCode);
2451
0
    if(U_FAILURE(*pErrorCode)) {
2452
0
        goto cleanup1;
2453
0
    }
2454
    /* check if some runs must be split, count how many splits */
2455
0
    addedRuns=0;
2456
0
    runCount=pBiDi->runCount;
2457
0
    runs=pBiDi->runs;
2458
0
    visualStart=0;
2459
0
    for(i=0; i<runCount; i++, visualStart+=runLength) {
2460
0
        runLength=runs[i].visualLimit-visualStart;
2461
0
        if(runLength<2) {
2462
0
            continue;
2463
0
        }
2464
0
        logicalStart=GET_INDEX(runs[i].logicalStart);
2465
0
        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2466
0
            index0=visualMap[j];
2467
0
            index1=visualMap[j-1];
2468
0
            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2469
0
                addedRuns++;
2470
0
            }
2471
0
        }
2472
0
    }
2473
0
    if(addedRuns) {
2474
0
        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2475
0
            if(runCount==1) {
2476
                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2477
0
                pBiDi->runsMemory[0]=runs[0];
2478
0
            }
2479
0
            runs=pBiDi->runs=pBiDi->runsMemory;
2480
0
            pBiDi->runCount+=addedRuns;
2481
0
        } else {
2482
0
            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
2483
0
            goto cleanup1;
2484
0
        }
2485
0
    }
2486
    /* split runs which are not consecutive in source text */
2487
0
    for(i=runCount-1; i>=0; i--) {
2488
0
        runLength= i==0 ? runs[0].visualLimit :
2489
0
                          runs[i].visualLimit-runs[i-1].visualLimit;
2490
0
        logicalStart=runs[i].logicalStart;
2491
0
        indexOddBit=GET_ODD_BIT(logicalStart);
2492
0
        logicalStart=GET_INDEX(logicalStart);
2493
0
        if(runLength<2) {
2494
0
            if(addedRuns) {
2495
0
                runs[i+addedRuns]=runs[i];
2496
0
            }
2497
0
            logicalPos=visualMap[logicalStart];
2498
0
            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2499
0
                                            saveLevels[logicalPos]^indexOddBit);
2500
0
            continue;
2501
0
        }
2502
0
        if(indexOddBit) {
2503
0
            start=logicalStart;
2504
0
            limit=logicalStart+runLength-1;
2505
0
            step=1;
2506
0
        } else {
2507
0
            start=logicalStart+runLength-1;
2508
0
            limit=logicalStart;
2509
0
            step=-1;
2510
0
        }
2511
0
        for(j=start; j!=limit; j+=step) {
2512
0
            index0=visualMap[j];
2513
0
            index1=visualMap[j+step];
2514
0
            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2515
0
                logicalPos=BIDI_MIN(visualMap[start], index0);
2516
0
                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2517
0
                                            saveLevels[logicalPos]^indexOddBit);
2518
0
                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2519
0
                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2520
0
                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2521
0
                runs[i+addedRuns].insertRemove=insertRemove;
2522
0
                runs[i].insertRemove&=~insertRemove;
2523
0
                start=j+step;
2524
0
                addedRuns--;
2525
0
            }
2526
0
        }
2527
0
        if(addedRuns) {
2528
0
            runs[i+addedRuns]=runs[i];
2529
0
        }
2530
0
        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2531
0
        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2532
0
                                            saveLevels[logicalPos]^indexOddBit);
2533
0
    }
2534
2535
0
  cleanup1:
2536
    /* restore initial paraLevel */
2537
0
    pBiDi->paraLevel^=1;
2538
0
  cleanup2:
2539
    /* restore real text */
2540
0
    pBiDi->text=text;
2541
0
    pBiDi->length=saveLength;
2542
0
    pBiDi->originalLength=length;
2543
0
    pBiDi->direction=saveDirection;
2544
    /* the saved levels should never excess levelsSize, but we check anyway */
2545
0
    if(saveLength>pBiDi->levelsSize) {
2546
0
        saveLength=pBiDi->levelsSize;
2547
0
    }
2548
0
    uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2549
0
    pBiDi->trailingWSStart=saveTrailingWSStart;
2550
0
    if(pBiDi->runCount>1) {
2551
0
        pBiDi->direction=UBIDI_MIXED;
2552
0
    }
2553
0
  cleanup3:
2554
    /* free memory for mapping table and visual text */
2555
0
    uprv_free(runsOnlyMemory);
2556
2557
0
    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2558
0
}
2559
2560
/* ubidi_setPara ------------------------------------------------------------ */
2561
2562
/*
 * Analyze a piece of text and store the result in pBiDi.
 *
 * Steps, in order: validate the arguments; hand off to setParaRunsOnly()
 * when reorderingMode is UBIDI_REORDER_RUNS_ONLY; reset the per-call fields
 * of pBiDi; get the directional properties; resolve the explicit levels
 * (or check the caller-supplied embeddingLevels); select the isolates
 * buffer; for mixed-direction text pick the implicit state table, resolve
 * the implicit levels run by run and adjust the whitespace levels; add RLM
 * insert points for some inverse modes; finally compute resultLength.
 *
 * pBiDi           object to fill in; nullptr is rejected.
 * text            text to analyze; nullptr is rejected.
 * length          number of char16_t units in text, or -1 to have the
 *                 length determined with u_strlen(); values below -1 are
 *                 rejected.
 * paraLevel       requested paragraph level; values above
 *                 UBIDI_MAX_EXPLICIT_LEVEL and below UBIDI_DEFAULT_LTR
 *                 are rejected.
 * embeddingLevels nullptr to have the levels computed, otherwise an array
 *                 that is used directly as pBiDi->levels and verified with
 *                 checkExplicitLevels().
 * pErrorCode      in/out error code; nothing is done if it is nullptr or
 *                 already indicates a failure. Set to
 *                 U_ILLEGAL_ARGUMENT_ERROR for rejected arguments and to
 *                 U_MEMORY_ALLOCATION_ERROR when a buffer cannot be obtained.
 *
 * pParaBiDi is cleared before any work is done and only setParaSuccess()
 * at the end marks the object as successfully set, so every early error
 * return leaves the object marked as unfinished.
 */
U_CAPI void U_EXPORT2
2563
ubidi_setPara(UBiDi *pBiDi, const char16_t *text, int32_t length,
2564
              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2565
2.89k
              UErrorCode *pErrorCode) {
2566
2.89k
    UBiDiDirection direction;
2567
2.89k
    DirProp *dirProps;
2568
2569
    /* check the argument values */
2570
2.89k
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2571
2.89k
    if(pBiDi==nullptr || text==nullptr || length<-1 ||
2572
2.89k
       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2573
29
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2574
29
        return;
2575
29
    }
2576
2577
2.87k
    if(length==-1) {
2578
0
        length=u_strlen(text);
2579
0
    }
2580
2581
    /* special treatment for RUNS_ONLY mode */
2582
2.87k
    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2583
0
        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2584
0
        return;
2585
0
    }
2586
2587
    /* initialize the UBiDi structure */
2588
2.87k
    pBiDi->pParaBiDi=nullptr;          /* mark unfinished setPara */
2589
2.87k
    pBiDi->text=text;
2590
2.87k
    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2591
2.87k
    pBiDi->paraLevel=paraLevel;
2592
2.87k
    pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2593
2.87k
    pBiDi->paraCount=1;
2594
2595
2.87k
    pBiDi->dirProps=nullptr;
2596
2.87k
    pBiDi->levels=nullptr;
2597
2.87k
    pBiDi->runs=nullptr;
2598
2.87k
    pBiDi->insertPoints.size=0;         /* clean up from last call */
2599
2.87k
    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2600
2601
    /*
2602
     * Save the original paraLevel if contextual; otherwise, set to 0.
2603
     */
2604
2.87k
    pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2605
2606
2.87k
    if(length==0) {
2607
        /*
2608
         * For an empty paragraph, create a UBiDi object with the paraLevel and
2609
         * the flags and the direction set but without allocating zero-length arrays.
2610
         * There is nothing more to do.
2611
         */
2612
30
        if(IS_DEFAULT_LEVEL(paraLevel)) {
2613
3
            pBiDi->paraLevel&=1;
2614
3
            pBiDi->defaultParaLevel=0;
2615
3
        }
2616
30
        pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2617
30
        pBiDi->runCount=0;
2618
30
        pBiDi->paraCount=0;
2619
30
        setParaSuccess(pBiDi);          /* mark successful setPara */
2620
30
        return;
2621
30
    }
2622
2623
2.84k
    pBiDi->runCount=-1;
2624
2625
    /* allocate paras memory: use parasMemory if it is set, else the built-in simpleParas */
2626
2.84k
    if(pBiDi->parasMemory)
2627
0
        pBiDi->paras=pBiDi->parasMemory;
2628
2.84k
    else
2629
2.84k
        pBiDi->paras=pBiDi->simpleParas;
2630
2631
    /*
2632
     * Get the directional properties,
2633
     * the flags bit-set, and
2634
     * determine the paragraph level if necessary.
2635
     */
2636
2.84k
    if(getDirPropsMemory(pBiDi, length)) {
2637
2.84k
        pBiDi->dirProps=pBiDi->dirPropsMemory;
2638
2.84k
        if(!getDirProps(pBiDi)) {
2639
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2640
0
            return;
2641
0
        }
2642
2.84k
    } else {
2643
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2644
0
        return;
2645
0
    }
2646
2.84k
    dirProps=pBiDi->dirProps;
2647
    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2648
2.84k
    length= pBiDi->length;
2649
2.84k
    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2650
2651
    /* are explicit levels specified? */
2652
2.84k
    if(embeddingLevels==nullptr) {
2653
        /* NOTE(review): the backslash that follows the next comment is a stray line continuation; it only splices the following line onto the comment line and looks removable */
        /* no: determine explicit levels according to the (Xn) rules */\
2654
2.84k
        if(getLevelsMemory(pBiDi, length)) {
2655
2.84k
            pBiDi->levels=pBiDi->levelsMemory;
2656
2.84k
            direction=resolveExplicitLevels(pBiDi, pErrorCode);
2657
2.84k
            if(U_FAILURE(*pErrorCode)) {
2658
0
                return;
2659
0
            }
2660
2.84k
        } else {
2661
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2662
0
            return;
2663
0
        }
2664
2.84k
    } else {
2665
        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2666
0
        pBiDi->levels=embeddingLevels;
2667
0
        direction=checkExplicitLevels(pBiDi, pErrorCode);
2668
0
        if(U_FAILURE(*pErrorCode)) {
2669
0
            return;
2670
0
        }
2671
0
    }
2672
2673
    /*
     * allocate isolate memory: use simpleIsolates when isolateCount fits in
     * SIMPLE_ISOLATES_COUNT, reuse isolatesMemory when isolatesSize is already
     * big enough, otherwise ask getInitialIsolatesMemory() for a buffer
     */
2674
2.84k
    if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2675
2.60k
        pBiDi->isolates=pBiDi->simpleIsolates;
2676
231
    else
2677
231
        if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2678
0
            pBiDi->isolates=pBiDi->isolatesMemory;
2679
231
        else {
2680
231
            if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2681
231
                pBiDi->isolates=pBiDi->isolatesMemory;
2682
231
            } else {
2683
0
                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2684
0
                return;
2685
0
            }
2686
231
        }
2687
2.84k
    pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2688
2689
    /*
2690
     * The steps after (X9) in the UBiDi algorithm are performed only if
2691
     * the paragraph text has mixed directionality!
2692
     */
2693
2.84k
    pBiDi->direction=direction;
2694
2.84k
    switch(direction) {
2695
413
    case UBIDI_LTR:
2696
        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2697
413
        pBiDi->trailingWSStart=0;
2698
413
        break;
2699
481
    case UBIDI_RTL:
2700
        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2701
481
        pBiDi->trailingWSStart=0;
2702
481
        break;
2703
1.94k
    default:
2704
        /*
2705
         *  Choose the right implicit state table
2706
         */
2707
1.94k
        switch(pBiDi->reorderingMode) {
2708
1.55k
        case UBIDI_REORDER_DEFAULT:
2709
1.55k
            pBiDi->pImpTabPair=&impTab_DEFAULT;
2710
1.55k
            break;
2711
0
        case UBIDI_REORDER_NUMBERS_SPECIAL:
2712
0
            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2713
0
            break;
2714
0
        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2715
0
            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2716
0
            break;
2717
390
        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2718
390
            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2719
390
            break;
2720
0
        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2721
0
            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2722
0
                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2723
0
            } else {
2724
0
                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2725
0
            }
2726
0
            break;
2727
0
        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2728
0
            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2729
0
                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2730
0
            } else {
2731
0
                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2732
0
            }
2733
0
            break;
2734
0
        default:
2735
            /* we should never get here */
2736
0
            UPRV_UNREACHABLE_EXIT;
2737
1.94k
        }
2738
        /*
2739
         * If there are no external levels specified and there
2740
         * are no significant explicit level codes in the text,
2741
         * then we can treat the entire paragraph as one run.
2742
         * Otherwise, we need to perform the following rules on runs of
2743
         * the text with the same embedding levels. (X10)
2744
         * "Significant" explicit level codes are ones that actually
2745
         * affect non-BN characters.
2746
         * Examples for "insignificant" ones are empty embeddings
2747
         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2748
         */
2749
1.94k
        if(embeddingLevels==nullptr && pBiDi->paraCount<=1 &&
2750
1.51k
                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2751
789
            resolveImplicitLevels(pBiDi, 0, length,
2752
789
                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2753
789
                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2754
1.15k
        } else {
2755
            /* sor, eor: start and end types of same-level-run */
2756
1.15k
            UBiDiLevel *levels=pBiDi->levels;
2757
1.15k
            int32_t start, limit=0;
2758
1.15k
            UBiDiLevel level, nextLevel;
2759
1.15k
            DirProp sor, eor;
2760
2761
            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2762
1.15k
            level=GET_PARALEVEL(pBiDi, 0);
2763
1.15k
            nextLevel=levels[0];
2764
1.15k
            if(level<nextLevel) {
2765
0
                eor=GET_LR_FROM_LEVEL(nextLevel);
2766
1.15k
            } else {
2767
1.15k
                eor=GET_LR_FROM_LEVEL(level);
2768
1.15k
            }
2769
2770
11.9k
            do {
2771
                /* determine start and limit of the run (end points just behind the run) */
2772
2773
                /* the values for this run's start are the same as for the previous run's end */
2774
11.9k
                start=limit;
2775
11.9k
                level=nextLevel;
2776
11.9k
                if((start>0) && (dirProps[start-1]==B)) {
2777
                    /* except if this is a new paragraph, then set sor = para level */
2778
735
                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2779
11.2k
                } else {
2780
11.2k
                    sor=eor;
2781
11.2k
                }
2782
2783
                /* search for the limit of this run; characters matching MASK_BN_EXPLICIT do not end a run */
2784
43.7k
                while((++limit<length) &&
2785
42.5k
                      ((levels[limit]==level) ||
2786
31.7k
                       (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2787
2788
                /* get the correct level of the next run */
2789
11.9k
                if(limit<length) {
2790
10.7k
                    nextLevel=levels[limit];
2791
10.7k
                } else {
2792
1.15k
                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
2793
1.15k
                }
2794
2795
                /* determine eor from max(level, nextLevel); sor is last run's eor */
2796
11.9k
                if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2797
8.71k
                    eor=GET_LR_FROM_LEVEL(nextLevel);
2798
8.71k
                } else {
2799
3.24k
                    eor=GET_LR_FROM_LEVEL(level);
2800
3.24k
                }
2801
2802
                /* if the run consists of overridden directional types, then there
2803
                   are no implicit types to be resolved */
2804
11.9k
                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2805
11.2k
                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2806
11.2k
                } else {
2807
                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
2808
6.03k
                    do {
2809
6.03k
                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2810
6.03k
                    } while(start<limit);
2811
725
                }
2812
11.9k
            } while(limit<length);
2813
1.15k
        }
2814
        /* check if we got any memory shortage while adding insert points */
2815
1.94k
        if (U_FAILURE(pBiDi->insertPoints.errorCode))
2816
0
        {
2817
0
            *pErrorCode=pBiDi->insertPoints.errorCode;
2818
0
            return;
2819
0
        }
2820
        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2821
1.94k
        adjustWSLevels(pBiDi);
2822
1.94k
        break;
2823
2.84k
    }
2824
    /* add RLM for inverse Bidi with contextual orientation resolving
2825
     * to RTL which would not round-trip otherwise
2826
     */
2827
2.84k
    if((pBiDi->defaultParaLevel>0) &&
2828
918
       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2829
0
       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2830
0
        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2831
0
        int32_t i, j, start, last;
2832
0
        UBiDiLevel level;
2833
0
        DirProp dirProp;
2834
        /*
         * For each paragraph with a non-zero level, scan backwards from its
         * last character: if an L is met before any R/AL, add an RLM_BEFORE
         * point at "last" (moved back over trailing B when the L is not the
         * last character); an R/AL met first ends the scan with no mark.
         */
0
        for(i=0; i<pBiDi->paraCount; i++) {
2835
0
            last=(pBiDi->paras[i].limit)-1;
2836
0
            level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
2837
0
            if(level==0)
2838
0
                continue;           /* LTR paragraph */
2839
0
            start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2840
0
            for(j=last; j>=start; j--) {
2841
0
                dirProp=dirProps[j];
2842
0
                if(dirProp==L) {
2843
0
                    if(j<last) {
2844
0
                        while(dirProps[last]==B) {
2845
0
                            last--;
2846
0
                        }
2847
0
                    }
2848
0
                    addPoint(pBiDi, last, RLM_BEFORE);
2849
0
                    break;
2850
0
                }
2851
0
                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2852
0
                    break;
2853
0
                }
2854
0
            }
2855
0
        }
2856
0
    }
2857
2858
    /* the result shrinks by the removed controls, or grows by the inserted marks */
2.84k
    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2859
0
        pBiDi->resultLength -= pBiDi->controlCount;
2860
2.84k
    } else {
2861
2.84k
        pBiDi->resultLength += pBiDi->insertPoints.size;
2862
2.84k
    }
2863
2.84k
    setParaSuccess(pBiDi);              /* mark successful setPara */
2864
2.84k
}
2865
2866
/*
 * Store the orderParagraphsLTR setting in the UBiDi object.
 * A nullptr object is ignored; this function has no way to report an error.
 */
U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
    if(pBiDi==nullptr) {
        return;
    }
    pBiDi->orderParagraphsLTR=orderParagraphsLTR;
}
2872
2873
/*
 * Return the orderParagraphsLTR setting stored in the UBiDi object,
 * or false when no object is given.
 */
U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
    if(pBiDi==nullptr) {
        return false;
    }
    return pBiDi->orderParagraphsLTR;
}
2881
2882
/*
 * Return the direction stored in a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields UBIDI_LTR.
 */
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return UBIDI_LTR;
    }
    return pBiDi->direction;
}
2890
2891
/*
 * Return the text pointer stored in a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields nullptr.
 */
U_CAPI const char16_t * U_EXPORT2
ubidi_getText(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return nullptr;
    }
    return pBiDi->text;
}
2899
2900
/*
 * Return originalLength of a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields 0.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->originalLength;
}
2908
2909
/*
 * Return the length field (the processed length) of a valid paragraph or
 * line object. An object that fails IS_VALID_PARA_OR_LINE() yields 0.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->length;
}
2917
2918
/*
 * Return resultLength of a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields 0.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->resultLength;
}
2926
2927
/* paragraphs API functions ------------------------------------------------- */
2928
2929
/*
 * Return paraLevel of a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields 0.
 */
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi) {
    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
        return 0;
    }
    return pBiDi->paraLevel;
}
2937
2938
/*
 * Return paraCount of a valid paragraph or line object.
 * An object that fails IS_VALID_PARA_OR_LINE() yields 0.
 */
U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi *pBiDi) {
    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
        return pBiDi->paraCount;
    }
    return 0;
}
2946
2947
/*
 * Report the boundaries and level of the paragraph with the given index.
 *
 * pBiDi       paragraph or line object; must pass the validity check.
 * paraIndex   index of the paragraph, 0 <= paraIndex < pBiDi->paraCount.
 * pParaStart  if not nullptr, receives the index of the paragraph's first
 *             character (the previous paragraph's limit, or 0).
 * pParaLimit  if not nullptr, receives the paragraph's limit.
 * pParaLevel  if not nullptr, receives the level at the paragraph start.
 * pErrorCode  in/out error code; set by the argument-check macros when the
 *             object is not valid or paraIndex is out of range.
 */
U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
                          int32_t *pParaStart, int32_t *pParaLimit,
                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
    /* check the argument values */
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);

    /* the paragraph table lives in the Para object, also for a Line object */
    const UBiDi *pPara=pBiDi->pParaBiDi;

    /* a paragraph starts where the previous one ends; the first starts at 0 */
    const int32_t paraStart= paraIndex!=0 ? pPara->paras[paraIndex-1].limit : 0;

    if(pParaStart!=nullptr) {
        *pParaStart=paraStart;
    }
    if(pParaLimit!=nullptr) {
        *pParaLimit=pPara->paras[paraIndex].limit;
    }
    if(pParaLevel!=nullptr) {
        *pParaLevel=GET_PARALEVEL(pPara, paraStart);
    }
}
2974
2975
/*
 * Find the paragraph that contains charIndex and report its boundaries and
 * level through ubidi_getParagraphByIndex().
 *
 * pBiDi       paragraph or line object; must pass the validity check.
 * charIndex   character index, 0 <= charIndex < length of the Para object.
 * pParaStart, pParaLimit, pParaLevel
 *             optional outputs, passed on to ubidi_getParagraphByIndex().
 * pErrorCode  in/out error code; checked here first and again by
 *             ubidi_getParagraphByIndex().
 *
 * Returns the index of the paragraph, or -1 if an argument check fails.
 */
U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
                          int32_t *pParaStart, int32_t *pParaLimit,
                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
    /* check the argument values */
    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);

    /* advance to the first paragraph whose limit lies beyond charIndex */
    int32_t paraIndex=0;
    while(charIndex>=pBiDi->paras[paraIndex].limit) {
        ++paraIndex;
    }

    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
    return paraIndex;
}
2992
2993
/*
 * Install a new class callback and context in the UBiDi object, optionally
 * handing back the ones that were installed before.
 *
 * pBiDi       object to modify; nullptr sets U_ILLEGAL_ARGUMENT_ERROR.
 * newFn       callback to store in fnClassCallback.
 * newContext  context to store in coClassCallback.
 * oldFn       if not nullptr, receives the previous callback.
 * oldContext  if not nullptr, receives the previous context.
 * pErrorCode  in/out error code; nothing is done if it is nullptr or
 *             already indicates a failure.
 */
U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
                       const void *newContext, UBiDiClassCallback **oldFn,
                       const void **oldContext, UErrorCode *pErrorCode)
{
    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
    if(pBiDi==nullptr) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* hand back the previous callback, then replace it */
    if(oldFn!=nullptr) {
        *oldFn = pBiDi->fnClassCallback;
    }
    pBiDi->fnClassCallback = newFn;

    /* hand back the previous context, then replace it */
    if(oldContext!=nullptr) {
        *oldContext = pBiDi->coClassCallback;
    }
    pBiDi->coClassCallback = newContext;
}
3014
3015
/*
 * Retrieve the class callback and context currently stored in the UBiDi
 * object. Each output pointer is optional; with a nullptr object nothing
 * is written.
 */
U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
{
    if(pBiDi!=nullptr) {
        if(fn!=nullptr) {
            *fn = pBiDi->fnClassCallback;
        }
        if(context!=nullptr) {
            *context = pBiDi->coClassCallback;
        }
    }
}
3030
3031
/*
 * Return the Bidi class to use for code point c.
 *
 * If a class callback is installed in pBiDi it is asked first; when there is
 * no callback, or the callback answers U_BIDI_CLASS_DEFAULT, the class comes
 * from ubidi_getClass(c). Any result that is not below
 * U_CHAR_DIRECTION_COUNT is replaced by ON.
 *
 * pBiDi  object holding the optional callback and its context. A nullptr
 *        object is treated like an object without a callback, so that this
 *        exported function does not dereference a null pointer (the sibling
 *        ubidi_getClassCallback() tolerates nullptr as well).
 * c      code point whose class is requested.
 */
U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
{
    UCharDirection dir;

    /* short-circuit order matters: dir is assigned either by the callback
       call inside the condition or by the default lookup in the body */
    if( pBiDi == nullptr ||
        pBiDi->fnClassCallback == nullptr ||
        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
    {
        dir = ubidi_getClass(c);
    }
    if(dir >= U_CHAR_DIRECTION_COUNT) {
        dir = (UCharDirection)ON;
    }
    return dir;
}