Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/rbbi.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
***************************************************************************
5
*   Copyright (C) 1999-2016 International Business Machines Corporation
6
*   and others. All rights reserved.
7
***************************************************************************
8
*/
9
//
10
//  file:  rbbi.cpp  Contains the implementation of the rule based break iterator
11
//                   runtime engine and the API implementation for
12
//                   class RuleBasedBreakIterator
13
//
14
15
#include "utypeinfo.h"  // for 'typeid' to work
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_BREAK_ITERATION
20
21
#include "unicode/rbbi.h"
22
#include "unicode/schriter.h"
23
#include "unicode/uchriter.h"
24
#include "unicode/uclean.h"
25
#include "unicode/udata.h"
26
27
#include "brkeng.h"
28
#include "ucln_cmn.h"
29
#include "cmemory.h"
30
#include "cstring.h"
31
#include "rbbidata.h"
32
#include "rbbi_cache.h"
33
#include "rbbirb.h"
34
#include "uassert.h"
35
#include "umutex.h"
36
#include "uvectr32.h"
37
38
// if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included.
39
#if U_LOCAL_SERVICE_HOOK
40
#include "localsvc.h"
41
#endif
42
43
#ifdef RBBI_DEBUG
44
static UBool gTrace = FALSE;
45
#endif
46
47
U_NAMESPACE_BEGIN
48
49
// The state number of the starting state
50
constexpr int32_t START_STATE = 1;
51
52
// The state-transition value indicating "stop"
53
constexpr int32_t STOP_STATE = 0;
54
55
56
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
57
58
59
//=======================================================================
60
// constructors
61
//=======================================================================
62
63
/**
64
 * Constructs a RuleBasedBreakIterator that uses the already-created
65
 * tables object that is passed in as a parameter.
66
 */
67
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
68
 : fSCharIter(UnicodeString())
69
0
{
70
0
    init(status);
71
0
    fData = new RBBIDataWrapper(data, status); // status checked in constructor
72
0
    if (U_FAILURE(status)) {return;}
73
0
    if(fData == 0) {
74
0
        status = U_MEMORY_ALLOCATION_ERROR;
75
0
        return;
76
0
    }
77
0
}
78
79
//
80
//  Construct from precompiled binary rules (tables).  This constructor is public API,
81
//  taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
82
//
83
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
84
                       uint32_t       ruleLength,
85
                       UErrorCode     &status)
86
 : fSCharIter(UnicodeString())
87
0
{
88
0
    init(status);
89
0
    if (U_FAILURE(status)) {
90
0
        return;
91
0
    }
92
0
    if (compiledRules == NULL || ruleLength < sizeof(RBBIDataHeader)) {
93
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
94
0
        return;
95
0
    }
96
0
    const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules;
97
0
    if (data->fLength > ruleLength) {
98
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
99
0
        return;
100
0
    }
101
0
    fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status);
102
0
    if (U_FAILURE(status)) {return;}
103
0
    if(fData == 0) {
104
0
        status = U_MEMORY_ALLOCATION_ERROR;
105
0
        return;
106
0
    }
107
0
}
108
109
110
//-------------------------------------------------------------------------------
111
//
112
//   Constructor   from a UDataMemory handle to precompiled break rules
113
//                 stored in an ICU data file.
114
//
115
//-------------------------------------------------------------------------------
116
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
117
 : fSCharIter(UnicodeString())
118
0
{
119
0
    init(status);
120
0
    fData = new RBBIDataWrapper(udm, status); // status checked in constructor
121
0
    if (U_FAILURE(status)) {return;}
122
0
    if(fData == 0) {
123
0
        status = U_MEMORY_ALLOCATION_ERROR;
124
0
        return;
125
0
    }
126
0
}
127
128
129
130
//-------------------------------------------------------------------------------
131
//
132
//   Constructor       from a set of rules supplied as a string.
133
//
134
//-------------------------------------------------------------------------------
135
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString  &rules,
136
                                                UParseError          &parseError,
137
                                                UErrorCode           &status)
138
 : fSCharIter(UnicodeString())
139
0
{
140
0
    init(status);
141
0
    if (U_FAILURE(status)) {return;}
142
0
    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
143
0
        RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
144
0
    // Note:  This is a bit awkward.  The RBBI ruleBuilder has a factory method that
145
0
    //        creates and returns a complete RBBI.  From here, in a constructor, we
146
0
    //        can't just return the object created by the builder factory, hence
147
0
    //        the assignment of the factory created object to "this".
148
0
    if (U_SUCCESS(status)) {
149
0
        *this = *bi;
150
0
        delete bi;
151
0
    }
152
0
}
153
154
155
//-------------------------------------------------------------------------------
156
//
157
// Default Constructor.      Create an empty shell that can be set up later.
158
//                           Used when creating a RuleBasedBreakIterator from a set
159
//                           of rules.
160
//-------------------------------------------------------------------------------
161
RuleBasedBreakIterator::RuleBasedBreakIterator()
162
 : fSCharIter(UnicodeString())
163
0
{
164
0
    UErrorCode status = U_ZERO_ERROR;
165
0
    init(status);
166
0
}
167
168
169
//-------------------------------------------------------------------------------
170
//
171
//   Copy constructor.  Will produce a break iterator with the same behavior,
172
//                      and which iterates over the same text, as the one passed in.
173
//
174
//-------------------------------------------------------------------------------
175
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
176
: BreakIterator(other),
177
  fSCharIter(UnicodeString())
178
0
{
179
0
    UErrorCode status = U_ZERO_ERROR;
180
0
    this->init(status);
181
0
    *this = other;
182
0
}
183
184
185
/**
186
 * Destructor
187
 */
188
0
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
189
0
    if (fCharIter != &fSCharIter) {
190
0
        // fCharIter was adopted from the outside.
191
0
        delete fCharIter;
192
0
    }
193
0
    fCharIter = NULL;
194
0
195
0
    utext_close(&fText);
196
0
197
0
    if (fData != NULL) {
198
0
        fData->removeReference();
199
0
        fData = NULL;
200
0
    }
201
0
    delete fBreakCache;
202
0
    fBreakCache = NULL;
203
0
204
0
    delete fDictionaryCache;
205
0
    fDictionaryCache = NULL;
206
0
207
0
    delete fLanguageBreakEngines;
208
0
    fLanguageBreakEngines = NULL;
209
0
210
0
    delete fUnhandledBreakEngine;
211
0
    fUnhandledBreakEngine = NULL;
212
0
}
213
214
/**
215
 * Assignment operator.  Sets this iterator to have the same behavior,
216
 * and iterate over the same text, as the one passed in.
217
 */
218
RuleBasedBreakIterator&
219
0
RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
220
0
    if (this == &that) {
221
0
        return *this;
222
0
    }
223
0
    BreakIterator::operator=(that);
224
0
225
0
    if (fLanguageBreakEngines != NULL) {
226
0
        delete fLanguageBreakEngines;
227
0
        fLanguageBreakEngines = NULL;   // Just rebuild for now
228
0
    }
229
0
    // TODO: clone fLanguageBreakEngines from "that"
230
0
    UErrorCode status = U_ZERO_ERROR;
231
0
    utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
232
0
233
0
    if (fCharIter != &fSCharIter) {
234
0
        delete fCharIter;
235
0
    }
236
0
    fCharIter = &fSCharIter;
237
0
238
0
    if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
239
0
        // This is a little bit tricky - it will intially appear that
240
0
        //  this->fCharIter is adopted, even if that->fCharIter was
241
0
        //  not adopted.  That's ok.
242
0
        fCharIter = that.fCharIter->clone();
243
0
    }
244
0
    fSCharIter = that.fSCharIter;
245
0
    if (fCharIter == NULL) {
246
0
        fCharIter = &fSCharIter;
247
0
    }
248
0
249
0
    if (fData != NULL) {
250
0
        fData->removeReference();
251
0
        fData = NULL;
252
0
    }
253
0
    if (that.fData != NULL) {
254
0
        fData = that.fData->addReference();
255
0
    }
256
0
257
0
    fPosition = that.fPosition;
258
0
    fRuleStatusIndex = that.fRuleStatusIndex;
259
0
    fDone = that.fDone;
260
0
261
0
    // TODO: both the dictionary and the main cache need to be copied.
262
0
    //       Current position could be within a dictionary range. Trying to continue
263
0
    //       the iteration without the caches present would go to the rules, with
264
0
    //       the assumption that the current position is on a rule boundary.
265
0
    fBreakCache->reset(fPosition, fRuleStatusIndex);
266
0
    fDictionaryCache->reset();
267
0
268
0
    return *this;
269
0
}
270
271
272
273
//-----------------------------------------------------------------------------
274
//
275
//    init()      Shared initialization routine.   Used by all the constructors.
276
//                Initializes all fields, leaving the object in a consistent state.
277
//
278
//-----------------------------------------------------------------------------
279
0
void RuleBasedBreakIterator::init(UErrorCode &status) {
280
0
    fCharIter             = NULL;
281
0
    fData                 = NULL;
282
0
    fPosition             = 0;
283
0
    fRuleStatusIndex      = 0;
284
0
    fDone                 = false;
285
0
    fDictionaryCharCount  = 0;
286
0
    fLanguageBreakEngines = NULL;
287
0
    fUnhandledBreakEngine = NULL;
288
0
    fBreakCache           = NULL;
289
0
    fDictionaryCache      = NULL;
290
0
291
0
    // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
292
0
    // fText                 = UTEXT_INITIALIZER;
293
0
    static const UText initializedUText = UTEXT_INITIALIZER;
294
0
    uprv_memcpy(&fText, &initializedUText, sizeof(UText));
295
0
296
0
   if (U_FAILURE(status)) {
297
0
        return;
298
0
    }
299
0
300
0
    utext_openUChars(&fText, NULL, 0, &status);
301
0
    fDictionaryCache = new DictionaryCache(this, status);
302
0
    fBreakCache      = new BreakCache(this, status);
303
0
    if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) {
304
0
        status = U_MEMORY_ALLOCATION_ERROR;
305
0
    }
306
0
307
#ifdef RBBI_DEBUG
308
    static UBool debugInitDone = FALSE;
309
    if (debugInitDone == FALSE) {
310
        char *debugEnv = getenv("U_RBBIDEBUG");
311
        if (debugEnv && uprv_strstr(debugEnv, "trace")) {
312
            gTrace = TRUE;
313
        }
314
        debugInitDone = TRUE;
315
    }
316
#endif
317
}
318
319
320
321
//-----------------------------------------------------------------------------
322
//
323
//    clone - Returns a newly-constructed RuleBasedBreakIterator with the same
324
//            behavior, and iterating over the same text, as this one.
325
//            Virtual function: does the right thing with subclasses.
326
//
327
//-----------------------------------------------------------------------------
328
BreakIterator*
329
0
RuleBasedBreakIterator::clone(void) const {
330
0
    return new RuleBasedBreakIterator(*this);
331
0
}
332
333
/**
334
 * Equality operator.  Returns TRUE if both BreakIterators are of the
335
 * same class, have the same behavior, and iterate over the same text.
336
 */
337
UBool
338
0
RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
339
0
    if (typeid(*this) != typeid(that)) {
340
0
        return FALSE;
341
0
    }
342
0
    if (this == &that) {
343
0
        return TRUE;
344
0
    }
345
0
346
0
    // The base class BreakIterator carries no state that participates in equality,
347
0
    // and does not implement an equality function that would otherwise be
348
0
    // checked at this point.
349
0
350
0
    const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
351
0
352
0
    if (!utext_equals(&fText, &that2.fText)) {
353
0
        // The two break iterators are operating on different text,
354
0
        //   or have a different iteration position.
355
0
        //   Note that fText's position is always the same as the break iterator's position.
356
0
        return FALSE;
357
0
    };
358
0
359
0
    if (!(fPosition == that2.fPosition &&
360
0
            fRuleStatusIndex == that2.fRuleStatusIndex &&
361
0
            fDone == that2.fDone)) {
362
0
        return FALSE;
363
0
    }
364
0
365
0
    if (that2.fData == fData ||
366
0
        (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) {
367
0
            // The two break iterators are using the same rules.
368
0
            return TRUE;
369
0
        }
370
0
    return FALSE;
371
0
}
372
373
/**
374
 * Compute a hash code for this BreakIterator
375
 * @return A hash code
376
 */
377
int32_t
378
0
RuleBasedBreakIterator::hashCode(void) const {
379
0
    int32_t   hash = 0;
380
0
    if (fData != NULL) {
381
0
        hash = fData->hashCode();
382
0
    }
383
0
    return hash;
384
0
}
385
386
387
0
/**
 * Set the iterator to analyze the text provided by a UText, and reset the
 * iteration position to the beginning of the text.
 *
 * @param ut      The text to analyze; a read-only, shallow clone of it is
 *                made into this iterator's own UText.
 * @param status  If already a failure on entry, the function does nothing.
 *                Receives any error from cloning the UText.
 */
void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Cached boundaries belong to the old text.
    fBreakCache->reset();
    fDictionaryCache->reset();
    utext_clone(&fText, ut, FALSE, TRUE, &status);

    // An iterator adopted earlier from the outside is no longer needed.
    if (fCharIter != &fSCharIter) {
        delete fCharIter;
    }

    // Set up a dummy CharacterIterator to be returned if anyone
    //   calls getText().  With input from UText, there is no reasonable
    //   way to return a characterIterator over the actual input text.
    //   Return one over an empty string instead - this is the closest
    //   we can come to signaling a failure.
    //   (GetText() is obsolete, this failure is sort of OK)
    fSCharIter.setText(UnicodeString());
    fCharIter = &fSCharIter;

    first();
}
411
412
413
0
/**
 * Return a read-only, shallow clone of the UText being iterated over.
 *
 * @param fillIn  Passed to utext_clone() as the destination UText.
 * @param status  Receives any error from utext_clone().
 * @return        The value returned by utext_clone().
 */
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
    return utext_clone(fillIn, &fText, FALSE, TRUE, &status);
}
417
418
419
//=======================================================================
420
// BreakIterator overrides
421
//=======================================================================
422
423
/**
424
 * Return a CharacterIterator over the text being analyzed.
425
 */
426
CharacterIterator&
427
0
RuleBasedBreakIterator::getText() const {
428
0
    return *fCharIter;
429
0
}
430
431
/**
432
 * Set the iterator to analyze a new piece of text.  This function resets
433
 * the current iteration position to the beginning of the text.
434
 * @param newText An iterator over the text to analyze.
435
 */
436
void
437
0
RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
438
0
    // If we are holding a CharacterIterator adopted from a
439
0
    //   previous call to this function, delete it now.
440
0
    if (fCharIter != &fSCharIter) {
441
0
        delete fCharIter;
442
0
    }
443
0
444
0
    fCharIter = newText;
445
0
    UErrorCode status = U_ZERO_ERROR;
446
0
    fBreakCache->reset();
447
0
    fDictionaryCache->reset();
448
0
    if (newText==NULL || newText->startIndex() != 0) {
449
0
        // startIndex !=0 wants to be an error, but there's no way to report it.
450
0
        // Make the iterator text be an empty string.
451
0
        utext_openUChars(&fText, NULL, 0, &status);
452
0
    } else {
453
0
        utext_openCharacterIterator(&fText, newText, &status);
454
0
    }
455
0
    this->first();
456
0
}
457
458
/**
459
 * Set the iterator to analyze a new piece of text.  This function resets
460
 * the current iteration position to the beginning of the text.
461
 * @param newText An iterator over the text to analyze.
462
 */
463
void
464
0
RuleBasedBreakIterator::setText(const UnicodeString& newText) {
465
0
    UErrorCode status = U_ZERO_ERROR;
466
0
    fBreakCache->reset();
467
0
    fDictionaryCache->reset();
468
0
    utext_openConstUnicodeString(&fText, &newText, &status);
469
0
470
0
    // Set up a character iterator on the string.
471
0
    //   Needed in case someone calls getText().
472
0
    //  Can not, unfortunately, do this lazily on the (probably never)
473
0
    //  call to getText(), because getText is const.
474
0
    fSCharIter.setText(newText);
475
0
476
0
    if (fCharIter != &fSCharIter) {
477
0
        // old fCharIter was adopted from the outside.  Delete it.
478
0
        delete fCharIter;
479
0
    }
480
0
    fCharIter = &fSCharIter;
481
0
482
0
    this->first();
483
0
}
484
485
486
/**
487
 *  Provide a new UText for the input text.  Must reference text with contents identical
488
 *  to the original.
489
 *  Intended for use with text data originating in Java (garbage collected) environments
490
 *  where the data may be moved in memory at arbitrary times.
491
 */
492
0
RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
493
0
    if (U_FAILURE(status)) {
494
0
        return *this;
495
0
    }
496
0
    if (input == NULL) {
497
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
498
0
        return *this;
499
0
    }
500
0
    int64_t pos = utext_getNativeIndex(&fText);
501
0
    //  Shallow read-only clone of the new UText into the existing input UText
502
0
    utext_clone(&fText, input, FALSE, TRUE, &status);
503
0
    if (U_FAILURE(status)) {
504
0
        return *this;
505
0
    }
506
0
    utext_setNativeIndex(&fText, pos);
507
0
    if (utext_getNativeIndex(&fText) != pos) {
508
0
        // Sanity check.  The new input utext is supposed to have the exact same
509
0
        // contents as the old.  If we can't set to the same position, it doesn't.
510
0
        // The contents underlying the old utext might be invalid at this point,
511
0
        // so it's not safe to check directly.
512
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
513
0
    }
514
0
    return *this;
515
0
}
516
517
518
/**
519
 * Sets the current iteration position to the beginning of the text, position zero.
520
 * @return The new iterator position, which is zero.
521
 */
522
0
int32_t RuleBasedBreakIterator::first(void) {
523
0
    UErrorCode status = U_ZERO_ERROR;
524
0
    if (!fBreakCache->seek(0)) {
525
0
        fBreakCache->populateNear(0, status);
526
0
    }
527
0
    fBreakCache->current();
528
0
    U_ASSERT(fPosition == 0);
529
0
    return 0;
530
0
}
531
532
/**
533
 * Sets the current iteration position to the end of the text.
534
 * @return The text's past-the-end offset.
535
 */
536
0
int32_t RuleBasedBreakIterator::last(void) {
537
0
    int32_t endPos = (int32_t)utext_nativeLength(&fText);
538
0
    UBool endShouldBeBoundary = isBoundary(endPos);      // Has side effect of setting iterator position.
539
0
    (void)endShouldBeBoundary;
540
0
    U_ASSERT(endShouldBeBoundary);
541
0
    U_ASSERT(fPosition == endPos);
542
0
    return endPos;
543
0
}
544
545
/**
546
 * Advances the iterator either forward or backward the specified number of steps.
547
 * Negative values move backward, and positive values move forward.  This is
548
 * equivalent to repeatedly calling next() or previous().
549
 * @param n The number of steps to move.  The sign indicates the direction
550
 * (negative is backwards, and positive is forwards).
551
 * @return The character offset of the boundary position n boundaries away from
552
 * the current one.
553
 */
554
0
int32_t RuleBasedBreakIterator::next(int32_t n) {
555
0
    int32_t result = 0;
556
0
    if (n > 0) {
557
0
        for (; n > 0 && result != UBRK_DONE; --n) {
558
0
            result = next();
559
0
        }
560
0
    } else if (n < 0) {
561
0
        for (; n < 0 && result != UBRK_DONE; ++n) {
562
0
            result = previous();
563
0
        }
564
0
    } else {
565
0
        result = current();
566
0
    }
567
0
    return result;
568
0
}
569
570
/**
571
 * Advances the iterator to the next boundary position.
572
 * @return The position of the first boundary after this one.
573
 */
574
0
int32_t RuleBasedBreakIterator::next(void) {
575
0
    fBreakCache->next();
576
0
    return fDone ? UBRK_DONE : fPosition;
577
0
}
578
579
/**
580
 * Move the iterator backwards, to the boundary preceding the current one.
581
 *
582
 *         Starts from the current position within fText.
583
 *         Starting position need not be on a boundary.
584
 *
585
 * @return The position of the boundary position immediately preceding the starting position.
586
 */
587
0
int32_t RuleBasedBreakIterator::previous(void) {
588
0
    UErrorCode status = U_ZERO_ERROR;
589
0
    fBreakCache->previous(status);
590
0
    return fDone ? UBRK_DONE : fPosition;
591
0
}
592
593
/**
594
 * Sets the iterator to refer to the first boundary position following
595
 * the specified position.
596
 * @param startPos The position from which to begin searching for a break position.
597
 * @return The position of the first break after the current position.
598
 */
599
0
int32_t RuleBasedBreakIterator::following(int32_t startPos) {
600
0
    // if the supplied position is before the beginning, return the
601
0
    // text's starting offset
602
0
    if (startPos < 0) {
603
0
        return first();
604
0
    }
605
0
606
0
    // Move requested offset to a code point start. It might be on a trail surrogate,
607
0
    // or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
608
0
    utext_setNativeIndex(&fText, startPos);
609
0
    startPos = (int32_t)utext_getNativeIndex(&fText);
610
0
611
0
    UErrorCode status = U_ZERO_ERROR;
612
0
    fBreakCache->following(startPos, status);
613
0
    return fDone ? UBRK_DONE : fPosition;
614
0
}
615
616
/**
617
 * Sets the iterator to refer to the last boundary position before the
618
 * specified position.
619
 * @param offset The position to begin searching for a break from.
620
 * @return The position of the last boundary before the starting position.
621
 */
622
0
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
623
0
    if (offset > utext_nativeLength(&fText)) {
624
0
        return last();
625
0
    }
626
0
627
0
    // Move requested offset to a code point start. It might be on a trail surrogate,
628
0
    // or on a trail byte if the input is UTF-8.
629
0
630
0
    utext_setNativeIndex(&fText, offset);
631
0
    int32_t adjustedOffset = utext_getNativeIndex(&fText);
632
0
633
0
    UErrorCode status = U_ZERO_ERROR;
634
0
    fBreakCache->preceding(adjustedOffset, status);
635
0
    return fDone ? UBRK_DONE : fPosition;
636
0
}
637
638
/**
639
 * Returns true if the specfied position is a boundary position.  As a side
640
 * effect, leaves the iterator pointing to the first boundary position at
641
 * or after "offset".
642
 *
643
 * @param offset the offset to check.
644
 * @return True if "offset" is a boundary position.
645
 */
646
0
UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
647
0
    // out-of-range indexes are never boundary positions
648
0
    if (offset < 0) {
649
0
        first();       // For side effects on current position, tag values.
650
0
        return FALSE;
651
0
    }
652
0
653
0
    // Adjust offset to be on a code point boundary and not beyond the end of the text.
654
0
    // Note that isBoundary() is always false for offsets that are not on code point boundaries.
655
0
    // But we still need the side effect of leaving iteration at the following boundary.
656
0
657
0
    utext_setNativeIndex(&fText, offset);
658
0
    int32_t adjustedOffset = utext_getNativeIndex(&fText);
659
0
660
0
    bool result = false;
661
0
    UErrorCode status = U_ZERO_ERROR;
662
0
    if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) {
663
0
        result = (fBreakCache->current() == offset);
664
0
    }
665
0
666
0
    if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
667
0
        // Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
668
0
        // but the iteration position remains set to the end of the text, which is a boundary.
669
0
        return FALSE;
670
0
    }
671
0
    if (!result) {
672
0
        // Not on a boundary. isBoundary() must leave iterator on the following boundary.
673
0
        // Cache->seek(), above, left us on the preceding boundary, so advance one.
674
0
        next();
675
0
    }
676
0
    return result;
677
0
}
678
679
680
/**
681
 * Returns the current iteration position.
682
 * @return The current iteration position.
683
 */
684
0
int32_t RuleBasedBreakIterator::current(void) const {
685
0
    return fPosition;
686
0
}
687
688
689
//=======================================================================
690
// implementation
691
//=======================================================================
692
693
//
// RBBIRunMode  -  records where the state machine is relative to the user text.
//                 The state machine runs an extra iteration at the beginning and end
//                 of user text, and only fetches user input while in the RUN mode.
//
enum RBBIRunMode {
    // state machine processing is before first char of input
    RBBI_START,

    // state machine processing is in the user text
    RBBI_RUN,

    // state machine processing is after end of user text.
    RBBI_END
};
703
704
705
// Map from look-ahead break states (corresponds to rules) to boundary positions.
706
// Allows multiple lookahead break rules to be in flight at the same time.
707
//
708
// This is a temporary approach for ICU 57. A better fix is to make the look-ahead numbers
709
// in the state table be sequential, then we can just index an array. And the
710
// table could also tell us in advance how big that array needs to be.
711
//
712
// Before ICU 57 there was just a single simple variable for a look-ahead match that
713
// was in progress. Two rules at once did not work.
714
715
static const int32_t kMaxLookaheads = 8;
716
struct LookAheadResults {
717
    int32_t    fUsedSlotLimit;
718
    int32_t    fPositions[8];
719
    int16_t    fKeys[8];
720
721
0
    LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {};
722
723
0
    int32_t getPosition(int16_t key) {
724
0
        for (int32_t i=0; i<fUsedSlotLimit; ++i) {
725
0
            if (fKeys[i] == key) {
726
0
                return fPositions[i];
727
0
            }
728
0
        }
729
0
        U_ASSERT(FALSE);
730
0
        return -1;
731
0
    }
732
733
0
    void setPosition(int16_t key, int32_t position) {
734
0
        int32_t i;
735
0
        for (i=0; i<fUsedSlotLimit; ++i) {
736
0
            if (fKeys[i] == key) {
737
0
                fPositions[i] = position;
738
0
                return;
739
0
            }
740
0
        }
741
0
        if (i >= kMaxLookaheads) {
742
0
            U_ASSERT(FALSE);
743
0
            i = kMaxLookaheads - 1;
744
0
        }
745
0
        fKeys[i] = key;
746
0
        fPositions[i] = position;
747
0
        U_ASSERT(fUsedSlotLimit == i);
748
0
        fUsedSlotLimit = i + 1;
749
0
    }
750
};
751
752
753
//-----------------------------------------------------------------------------------
754
//
755
//  handleNext()
756
//     Run the state machine to find a boundary
757
//
758
//-----------------------------------------------------------------------------------
759
0
int32_t RuleBasedBreakIterator::handleNext() {
760
0
    int32_t             state;
761
0
    uint16_t            category        = 0;
762
0
    RBBIRunMode         mode;
763
0
764
0
    RBBIStateTableRow  *row;
765
0
    UChar32             c;
766
0
    LookAheadResults    lookAheadMatches;
767
0
    int32_t             result             = 0;
768
0
    int32_t             initialPosition    = 0;
769
0
    const RBBIStateTable *statetable       = fData->fForwardTable;
770
0
    const char         *tableData          = statetable->fTableData;
771
0
    uint32_t            tableRowLen        = statetable->fRowLen;
772
    #ifdef RBBI_DEBUG
773
        if (gTrace) {
774
            RBBIDebugPuts("Handle Next   pos   char  state category");
775
        }
776
    #endif
777
778
0
    // handleNext alway sets the break tag value.
779
0
    // Set the default for it.
780
0
    fRuleStatusIndex = 0;
781
0
782
0
    fDictionaryCharCount = 0;
783
0
784
0
    // if we're already at the end of the text, return DONE.
785
0
    initialPosition = fPosition;
786
0
    UTEXT_SETNATIVEINDEX(&fText, initialPosition);
787
0
    result          = initialPosition;
788
0
    c               = UTEXT_NEXT32(&fText);
789
0
    if (c==U_SENTINEL) {
790
0
        fDone = TRUE;
791
0
        return UBRK_DONE;
792
0
    }
793
0
794
0
    //  Set the initial state for the state machine
795
0
    state = START_STATE;
796
0
    row = (RBBIStateTableRow *)
797
0
            //(statetable->fTableData + (statetable->fRowLen * state));
798
0
            (tableData + tableRowLen * state);
799
0
800
0
801
0
    mode     = RBBI_RUN;
802
0
    if (statetable->fFlags & RBBI_BOF_REQUIRED) {
803
0
        category = 2;
804
0
        mode     = RBBI_START;
805
0
    }
806
0
807
0
808
0
    // loop until we reach the end of the text or transition to state 0
809
0
    //
810
0
    for (;;) {
811
0
        if (c == U_SENTINEL) {
812
0
            // Reached end of input string.
813
0
            if (mode == RBBI_END) {
814
0
                // We have already run the loop one last time with the
815
0
                //   character set to the psueudo {eof} value.  Now it is time
816
0
                //   to unconditionally bail out.
817
0
                break;
818
0
            }
819
0
            // Run the loop one last time with the fake end-of-input character category.
820
0
            mode = RBBI_END;
821
0
            category = 1;
822
0
        }
823
0
824
0
        //
825
0
        // Get the char category.  An incoming category of 1 or 2 means that
826
0
        //      we are preset for doing the beginning or end of input, and
827
0
        //      that we shouldn't get a category from an actual text input character.
828
0
        //
829
0
        if (mode == RBBI_RUN) {
830
0
            // look up the current character's character category, which tells us
831
0
            // which column in the state table to look at.
832
0
            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
833
0
            //        not the size of the character going in, which is a UChar32.
834
0
            //
835
0
            category = UTRIE2_GET16(fData->fTrie, c);
836
0
837
0
            // Check the dictionary bit in the character's category.
838
0
            //    Counter is only used by dictionary based iteration.
839
0
            //    Chars that need to be handled by a dictionary have a flag bit set
840
0
            //    in their category values.
841
0
            //
842
0
            if ((category & 0x4000) != 0)  {
843
0
                fDictionaryCharCount++;
844
0
                //  And off the dictionary flag bit.
845
0
                category &= ~0x4000;
846
0
            }
847
0
        }
848
0
849
       #ifdef RBBI_DEBUG
850
            if (gTrace) {
851
                RBBIDebugPrintf("             %4ld   ", utext_getNativeIndex(&fText));
852
                if (0x20<=c && c<0x7f) {
853
                    RBBIDebugPrintf("\"%c\"  ", c);
854
                } else {
855
                    RBBIDebugPrintf("%5x  ", c);
856
                }
857
                RBBIDebugPrintf("%3d  %3d\n", state, category);
858
            }
859
        #endif
860
861
0
        // State Transition - move machine to its next state
862
0
        //
863
0
864
0
        // fNextState is a variable-length array.
865
0
        U_ASSERT(category<fData->fHeader->fCatCount);
866
0
        state = row->fNextState[category];  /*Not accessing beyond memory*/
867
0
        row = (RBBIStateTableRow *)
868
0
            // (statetable->fTableData + (statetable->fRowLen * state));
869
0
            (tableData + tableRowLen * state);
870
0
871
0
872
0
        if (row->fAccepting == -1) {
873
0
            // Match found, common case.
874
0
            if (mode != RBBI_START) {
875
0
                result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
876
0
            }
877
0
            fRuleStatusIndex = row->fTagIdx;   // Remember the break status (tag) values.
878
0
        }
879
0
880
0
        int16_t completedRule = row->fAccepting;
881
0
        if (completedRule > 0) {
882
0
            // Lookahead match is completed.
883
0
            int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
884
0
            if (lookaheadResult >= 0) {
885
0
                fRuleStatusIndex = row->fTagIdx;
886
0
                fPosition = lookaheadResult;
887
0
                return lookaheadResult;
888
0
            }
889
0
        }
890
0
        int16_t rule = row->fLookAhead;
891
0
        if (rule != 0) {
892
0
            // At the position of a '/' in a look-ahead match. Record it.
893
0
            int32_t  pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
894
0
            lookAheadMatches.setPosition(rule, pos);
895
0
        }
896
0
897
0
        if (state == STOP_STATE) {
898
0
            // This is the normal exit from the lookup state machine.
899
0
            // We have advanced through the string until it is certain that no
900
0
            //   longer match is possible, no matter what characters follow.
901
0
            break;
902
0
        }
903
0
904
0
        // Advance to the next character.
905
0
        // If this is a beginning-of-input loop iteration, don't advance
906
0
        //    the input position.  The next iteration will be processing the
907
0
        //    first real input character.
908
0
        if (mode == RBBI_RUN) {
909
0
            c = UTEXT_NEXT32(&fText);
910
0
        } else {
911
0
            if (mode == RBBI_START) {
912
0
                mode = RBBI_RUN;
913
0
            }
914
0
        }
915
0
    }
916
0
917
0
    // The state machine is done.  Check whether it found a match...
918
0
919
0
    // If the iterator failed to advance in the match engine, force it ahead by one.
920
0
    //   (This really indicates a defect in the break rules.  They should always match
921
0
    //    at least one character.)
922
0
    if (result == initialPosition) {
923
0
        utext_setNativeIndex(&fText, initialPosition);
924
0
        utext_next32(&fText);
925
0
        result = (int32_t)utext_getNativeIndex(&fText);
926
0
        fRuleStatusIndex = 0;
927
0
    }
928
0
929
0
    // Leave the iterator at our result position.
930
0
    fPosition = result;
931
    #ifdef RBBI_DEBUG
932
        if (gTrace) {
933
            RBBIDebugPrintf("result = %d\n\n", result);
934
        }
935
    #endif
936
    return result;
937
0
}
938
939
940
//-----------------------------------------------------------------------------------
941
//
942
//  handleSafePrevious()
943
//
944
//      Iterate backwards using the safe reverse rules.
945
//      The logic of this function is similar to handleNext(), but simpler
946
//      because the safe table does not require as many options.
947
//
948
//-----------------------------------------------------------------------------------
949
0
int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
950
0
    int32_t             state;
951
0
    uint16_t            category        = 0;
952
0
    RBBIStateTableRow  *row;
953
0
    UChar32             c;
954
0
    int32_t             result          = 0;
955
0
956
0
    const RBBIStateTable *stateTable = fData->fReverseTable;
957
0
    UTEXT_SETNATIVEINDEX(&fText, fromPosition);
958
    #ifdef RBBI_DEBUG
959
        if (gTrace) {
960
            RBBIDebugPuts("Handle Previous   pos   char  state category");
961
        }
962
    #endif
963
964
0
    // if we're already at the start of the text, return DONE.
965
0
    if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
966
0
        return BreakIterator::DONE;
967
0
    }
968
0
969
0
    //  Set the initial state for the state machine
970
0
    c = UTEXT_PREVIOUS32(&fText);
971
0
    state = START_STATE;
972
0
    row = (RBBIStateTableRow *)
973
0
            (stateTable->fTableData + (stateTable->fRowLen * state));
974
0
975
0
    // loop until we reach the start of the text or transition to state 0
976
0
    //
977
0
    for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) {
978
0
979
0
        // look up the current character's character category, which tells us
980
0
        // which column in the state table to look at.
981
0
        // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
982
0
        //        not the size of the character going in, which is a UChar32.
983
0
        //
984
0
        //  And off the dictionary flag bit. For reverse iteration it is not used.
985
0
        category = UTRIE2_GET16(fData->fTrie, c);
986
0
        category &= ~0x4000;
987
0
988
        #ifdef RBBI_DEBUG
989
            if (gTrace) {
990
                RBBIDebugPrintf("             %4d   ", (int32_t)utext_getNativeIndex(&fText));
991
                if (0x20<=c && c<0x7f) {
992
                    RBBIDebugPrintf("\"%c\"  ", c);
993
                } else {
994
                    RBBIDebugPrintf("%5x  ", c);
995
                }
996
                RBBIDebugPrintf("%3d  %3d\n", state, category);
997
            }
998
        #endif
999
1000
0
        // State Transition - move machine to its next state
1001
0
        //
1002
0
        // fNextState is a variable-length array.
1003
0
        U_ASSERT(category<fData->fHeader->fCatCount);
1004
0
        state = row->fNextState[category];  /*Not accessing beyond memory*/
1005
0
        row = (RBBIStateTableRow *)
1006
0
            (stateTable->fTableData + (stateTable->fRowLen * state));
1007
0
1008
0
        if (state == STOP_STATE) {
1009
0
            // This is the normal exit from the lookup state machine.
1010
0
            // Transistion to state zero means we have found a safe point.
1011
0
            break;
1012
0
        }
1013
0
    }
1014
0
1015
0
    // The state machine is done.  Check whether it found a match...
1016
0
    result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
1017
    #ifdef RBBI_DEBUG
1018
        if (gTrace) {
1019
            RBBIDebugPrintf("result = %d\n\n", result);
1020
        }
1021
    #endif
1022
    return result;
1023
0
}
1024
1025
//-------------------------------------------------------------------------------
1026
//
1027
//   getRuleStatus()   Return the break rule tag associated with the current
1028
//                     iterator position.  If the iterator arrived at its current
1029
//                     position by iterating forwards, the value will have been
1030
//                     cached by the handleNext() function.
1031
//
1032
//-------------------------------------------------------------------------------
1033
1034
0
int32_t  RuleBasedBreakIterator::getRuleStatus() const {
1035
0
1036
0
    // fLastRuleStatusIndex indexes to the start of the appropriate status record
1037
0
    //                                                 (the number of status values.)
1038
0
    //   This function returns the last (largest) of the array of status values.
1039
0
    int32_t  idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex];
1040
0
    int32_t  tagVal = fData->fRuleStatusTable[idx];
1041
0
1042
0
    return tagVal;
1043
0
}
1044
1045
1046
int32_t RuleBasedBreakIterator::getRuleStatusVec(
1047
0
             int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
1048
0
    if (U_FAILURE(status)) {
1049
0
        return 0;
1050
0
    }
1051
0
1052
0
    int32_t  numVals = fData->fRuleStatusTable[fRuleStatusIndex];
1053
0
    int32_t  numValsToCopy = numVals;
1054
0
    if (numVals > capacity) {
1055
0
        status = U_BUFFER_OVERFLOW_ERROR;
1056
0
        numValsToCopy = capacity;
1057
0
    }
1058
0
    int i;
1059
0
    for (i=0; i<numValsToCopy; i++) {
1060
0
        fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1];
1061
0
    }
1062
0
    return numVals;
1063
0
}
1064
1065
1066
1067
//-------------------------------------------------------------------------------
1068
//
1069
//   getBinaryRules        Access to the compiled form of the rules,
1070
//                         for use by build system tools that save the data
1071
//                         for standard iterator types.
1072
//
1073
//-------------------------------------------------------------------------------
1074
0
const uint8_t  *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
1075
0
    const uint8_t  *retPtr = NULL;
1076
0
    length = 0;
1077
0
1078
0
    if (fData != NULL) {
1079
0
        retPtr = (const uint8_t *)fData->fHeader;
1080
0
        length = fData->fHeader->fLength;
1081
0
    }
1082
0
    return retPtr;
1083
0
}
1084
1085
1086
/**
 * Clone this iterator. The stack buffer argument is ignored; the clone is
 * always produced by clone().
 *
 * @param bufferSize  if 0 on entry, it is set to 1 and NULL is returned
 *                    (preflighting for deprecated functionality).
 * @param status      in/out error code. Nothing is done if it already
 *                    indicates failure. Set to U_MEMORY_ALLOCATION_ERROR
 *                    when clone() returns NULL, otherwise to
 *                    U_SAFECLONE_ALLOCATED_WARNING.
 * @return the clone, or NULL.
 */
BreakIterator *  RuleBasedBreakIterator::createBufferClone(void * /*stackBuffer*/,
                                   int32_t &bufferSize,
                                   UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (bufferSize == 0) {
        bufferSize = 1;  // preflighting for deprecated functionality
        return NULL;
    }

    BreakIterator *copy = clone();
    status = (copy == NULL) ? U_MEMORY_ALLOCATION_ERROR
                            : U_SAFECLONE_ALLOCATED_WARNING;
    return copy;
}
1107
1108
U_NAMESPACE_END
1109
1110
1111
// Stack of language break factories; created by initLanguageFactories()
// and deleted by rbbi_cleanup().
static icu::UStack *gLanguageBreakFactories = nullptr;

// Empty string returned by getRules() when an iterator has no rule data;
// created by rbbiInit() and deleted by rbbi_cleanup().
static const icu::UnicodeString *gEmptyString = nullptr;

// One-time initialization guards for the two objects above.
static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
1115
1116
/**
1117
 * Release all static memory held by breakiterator.
1118
 */
1119
U_CDECL_BEGIN
1120
0
static UBool U_CALLCONV rbbi_cleanup(void) {
1121
0
    delete gLanguageBreakFactories;
1122
0
    gLanguageBreakFactories = nullptr;
1123
0
    delete gEmptyString;
1124
0
    gEmptyString = nullptr;
1125
0
    gLanguageBreakFactoriesInitOnce.reset();
1126
0
    gRBBIInitOnce.reset();
1127
0
    return TRUE;
1128
0
}
1129
U_CDECL_END
1130
1131
U_CDECL_BEGIN
1132
0
static void U_CALLCONV _deleteFactory(void *obj) {
1133
0
    delete (icu::LanguageBreakFactory *) obj;
1134
0
}
1135
U_CDECL_END
1136
U_NAMESPACE_BEGIN
1137
1138
0
// Init-once body for gRBBIInitOnce: allocates the shared empty string and
// registers rbbi_cleanup() so that it is released again at cleanup time.
static void U_CALLCONV rbbiInit() {
    gEmptyString = new UnicodeString();
    ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
}
1142
1143
0
// Init-once body for gLanguageBreakFactoriesInitOnce: builds the global
// stack of language break factories and registers rbbi_cleanup() for it.
// The built-in ICU factory is pushed first; when the local service hook is
// enabled, the factory it supplies is pushed on top of it.
static void U_CALLCONV initLanguageFactories() {
    UErrorCode status = U_ZERO_ERROR;
    U_ASSERT(gLanguageBreakFactories == NULL);
    gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status);
    if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) {
        ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
        // Never store a NULL factory: the lookup code calls through every
        // element of the stack without a null check.
        if (builtIn != NULL) {
            gLanguageBreakFactories->push(builtIn, status);
        }
// Use #if, not #ifdef, to match the guard around the localsvc.h include at
// the top of this file; otherwise a macro defined as 0 would enable this
// code without the header that declares uprv_svc_hook.
#if U_LOCAL_SERVICE_HOOK
        LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
        if (extra != NULL) {
            gLanguageBreakFactories->push(extra, status);
        }
#endif
    }
    ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
}
1159
1160
1161
// Ask the global language break factories for an engine for character c.
// Factories are consulted from the top of the stack downwards, and the
// first non-NULL engine wins. Returns NULL when the factory stack is
// unavailable or no factory supplies an engine.
static const LanguageBreakEngine*
getLanguageBreakEngineFromFactory(UChar32 c)
{
    umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
    if (gLanguageBreakFactories == NULL) {
        return NULL;
    }

    for (int32_t idx = gLanguageBreakFactories->size() - 1; idx >= 0; --idx) {
        LanguageBreakFactory *factory =
            static_cast<LanguageBreakFactory *>(gLanguageBreakFactories->elementAt(idx));
        const LanguageBreakEngine *engine = factory->getEngineFor(c);
        if (engine != NULL) {
            return engine;
        }
    }
    return NULL;
}
1180
1181
1182
//-------------------------------------------------------------------------------
1183
//
1184
//  getLanguageBreakEngine  Find an appropriate LanguageBreakEngine for the
1185
//                          character c.
1186
//
1187
//-------------------------------------------------------------------------------
1188
const LanguageBreakEngine *
RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
    UErrorCode status = U_ZERO_ERROR;

    // Create this iterator's engine stack on first use.
    if (fLanguageBreakEngines == NULL) {
        fLanguageBreakEngines = new UStack(status);
        if (fLanguageBreakEngines == NULL || U_FAILURE(status)) {
            delete fLanguageBreakEngines;
            fLanguageBreakEngines = nullptr;
            return NULL;
        }
    }

    // First choice: an engine already on our stack that handles c.
    // The stack is searched from the top down.
    for (int32_t idx = fLanguageBreakEngines->size() - 1; idx >= 0; --idx) {
        const LanguageBreakEngine *known =
            static_cast<const LanguageBreakEngine *>(fLanguageBreakEngines->elementAt(idx));
        if (known->handles(c)) {
            return known;
        }
    }

    // Second choice: see whether a factory will give us a new engine for
    // this character. If so, push it on our stack for next time. It is
    // returned even if the push fails, because it can be looked up again.
    const LanguageBreakEngine *supplied = getLanguageBreakEngineFromFactory(c);
    if (supplied != NULL) {
        fLanguageBreakEngines->push(const_cast<LanguageBreakEngine *>(supplied), status);
        return supplied;
    }

    // Nobody wants this character. It goes to the reject (unhandled)
    // engine, which is created here if it does not exist yet.
    if (fUnhandledBreakEngine == NULL) {
        fUnhandledBreakEngine = new UnhandledEngine(status);
        if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return nullptr;
        }
        // Insert at index 0 so that engines for scripts we do handle are
        // tried before it by the top-down search above.
        fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
        // If creation or insertion failed, get rid of it.
        if (U_FAILURE(status)) {
            delete fUnhandledBreakEngine;
            fUnhandledBreakEngine = nullptr;
            return NULL;
        }
    }

    // Tell the reject engine about the character; at its discretion, it may
    // add more than just the one character.
    fUnhandledBreakEngine->handleCharacter(c);

    return fUnhandledBreakEngine;
}
1247
1248
0
void RuleBasedBreakIterator::dumpCache() {
1249
0
    fBreakCache->dumpCache();
1250
0
}
1251
1252
0
void RuleBasedBreakIterator::dumpTables() {
1253
0
    fData->printData();
1254
0
}
1255
1256
/**
1257
 * Returns the description used to create this iterator
1258
 */
1259
1260
const UnicodeString&
RuleBasedBreakIterator::getRules() const {
    if (fData == NULL) {
        // No rule data: hand back the shared empty string, creating it on
        // first use.
        umtx_initOnce(gRBBIInitOnce, &rbbiInit);
        return *gEmptyString;
    }
    return fData->getRuleSourceString();
}
1269
1270
U_NAMESPACE_END
1271
1272
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */