Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/alphaindex.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2009-2014, International Business Machines Corporation and
6
* others. All Rights Reserved.
7
*******************************************************************************
8
*/
9
10
#include "unicode/utypes.h"
11
12
#if !UCONFIG_NO_COLLATION
13
14
#include "unicode/alphaindex.h"
15
#include "unicode/coll.h"
16
#include "unicode/localpointer.h"
17
#include "unicode/normalizer2.h"
18
#include "unicode/tblcoll.h"
19
#include "unicode/uchar.h"
20
#include "unicode/ulocdata.h"
21
#include "unicode/uniset.h"
22
#include "unicode/uobject.h"
23
#include "unicode/usetiter.h"
24
#include "unicode/utf16.h"
25
26
#include "cmemory.h"
27
#include "cstring.h"
28
#include "uassert.h"
29
#include "uvector.h"
30
#include "uvectr64.h"
31
32
//#include <string>
33
//#include <iostream>
34
35
U_NAMESPACE_BEGIN
36
37
namespace {
38
39
/**
40
 * Prefix string for Chinese index buckets.
41
 * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42
 */
43
const UChar BASE[1] = { 0xFDD0 };
44
const int32_t BASE_LENGTH = 1;
45
46
UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47
                                const UnicodeString &one, const UnicodeString &other);
48
49
}  // namespace
50
51
static int32_t U_CALLCONV
52
collatorComparator(const void *context, const void *left, const void *right);
53
54
static int32_t U_CALLCONV
55
recordCompareFn(const void *context, const void *left, const void *right);
56
57
//  UVector<Record *> support function, delete a Record.
58
static void U_CALLCONV
59
0
alphaIndex_deleteRecord(void *obj) {
60
0
    delete static_cast<AlphabeticIndex::Record *>(obj);
61
0
}
62
63
namespace {
64
65
UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66
0
                           UErrorCode &errorCode) {
67
0
    if (U_FAILURE(errorCode)) { return NULL; }
68
0
    if (owned.isValid()) {
69
0
        return owned.orphan();
70
0
    }
71
0
    UnicodeString *p = new UnicodeString(s);
72
0
    if (p == NULL) {
73
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
74
0
    }
75
0
    return p;
76
0
}
77
78
0
inline UnicodeString *getString(const UVector &list, int32_t i) {
79
0
    return static_cast<UnicodeString *>(list[i]);
80
0
}
81
82
0
inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83
0
    return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84
0
}
85
86
0
inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87
0
    return static_cast<AlphabeticIndex::Record *>(list[i]);
88
0
}
89
90
/**
91
 * Like Java Collections.binarySearch(List, String, Comparator).
92
 *
93
 * @return the index>=0 where the item was found,
94
 *         or the index<0 for inserting the string at ~index in sorted order
95
 */
96
0
int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97
0
    if (list.size() == 0) { return ~0; }
98
0
    int32_t start = 0;
99
0
    int32_t limit = list.size();
100
0
    for (;;) {
101
0
        int32_t i = (start + limit) / 2;
102
0
        const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103
0
        UErrorCode errorCode = U_ZERO_ERROR;
104
0
        UCollationResult cmp = coll.compare(s, *si, errorCode);
105
0
        if (cmp == UCOL_EQUAL) {
106
0
            return i;
107
0
        } else if (cmp < 0) {
108
0
            if (i == start) {
109
0
                return ~start;  // insert s before *si
110
0
            }
111
0
            limit = i;
112
0
        } else {
113
0
            if (i == start) {
114
0
                return ~(start + 1);  // insert s after *si
115
0
            }
116
0
            start = i;
117
0
        }
118
0
    }
119
0
}
120
121
}  // namespace
122
123
// The BucketList is not in the anonymous namespace because only Clang
124
// seems to support its use in other classes from there.
125
// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126
class BucketList : public UObject {
127
public:
128
    BucketList(UVector *bucketList, UVector *publicBucketList)
129
0
            : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130
0
        int32_t displayIndex = 0;
131
0
        for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132
0
            getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133
0
        }
134
0
    }
135
136
    // The virtual destructor must not be inline.
137
    // See ticket #8454 for details.
138
    virtual ~BucketList();
139
140
0
    int32_t getBucketCount() const {
141
0
        return immutableVisibleList_->size();
142
0
    }
143
144
    int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145
0
                           UErrorCode &errorCode) {
146
0
        // binary search
147
0
        int32_t start = 0;
148
0
        int32_t limit = bucketList_->size();
149
0
        while ((start + 1) < limit) {
150
0
            int32_t i = (start + limit) / 2;
151
0
            const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152
0
            UCollationResult nameVsBucket =
153
0
                collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154
0
            if (nameVsBucket < 0) {
155
0
                limit = i;
156
0
            } else {
157
0
                start = i;
158
0
            }
159
0
        }
160
0
        const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161
0
        if (bucket->displayBucket_ != NULL) {
162
0
            bucket = bucket->displayBucket_;
163
0
        }
164
0
        return bucket->displayIndex_;
165
0
    }
166
167
    /** All of the buckets, visible and invisible. */
168
    UVector *bucketList_;
169
    /** Just the visible buckets. */
170
    UVector *immutableVisibleList_;
171
};
172
173
0
BucketList::~BucketList() {
174
0
    delete bucketList_;
175
0
    if (immutableVisibleList_ != bucketList_) {
176
0
        delete immutableVisibleList_;
177
0
    }
178
0
}
179
180
0
AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181
0
    delete buckets_;
182
0
    delete collatorPrimaryOnly_;
183
0
}
184
185
int32_t
186
0
AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187
0
    return buckets_->getBucketCount();
188
0
}
189
190
int32_t
191
AlphabeticIndex::ImmutableIndex::getBucketIndex(
192
0
        const UnicodeString &name, UErrorCode &errorCode) const {
193
0
    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194
0
}
195
196
const AlphabeticIndex::Bucket *
197
0
AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198
0
    if (0 <= index && index < buckets_->getBucketCount()) {
199
0
        return icu::getBucket(*buckets_->immutableVisibleList_, index);
200
0
    } else {
201
0
        return NULL;
202
0
    }
203
0
}
204
205
AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206
        : inputList_(NULL),
207
          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208
          maxLabelCount_(99),
209
          initialLabels_(NULL), firstCharsInScripts_(NULL),
210
          collator_(NULL), collatorPrimaryOnly_(NULL),
211
0
          buckets_(NULL) {
212
0
    init(&locale, status);
213
0
}
214
215
216
AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217
        : inputList_(NULL),
218
          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219
          maxLabelCount_(99),
220
          initialLabels_(NULL), firstCharsInScripts_(NULL),
221
          collator_(collator), collatorPrimaryOnly_(NULL),
222
0
          buckets_(NULL) {
223
0
    init(NULL, status);
224
0
}
225
226
227
228
0
AlphabeticIndex::~AlphabeticIndex() {
229
0
    delete collator_;
230
0
    delete collatorPrimaryOnly_;
231
0
    delete firstCharsInScripts_;
232
0
    delete buckets_;
233
0
    delete inputList_;
234
0
    delete initialLabels_;
235
0
}
236
237
238
0
AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239
0
    if (U_FAILURE(status)) {
240
0
        return *this;
241
0
    }
242
0
    initialLabels_->addAll(additions);
243
0
    clearBuckets();
244
0
    return *this;
245
0
}
246
247
248
0
AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249
0
    addIndexExemplars(locale, status);
250
0
    clearBuckets();
251
0
    return *this;
252
0
}
253
254
255
0
AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256
0
    if (U_FAILURE(errorCode)) { return NULL; }
257
0
    // In C++, the ImmutableIndex must own its copy of the BucketList,
258
0
    // even if it contains no records, for proper memory management.
259
0
    // We could clone the buckets_ if they are not NULL,
260
0
    // but that would be worth it only if this method is called multiple times,
261
0
    // or called after using the old-style bucket iterator API.
262
0
    LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263
0
    LocalPointer<RuleBasedCollator> coll(
264
0
        static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265
0
    if (immutableBucketList.isNull() || coll.isNull()) {
266
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
267
0
        return NULL;
268
0
    }
269
0
    ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270
0
    if (immIndex == NULL) {
271
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
272
0
        return NULL;
273
0
    }
274
0
    // The ImmutableIndex adopted its parameter objects.
275
0
    immutableBucketList.orphan();
276
0
    coll.orphan();
277
0
    return immIndex;
278
0
}
279
280
0
int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
281
0
    initBuckets(status);
282
0
    if (U_FAILURE(status)) {
283
0
        return 0;
284
0
    }
285
0
    return buckets_->getBucketCount();
286
0
}
287
288
289
0
int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
290
0
    if (U_FAILURE(status) || inputList_ == NULL) {
291
0
        return 0;
292
0
    }
293
0
    return inputList_->size();
294
0
}
295
296
0
void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297
0
    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298
0
    if (U_FAILURE(errorCode)) { return; }
299
0
300
0
    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301
0
    const UnicodeString &overflowBoundary =
302
0
        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303
0
304
0
    // We make a sorted array of elements.
305
0
    // Some of the input may be redundant.
306
0
    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307
0
    // We filter out those cases.
308
0
    UnicodeSetIterator iter(*initialLabels_);
309
0
    while (iter.next()) {
310
0
        const UnicodeString *item = &iter.getString();
311
0
        LocalPointer<UnicodeString> ownedItem;
312
0
        UBool checkDistinct;
313
0
        int32_t itemLength = item->length();
314
0
        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315
0
            checkDistinct = FALSE;
316
0
        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
317
0
                item->charAt(itemLength - 2) != 0x2a) {
318
0
            // Use a label if it is marked with one trailing star,
319
0
            // even if the label string sorts the same when all contractions are suppressed.
320
0
            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321
0
            item = ownedItem.getAlias();
322
0
            if (item == NULL) {
323
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
324
0
                return;
325
0
            }
326
0
            checkDistinct = FALSE;
327
0
        } else {
328
0
            checkDistinct = TRUE;
329
0
        }
330
0
        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331
0
            // Ignore a primary-ignorable or non-alphabetic index character.
332
0
        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
333
0
            // Ignore an index character that will land in the overflow bucket.
334
0
        } else if (checkDistinct &&
335
0
                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336
0
            // Ignore a multi-code point index character that does not sort distinctly
337
0
            // from the sequence of its separate characters.
338
0
        } else {
339
0
            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340
0
            if (insertionPoint < 0) {
341
0
                indexCharacters.insertElementAt(
342
0
                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343
0
            } else {
344
0
                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345
0
                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346
0
                    indexCharacters.setElementAt(
347
0
                        ownedString(*item, ownedItem, errorCode), insertionPoint);
348
0
                }
349
0
            }
350
0
        }
351
0
    }
352
0
    if (U_FAILURE(errorCode)) { return; }
353
0
354
0
    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
355
0
356
0
    int32_t size = indexCharacters.size() - 1;
357
0
    if (size > maxLabelCount_) {
358
0
        int32_t count = 0;
359
0
        int32_t old = -1;
360
0
        for (int32_t i = 0; i < indexCharacters.size();) {
361
0
            ++count;
362
0
            int32_t bump = count * maxLabelCount_ / size;
363
0
            if (bump == old) {
364
0
                indexCharacters.removeElementAt(i);
365
0
            } else {
366
0
                old = bump;
367
0
                ++i;
368
0
            }
369
0
        }
370
0
    }
371
0
}
372
373
namespace {
374
375
0
const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
376
0
    if (!current.startsWith(BASE, BASE_LENGTH)) {
377
0
        return current;
378
0
    }
379
0
    UChar rest = current.charAt(BASE_LENGTH);
380
0
    if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381
0
        int32_t count = rest-0x2800;
382
0
        temp.setTo((UChar)(0x30 + count % 10));
383
0
        if (count >= 10) {
384
0
            count /= 10;
385
0
            temp.insert(0, (UChar)(0x30 + count % 10));
386
0
            if (count >= 10) {
387
0
                count /= 10;
388
0
                temp.insert(0, (UChar)(0x30 + count));
389
0
            }
390
0
        }
391
0
        return temp.append((UChar)0x5283);
392
0
    }
393
0
    return temp.setTo(current, BASE_LENGTH);
394
0
}
395
396
UBool hasMultiplePrimaryWeights(
397
        const RuleBasedCollator &coll, uint32_t variableTop,
398
0
        const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399
0
    ces.removeAllElements();
400
0
    coll.internalGetCEs(s, ces, errorCode);
401
0
    if (U_FAILURE(errorCode)) { return FALSE; }
402
0
    UBool seenPrimary = FALSE;
403
0
    for (int32_t i = 0; i < ces.size(); ++i) {
404
0
        int64_t ce = ces.elementAti(i);
405
0
        uint32_t p = (uint32_t)(ce >> 32);
406
0
        if (p > variableTop) {
407
0
            // not primary ignorable
408
0
            if (seenPrimary) {
409
0
                return TRUE;
410
0
            }
411
0
            seenPrimary = TRUE;
412
0
        }
413
0
    }
414
0
    return FALSE;
415
0
}
416
417
}  // namespace
418
419
0
BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420
0
    // Initialize indexCharacters.
421
0
    UVector indexCharacters(errorCode);
422
0
    indexCharacters.setDeleter(uprv_deleteUObject);
423
0
    initLabels(indexCharacters, errorCode);
424
0
    if (U_FAILURE(errorCode)) { return NULL; }
425
0
426
0
    // Variables for hasMultiplePrimaryWeights().
427
0
    UVector64 ces(errorCode);
428
0
    uint32_t variableTop;
429
0
    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
430
0
        variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
431
0
    } else {
432
0
        variableTop = 0;
433
0
    }
434
0
    UBool hasInvisibleBuckets = FALSE;
435
0
436
0
    // Helper arrays for Chinese Pinyin collation.
437
0
    Bucket *asciiBuckets[26] = {
438
0
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439
0
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440
0
    };
441
0
    Bucket *pinyinBuckets[26] = {
442
0
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443
0
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444
0
    };
445
0
    UBool hasPinyin = FALSE;
446
0
447
0
    LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
448
0
    if (U_FAILURE(errorCode)) {
449
0
        return NULL;
450
0
    }
451
0
    bucketList->setDeleter(uprv_deleteUObject);
452
0
453
0
    // underflow bucket
454
0
    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
455
0
    if (bucket == NULL) {
456
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
457
0
        return NULL;
458
0
    }
459
0
    bucketList->addElement(bucket, errorCode);
460
0
    if (U_FAILURE(errorCode)) { return NULL; }
461
0
462
0
    UnicodeString temp;
463
0
464
0
    // fix up the list, adding underflow, additions, overflow
465
0
    // Insert inflow labels as needed.
466
0
    int32_t scriptIndex = -1;
467
0
    const UnicodeString *scriptUpperBoundary = &emptyString_;
468
0
    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
469
0
        UnicodeString &current = *getString(indexCharacters, i);
470
0
        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
471
0
            // We crossed the script boundary into a new script.
472
0
            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
473
0
            UBool skippedScript = FALSE;
474
0
            for (;;) {
475
0
                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
476
0
                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
477
0
                    break;
478
0
                }
479
0
                skippedScript = TRUE;
480
0
            }
481
0
            if (skippedScript && bucketList->size() > 1) {
482
0
                // We are skipping one or more scripts,
483
0
                // and we are not just getting out of the underflow label.
484
0
                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
485
0
                if (bucket == NULL) {
486
0
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
487
0
                    return NULL;
488
0
                }
489
0
                bucketList->addElement(bucket, errorCode);
490
0
            }
491
0
        }
492
0
        // Add a bucket with the current label.
493
0
        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
494
0
        if (bucket == NULL) {
495
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
496
0
            return NULL;
497
0
        }
498
0
        bucketList->addElement(bucket, errorCode);
499
0
        // Remember ASCII and Pinyin buckets for Pinyin redirects.
500
0
        UChar c;
501
0
        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
502
0
            asciiBuckets[c - 0x41] = bucket;
503
0
        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
504
0
                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
505
0
            pinyinBuckets[c - 0x41] = bucket;
506
0
            hasPinyin = TRUE;
507
0
        }
508
0
        // Check for multiple primary weights.
509
0
        if (!current.startsWith(BASE, BASE_LENGTH) &&
510
0
                hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
511
0
                                          ces, errorCode) &&
512
0
                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
513
0
            // "AE-ligature" or "Sch" etc.
514
0
            for (int32_t i = bucketList->size() - 2;; --i) {
515
0
                Bucket *singleBucket = getBucket(*bucketList, i);
516
0
                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
517
0
                    // There is no single-character bucket since the last
518
0
                    // underflow or inflow label.
519
0
                    break;
520
0
                }
521
0
                if (singleBucket->displayBucket_ == NULL &&
522
0
                        !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
523
0
                                                   singleBucket->lowerBoundary_,
524
0
                                                   ces, errorCode)) {
525
0
                    // Add an invisible bucket that redirects strings greater than the expansion
526
0
                    // to the previous single-character bucket.
527
0
                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
528
0
                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
529
0
                    bucket = new Bucket(emptyString_,
530
0
                        UnicodeString(current).append((UChar)0xFFFF),
531
0
                        U_ALPHAINDEX_NORMAL);
532
0
                    if (bucket == NULL) {
533
0
                        errorCode = U_MEMORY_ALLOCATION_ERROR;
534
0
                        return NULL;
535
0
                    }
536
0
                    bucket->displayBucket_ = singleBucket;
537
0
                    bucketList->addElement(bucket, errorCode);
538
0
                    hasInvisibleBuckets = TRUE;
539
0
                    break;
540
0
                }
541
0
            }
542
0
        }
543
0
    }
544
0
    if (U_FAILURE(errorCode)) { return NULL; }
545
0
    if (bucketList->size() == 1) {
546
0
        // No real labels, show only the underflow label.
547
0
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
548
0
        if (bl == NULL) {
549
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
550
0
            return NULL;
551
0
        }
552
0
        bucketList.orphan();
553
0
        return bl;
554
0
    }
555
0
    // overflow bucket
556
0
    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
557
0
    if (bucket == NULL) {
558
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
559
0
        return NULL;
560
0
    }
561
0
    bucketList->addElement(bucket, errorCode); // final
562
0
563
0
    if (hasPinyin) {
564
0
        // Redirect Pinyin buckets.
565
0
        Bucket *asciiBucket = NULL;
566
0
        for (int32_t i = 0; i < 26; ++i) {
567
0
            if (asciiBuckets[i] != NULL) {
568
0
                asciiBucket = asciiBuckets[i];
569
0
            }
570
0
            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
571
0
                pinyinBuckets[i]->displayBucket_ = asciiBucket;
572
0
                hasInvisibleBuckets = TRUE;
573
0
            }
574
0
        }
575
0
    }
576
0
577
0
    if (U_FAILURE(errorCode)) { return NULL; }
578
0
    if (!hasInvisibleBuckets) {
579
0
        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
580
0
        if (bl == NULL) {
581
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
582
0
            return NULL;
583
0
        }
584
0
        bucketList.orphan();
585
0
        return bl;
586
0
    }
587
0
    // Merge inflow buckets that are visually adjacent.
588
0
    // Iterate backwards: Merge inflow into overflow rather than the other way around.
589
0
    int32_t i = bucketList->size() - 1;
590
0
    Bucket *nextBucket = getBucket(*bucketList, i);
591
0
    while (--i > 0) {
592
0
        bucket = getBucket(*bucketList, i);
593
0
        if (bucket->displayBucket_ != NULL) {
594
0
            continue;  // skip invisible buckets
595
0
        }
596
0
        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
597
0
            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
598
0
                bucket->displayBucket_ = nextBucket;
599
0
                continue;
600
0
            }
601
0
        }
602
0
        nextBucket = bucket;
603
0
    }
604
0
605
0
    LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
606
0
    if (U_FAILURE(errorCode)) {
607
0
        return NULL;
608
0
    }
609
0
    // Do not call publicBucketList->setDeleter():
610
0
    // This vector shares its objects with the bucketList.
611
0
    for (int32_t i = 0; i < bucketList->size(); ++i) {
612
0
        bucket = getBucket(*bucketList, i);
613
0
        if (bucket->displayBucket_ == NULL) {
614
0
            publicBucketList->addElement(bucket, errorCode);
615
0
        }
616
0
    }
617
0
    if (U_FAILURE(errorCode)) { return NULL; }
618
0
    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
619
0
    if (bl == NULL) {
620
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
621
0
        return NULL;
622
0
    }
623
0
    bucketList.orphan();
624
0
    publicBucketList.orphan();
625
0
    return bl;
626
0
}
627
628
/**
629
 * Creates an index, and buckets and sorts the list of records into the index.
630
 */
631
0
void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
632
0
    if (U_FAILURE(errorCode) || buckets_ != NULL) {
633
0
        return;
634
0
    }
635
0
    buckets_ = createBucketList(errorCode);
636
0
    if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
637
0
        return;
638
0
    }
639
0
640
0
    // Sort the records by name.
641
0
    // Stable sort preserves input order of collation duplicates.
642
0
    inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
643
0
644
0
    // Now, we traverse all of the input, which is now sorted.
645
0
    // If the item doesn't go in the current bucket, we find the next bucket that contains it.
646
0
    // This makes the process order n*log(n), since we just sort the list and then do a linear process.
647
0
    // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
648
0
    // we need to improve it for that case.
649
0
650
0
    Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
651
0
    int32_t bucketIndex = 1;
652
0
    Bucket *nextBucket;
653
0
    const UnicodeString *upperBoundary;
654
0
    if (bucketIndex < buckets_->bucketList_->size()) {
655
0
        nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
656
0
        upperBoundary = &nextBucket->lowerBoundary_;
657
0
    } else {
658
0
        nextBucket = NULL;
659
0
        upperBoundary = NULL;
660
0
    }
661
0
    for (int32_t i = 0; i < inputList_->size(); ++i) {
662
0
        Record *r = getRecord(*inputList_, i);
663
0
        // if the current bucket isn't the right one, find the one that is
664
0
        // We have a special flag for the last bucket so that we don't look any further
665
0
        while (upperBoundary != NULL &&
666
0
                collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
667
0
            currentBucket = nextBucket;
668
0
            // now reset the boundary that we compare against
669
0
            if (bucketIndex < buckets_->bucketList_->size()) {
670
0
                nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
671
0
                upperBoundary = &nextBucket->lowerBoundary_;
672
0
            } else {
673
0
                upperBoundary = NULL;
674
0
            }
675
0
        }
676
0
        // now put the record into the bucket.
677
0
        Bucket *bucket = currentBucket;
678
0
        if (bucket->displayBucket_ != NULL) {
679
0
            bucket = bucket->displayBucket_;
680
0
        }
681
0
        if (bucket->records_ == NULL) {
682
0
            bucket->records_ = new UVector(errorCode);
683
0
            if (bucket->records_ == NULL) {
684
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
685
0
                return;
686
0
            }
687
0
        }
688
0
        bucket->records_->addElement(r, errorCode);
689
0
    }
690
0
}
691
692
0
void AlphabeticIndex::clearBuckets() {
693
0
    if (buckets_ != NULL) {
694
0
        delete buckets_;
695
0
        buckets_ = NULL;
696
0
        internalResetBucketIterator();
697
0
    }
698
0
}
699
700
0
void AlphabeticIndex::internalResetBucketIterator() {
701
0
    labelsIterIndex_ = -1;
702
0
    currentBucket_ = NULL;
703
0
}
704
705
706
0
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
707
0
    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
708
0
    if (U_FAILURE(status)) {
709
0
        return;
710
0
    }
711
0
712
0
    UnicodeSet exemplars;
713
0
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
714
0
    if (U_SUCCESS(status)) {
715
0
        initialLabels_->addAll(exemplars);
716
0
        return;
717
0
    }
718
0
    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
719
0
720
0
    // The locale data did not include explicit Index characters.
721
0
    // Synthesize a set of them from the locale's standard exemplar characters.
722
0
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
723
0
    if (U_FAILURE(status)) {
724
0
        return;
725
0
    }
726
0
727
0
    // question: should we add auxiliary exemplars?
728
0
    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) {
729
0
        exemplars.add(0x61, 0x7A);
730
0
    }
731
0
    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
732
0
        // cut down to small list
733
0
        exemplars.remove(0xAC00, 0xD7A3).
734
0
            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
735
0
            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
736
0
            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
737
0
            add(0xD30C).add(0xD558);
738
0
    }
739
0
    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
740
0
        // cut down to small list
741
0
        // make use of the fact that Ethiopic is allocated in 8's, where
742
0
        // the base is 0 mod 8.
743
0
        UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status);
744
0
        ethiopic.retainAll(exemplars);
745
0
        exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic);
746
0
    }
747
0
748
0
    // Upper-case any that aren't already so.
749
0
    //   (We only do this for synthesized index characters.)
750
0
    UnicodeSetIterator it(exemplars);
751
0
    UnicodeString upperC;
752
0
    while (it.next()) {
753
0
        const UnicodeString &exemplarC = it.getString();
754
0
        upperC = exemplarC;
755
0
        upperC.toUpper(locale);
756
0
        initialLabels_->add(upperC);
757
0
    }
758
0
}
759
760
0
UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
761
0
    UnicodeSet contractions;
762
0
    collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
763
0
    if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
764
0
    initialLabels_->addAll(contractions);
765
0
    UnicodeSetIterator iter(contractions);
766
0
    while (iter.next()) {
767
0
        const UnicodeString &s = iter.getString();
768
0
        U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
769
0
        UChar c = s.charAt(s.length() - 1);
770
0
        if (0x41 <= c && c <= 0x5A) {  // A-Z
771
0
            // There are Pinyin labels, add ASCII A-Z labels as well.
772
0
            initialLabels_->add(0x41, 0x5A);  // A-Z
773
0
            break;
774
0
        }
775
0
    }
776
0
    return TRUE;
777
0
}
778
779
780
/*
781
 * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
782
 */
783
static const UChar CGJ = 0x034F;
784
0
UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
785
0
    UnicodeString result;
786
0
    if (item.length() == 0) {
787
0
        return result;
788
0
    }
789
0
    int32_t i = 0;
790
0
    for (;;) {
791
0
        UChar32  cp = item.char32At(i);
792
0
        result.append(cp);
793
0
        i = item.moveIndex32(i, 1);
794
0
        if (i >= item.length()) {
795
0
            break;
796
0
        }
797
0
        result.append(CGJ);
798
0
    }
799
0
    return result;
800
0
}
801
802
803
0
UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
804
0
    return FALSE;
805
0
}
806
807
808
0
UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
809
0
    return FALSE;
810
0
}
811
812
813
0
const RuleBasedCollator &AlphabeticIndex::getCollator() const {
814
0
    return *collator_;
815
0
}
816
817
818
0
const UnicodeString &AlphabeticIndex::getInflowLabel() const {
819
0
    return inflowLabel_;
820
0
}
821
822
0
const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
823
0
    return overflowLabel_;
824
0
}
825
826
827
0
const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
828
0
    return underflowLabel_;
829
0
}
830
831
832
0
AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
833
0
    inflowLabel_ = label;
834
0
    clearBuckets();
835
0
    return *this;
836
0
}
837
838
839
0
AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
840
0
    overflowLabel_ = label;
841
0
    clearBuckets();
842
0
    return *this;
843
0
}
844
845
846
0
AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
847
0
    underflowLabel_ = label;
848
0
    clearBuckets();
849
0
    return *this;
850
0
}
851
852
853
0
int32_t AlphabeticIndex::getMaxLabelCount() const {
854
0
    return maxLabelCount_;
855
0
}
856
857
858
0
AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
859
0
    if (U_FAILURE(status)) {
860
0
        return *this;
861
0
    }
862
0
    if (maxLabelCount <= 0) {
863
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
864
0
        return *this;
865
0
    }
866
0
    maxLabelCount_ = maxLabelCount;
867
0
    clearBuckets();
868
0
    return *this;
869
0
}
870
871
872
//
873
//  init() - Common code for constructors.
874
//
875
876
0
void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
877
0
    if (U_FAILURE(status)) { return; }
878
0
    if (locale == NULL && collator_ == NULL) {
879
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
880
0
        return;
881
0
    }
882
0
883
0
    initialLabels_         = new UnicodeSet();
884
0
    if (initialLabels_ == NULL) {
885
0
        status = U_MEMORY_ALLOCATION_ERROR;
886
0
        return;
887
0
    }
888
0
889
0
    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
890
0
    overflowLabel_ = inflowLabel_;
891
0
    underflowLabel_ = inflowLabel_;
892
0
893
0
    if (collator_ == NULL) {
894
0
        Collator *coll = Collator::createInstance(*locale, status);
895
0
        if (U_FAILURE(status)) {
896
0
            delete coll;
897
0
            return;
898
0
        }
899
0
        if (coll == NULL) {
900
0
            status = U_MEMORY_ALLOCATION_ERROR;
901
0
            return;
902
0
        }
903
0
        collator_ = dynamic_cast<RuleBasedCollator *>(coll);
904
0
        if (collator_ == NULL) {
905
0
            delete coll;
906
0
            status = U_UNSUPPORTED_ERROR;
907
0
            return;
908
0
        }
909
0
    }
910
0
    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
911
0
    if (collatorPrimaryOnly_ == NULL) {
912
0
        status = U_MEMORY_ALLOCATION_ERROR;
913
0
        return;
914
0
    }
915
0
    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
916
0
    firstCharsInScripts_ = firstStringsInScript(status);
917
0
    if (U_FAILURE(status)) { return; }
918
0
    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
919
0
    // Guard against a degenerate collator where
920
0
    // some script boundary strings are primary ignorable.
921
0
    for (;;) {
922
0
        if (U_FAILURE(status)) { return; }
923
0
        if (firstCharsInScripts_->isEmpty()) {
924
0
            // AlphabeticIndex requires some non-ignorable script boundary strings.
925
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
926
0
            return;
927
0
        }
928
0
        if (collatorPrimaryOnly_->compare(
929
0
                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
930
0
                emptyString_, status) == UCOL_EQUAL) {
931
0
            firstCharsInScripts_->removeElementAt(0);
932
0
        } else {
933
0
            break;
934
0
        }
935
0
    }
936
0
937
0
    // Chinese index characters, which are specific to each of the several Chinese tailorings,
938
0
    // take precedence over the single locale data exemplar set per language.
939
0
    if (!addChineseIndexCharacters(status) && locale != NULL) {
940
0
        addIndexExemplars(*locale, status);
941
0
    }
942
0
}
943
944
945
//
946
//  Comparison function for UVector<UnicodeString *> sorting with a collator.
947
//
948
static int32_t U_CALLCONV
949
0
collatorComparator(const void *context, const void *left, const void *right) {
950
0
    const UElement *leftElement = static_cast<const UElement *>(left);
951
0
    const UElement *rightElement = static_cast<const UElement *>(right);
952
0
    const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
953
0
    const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
954
0
955
0
    if (leftString == rightString) {
956
0
        // Catches case where both are NULL
957
0
        return 0;
958
0
    }
959
0
    if (leftString == NULL) {
960
0
        return 1;
961
0
    };
962
0
    if (rightString == NULL) {
963
0
        return -1;
964
0
    }
965
0
    const Collator *col = static_cast<const Collator *>(context);
966
0
    UErrorCode errorCode = U_ZERO_ERROR;
967
0
    return col->compare(*leftString, *rightString, errorCode);
968
0
}
969
970
//
971
//  Comparison function for UVector<Record *> sorting with a collator.
972
//
973
static int32_t U_CALLCONV
974
0
recordCompareFn(const void *context, const void *left, const void *right) {
975
0
    const UElement *leftElement = static_cast<const UElement *>(left);
976
0
    const UElement *rightElement = static_cast<const UElement *>(right);
977
0
    const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
978
0
    const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
979
0
    const Collator *col = static_cast<const Collator *>(context);
980
0
    UErrorCode errorCode = U_ZERO_ERROR;
981
0
    return col->compare(leftRec->name_, rightRec->name_, errorCode);
982
0
}
983
984
0
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
985
0
    if (U_FAILURE(status)) {
986
0
        return NULL;
987
0
    }
988
0
    LocalPointer<UVector> dest(new UVector(status), status);
989
0
    if (U_FAILURE(status)) {
990
0
        return NULL;
991
0
    }
992
0
    dest->setDeleter(uprv_deleteUObject);
993
0
    // Fetch the script-first-primary contractions which are defined in the root collator.
994
0
    // They all start with U+FDD1.
995
0
    UnicodeSet set;
996
0
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
997
0
    if (U_FAILURE(status)) {
998
0
        return NULL;
999
0
    }
1000
0
    if (set.isEmpty()) {
1001
0
        status = U_UNSUPPORTED_ERROR;
1002
0
        return NULL;
1003
0
    }
1004
0
    UnicodeSetIterator iter(set);
1005
0
    while (iter.next()) {
1006
0
        const UnicodeString &boundary = iter.getString();
1007
0
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1008
0
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1009
0
            // Ignore boundaries for the special reordering groups.
1010
0
            // Take only those for "real scripts" (where the sample character is a Letter,
1011
0
            // and the one for unassigned implicit weights (Cn).
1012
0
            continue;
1013
0
        }
1014
0
        UnicodeString *s = new UnicodeString(boundary);
1015
0
        if (s == NULL) {
1016
0
            status = U_MEMORY_ALLOCATION_ERROR;
1017
0
            return NULL;
1018
0
        }
1019
0
        dest->addElement(s, status);
1020
0
    }
1021
0
    return dest.orphan();
1022
0
}
1023
1024
1025
namespace {
1026
1027
/**
1028
 * Returns true if one index character string is "better" than the other.
1029
 * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1030
 * better, and otherwise binary-less-than is better.
1031
 */
1032
UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1033
0
                                const UnicodeString &one, const UnicodeString &other) {
1034
0
    // This is called with primary-equal strings, but never with one.equals(other).
1035
0
    UErrorCode status = U_ZERO_ERROR;
1036
0
    UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1037
0
    UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1038
0
    if (U_FAILURE(status)) { return FALSE; }
1039
0
    int32_t result = n1.countChar32() - n2.countChar32();
1040
0
    if (result != 0) {
1041
0
        return result < 0;
1042
0
    }
1043
0
    result = n1.compareCodePointOrder(n2);
1044
0
    if (result != 0) {
1045
0
        return result < 0;
1046
0
    }
1047
0
    return one.compareCodePointOrder(other) < 0;
1048
0
}
1049
1050
}  // namespace
1051
1052
//
1053
//  Constructor & Destructor for AlphabeticIndex::Record
1054
//
1055
//     Records are internal only, instances are not directly surfaced in the public API.
1056
//     This class is mostly struct-like, with all public fields.
1057
1058
AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1059
0
        : name_(name), data_(data) {}
1060
1061
0
AlphabeticIndex::Record::~Record() {
1062
0
}
1063
1064
1065
0
AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1066
0
    if (U_FAILURE(status)) {
1067
0
        return *this;
1068
0
    }
1069
0
    if (inputList_ == NULL) {
1070
0
        inputList_ = new UVector(status);
1071
0
        if (inputList_ == NULL) {
1072
0
            status = U_MEMORY_ALLOCATION_ERROR;
1073
0
            return *this;
1074
0
        }
1075
0
        inputList_->setDeleter(alphaIndex_deleteRecord);
1076
0
    }
1077
0
    Record *r = new Record(name, data);
1078
0
    if (r == NULL) {
1079
0
        status = U_MEMORY_ALLOCATION_ERROR;
1080
0
        return *this;
1081
0
    }
1082
0
    inputList_->addElement(r, status);
1083
0
    clearBuckets();
1084
0
    //std::string ss;
1085
0
    //std::string ss2;
1086
0
    //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 
1087
0
    //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1088
0
    return *this;
1089
0
}
1090
1091
1092
0
AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1093
0
    if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1094
0
        inputList_->removeAllElements();
1095
0
        clearBuckets();
1096
0
    }
1097
0
    return *this;
1098
0
}
1099
1100
0
int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1101
0
    initBuckets(status);
1102
0
    if (U_FAILURE(status)) {
1103
0
        return 0;
1104
0
    }
1105
0
    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1106
0
}
1107
1108
1109
0
int32_t AlphabeticIndex::getBucketIndex() const {
1110
0
    return labelsIterIndex_;
1111
0
}
1112
1113
1114
0
UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1115
0
    if (U_FAILURE(status)) {
1116
0
        return FALSE;
1117
0
    }
1118
0
    if (buckets_ == NULL && currentBucket_ != NULL) {
1119
0
        status = U_ENUM_OUT_OF_SYNC_ERROR;
1120
0
        return FALSE;
1121
0
    }
1122
0
    initBuckets(status);
1123
0
    if (U_FAILURE(status)) {
1124
0
        return FALSE;
1125
0
    }
1126
0
    ++labelsIterIndex_;
1127
0
    if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1128
0
        labelsIterIndex_ = buckets_->getBucketCount();
1129
0
        return FALSE;
1130
0
    }
1131
0
    currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1132
0
    resetRecordIterator();
1133
0
    return TRUE;
1134
0
}
1135
1136
0
const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1137
0
    if (currentBucket_ != NULL) {
1138
0
        return currentBucket_->label_;
1139
0
    } else {
1140
0
        return emptyString_;
1141
0
    }
1142
0
}
1143
1144
1145
0
UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1146
0
    if (currentBucket_ != NULL) {
1147
0
        return currentBucket_->labelType_;
1148
0
    } else {
1149
0
        return U_ALPHAINDEX_NORMAL;
1150
0
    }
1151
0
}
1152
1153
1154
0
int32_t AlphabeticIndex::getBucketRecordCount() const {
1155
0
    if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1156
0
        return currentBucket_->records_->size();
1157
0
    } else {
1158
0
        return 0;
1159
0
    }
1160
0
}
1161
1162
1163
0
AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1164
0
    if (U_FAILURE(status)) {
1165
0
        return *this;
1166
0
    }
1167
0
    internalResetBucketIterator();
1168
0
    return *this;
1169
0
}
1170
1171
1172
0
UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1173
0
    if (U_FAILURE(status)) {
1174
0
        return FALSE;
1175
0
    }
1176
0
    if (currentBucket_ == NULL) {
1177
0
        // We are trying to iterate over the items in a bucket, but there is no
1178
0
        // current bucket from the enumeration of buckets.
1179
0
        status = U_INVALID_STATE_ERROR;
1180
0
        return FALSE;
1181
0
    }
1182
0
    if (buckets_ == NULL) {
1183
0
        status = U_ENUM_OUT_OF_SYNC_ERROR;
1184
0
        return FALSE;
1185
0
    }
1186
0
    if (currentBucket_->records_ == NULL) {
1187
0
        return FALSE;
1188
0
    }
1189
0
    ++itemsIterIndex_;
1190
0
    if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1191
0
        itemsIterIndex_  = currentBucket_->records_->size();
1192
0
        return FALSE;
1193
0
    }
1194
0
    return TRUE;
1195
0
}
1196
1197
1198
0
const UnicodeString &AlphabeticIndex::getRecordName() const {
1199
0
    const UnicodeString *retStr = &emptyString_;
1200
0
    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1201
0
        itemsIterIndex_ >= 0 &&
1202
0
        itemsIterIndex_ < currentBucket_->records_->size()) {
1203
0
            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1204
0
            retStr = &item->name_;
1205
0
    }
1206
0
    return *retStr;
1207
0
}
1208
1209
0
const void *AlphabeticIndex::getRecordData() const {
1210
0
    const void *retPtr = NULL;
1211
0
    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1212
0
        itemsIterIndex_ >= 0 &&
1213
0
        itemsIterIndex_ < currentBucket_->records_->size()) {
1214
0
            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1215
0
            retPtr = item->data_;
1216
0
    }
1217
0
    return retPtr;
1218
0
}
1219
1220
1221
0
AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1222
0
    itemsIterIndex_ = -1;
1223
0
    return *this;
1224
0
}
1225
1226
1227
1228
AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1229
                                const UnicodeString &lowerBoundary,
1230
                                UAlphabeticIndexLabelType type)
1231
        : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1232
          displayBucket_(NULL), displayIndex_(-1),
1233
0
          records_(NULL) {
1234
0
}
1235
1236
1237
0
AlphabeticIndex::Bucket::~Bucket() {
1238
0
    delete records_;
1239
0
}
1240
1241
U_NAMESPACE_END
1242
1243
#endif  // !UCONFIG_NO_COLLATION