Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/i18n/ucoleitr.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*   Copyright (C) 2001-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
******************************************************************************
8
*
9
* File ucoleitr.cpp
10
*
11
* Modification History:
12
*
13
* Date        Name        Description
14
* 02/15/2001  synwee      Modified all methods to process its own function 
15
*                         instead of calling the equivalent c++ api (coleitr.h)
16
* 2012-2014   markus      Rewritten in C++ again.
17
******************************************************************************/
18
19
#include "unicode/utypes.h"
20
21
#if !UCONFIG_NO_COLLATION
22
23
#include "unicode/coleitr.h"
24
#include "unicode/tblcoll.h"
25
#include "unicode/ucoleitr.h"
26
#include "unicode/ustring.h"
27
#include "unicode/sortkey.h"
28
#include "unicode/uobject.h"
29
#include "cmemory.h"
30
#include "usrchimp.h"
31
32
U_NAMESPACE_USE
33
34
// Not referenced by any code visible in this file; left unchanged.
#define BUFFER_LENGTH             100

// Number of entries in the fixed-size array embedded in RCEBuffer.
#define DEFAULT_BUFFER_SIZE 16
// Number of entries a CE buffer gains each time put() has to reallocate.
#define BUFFER_GROW 8

// Copies `count` elements from src to dst; the element size is taken from src[0].
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])

// Obtains raw storage for `count` objects of `type` from uprv_malloc (no constructors run).
#define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))

// Hands storage back to uprv_free.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
44
45
// One raw collation element plus the iterator offsets recorded around the
// call that produced it (see UCollationPCE::previousProcessed).
struct RCEI
{
    uint32_t ce;    // raw 32-bit collation element
    int32_t  low;   // lower of the two offsets recorded for this element
    int32_t  high;  // higher of the two offsets recorded for this element
};
51
52
U_NAMESPACE_BEGIN
53
54
struct RCEBuffer
55
{
56
    RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
57
    RCEI   *buffer;
58
    int32_t bufferIndex;
59
    int32_t bufferSize;
60
61
    RCEBuffer();
62
    ~RCEBuffer();
63
64
    UBool isEmpty() const;
65
    void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
66
    const RCEI *get();
67
};
68
69
// Starts out empty, using the embedded array as storage.
RCEBuffer::RCEBuffer()
    : buffer(defaultBuffer),
      bufferIndex(0),
      bufferSize(UPRV_LENGTHOF(defaultBuffer))
{
}
75
76
// Frees the heap storage, if put() ever had to allocate any.
RCEBuffer::~RCEBuffer()
{
    if (buffer == defaultBuffer) {
        return;  // still using the embedded array; nothing to free
    }

    DELETE_ARRAY(buffer);
}
82
83
// TRUE when there is nothing left for get() to return.
UBool RCEBuffer::isEmpty() const
{
    return !(bufferIndex > 0);
}
87
88
/**
 * Appends one raw CE with its offsets.
 *
 * Does nothing if errorCode already indicates failure. When the current
 * storage is full, a block BUFFER_GROW entries larger is allocated and the
 * existing entries are copied over; if that allocation fails, errorCode is
 * set to U_MEMORY_ALLOCATION_ERROR and the entry is not stored.
 */
void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
{
    if (U_FAILURE(errorCode)) {
        return;
    }

    if (bufferIndex >= bufferSize) {
        const int32_t grownSize = bufferSize + BUFFER_GROW;
        RCEI *grown = NEW_ARRAY(RCEI, grownSize);

        if (grown == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        ARRAY_COPY(grown, buffer, bufferSize);

        // Only heap storage is released; the embedded array belongs to *this.
        if (buffer != defaultBuffer) {
            DELETE_ARRAY(buffer);
        }

        buffer     = grown;
        bufferSize = grownSize;
    }

    RCEI &slot = buffer[bufferIndex];
    slot.ce   = ce;
    slot.low  = ixLow;
    slot.high = ixHigh;

    ++bufferIndex;
}
116
117
// Pops the most recently stored entry. The result points into the buffer's
// own storage; NULL is returned when the buffer is empty.
const RCEI *RCEBuffer::get()
{
    if (bufferIndex <= 0) {
        return NULL;
    }

    bufferIndex -= 1;
    return &buffer[bufferIndex];
}
125
126
// Starts out empty, using the embedded array as storage.
PCEBuffer::PCEBuffer()
{
    bufferIndex = 0;
    bufferSize  = UPRV_LENGTHOF(defaultBuffer);
    buffer      = defaultBuffer;
}
132
133
// Frees the heap storage, if put() ever had to allocate any.
PCEBuffer::~PCEBuffer()
{
    if (buffer == defaultBuffer) {
        return;  // still using the embedded array; nothing to free
    }

    DELETE_ARRAY(buffer);
}
139
140
void PCEBuffer::reset()
141
0
{
142
0
    bufferIndex = 0;
143
0
}
144
145
// TRUE when there is nothing left for get() to return.
UBool PCEBuffer::isEmpty() const
{
    return !(bufferIndex > 0);
}
149
150
/**
 * Appends one processed CE with its offsets.
 *
 * Does nothing if errorCode already indicates failure. When the current
 * storage is full, a block BUFFER_GROW entries larger is allocated and the
 * existing entries are copied over; if that allocation fails, errorCode is
 * set to U_MEMORY_ALLOCATION_ERROR and the entry is not stored.
 */
void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
{
    if (U_FAILURE(errorCode)) {
        return;
    }

    const UBool isFull = bufferIndex >= bufferSize;
    if (isFull) {
        PCEI *larger = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
        if (larger == NULL) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        ARRAY_COPY(larger, buffer, bufferSize);

        // Only heap storage is released; the embedded array belongs to *this.
        if (buffer != defaultBuffer) {
            DELETE_ARRAY(buffer);
        }

        bufferSize += BUFFER_GROW;
        buffer = larger;
    }

    PCEI *slot = &buffer[bufferIndex];
    slot->ce   = ce;
    slot->low  = ixLow;
    slot->high = ixHigh;

    bufferIndex += 1;
}
178
179
// Pops the most recently stored entry. The result points into the buffer's
// own storage; NULL is returned when the buffer is empty.
const PCEI *PCEBuffer::get()
{
    if (bufferIndex <= 0) {
        return NULL;
    }

    bufferIndex -= 1;
    return &buffer[bufferIndex];
}
187
188
0
// Builds the processed-CE state on top of a C-API element iterator handle.
UCollationPCE::UCollationPCE(UCollationElements *elems)
{
    init(elems);
}
189
190
0
// Builds the processed-CE state on top of a C++ element iterator.
UCollationPCE::UCollationPCE(CollationElementIterator *iter)
{
    init(iter);
}
191
192
0
// Converts the C handle to its C++ iterator and initializes from that.
void UCollationPCE::init(UCollationElements *elems)
{
    CollationElementIterator *iter = CollationElementIterator::fromUCollationElements(elems);
    init(iter);
}
195
196
// Remembers the iterator and loads the collation settings from the collator
// it refers to (iter->rbc_). iter is dereferenced without a NULL check.
void UCollationPCE::init(CollationElementIterator *iter)
{
    cei = iter;

    const Collator &owner = *iter->rbc_;
    init(owner);
}
201
202
void UCollationPCE::init(const Collator &coll)
203
0
{
204
0
    UErrorCode status = U_ZERO_ERROR;
205
0
206
0
    strength    = coll.getAttribute(UCOL_STRENGTH, status);
207
0
    toShift     = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
208
0
    isShifted   = FALSE;
209
0
    variableTop = coll.getVariableTop(status);
210
0
}
211
212
// No explicit cleanup is performed here.
UCollationPCE::~UCollationPCE()
{
}
216
217
/**
 * Converts a raw 32-bit CE into a 64-bit "processed" CE laid out as
 * primary(16) . secondary(16) . tertiary(16) . quaternary(16), most
 * significant field first.
 *
 * Weights above the configured strength are left at zero. When shifting is
 * enabled (toShift) and the CE is below variableTop with a non-zero primary,
 * the primary is moved into the quaternary field (if the strength includes
 * it) and the other weights are cleared; isShifted then stays TRUE so that
 * following CEs with a zero primary are reported as UCOL_IGNORABLE.
 *
 * Updates isShifted as a side effect.
 *
 * @param ce raw collation element
 * @return the processed CE, or UCOL_IGNORABLE
 */
uint64_t UCollationPCE::processCE(uint32_t ce)
{
    uint64_t primary    = 0;
    uint64_t secondary  = 0;
    uint64_t tertiary   = 0;
    uint64_t quaternary = 0;

    // Extract only the weights that matter at the current strength:
    // PRIMARY -> primary; SECONDARY -> primary + secondary;
    // anything else -> all three.
    primary = ucol_primaryOrder(ce);
    if (strength != UCOL_PRIMARY) {
        secondary = ucol_secondaryOrder(ce);
        if (strength != UCOL_SECONDARY) {
            tertiary = ucol_tertiaryOrder(ce);
        }
    }

    // TODO (carried over): this should probably handle continuations too.
    // That would need 24 bits for the primary instead of the 16 used now,
    // e.g. a 24.12.12.16 layout of the 64 bits. A further complication is
    // that the *second* CE is the one marked as a continuation, so we would
    // always have to peek ahead to know how long the primary is.

    // A zero primary directly after a shifted CE is dropped; isShifted is
    // deliberately left unchanged so the whole run is dropped.
    if (isShifted && primary == 0) {
        return UCOL_IGNORABLE;
    }

    if (toShift && variableTop > ce && primary != 0) {
        // Shifted: the primary moves down to the quaternary level.
        if (strength >= UCOL_QUATERNARY) {
            quaternary = primary;
        }

        primary   = 0;
        secondary = 0;
        tertiary  = 0;
        isShifted = TRUE;
    } else {
        // Not shifted: use the 0xFFFF quaternary weight.
        if (strength >= UCOL_QUATERNARY) {
            quaternary = 0xFFFF;
        }

        isShifted = FALSE;
    }

    return (primary << 48) | (secondary << 32) | (tertiary << 16) | quaternary;
}
268
269
U_NAMESPACE_END
270
271
/* public methods ---------------------------------------------------- */
272
273
/**
 * Opens a collation element iterator over `text` for the given collator.
 *
 * @param coll       collator; must be non-NULL and rule-based
 * @param text       text to iterate; may be NULL only when textLength is 0
 * @param textLength length of text; a negative value is passed to the
 *                   UnicodeString constructor as "terminated"
 * @param status     in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                   bad arguments, U_UNSUPPORTED_ERROR when coll is not a
 *                   RuleBasedCollator, U_MEMORY_ALLOCATION_ERROR when no
 *                   iterator could be created.
 * @return the new iterator handle, or NULL on failure
 */
U_CAPI UCollationElements* U_EXPORT2
ucol_openElements(const UCollator  *coll,
                  const UChar      *text,
                        int32_t    textLength,
                        UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    const UBool textMissing = (text == NULL && textLength != 0);
    if (coll == NULL || textMissing) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    const RuleBasedCollator *ruleBased = RuleBasedCollator::rbcFromUCollator(coll);
    if (ruleBased == NULL) {
        // coll is a Collator but not a RuleBasedCollator
        *status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    UnicodeString wrapped((UBool)(textLength < 0), text, textLength);
    CollationElementIterator *iter = ruleBased->createCollationElementIterator(wrapped);
    if (iter != NULL) {
        return iter->toUCollationElements();
    }

    *status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
301
302
303
// Destroys the iterator behind the handle.
U_CAPI void U_EXPORT2
ucol_closeElements(UCollationElements *elems)
{
    CollationElementIterator *iter = CollationElementIterator::fromUCollationElements(elems);
    delete iter;
}
308
309
// Forwards to CollationElementIterator::reset() on the iterator behind the handle.
U_CAPI void U_EXPORT2
ucol_reset(UCollationElements *elems)
{
    CollationElementIterator *iter = CollationElementIterator::fromUCollationElements(elems);
    iter->reset();
}
314
315
// Returns the next collation element, or UCOL_NULLORDER without touching the
// iterator when *status already indicates failure.
U_CAPI int32_t U_EXPORT2
ucol_next(UCollationElements *elems,
          UErrorCode         *status)
{
    int32_t order = UCOL_NULLORDER;

    if (U_SUCCESS(*status)) {
        order = CollationElementIterator::fromUCollationElements(elems)->next(*status);
    }

    return order;
}
325
326
U_NAMESPACE_BEGIN
327
328
/**
 * Returns the next processed CE that is not ignorable, iterating forward.
 *
 * Raw CEs are pulled from the underlying iterator and passed through
 * processCE() until one yields something other than UCOL_IGNORABLE or the
 * iterator returns UCOL_NULLORDER. The processed-CE buffer is reset first.
 *
 * @param ixLow  if non-NULL, receives the iterator offset taken just before
 *               the last raw CE was fetched
 * @param ixHigh if non-NULL, receives the iterator offset taken just after
 *               the last raw CE was fetched
 * @param status in/out error code; on entry failure nothing is read and the
 *               out-parameters are not written
 * @return the processed CE, or UCOL_PROCESSED_NULLORDER when *status is a
 *         failure on entry or the iterator returns UCOL_NULLORDER
 */
int64_t
UCollationPCE::nextProcessed(
                   int32_t            *ixLow,
                   int32_t            *ixHigh,
                   UErrorCode         *status)
{
    int64_t result = UCOL_IGNORABLE;
    // Signed, like the out-parameters they are copied to and like the
    // locals in previousProcessed().
    int32_t low = 0, high = 0;

    if (U_FAILURE(*status)) {
        return UCOL_PROCESSED_NULLORDER;
    }

    pceBuffer.reset();

    do {
        low = cei->getOffset();
        int32_t ce = cei->next(*status);
        high = cei->getOffset();

        if (ce == UCOL_NULLORDER) {
             result = UCOL_PROCESSED_NULLORDER;
             break;
        }

        result = processCE((uint32_t)ce);
    } while (result == UCOL_IGNORABLE);

    if (ixLow != NULL) {
        *ixLow = low;
    }

    if (ixHigh != NULL) {
        *ixHigh = high;
    }

    return result;
}
366
367
U_NAMESPACE_END
368
369
// Returns the previous collation element, or UCOL_NULLORDER without touching
// the iterator when *status already indicates failure.
U_CAPI int32_t U_EXPORT2
ucol_previous(UCollationElements *elems,
              UErrorCode         *status)
{
    int32_t order = UCOL_NULLORDER;

    if (U_SUCCESS(*status)) {
        order = CollationElementIterator::fromUCollationElements(elems)->previous(*status);
    }

    return order;
}
378
379
U_NAMESPACE_BEGIN
380
381
/**
 * Returns the previous processed CE that is not ignorable, iterating backward.
 *
 * Processed CEs left in pceBuffer by an earlier call are returned first.
 * When the buffer is empty, raw CEs are read backward up to and including
 * the first one that has a non-zero primary and is not a continuation. They
 * are then processed in the reverse of the order they were read, and every
 * result other than UCOL_IGNORABLE is pushed to pceBuffer. This repeats until
 * pceBuffer holds something or the iterator is exhausted.
 *
 * @param ixLow  if non-NULL, receives the low offset of the returned CE,
 *               or -1 when the iterator is exhausted
 * @param ixHigh if non-NULL, receives the high offset of the returned CE,
 *               or -1 when the iterator is exhausted
 * @param status in/out error code
 * @return the processed CE, or UCOL_PROCESSED_NULLORDER on failure or when
 *         there is nothing left to return
 */
int64_t
UCollationPCE::previousProcessed(
                   int32_t            *ixLow,
                   int32_t            *ixHigh,
                   UErrorCode         *status)
{
    if (U_FAILURE(*status)) {
        return UCOL_PROCESSED_NULLORDER;
    }

    // pceBuffer is deliberately not reset: it may still hold results
    // produced by an earlier call.

    while (pceBuffer.isEmpty()) {
        // Collect raw CEs up to a non-ignorable, non-continuation primary.
        RCEBuffer rawCEs;

        for (;;) {
            const int32_t high = cei->getOffset();
            const int32_t ce   = cei->previous(*status);
            const int32_t low  = cei->getOffset();

            if (ce == UCOL_NULLORDER) {
                if (rawCEs.isEmpty()) {
                    // Nothing collected and nothing buffered: we are done.
                    // **** Is -1 the right value for ixLow, ixHigh? ****
                    if (ixLow != NULL) {
                        *ixLow = -1;
                    }

                    if (ixHigh != NULL) {
                        *ixHigh = -1;
                    }

                    return UCOL_PROCESSED_NULLORDER;
                }

                // Process what was collected before the iterator ran out.
                break;
            }

            rawCEs.put((uint32_t)ce, low, high, *status);

            if (U_FAILURE(*status)
                    || !((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce))) {
                break;
            }
        }

        // Process the raw CEs, most recently read one first.
        while (U_SUCCESS(*status) && !rawCEs.isEmpty()) {
            const RCEI *raw = rawCEs.get();
            const int64_t processed = processCE(raw->ce);

            if (processed != UCOL_IGNORABLE) {
                pceBuffer.put(processed, raw->low, raw->high, *status);
            }
        }

        if (U_FAILURE(*status)) {
            return UCOL_PROCESSED_NULLORDER;
        }
    }

    // The loop above only ends normally once pceBuffer is non-empty.
    const PCEI *top = pceBuffer.get();

    if (ixLow != NULL) {
        *ixLow = top->low;
    }

    if (ixHigh != NULL) {
        *ixHigh = top->high;
    }

    return top->ce;
}
459
460
U_NAMESPACE_END
461
462
// Forwards to CollationElementIterator::getMaxExpansion() for `order`.
U_CAPI int32_t U_EXPORT2
ucol_getMaxExpansion(const UCollationElements *elems,
                           int32_t            order)
{
    // TODO: The old code masked the order according to strength and then did a binary search.
    // However this was probably at least partially broken because of the following comment.
    // Still, it might have found a match when this version may not.

    // FIXME: with a masked search, there might be more than one hit,
    // so we need to look forward and backward from the match to find all
    // of the hits...

    const int32_t maxExpansion =
        CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
    return maxExpansion;
}
476
477
/**
 * Points the iterator at new text.
 *
 * Does nothing when *status already indicates failure. A NULL text with a
 * non-zero textLength sets U_ILLEGAL_ARGUMENT_ERROR. A negative textLength
 * is passed to the UnicodeString constructor as "terminated".
 */
U_CAPI void U_EXPORT2
ucol_setText(      UCollationElements *elems,
             const UChar              *text,
                   int32_t            textLength,
                   UErrorCode         *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    const UBool textMissing = (text == NULL && textLength != 0);
    if (textMissing) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    UnicodeString wrapped((UBool)(textLength < 0), text, textLength);
    CollationElementIterator::fromUCollationElements(elems)->setText(wrapped, *status);
}
494
495
// Forwards to CollationElementIterator::getOffset() on the iterator behind the handle.
U_CAPI int32_t U_EXPORT2
ucol_getOffset(const UCollationElements *elems)
{
    const int32_t offset = CollationElementIterator::fromUCollationElements(elems)->getOffset();
    return offset;
}
500
501
// Forwards to CollationElementIterator::setOffset(); does nothing when
// *status already indicates failure.
U_CAPI void U_EXPORT2
ucol_setOffset(UCollationElements    *elems,
               int32_t           offset,
               UErrorCode            *status)
{
    if (U_SUCCESS(*status)) {
        CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
    }
}
512
513
// Extracts the primary weight: bits 16..31 of the collation element.
U_CAPI int32_t U_EXPORT2
ucol_primaryOrder (int32_t order)
{
    const int32_t upperHalf = order >> 16;
    return upperHalf & 0xffff;
}
518
519
// Extracts the secondary weight: bits 8..15 of the collation element.
U_CAPI int32_t U_EXPORT2
ucol_secondaryOrder (int32_t order)
{
    const int32_t withoutTertiary = order >> 8;
    return withoutTertiary & 0xff;
}
524
525
// Extracts the tertiary weight: the low 8 bits of the collation element.
U_CAPI int32_t U_EXPORT2
ucol_tertiaryOrder (int32_t order)
{
    const int32_t lowByteMask = 0xff;
    return order & lowByteMask;
}
530
531
#endif /* #if !UCONFIG_NO_COLLATION */