Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/ucharstriebuilder.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2012, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  ucharstriebuilder.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010nov14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/ucharstrie.h"
19
#include "unicode/ucharstriebuilder.h"
20
#include "unicode/unistr.h"
21
#include "unicode/ustring.h"
22
#include "cmemory.h"
23
#include "uarrsort.h"
24
#include "uassert.h"
25
#include "uhash.h"
26
#include "ustr_imp.h"
27
28
U_NAMESPACE_BEGIN
29
30
/*
31
 * Note: This builder implementation stores (string, value) pairs with full copies
32
 * of the 16-bit-unit sequences, until the UCharsTrie is built.
33
 * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
34
 */
35
36
class UCharsTrieElement : public UMemory {
37
public:
38
    // Use compiler's default constructor, initializes nothing.
39
40
    void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode);
41
42
0
    UnicodeString getString(const UnicodeString &strings) const {
43
0
        int32_t length=strings[stringOffset];
44
0
        return strings.tempSubString(stringOffset+1, length);
45
0
    }
46
0
    int32_t getStringLength(const UnicodeString &strings) const {
47
0
        return strings[stringOffset];
48
0
    }
49
50
0
    UChar charAt(int32_t index, const UnicodeString &strings) const {
51
0
        return strings[stringOffset+1+index];
52
0
    }
53
54
0
    int32_t getValue() const { return value; }
55
56
    int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const;
57
58
private:
59
    // The first strings unit contains the string length.
60
    // (Compared with a stringLength field here, this saves 2 bytes per string.)
61
    int32_t stringOffset;
62
    int32_t value;
63
};
64
65
void
66
UCharsTrieElement::setTo(const UnicodeString &s, int32_t val,
67
0
                         UnicodeString &strings, UErrorCode &errorCode) {
68
0
    if(U_FAILURE(errorCode)) {
69
0
        return;
70
0
    }
71
0
    int32_t length=s.length();
72
0
    if(length>0xffff) {
73
        // Too long: We store the length in 1 unit.
74
0
        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
75
0
        return;
76
0
    }
77
0
    stringOffset=strings.length();
78
0
    strings.append((UChar)length);
79
0
    value=val;
80
0
    strings.append(s);
81
0
}
82
83
int32_t
84
0
UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const {
85
0
    return getString(strings).compare(other.getString(strings));
86
0
}
87
88
UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/)
89
        : elements(NULL), elementsCapacity(0), elementsLength(0),
90
0
          uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
91
92
0
UCharsTrieBuilder::~UCharsTrieBuilder() {
93
0
    delete[] elements;
94
0
    uprv_free(uchars);
95
0
}
96
97
UCharsTrieBuilder &
98
0
UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
99
0
    if(U_FAILURE(errorCode)) {
100
0
        return *this;
101
0
    }
102
0
    if(ucharsLength>0) {
103
        // Cannot add elements after building.
104
0
        errorCode=U_NO_WRITE_PERMISSION;
105
0
        return *this;
106
0
    }
107
0
    if(elementsLength==elementsCapacity) {
108
0
        int32_t newCapacity;
109
0
        if(elementsCapacity==0) {
110
0
            newCapacity=1024;
111
0
        } else {
112
0
            newCapacity=4*elementsCapacity;
113
0
        }
114
0
        UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity];
115
0
        if(newElements==NULL) {
116
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
117
0
            return *this;
118
0
        }
119
0
        if(elementsLength>0) {
120
0
            uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement));
121
0
        }
122
0
        delete[] elements;
123
0
        elements=newElements;
124
0
        elementsCapacity=newCapacity;
125
0
    }
126
0
    elements[elementsLength++].setTo(s, value, strings, errorCode);
127
0
    if(U_SUCCESS(errorCode) && strings.isBogus()) {
128
0
        errorCode=U_MEMORY_ALLOCATION_ERROR;
129
0
    }
130
0
    return *this;
131
0
}
132
133
U_CDECL_BEGIN
134
135
static int32_t U_CALLCONV
136
0
compareElementStrings(const void *context, const void *left, const void *right) {
137
0
    const UnicodeString *strings=static_cast<const UnicodeString *>(context);
138
0
    const UCharsTrieElement *leftElement=static_cast<const UCharsTrieElement *>(left);
139
0
    const UCharsTrieElement *rightElement=static_cast<const UCharsTrieElement *>(right);
140
0
    return leftElement->compareStringTo(*rightElement, *strings);
141
0
}
142
143
U_CDECL_END
144
145
UCharsTrie *
146
0
UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
147
0
    buildUChars(buildOption, errorCode);
148
0
    UCharsTrie *newTrie=NULL;
149
0
    if(U_SUCCESS(errorCode)) {
150
0
        newTrie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength));
151
0
        if(newTrie==NULL) {
152
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
153
0
        } else {
154
0
            uchars=NULL;  // The new trie now owns the array.
155
0
            ucharsCapacity=0;
156
0
        }
157
0
    }
158
0
    return newTrie;
159
0
}
160
161
UnicodeString &
162
UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
163
0
                                      UErrorCode &errorCode) {
164
0
    buildUChars(buildOption, errorCode);
165
0
    if(U_SUCCESS(errorCode)) {
166
0
        result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
167
0
    }
168
0
    return result;
169
0
}
170
171
void
172
0
UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
173
0
    if(U_FAILURE(errorCode)) {
174
0
        return;
175
0
    }
176
0
    if(uchars!=NULL && ucharsLength>0) {
177
        // Already built.
178
0
        return;
179
0
    }
180
0
    if(ucharsLength==0) {
181
0
        if(elementsLength==0) {
182
0
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
183
0
            return;
184
0
        }
185
0
        if(strings.isBogus()) {
186
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
187
0
            return;
188
0
        }
189
0
        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement),
190
0
                      compareElementStrings, &strings,
191
0
                      FALSE,  // need not be a stable sort
192
0
                      &errorCode);
193
0
        if(U_FAILURE(errorCode)) {
194
0
            return;
195
0
        }
196
        // Duplicate strings are not allowed.
197
0
        UnicodeString prev=elements[0].getString(strings);
198
0
        for(int32_t i=1; i<elementsLength; ++i) {
199
0
            UnicodeString current=elements[i].getString(strings);
200
0
            if(prev==current) {
201
0
                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
202
0
                return;
203
0
            }
204
0
            prev.fastCopyFrom(current);
205
0
        }
206
0
    }
207
    // Create and UChar-serialize the trie for the elements.
208
0
    ucharsLength=0;
209
0
    int32_t capacity=strings.length();
210
0
    if(capacity<1024) {
211
0
        capacity=1024;
212
0
    }
213
0
    if(ucharsCapacity<capacity) {
214
0
        uprv_free(uchars);
215
0
        uchars=static_cast<UChar *>(uprv_malloc(capacity*2));
216
0
        if(uchars==NULL) {
217
0
            errorCode=U_MEMORY_ALLOCATION_ERROR;
218
0
            ucharsCapacity=0;
219
0
            return;
220
0
        }
221
0
        ucharsCapacity=capacity;
222
0
    }
223
0
    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
224
0
    if(uchars==NULL) {
225
0
        errorCode=U_MEMORY_ALLOCATION_ERROR;
226
0
    }
227
0
}
228
229
int32_t
230
0
UCharsTrieBuilder::getElementStringLength(int32_t i) const {
231
0
    return elements[i].getStringLength(strings);
232
0
}
233
234
UChar
235
0
UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const {
236
0
    return elements[i].charAt(unitIndex, strings);
237
0
}
238
239
int32_t
240
0
UCharsTrieBuilder::getElementValue(int32_t i) const {
241
0
    return elements[i].getValue();
242
0
}
243
244
int32_t
245
0
UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const {
246
0
    const UCharsTrieElement &firstElement=elements[first];
247
0
    const UCharsTrieElement &lastElement=elements[last];
248
0
    int32_t minStringLength=firstElement.getStringLength(strings);
249
0
    while(++unitIndex<minStringLength &&
250
0
            firstElement.charAt(unitIndex, strings)==
251
0
            lastElement.charAt(unitIndex, strings)) {}
252
0
    return unitIndex;
253
0
}
254
255
int32_t
256
0
UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const {
257
0
    int32_t length=0;  // Number of different units at unitIndex.
258
0
    int32_t i=start;
259
0
    do {
260
0
        UChar unit=elements[i++].charAt(unitIndex, strings);
261
0
        while(i<limit && unit==elements[i].charAt(unitIndex, strings)) {
262
0
            ++i;
263
0
        }
264
0
        ++length;
265
0
    } while(i<limit);
266
0
    return length;
267
0
}
268
269
int32_t
270
0
UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const {
271
0
    do {
272
0
        UChar unit=elements[i++].charAt(unitIndex, strings);
273
0
        while(unit==elements[i].charAt(unitIndex, strings)) {
274
0
            ++i;
275
0
        }
276
0
    } while(--count>0);
277
0
    return i;
278
0
}
279
280
int32_t
281
0
UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const {
282
0
    while(unit==elements[i].charAt(unitIndex, strings)) {
283
0
        ++i;
284
0
    }
285
0
    return i;
286
0
}
287
288
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
289
0
        : LinearMatchNode(len, nextNode), s(units) {
290
0
    hash=hash*37u+ustr_hashUCharsN(units, len);
291
0
}
292
293
UBool
294
0
UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
295
0
    if(this==&other) {
296
0
        return TRUE;
297
0
    }
298
0
    if(!LinearMatchNode::operator==(other)) {
299
0
        return FALSE;
300
0
    }
301
0
    const UCTLinearMatchNode &o=(const UCTLinearMatchNode &)other;
302
0
    return 0==u_memcmp(s, o.s, length);
303
0
}
304
305
void
306
0
UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) {
307
0
    UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder;
308
0
    next->write(builder);
309
0
    b.write(s, length);
310
0
    offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1);
311
0
}
312
313
StringTrieBuilder::Node *
314
UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
315
0
                                         Node *nextNode) const {
316
0
    return new UCTLinearMatchNode(
317
0
            elements[i].getString(strings).getBuffer()+unitIndex,
318
0
            length,
319
0
            nextNode);
320
0
}
321
322
UBool
323
0
UCharsTrieBuilder::ensureCapacity(int32_t length) {
324
0
    if(uchars==NULL) {
325
0
        return FALSE;  // previous memory allocation had failed
326
0
    }
327
0
    if(length>ucharsCapacity) {
328
0
        int32_t newCapacity=ucharsCapacity;
329
0
        do {
330
0
            newCapacity*=2;
331
0
        } while(newCapacity<=length);
332
0
        UChar *newUChars=static_cast<UChar *>(uprv_malloc(newCapacity*2));
333
0
        if(newUChars==NULL) {
334
            // unable to allocate memory
335
0
            uprv_free(uchars);
336
0
            uchars=NULL;
337
0
            ucharsCapacity=0;
338
0
            return FALSE;
339
0
        }
340
0
        u_memcpy(newUChars+(newCapacity-ucharsLength),
341
0
                 uchars+(ucharsCapacity-ucharsLength), ucharsLength);
342
0
        uprv_free(uchars);
343
0
        uchars=newUChars;
344
0
        ucharsCapacity=newCapacity;
345
0
    }
346
0
    return TRUE;
347
0
}
348
349
int32_t
350
0
UCharsTrieBuilder::write(int32_t unit) {
351
0
    int32_t newLength=ucharsLength+1;
352
0
    if(ensureCapacity(newLength)) {
353
0
        ucharsLength=newLength;
354
0
        uchars[ucharsCapacity-ucharsLength]=(UChar)unit;
355
0
    }
356
0
    return ucharsLength;
357
0
}
358
359
int32_t
360
0
UCharsTrieBuilder::write(const UChar *s, int32_t length) {
361
0
    int32_t newLength=ucharsLength+length;
362
0
    if(ensureCapacity(newLength)) {
363
0
        ucharsLength=newLength;
364
0
        u_memcpy(uchars+(ucharsCapacity-ucharsLength), s, length);
365
0
    }
366
0
    return ucharsLength;
367
0
}
368
369
int32_t
370
0
UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) {
371
0
    return write(elements[i].getString(strings).getBuffer()+unitIndex, length);
372
0
}
373
374
int32_t
375
0
UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
376
0
    if(0<=i && i<=UCharsTrie::kMaxOneUnitValue) {
377
0
        return write(i|(isFinal<<15));
378
0
    }
379
0
    UChar intUnits[3];
380
0
    int32_t length;
381
0
    if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) {
382
0
        intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead);
383
0
        intUnits[1]=(UChar)((uint32_t)i>>16);
384
0
        intUnits[2]=(UChar)i;
385
0
        length=3;
386
    // } else if(i<=UCharsTrie::kMaxOneUnitValue) {
387
    //     intUnits[0]=(UChar)(i);
388
    //     length=1;
389
0
    } else {
390
0
        intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16));
391
0
        intUnits[1]=(UChar)i;
392
0
        length=2;
393
0
    }
394
0
    intUnits[0]=(UChar)(intUnits[0]|(isFinal<<15));
395
0
    return write(intUnits, length);
396
0
}
397
398
int32_t
399
0
UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
400
0
    if(!hasValue) {
401
0
        return write(node);
402
0
    }
403
0
    UChar intUnits[3];
404
0
    int32_t length;
405
0
    if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) {
406
0
        intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead);
407
0
        intUnits[1]=(UChar)((uint32_t)value>>16);
408
0
        intUnits[2]=(UChar)value;
409
0
        length=3;
410
0
    } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) {
411
0
        intUnits[0]=(UChar)((value+1)<<6);
412
0
        length=1;
413
0
    } else {
414
0
        intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
415
0
        intUnits[1]=(UChar)value;
416
0
        length=2;
417
0
    }
418
0
    intUnits[0]|=(UChar)node;
419
0
    return write(intUnits, length);
420
0
}
421
422
int32_t
423
0
UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
424
0
    int32_t i=ucharsLength-jumpTarget;
425
0
    U_ASSERT(i>=0);
426
0
    if(i<=UCharsTrie::kMaxOneUnitDelta) {
427
0
        return write(i);
428
0
    }
429
0
    UChar intUnits[3];
430
0
    int32_t length;
431
0
    if(i<=UCharsTrie::kMaxTwoUnitDelta) {
432
0
        intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16));
433
0
        length=1;
434
0
    } else {
435
0
        intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead);
436
0
        intUnits[1]=(UChar)(i>>16);
437
0
        length=2;
438
0
    }
439
0
    intUnits[length++]=(UChar)i;
440
0
    return write(intUnits, length);
441
0
}
442
443
U_NAMESPACE_END