Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/bytestrie.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2011, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  bytestrie.cpp
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010sep25
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/bytestream.h"
19
#include "unicode/bytestrie.h"
20
#include "unicode/uobject.h"
21
#include "cmemory.h"
22
#include "uassert.h"
23
24
U_NAMESPACE_BEGIN
25
26
71.1k
BytesTrie::~BytesTrie() {
27
71.1k
    uprv_free(ownedArray_);
28
71.1k
}
29
30
// lead byte already shifted right by 1.
31
int32_t
32
56
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
33
56
    int32_t value;
34
56
    if(leadByte<kMinTwoByteValueLead) {
35
32
        value=leadByte-kMinOneByteValueLead;
36
24
    } else if(leadByte<kMinThreeByteValueLead) {
37
24
        value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
38
0
    } else if(leadByte<kFourByteValueLead) {
39
0
        value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
40
0
    } else if(leadByte==kFourByteValueLead) {
41
0
        value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
42
0
    } else {
43
0
        value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
44
0
    }
45
56
    return value;
46
56
}
47
48
const uint8_t *
49
204k
BytesTrie::jumpByDelta(const uint8_t *pos) {
50
204k
    int32_t delta=*pos++;
51
204k
    if(delta<kMinTwoByteDeltaLead) {
52
        // nothing to do
53
181k
    } else if(delta<kMinThreeByteDeltaLead) {
54
92.8k
        delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
55
88.9k
    } else if(delta<kFourByteDeltaLead) {
56
88.9k
        delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
57
88.9k
        pos+=2;
58
0
    } else if(delta==kFourByteDeltaLead) {
59
0
        delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
60
0
        pos+=3;
61
0
    } else {
62
0
        delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
63
0
        pos+=4;
64
0
    }
65
204k
    return pos+delta;
66
204k
}
67
68
UStringTrieResult
69
0
BytesTrie::current() const {
70
0
    const uint8_t *pos=pos_;
71
0
    if(pos==NULL) {
72
0
        return USTRINGTRIE_NO_MATCH;
73
0
    } else {
74
0
        int32_t node;
75
0
        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
76
0
                valueResult(node) : USTRINGTRIE_NO_VALUE;
77
0
    }
78
0
}
79
80
UStringTrieResult
81
152k
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
82
    // Branch according to the current byte.
83
152k
    if(length==0) {
84
112k
        length=*pos++;
85
112k
    }
86
152k
    ++length;
87
    // The length of the branch is the number of bytes to select from.
88
    // The data structure encodes a binary search.
89
553k
    while(length>kMaxBranchLinearSubNodeLength) {
90
400k
        if(inByte<*pos++) {
91
204k
            length>>=1;
92
204k
            pos=jumpByDelta(pos);
93
195k
        } else {
94
195k
            length=length-(length>>1);
95
195k
            pos=skipDelta(pos);
96
195k
        }
97
400k
    }
98
    // Drop down to linear search for the last few bytes.
99
    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
100
    // and divides length by 2.
101
312k
    do {
102
312k
        if(inByte==*pos++) {
103
78.1k
            UStringTrieResult result;
104
78.1k
            int32_t node=*pos;
105
78.1k
            U_ASSERT(node>=kMinValueLead);
106
78.1k
            if(node&kValueIsFinal) {
107
                // Leave the final value for getValue() to read.
108
2.16k
                result=USTRINGTRIE_FINAL_VALUE;
109
75.9k
            } else {
110
                // Use the non-final value as the jump delta.
111
75.9k
                ++pos;
112
                // int32_t delta=readValue(pos, node>>1);
113
75.9k
                node>>=1;
114
75.9k
                int32_t delta;
115
75.9k
                if(node<kMinTwoByteValueLead) {
116
23.7k
                    delta=node-kMinOneByteValueLead;
117
52.2k
                } else if(node<kMinThreeByteValueLead) {
118
45.0k
                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
119
7.21k
                } else if(node<kFourByteValueLead) {
120
7.21k
                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
121
7.21k
                    pos+=2;
122
0
                } else if(node==kFourByteValueLead) {
123
0
                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
124
0
                    pos+=3;
125
0
                } else {
126
0
                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
127
0
                    pos+=4;
128
0
                }
129
                // end readValue()
130
75.9k
                pos+=delta;
131
75.9k
                node=*pos;
132
48.9k
                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
133
75.9k
            }
134
78.1k
            pos_=pos;
135
78.1k
            return result;
136
78.1k
        }
137
233k
        --length;
138
233k
        pos=skipValue(pos);
139
233k
    } while(length>1);
140
74.6k
    if(inByte==*pos++) {
141
27.1k
        pos_=pos;
142
27.1k
        int32_t node=*pos;
143
23.6k
        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
144
47.4k
    } else {
145
47.4k
        stop();
146
47.4k
        return USTRINGTRIE_NO_MATCH;
147
47.4k
    }
148
74.6k
}
149
150
UStringTrieResult
151
165k
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
152
210k
    for(;;) {
153
210k
        int32_t node=*pos++;
154
210k
        if(node<kMinLinearMatch) {
155
152k
            return branchNext(pos, node, inByte);
156
57.7k
        } else if(node<kMinValueLead) {
157
            // Match the first of length+1 bytes.
158
12.4k
            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
159
12.4k
            if(inByte==*pos++) {
160
3.97k
                remainingMatchLength_=--length;
161
3.97k
                pos_=pos;
162
3.97k
                return (length<0 && (node=*pos)>=kMinValueLead) ?
163
3.03k
                        valueResult(node) : USTRINGTRIE_NO_VALUE;
164
8.43k
            } else {
165
                // No match.
166
8.43k
                break;
167
8.43k
            }
168
45.3k
        } else if(node&kValueIsFinal) {
169
            // No further matching bytes.
170
0
            break;
171
45.3k
        } else {
172
            // Skip intermediate value.
173
45.3k
            pos=skipValue(pos, node);
174
            // The next node must not also be a value node.
175
45.3k
            U_ASSERT(*pos<kMinValueLead);
176
45.3k
        }
177
210k
    }
178
8.43k
    stop();
179
8.43k
    return USTRINGTRIE_NO_MATCH;
180
165k
}
181
182
UStringTrieResult
183
96.0k
BytesTrie::next(int32_t inByte) {
184
96.0k
    const uint8_t *pos=pos_;
185
96.0k
    if(pos==NULL) {
186
0
        return USTRINGTRIE_NO_MATCH;
187
0
    }
188
96.0k
    if(inByte<0) {
189
664
        inByte+=0x100;
190
664
    }
191
96.0k
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
192
96.0k
    if(length>=0) {
193
        // Remaining part of a linear-match node.
194
1.95k
        if(inByte==*pos++) {
195
541
            remainingMatchLength_=--length;
196
541
            pos_=pos;
197
541
            int32_t node;
198
541
            return (length<0 && (node=*pos)>=kMinValueLead) ?
199
324
                    valueResult(node) : USTRINGTRIE_NO_VALUE;
200
1.41k
        } else {
201
1.41k
            stop();
202
1.41k
            return USTRINGTRIE_NO_MATCH;
203
1.41k
        }
204
94.1k
    }
205
94.1k
    return nextImpl(pos, inByte);
206
94.1k
}
207
208
UStringTrieResult
209
0
BytesTrie::next(const char *s, int32_t sLength) {
210
0
    if(sLength<0 ? *s==0 : sLength==0) {
211
        // Empty input.
212
0
        return current();
213
0
    }
214
0
    const uint8_t *pos=pos_;
215
0
    if(pos==NULL) {
216
0
        return USTRINGTRIE_NO_MATCH;
217
0
    }
218
0
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
219
0
    for(;;) {
220
        // Fetch the next input byte, if there is one.
221
        // Continue a linear-match node without rechecking sLength<0.
222
0
        int32_t inByte;
223
0
        if(sLength<0) {
224
0
            for(;;) {
225
0
                if((inByte=*s++)==0) {
226
0
                    remainingMatchLength_=length;
227
0
                    pos_=pos;
228
0
                    int32_t node;
229
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
230
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
231
0
                }
232
0
                if(length<0) {
233
0
                    remainingMatchLength_=length;
234
0
                    break;
235
0
                }
236
0
                if(inByte!=*pos) {
237
0
                    stop();
238
0
                    return USTRINGTRIE_NO_MATCH;
239
0
                }
240
0
                ++pos;
241
0
                --length;
242
0
            }
243
0
        } else {
244
0
            for(;;) {
245
0
                if(sLength==0) {
246
0
                    remainingMatchLength_=length;
247
0
                    pos_=pos;
248
0
                    int32_t node;
249
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
250
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
251
0
                }
252
0
                inByte=*s++;
253
0
                --sLength;
254
0
                if(length<0) {
255
0
                    remainingMatchLength_=length;
256
0
                    break;
257
0
                }
258
0
                if(inByte!=*pos) {
259
0
                    stop();
260
0
                    return USTRINGTRIE_NO_MATCH;
261
0
                }
262
0
                ++pos;
263
0
                --length;
264
0
            }
265
0
        }
266
0
        for(;;) {
267
0
            int32_t node=*pos++;
268
0
            if(node<kMinLinearMatch) {
269
0
                UStringTrieResult result=branchNext(pos, node, inByte);
270
0
                if(result==USTRINGTRIE_NO_MATCH) {
271
0
                    return USTRINGTRIE_NO_MATCH;
272
0
                }
273
                // Fetch the next input byte, if there is one.
274
0
                if(sLength<0) {
275
0
                    if((inByte=*s++)==0) {
276
0
                        return result;
277
0
                    }
278
0
                } else {
279
0
                    if(sLength==0) {
280
0
                        return result;
281
0
                    }
282
0
                    inByte=*s++;
283
0
                    --sLength;
284
0
                }
285
0
                if(result==USTRINGTRIE_FINAL_VALUE) {
286
                    // No further matching bytes.
287
0
                    stop();
288
0
                    return USTRINGTRIE_NO_MATCH;
289
0
                }
290
0
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
291
0
            } else if(node<kMinValueLead) {
292
                // Match length+1 bytes.
293
0
                length=node-kMinLinearMatch;  // Actual match length minus 1.
294
0
                if(inByte!=*pos) {
295
0
                    stop();
296
0
                    return USTRINGTRIE_NO_MATCH;
297
0
                }
298
0
                ++pos;
299
0
                --length;
300
0
                break;
301
0
            } else if(node&kValueIsFinal) {
302
                // No further matching bytes.
303
0
                stop();
304
0
                return USTRINGTRIE_NO_MATCH;
305
0
            } else {
306
                // Skip intermediate value.
307
0
                pos=skipValue(pos, node);
308
                // The next node must not also be a value node.
309
0
                U_ASSERT(*pos<kMinValueLead);
310
0
            }
311
0
        }
312
0
    }
313
0
}
314
315
const uint8_t *
316
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
317
0
                                     UBool haveUniqueValue, int32_t &uniqueValue) {
318
0
    while(length>kMaxBranchLinearSubNodeLength) {
319
0
        ++pos;  // ignore the comparison byte
320
0
        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
321
0
            return NULL;
322
0
        }
323
0
        length=length-(length>>1);
324
0
        pos=skipDelta(pos);
325
0
    }
326
0
    do {
327
0
        ++pos;  // ignore a comparison byte
328
        // handle its value
329
0
        int32_t node=*pos++;
330
0
        UBool isFinal=(UBool)(node&kValueIsFinal);
331
0
        int32_t value=readValue(pos, node>>1);
332
0
        pos=skipValue(pos, node);
333
0
        if(isFinal) {
334
0
            if(haveUniqueValue) {
335
0
                if(value!=uniqueValue) {
336
0
                    return NULL;
337
0
                }
338
0
            } else {
339
0
                uniqueValue=value;
340
0
                haveUniqueValue=TRUE;
341
0
            }
342
0
        } else {
343
0
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
344
0
                return NULL;
345
0
            }
346
0
            haveUniqueValue=TRUE;
347
0
        }
348
0
    } while(--length>1);
349
0
    return pos+1;  // ignore the last comparison byte
350
0
}
351
352
UBool
353
0
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
354
0
    for(;;) {
355
0
        int32_t node=*pos++;
356
0
        if(node<kMinLinearMatch) {
357
0
            if(node==0) {
358
0
                node=*pos++;
359
0
            }
360
0
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
361
0
            if(pos==NULL) {
362
0
                return FALSE;
363
0
            }
364
0
            haveUniqueValue=TRUE;
365
0
        } else if(node<kMinValueLead) {
366
            // linear-match node
367
0
            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
368
0
        } else {
369
0
            UBool isFinal=(UBool)(node&kValueIsFinal);
370
0
            int32_t value=readValue(pos, node>>1);
371
0
            if(haveUniqueValue) {
372
0
                if(value!=uniqueValue) {
373
0
                    return FALSE;
374
0
                }
375
0
            } else {
376
0
                uniqueValue=value;
377
0
                haveUniqueValue=TRUE;
378
0
            }
379
0
            if(isFinal) {
380
0
                return TRUE;
381
0
            }
382
0
            pos=skipValue(pos, node);
383
0
        }
384
0
    }
385
0
}
386
387
int32_t
388
0
BytesTrie::getNextBytes(ByteSink &out) const {
389
0
    const uint8_t *pos=pos_;
390
0
    if(pos==NULL) {
391
0
        return 0;
392
0
    }
393
0
    if(remainingMatchLength_>=0) {
394
0
        append(out, *pos);  // Next byte of a pending linear-match node.
395
0
        return 1;
396
0
    }
397
0
    int32_t node=*pos++;
398
0
    if(node>=kMinValueLead) {
399
0
        if(node&kValueIsFinal) {
400
0
            return 0;
401
0
        } else {
402
0
            pos=skipValue(pos, node);
403
0
            node=*pos++;
404
0
            U_ASSERT(node<kMinValueLead);
405
0
        }
406
0
    }
407
0
    if(node<kMinLinearMatch) {
408
0
        if(node==0) {
409
0
            node=*pos++;
410
0
        }
411
0
        getNextBranchBytes(pos, ++node, out);
412
0
        return node;
413
0
    } else {
414
        // First byte of the linear-match node.
415
0
        append(out, *pos);
416
0
        return 1;
417
0
    }
418
0
}
419
420
void
421
0
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
422
0
    while(length>kMaxBranchLinearSubNodeLength) {
423
0
        ++pos;  // ignore the comparison byte
424
0
        getNextBranchBytes(jumpByDelta(pos), length>>1, out);
425
0
        length=length-(length>>1);
426
0
        pos=skipDelta(pos);
427
0
    }
428
0
    do {
429
0
        append(out, *pos++);
430
0
        pos=skipValue(pos);
431
0
    } while(--length>1);
432
0
    append(out, *pos);
433
0
}
434
435
void
436
0
BytesTrie::append(ByteSink &out, int c) {
437
0
    char ch=(char)c;
438
0
    out.Append(&ch, 1);
439
0
}
440
441
U_NAMESPACE_END