Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/dictionarydata.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2014-2016, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* dictionarydata.h
9
*
10
* created on: 2012may31
11
* created by: Markus W. Scherer & Maxime Serrano
12
*/
13
14
#include "dictionarydata.h"
15
#include "unicode/ucharstrie.h"
16
#include "unicode/bytestrie.h"
17
#include "unicode/udata.h"
18
#include "cmemory.h"
19
20
#if !UCONFIG_NO_BREAK_ITERATION
21
22
U_NAMESPACE_BEGIN
23
24
const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;
25
const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;
26
const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;
27
const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;
28
29
const int32_t  DictionaryData::TRANSFORM_NONE = 0;
30
const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
31
const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
32
const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
33
    
34
0
DictionaryMatcher::~DictionaryMatcher() {
35
0
}
36
37
0
UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
38
0
    udata_close(file);
39
0
}
40
41
0
int32_t UCharsDictionaryMatcher::getType() const {
42
0
    return DictionaryData::TRIE_TYPE_UCHARS;
43
0
}
44
45
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
46
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
47
0
                            int32_t *prefix) const {
48
49
0
    UCharsTrie uct(characters);
50
0
    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
51
0
    int32_t wordCount = 0;
52
0
    int32_t codePointsMatched = 0;
53
54
0
    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
55
0
        UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
56
0
        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
57
0
        codePointsMatched += 1;
58
0
        if (USTRINGTRIE_HAS_VALUE(result)) {
59
0
            if (wordCount < limit) {
60
0
                if (values != NULL) {
61
0
                    values[wordCount] = uct.getValue();
62
0
                }
63
0
                if (lengths != NULL) {
64
0
                    lengths[wordCount] = lengthMatched;
65
0
                }
66
0
                if (cpLengths != NULL) {
67
0
                    cpLengths[wordCount] = codePointsMatched;
68
0
                }
69
0
                ++wordCount;
70
0
            }
71
0
            if (result == USTRINGTRIE_FINAL_VALUE) {
72
0
                break;
73
0
            }
74
0
        }
75
0
        else if (result == USTRINGTRIE_NO_MATCH) {
76
0
            break;
77
0
        }
78
0
        if (lengthMatched >= maxLength) {
79
0
            break;
80
0
        }
81
0
    }
82
83
0
    if (prefix != NULL) {
84
0
        *prefix = codePointsMatched;
85
0
    }
86
0
    return wordCount;
87
0
}
88
89
0
BytesDictionaryMatcher::~BytesDictionaryMatcher() {
90
0
    udata_close(file);
91
0
}
92
93
166k
UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
94
166k
    if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
95
166k
        if (c == 0x200D) {
96
0
            return 0xFF;
97
166k
        } else if (c == 0x200C) {
98
0
            return 0xFE;
99
0
        }
100
166k
        int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
101
166k
        if (delta < 0 || 0xFD < delta) {
102
1.01k
            return U_SENTINEL;
103
1.01k
        }
104
165k
        return (UChar32)delta;
105
165k
    }
106
0
    return c;
107
0
}
108
109
0
int32_t BytesDictionaryMatcher::getType() const {
110
0
    return DictionaryData::TRIE_TYPE_BYTES;
111
0
}
112
113
int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
114
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
115
71.0k
                            int32_t *prefix) const {
116
71.0k
    BytesTrie bt(characters);
117
71.0k
    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
118
71.0k
    int32_t wordCount = 0;
119
71.0k
    int32_t codePointsMatched = 0;
120
121
166k
    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
122
95.8k
        UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
123
166k
        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
124
166k
        codePointsMatched += 1;
125
166k
        if (USTRINGTRIE_HAS_VALUE(result)) {
126
54.0k
            if (wordCount < limit) {
127
54.0k
                if (values != NULL) {
128
0
                    values[wordCount] = bt.getValue();
129
0
                }
130
54.0k
                if (lengths != NULL) {
131
54.0k
                    lengths[wordCount] = lengthMatched;
132
54.0k
                }
133
54.0k
                if (cpLengths != NULL) {
134
54.0k
                    cpLengths[wordCount] = codePointsMatched;
135
54.0k
                }
136
54.0k
                ++wordCount;
137
54.0k
            }
138
54.0k
            if (result == USTRINGTRIE_FINAL_VALUE) {
139
3.39k
                break;
140
3.39k
            }
141
112k
        }
142
112k
        else if (result == USTRINGTRIE_NO_MATCH) {
143
57.2k
            break;
144
57.2k
        }
145
106k
        if (lengthMatched >= maxLength) {
146
10.3k
            break;
147
10.3k
        }
148
106k
    }
149
150
71.0k
    if (prefix != NULL) {
151
71.0k
        *prefix = codePointsMatched;
152
71.0k
    }
153
71.0k
    return wordCount;
154
71.0k
}
155
156
157
U_NAMESPACE_END
158
159
U_NAMESPACE_USE
160
161
U_CAPI int32_t U_EXPORT2
162
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
163
0
           void *outData, UErrorCode *pErrorCode) {
164
0
    const UDataInfo *pInfo;
165
0
    int32_t headerSize;
166
0
    const uint8_t *inBytes;
167
0
    uint8_t *outBytes;
168
0
    const int32_t *inIndexes;
169
0
    int32_t indexes[DictionaryData::IX_COUNT];
170
0
    int32_t i, offset, size;
171
172
0
    headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
173
0
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
174
0
    pInfo = (const UDataInfo *)((const char *)inData + 4);
175
0
    if (!(pInfo->dataFormat[0] == 0x44 && 
176
0
          pInfo->dataFormat[1] == 0x69 && 
177
0
          pInfo->dataFormat[2] == 0x63 && 
178
0
          pInfo->dataFormat[3] == 0x74 && 
179
0
          pInfo->formatVersion[0] == 1)) {
180
0
        udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
181
0
                         pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
182
0
        *pErrorCode = U_UNSUPPORTED_ERROR;
183
0
        return 0;
184
0
    }
185
186
0
    inBytes = (const uint8_t *)inData + headerSize;
187
0
    outBytes = (uint8_t *)outData + headerSize;
188
189
0
    inIndexes = (const int32_t *)inBytes;
190
0
    if (length >= 0) {
191
0
        length -= headerSize;
192
0
        if (length < (int32_t)(sizeof(indexes))) {
193
0
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
194
0
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
195
0
            return 0;
196
0
        }
197
0
    }
198
199
0
    for (i = 0; i < DictionaryData::IX_COUNT; i++) {
200
0
        indexes[i] = udata_readInt32(ds, inIndexes[i]);
201
0
    }
202
203
0
    size = indexes[DictionaryData::IX_TOTAL_SIZE];
204
205
0
    if (length >= 0) {
206
0
        if (length < size) {
207
0
            udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
208
0
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
209
0
            return 0;
210
0
        }
211
212
0
        if (inBytes != outBytes) {
213
0
            uprv_memcpy(outBytes, inBytes, size);
214
0
        }
215
216
0
        offset = 0;
217
0
        ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
218
0
        offset = (int32_t)sizeof(indexes);
219
0
        int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
220
0
        int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
221
222
0
        if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
223
0
            ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
224
0
        } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
225
            // nothing to do
226
0
        } else {
227
0
            udata_printError(ds, "udict_swap(): unknown trie type!\n");
228
0
            *pErrorCode = U_UNSUPPORTED_ERROR;
229
0
            return 0;
230
0
        }
231
232
        // these next two sections are empty in the current format,
233
        // but may be used later.
234
0
        offset = nextOffset;
235
0
        nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
236
0
        offset = nextOffset;
237
0
        nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
238
0
        offset = nextOffset;
239
0
    }
240
0
    return headerSize + size;
241
0
}
242
#endif