/src/icu/source/common/utrie_swap.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2018 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // utrie_swap.cpp |
5 | | // created: 2018aug08 Markus W. Scherer |
6 | | |
7 | | #include "unicode/utypes.h" |
8 | | #include "cmemory.h" |
9 | | #include "ucptrie_impl.h" |
10 | | #include "udataswp.h" |
11 | | #include "utrie.h" |
12 | | #include "utrie2_impl.h" |
13 | | |
14 | | // These functions for swapping different generations of ICU code point tries are here |
15 | | // so that their implementation files need not depend on swapper code, |
16 | | // need not depend on each other, and so that other swapper code |
17 | | // need not depend on other trie code. |
18 | | |
namespace {

// Smallest data length (counted in data entries, not bytes) that
// ucptrie_swap() accepts for a serialized UCPTrie.
// NOTE(review): the name suggests one entry per ASCII code point (0..0x7f) -- confirm.
constexpr int32_t ASCII_LIMIT{0x80};

}  // namespace
24 | | |
25 | | U_CAPI int32_t U_EXPORT2 |
26 | | utrie_swap(const UDataSwapper *ds, |
27 | | const void *inData, int32_t length, void *outData, |
28 | 0 | UErrorCode *pErrorCode) { |
29 | 0 | const UTrieHeader *inTrie; |
30 | 0 | UTrieHeader trie; |
31 | 0 | int32_t size; |
32 | 0 | UBool dataIs32; |
33 | |
|
34 | 0 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
35 | 0 | return 0; |
36 | 0 | } |
37 | 0 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { |
38 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
39 | 0 | return 0; |
40 | 0 | } |
41 | | |
42 | | /* setup and swapping */ |
43 | 0 | if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { |
44 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
45 | 0 | return 0; |
46 | 0 | } |
47 | | |
48 | 0 | inTrie=(const UTrieHeader *)inData; |
49 | 0 | trie.signature=ds->readUInt32(inTrie->signature); |
50 | 0 | trie.options=ds->readUInt32(inTrie->options); |
51 | 0 | trie.indexLength=udata_readInt32(ds, inTrie->indexLength); |
52 | 0 | trie.dataLength=udata_readInt32(ds, inTrie->dataLength); |
53 | |
|
54 | 0 | if( trie.signature!=0x54726965 || |
55 | 0 | (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || |
56 | 0 | ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || |
57 | 0 | trie.indexLength<UTRIE_BMP_INDEX_LENGTH || |
58 | 0 | (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || |
59 | 0 | trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || |
60 | 0 | (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || |
61 | 0 | ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) |
62 | 0 | ) { |
63 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
64 | 0 | return 0; |
65 | 0 | } |
66 | | |
67 | 0 | dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); |
68 | 0 | size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); |
69 | |
|
70 | 0 | if(length>=0) { |
71 | 0 | UTrieHeader *outTrie; |
72 | |
|
73 | 0 | if(length<size) { |
74 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
75 | 0 | return 0; |
76 | 0 | } |
77 | | |
78 | 0 | outTrie=(UTrieHeader *)outData; |
79 | | |
80 | | /* swap the header */ |
81 | 0 | ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); |
82 | | |
83 | | /* swap the index and the data */ |
84 | 0 | if(dataIs32) { |
85 | 0 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
86 | 0 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, |
87 | 0 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
88 | 0 | } else { |
89 | 0 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | 0 | return size; |
94 | 0 | } |
95 | | |
96 | | U_CAPI int32_t U_EXPORT2 |
97 | | utrie2_swap(const UDataSwapper *ds, |
98 | | const void *inData, int32_t length, void *outData, |
99 | 0 | UErrorCode *pErrorCode) { |
100 | 0 | const UTrie2Header *inTrie; |
101 | 0 | UTrie2Header trie; |
102 | 0 | int32_t dataLength, size; |
103 | 0 | UTrie2ValueBits valueBits; |
104 | |
|
105 | 0 | if(U_FAILURE(*pErrorCode)) { |
106 | 0 | return 0; |
107 | 0 | } |
108 | 0 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { |
109 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
110 | 0 | return 0; |
111 | 0 | } |
112 | | |
113 | | /* setup and swapping */ |
114 | 0 | if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { |
115 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
116 | 0 | return 0; |
117 | 0 | } |
118 | | |
119 | 0 | inTrie=(const UTrie2Header *)inData; |
120 | 0 | trie.signature=ds->readUInt32(inTrie->signature); |
121 | 0 | trie.options=ds->readUInt16(inTrie->options); |
122 | 0 | trie.indexLength=ds->readUInt16(inTrie->indexLength); |
123 | 0 | trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); |
124 | |
|
125 | 0 | valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); |
126 | 0 | dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
127 | |
|
128 | 0 | if( trie.signature!=UTRIE2_SIG || |
129 | 0 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || |
130 | 0 | trie.indexLength<UTRIE2_INDEX_1_OFFSET || |
131 | 0 | dataLength<UTRIE2_DATA_START_OFFSET |
132 | 0 | ) { |
133 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
134 | 0 | return 0; |
135 | 0 | } |
136 | | |
137 | 0 | size=sizeof(UTrie2Header)+trie.indexLength*2; |
138 | 0 | switch(valueBits) { |
139 | 0 | case UTRIE2_16_VALUE_BITS: |
140 | 0 | size+=dataLength*2; |
141 | 0 | break; |
142 | 0 | case UTRIE2_32_VALUE_BITS: |
143 | 0 | size+=dataLength*4; |
144 | 0 | break; |
145 | 0 | default: |
146 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
147 | 0 | return 0; |
148 | 0 | } |
149 | | |
150 | 0 | if(length>=0) { |
151 | 0 | UTrie2Header *outTrie; |
152 | |
|
153 | 0 | if(length<size) { |
154 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
155 | 0 | return 0; |
156 | 0 | } |
157 | | |
158 | 0 | outTrie=(UTrie2Header *)outData; |
159 | | |
160 | | /* swap the header */ |
161 | 0 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
162 | 0 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
163 | | |
164 | | /* swap the index and the data */ |
165 | 0 | switch(valueBits) { |
166 | 0 | case UTRIE2_16_VALUE_BITS: |
167 | 0 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); |
168 | 0 | break; |
169 | 0 | case UTRIE2_32_VALUE_BITS: |
170 | 0 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
171 | 0 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, |
172 | 0 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
173 | 0 | break; |
174 | 0 | default: |
175 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
176 | 0 | return 0; |
177 | 0 | } |
178 | 0 | } |
179 | | |
180 | 0 | return size; |
181 | 0 | } |
182 | | |
183 | | U_CAPI int32_t U_EXPORT2 |
184 | | ucptrie_swap(const UDataSwapper *ds, |
185 | | const void *inData, int32_t length, void *outData, |
186 | 0 | UErrorCode *pErrorCode) { |
187 | 0 | const UCPTrieHeader *inTrie; |
188 | 0 | UCPTrieHeader trie; |
189 | 0 | int32_t dataLength, size; |
190 | 0 | UCPTrieValueWidth valueWidth; |
191 | |
|
192 | 0 | if(U_FAILURE(*pErrorCode)) { |
193 | 0 | return 0; |
194 | 0 | } |
195 | 0 | if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { |
196 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
197 | 0 | return 0; |
198 | 0 | } |
199 | | |
200 | | /* setup and swapping */ |
201 | 0 | if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { |
202 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
203 | 0 | return 0; |
204 | 0 | } |
205 | | |
206 | 0 | inTrie=(const UCPTrieHeader *)inData; |
207 | 0 | trie.signature=ds->readUInt32(inTrie->signature); |
208 | 0 | trie.options=ds->readUInt16(inTrie->options); |
209 | 0 | trie.indexLength=ds->readUInt16(inTrie->indexLength); |
210 | 0 | trie.dataLength = ds->readUInt16(inTrie->dataLength); |
211 | |
|
212 | 0 | UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); |
213 | 0 | valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); |
214 | 0 | dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; |
215 | |
|
216 | 0 | int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? |
217 | 0 | UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; |
218 | 0 | if( trie.signature!=UCPTRIE_SIG || |
219 | 0 | type > UCPTRIE_TYPE_SMALL || |
220 | 0 | (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || |
221 | 0 | valueWidth > UCPTRIE_VALUE_BITS_8 || |
222 | 0 | trie.indexLength < minIndexLength || |
223 | 0 | dataLength < ASCII_LIMIT |
224 | 0 | ) { |
225 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ |
226 | 0 | return 0; |
227 | 0 | } |
228 | | |
229 | 0 | size=sizeof(UCPTrieHeader)+trie.indexLength*2; |
230 | 0 | switch(valueWidth) { |
231 | 0 | case UCPTRIE_VALUE_BITS_16: |
232 | 0 | size+=dataLength*2; |
233 | 0 | break; |
234 | 0 | case UCPTRIE_VALUE_BITS_32: |
235 | 0 | size+=dataLength*4; |
236 | 0 | break; |
237 | 0 | case UCPTRIE_VALUE_BITS_8: |
238 | 0 | size+=dataLength; |
239 | 0 | break; |
240 | 0 | default: |
241 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
242 | 0 | return 0; |
243 | 0 | } |
244 | | |
245 | 0 | if(length>=0) { |
246 | 0 | UCPTrieHeader *outTrie; |
247 | |
|
248 | 0 | if(length<size) { |
249 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
250 | 0 | return 0; |
251 | 0 | } |
252 | | |
253 | 0 | outTrie=(UCPTrieHeader *)outData; |
254 | | |
255 | | /* swap the header */ |
256 | 0 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
257 | 0 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
258 | | |
259 | | /* swap the index */ |
260 | 0 | const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1); |
261 | 0 | uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1); |
262 | 0 | ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode); |
263 | | |
264 | | /* swap the data */ |
265 | 0 | const uint16_t *inData=inIndex+trie.indexLength; |
266 | 0 | uint16_t *outData=outIndex+trie.indexLength; |
267 | 0 | switch(valueWidth) { |
268 | 0 | case UCPTRIE_VALUE_BITS_16: |
269 | 0 | ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode); |
270 | 0 | break; |
271 | 0 | case UCPTRIE_VALUE_BITS_32: |
272 | 0 | ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode); |
273 | 0 | break; |
274 | 0 | case UCPTRIE_VALUE_BITS_8: |
275 | 0 | if(inTrie!=outTrie) { |
276 | 0 | uprv_memmove(outData, inData, dataLength); |
277 | 0 | } |
278 | 0 | break; |
279 | 0 | default: |
280 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
281 | 0 | return 0; |
282 | 0 | } |
283 | 0 | } |
284 | | |
285 | 0 | return size; |
286 | 0 | } |
287 | | |
288 | | namespace { |
289 | | |
290 | | /** |
291 | | * Gets the trie version from 32-bit-aligned memory containing the serialized form |
292 | | * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). |
293 | | * |
294 | | * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie |
295 | | * @param length the number of bytes available at data; |
296 | | * can be more than necessary (see return value) |
297 | | * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. |
298 | | * If TRUE, opposite-endian serialized forms are recognized as well. |
299 | | * @return the trie version of the serialized form, or 0 if it is not |
300 | | * recognized as a serialized trie |
301 | | */ |
302 | | int32_t |
303 | 0 | getVersion(const void *data, int32_t length, UBool anyEndianOk) { |
304 | 0 | uint32_t signature; |
305 | 0 | if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { |
306 | 0 | return 0; |
307 | 0 | } |
308 | 0 | signature=*(const uint32_t *)data; |
309 | 0 | if(signature==UCPTRIE_SIG) { |
310 | 0 | return 3; |
311 | 0 | } |
312 | 0 | if(anyEndianOk && signature==UCPTRIE_OE_SIG) { |
313 | 0 | return 3; |
314 | 0 | } |
315 | 0 | if(signature==UTRIE2_SIG) { |
316 | 0 | return 2; |
317 | 0 | } |
318 | 0 | if(anyEndianOk && signature==UTRIE2_OE_SIG) { |
319 | 0 | return 2; |
320 | 0 | } |
321 | 0 | if(signature==UTRIE_SIG) { |
322 | 0 | return 1; |
323 | 0 | } |
324 | 0 | if(anyEndianOk && signature==UTRIE_OE_SIG) { |
325 | 0 | return 1; |
326 | 0 | } |
327 | 0 | return 0; |
328 | 0 | } |
329 | | |
330 | | } // namespace |
331 | | |
332 | | U_CAPI int32_t U_EXPORT2 |
333 | | utrie_swapAnyVersion(const UDataSwapper *ds, |
334 | | const void *inData, int32_t length, void *outData, |
335 | 0 | UErrorCode *pErrorCode) { |
336 | 0 | if(U_FAILURE(*pErrorCode)) { return 0; } |
337 | 0 | switch(getVersion(inData, length, TRUE)) { |
338 | 0 | case 1: |
339 | 0 | return utrie_swap(ds, inData, length, outData, pErrorCode); |
340 | 0 | case 2: |
341 | 0 | return utrie2_swap(ds, inData, length, outData, pErrorCode); |
342 | 0 | case 3: |
343 | 0 | return ucptrie_swap(ds, inData, length, outData, pErrorCode); |
344 | 0 | default: |
345 | 0 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
346 | 0 | return 0; |
347 | 0 | } |
348 | 0 | } |