/src/icu/source/common/ucnv_ext.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ****************************************************************************** |
5 | | * |
6 | | * Copyright (C) 2003-2016, International Business Machines |
7 | | * Corporation and others. All Rights Reserved. |
8 | | * |
9 | | ****************************************************************************** |
10 | | * file name: ucnv_ext.cpp |
11 | | * encoding: UTF-8 |
12 | | * tab size: 8 (not used) |
13 | | * indentation:4 |
14 | | * |
15 | | * created on: 2003jun13 |
16 | | * created by: Markus W. Scherer |
17 | | * |
18 | | * Conversion extensions |
19 | | */ |
20 | | |
21 | | #include "unicode/utypes.h" |
22 | | |
23 | | #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
24 | | |
25 | | #include "unicode/uset.h" |
26 | | #include "unicode/ustring.h" |
27 | | #include "ucnv_bld.h" |
28 | | #include "ucnv_cnv.h" |
29 | | #include "ucnv_ext.h" |
30 | | #include "cmemory.h" |
31 | | #include "uassert.h" |
32 | | |
33 | | /* to Unicode --------------------------------------------------------------- */ |
34 | | |
35 | | /* |
36 | | * @return lookup value for the byte, if found; else 0 |
37 | | */ |
38 | | static inline uint32_t |
39 | 0 | ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { |
40 | 0 | uint32_t word0, word; |
41 | 0 | int32_t i, start, limit; |
42 | | |
43 | | /* check the input byte against the lowest and highest section bytes */ |
44 | 0 | start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); |
45 | 0 | limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); |
46 | 0 | if(byte<start || limit<byte) { |
47 | 0 | return 0; /* the byte is out of range */ |
48 | 0 | } |
49 | | |
50 | 0 | if(length==((limit-start)+1)) { |
51 | | /* direct access on a linear array */ |
52 | 0 | return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ |
53 | 0 | } |
54 | | |
55 | | /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ |
56 | 0 | word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); |
57 | | |
58 | | /* |
59 | | * Shift byte once instead of each section word and add 0xffffff. |
60 | | * We will compare the shifted/added byte (bbffffff) against |
61 | | * section words which have byte values in the same bit position. |
62 | | * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv |
63 | | * for all v=0..f |
64 | | * so we need not mask off the lower 24 bits of each section word. |
65 | | */ |
66 | 0 | word=word0|UCNV_EXT_TO_U_VALUE_MASK; |
67 | | |
68 | | /* binary search */ |
69 | 0 | start=0; |
70 | 0 | limit=length; |
71 | 0 | for(;;) { |
72 | 0 | i=limit-start; |
73 | 0 | if(i<=1) { |
74 | 0 | break; /* done */ |
75 | 0 | } |
76 | | /* start<limit-1 */ |
77 | | |
78 | 0 | if(i<=4) { |
79 | | /* linear search for the last part */ |
80 | 0 | if(word0<=toUSection[start]) { |
81 | 0 | break; |
82 | 0 | } |
83 | 0 | if(++start<limit && word0<=toUSection[start]) { |
84 | 0 | break; |
85 | 0 | } |
86 | 0 | if(++start<limit && word0<=toUSection[start]) { |
87 | 0 | break; |
88 | 0 | } |
89 | | /* always break at start==limit-1 */ |
90 | 0 | ++start; |
91 | 0 | break; |
92 | 0 | } |
93 | | |
94 | 0 | i=(start+limit)/2; |
95 | 0 | if(word<toUSection[i]) { |
96 | 0 | limit=i; |
97 | 0 | } else { |
98 | 0 | start=i; |
99 | 0 | } |
100 | 0 | } |
101 | | |
102 | | /* did we really find it? */ |
103 | 0 | if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { |
104 | 0 | return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ |
105 | 0 | } else { |
106 | 0 | return 0; /* not found */ |
107 | 0 | } |
108 | 0 | } |
109 | | |
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * a match of exactly 1 byte in state 0, a match of any other length
 * in every other state.
 *
 * Both arguments are fully parenthesized so that any expression can be
 * passed for them; sisoState is evaluated up to twice.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
116 | | |
117 | | /* |
118 | | * this works like ucnv_extMatchFromU() except |
119 | | * - the first character is in pre |
120 | | * - no trie is used |
121 | | * - the returned matchLength is not offset by 2 |
122 | | */ |
123 | | static int32_t |
124 | | ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, |
125 | | const char *pre, int32_t preLength, |
126 | | const char *src, int32_t srcLength, |
127 | | uint32_t *pMatchValue, |
128 | 0 | UBool /*useFallback*/, UBool flush) { |
129 | 0 | const uint32_t *toUTable, *toUSection; |
130 | |
|
131 | 0 | uint32_t value, matchValue; |
132 | 0 | int32_t i, j, idx, length, matchLength; |
133 | 0 | uint8_t b; |
134 | |
|
135 | 0 | if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { |
136 | 0 | return 0; /* no extension data, no match */ |
137 | 0 | } |
138 | | |
139 | | /* initialize */ |
140 | 0 | toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); |
141 | 0 | idx=0; |
142 | |
|
143 | 0 | matchValue=0; |
144 | 0 | i=j=matchLength=0; |
145 | |
|
146 | 0 | if(sisoState==0) { |
147 | | /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ |
148 | 0 | if(preLength>1) { |
149 | 0 | return 0; /* no match of a DBCS sequence in SBCS mode */ |
150 | 0 | } else if(preLength==1) { |
151 | 0 | srcLength=0; |
152 | 0 | } else /* preLength==0 */ { |
153 | 0 | if(srcLength>1) { |
154 | 0 | srcLength=1; |
155 | 0 | } |
156 | 0 | } |
157 | 0 | flush=TRUE; |
158 | 0 | } |
159 | | |
160 | | /* we must not remember fallback matches when not using fallbacks */ |
161 | | |
162 | | /* match input units until there is a full match or the input is consumed */ |
163 | 0 | for(;;) { |
164 | | /* go to the next section */ |
165 | 0 | toUSection=toUTable+idx; |
166 | | |
167 | | /* read first pair of the section */ |
168 | 0 | value=*toUSection++; |
169 | 0 | length=UCNV_EXT_TO_U_GET_BYTE(value); |
170 | 0 | value=UCNV_EXT_TO_U_GET_VALUE(value); |
171 | 0 | if( value!=0 && |
172 | 0 | (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || |
173 | 0 | TO_U_USE_FALLBACK(useFallback)) && |
174 | 0 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) |
175 | 0 | ) { |
176 | | /* remember longest match so far */ |
177 | 0 | matchValue=value; |
178 | 0 | matchLength=i+j; |
179 | 0 | } |
180 | | |
181 | | /* match pre[] then src[] */ |
182 | 0 | if(i<preLength) { |
183 | 0 | b=(uint8_t)pre[i++]; |
184 | 0 | } else if(j<srcLength) { |
185 | 0 | b=(uint8_t)src[j++]; |
186 | 0 | } else { |
187 | | /* all input consumed, partial match */ |
188 | 0 | if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { |
189 | | /* |
190 | | * end of the entire input stream, stop with the longest match so far |
191 | | * or: partial match must not be longer than UCNV_EXT_MAX_BYTES |
192 | | * because it must fit into state buffers |
193 | | */ |
194 | 0 | break; |
195 | 0 | } else { |
196 | | /* continue with more input next time */ |
197 | 0 | return -length; |
198 | 0 | } |
199 | 0 | } |
200 | | |
201 | | /* search for the current UChar */ |
202 | 0 | value=ucnv_extFindToU(toUSection, length, b); |
203 | 0 | if(value==0) { |
204 | | /* no match here, stop with the longest match so far */ |
205 | 0 | break; |
206 | 0 | } else { |
207 | 0 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
208 | | /* partial match, continue */ |
209 | 0 | idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); |
210 | 0 | } else { |
211 | 0 | if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || |
212 | 0 | TO_U_USE_FALLBACK(useFallback)) && |
213 | 0 | UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) |
214 | 0 | ) { |
215 | | /* full match, stop with result */ |
216 | 0 | matchValue=value; |
217 | 0 | matchLength=i+j; |
218 | 0 | } else { |
219 | | /* full match on fallback not taken, stop with the longest match so far */ |
220 | 0 | } |
221 | 0 | break; |
222 | 0 | } |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | 0 | if(matchLength==0) { |
227 | | /* no match at all */ |
228 | 0 | return 0; |
229 | 0 | } |
230 | | |
231 | | /* return result */ |
232 | 0 | *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); |
233 | 0 | return matchLength; |
234 | 0 | } |
235 | | |
236 | | static inline void |
237 | | ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, |
238 | | uint32_t value, |
239 | | UChar **target, const UChar *targetLimit, |
240 | | int32_t **offsets, int32_t srcIndex, |
241 | 0 | UErrorCode *pErrorCode) { |
242 | | /* output the result */ |
243 | 0 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { |
244 | | /* output a single code point */ |
245 | 0 | ucnv_toUWriteCodePoint( |
246 | 0 | cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), |
247 | 0 | target, targetLimit, |
248 | 0 | offsets, srcIndex, |
249 | 0 | pErrorCode); |
250 | 0 | } else { |
251 | | /* output a string - with correct data we have resultLength>0 */ |
252 | 0 | ucnv_toUWriteUChars( |
253 | 0 | cnv, |
254 | 0 | UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ |
255 | 0 | UCNV_EXT_TO_U_GET_INDEX(value), |
256 | 0 | UCNV_EXT_TO_U_GET_LENGTH(value), |
257 | 0 | target, targetLimit, |
258 | 0 | offsets, srcIndex, |
259 | 0 | pErrorCode); |
260 | 0 | } |
261 | 0 | } |
262 | | |
/*
 * Yields the SI/SO toU state of a converter:
 * - MBCS_OUTPUT_2_SISO: cnv->mode cast to int8_t (state 0 is for SBCS, 1 for DBCS)
 * - MBCS_OUTPUT_DBCS_ONLY: always 1
 * - any other output type: -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 */
#define UCNV_SISO_STATE(cnv) \
    ((cnv)->sharedData->mbcs.outputType!=MBCS_OUTPUT_2_SISO ? \
        ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) : \
        (int8_t)(cnv)->mode)
274 | | |
275 | | /* |
276 | | * target<targetLimit; set error code for overflow |
277 | | */ |
278 | | U_CFUNC UBool |
279 | | ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, |
280 | | int32_t firstLength, |
281 | | const char **src, const char *srcLimit, |
282 | | UChar **target, const UChar *targetLimit, |
283 | | int32_t **offsets, int32_t srcIndex, |
284 | | UBool flush, |
285 | 0 | UErrorCode *pErrorCode) { |
286 | 0 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
287 | 0 | int32_t match; |
288 | | |
289 | | /* try to match */ |
290 | 0 | match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), |
291 | 0 | (const char *)cnv->toUBytes, firstLength, |
292 | 0 | *src, (int32_t)(srcLimit-*src), |
293 | 0 | &value, |
294 | 0 | cnv->useFallback, flush); |
295 | 0 | if(match>0) { |
296 | | /* advance src pointer for the consumed input */ |
297 | 0 | *src+=match-firstLength; |
298 | | |
299 | | /* write result to target */ |
300 | 0 | ucnv_extWriteToU(cnv, cx, |
301 | 0 | value, |
302 | 0 | target, targetLimit, |
303 | 0 | offsets, srcIndex, |
304 | 0 | pErrorCode); |
305 | 0 | return TRUE; |
306 | 0 | } else if(match<0) { |
307 | | /* save state for partial match */ |
308 | 0 | const char *s; |
309 | 0 | int32_t j; |
310 | | |
311 | | /* copy the first code point */ |
312 | 0 | s=(const char *)cnv->toUBytes; |
313 | 0 | cnv->preToUFirstLength=(int8_t)firstLength; |
314 | 0 | for(j=0; j<firstLength; ++j) { |
315 | 0 | cnv->preToU[j]=*s++; |
316 | 0 | } |
317 | | |
318 | | /* now copy the newly consumed input */ |
319 | 0 | s=*src; |
320 | 0 | match=-match; |
321 | 0 | for(; j<match; ++j) { |
322 | 0 | cnv->preToU[j]=*s++; |
323 | 0 | } |
324 | 0 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ |
325 | 0 | cnv->preToULength=(int8_t)match; |
326 | 0 | return TRUE; |
327 | 0 | } else /* match==0 no match */ { |
328 | 0 | return FALSE; |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | | U_CFUNC UChar32 |
333 | | ucnv_extSimpleMatchToU(const int32_t *cx, |
334 | | const char *source, int32_t length, |
335 | 0 | UBool useFallback) { |
336 | 0 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
337 | 0 | int32_t match; |
338 | |
|
339 | 0 | if(length<=0) { |
340 | 0 | return 0xffff; |
341 | 0 | } |
342 | | |
343 | | /* try to match */ |
344 | 0 | match=ucnv_extMatchToU(cx, -1, |
345 | 0 | source, length, |
346 | 0 | NULL, 0, |
347 | 0 | &value, |
348 | 0 | useFallback, TRUE); |
349 | 0 | if(match==length) { |
350 | | /* write result for simple, single-character conversion */ |
351 | 0 | if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { |
352 | 0 | return UCNV_EXT_TO_U_GET_CODE_POINT(value); |
353 | 0 | } |
354 | 0 | } |
355 | | |
356 | | /* |
357 | | * return no match because |
358 | | * - match>0 && value points to string: simple conversion cannot handle multiple code points |
359 | | * - match>0 && match!=length: not all input consumed, forbidden for this function |
360 | | * - match==0: no match found in the first place |
361 | | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) |
362 | | */ |
363 | 0 | return 0xfffe; |
364 | 0 | } |
365 | | |
366 | | /* |
367 | | * continue partial match with new input |
368 | | * never called for simple, single-character conversion |
369 | | */ |
370 | | U_CFUNC void |
371 | | ucnv_extContinueMatchToU(UConverter *cnv, |
372 | | UConverterToUnicodeArgs *pArgs, int32_t srcIndex, |
373 | 0 | UErrorCode *pErrorCode) { |
374 | 0 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
375 | 0 | int32_t match, length; |
376 | |
|
377 | 0 | match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), |
378 | 0 | cnv->preToU, cnv->preToULength, |
379 | 0 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), |
380 | 0 | &value, |
381 | 0 | cnv->useFallback, pArgs->flush); |
382 | 0 | if(match>0) { |
383 | 0 | if(match>=cnv->preToULength) { |
384 | | /* advance src pointer for the consumed input */ |
385 | 0 | pArgs->source+=match-cnv->preToULength; |
386 | 0 | cnv->preToULength=0; |
387 | 0 | } else { |
388 | | /* the match did not use all of preToU[] - keep the rest for replay */ |
389 | 0 | length=cnv->preToULength-match; |
390 | 0 | uprv_memmove(cnv->preToU, cnv->preToU+match, length); |
391 | 0 | cnv->preToULength=(int8_t)-length; |
392 | 0 | } |
393 | | |
394 | | /* write result */ |
395 | 0 | ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, |
396 | 0 | value, |
397 | 0 | &pArgs->target, pArgs->targetLimit, |
398 | 0 | &pArgs->offsets, srcIndex, |
399 | 0 | pErrorCode); |
400 | 0 | } else if(match<0) { |
401 | | /* save state for partial match */ |
402 | 0 | const char *s; |
403 | 0 | int32_t j; |
404 | | |
405 | | /* just _append_ the newly consumed input to preToU[] */ |
406 | 0 | s=pArgs->source; |
407 | 0 | match=-match; |
408 | 0 | for(j=cnv->preToULength; j<match; ++j) { |
409 | 0 | cnv->preToU[j]=*s++; |
410 | 0 | } |
411 | 0 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ |
412 | 0 | cnv->preToULength=(int8_t)match; |
413 | 0 | } else /* match==0 */ { |
414 | | /* |
415 | | * no match |
416 | | * |
417 | | * We need to split the previous input into two parts: |
418 | | * |
419 | | * 1. The first codepage character is unmappable - that's how we got into |
420 | | * trying the extension data in the first place. |
421 | | * We need to move it from the preToU buffer |
422 | | * to the error buffer, set an error code, |
423 | | * and prepare the rest of the previous input for 2. |
424 | | * |
425 | | * 2. The rest of the previous input must be converted once we |
426 | | * come back from the callback for the first character. |
427 | | * At that time, we have to try again from scratch to convert |
428 | | * these input characters. |
429 | | * The replay will be handled by the ucnv.c conversion code. |
430 | | */ |
431 | | |
432 | | /* move the first codepage character to the error field */ |
433 | 0 | uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); |
434 | 0 | cnv->toULength=cnv->preToUFirstLength; |
435 | | |
436 | | /* move the rest up inside the buffer */ |
437 | 0 | length=cnv->preToULength-cnv->preToUFirstLength; |
438 | 0 | if(length>0) { |
439 | 0 | uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); |
440 | 0 | } |
441 | | |
442 | | /* mark preToU for replay */ |
443 | 0 | cnv->preToULength=(int8_t)-length; |
444 | | |
445 | | /* set the error code for unassigned */ |
446 | 0 | *pErrorCode=U_INVALID_CHAR_FOUND; |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | | /* from Unicode ------------------------------------------------------------- */ |
451 | | |
452 | | // Use roundtrips, "good one-way" mappings, and some normal fallbacks. |
453 | | static inline UBool |
454 | 0 | extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { |
455 | 0 | return |
456 | 0 | ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || |
457 | 0 | FROM_U_USE_FALLBACK(useFallback, firstCP)) && |
458 | 0 | (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; |
459 | 0 | } |
460 | | |
461 | | /* |
462 | | * @return index of the UChar, if found; else <0 |
463 | | */ |
464 | | static inline int32_t |
465 | 0 | ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { |
466 | 0 | int32_t i, start, limit; |
467 | | |
468 | | /* binary search */ |
469 | 0 | start=0; |
470 | 0 | limit=length; |
471 | 0 | for(;;) { |
472 | 0 | i=limit-start; |
473 | 0 | if(i<=1) { |
474 | 0 | break; /* done */ |
475 | 0 | } |
476 | | /* start<limit-1 */ |
477 | | |
478 | 0 | if(i<=4) { |
479 | | /* linear search for the last part */ |
480 | 0 | if(u<=fromUSection[start]) { |
481 | 0 | break; |
482 | 0 | } |
483 | 0 | if(++start<limit && u<=fromUSection[start]) { |
484 | 0 | break; |
485 | 0 | } |
486 | 0 | if(++start<limit && u<=fromUSection[start]) { |
487 | 0 | break; |
488 | 0 | } |
489 | | /* always break at start==limit-1 */ |
490 | 0 | ++start; |
491 | 0 | break; |
492 | 0 | } |
493 | | |
494 | 0 | i=(start+limit)/2; |
495 | 0 | if(u<fromUSection[i]) { |
496 | 0 | limit=i; |
497 | 0 | } else { |
498 | 0 | start=i; |
499 | 0 | } |
500 | 0 | } |
501 | | |
502 | | /* did we really find it? */ |
503 | 0 | if(start<limit && u==fromUSection[start]) { |
504 | 0 | return start; |
505 | 0 | } else { |
506 | 0 | return -1; /* not found */ |
507 | 0 | } |
508 | 0 | } |
509 | | |
510 | | /* |
511 | | * @param cx pointer to extension data; if NULL, returns 0 |
512 | | * @param firstCP the first code point before all the other UChars |
513 | | * @param pre UChars that must match; !initialMatch: partial match with them |
514 | | * @param preLength length of pre, >=0 |
515 | | * @param src UChars that can be used to complete a match |
516 | | * @param srcLength length of src, >=0 |
517 | | * @param pMatchValue [out] output result value for the match from the data structure |
518 | | * @param useFallback "use fallback" flag, usually from cnv->useFallback |
519 | | * @param flush TRUE if the end of the input stream is reached |
520 | | * @return >1: matched, return value=total match length (number of input units matched) |
521 | | * 1: matched, no mapping but request for <subchar1> |
522 | | * (only for the first code point) |
523 | | * 0: no match |
524 | | * <0: partial match, return value=negative total match length |
525 | | * (partial matches are never returned for flush==TRUE) |
526 | | * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) |
527 | | * the matchLength is 2 if only firstCP matched, and >2 if firstCP and |
528 | | * further code units matched |
529 | | */ |
530 | | static int32_t |
531 | | ucnv_extMatchFromU(const int32_t *cx, |
532 | | UChar32 firstCP, |
533 | | const UChar *pre, int32_t preLength, |
534 | | const UChar *src, int32_t srcLength, |
535 | | uint32_t *pMatchValue, |
536 | 0 | UBool useFallback, UBool flush) { |
537 | 0 | const uint16_t *stage12, *stage3; |
538 | 0 | const uint32_t *stage3b; |
539 | |
|
540 | 0 | const UChar *fromUTableUChars, *fromUSectionUChars; |
541 | 0 | const uint32_t *fromUTableValues, *fromUSectionValues; |
542 | |
|
543 | 0 | uint32_t value, matchValue; |
544 | 0 | int32_t i, j, idx, length, matchLength; |
545 | 0 | UChar c; |
546 | |
|
547 | 0 | if(cx==NULL) { |
548 | 0 | return 0; /* no extension data, no match */ |
549 | 0 | } |
550 | | |
551 | | /* trie lookup of firstCP */ |
552 | 0 | idx=firstCP>>10; /* stage 1 index */ |
553 | 0 | if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { |
554 | 0 | return 0; /* the first code point is outside the trie */ |
555 | 0 | } |
556 | | |
557 | 0 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); |
558 | 0 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); |
559 | 0 | idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); |
560 | |
|
561 | 0 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); |
562 | 0 | value=stage3b[idx]; |
563 | 0 | if(value==0) { |
564 | 0 | return 0; |
565 | 0 | } |
566 | | |
567 | | /* |
568 | | * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: |
569 | | * Do not interpret values with reserved bits used, for forward compatibility, |
570 | | * and do not even remember intermediate results with reserved bits used. |
571 | | */ |
572 | | |
573 | 0 | if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
574 | | /* partial match, enter the loop below */ |
575 | 0 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
576 | | |
577 | | /* initialize */ |
578 | 0 | fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); |
579 | 0 | fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); |
580 | |
|
581 | 0 | matchValue=0; |
582 | 0 | i=j=matchLength=0; |
583 | | |
584 | | /* we must not remember fallback matches when not using fallbacks */ |
585 | | |
586 | | /* match input units until there is a full match or the input is consumed */ |
587 | 0 | for(;;) { |
588 | | /* go to the next section */ |
589 | 0 | fromUSectionUChars=fromUTableUChars+idx; |
590 | 0 | fromUSectionValues=fromUTableValues+idx; |
591 | | |
592 | | /* read first pair of the section */ |
593 | 0 | length=*fromUSectionUChars++; |
594 | 0 | value=*fromUSectionValues++; |
595 | 0 | if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { |
596 | | /* remember longest match so far */ |
597 | 0 | matchValue=value; |
598 | 0 | matchLength=2+i+j; |
599 | 0 | } |
600 | | |
601 | | /* match pre[] then src[] */ |
602 | 0 | if(i<preLength) { |
603 | 0 | c=pre[i++]; |
604 | 0 | } else if(j<srcLength) { |
605 | 0 | c=src[j++]; |
606 | 0 | } else { |
607 | | /* all input consumed, partial match */ |
608 | 0 | if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { |
609 | | /* |
610 | | * end of the entire input stream, stop with the longest match so far |
611 | | * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS |
612 | | * because it must fit into state buffers |
613 | | */ |
614 | 0 | break; |
615 | 0 | } else { |
616 | | /* continue with more input next time */ |
617 | 0 | return -(2+length); |
618 | 0 | } |
619 | 0 | } |
620 | | |
621 | | /* search for the current UChar */ |
622 | 0 | idx=ucnv_extFindFromU(fromUSectionUChars, length, c); |
623 | 0 | if(idx<0) { |
624 | | /* no match here, stop with the longest match so far */ |
625 | 0 | break; |
626 | 0 | } else { |
627 | 0 | value=fromUSectionValues[idx]; |
628 | 0 | if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
629 | | /* partial match, continue */ |
630 | 0 | idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
631 | 0 | } else { |
632 | 0 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
633 | | /* full match, stop with result */ |
634 | 0 | matchValue=value; |
635 | 0 | matchLength=2+i+j; |
636 | 0 | } else { |
637 | | /* full match on fallback not taken, stop with the longest match so far */ |
638 | 0 | } |
639 | 0 | break; |
640 | 0 | } |
641 | 0 | } |
642 | 0 | } |
643 | | |
644 | 0 | if(matchLength==0) { |
645 | | /* no match at all */ |
646 | 0 | return 0; |
647 | 0 | } |
648 | 0 | } else /* result from firstCP trie lookup */ { |
649 | 0 | if(extFromUUseMapping(useFallback, value, firstCP)) { |
650 | | /* full match, stop with result */ |
651 | 0 | matchValue=value; |
652 | 0 | matchLength=2; |
653 | 0 | } else { |
654 | | /* fallback not taken */ |
655 | 0 | return 0; |
656 | 0 | } |
657 | 0 | } |
658 | | |
659 | | /* return result */ |
660 | 0 | if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { |
661 | 0 | return 1; /* assert matchLength==2 */ |
662 | 0 | } |
663 | | |
664 | 0 | *pMatchValue=matchValue; |
665 | 0 | return matchLength; |
666 | 0 | } |
667 | | |
668 | | /* |
669 | | * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits |
670 | | */ |
671 | | static inline void |
672 | | ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, |
673 | | uint32_t value, |
674 | | char **target, const char *targetLimit, |
675 | | int32_t **offsets, int32_t srcIndex, |
676 | 0 | UErrorCode *pErrorCode) { |
677 | 0 | uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; |
678 | 0 | const uint8_t *result; |
679 | 0 | int32_t length, prevLength; |
680 | |
|
681 | 0 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
682 | 0 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); |
683 | | |
684 | | /* output the result */ |
685 | 0 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { |
686 | | /* |
687 | | * Generate a byte array and then write it below. |
688 | | * This is not the fastest possible way, but it should be ok for |
689 | | * extension mappings, and it is much simpler. |
690 | | * Offset and overflow handling are only done once this way. |
691 | | */ |
692 | 0 | uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ |
693 | 0 | switch(length) { |
694 | 0 | case 3: |
695 | 0 | *p++=(uint8_t)(value>>16); |
696 | 0 | U_FALLTHROUGH; |
697 | 0 | case 2: |
698 | 0 | *p++=(uint8_t)(value>>8); |
699 | 0 | U_FALLTHROUGH; |
700 | 0 | case 1: |
701 | 0 | *p++=(uint8_t)value; |
702 | 0 | U_FALLTHROUGH; |
703 | 0 | default: |
704 | 0 | break; /* will never occur */ |
705 | 0 | } |
706 | 0 | result=buffer+1; |
707 | 0 | } else { |
708 | 0 | result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; |
709 | 0 | } |
710 | | |
711 | | /* with correct data we have length>0 */ |
712 | | |
713 | 0 | if((prevLength=cnv->fromUnicodeStatus)!=0) { |
714 | | /* handle SI/SO stateful output */ |
715 | 0 | uint8_t shiftByte; |
716 | |
|
717 | 0 | if(prevLength>1 && length==1) { |
718 | | /* change from double-byte mode to single-byte */ |
719 | 0 | shiftByte=(uint8_t)UCNV_SI; |
720 | 0 | cnv->fromUnicodeStatus=1; |
721 | 0 | } else if(prevLength==1 && length>1) { |
722 | | /* change from single-byte mode to double-byte */ |
723 | 0 | shiftByte=(uint8_t)UCNV_SO; |
724 | 0 | cnv->fromUnicodeStatus=2; |
725 | 0 | } else { |
726 | 0 | shiftByte=0; |
727 | 0 | } |
728 | |
|
729 | 0 | if(shiftByte!=0) { |
730 | | /* prepend the shift byte to the result bytes */ |
731 | 0 | buffer[0]=shiftByte; |
732 | 0 | if(result!=buffer+1) { |
733 | 0 | uprv_memcpy(buffer+1, result, length); |
734 | 0 | } |
735 | 0 | result=buffer; |
736 | 0 | ++length; |
737 | 0 | } |
738 | 0 | } |
739 | |
|
740 | 0 | ucnv_fromUWriteBytes(cnv, (const char *)result, length, |
741 | 0 | target, targetLimit, |
742 | 0 | offsets, srcIndex, |
743 | 0 | pErrorCode); |
744 | 0 | } |
745 | | |
746 | | /* |
747 | | * target<targetLimit; set error code for overflow |
748 | | */ |
749 | | U_CFUNC UBool |
750 | | ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, |
751 | | UChar32 cp, |
752 | | const UChar **src, const UChar *srcLimit, |
753 | | char **target, const char *targetLimit, |
754 | | int32_t **offsets, int32_t srcIndex, |
755 | | UBool flush, |
756 | 0 | UErrorCode *pErrorCode) { |
757 | 0 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
758 | 0 | int32_t match; |
759 | | |
760 | | /* try to match */ |
761 | 0 | match=ucnv_extMatchFromU(cx, cp, |
762 | 0 | NULL, 0, |
763 | 0 | *src, (int32_t)(srcLimit-*src), |
764 | 0 | &value, |
765 | 0 | cnv->useFallback, flush); |
766 | | |
767 | | /* reject a match if the result is a single byte for DBCS-only */ |
768 | 0 | if( match>=2 && |
769 | 0 | !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && |
770 | 0 | cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) |
771 | 0 | ) { |
772 | | /* advance src pointer for the consumed input */ |
773 | 0 | *src+=match-2; /* remove 2 for the initial code point */ |
774 | | |
775 | | /* write result to target */ |
776 | 0 | ucnv_extWriteFromU(cnv, cx, |
777 | 0 | value, |
778 | 0 | target, targetLimit, |
779 | 0 | offsets, srcIndex, |
780 | 0 | pErrorCode); |
781 | 0 | return TRUE; |
782 | 0 | } else if(match<0) { |
783 | | /* save state for partial match */ |
784 | 0 | const UChar *s; |
785 | 0 | int32_t j; |
786 | | |
787 | | /* copy the first code point */ |
788 | 0 | cnv->preFromUFirstCP=cp; |
789 | | |
790 | | /* now copy the newly consumed input */ |
791 | 0 | s=*src; |
792 | 0 | match=-match-2; /* remove 2 for the initial code point */ |
793 | 0 | for(j=0; j<match; ++j) { |
794 | 0 | cnv->preFromU[j]=*s++; |
795 | 0 | } |
796 | 0 | *src=s; /* same as *src=srcLimit; because we reached the end of input */ |
797 | 0 | cnv->preFromULength=(int8_t)match; |
798 | 0 | return TRUE; |
799 | 0 | } else if(match==1) { |
800 | | /* matched, no mapping but request for <subchar1> */ |
801 | 0 | cnv->useSubChar1=TRUE; |
802 | 0 | return FALSE; |
803 | 0 | } else /* match==0 no match */ { |
804 | 0 | return FALSE; |
805 | 0 | } |
806 | 0 | } |
807 | | |
808 | | /* |
809 | | * Used by ISO 2022 implementation. |
810 | | * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping |
811 | | */ |
812 | | U_CFUNC int32_t |
813 | | ucnv_extSimpleMatchFromU(const int32_t *cx, |
814 | | UChar32 cp, uint32_t *pValue, |
815 | 0 | UBool useFallback) { |
816 | 0 | uint32_t value; |
817 | 0 | int32_t match; |
818 | | |
819 | | /* try to match */ |
820 | 0 | match=ucnv_extMatchFromU(cx, |
821 | 0 | cp, |
822 | 0 | NULL, 0, |
823 | 0 | NULL, 0, |
824 | 0 | &value, |
825 | 0 | useFallback, TRUE); |
826 | 0 | if(match>=2) { |
827 | | /* write result for simple, single-character conversion */ |
828 | 0 | int32_t length; |
829 | 0 | int isRoundtrip; |
830 | |
|
831 | 0 | isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); |
832 | 0 | length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
833 | 0 | value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); |
834 | |
|
835 | 0 | if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { |
836 | 0 | *pValue=value; |
837 | 0 | return isRoundtrip ? length : -length; |
838 | | #if 0 /* not currently used */ |
839 | | } else if(length==4) { |
840 | | /* de-serialize a 4-byte result */ |
841 | | const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; |
842 | | *pValue= |
843 | | ((uint32_t)result[0]<<24)| |
844 | | ((uint32_t)result[1]<<16)| |
845 | | ((uint32_t)result[2]<<8)| |
846 | | result[3]; |
847 | | return isRoundtrip ? 4 : -4; |
848 | | #endif |
849 | 0 | } |
850 | 0 | } |
851 | | |
852 | | /* |
853 | | * return no match because |
854 | | * - match>1 && resultLength>4: result too long for simple conversion |
855 | | * - match==1: no match found, <subchar1> preferred |
856 | | * - match==0: no match found in the first place |
857 | | * - match<0: partial match, not supported for simple conversion (and flush==TRUE) |
858 | | */ |
859 | 0 | return 0; |
860 | 0 | } |
861 | | |
862 | | /* |
863 | | * continue partial match with new input, requires cnv->preFromUFirstCP>=0 |
864 | | * never called for simple, single-character conversion |
865 | | */ |
866 | | U_CFUNC void |
867 | | ucnv_extContinueMatchFromU(UConverter *cnv, |
868 | | UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, |
869 | 0 | UErrorCode *pErrorCode) { |
870 | 0 | uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
871 | 0 | int32_t match; |
872 | |
|
873 | 0 | match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, |
874 | 0 | cnv->preFromUFirstCP, |
875 | 0 | cnv->preFromU, cnv->preFromULength, |
876 | 0 | pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), |
877 | 0 | &value, |
878 | 0 | cnv->useFallback, pArgs->flush); |
879 | 0 | if(match>=2) { |
880 | 0 | match-=2; /* remove 2 for the initial code point */ |
881 | |
|
882 | 0 | if(match>=cnv->preFromULength) { |
883 | | /* advance src pointer for the consumed input */ |
884 | 0 | pArgs->source+=match-cnv->preFromULength; |
885 | 0 | cnv->preFromULength=0; |
886 | 0 | } else { |
887 | | /* the match did not use all of preFromU[] - keep the rest for replay */ |
888 | 0 | int32_t length=cnv->preFromULength-match; |
889 | 0 | u_memmove(cnv->preFromU, cnv->preFromU+match, length); |
890 | 0 | cnv->preFromULength=(int8_t)-length; |
891 | 0 | } |
892 | | |
893 | | /* finish the partial match */ |
894 | 0 | cnv->preFromUFirstCP=U_SENTINEL; |
895 | | |
896 | | /* write result */ |
897 | 0 | ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, |
898 | 0 | value, |
899 | 0 | &pArgs->target, pArgs->targetLimit, |
900 | 0 | &pArgs->offsets, srcIndex, |
901 | 0 | pErrorCode); |
902 | 0 | } else if(match<0) { |
903 | | /* save state for partial match */ |
904 | 0 | const UChar *s; |
905 | 0 | int32_t j; |
906 | | |
907 | | /* just _append_ the newly consumed input to preFromU[] */ |
908 | 0 | s=pArgs->source; |
909 | 0 | match=-match-2; /* remove 2 for the initial code point */ |
910 | 0 | for(j=cnv->preFromULength; j<match; ++j) { |
911 | 0 | U_ASSERT(j>=0); |
912 | 0 | cnv->preFromU[j]=*s++; |
913 | 0 | } |
914 | 0 | pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ |
915 | 0 | cnv->preFromULength=(int8_t)match; |
916 | 0 | } else /* match==0 or 1 */ { |
917 | | /* |
918 | | * no match |
919 | | * |
920 | | * We need to split the previous input into two parts: |
921 | | * |
922 | | * 1. The first code point is unmappable - that's how we got into |
923 | | * trying the extension data in the first place. |
924 | | * We need to move it from the preFromU buffer |
925 | | * to the error buffer, set an error code, |
926 | | * and prepare the rest of the previous input for 2. |
927 | | * |
928 | | * 2. The rest of the previous input must be converted once we |
929 | | * come back from the callback for the first code point. |
930 | | * At that time, we have to try again from scratch to convert |
931 | | * these input characters. |
932 | | * The replay will be handled by the ucnv.c conversion code. |
933 | | */ |
934 | |
|
935 | 0 | if(match==1) { |
936 | | /* matched, no mapping but request for <subchar1> */ |
937 | 0 | cnv->useSubChar1=TRUE; |
938 | 0 | } |
939 | | |
940 | | /* move the first code point to the error field */ |
941 | 0 | cnv->fromUChar32=cnv->preFromUFirstCP; |
942 | 0 | cnv->preFromUFirstCP=U_SENTINEL; |
943 | | |
944 | | /* mark preFromU for replay */ |
945 | 0 | cnv->preFromULength=-cnv->preFromULength; |
946 | | |
947 | | /* set the error code for unassigned */ |
948 | 0 | *pErrorCode=U_INVALID_CHAR_FOUND; |
949 | 0 | } |
950 | 0 | } |
951 | | |
952 | | static UBool |
953 | 0 | extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { |
954 | 0 | if(which==UCNV_ROUNDTRIP_SET) { |
955 | | // Add only code points for which the roundtrip flag is set. |
956 | | // Do not add any fallbacks, even if ucnv_fromUnicode() would use them |
957 | | // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). |
958 | | // |
959 | | // By analogy, also do not add "good one-way" mappings. |
960 | | // |
961 | | // Do not add entries with reserved bits set. |
962 | 0 | if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= |
963 | 0 | UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { |
964 | 0 | return FALSE; |
965 | 0 | } |
966 | 0 | } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { |
967 | | // Do not add entries with reserved bits set. |
968 | 0 | if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { |
969 | 0 | return FALSE; |
970 | 0 | } |
971 | 0 | } |
972 | | // Do not add <subchar1> entries or other (future?) pseudo-entries |
973 | | // with an output length of 0. |
974 | 0 | return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; |
975 | 0 | } |
976 | | |
977 | | static void |
978 | | ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, |
979 | | const int32_t *cx, |
980 | | const USetAdder *sa, |
981 | | UConverterUnicodeSet which, |
982 | | int32_t minLength, |
983 | | UChar32 firstCP, |
984 | | UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, |
985 | | int32_t sectionIndex, |
986 | 0 | UErrorCode *pErrorCode) { |
987 | 0 | const UChar *fromUSectionUChars; |
988 | 0 | const uint32_t *fromUSectionValues; |
989 | |
|
990 | 0 | uint32_t value; |
991 | 0 | int32_t i, count; |
992 | |
|
993 | 0 | fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; |
994 | 0 | fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; |
995 | | |
996 | | /* read first pair of the section */ |
997 | 0 | count=*fromUSectionUChars++; |
998 | 0 | value=*fromUSectionValues++; |
999 | |
|
1000 | 0 | if(extSetUseMapping(which, minLength, value)) { |
1001 | 0 | if(length==U16_LENGTH(firstCP)) { |
1002 | | /* add the initial code point */ |
1003 | 0 | sa->add(sa->set, firstCP); |
1004 | 0 | } else { |
1005 | | /* add the string so far */ |
1006 | 0 | sa->addString(sa->set, s, length); |
1007 | 0 | } |
1008 | 0 | } |
1009 | |
|
1010 | 0 | for(i=0; i<count; ++i) { |
1011 | | /* append this code unit and recurse or add the string */ |
1012 | 0 | s[length]=fromUSectionUChars[i]; |
1013 | 0 | value=fromUSectionValues[i]; |
1014 | |
|
1015 | 0 | if(value==0) { |
1016 | | /* no mapping, do nothing */ |
1017 | 0 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
1018 | 0 | ucnv_extGetUnicodeSetString( |
1019 | 0 | sharedData, cx, sa, which, minLength, |
1020 | 0 | firstCP, s, length+1, |
1021 | 0 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
1022 | 0 | pErrorCode); |
1023 | 0 | } else if(extSetUseMapping(which, minLength, value)) { |
1024 | 0 | sa->addString(sa->set, s, length+1); |
1025 | 0 | } |
1026 | 0 | } |
1027 | 0 | } |
1028 | | |
1029 | | U_CFUNC void |
1030 | | ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, |
1031 | | const USetAdder *sa, |
1032 | | UConverterUnicodeSet which, |
1033 | | UConverterSetFilter filter, |
1034 | 0 | UErrorCode *pErrorCode) { |
1035 | 0 | const int32_t *cx; |
1036 | 0 | const uint16_t *stage12, *stage3, *ps2, *ps3; |
1037 | 0 | const uint32_t *stage3b; |
1038 | |
|
1039 | 0 | uint32_t value; |
1040 | 0 | int32_t st1, stage1Length, st2, st3, minLength; |
1041 | |
|
1042 | 0 | UChar s[UCNV_EXT_MAX_UCHARS]; |
1043 | 0 | UChar32 c; |
1044 | 0 | int32_t length; |
1045 | |
|
1046 | 0 | cx=sharedData->mbcs.extIndexes; |
1047 | 0 | if(cx==NULL) { |
1048 | 0 | return; |
1049 | 0 | } |
1050 | | |
1051 | 0 | stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); |
1052 | 0 | stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); |
1053 | 0 | stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); |
1054 | |
|
1055 | 0 | stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; |
1056 | | |
1057 | | /* enumerate the from-Unicode trie table */ |
1058 | 0 | c=0; /* keep track of the current code point while enumerating */ |
1059 | |
|
1060 | 0 | if(filter==UCNV_SET_FILTER_2022_CN) { |
1061 | 0 | minLength=3; |
1062 | 0 | } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || |
1063 | 0 | filter!=UCNV_SET_FILTER_NONE |
1064 | 0 | ) { |
1065 | | /* DBCS-only, ignore single-byte results */ |
1066 | 0 | minLength=2; |
1067 | 0 | } else { |
1068 | 0 | minLength=1; |
1069 | 0 | } |
1070 | | |
1071 | | /* |
1072 | | * the trie enumeration is almost the same as |
1073 | | * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 |
1074 | | */ |
1075 | 0 | for(st1=0; st1<stage1Length; ++st1) { |
1076 | 0 | st2=stage12[st1]; |
1077 | 0 | if(st2>stage1Length) { |
1078 | 0 | ps2=stage12+st2; |
1079 | 0 | for(st2=0; st2<64; ++st2) { |
1080 | 0 | if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { |
1081 | | /* read the stage 3 block */ |
1082 | 0 | ps3=stage3+st3; |
1083 | |
|
1084 | 0 | do { |
1085 | 0 | value=stage3b[*ps3++]; |
1086 | 0 | if(value==0) { |
1087 | | /* no mapping, do nothing */ |
1088 | 0 | } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
1089 | | // Recurse for partial results. |
1090 | 0 | length=0; |
1091 | 0 | U16_APPEND_UNSAFE(s, length, c); |
1092 | 0 | ucnv_extGetUnicodeSetString( |
1093 | 0 | sharedData, cx, sa, which, minLength, |
1094 | 0 | c, s, length, |
1095 | 0 | (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
1096 | 0 | pErrorCode); |
1097 | 0 | } else if(extSetUseMapping(which, minLength, value)) { |
1098 | 0 | switch(filter) { |
1099 | 0 | case UCNV_SET_FILTER_2022_CN: |
1100 | 0 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { |
1101 | 0 | continue; |
1102 | 0 | } |
1103 | 0 | break; |
1104 | 0 | case UCNV_SET_FILTER_SJIS: |
1105 | 0 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { |
1106 | 0 | continue; |
1107 | 0 | } |
1108 | 0 | break; |
1109 | 0 | case UCNV_SET_FILTER_GR94DBCS: |
1110 | 0 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
1111 | 0 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && |
1112 | 0 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
1113 | 0 | continue; |
1114 | 0 | } |
1115 | 0 | break; |
1116 | 0 | case UCNV_SET_FILTER_HZ: |
1117 | 0 | if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
1118 | 0 | (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && |
1119 | 0 | (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
1120 | 0 | continue; |
1121 | 0 | } |
1122 | 0 | break; |
1123 | 0 | default: |
1124 | | /* |
1125 | | * UCNV_SET_FILTER_NONE, |
1126 | | * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength |
1127 | | */ |
1128 | 0 | break; |
1129 | 0 | } |
1130 | 0 | sa->add(sa->set, c); |
1131 | 0 | } |
1132 | 0 | } while((++c&0xf)!=0); |
1133 | 0 | } else { |
1134 | 0 | c+=16; /* empty stage 3 block */ |
1135 | 0 | } |
1136 | 0 | } |
1137 | 0 | } else { |
1138 | 0 | c+=1024; /* empty stage 2 block */ |
1139 | 0 | } |
1140 | 0 | } |
1141 | 0 | } |
1142 | | |
1143 | | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |