/src/icu/source/common/ucnvhz.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (C) 2000-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | * file name: ucnvhz.c |
9 | | * encoding: UTF-8 |
10 | | * tab size: 8 (not used) |
11 | | * indentation:4 |
12 | | * |
13 | | * created on: 2000oct16 |
14 | | * created by: Ram Viswanadha |
15 | | * 10/31/2000 Ram Implemented offsets logic function |
16 | | * |
17 | | */ |
18 | | |
19 | | #include "unicode/utypes.h" |
20 | | |
21 | | #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
22 | | |
23 | | #include "cmemory.h" |
24 | | #include "unicode/ucnv.h" |
25 | | #include "unicode/ucnv_cb.h" |
26 | | #include "unicode/uset.h" |
27 | | #include "unicode/utf16.h" |
28 | | #include "ucnv_bld.h" |
29 | | #include "ucnv_cnv.h" |
30 | | #include "ucnv_imp.h" |
31 | | |
/* Bytes that make up the HZ escape sequences. */
#define UCNV_TILDE       0x7E   /* ~ */
#define UCNV_OPEN_BRACE  0x7B   /* { */
#define UCNV_CLOSE_BRACE 0x7D   /* } */

/* The two-byte escape sequences emitted by the fromUnicode code. */
#define SB_ESCAPE    "\x7E\x7D" /* ~} : following output is single-byte */
#define DB_ESCAPE    "\x7E\x7B" /* ~{ : following output is double-byte */
#define TILDE_ESCAPE "\x7E\x7E" /* ~~ : a literal tilde */
#define ESC_LEN 2               /* byte length of each escape sequence above */
39 | | |
40 | | |
/*
 * Appends the `len` bytes starting at `strToAppend` to the output of a
 * fromUnicode call.
 *
 * Each byte goes to args->target[targetIndex] while targetIndex is below
 * targetLength; a byte that no longer fits is stored in the converter's
 * charErrorBuffer and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * targetIndex, strToAppend and len must be modifiable lvalues: the macro
 * advances the first two and counts len down (it is -1 afterwards).
 *
 * Hidden dependency: besides its parameters the macro uses a variable named
 * `offsets` (an int32_t pointer, possibly NULL) that must be in scope at the
 * expansion site. When it is non-NULL, sourceIndex-1 is recorded for every
 * byte written to the target and the pointer is advanced.
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \
    while((len)-- > 0) { \
        if((targetIndex) < (targetLength)) { \
            (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
            if(offsets != NULL) { \
                *(offsets++) = (sourceIndex) - 1; \
            } \
            (targetIndex)++; \
        } else { \
            (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = (unsigned char) *(strToAppend); \
            *(err) = U_BUFFER_OVERFLOW_ERROR; \
        } \
        (strToAppend)++; \
    } \
} UPRV_BLOCK_MACRO_END
57 | | |
58 | | |
59 | | typedef struct{ |
60 | | UConverter* gbConverter; |
61 | | int32_t targetIndex; |
62 | | int32_t sourceIndex; |
63 | | UBool isEscapeAppended; |
64 | | UBool isStateDBCS; |
65 | | UBool isTargetUCharDBCS; |
66 | | UBool isEmptySegment; |
67 | | }UConverterDataHZ; |
68 | | |
69 | | |
70 | | U_CDECL_BEGIN |
71 | | static void U_CALLCONV |
72 | 0 | _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
73 | 0 | UConverter *gbConverter; |
74 | 0 | if(pArgs->onlyTestIsLoadable) { |
75 | 0 | ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ |
76 | 0 | return; |
77 | 0 | } |
78 | 0 | gbConverter = ucnv_open("GBK", errorCode); |
79 | 0 | if(U_FAILURE(*errorCode)) { |
80 | 0 | return; |
81 | 0 | } |
82 | 0 | cnv->toUnicodeStatus = 0; |
83 | 0 | cnv->fromUnicodeStatus= 0; |
84 | 0 | cnv->mode=0; |
85 | 0 | cnv->fromUChar32=0x0000; |
86 | 0 | cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); |
87 | 0 | if(cnv->extraInfo != NULL){ |
88 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; |
89 | 0 | } |
90 | 0 | else { |
91 | 0 | ucnv_close(gbConverter); |
92 | 0 | *errorCode = U_MEMORY_ALLOCATION_ERROR; |
93 | 0 | return; |
94 | 0 | } |
95 | 0 | } |
96 | | |
97 | | static void U_CALLCONV |
98 | 0 | _HZClose(UConverter *cnv){ |
99 | 0 | if(cnv->extraInfo != NULL) { |
100 | 0 | ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); |
101 | 0 | if(!cnv->isExtraLocal) { |
102 | 0 | uprv_free(cnv->extraInfo); |
103 | 0 | } |
104 | 0 | cnv->extraInfo = NULL; |
105 | 0 | } |
106 | 0 | } |
107 | | |
108 | | static void U_CALLCONV |
109 | 0 | _HZReset(UConverter *cnv, UConverterResetChoice choice){ |
110 | 0 | if(choice<=UCNV_RESET_TO_UNICODE) { |
111 | 0 | cnv->toUnicodeStatus = 0; |
112 | 0 | cnv->mode=0; |
113 | 0 | if(cnv->extraInfo != NULL){ |
114 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
115 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; |
116 | 0 | } |
117 | 0 | } |
118 | 0 | if(choice!=UCNV_RESET_TO_UNICODE) { |
119 | 0 | cnv->fromUnicodeStatus= 0; |
120 | 0 | cnv->fromUChar32=0x0000; |
121 | 0 | if(cnv->extraInfo != NULL){ |
122 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
123 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
124 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
125 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | | /**************************************HZ Encoding************************************************* |
131 | | * Rules for HZ encoding |
132 | | * |
133 | | * In ASCII mode, a byte is interpreted as an ASCII character, unless a |
134 | | * '~' is encountered. The character '~' is an escape character. By |
135 | | * convention, it must be immediately followed ONLY by '~', '{' or '\n' |
136 | | * (<LF>), with the following special meaning. |
137 | | |
138 | | * 1. The escape sequence '~~' is interpreted as a '~'. |
139 | | * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. |
140 | | * 3. The escape sequence '~\n' is a line-continuation marker to be |
141 | | * consumed with no output produced. |
142 | | * In GB mode, characters are interpreted two bytes at a time as (pure) |
143 | | * GB codes until the escape-from-GB code '~}' is read. This code |
144 | | * switches the mode from GB back to ASCII. (Note that the escape- |
145 | | * from-GB code '~}' ($7E7D) is outside the defined GB range.) |
146 | | * |
147 | | * Source: RFC 1842 |
148 | | * |
149 | | * Note that the formal syntax in RFC 1842 is invalid. I assume that the |
150 | | * intended definition of single-byte-segment is as follows (pedberg): |
151 | | * single-byte-segment = single-byte-seq 1*single-byte-char |
152 | | */ |
153 | | |
154 | | |
155 | | static void U_CALLCONV |
156 | | UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, |
157 | 0 | UErrorCode* err){ |
158 | 0 | char tempBuf[2]; |
159 | 0 | const char *mySource = ( char *) args->source; |
160 | 0 | UChar *myTarget = args->target; |
161 | 0 | const char *mySourceLimit = args->sourceLimit; |
162 | 0 | UChar32 targetUniChar = 0x0000; |
163 | 0 | int32_t mySourceChar = 0x0000; |
164 | 0 | UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
165 | 0 | tempBuf[0]=0; |
166 | 0 | tempBuf[1]=0; |
167 | | |
168 | | /* Calling code already handles this situation. */ |
169 | | /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ |
170 | | *err = U_ILLEGAL_ARGUMENT_ERROR; |
171 | | return; |
172 | | }*/ |
173 | | |
174 | 0 | while(mySource< mySourceLimit){ |
175 | | |
176 | 0 | if(myTarget < args->targetLimit){ |
177 | | |
178 | 0 | mySourceChar= (unsigned char) *mySource++; |
179 | |
|
180 | 0 | if(args->converter->mode == UCNV_TILDE) { |
181 | | /* second byte after ~ */ |
182 | 0 | args->converter->mode=0; |
183 | 0 | switch(mySourceChar) { |
184 | 0 | case 0x0A: |
185 | | /* no output for ~\n (line-continuation marker) */ |
186 | 0 | continue; |
187 | 0 | case UCNV_TILDE: |
188 | 0 | if(args->offsets) { |
189 | 0 | args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); |
190 | 0 | } |
191 | 0 | *(myTarget++)=(UChar)mySourceChar; |
192 | 0 | myData->isEmptySegment = FALSE; |
193 | 0 | continue; |
194 | 0 | case UCNV_OPEN_BRACE: |
195 | 0 | case UCNV_CLOSE_BRACE: |
196 | 0 | myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); |
197 | 0 | if (myData->isEmptySegment) { |
198 | 0 | myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ |
199 | 0 | *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
200 | 0 | args->converter->toUCallbackReason = UCNV_IRREGULAR; |
201 | 0 | args->converter->toUBytes[0] = UCNV_TILDE; |
202 | 0 | args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar); |
203 | 0 | args->converter->toULength = 2; |
204 | 0 | args->target = myTarget; |
205 | 0 | args->source = mySource; |
206 | 0 | return; |
207 | 0 | } |
208 | 0 | myData->isEmptySegment = TRUE; |
209 | 0 | continue; |
210 | 0 | default: |
211 | | /* if the first byte is equal to TILDE and the trail byte |
212 | | * is not a valid byte then it is an error condition |
213 | | */ |
214 | | /* |
215 | | * Ticket 5691: consistent illegal sequences: |
216 | | * - We include at least the first byte in the illegal sequence. |
217 | | * - If any of the non-initial bytes could be the start of a character, |
218 | | * we stop the illegal sequence before the first one of those. |
219 | | */ |
220 | 0 | myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
221 | 0 | *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
222 | 0 | args->converter->toUBytes[0] = UCNV_TILDE; |
223 | 0 | if( myData->isStateDBCS ? |
224 | 0 | (0x21 <= mySourceChar && mySourceChar <= 0x7e) : |
225 | 0 | mySourceChar <= 0x7f |
226 | 0 | ) { |
227 | | /* The current byte could be the start of a character: Back it out. */ |
228 | 0 | args->converter->toULength = 1; |
229 | 0 | --mySource; |
230 | 0 | } else { |
231 | | /* Include the current byte in the illegal sequence. */ |
232 | 0 | args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar); |
233 | 0 | args->converter->toULength = 2; |
234 | 0 | } |
235 | 0 | args->target = myTarget; |
236 | 0 | args->source = mySource; |
237 | 0 | return; |
238 | 0 | } |
239 | 0 | } else if(myData->isStateDBCS) { |
240 | 0 | if(args->converter->toUnicodeStatus == 0x00){ |
241 | | /* lead byte */ |
242 | 0 | if(mySourceChar == UCNV_TILDE) { |
243 | 0 | args->converter->mode = UCNV_TILDE; |
244 | 0 | } else { |
245 | | /* add another bit to distinguish a 0 byte from not having seen a lead byte */ |
246 | 0 | args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); |
247 | 0 | myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ |
248 | 0 | } |
249 | 0 | continue; |
250 | 0 | } |
251 | 0 | else{ |
252 | | /* trail byte */ |
253 | 0 | int leadIsOk, trailIsOk; |
254 | 0 | uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; |
255 | 0 | targetUniChar = 0xffff; |
256 | | /* |
257 | | * Ticket 5691: consistent illegal sequences: |
258 | | * - We include at least the first byte in the illegal sequence. |
259 | | * - If any of the non-initial bytes could be the start of a character, |
260 | | * we stop the illegal sequence before the first one of those. |
261 | | * |
262 | | * In HZ DBCS, if the second byte is in the 21..7e range, |
263 | | * we report only the first byte as the illegal sequence. |
264 | | * Otherwise we convert or report the pair of bytes. |
265 | | */ |
266 | 0 | leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); |
267 | 0 | trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); |
268 | 0 | if (leadIsOk && trailIsOk) { |
269 | 0 | tempBuf[0] = (char) (leadByte+0x80) ; |
270 | 0 | tempBuf[1] = (char) (mySourceChar+0x80); |
271 | 0 | targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
272 | 0 | tempBuf, 2, args->converter->useFallback); |
273 | 0 | mySourceChar= (leadByte << 8) | mySourceChar; |
274 | 0 | } else if (trailIsOk) { |
275 | | /* report a single illegal byte and continue with the following DBCS starter byte */ |
276 | 0 | --mySource; |
277 | 0 | mySourceChar = (int32_t)leadByte; |
278 | 0 | } else { |
279 | | /* report a pair of illegal bytes if the second byte is not a DBCS starter */ |
280 | | /* add another bit so that the code below writes 2 bytes in case of error */ |
281 | 0 | mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; |
282 | 0 | } |
283 | 0 | args->converter->toUnicodeStatus =0x00; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | else{ |
287 | 0 | if(mySourceChar == UCNV_TILDE) { |
288 | 0 | args->converter->mode = UCNV_TILDE; |
289 | 0 | continue; |
290 | 0 | } else if(mySourceChar <= 0x7f) { |
291 | 0 | targetUniChar = (UChar)mySourceChar; /* ASCII */ |
292 | 0 | myData->isEmptySegment = FALSE; /* the segment has something valid */ |
293 | 0 | } else { |
294 | 0 | targetUniChar = 0xffff; |
295 | 0 | myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
296 | 0 | } |
297 | 0 | } |
298 | 0 | if(targetUniChar < 0xfffe){ |
299 | 0 | if(args->offsets) { |
300 | 0 | args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); |
301 | 0 | } |
302 | |
|
303 | 0 | *(myTarget++)=(UChar)targetUniChar; |
304 | 0 | } |
305 | 0 | else /* targetUniChar>=0xfffe */ { |
306 | 0 | if(targetUniChar == 0xfffe){ |
307 | 0 | *err = U_INVALID_CHAR_FOUND; |
308 | 0 | } |
309 | 0 | else{ |
310 | 0 | *err = U_ILLEGAL_CHAR_FOUND; |
311 | 0 | } |
312 | 0 | if(mySourceChar > 0xff){ |
313 | 0 | args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); |
314 | 0 | args->converter->toUBytes[1] = (uint8_t)mySourceChar; |
315 | 0 | args->converter->toULength=2; |
316 | 0 | } |
317 | 0 | else{ |
318 | 0 | args->converter->toUBytes[0] = (uint8_t)mySourceChar; |
319 | 0 | args->converter->toULength=1; |
320 | 0 | } |
321 | 0 | break; |
322 | 0 | } |
323 | 0 | } |
324 | 0 | else{ |
325 | 0 | *err =U_BUFFER_OVERFLOW_ERROR; |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | | |
330 | 0 | args->target = myTarget; |
331 | 0 | args->source = mySource; |
332 | 0 | } |
333 | | |
334 | | |
335 | | static void U_CALLCONV |
336 | | UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, |
337 | 0 | UErrorCode * err){ |
338 | 0 | const UChar *mySource = args->source; |
339 | 0 | char *myTarget = args->target; |
340 | 0 | int32_t* offsets = args->offsets; |
341 | 0 | int32_t mySourceIndex = 0; |
342 | 0 | int32_t myTargetIndex = 0; |
343 | 0 | int32_t targetLength = (int32_t)(args->targetLimit - myTarget); |
344 | 0 | int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); |
345 | 0 | uint32_t targetUniChar = 0x0000; |
346 | 0 | UChar32 mySourceChar = 0x0000; |
347 | 0 | UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; |
348 | 0 | UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; |
349 | 0 | UBool oldIsTargetUCharDBCS; |
350 | 0 | int len =0; |
351 | 0 | const char* escSeq=NULL; |
352 | | |
353 | | /* Calling code already handles this situation. */ |
354 | | /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ |
355 | | *err = U_ILLEGAL_ARGUMENT_ERROR; |
356 | | return; |
357 | | }*/ |
358 | 0 | if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { |
359 | 0 | goto getTrail; |
360 | 0 | } |
361 | | /*writing the char to the output stream */ |
362 | 0 | while (mySourceIndex < mySourceLength){ |
363 | 0 | targetUniChar = missingCharMarker; |
364 | 0 | if (myTargetIndex < targetLength){ |
365 | | |
366 | 0 | mySourceChar = (UChar) mySource[mySourceIndex++]; |
367 | | |
368 | |
|
369 | 0 | oldIsTargetUCharDBCS = isTargetUCharDBCS; |
370 | 0 | if(mySourceChar ==UCNV_TILDE){ |
371 | | /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ |
372 | 0 | len = ESC_LEN; |
373 | 0 | escSeq = TILDE_ESCAPE; |
374 | 0 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
375 | 0 | continue; |
376 | 0 | } else if(mySourceChar <= 0x7f) { |
377 | 0 | targetUniChar = mySourceChar; |
378 | 0 | } else { |
379 | 0 | int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
380 | 0 | mySourceChar,&targetUniChar,args->converter->useFallback); |
381 | | /* we can only use lead bytes 21..7D and trail bytes 21..7E */ |
382 | 0 | if( length == 2 && |
383 | 0 | (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && |
384 | 0 | (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) |
385 | 0 | ) { |
386 | 0 | targetUniChar -= 0x8080; |
387 | 0 | } else { |
388 | 0 | targetUniChar = missingCharMarker; |
389 | 0 | } |
390 | 0 | } |
391 | 0 | if (targetUniChar != missingCharMarker){ |
392 | 0 | myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); |
393 | 0 | if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ |
394 | | /*Shifting from a double byte to single byte mode*/ |
395 | 0 | if(!isTargetUCharDBCS){ |
396 | 0 | len =ESC_LEN; |
397 | 0 | escSeq = SB_ESCAPE; |
398 | 0 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
399 | 0 | myConverterData->isEscapeAppended = TRUE; |
400 | 0 | } |
401 | 0 | else{ /* Shifting from a single byte to double byte mode*/ |
402 | 0 | len =ESC_LEN; |
403 | 0 | escSeq = DB_ESCAPE; |
404 | 0 | CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
405 | 0 | myConverterData->isEscapeAppended = TRUE; |
406 | | |
407 | 0 | } |
408 | 0 | } |
409 | | |
410 | 0 | if(isTargetUCharDBCS){ |
411 | 0 | if( myTargetIndex <targetLength){ |
412 | 0 | myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); |
413 | 0 | if(offsets){ |
414 | 0 | *(offsets++) = mySourceIndex-1; |
415 | 0 | } |
416 | 0 | if(myTargetIndex < targetLength){ |
417 | 0 | myTarget[myTargetIndex++] =(char) targetUniChar; |
418 | 0 | if(offsets){ |
419 | 0 | *(offsets++) = mySourceIndex-1; |
420 | 0 | } |
421 | 0 | }else{ |
422 | 0 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
423 | 0 | *err = U_BUFFER_OVERFLOW_ERROR; |
424 | 0 | } |
425 | 0 | }else{ |
426 | 0 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); |
427 | 0 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
428 | 0 | *err = U_BUFFER_OVERFLOW_ERROR; |
429 | 0 | } |
430 | |
|
431 | 0 | }else{ |
432 | 0 | if( myTargetIndex <targetLength){ |
433 | 0 | myTarget[myTargetIndex++] = (char) (targetUniChar ); |
434 | 0 | if(offsets){ |
435 | 0 | *(offsets++) = mySourceIndex-1; |
436 | 0 | } |
437 | | |
438 | 0 | }else{ |
439 | 0 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
440 | 0 | *err = U_BUFFER_OVERFLOW_ERROR; |
441 | 0 | } |
442 | 0 | } |
443 | |
|
444 | 0 | } |
445 | 0 | else{ |
446 | | /* oops.. the code point is unassigned */ |
447 | | /*Handle surrogates */ |
448 | | /*check if the char is a First surrogate*/ |
449 | 0 | if(U16_IS_SURROGATE(mySourceChar)) { |
450 | 0 | if(U16_IS_SURROGATE_LEAD(mySourceChar)) { |
451 | 0 | args->converter->fromUChar32=mySourceChar; |
452 | 0 | getTrail: |
453 | | /*look ahead to find the trail surrogate*/ |
454 | 0 | if(mySourceIndex < mySourceLength) { |
455 | | /* test the following code unit */ |
456 | 0 | UChar trail=(UChar) args->source[mySourceIndex]; |
457 | 0 | if(U16_IS_TRAIL(trail)) { |
458 | 0 | ++mySourceIndex; |
459 | 0 | mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); |
460 | 0 | args->converter->fromUChar32=0x00; |
461 | | /* there are no surrogates in GB2312*/ |
462 | 0 | *err = U_INVALID_CHAR_FOUND; |
463 | | /* exit this condition tree */ |
464 | 0 | } else { |
465 | | /* this is an unmatched lead code unit (1st surrogate) */ |
466 | | /* callback(illegal) */ |
467 | 0 | *err=U_ILLEGAL_CHAR_FOUND; |
468 | 0 | } |
469 | 0 | } else { |
470 | | /* no more input */ |
471 | 0 | *err = U_ZERO_ERROR; |
472 | 0 | } |
473 | 0 | } else { |
474 | | /* this is an unmatched trail code unit (2nd surrogate) */ |
475 | | /* callback(illegal) */ |
476 | 0 | *err=U_ILLEGAL_CHAR_FOUND; |
477 | 0 | } |
478 | 0 | } else { |
479 | | /* callback(unassigned) for a BMP code point */ |
480 | 0 | *err = U_INVALID_CHAR_FOUND; |
481 | 0 | } |
482 | |
|
483 | 0 | args->converter->fromUChar32=mySourceChar; |
484 | 0 | break; |
485 | 0 | } |
486 | 0 | } |
487 | 0 | else{ |
488 | 0 | *err = U_BUFFER_OVERFLOW_ERROR; |
489 | 0 | break; |
490 | 0 | } |
491 | 0 | targetUniChar=missingCharMarker; |
492 | 0 | } |
493 | | |
494 | 0 | args->target += myTargetIndex; |
495 | 0 | args->source += mySourceIndex; |
496 | 0 | myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
497 | 0 | } |
498 | | |
499 | | static void U_CALLCONV |
500 | 0 | _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { |
501 | 0 | UConverter *cnv = args->converter; |
502 | 0 | UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; |
503 | 0 | char *p; |
504 | 0 | char buffer[4]; |
505 | 0 | p = buffer; |
506 | | |
507 | 0 | if( convData->isTargetUCharDBCS){ |
508 | 0 | *p++= UCNV_TILDE; |
509 | 0 | *p++= UCNV_CLOSE_BRACE; |
510 | 0 | convData->isTargetUCharDBCS=FALSE; |
511 | 0 | } |
512 | 0 | *p++= (char)cnv->subChars[0]; |
513 | |
|
514 | 0 | ucnv_cbFromUWriteBytes(args, |
515 | 0 | buffer, (int32_t)(p - buffer), |
516 | 0 | offsetIndex, err); |
517 | 0 | } |
518 | | |
519 | | /* |
520 | | * Structure for cloning an HZ converter into a single memory block. |
521 | | */ |
522 | | struct cloneHZStruct |
523 | | { |
524 | | UConverter cnv; |
525 | | UConverter subCnv; |
526 | | UConverterDataHZ mydata; |
527 | | }; |
528 | | |
529 | | |
530 | | static UConverter * U_CALLCONV |
531 | | _HZ_SafeClone(const UConverter *cnv, |
532 | | void *stackBuffer, |
533 | | int32_t *pBufferSize, |
534 | | UErrorCode *status) |
535 | 0 | { |
536 | 0 | struct cloneHZStruct * localClone; |
537 | 0 | int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); |
538 | |
|
539 | 0 | if (U_FAILURE(*status)){ |
540 | 0 | return nullptr; |
541 | 0 | } |
542 | | |
543 | 0 | if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
544 | 0 | *pBufferSize = bufferSizeNeeded; |
545 | 0 | return nullptr; |
546 | 0 | } |
547 | | |
548 | 0 | localClone = (struct cloneHZStruct *)stackBuffer; |
549 | | /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
550 | |
|
551 | 0 | uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); |
552 | 0 | localClone->cnv.extraInfo = &localClone->mydata; |
553 | 0 | localClone->cnv.isExtraLocal = TRUE; |
554 | | |
555 | | /* deep-clone the sub-converter */ |
556 | 0 | size = (int32_t)sizeof(UConverter); |
557 | 0 | ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = |
558 | 0 | ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); |
559 | |
|
560 | 0 | return &localClone->cnv; |
561 | 0 | } |
562 | | |
563 | | static void U_CALLCONV |
564 | | _HZ_GetUnicodeSet(const UConverter *cnv, |
565 | | const USetAdder *sa, |
566 | | UConverterUnicodeSet which, |
567 | 0 | UErrorCode *pErrorCode) { |
568 | | /* HZ converts all of ASCII */ |
569 | 0 | sa->addRange(sa->set, 0, 0x7f); |
570 | | |
571 | | /* add all of the code points that the sub-converter handles */ |
572 | 0 | ucnv_MBCSGetFilteredUnicodeSetForUnicode( |
573 | 0 | ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, |
574 | 0 | sa, which, UCNV_SET_FILTER_HZ, |
575 | 0 | pErrorCode); |
576 | 0 | } |
577 | | U_CDECL_END |
578 | | static const UConverterImpl _HZImpl={ |
579 | | |
580 | | UCNV_HZ, |
581 | | |
582 | | NULL, |
583 | | NULL, |
584 | | |
585 | | _HZOpen, |
586 | | _HZClose, |
587 | | _HZReset, |
588 | | |
589 | | UConverter_toUnicode_HZ_OFFSETS_LOGIC, |
590 | | UConverter_toUnicode_HZ_OFFSETS_LOGIC, |
591 | | UConverter_fromUnicode_HZ_OFFSETS_LOGIC, |
592 | | UConverter_fromUnicode_HZ_OFFSETS_LOGIC, |
593 | | NULL, |
594 | | |
595 | | NULL, |
596 | | NULL, |
597 | | _HZ_WriteSub, |
598 | | _HZ_SafeClone, |
599 | | _HZ_GetUnicodeSet, |
600 | | NULL, |
601 | | NULL |
602 | | }; |
603 | | |
604 | | static const UConverterStaticData _HZStaticData={ |
605 | | sizeof(UConverterStaticData), |
606 | | "HZ", |
607 | | 0, |
608 | | UCNV_IBM, |
609 | | UCNV_HZ, |
610 | | 1, |
611 | | 4, |
612 | | { 0x1a, 0, 0, 0 }, |
613 | | 1, |
614 | | FALSE, |
615 | | FALSE, |
616 | | 0, |
617 | | 0, |
618 | | { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ |
619 | | |
620 | | }; |
621 | | |
622 | | const UConverterSharedData _HZData= |
623 | | UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); |
624 | | |
625 | | #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ |