Line | Count | Source (jump to first uncovered line) |
1 | | #include "rar.hpp" |
2 | | #define MBFUNCTIONS |
3 | | |
#if defined(_UNIX) && defined(MBFUNCTIONS)

// First code of the 0xE000 - 0xE0FF private use Unicode area. In Unix,
// high ASCII characters which cannot be converted to Unicode are mapped
// into this area.
static const uint MapAreaStart=0xE000;

// Marker code placed into a string containing mapped characters.
// 0xFFFF was used for this purpose initially, but MSVC2008 swprintf treats
// 0xFFFF as an error marker and fails. A workaround is possible,
// but using another character is safer.
static const uint MappedStringMark=0xFFFE;

// Unix specific helpers defined further below in this file. They convert
// strings which carry characters mapped to the private use area.
static bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success);
static void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success);

#endif
19 | | |
20 | | bool WideToChar(const wchar *Src,char *Dest,size_t DestSize) |
21 | 24.1k | { |
22 | 24.1k | bool RetCode=true; |
23 | 24.1k | *Dest=0; // Set 'Dest' to zero just in case the conversion will fail. |
24 | | |
25 | | #ifdef _WIN_ALL |
26 | | if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0) |
27 | | RetCode=false; |
28 | | |
29 | | // wcstombs is broken in Android NDK r9. |
30 | | #elif defined(_APPLE) |
31 | | WideToUtf(Src,Dest,DestSize); |
32 | | |
33 | | #elif defined(MBFUNCTIONS) |
34 | 24.1k | if (!WideToCharMap(Src,Dest,DestSize,RetCode)) |
35 | 21.1k | { |
36 | 21.1k | mbstate_t ps; // Use thread safe external state based functions. |
37 | 21.1k | memset (&ps, 0, sizeof(ps)); |
38 | 21.1k | const wchar *SrcParam=Src; // wcsrtombs can change the pointer. |
39 | | |
40 | | // Some implementations of wcsrtombs can cause memory analyzing tools |
41 | | // like valgrind to report uninitialized data access. It happens because |
42 | | // internally these implementations call SSE4 based wcslen function, |
43 | | // which reads 16 bytes at once including those beyond of trailing 0. |
44 | 21.1k | size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps); |
45 | | |
46 | 21.1k | if (ResultingSize==(size_t)-1 && errno==EILSEQ) |
47 | 3.65k | { |
48 | | // Aborted on inconvertible character not zero terminating the result. |
49 | | // EILSEQ helps to distinguish it from small output buffer abort. |
50 | | // We want to convert as much as we can, so we clean the output buffer |
51 | | // and repeat conversion. |
52 | 3.65k | memset (&ps, 0, sizeof(ps)); |
53 | 3.65k | SrcParam=Src; // wcsrtombs can change the pointer. |
54 | 3.65k | memset(Dest,0,DestSize); |
55 | 3.65k | ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps); |
56 | 3.65k | } |
57 | | |
58 | 21.1k | if (ResultingSize==(size_t)-1) |
59 | 3.65k | RetCode=false; |
60 | 21.1k | if (ResultingSize==0 && *Src!=0) |
61 | 0 | RetCode=false; |
62 | 21.1k | } |
63 | | #else |
64 | | for (int I=0;I<DestSize;I++) |
65 | | { |
66 | | Dest[I]=(char)Src[I]; |
67 | | if (Src[I]==0) |
68 | | break; |
69 | | } |
70 | | #endif |
71 | 24.1k | if (DestSize>0) |
72 | 24.1k | Dest[DestSize-1]=0; |
73 | | |
74 | | // We tried to return the empty string if conversion is failed, |
75 | | // but it does not work well. WideCharToMultiByte returns 'failed' code |
76 | | // and partially converted string even if we wanted to convert only a part |
77 | | // of string and passed DestSize smaller than required for fully converted |
78 | | // string. Such call is the valid behavior in RAR code and we do not expect |
79 | | // the empty string in this case. |
80 | | |
81 | 24.1k | return RetCode; |
82 | 24.1k | } |
83 | | |
84 | | |
85 | | bool CharToWide(const char *Src,wchar *Dest,size_t DestSize) |
86 | 1.67k | { |
87 | 1.67k | bool RetCode=true; |
88 | 1.67k | *Dest=0; // Set 'Dest' to zero just in case the conversion will fail. |
89 | | |
90 | | #ifdef _WIN_ALL |
91 | | if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0) |
92 | | RetCode=false; |
93 | | |
94 | | // mbstowcs is broken in Android NDK r9. |
95 | | #elif defined(_APPLE) |
96 | | UtfToWide(Src,Dest,DestSize); |
97 | | |
98 | | #elif defined(MBFUNCTIONS) |
99 | 1.67k | mbstate_t ps; |
100 | 1.67k | memset (&ps, 0, sizeof(ps)); |
101 | 1.67k | const char *SrcParam=Src; // mbsrtowcs can change the pointer. |
102 | 1.67k | size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps); |
103 | 1.67k | if (ResultingSize==(size_t)-1) |
104 | 1.00k | RetCode=false; |
105 | 1.67k | if (ResultingSize==0 && *Src!=0) |
106 | 0 | RetCode=false; |
107 | | |
108 | 1.67k | if (RetCode==false && DestSize>1) |
109 | 1.00k | CharToWideMap(Src,Dest,DestSize,RetCode); |
110 | | #else |
111 | | for (int I=0;I<DestSize;I++) |
112 | | { |
113 | | Dest[I]=(wchar_t)Src[I]; |
114 | | if (Src[I]==0) |
115 | | break; |
116 | | } |
117 | | #endif |
118 | 1.67k | if (DestSize>0) |
119 | 1.67k | Dest[DestSize-1]=0; |
120 | | |
121 | | // We tried to return the empty string if conversion is failed, |
122 | | // but it does not work well. MultiByteToWideChar returns 'failed' code |
123 | | // even if we wanted to convert only a part of string and passed DestSize |
124 | | // smaller than required for fully converted string. Such call is the valid |
125 | | // behavior in RAR code and we do not expect the empty string in this case. |
126 | | |
127 | 1.67k | return RetCode; |
128 | 1.67k | } |
129 | | |
130 | | |
#if defined(_UNIX) && defined(MBFUNCTIONS)
// Convert and restore mapped inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
// Returns false without touching 'Dest' and 'Success' if 'Src' does not
// contain the mapped string mark, so the caller has to perform the usual
// conversion. Otherwise stores the zero terminated result to 'Dest',
// sets 'Success' to false if some character had to be replaced by '_'
// and returns true. 'DestSize' is the size of 'Dest' in chars.
bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success)
{
  // String with inconvertible characters mapped to private use Unicode area
  // must have the mark code somewhere.
  if (wcschr(Src,(wchar)MappedStringMark)==NULL)
    return false;

  // Seems to be that wcrtomb in some memory analyzing libraries
  // can produce uninitialized output while reporting success on garbage input.
  // So we clean the destination to calm analyzers.
  memset(Dest,0,DestSize);

  Success=true;
  uint SrcPos=0,DestPos=0;

  // Every step can store up to MB_CUR_MAX chars, so continue only while
  // that many chars and the trailing zero still fit. The sum is used instead
  // of 'DestSize-MB_CUR_MAX', because the unsigned subtraction wraps around
  // for DestSize<MB_CUR_MAX and would allow writing beyond the buffer.
  while (Src[SrcPos]!=0 && DestPos+MB_CUR_MAX<DestSize)
  {
    // The mark itself is not a part of the resulting name.
    if (uint(Src[SrcPos])==MappedStringMark)
    {
      SrcPos++;
      continue;
    }
    // For security reasons do not restore low ASCII codes, so mapping cannot
    // be used to hide control codes like path separators.
    if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100)
      Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart);
    else
    {
      mbstate_t ps;
      memset(&ps,0,sizeof(ps));
      if (wcrtomb(Dest+DestPos,Src[SrcPos],&ps)==(size_t)-1)
      {
        // Inconvertible character, replace it by a placeholder.
        Dest[DestPos]='_';
        Success=false;
      }
      SrcPos++;

      // Measure the stored multibyte character to advance the destination
      // position. Advance by one char at least, also if mbrlen failed.
      memset(&ps,0,sizeof(ps));
      int Length=mbrlen(Dest+DestPos,MB_CUR_MAX,&ps);
      DestPos+=Max(Length,1);
    }
  }
  // 'DestSize-1' would wrap around for the empty buffer.
  if (DestSize>0)
    Dest[Min(DestPos,DestSize-1)]=0;
  return true;
}
#endif
178 | | |
179 | | |
#if defined(_UNIX) && defined(MBFUNCTIONS)
// Convert and map inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
// Stores the zero terminated result to 'Dest', which has the room
// for 'DestSize' wide characters. 'Success' is set to true only if
// the end of 'Src' was reached. It stays false if the destination is
// exhausted first or an inconvertible low ASCII character is found.
void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success)
{
  // Map inconvertible characters to private use Unicode area 0xE000.
  // Mark such string by placing special non-character code before
  // first inconvertible character.
  Success=false;

  // Nothing can be stored, and 'DestSize-1' below would wrap around.
  if (DestSize==0)
    return;

  bool MarkAdded=false;
  uint SrcPos=0,DestPos=0;
  while (DestPos<DestSize)
  {
    if (Src[SrcPos]==0)
    {
      Success=true;
      break;
    }
    mbstate_t ps;
    memset(&ps,0,sizeof(ps));
    size_t res=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);
    if (res==(size_t)-1 || res==(size_t)-2)
    {
      // For security reasons we do not want to map low ASCII characters,
      // so we do not have additional .. and path separator codes.
      if (byte(Src[SrcPos])>=0x80)
      {
        if (!MarkAdded)
        {
          Dest[DestPos++]=MappedStringMark;
          MarkAdded=true;
          if (DestPos>=DestSize)
            break;
        }
        Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;
      }
      else
        break;
    }
    else
    {
      // mbrtowc already returned the number of consumed source bytes,
      // so we do not need to scan the same character again with mbrlen.
      // Advance by one byte at least.
      SrcPos+=res==0 ? 1 : (uint)res;
      DestPos++;
    }
  }
  Dest[Min(DestPos,DestSize-1)]=0;
}
#endif
230 | | |
231 | | |
232 | | // SrcSize is in wide characters, not in bytes. |
233 | | byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize) |
234 | 0 | { |
235 | 0 | for (size_t I=0;I<SrcSize;I++,Src++) |
236 | 0 | { |
237 | 0 | Dest[I*2]=(byte)*Src; |
238 | 0 | Dest[I*2+1]=(byte)(*Src>>8); |
239 | 0 | if (*Src==0) |
240 | 0 | break; |
241 | 0 | } |
242 | 0 | return Dest; |
243 | 0 | } |
244 | | |
245 | | |
246 | | wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize) |
247 | 0 | { |
248 | 0 | for (size_t I=0;I<DestSize;I++) |
249 | 0 | if ((Dest[I]=Src[I*2]+(Src[I*2+1]<<8))==0) |
250 | 0 | break; |
251 | 0 | return Dest; |
252 | 0 | } |
253 | | |
254 | | |
255 | | void WideToUtf(const wchar *Src,char *Dest,size_t DestSize) |
256 | 0 | { |
257 | 0 | long dsize=(long)DestSize; |
258 | 0 | dsize--; |
259 | 0 | while (*Src!=0 && --dsize>=0) |
260 | 0 | { |
261 | 0 | uint c=*(Src++); |
262 | 0 | if (c<0x80) |
263 | 0 | *(Dest++)=c; |
264 | 0 | else |
265 | 0 | if (c<0x800 && --dsize>=0) |
266 | 0 | { |
267 | 0 | *(Dest++)=(0xc0|(c>>6)); |
268 | 0 | *(Dest++)=(0x80|(c&0x3f)); |
269 | 0 | } |
270 | 0 | else |
271 | 0 | { |
272 | 0 | if (c>=0xd800 && c<=0xdbff && *Src>=0xdc00 && *Src<=0xdfff) // Surrogate pair. |
273 | 0 | { |
274 | 0 | c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000; |
275 | 0 | Src++; |
276 | 0 | } |
277 | 0 | if (c<0x10000 && (dsize-=2)>=0) |
278 | 0 | { |
279 | 0 | *(Dest++)=(0xe0|(c>>12)); |
280 | 0 | *(Dest++)=(0x80|((c>>6)&0x3f)); |
281 | 0 | *(Dest++)=(0x80|(c&0x3f)); |
282 | 0 | } |
283 | 0 | else |
284 | 0 | if (c < 0x200000 && (dsize-=3)>=0) |
285 | 0 | { |
286 | 0 | *(Dest++)=(0xf0|(c>>18)); |
287 | 0 | *(Dest++)=(0x80|((c>>12)&0x3f)); |
288 | 0 | *(Dest++)=(0x80|((c>>6)&0x3f)); |
289 | 0 | *(Dest++)=(0x80|(c&0x3f)); |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } |
293 | 0 | *Dest=0; |
294 | 0 | } |
295 | | |
296 | | |
297 | | size_t WideToUtfSize(const wchar *Src) |
298 | 0 | { |
299 | 0 | size_t Size=0; |
300 | 0 | for (;*Src!=0;Src++) |
301 | 0 | if (*Src<0x80) |
302 | 0 | Size++; |
303 | 0 | else |
304 | 0 | if (*Src<0x800) |
305 | 0 | Size+=2; |
306 | 0 | else |
307 | 0 | if ((uint)*Src<0x10000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t. |
308 | 0 | { |
309 | 0 | if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff) |
310 | 0 | { |
311 | 0 | Size+=4; // 4 output bytes for Unicode surrogate pair. |
312 | 0 | Src++; |
313 | 0 | } |
314 | 0 | else |
315 | 0 | Size+=3; |
316 | 0 | } |
317 | 0 | else |
318 | 0 | if ((uint)*Src<0x200000) //(uint) to avoid Clang/win "always true" warning for 16-bit wchar_t. |
319 | 0 | Size+=4; |
320 | 0 | return Size+1; // Include terminating zero. |
321 | 0 | } |
322 | | |
323 | | |
324 | | bool UtfToWide(const char *Src,wchar *Dest,size_t DestSize) |
325 | 143k | { |
326 | 143k | bool Success=true; |
327 | 143k | long dsize=(long)DestSize; |
328 | 143k | dsize--; |
329 | 152k | while (*Src!=0) |
330 | 9.02k | { |
331 | 9.02k | uint c=byte(*(Src++)),d; |
332 | 9.02k | if (c<0x80) |
333 | 8.84k | d=c; |
334 | 182 | else |
335 | 182 | if ((c>>5)==6) |
336 | 50 | { |
337 | 50 | if ((*Src&0xc0)!=0x80) |
338 | 45 | { |
339 | 45 | Success=false; |
340 | 45 | break; |
341 | 45 | } |
342 | 5 | d=((c&0x1f)<<6)|(*Src&0x3f); |
343 | 5 | Src++; |
344 | 5 | } |
345 | 132 | else |
346 | 132 | if ((c>>4)==14) |
347 | 12 | { |
348 | 12 | if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80) |
349 | 11 | { |
350 | 11 | Success=false; |
351 | 11 | break; |
352 | 11 | } |
353 | 1 | d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f); |
354 | 1 | Src+=2; |
355 | 1 | } |
356 | 120 | else |
357 | 120 | if ((c>>3)==30) |
358 | 5 | { |
359 | 5 | if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80) |
360 | 5 | { |
361 | 5 | Success=false; |
362 | 5 | break; |
363 | 5 | } |
364 | 0 | d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f); |
365 | 0 | Src+=3; |
366 | 0 | } |
367 | 115 | else |
368 | 115 | { |
369 | 115 | Success=false; |
370 | 115 | break; |
371 | 115 | } |
372 | 8.84k | if (--dsize<0) |
373 | 0 | break; |
374 | 8.84k | if (d>0xffff) |
375 | 0 | { |
376 | 0 | if (--dsize<0) |
377 | 0 | break; |
378 | 0 | if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629. |
379 | 0 | { |
380 | 0 | Success=false; |
381 | 0 | continue; |
382 | 0 | } |
383 | 0 | if (sizeof(*Dest)==2) // Use the surrogate pair. |
384 | 0 | { |
385 | 0 | *(Dest++)=((d-0x10000)>>10)+0xd800; |
386 | 0 | *(Dest++)=(d&0x3ff)+0xdc00; |
387 | 0 | } |
388 | 0 | else |
389 | 0 | *(Dest++)=d; |
390 | 0 | } |
391 | 8.84k | else |
392 | 8.84k | *(Dest++)=d; |
393 | 8.84k | } |
394 | 143k | *Dest=0; |
395 | 143k | return Success; |
396 | 143k | } |
397 | | |
398 | | |
399 | | // For zero terminated strings. |
400 | | bool IsTextUtf8(const byte *Src) |
401 | 0 | { |
402 | 0 | return IsTextUtf8(Src,strlen((const char *)Src)); |
403 | 0 | } |
404 | | |
405 | | |
406 | | // Source data can be both with and without UTF-8 BOM. |
407 | | bool IsTextUtf8(const byte *Src,size_t SrcSize) |
408 | 0 | { |
409 | 0 | while (SrcSize-- > 0) |
410 | 0 | { |
411 | 0 | byte C=*(Src++); |
412 | 0 | int HighOne=0; // Number of leftmost '1' bits. |
413 | 0 | for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1) |
414 | 0 | HighOne++; |
415 | 0 | if (HighOne==1 || HighOne>6) |
416 | 0 | return false; |
417 | 0 | while (--HighOne > 0) |
418 | 0 | if (SrcSize-- <= 0 || (*(Src++) & 0xc0)!=0x80) |
419 | 0 | return false; |
420 | 0 | } |
421 | 0 | return true; |
422 | 0 | } |
423 | | |
424 | | |
425 | | int wcsicomp(const wchar *s1,const wchar *s2) |
426 | 802 | { |
427 | | #ifdef _WIN_ALL |
428 | | return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2; |
429 | | #else |
430 | 1.97k | while (true) |
431 | 1.97k | { |
432 | 1.97k | wchar u1 = towupper(*s1); |
433 | 1.97k | wchar u2 = towupper(*s2); |
434 | 1.97k | if (u1 != u2) |
435 | 734 | return u1 < u2 ? -1 : 1; |
436 | 1.23k | if (*s1==0) |
437 | 68 | break; |
438 | 1.16k | s1++; |
439 | 1.16k | s2++; |
440 | 1.16k | } |
441 | 68 | return 0; |
442 | 802 | #endif |
443 | 802 | } |
444 | | |
445 | | |
446 | | int wcsnicomp(const wchar *s1,const wchar *s2,size_t n) |
447 | 0 | { |
448 | | #ifdef _WIN_ALL |
449 | | // If we specify 'n' exceeding the actual string length, CompareString goes |
450 | | // beyond the trailing zero and compares garbage. So we need to limit 'n' |
451 | | // to real string length. |
452 | | size_t l1=Min(wcslen(s1)+1,n); |
453 | | size_t l2=Min(wcslen(s2)+1,n); |
454 | | return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2; |
455 | | #else |
456 | 0 | if (n==0) |
457 | 0 | return 0; |
458 | 0 | while (true) |
459 | 0 | { |
460 | 0 | wchar u1 = towupper(*s1); |
461 | 0 | wchar u2 = towupper(*s2); |
462 | 0 | if (u1 != u2) |
463 | 0 | return u1 < u2 ? -1 : 1; |
464 | 0 | if (*s1==0 || --n==0) |
465 | 0 | break; |
466 | 0 | s1++; |
467 | 0 | s2++; |
468 | 0 | } |
469 | 0 | return 0; |
470 | 0 | #endif |
471 | 0 | } |
472 | | |
473 | | |
474 | | // Case insensitive wcsstr(). |
475 | | const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search) |
476 | 0 | { |
477 | 0 | for (size_t i=0;str[i]!=0;i++) |
478 | 0 | for (size_t j=0;;j++) |
479 | 0 | { |
480 | 0 | if (search[j]==0) |
481 | 0 | return str+i; |
482 | 0 | if (tolowerw(str[i+j])!=tolowerw(search[j])) |
483 | 0 | break; |
484 | 0 | } |
485 | 0 | return NULL; |
486 | 0 | } |
487 | | |
488 | | |
489 | | #ifndef SFX_MODULE |
490 | | wchar* wcslower(wchar *s) |
491 | 0 | { |
492 | | #ifdef _WIN_ALL |
493 | | // _wcslwr requires setlocale and we do not want to depend on setlocale |
494 | | // in Windows. Also CharLower involves less overhead. |
495 | | CharLower(s); |
496 | | #else |
497 | 0 | for (wchar *c=s;*c!=0;c++) |
498 | 0 | *c=towlower(*c); |
499 | 0 | #endif |
500 | 0 | return s; |
501 | 0 | } |
502 | | #endif |
503 | | |
504 | | |
505 | | #ifndef SFX_MODULE |
506 | | wchar* wcsupper(wchar *s) |
507 | 4.09k | { |
508 | | #ifdef _WIN_ALL |
509 | | // _wcsupr requires setlocale and we do not want to depend on setlocale |
510 | | // in Windows. Also CharUpper involves less overhead. |
511 | | CharUpper(s); |
512 | | #else |
513 | 8.19k | for (wchar *c=s;*c!=0;c++) |
514 | 4.09k | *c=towupper(*c); |
515 | 4.09k | #endif |
516 | 4.09k | return s; |
517 | 4.09k | } |
518 | | #endif |
519 | | |
520 | | |
521 | | |
522 | | |
// Return the uppercase form of the wide character code 'ch'.
int toupperw(int ch)
{
#ifndef _WIN_ALL
  return (int)towupper((wint_t)ch);
#else
  // CharUpper is more reliable than towupper in Windows, which seems to be
  // C locale dependent even in Unicode version. For example, towupper failed
  // to convert lowercase Russian characters. Use 0xffff mask to prevent crash
  // if value larger than 0xffff is passed to this function.
  return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)(ch&0xffff));
#endif
}
535 | | |
536 | | |
// Return the lowercase form of the wide character code 'ch'.
int tolowerw(int ch)
{
#ifndef _WIN_ALL
  return (int)towlower((wint_t)ch);
#else
  // CharLower is more reliable than towlower in Windows.
  // See comment for towupper above. Use 0xffff mask to prevent crash
  // if value larger than 0xffff is passed to this function.
  return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)(ch&0xffff));
#endif
}
548 | | |
549 | | |
550 | | int atoiw(const wchar *s) |
551 | 14 | { |
552 | 14 | return (int)atoilw(s); |
553 | 14 | } |
554 | | |
555 | | |
556 | | int64 atoilw(const wchar *s) |
557 | 14 | { |
558 | 14 | bool sign=false; |
559 | 14 | if (*s=='-') // We do use signed integers here, for example, in GUI SFX. |
560 | 1 | { |
561 | 1 | s++; |
562 | 1 | sign=true; |
563 | 1 | } |
564 | | // Use unsigned type here, since long string can overflow the variable |
565 | | // and signed integer overflow is undefined behavior in C++. |
566 | 14 | uint64 n=0; |
567 | 267 | while (*s>='0' && *s<='9') |
568 | 253 | { |
569 | 253 | n=n*10+(*s-'0'); |
570 | 253 | s++; |
571 | 253 | } |
572 | | // Check int64(n)>=0 to avoid the signed overflow with undefined behavior |
573 | | // when negating 0x8000000000000000. |
574 | 14 | return sign && int64(n)>=0 ? -int64(n) : int64(n); |
575 | 14 | } |
576 | | |
577 | | |
#ifdef DBCS_SUPPORTED
// Global object, its constructor reads the code page data.
SupportDBCS gdbcs;

SupportDBCS::SupportDBCS()
{
  Init();
}


// Set DBCSMode and fill the IsLeadByte table for the CP_ACP code page.
void SupportDBCS::Init()
{
  CPINFO CPInfo;
  // GetCPInfo returns zero if it fails and CPInfo is not filled then.
  // Do not read the uninitialized MaxCharSize in this case, assume
  // the single byte mode instead.
  DBCSMode=GetCPInfo(CP_ACP,&CPInfo)!=0 && CPInfo.MaxCharSize > 1;
  for (uint I=0;I<ASIZE(IsLeadByte);I++)
    IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
}


// Return the pointer to character following the character at 's'.
char* SupportDBCS::charnext(const char *s)
{
  // Zero cannot be the trail byte. So if next byte after the lead byte
  // is 0, the string is corrupt and we'll better return the pointer to 0,
  // to break string processing loops.
  return (char *)(IsLeadByte[(byte)*s] && s[1]!=0 ? s+2:s+1);
}
#endif
605 | | |
606 | | |