/src/libcdr/src/lib/libcdr_utils.cpp
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* |
3 | | * This file is part of the libcdr project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #include "libcdr_utils.h" |
11 | | |
12 | | #include <cassert> |
13 | | #include <cstdarg> |
14 | | #include <cstdio> |
15 | | #include <string.h> |
16 | | |
17 | | #include <unicode/ucsdet.h> |
18 | | #include <unicode/ucnv.h> |
19 | | #include <unicode/utypes.h> |
20 | | #include <unicode/utf8.h> |
21 | | |
// Number of elements of a C array. The whole expansion and the argument are
// parenthesized so that the macro behaves as a single value inside larger
// expressions (e.g. on the right-hand side of a division).
#define CDR_NUM_ELEMENTS(array) (sizeof(array)/sizeof((array)[0]))
23 | | |
24 | | #define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000) |
25 | | |
26 | | namespace |
27 | | { |
28 | | |
/* Translate a charset name into the numeric charset code that
 * appendCharacters() switches on.
 *
 * The comparison is exact (case-sensitive). Names that are not listed
 * yield 0, the same value that is used for the ANSI charsets.
 */
static unsigned short getEncodingFromICUName(const char *name)
{
  struct NameToCharset
  {
    const char *icuName;
    unsigned short charset;
  };

  static const NameToCharset knownNames[] =
  {
    // ANSI
    { "ISO-8859-1", 0 },
    { "windows-1252", 0 },
    // CENTRAL EUROPE
    { "ISO-8859-2", 0xee },
    { "windows-1250", 0xee },
    // RUSSIAN
    { "ISO-8859-5", 0xcc },
    { "windows-1251", 0xcc },
    { "KOI8-R", 0xcc },
    // ARABIC
    { "ISO-8859-6", 0xb2 },
    { "windows-1256", 0xb2 },
    // TURKISH
    { "ISO-8859-9", 0xa2 },
    { "windows-1254", 0xa2 },
    // GREEK
    { "ISO-8859-7", 0xa1 },
    { "windows-1253", 0xa1 },
    // HEBREW
    { "ISO-8859-8", 0xb1 },
    { "windows-1255", 0xb1 },
    // JAPANESE
    { "Shift_JIS", 0x80 },
    { "ISO-2022-JP", 0x80 },
    { "EUC-JP", 0x80 },
    { "windows-932", 0x80 },
    // KOREAN
    { "ISO-2022-KR", 0x81 },
    { "EUC-KR", 0x81 },
    { "windows-949", 0x81 },
    // CHINESE SIMPLIFIED
    { "ISO-2022-CN", 0x86 },
    { "GB18030", 0x86 },
    { "windows-936", 0x86 },
    // CHINESE TRADITIONAL
    { "Big5", 0x88 },
    { "windows-950", 0x88 }
  };

  for (const auto &entry : knownNames)
  {
    if (strcmp(name, entry.icuName) == 0)
      return entry.charset;
  }

  // Unknown name: fall back to the ANSI code
  return 0;
}
99 | | |
100 | | static unsigned short getEncoding(const unsigned char *buffer, unsigned long bufferLength) |
101 | 629k | { |
102 | 629k | if (!buffer) |
103 | 0 | return 0; |
104 | 629k | UErrorCode status = U_ZERO_ERROR; |
105 | 629k | UCharsetDetector *csd = nullptr; |
106 | 629k | try |
107 | 629k | { |
108 | 629k | csd = ucsdet_open(&status); |
109 | 629k | if (U_FAILURE(status) || !csd) |
110 | 0 | return 0; |
111 | 629k | ucsdet_enableInputFilter(csd, true); |
112 | 629k | ucsdet_setText(csd, (const char *)buffer, (unsigned)bufferLength, &status); |
113 | 629k | if (U_FAILURE(status)) |
114 | 0 | throw libcdr::EncodingException(); |
115 | 629k | const UCharsetMatch *csm = ucsdet_detect(csd, &status); |
116 | 629k | if (U_FAILURE(status) || !csm) |
117 | 40.4k | throw libcdr::EncodingException(); |
118 | 588k | const char *name = ucsdet_getName(csm, &status); |
119 | 588k | if (U_FAILURE(status) || !name) |
120 | 0 | throw libcdr::EncodingException(); |
121 | 588k | int32_t confidence = ucsdet_getConfidence(csm, &status); |
122 | 588k | if (U_FAILURE(status)) |
123 | 0 | throw libcdr::EncodingException(); |
124 | 588k | CDR_DEBUG_MSG(("UCSDET: getEncoding name %s, confidence %i\n", name, confidence)); |
125 | 588k | unsigned short encoding = getEncodingFromICUName(name); |
126 | 588k | ucsdet_close(csd); |
127 | | /* From ICU documentation |
128 | | * A confidence value of ten does have a general meaning - it is used |
129 | | * for charsets that can represent the input data, but for which there |
130 | | * is no other indication that suggests that the charset is the correct |
131 | | * one. Pure 7 bit ASCII data, for example, is compatible with a great |
132 | | * many charsets, most of which will appear as possible matches with |
133 | | * a confidence of 10. |
134 | | */ |
135 | 588k | if (confidence == 10) |
136 | 49.9k | return 0; |
137 | 538k | return encoding; |
138 | 588k | } |
139 | 629k | catch (const libcdr::EncodingException &) |
140 | 629k | { |
141 | 40.4k | ucsdet_close(csd); |
142 | 40.4k | return 0; |
143 | 40.4k | } |
144 | 629k | } |
145 | | |
146 | | static void _appendUCS4(librevenge::RVNGString &text, UChar32 ucs4Character) |
147 | 50.4M | { |
148 | | // Convert carriage returns to new line characters |
149 | | // Writerperfect/LibreOffice will replace them by <text:line-break> |
150 | 50.4M | if (ucs4Character == (UChar32) 0x0d) |
151 | 17.0k | ucs4Character = (UChar32) '\n'; |
152 | | |
153 | 50.4M | unsigned char outbuf[U8_MAX_LENGTH+1]; |
154 | 50.4M | int i = 0; |
155 | 50.4M | U8_APPEND_UNSAFE(&outbuf[0], i, ucs4Character); |
156 | 50.4M | outbuf[i] = 0; |
157 | | |
158 | 50.4M | text.append((char *)outbuf); |
159 | 50.4M | } |
160 | | |
161 | | } // anonymous namespace |
162 | | |
163 | | uint8_t libcdr::readU8(librevenge::RVNGInputStream *input, bool /* bigEndian */) |
164 | 298M | { |
165 | 298M | if (!input || input->isEnd()) |
166 | 20.9k | { |
167 | 20.9k | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
168 | 20.9k | throw EndOfStreamException(); |
169 | 20.9k | } |
170 | 298M | unsigned long numBytesRead; |
171 | 298M | uint8_t const *p = input->read(sizeof(uint8_t), numBytesRead); |
172 | | |
173 | 298M | if (p && numBytesRead == sizeof(uint8_t)) |
174 | 298M | return *(uint8_t const *)(p); |
175 | 0 | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
176 | 0 | throw EndOfStreamException(); |
177 | 298M | } |
178 | | |
179 | | uint16_t libcdr::readU16(librevenge::RVNGInputStream *input, bool bigEndian) |
180 | 778M | { |
181 | 778M | if (!input || input->isEnd()) |
182 | 14.0k | { |
183 | 14.0k | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
184 | 14.0k | throw EndOfStreamException(); |
185 | 14.0k | } |
186 | 777M | unsigned long numBytesRead; |
187 | 777M | uint8_t const *p = input->read(sizeof(uint16_t), numBytesRead); |
188 | | |
189 | 777M | if (p && numBytesRead == sizeof(uint16_t)) |
190 | 777M | { |
191 | 777M | if (bigEndian) |
192 | 550 | return (uint16_t)(p[1]|((uint16_t)p[0]<<8)); |
193 | 777M | return (uint16_t)(p[0]|((uint16_t)p[1]<<8)); |
194 | 777M | } |
195 | 10.4k | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
196 | 10.4k | throw EndOfStreamException(); |
197 | 777M | } |
198 | | |
199 | | int16_t libcdr::readS16(librevenge::RVNGInputStream *input, bool bigEndian) |
200 | 79.5M | { |
201 | 79.5M | return (int16_t)readU16(input, bigEndian); |
202 | 79.5M | } |
203 | | |
204 | | uint32_t libcdr::readU32(librevenge::RVNGInputStream *input, bool bigEndian) |
205 | 234M | { |
206 | 234M | if (!input || input->isEnd()) |
207 | 56.4k | { |
208 | 56.4k | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
209 | 56.4k | throw EndOfStreamException(); |
210 | 56.4k | } |
211 | 234M | unsigned long numBytesRead; |
212 | 234M | uint8_t const *p = input->read(sizeof(uint32_t), numBytesRead); |
213 | | |
214 | 234M | if (p && numBytesRead == sizeof(uint32_t)) |
215 | 234M | { |
216 | 234M | if (bigEndian) |
217 | 1.06k | return (uint32_t)p[3]|((uint32_t)p[2]<<8)|((uint32_t)p[1]<<16)|((uint32_t)p[0]<<24); |
218 | 234M | return (uint32_t)p[0]|((uint32_t)p[1]<<8)|((uint32_t)p[2]<<16)|((uint32_t)p[3]<<24); |
219 | 234M | } |
220 | 23.6k | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
221 | 23.6k | throw EndOfStreamException(); |
222 | 234M | } |
223 | | |
224 | | int32_t libcdr::readS32(librevenge::RVNGInputStream *input, bool bigEndian) |
225 | 107M | { |
226 | 107M | return (int32_t)readU32(input, bigEndian); |
227 | 107M | } |
228 | | |
229 | | uint64_t libcdr::readU64(librevenge::RVNGInputStream *input, bool bigEndian) |
230 | 15.9M | { |
231 | 15.9M | if (!input || input->isEnd()) |
232 | 40 | { |
233 | 40 | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
234 | 40 | throw EndOfStreamException(); |
235 | 40 | } |
236 | 15.9M | unsigned long numBytesRead; |
237 | 15.9M | uint8_t const *p = input->read(sizeof(uint64_t), numBytesRead); |
238 | | |
239 | 15.9M | if (p && numBytesRead == sizeof(uint64_t)) |
240 | 15.9M | { |
241 | 15.9M | if (bigEndian) |
242 | 131 | return (uint64_t)p[7]|((uint64_t)p[6]<<8)|((uint64_t)p[5]<<16)|((uint64_t)p[4]<<24)|((uint64_t)p[3]<<32)|((uint64_t)p[2]<<40)|((uint64_t)p[1]<<48)|((uint64_t)p[0]<<56); |
243 | 15.9M | return (uint64_t)p[0]|((uint64_t)p[1]<<8)|((uint64_t)p[2]<<16)|((uint64_t)p[3]<<24)|((uint64_t)p[4]<<32)|((uint64_t)p[5]<<40)|((uint64_t)p[6]<<48)|((uint64_t)p[7]<<56); |
244 | 15.9M | } |
245 | 62 | CDR_DEBUG_MSG(("Throwing EndOfStreamException\n")); |
246 | 62 | throw EndOfStreamException(); |
247 | 15.9M | } |
248 | | |
249 | | double libcdr::readDouble(librevenge::RVNGInputStream *input, bool bigEndian) |
250 | 7.19M | { |
251 | 7.19M | union |
252 | 7.19M | { |
253 | 7.19M | uint64_t u; |
254 | 7.19M | double d; |
255 | 7.19M | } tmpUnion; |
256 | | |
257 | 7.19M | tmpUnion.u = readU64(input, bigEndian); |
258 | | |
259 | 7.19M | return tmpUnion.d; |
260 | 7.19M | } |
261 | | |
262 | | double libcdr::readFixedPoint(librevenge::RVNGInputStream *input, bool bigEndian) |
263 | 81.5k | { |
264 | 81.5k | unsigned fixedPointNumber = readU32(input, bigEndian); |
265 | 81.5k | auto fixedPointNumberIntegerPart = (short)((fixedPointNumber & 0xFFFF0000) >> 16); |
266 | 81.5k | auto fixedPointNumberFractionalPart = (double)((double)(fixedPointNumber & 0x0000FFFF)/(double)0xFFFF); |
267 | 81.5k | return ((double)fixedPointNumberIntegerPart + fixedPointNumberFractionalPart); |
268 | 81.5k | } |
269 | | |
270 | | unsigned long libcdr::getLength(librevenge::RVNGInputStream *const input) |
271 | 7.84M | { |
272 | 7.84M | if (!input) |
273 | 0 | throw EndOfStreamException(); |
274 | | |
275 | 7.84M | const long orig = input->tell(); |
276 | 7.84M | long end = 0; |
277 | | |
278 | 7.84M | if (input->seek(0, librevenge::RVNG_SEEK_END) == 0) |
279 | 7.84M | { |
280 | 7.84M | end = input->tell(); |
281 | 7.84M | } |
282 | 0 | else |
283 | 0 | { |
284 | | // RVNG_SEEK_END does not work. Use the harder way. |
285 | 0 | if (input->seek(0, librevenge::RVNG_SEEK_SET) != 0) |
286 | 0 | throw EndOfStreamException(); |
287 | 0 | while (!input->isEnd()) |
288 | 0 | { |
289 | 0 | readU8(input); |
290 | 0 | ++end; |
291 | 0 | } |
292 | 0 | } |
293 | 7.84M | assert(end >= 0); |
294 | | |
295 | 7.84M | if (input->seek(orig, librevenge::RVNG_SEEK_SET) != 0) |
296 | 0 | throw EndOfStreamException(); |
297 | | |
298 | 7.84M | return static_cast<unsigned long>(end); |
299 | 7.84M | } |
300 | | |
301 | | unsigned long libcdr::getRemainingLength(librevenge::RVNGInputStream *const input) |
302 | 7.70M | { |
303 | 7.70M | return getLength(input) - static_cast<unsigned long>(input->tell()); |
304 | 7.70M | } |
305 | | |
306 | | int libcdr::cdr_round(double d) |
307 | 5.55M | { |
308 | 5.55M | return (d>0) ? int(d+0.5) : int(d-0.5); |
309 | 5.55M | } |
310 | | |
311 | | void libcdr::writeU16(librevenge::RVNGBinaryData &buffer, const int value) |
312 | 85.2k | { |
313 | 85.2k | buffer.append((unsigned char)(value & 0xFF)); |
314 | 85.2k | buffer.append((unsigned char)((value >> 8) & 0xFF)); |
315 | 85.2k | } |
316 | | |
317 | | void libcdr::writeU32(librevenge::RVNGBinaryData &buffer, const int value) |
318 | 10.8M | { |
319 | 10.8M | buffer.append((unsigned char)(value & 0xFF)); |
320 | 10.8M | buffer.append((unsigned char)((value >> 8) & 0xFF)); |
321 | 10.8M | buffer.append((unsigned char)((value >> 16) & 0xFF)); |
322 | 10.8M | buffer.append((unsigned char)((value >> 24) & 0xFF)); |
323 | 10.8M | } |
324 | | |
325 | | void libcdr::appendCharacters(librevenge::RVNGString &text, std::vector<unsigned char> characters, unsigned short charset) |
326 | 3.65M | { |
327 | 3.65M | if (characters.empty()) |
328 | 6.24k | return; |
329 | 3.64M | static const UChar32 symbolmap [] = |
330 | 3.64M | { |
331 | 3.64M | 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, // 0x20 .. |
332 | 3.64M | 0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F, |
333 | 3.64M | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, |
334 | 3.64M | 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, |
335 | 3.64M | 0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393, |
336 | 3.64M | 0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F, |
337 | 3.64M | 0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9, |
338 | 3.64M | 0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F, |
339 | 3.64M | 0xF8E5, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3, |
340 | 3.64M | 0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF, |
341 | 3.64M | 0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9, |
342 | 3.64M | 0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x0020, // .. 0x7F |
343 | 3.64M | 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, |
344 | 3.64M | 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, |
345 | 3.64M | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, |
346 | 3.64M | 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009E, 0x009f, |
347 | 3.64M | 0x20AC, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, // 0xA0 .. |
348 | 3.64M | 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, |
349 | 3.64M | 0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x2022, |
350 | 3.64M | 0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0x23D0, 0x23AF, 0x21B5, |
351 | 3.64M | 0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, |
352 | 3.64M | 0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, |
353 | 3.64M | 0x2220, 0x2207, 0x00AE, 0x00A9, 0x2122, 0x220F, 0x221A, 0x22C5, |
354 | 3.64M | 0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3, |
355 | 3.64M | 0x25CA, 0x3008, 0x00AE, 0x00A9, 0x2122, 0x2211, 0x239B, 0x239C, |
356 | 3.64M | 0x239D, 0x23A1, 0x23A2, 0x23A3, 0x23A7, 0x23A8, 0x23A9, 0x23AA, |
357 | 3.64M | 0xF8FF, 0x3009, 0x222B, 0x2320, 0x23AE, 0x2321, 0x239E, 0x239F, |
358 | 3.64M | 0x23A0, 0x23A4, 0x23A5, 0x23A6, 0x23AB, 0x23AC, 0x23AD, 0x0020 // .. 0xFE |
359 | 3.64M | }; |
360 | | |
361 | 3.64M | if (!charset && !characters.empty()) |
362 | 629k | charset = getEncoding(&characters[0], characters.size()); |
363 | | |
364 | 3.64M | if (charset == 0x02) // SYMBOL |
365 | 1.20k | { |
366 | 1.20k | uint32_t ucs4Character = 0; |
367 | 1.20k | for (std::vector<unsigned char>::const_iterator iter = characters.begin(); |
368 | 5.45k | iter != characters.end(); ++iter) |
369 | 4.25k | { |
370 | 4.25k | if (*iter < 0x20) |
371 | 1.38k | ucs4Character = 0x20; |
372 | 2.86k | else |
373 | 2.86k | ucs4Character = symbolmap[*iter - 0x20]; |
374 | 4.25k | _appendUCS4(text, ucs4Character); |
375 | 4.25k | } |
376 | 1.20k | } |
377 | 3.64M | else |
378 | 3.64M | { |
379 | 3.64M | UErrorCode status = U_ZERO_ERROR; |
380 | 3.64M | UConverter *conv = nullptr; |
381 | 3.64M | switch (charset) |
382 | 3.64M | { |
383 | 613 | case 0x80: // SHIFTJIS |
384 | 613 | conv = ucnv_open("windows-932", &status); |
385 | 613 | break; |
386 | 868 | case 0x81: // HANGUL |
387 | 868 | conv = ucnv_open("windows-949", &status); |
388 | 868 | break; |
389 | 1.76k | case 0x86: // GB2312 |
390 | 1.76k | conv = ucnv_open("windows-936", &status); |
391 | 1.76k | break; |
392 | 2.31k | case 0x88: // CHINESEBIG5 |
393 | 2.31k | conv = ucnv_open("windows-950", &status); |
394 | 2.31k | break; |
395 | 1.55k | case 0xa1: // GREEEK |
396 | 1.55k | conv = ucnv_open("windows-1253", &status); |
397 | 1.55k | break; |
398 | 1.60k | case 0xa2: // TURKISH |
399 | 1.60k | conv = ucnv_open("windows-1254", &status); |
400 | 1.60k | break; |
401 | 1.68k | case 0xa3: // VIETNAMESE |
402 | 1.68k | conv = ucnv_open("windows-1258", &status); |
403 | 1.68k | break; |
404 | 1.43k | case 0xb1: // HEBREW |
405 | 1.43k | conv = ucnv_open("windows-1255", &status); |
406 | 1.43k | break; |
407 | 365 | case 0xb2: // ARABIC |
408 | 365 | conv = ucnv_open("windows-1256", &status); |
409 | 365 | break; |
410 | 214 | case 0xba: // BALTIC |
411 | 214 | conv = ucnv_open("windows-1257", &status); |
412 | 214 | break; |
413 | 5.22k | case 0xcc: // RUSSIAN |
414 | 5.22k | conv = ucnv_open("windows-1251", &status); |
415 | 5.22k | break; |
416 | 348 | case 0xde: // THAI |
417 | 348 | conv = ucnv_open("windows-874", &status); |
418 | 348 | break; |
419 | 13.7k | case 0xee: // CENTRAL EUROPE |
420 | 13.7k | conv = ucnv_open("windows-1250", &status); |
421 | 13.7k | break; |
422 | 3.61M | default: |
423 | 3.61M | conv = ucnv_open("windows-1252", &status); |
424 | 3.61M | break; |
425 | 3.64M | } |
426 | 3.64M | if (U_SUCCESS(status) && conv) |
427 | 3.64M | { |
428 | 3.64M | const auto *src = (const char *)&characters[0]; |
429 | 3.64M | const char *srcLimit = (const char *)src + characters.size(); |
430 | 16.1M | while (src < srcLimit) |
431 | 12.4M | { |
432 | 12.4M | UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status); |
433 | 12.4M | if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character)) |
434 | 12.4M | _appendUCS4(text, ucs4Character); |
435 | 12.4M | } |
436 | 3.64M | } |
437 | 3.64M | if (conv) |
438 | 3.64M | ucnv_close(conv); |
439 | 3.64M | } |
440 | 3.64M | } |
441 | | |
442 | | void libcdr::appendCharacters(librevenge::RVNGString &text, std::vector<unsigned char> characters) |
443 | 1.43M | { |
444 | 1.43M | if (characters.empty()) |
445 | 11.4k | return; |
446 | | |
447 | 1.42M | UErrorCode status = U_ZERO_ERROR; |
448 | 1.42M | UConverter *conv = ucnv_open("UTF-16LE", &status); |
449 | | |
450 | 1.42M | if (U_SUCCESS(status) && conv) |
451 | 1.42M | { |
452 | 1.42M | const auto *src = (const char *)&characters[0]; |
453 | 1.42M | const char *srcLimit = (const char *)src + characters.size(); |
454 | 39.8M | while (src < srcLimit) |
455 | 38.4M | { |
456 | 38.4M | UChar32 ucs4Character = ucnv_getNextUChar(conv, &src, srcLimit, &status); |
457 | 38.4M | if (U_SUCCESS(status) && U_IS_UNICODE_CHAR(ucs4Character)) |
458 | 37.9M | _appendUCS4(text, ucs4Character); |
459 | 38.4M | } |
460 | 1.42M | } |
461 | 1.42M | if (conv) |
462 | 1.42M | ucnv_close(conv); |
463 | 1.42M | } |
464 | | |
465 | | void libcdr::appendUTF8Characters(librevenge::RVNGString &text, std::vector<unsigned char> characters) |
466 | 33.3k | { |
467 | 33.3k | if (characters.empty()) |
468 | 24.3k | return; |
469 | | |
470 | 210M | for (std::vector<unsigned char>::const_iterator iter = characters.begin(); iter != characters.end(); ++iter) |
471 | 210M | text.append((char)*iter); |
472 | 9.03k | } |
473 | | |
474 | | #ifdef DEBUG |
475 | | |
476 | | void libcdr::debugPrint(const char *const format, ...) |
477 | | { |
478 | | va_list args; |
479 | | va_start(args, format); |
480 | | std::vfprintf(stderr, format, args); |
481 | | va_end(args); |
482 | | } |
483 | | |
484 | | const char *libcdr::toFourCC(unsigned value, bool bigEndian) |
485 | | { |
486 | | static char sValue[5] = { 0, 0, 0, 0, 0 }; |
487 | | if (bigEndian) |
488 | | { |
489 | | sValue[3] = (char)(value & 0xff); |
490 | | sValue[2] = (char)((value & 0xff00) >> 8); |
491 | | sValue[1] = (char)((value & 0xff0000) >> 16); |
492 | | sValue[0] = (char)((value & 0xff000000) >> 24); |
493 | | } |
494 | | else |
495 | | { |
496 | | sValue[0] = (char)(value & 0xff); |
497 | | sValue[1] = (char)((value & 0xff00) >> 8); |
498 | | sValue[2] = (char)((value & 0xff0000) >> 16); |
499 | | sValue[3] = (char)((value & 0xff000000) >> 24); |
500 | | } |
501 | | return sValue; |
502 | | } |
503 | | #endif |
504 | | |
505 | | /* vim:set shiftwidth=2 softtabstop=2 expandtab: */ |