/src/dcmtk/ofstd/libsrc/ofchrenc.cc
Line | Count | Source |
1 | | /* |
2 | | * |
3 | | * Copyright (C) 2011-2024, OFFIS e.V. |
4 | | * All rights reserved. See COPYRIGHT file for details. |
5 | | * |
6 | | * This software and supporting documentation were developed by |
7 | | * |
8 | | * OFFIS e.V. |
9 | | * R&D Division Health |
10 | | * Escherweg 2 |
11 | | * D-26121 Oldenburg, Germany |
12 | | * |
13 | | * |
14 | | * Module: ofstd |
15 | | * |
16 | | * Author: Joerg Riesmeier, Jan Schlamelcher |
17 | | * |
18 | | * Purpose: Class for character encoding conversion (Source) |
19 | | * |
20 | | */ |
21 | | |
22 | | |
23 | | #include "dcmtk/config/osconfig.h" |
24 | | |
25 | | #include "dcmtk/ofstd/ofchrenc.h" |
26 | | #include "dcmtk/ofstd/ofstd.h" |
27 | | #include "dcmtk/ofstd/ofdiag.h" |
28 | | #include "dcmtk/ofstd/ofconsol.h" |
29 | | #include <cerrno> |
30 | | |
31 | | #ifdef HAVE_WINDOWS_H |
32 | | #define WIN32_LEAN_AND_MEAN |
33 | | #include <windows.h> |
34 | | |
35 | | /*-------------* |
36 | | * constants * |
37 | | *-------------*/ |
38 | | |
39 | | // Windows-specific code page identifiers |
40 | | const unsigned int OFCharacterEncoding::CPC_ANSI = CP_ACP; |
41 | | const unsigned int OFCharacterEncoding::CPC_OEM = CP_OEMCP; |
42 | | const unsigned int OFCharacterEncoding::CPC_ASCII = 20127; |
43 | | const unsigned int OFCharacterEncoding::CPC_Latin1 = 28591; |
44 | | const unsigned int OFCharacterEncoding::CPC_UTF8 = CP_UTF8; |
45 | | #endif |
46 | | |
47 | | /*------------------* |
48 | | * implementation * |
49 | | *------------------*/ |
50 | | |
51 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
52 | | |
53 | | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICONV ||\ |
54 | | DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV ||\ |
55 | | DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_STDLIBC_ICONV |
56 | | |
57 | | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
58 | | |
59 | | #include "dcmtk/oficonv/iconv.h" |
60 | | |
61 | | // helper class for cleanup up oficonv at application exit |
62 | | class OFiconvCleanupHelper |
63 | | { |
64 | | public: |
65 | | ~OFiconvCleanupHelper() |
66 | 0 | { |
67 | 0 | OFiconv_cleanup(); |
68 | 0 | } |
69 | | }; |
70 | | |
71 | | // global helper object that will clean up the oficonv csmapper area buffer |
72 | | // at application exit and thus avoid reports about memory leaks |
73 | | OFiconvCleanupHelper cleanupHelper; |
74 | | |
75 | | #undef LIBICONV_SECOND_ARGUMENT_CONST |
76 | 0 | #define iconv_open OFiconv_open |
77 | 0 | #define iconv_close OFiconv_close |
78 | 0 | #define iconv OFiconv |
79 | 0 | #define iconvctl OFiconvctl |
80 | | |
81 | | #ifndef WITH_LIBICONV |
82 | | #define WITH_LIBICONV |
83 | | #endif |
84 | | |
85 | | #else /* DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV */ |
86 | | |
87 | | #include <iconv.h> |
88 | | #ifdef WITH_LIBICONV |
89 | | #include <localcharset.h> |
90 | | #endif |
91 | | #ifdef __GLIBC__ |
92 | | #include <langinfo.h> // nl_langinfo / CODESET |
93 | | #endif |
94 | | |
95 | | #endif /* DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV */ |
96 | | |
97 | | |
98 | 0 | #define ILLEGAL_DESCRIPTOR OFreinterpret_cast(iconv_t, -1) |
99 | 0 | #define CONVERSION_ERROR OFstatic_cast(size_t, -1) |
100 | | #define CONVERSION_BUFFER_SIZE 1024 |
101 | | |
102 | | class OFCharacterEncoding::Implementation |
103 | | { |
104 | | |
105 | | public: |
106 | | |
107 | | static Implementation* create(const OFString& fromEncoding, |
108 | | const OFString& toEncoding, |
109 | | OFCondition& result) |
110 | 0 | { |
111 | 0 | iconv_t descriptor = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str()); |
112 | 0 | if (descriptor == ILLEGAL_DESCRIPTOR) |
113 | 0 | { |
114 | | // return an appropriate error message |
115 | 0 | createErrnoCondition(result, "Cannot open character encoding: ", EC_CODE_CannotOpenEncoding); |
116 | 0 | return OFnullptr; |
117 | 0 | } |
118 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
119 | | // GNU libiconv does not perform transliteration by default. We emulate this when using oficonv. |
120 | 0 | int flag1 = 1; |
121 | 0 | (void) ::iconvctl(descriptor, ICONV_SET_ILSEQ_INVALID, &flag1); |
122 | 0 | #endif |
123 | 0 | if (Implementation* pImplementation = new Implementation(descriptor)) |
124 | 0 | { |
125 | 0 | result = EC_Normal; |
126 | 0 | return pImplementation; |
127 | 0 | } |
128 | 0 | result = EC_MemoryExhausted; |
129 | 0 | return OFnullptr; |
130 | 0 | } |
131 | | |
132 | | static OFString getVersionString() |
133 | 0 | { |
134 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
135 | 0 | return "Citrus iconv, Version FreeBSD 13.1 (modified)" |
136 | | #ifdef DCMTK_ENABLE_BUILTIN_OFICONV_DATA |
137 | | ", with built-in data files" |
138 | | #endif |
139 | 0 | ; |
140 | | #elif defined(WITH_LIBICONV) |
141 | | OFString versionStr = "LIBICONV, Version "; |
142 | | char buf[10]; |
143 | | // extract major and minor version number |
144 | | OFStandard::snprintf(buf, sizeof(buf), "%i.%i", (_LIBICONV_VERSION >> 8), (_LIBICONV_VERSION & 0xff)); |
145 | | versionStr.append(buf); |
146 | | return versionStr; |
147 | | #elif defined(__GLIBC__) |
148 | | OFOStringStream oss; |
149 | | oss << "GNU C library (iconv), version " |
150 | | << __GLIBC__ << '.' |
151 | | << __GLIBC_MINOR__; |
152 | | OFSTRINGSTREAM_GETOFSTRING(oss, version); |
153 | | return version; |
154 | | #else |
155 | | return "Unknown C library (iconv)"; |
156 | | #endif |
157 | 0 | } |
158 | | |
159 | | static OFString getLocaleEncoding() |
160 | 0 | { |
161 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
162 | 0 | iconv_locale_allocation_t buf; |
163 | 0 | return OFSTRING_GUARD(::OFlocale_charset(&buf)); |
164 | | #elif defined(WITH_LIBICONV) |
165 | | // basically, the function below should always return a non-empty string |
166 | | // but older versions of libiconv might return NULL in certain cases |
167 | | return OFSTRING_GUARD(::locale_charset()); |
168 | | #elif defined(__GLIBC__) |
169 | | const char *oldlocale = setlocale(LC_ALL, ""); |
170 | | const char *codeset = nl_langinfo (CODESET); |
171 | | setlocale(LC_ALL, oldlocale); |
172 | | return OFSTRING_GUARD(codeset); |
173 | | #else |
174 | | return OFString(); |
175 | | #endif |
176 | 0 | } |
177 | | |
178 | | static OFBool supportsConversionFlags(const unsigned flags) |
179 | 0 | { |
180 | 0 | #if defined(WITH_LIBICONV) && _LIBICONV_VERSION >= 0x0108 |
181 | 0 | return flags == AbortTranscodingOnIllegalSequence |
182 | 0 | || flags == DiscardIllegalSequences |
183 | 0 | || flags == TransliterateIllegalSequences |
184 | 0 | || flags == (DiscardIllegalSequences | TransliterateIllegalSequences) |
185 | 0 | ; |
186 | | #elif defined DCMTK_FIXED_ICONV_CONVERSION_FLAGS |
187 | | // the iconvctl function is implemented only in newer versions of the |
188 | | // GNU libiconv and not in other iconv implementations. For instance, |
189 | | // the iconv implementation in the C standard library does not support |
190 | | // different encoding flags and only has a (varying) fixed functionality |
191 | | // that we detect with a configuration test. |
192 | | return flags == DCMTK_FIXED_ICONV_CONVERSION_FLAGS; |
193 | | #else |
194 | | return OFFalse; |
195 | | #endif |
196 | 0 | } |
197 | | |
198 | | unsigned getConversionFlags() const |
199 | 0 | { |
200 | 0 | #if defined(WITH_LIBICONV) && _LIBICONV_VERSION >= 0x0108 |
201 | 0 | unsigned result = 0; |
202 | 0 | int flag; |
203 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
204 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_GET_ILSEQ_INVALID, &flag)) |
205 | 0 | return 0; |
206 | 0 | if (flag == 0) |
207 | 0 | result |= TransliterateIllegalSequences; |
208 | | #else |
209 | | if (::iconvctl(ConversionDescriptor, ICONV_GET_TRANSLITERATE, &flag)) |
210 | | return 0; |
211 | | if (flag) |
212 | | result |= TransliterateIllegalSequences; |
213 | | #endif |
214 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_GET_DISCARD_ILSEQ, &flag)) |
215 | 0 | return 0; |
216 | 0 | if (flag) |
217 | 0 | result |= DiscardIllegalSequences; |
218 | 0 | if (result) |
219 | 0 | return result; |
220 | | // no flags set, so return libiconv default (=abort) |
221 | 0 | return AbortTranscodingOnIllegalSequence; |
222 | | #elif defined DCMTK_FIXED_ICONV_CONVERSION_FLAGS |
223 | | // the iconvctl function is implemented only in newer versions of the |
224 | | // GNU libiconv and not in other iconv implementations. For instance, |
225 | | // the iconv implementation in the C standard library does not support |
226 | | // different encoding flags and only has a (varying) fixed functionality |
227 | | // that we detect with a configuration test. |
228 | | return DCMTK_FIXED_ICONV_CONVERSION_FLAGS; |
229 | | #else |
230 | | return 0; |
231 | | #endif |
232 | 0 | } |
233 | | |
234 | | OFBool setConversionFlags(const unsigned flags) |
235 | 0 | { |
236 | 0 | #if defined(WITH_LIBICONV) && _LIBICONV_VERSION >= 0x0108 |
237 | 0 | int flag0 = 0; |
238 | 0 | int flag1 = 1; |
239 | 0 | switch (flags) |
240 | 0 | { |
241 | 0 | case AbortTranscodingOnIllegalSequence: |
242 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
243 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag0)) |
244 | 0 | return OFFalse; |
245 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_ILSEQ_INVALID, &flag1)) |
246 | 0 | return OFFalse; |
247 | | #else |
248 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag0)) |
249 | | return OFFalse; |
250 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_TRANSLITERATE, &flag0)) |
251 | | return OFFalse; |
252 | | #endif |
253 | 0 | return OFTrue; |
254 | 0 | case DiscardIllegalSequences: |
255 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
256 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag1)) |
257 | 0 | return OFFalse; |
258 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_ILSEQ_INVALID, &flag0)) |
259 | 0 | return OFFalse; |
260 | | #else |
261 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag1)) |
262 | | return OFFalse; |
263 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_TRANSLITERATE, &flag0)) |
264 | | return OFFalse; |
265 | | #endif |
266 | 0 | return OFTrue; |
267 | 0 | case TransliterateIllegalSequences: |
268 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
269 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag0)) |
270 | 0 | return OFFalse; |
271 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_ILSEQ_INVALID, &flag0)) |
272 | 0 | return OFFalse; |
273 | | #else |
274 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag0)) |
275 | | return OFFalse; |
276 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_TRANSLITERATE, &flag1)) |
277 | | return OFFalse; |
278 | | #endif |
279 | 0 | return OFTrue; |
280 | 0 | case (TransliterateIllegalSequences | DiscardIllegalSequences): |
281 | 0 | #if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_OFICONV |
282 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag1)) |
283 | 0 | return OFFalse; |
284 | 0 | if (::iconvctl(ConversionDescriptor, ICONV_SET_ILSEQ_INVALID, &flag0)) |
285 | 0 | return OFFalse; |
286 | | #else |
287 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_DISCARD_ILSEQ, &flag1)) |
288 | | return OFFalse; |
289 | | if (::iconvctl(ConversionDescriptor, ICONV_SET_TRANSLITERATE, &flag1)) |
290 | | return OFFalse; |
291 | | #endif |
292 | 0 | return OFTrue; |
293 | 0 | default: |
294 | 0 | return OFFalse; |
295 | 0 | } |
296 | | #elif defined DCMTK_FIXED_ICONV_CONVERSION_FLAGS |
297 | | // the iconvctl function is implemented only in newer versions of the |
298 | | // GNU libiconv and not in other iconv implementations. For instance, |
299 | | // the iconv implementation in the C standard library does not support |
300 | | // different encoding flags and only has a (varying) fixed functionality |
301 | | // that we detect with a configuration test. |
302 | | return flags == DCMTK_FIXED_ICONV_CONVERSION_FLAGS; |
303 | | #else |
304 | | return OFFalse; |
305 | | #endif |
306 | 0 | } |
307 | | |
308 | | |
309 | | OFCondition convert(OFString& toString, |
310 | | const char* fromString, |
311 | | const size_t fromLength) |
312 | 0 | { |
313 | 0 | OFCondition status = EC_Normal; |
314 | | // if the input string is empty or NULL, we are done |
315 | 0 | if (status.good() && (fromString != NULL) && (fromLength > 0)) |
316 | 0 | { |
317 | | #ifdef LIBICONV_SECOND_ARGUMENT_CONST |
318 | | const char *inputPos = fromString; |
319 | | #else |
320 | 0 | char *inputPos = OFconst_cast(char *, fromString); |
321 | 0 | #endif |
322 | 0 | size_t inputLeft = fromLength; |
323 | | // set the conversion descriptor to the initial state |
324 | 0 | ::iconv(ConversionDescriptor, NULL, NULL, NULL, NULL); |
325 | | // iterate as long as there are characters to be converted |
326 | 0 | while (inputLeft > 0) |
327 | 0 | { |
328 | 0 | char buffer[CONVERSION_BUFFER_SIZE]; |
329 | 0 | char *bufferPos = buffer; |
330 | 0 | const size_t bufferLength = sizeof(buffer); |
331 | 0 | size_t bufferLeft = bufferLength; |
332 | | // convert the current block of the given string to the selected character encoding |
333 | 0 | if (::iconv(ConversionDescriptor, &inputPos, &inputLeft, &bufferPos, &bufferLeft) == CONVERSION_ERROR) |
334 | 0 | { |
335 | | // check whether the output buffer was too small for the next converted character |
336 | | // (also make sure that the output buffer has been filled to avoid an endless loop) |
337 | 0 | if ((errno != E2BIG) || (bufferLeft == bufferLength)) |
338 | 0 | { |
339 | | // if the conversion was unsuccessful, return with an appropriate error message |
340 | 0 | createErrnoCondition(status, "Cannot convert character encoding: ", |
341 | 0 | EC_CODE_CannotConvertEncoding); |
342 | 0 | break; |
343 | 0 | } |
344 | 0 | } |
345 | | // append the converted character string to the result variable |
346 | 0 | toString.append(buffer, bufferLength - bufferLeft); |
347 | 0 | } |
348 | 0 | } |
349 | 0 | return status; |
350 | 0 | } |
351 | | |
352 | | ~Implementation() |
353 | 0 | { |
354 | | // try to close given descriptor and check whether it worked |
355 | 0 | if (::iconv_close(ConversionDescriptor) == -1) |
356 | 0 | { |
357 | 0 | char errBuf[256]; |
358 | 0 | CERR << "Cannot close character encoding: " |
359 | 0 | << OFStandard::strerror(errno, errBuf, sizeof(errBuf)) |
360 | 0 | << OFendl; |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | | private: |
365 | | |
366 | | #include DCMTK_DIAGNOSTIC_PUSH |
367 | | #include DCMTK_DIAGNOSTIC_IGNORE_SHADOW |
368 | | Implementation(iconv_t ConversionDescriptor) |
369 | 0 | : ConversionDescriptor(ConversionDescriptor) |
370 | 0 | { |
371 | |
|
372 | 0 | } |
373 | | #include DCMTK_DIAGNOSTIC_POP |
374 | | |
375 | | static void createErrnoCondition(OFCondition &status, |
376 | | OFString message, |
377 | | const unsigned short code) |
378 | 0 | { |
379 | 0 | char errBuf[256]; |
380 | 0 | message.append(OFStandard::strerror(errno, errBuf, sizeof(errBuf))); |
381 | 0 | status = makeOFCondition(0, code, OF_error, message.c_str()); |
382 | 0 | } |
383 | | |
384 | | iconv_t ConversionDescriptor; |
385 | | }; |
386 | | |
387 | | #endif // ICONV |
388 | | |
389 | | #else // DCMTK_ENABLE_CHARSET_CONVERSION |
390 | | |
391 | | // for suppressing unnecessary warnings |
392 | | class OFCharacterEncoding::Implementation {}; |
393 | | |
394 | | #endif // NOT DCMTK_ENABLE_CHARSET_CONVERSION |
395 | | |
396 | | |
397 | | OFBool OFCharacterEncoding::isLibraryAvailable() |
398 | 0 | { |
399 | 0 | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
400 | 0 | return OFTrue; |
401 | | #else |
402 | | return OFFalse; |
403 | | #endif |
404 | 0 | } |
405 | | |
406 | | |
407 | | OFString OFCharacterEncoding::getLibraryVersionString() |
408 | 0 | { |
409 | 0 | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
410 | 0 | return Implementation::getVersionString(); |
411 | | #else |
412 | | return "<no character encoding library available>"; |
413 | | #endif |
414 | 0 | } |
415 | | |
416 | | |
417 | | size_t OFCharacterEncoding::countCharactersInUTF8String(const OFString &utf8String) |
418 | 0 | { |
419 | 0 | const size_t length = utf8String.length(); |
420 | 0 | size_t count = 0; |
421 | | // iterate over all bytes and count start of UTF-8 characters |
422 | 0 | for (size_t i = 0; i < length; i++) |
423 | 0 | { |
424 | 0 | if ((utf8String.at(i) & 0xc0) != 0x80) |
425 | 0 | count++; |
426 | 0 | } |
427 | 0 | return count; |
428 | 0 | } |
429 | | |
430 | | OFBool OFCharacterEncoding::hasDefaultEncoding() |
431 | 0 | { |
432 | 0 | #if defined(DCMTK_ENABLE_CHARSET_CONVERSION) &&\ |
433 | 0 | (\ |
434 | 0 | DCMTK_ENABLE_CHARSET_CONVERSION != DCMTK_CHARSET_CONVERSION_STDLIBC_ICONV\ |
435 | 0 | || DCMTK_STDLIBC_ICONV_HAS_DEFAULT_ENCODING\ |
436 | 0 | ) |
437 | 0 | return OFTrue; |
438 | | #else |
439 | | return OFFalse; |
440 | | #endif |
441 | 0 | } |
442 | | |
443 | | OFString OFCharacterEncoding::getLocaleEncoding() |
444 | 0 | { |
445 | 0 | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
446 | 0 | return Implementation::getLocaleEncoding(); |
447 | | #else |
448 | | return OFString(); |
449 | | #endif |
450 | 0 | } |
451 | | |
452 | | |
453 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
454 | | OFBool OFCharacterEncoding::supportsConversionFlags(const unsigned flags) |
455 | 0 | { |
456 | 0 | return Implementation::supportsConversionFlags(flags); |
457 | 0 | } |
458 | | #else |
459 | | OFBool OFCharacterEncoding::supportsConversionFlags(const unsigned /* flags */) |
460 | | { |
461 | | return OFFalse; |
462 | | } |
463 | | #endif |
464 | | |
465 | | |
466 | | OFCharacterEncoding::OFCharacterEncoding() |
467 | 0 | : TheImplementation() |
468 | 0 | { |
469 | |
|
470 | 0 | } |
471 | | |
472 | | |
473 | | OFCharacterEncoding::OFCharacterEncoding(const OFCharacterEncoding& rhs) |
474 | 0 | : TheImplementation(rhs.TheImplementation) |
475 | 0 | { |
476 | |
|
477 | 0 | } |
478 | | |
479 | | |
480 | | OFCharacterEncoding::~OFCharacterEncoding() |
481 | 0 | { |
482 | |
|
483 | 0 | } |
484 | | |
485 | | |
486 | | OFCharacterEncoding& OFCharacterEncoding::operator=(const OFCharacterEncoding& rhs) |
487 | 0 | { |
488 | 0 | TheImplementation = rhs.TheImplementation; |
489 | 0 | return *this; |
490 | 0 | } |
491 | | |
492 | | |
493 | | OFCharacterEncoding::operator OFBool() const |
494 | 0 | { |
495 | 0 | return OFstatic_cast(OFBool, TheImplementation); |
496 | 0 | } |
497 | | |
498 | | |
499 | | OFBool OFCharacterEncoding::operator!() const |
500 | 0 | { |
501 | 0 | return !TheImplementation; |
502 | 0 | } |
503 | | |
504 | | |
505 | | OFBool OFCharacterEncoding::operator==(const OFCharacterEncoding& rhs) const |
506 | 0 | { |
507 | 0 | return TheImplementation == rhs.TheImplementation; |
508 | 0 | } |
509 | | |
510 | | OFBool OFCharacterEncoding::operator!=(const OFCharacterEncoding& rhs) const |
511 | 0 | { |
512 | 0 | return TheImplementation != rhs.TheImplementation; |
513 | 0 | } |
514 | | |
515 | | |
516 | | void OFCharacterEncoding::clear() |
517 | 0 | { |
518 | 0 | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
519 | 0 | TheImplementation.reset(); |
520 | 0 | #endif |
521 | 0 | } |
522 | | |
523 | | |
524 | | unsigned OFCharacterEncoding::getConversionFlags() const |
525 | 0 | { |
526 | 0 | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
527 | 0 | if (TheImplementation) |
528 | 0 | return TheImplementation->getConversionFlags(); |
529 | 0 | #endif |
530 | 0 | return 0; |
531 | 0 | } |
532 | | |
533 | | |
534 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
535 | | OFCondition OFCharacterEncoding::setConversionFlags(const unsigned flags) |
536 | 0 | { |
537 | 0 | if (TheImplementation) |
538 | 0 | { |
539 | 0 | if (TheImplementation->setConversionFlags(flags)) |
540 | 0 | return EC_Normal; |
541 | 0 | return makeOFCondition(0, EC_CODE_CannotControlConverter, OF_error, |
542 | 0 | "Conversion flags not supported by the underlying implementation"); |
543 | 0 | } |
544 | 0 | return EC_NoEncodingSelected; |
545 | 0 | } |
546 | | #else |
547 | | OFCondition OFCharacterEncoding::setConversionFlags(const unsigned /* flags */) |
548 | | { |
549 | | return EC_NoEncodingLibrary; |
550 | | } |
551 | | #endif |
552 | | |
553 | | |
554 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
555 | | OFCondition OFCharacterEncoding::selectEncoding(const OFString &fromEncoding, |
556 | | const OFString &toEncoding) |
557 | 0 | { |
558 | 0 | OFCondition result; |
559 | 0 | TheImplementation.reset(Implementation::create(fromEncoding, toEncoding, result)); |
560 | 0 | return result; |
561 | 0 | } |
562 | | #else |
563 | | OFCondition OFCharacterEncoding::selectEncoding(const OFString & /* fromEncoding */, |
564 | | const OFString & /* toEncoding */) |
565 | | { |
566 | | return EC_NoEncodingLibrary; |
567 | | } |
568 | | #endif |
569 | | |
570 | | |
571 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
572 | | OFCondition OFCharacterEncoding::convertString(const OFString &fromString, |
573 | | OFString &toString, |
574 | | const OFBool clearMode) |
575 | 0 | { |
576 | 0 | return convertString(fromString.c_str(), fromString.length(), toString, clearMode); |
577 | 0 | } |
578 | | #else |
579 | | OFCondition OFCharacterEncoding::convertString(const OFString & /* fromString */, |
580 | | OFString & /* toString */, |
581 | | const OFBool /* clearMode */) |
582 | | { |
583 | | return EC_NoEncodingLibrary; |
584 | | } |
585 | | #endif |
586 | | |
587 | | |
588 | | #ifdef DCMTK_ENABLE_CHARSET_CONVERSION |
589 | | OFCondition OFCharacterEncoding::convertString(const char *fromString, |
590 | | const size_t fromLength, |
591 | | OFString &toString, |
592 | | const OFBool clearMode) |
593 | 0 | { |
594 | 0 | if (TheImplementation) |
595 | 0 | { |
596 | | // first, clear result variable if requested |
597 | 0 | if (clearMode) |
598 | 0 | toString.clear(); |
599 | 0 | return TheImplementation->convert(toString, fromString, fromLength); |
600 | 0 | } |
601 | 0 | return EC_NoEncodingSelected; |
602 | 0 | } |
603 | | #else |
604 | | OFCondition OFCharacterEncoding::convertString(const char * /* fromString */, |
605 | | const size_t /* fromLength */, |
606 | | OFString & /* toString */, |
607 | | const OFBool /* clearMode */) |
608 | | { |
609 | | return EC_NoEncodingLibrary; |
610 | | } |
611 | | #endif |
612 | | |
613 | | |
614 | | #ifdef HAVE_WINDOWS_H // Windows-specific conversion functions |
615 | | |
616 | | OFCondition OFCharacterEncoding::convertFromWideCharString(const wchar_t *fromString, |
617 | | const size_t fromLength, |
618 | | OFString &toString, |
619 | | const unsigned int codePage, |
620 | | const OFBool clearMode) |
621 | | { |
622 | | // first, clear result variable if requested |
623 | | if (clearMode) |
624 | | toString.clear(); |
625 | | OFCondition status = EC_Normal; |
626 | | // check for empty string |
627 | | if ((fromString != NULL) && (fromLength > 0)) |
628 | | { |
629 | | // determine required size for output buffer |
630 | | const int sizeNeeded = WideCharToMultiByte(codePage, 0, fromString, OFstatic_cast(int, fromLength), NULL, 0, NULL, NULL); |
631 | | if (sizeNeeded > 0) |
632 | | { |
633 | | // allocate temporary buffer |
634 | | char *toBuffer = new char[sizeNeeded]; |
635 | | if (toBuffer != NULL) |
636 | | { |
637 | | // convert characters (without trailing NULL byte) |
638 | | const int charsConverted = WideCharToMultiByte(codePage, 0, fromString, OFstatic_cast(int, fromLength), toBuffer, sizeNeeded, NULL, NULL); |
639 | | if (charsConverted > 0) |
640 | | { |
641 | | // append the converted character string to the result variable |
642 | | toString.append(toBuffer, charsConverted); |
643 | | } else { |
644 | | // if conversion failed, create appropriate condition text |
645 | | createGetLastErrorCondition(status, "Cannot convert character encoding: ", EC_CODE_CannotConvertEncoding); |
646 | | } |
647 | | delete[] toBuffer; |
648 | | } else { |
649 | | // output buffer could not be allocated |
650 | | status = EC_MemoryExhausted; |
651 | | } |
652 | | } |
653 | | } |
654 | | return status; |
655 | | } |
656 | | |
657 | | |
658 | | OFCondition OFCharacterEncoding::convertToWideCharString(const OFString &fromString, |
659 | | wchar_t *&toString, |
660 | | size_t &toLength, |
661 | | const unsigned int codePage) |
662 | | { |
663 | | // call the real method converting the given string |
664 | | return OFCharacterEncoding::convertToWideCharString(fromString.c_str(), fromString.length(), |
665 | | toString, toLength, codePage); |
666 | | } |
667 | | |
668 | | |
669 | | OFCondition OFCharacterEncoding::convertToWideCharString(const char *fromString, |
670 | | const size_t fromLength, |
671 | | wchar_t *&toString, |
672 | | size_t &toLength, |
673 | | const unsigned int codePage) |
674 | | { |
675 | | OFCondition status = EC_Normal; |
676 | | // check for empty string |
677 | | if ((fromString != NULL) && (fromLength > 0)) |
678 | | { |
679 | | // determine required size for output buffer |
680 | | const int sizeNeeded = MultiByteToWideChar(codePage, 0, fromString, OFstatic_cast(int, fromLength), NULL, 0); |
681 | | // allocate output buffer (one extra byte for the terminating NULL) |
682 | | toString = new wchar_t[sizeNeeded + 1]; |
683 | | if (toString != NULL) |
684 | | { |
685 | | // convert characters (without trailing NULL byte) |
686 | | toLength = MultiByteToWideChar(codePage, 0, fromString, OFstatic_cast(int, fromLength), toString, sizeNeeded); |
687 | | // append NULL byte to mark "end of string" |
688 | | toString[toLength] = L'\0'; |
689 | | if (toLength == 0) |
690 | | { |
691 | | // if conversion failed, create appropriate condition text |
692 | | createGetLastErrorCondition(status, "Cannot convert character encoding: ", EC_CODE_CannotConvertEncoding); |
693 | | } |
694 | | } else { |
695 | | // output buffer could not be allocated |
696 | | status = EC_MemoryExhausted; |
697 | | } |
698 | | } else { |
699 | | // create an empty string (should never fail) |
700 | | toString = new wchar_t[1]; |
701 | | toString[0] = L'\0'; |
702 | | toLength = 0; |
703 | | } |
704 | | return status; |
705 | | } |
706 | | |
707 | | |
708 | | void OFCharacterEncoding::createGetLastErrorCondition(OFCondition &status, |
709 | | OFString message, |
710 | | const unsigned short code) |
711 | | { |
712 | | LPVOID errBuf = NULL; |
713 | | // obtain an error string from system error code |
714 | | if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, |
715 | | NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), OFreinterpret_cast(LPTSTR, &errBuf), 0, NULL) > 0) |
716 | | { |
717 | | message.append(OFstatic_cast(const char *, errBuf)); |
718 | | } else |
719 | | message.append("unknown error code"); |
720 | | LocalFree(errBuf); |
721 | | status = makeOFCondition(0, code, OF_error, message.c_str()); |
722 | | } |
723 | | |
724 | | #endif // HAVE_WINDOWS_H |