/src/libreoffice/sal/osl/unx/nlsupport.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <cassert> |
24 | | #include <cstring> |
25 | | |
26 | | #include <osl/nlsupport.h> |
27 | | #include <osl/diagnose.h> |
28 | | #include <osl/process.h> |
29 | | |
30 | | #include "nlsupport.hxx" |
31 | | |
32 | | // these share a lot, so use one define |
33 | | #if defined(LINUX) || defined(EMSCRIPTEN) || defined(__sun) || \ |
34 | | defined(FREEBSD) || defined(OPENBSD) || defined(DRAGONFLY) || defined(NETBSD) |
35 | | #define LO_COMMON_NLS_ARCHS 1 |
36 | | #else |
37 | | #define LO_COMMON_NLS_ARCHS 0 |
38 | | #endif |
39 | | |
40 | | #if LO_COMMON_NLS_ARCHS |
41 | | #include <locale.h> |
42 | | #include <langinfo.h> |
43 | | #elif defined(MACOSX) || defined(IOS) |
44 | | #include <osl/module.h> |
45 | | #include <osl/thread.h> |
46 | | #include <rtl/ustring.hxx> |
47 | | #include <sal/log.hxx> |
48 | | #include "system.hxx" |
49 | | #endif |
50 | | |
51 | | namespace { |
52 | | |
53 | | struct Pair { |
54 | | const char *key; |
55 | | const rtl_TextEncoding value; |
56 | | }; |
57 | | |
58 | | } |
59 | | |
60 | | /***************************************************************************** |
61 | | compare function for binary search |
62 | | *****************************************************************************/ |
63 | | |
64 | | static int |
65 | | pair_compare (const char *key, const Pair *pair) |
66 | 115k | { |
67 | 115k | int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); |
68 | 115k | return result; |
69 | 115k | } |
70 | | |
71 | | /***************************************************************************** |
72 | | binary search on encoding tables |
73 | | *****************************************************************************/ |
74 | | |
75 | | static const Pair* |
76 | | pair_search (const char *key, const Pair *base, unsigned int member ) |
77 | 16.5k | { |
78 | 16.5k | unsigned int lower = 0; |
79 | 16.5k | unsigned int upper = member; |
80 | | |
81 | | /* check for validity of input */ |
82 | 16.5k | if ( (key == nullptr) || (base == nullptr) || (member == 0) ) |
83 | 0 | return nullptr; |
84 | | |
85 | | /* binary search */ |
86 | 115k | while ( lower < upper ) |
87 | 115k | { |
88 | 115k | const unsigned int current = (lower + upper) / 2; |
89 | 115k | const int comparison = pair_compare( key, base + current ); |
90 | 115k | if (comparison < 0) |
91 | 99.1k | upper = current; |
92 | 16.5k | else if (comparison > 0) |
93 | 0 | lower = current + 1; |
94 | 16.5k | else |
95 | 16.5k | return base + current; |
96 | 115k | } |
97 | | |
98 | 0 | return nullptr; |
99 | 16.5k | } |
100 | | |
101 | | /***************************************************************************** |
102 | | convert rtl_Locale to locale string |
103 | | *****************************************************************************/ |
104 | | |
105 | | static char * compose_locale( rtl_Locale * pLocale, char * buffer, size_t n ) |
106 | 16.5k | { |
107 | | /* check if a valid locale is specified */ |
108 | 16.5k | if( pLocale && pLocale->Language && |
109 | 16.5k | (pLocale->Language->length == 2 || pLocale->Language->length == 3) ) |
110 | 0 | { |
111 | 0 | size_t offset = 0; |
112 | | |
113 | | /* convert language code to ascii */ |
114 | 0 | { |
115 | 0 | rtl_String *pLanguage = nullptr; |
116 | |
|
117 | 0 | rtl_uString2String( &pLanguage, |
118 | 0 | pLocale->Language->buffer, pLocale->Language->length, |
119 | 0 | RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); |
120 | |
|
121 | 0 | if( sal::static_int_cast<sal_uInt32>(pLanguage->length) < n ) |
122 | 0 | { |
123 | 0 | strcpy( buffer, pLanguage->buffer ); |
124 | 0 | offset = pLanguage->length; |
125 | 0 | } |
126 | |
|
127 | 0 | rtl_string_release( pLanguage ); |
128 | 0 | } |
129 | | |
130 | | /* convert country code to ascii */ |
131 | 0 | if( pLocale->Country && (pLocale->Country->length == 2) ) |
132 | 0 | { |
133 | 0 | rtl_String *pCountry = nullptr; |
134 | |
|
135 | 0 | rtl_uString2String( &pCountry, |
136 | 0 | pLocale->Country->buffer, pLocale->Country->length, |
137 | 0 | RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); |
138 | |
|
139 | 0 | if( offset + pCountry->length + 1 < n ) |
140 | 0 | { |
141 | 0 | strcpy( buffer + offset++, "_" ); |
142 | 0 | strcpy( buffer + offset, pCountry->buffer ); |
143 | 0 | offset += pCountry->length; |
144 | 0 | } |
145 | |
|
146 | 0 | rtl_string_release( pCountry ); |
147 | 0 | } |
148 | | |
149 | | /* convert variant to ascii - check if there is enough space for the variant string */ |
150 | 0 | if( pLocale->Variant && pLocale->Variant->length && |
151 | 0 | ( sal::static_int_cast<sal_uInt32>(pLocale->Variant->length) < n - 6 ) ) |
152 | 0 | { |
153 | 0 | rtl_String *pVariant = nullptr; |
154 | |
|
155 | 0 | rtl_uString2String( &pVariant, |
156 | 0 | pLocale->Variant->buffer, pLocale->Variant->length, |
157 | 0 | RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); |
158 | |
|
159 | 0 | if( offset + pVariant->length + 1 < n ) |
160 | 0 | { |
161 | 0 | strcpy( buffer + offset, pVariant->buffer ); |
162 | 0 | } |
163 | |
|
164 | 0 | rtl_string_release( pVariant ); |
165 | 0 | } |
166 | |
|
167 | 0 | return buffer; |
168 | 0 | } |
169 | | |
170 | 16.5k | return nullptr; |
171 | 16.5k | } |
172 | | |
173 | | /***************************************************************************** |
174 | | convert locale string to rtl_Locale |
175 | | *****************************************************************************/ |
176 | | |
177 | | static rtl_Locale * parse_locale( const char * locale ) |
178 | 107 | { |
179 | 107 | assert(locale != nullptr); |
180 | | |
181 | 107 | if (*locale == '\0' || std::strcmp(locale, "C") == 0 |
182 | 107 | || std::strcmp(locale, "POSIX") == 0) |
183 | 107 | { |
184 | 107 | return rtl_locale_register(u"C", u"", u""); |
185 | 107 | } |
186 | | |
187 | 0 | size_t len = strlen( locale ); |
188 | |
|
189 | 0 | rtl_uString * pLanguage = nullptr; |
190 | 0 | rtl_uString * pCountry = nullptr; |
191 | 0 | rtl_uString * pVariant = nullptr; |
192 | |
|
193 | 0 | size_t offset = std::min<size_t>(len, 2); |
194 | |
|
195 | 0 | rtl_Locale * ret; |
196 | | |
197 | | /* language is a two or three letter code */ |
198 | 0 | if( (len > 3 && locale[3] == '_') || (len == 3 && locale[2] != '_') ) |
199 | 0 | offset = 3; |
200 | | |
201 | | /* convert language code to unicode */ |
202 | 0 | rtl_string2UString( &pLanguage, locale, offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); |
203 | 0 | OSL_ASSERT(pLanguage != nullptr); |
204 | | |
205 | | /* convert country code to unicode */ |
206 | 0 | if( len >= offset+3 && locale[offset] == '_' ) |
207 | 0 | { |
208 | 0 | rtl_string2UString( &pCountry, locale + offset + 1, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); |
209 | 0 | OSL_ASSERT(pCountry != nullptr); |
210 | 0 | offset += 3; |
211 | 0 | } |
212 | | |
213 | | /* convert variant code to unicode - do not rely on "." as delimiter */ |
214 | 0 | if( len > offset ) { |
215 | 0 | rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); |
216 | 0 | OSL_ASSERT(pVariant != nullptr); |
217 | 0 | } |
218 | |
|
219 | 0 | ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : u"", pVariant ? pVariant->buffer : u"" ); |
220 | |
|
221 | 0 | if (pVariant) rtl_uString_release(pVariant); |
222 | 0 | if (pCountry) rtl_uString_release(pCountry); |
223 | 0 | if (pLanguage) rtl_uString_release(pLanguage); |
224 | |
|
225 | 0 | return ret; |
226 | 107 | } |
227 | | |
228 | | #if LO_COMMON_NLS_ARCHS |
229 | | |
/*
 * This implementation of osl_getTextEncodingFromLocale maps
 * from nl_langinfo_l(CODESET) to rtl_textencoding defines.
 * nl_langinfo() is supported only on Linux, Solaris,
 * >= NetBSD 1.6 and >= FreeBSD 4.4
 *
 * nl_language_list[] is the table of supported encodings. Because
 * it is searched with a binary search, the entries have to be in
 * ascending order. The encoding names are compared case-insensitively,
 * so the order has to hold with ASCII case ignored (keeping a list
 * entirely upper- or lowercase makes that easy to check).
 */
241 | | |
242 | | #if defined(__sun) |
243 | | |
244 | | /* The values in the below list can be obtained with a script like |
245 | | * #!/bin/sh |
246 | | * for i in `locale -a`; do |
247 | | * LC_ALL=$i locale -k code_set_name |
248 | | * done |
249 | | */ |
/* Solaris: codeset names mapped to rtl text encodings.
 * osl_getTextEncodingFromLocale() looks names up here with pair_search(),
 * so the rows must stay in ascending order with ASCII case ignored. */
static const Pair nl_language_list[] = {
    { "5601", RTL_TEXTENCODING_EUC_KR }, /* ko_KR.EUC */
    { "646", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US */
    { "ANSI-1251", RTL_TEXTENCODING_MS_1251 }, /* ru_RU.ANSI1251 */
    { "BIG5", RTL_TEXTENCODING_BIG5 }, /* zh_CN.BIG5 */
    { "BIG5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* zh_CN.BIG5HK */
    { "CNS11643", RTL_TEXTENCODING_EUC_TW }, /* zh_TW.EUC */
    { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* ja_JP.eucjp */
    { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* zh_CN.GB18030 */
    { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* zh_CN */
    { "GBK", RTL_TEXTENCODING_GBK }, /* zh_CN.GBK */
    { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 },
    { "ISO8859-10", RTL_TEXTENCODING_ISO_8859_10 },
    { "ISO8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* lt_LT lv_LV */
    { "ISO8859-14", RTL_TEXTENCODING_ISO_8859_14 },
    { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 },
    { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 },
    { "ISO8859-3", RTL_TEXTENCODING_ISO_8859_3 },
    { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 },
    { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 },
    { "ISO8859-6", RTL_TEXTENCODING_ISO_8859_6 },
    { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 },
    { "ISO8859-8", RTL_TEXTENCODING_ISO_8859_8 },
    { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 },
    { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
    { "KOI8-U", RTL_TEXTENCODING_KOI8_U },
    { "PCK", RTL_TEXTENCODING_MS_932 },
    { "SUN_EU_GREEK", RTL_TEXTENCODING_ISO_8859_7 }, /* 8859-7 + Euro */
    { "TIS620.2533", RTL_TEXTENCODING_MS_874 }, /* th_TH.TIS620 */
    { "UTF-8", RTL_TEXTENCODING_UTF8 }
};
281 | | |
282 | | /* XXX MS-874 is an extension to tis620, so this is not |
283 | | * really equivalent */ |
284 | | |
285 | | #elif defined(LINUX) || defined(EMSCRIPTEN) |
286 | | |
287 | | #if !defined(CODESET) |
288 | | #define CODESET _NL_CTYPE_CODESET_NAME |
289 | | #endif |
290 | | |
/* Linux / Emscripten: codeset names mapped to rtl text encodings.
 * Names without a usable counterpart map to RTL_TEXTENCODING_DONTKNOW.
 * osl_getTextEncodingFromLocale() looks names up here with pair_search(),
 * so the rows must stay in ascending order with ASCII case ignored. */
const Pair nl_language_list[] = {
    { "ANSI_X3.110-1983", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-99 NAPLPS */
    { "ANSI_X3.4-1968", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US */
    { "ASMO_449", RTL_TEXTENCODING_DONTKNOW }, /* ISO_9036 ARABIC7 */
    { "BALTIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-179 */
    { "BIG5", RTL_TEXTENCODING_BIG5 }, /* locale: zh_TW */
    { "BIG5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* locale: zh_CN.BIG5HK */
    { "BIG5HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* deprecated */
    { "BS_4730", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-4 ISO646-GB */
    { "BS_VIEWDATA", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-47 */
    { "CP1250", RTL_TEXTENCODING_MS_1250 }, /* MS-EE */
    { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
    { "CP1252", RTL_TEXTENCODING_MS_1252 }, /* MS-ANSI */
    { "CP1253", RTL_TEXTENCODING_MS_1253 }, /* MS-GREEK */
    { "CP1254", RTL_TEXTENCODING_MS_1254 }, /* MS-TURK */
    { "CP1255", RTL_TEXTENCODING_MS_1255 }, /* MS-HEBR */
    { "CP1256", RTL_TEXTENCODING_MS_1256 }, /* MS-ARAB */
    { "CP1257", RTL_TEXTENCODING_MS_1257 }, /* WINBALTRIM */
    { "CSA_Z243.4-1985-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-121 */
    { "CSA_Z243.4-1985-2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-122 CSA7-2 */
    { "CSA_Z243.4-1985-GR", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-123 */
    { "CSN_369103", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-139 */
    { "CWI", RTL_TEXTENCODING_DONTKNOW }, /* CWI-2 CP-HU */
    { "DEC-MCS", RTL_TEXTENCODING_DONTKNOW }, /* DEC */
    { "DIN_66003", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-21 */
    { "DS_2089", RTL_TEXTENCODING_DONTKNOW }, /* DS2089 ISO646-DK */
    { "EBCDIC-AT-DE", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-AT-DE-A", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-CA-FR", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-DK-NO", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-DK-NO-A", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-ES", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-ES-A", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-ES-S", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-FI-SE", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-FI-SE-A", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-FR", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-IS-FRISS", RTL_TEXTENCODING_DONTKNOW }, /* FRISS */
    { "EBCDIC-IT", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-PT", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-UK", RTL_TEXTENCODING_DONTKNOW },
    { "EBCDIC-US", RTL_TEXTENCODING_DONTKNOW },
    { "ECMA-CYRILLIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-111 */
    { "ES", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-17 */
    { "ES2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-85 */
    { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, /* locale: ja_JP.eucjp */
    { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, /* locale: ko_KR.euckr */
    { "EUC-TW", RTL_TEXTENCODING_EUC_TW }, /* locale: zh_TW.euctw */
    { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* locale: zh_CN.gb18030 */
    { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* locale: zh_CN */
    { "GB_1988-80", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-57 */
    { "GBK", RTL_TEXTENCODING_GBK }, /* locale: zh_CN.GBK */
    { "GOST_19768-74", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-153 */
    { "GREEK-CCITT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-150 */
    { "GREEK7", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-88 */
    { "GREEK7-OLD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-18 */
    { "HP-ROMAN8", RTL_TEXTENCODING_DONTKNOW }, /* ROMAN8 R8 */
    { "IBM037", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-[US|CA|WT] */
    { "IBM038", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-INT CP038 */
    { "IBM1004", RTL_TEXTENCODING_DONTKNOW }, /* CP1004 OS2LATIN1 */
    { "IBM1026", RTL_TEXTENCODING_DONTKNOW }, /* CP1026 1026 */
    { "IBM1047", RTL_TEXTENCODING_DONTKNOW }, /* CP1047 1047 */
    { "IBM256", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-INT1 */
    { "IBM273", RTL_TEXTENCODING_DONTKNOW }, /* CP273 */
    { "IBM274", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-BE CP274 */
    { "IBM275", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-BR CP275 */
    { "IBM277", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[DK|NO] */
    { "IBM278", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[FISE]*/
    { "IBM280", RTL_TEXTENCODING_DONTKNOW }, /* CP280 EBCDIC-CP-IT*/
    { "IBM281", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-JP-E CP281 */
    { "IBM284", RTL_TEXTENCODING_DONTKNOW }, /* CP284 EBCDIC-CP-ES */
    { "IBM285", RTL_TEXTENCODING_DONTKNOW }, /* CP285 EBCDIC-CP-GB */
    { "IBM290", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-JP-KANA */
    { "IBM297", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-FR */
    { "IBM420", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-AR1 */
    { "IBM423", RTL_TEXTENCODING_DONTKNOW }, /* CP423 EBCDIC-CP-GR */
    { "IBM424", RTL_TEXTENCODING_DONTKNOW }, /* CP424 EBCDIC-CP-HE */
    { "IBM437", RTL_TEXTENCODING_IBM_437 }, /* CP437 437 */
    { "IBM500", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CP-[BE|CH] */
    { "IBM850", RTL_TEXTENCODING_IBM_850 }, /* CP850 850 */
    { "IBM851", RTL_TEXTENCODING_DONTKNOW }, /* CP851 851 */
    { "IBM852", RTL_TEXTENCODING_IBM_852 }, /* CP852 852 */
    { "IBM855", RTL_TEXTENCODING_IBM_855 }, /* CP855 855 */
    { "IBM857", RTL_TEXTENCODING_IBM_857 }, /* CP857 857 */
    { "IBM860", RTL_TEXTENCODING_IBM_860 }, /* CP860 860 */
    { "IBM861", RTL_TEXTENCODING_IBM_861 }, /* CP861 861 CP-IS */
    { "IBM862", RTL_TEXTENCODING_IBM_862 }, /* CP862 862 */
    { "IBM863", RTL_TEXTENCODING_IBM_863 }, /* CP863 863 */
    { "IBM864", RTL_TEXTENCODING_IBM_864 }, /* CP864 */
    { "IBM865", RTL_TEXTENCODING_IBM_865 }, /* CP865 865 */
    { "IBM866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
    { "IBM868", RTL_TEXTENCODING_DONTKNOW }, /* CP868 CP-AR */
    { "IBM869", RTL_TEXTENCODING_IBM_869 }, /* CP869 869 CP-GR */
    { "IBM870", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-[ROECE|YU] */
    { "IBM871", RTL_TEXTENCODING_DONTKNOW }, /* CP871 EBCDIC-CP-IS */
    { "IBM875", RTL_TEXTENCODING_DONTKNOW }, /* CP875 EBCDIC-GREEK */
    { "IBM880", RTL_TEXTENCODING_DONTKNOW }, /* EBCDIC-CYRILLIC */
    { "IBM891", RTL_TEXTENCODING_DONTKNOW }, /* CP891 */
    { "IBM903", RTL_TEXTENCODING_DONTKNOW }, /* CP903 */
    { "IBM904", RTL_TEXTENCODING_DONTKNOW }, /* CP904 904 */
    { "IBM905", RTL_TEXTENCODING_DONTKNOW }, /* CP905 EBCDIC-CP-TR */
    { "IBM918", RTL_TEXTENCODING_DONTKNOW }, /* CP918 EBCDIC-AR2 */
    { "IEC_P27-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-143 */
    { "INIS", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-49 */
    { "INIS-8", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-50 */
    { "INIS-CYRILLIC", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-51 */
    { "INVARIANT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-170 */
    { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* ISO-IR-100 CP819 */
    { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, /* ISO-IR-157 LATIN6 */
    { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
    { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, /* LATIN8 L8 */
    { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 },
    { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* LATIN2 L2 */
    { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, /* LATIN3 L3 */
    { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
    { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* CYRILLIC */
    { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, /* ECMA-114 ARABIC */
    { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* ECMA-118 GREEK8 */
    { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, /* ISO_8859-8 HEBREW */
    { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* ISO_8859-9 LATIN5 */
    { "ISO-IR-90", RTL_TEXTENCODING_DONTKNOW }, /* ISO_6937-2:1983 */
    { "ISO_10367-BOX", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-155 */
    { "ISO_2033-1983", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-98 E13B */
    { "ISO_5427", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-37 KOI-7 */
    { "ISO_5427-EXT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-54 */
    { "ISO_5428", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-55 */
    { "ISO_646.BASIC", RTL_TEXTENCODING_ASCII_US }, /* REF */
    { "ISO_646.IRV", RTL_TEXTENCODING_ASCII_US }, /* ISO-IR-2 IRV */
    { "ISO_646.IRV:1983", RTL_TEXTENCODING_ISO_8859_1 }, /* fake: ASCII_US, used for "C" locale*/
    { "ISO_6937", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-156 ISO6937*/
    { "ISO_6937-2-25", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-152 */
    { "ISO_6937-2-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-142 */
    { "ISO_8859-SUPP", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-154 */
    { "IT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-15 */
    { "JIS_C6220-1969-JP", RTL_TEXTENCODING_DONTKNOW }, /* KATAKANA X0201-7 */
    { "JIS_C6220-1969-RO", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-14 */
    { "JIS_C6229-1984-A", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-91 */
    { "JIS_C6229-1984-B", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-92 */
    { "JIS_C6229-1984-B-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-93 */
    { "JIS_C6229-1984-HAND", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-94 */
    { "JIS_C6229-1984-HAND-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-95 */
    { "JIS_C6229-1984-KANA", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-96 */
    { "JIS_X0201", RTL_TEXTENCODING_DONTKNOW }, /* X0201 */
    { "JUS_I.B1.002", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-141 */
    { "JUS_I.B1.003-MAC", RTL_TEXTENCODING_DONTKNOW }, /* MACEDONIAN */
    { "JUS_I.B1.003-SERB", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-146 SERBIAN */
    { "KOI-8", RTL_TEXTENCODING_DONTKNOW },
    { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
    { "KOI8-U", RTL_TEXTENCODING_KOI8_U },
    { "KSC5636", RTL_TEXTENCODING_DONTKNOW }, /* ISO646-KR */
    { "LATIN-GREEK", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-19 */
    { "LATIN-GREEK-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-27 */
    { "MAC-IS", RTL_TEXTENCODING_APPLE_ROMAN },
    { "MAC-UK", RTL_TEXTENCODING_APPLE_ROMAN },
    { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, /* MAC */
    { "MSZ_7795.3", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-86 */
    { "NATS-DANO", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-9-1 */
    { "NATS-DANO-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-9-2 */
    { "NATS-SEFI", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-8-1 */
    { "NATS-SEFI-ADD", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-8-2 */
    { "NC_NC00-10", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-151 */
    { "NEXTSTEP", RTL_TEXTENCODING_DONTKNOW }, /* NEXT */
    { "NF_Z_62-010", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-69 */
    { "NF_Z_62-010_(1973)", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-25 */
    { "NS_4551-1", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-60 */
    { "NS_4551-2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-61 */
    { "PT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-16 */
    { "PT2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-84 */
    { "SAMI", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-158 */
    { "SEN_850200_B", RTL_TEXTENCODING_DONTKNOW }, /* ISO646-[FI|SE] */
    { "SEN_850200_C", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-11 */
    { "T.101-G2", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-128 */
    { "T.61-7BIT", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-102 */
    { "T.61-8BIT", RTL_TEXTENCODING_DONTKNOW }, /* T.61 ISO-IR-103 */
    { "TIS-620", RTL_TEXTENCODING_MS_874 }, /* locale: th_TH */
    { "UTF-8", RTL_TEXTENCODING_UTF8 }, /* ISO-10646/UTF-8 */
    { "VIDEOTEX-SUPPL", RTL_TEXTENCODING_DONTKNOW }, /* ISO-IR-70 */
    { "WIN-SAMI-2", RTL_TEXTENCODING_DONTKNOW } /* WS2 */
};
470 | | |
471 | | #elif defined(FREEBSD) || defined(DRAGONFLY) |
472 | | |
/* FreeBSD / DragonFly: codeset names mapped to rtl text encodings.
 * osl_getTextEncodingFromLocale() looks names up here with pair_search(),
 * so the rows must stay in ascending order with ASCII case ignored. */
static const Pair nl_language_list[] = {
    { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
    { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
    { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
    { "EUCCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
    { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
    { "EUCKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
    { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
    { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
    { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
    { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
    { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
    { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
    { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
    { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
    { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
    { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
    { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
};
494 | | |
495 | | #elif defined(NETBSD) |
496 | | |
/* NetBSD: codeset names mapped to rtl text encodings.
 * osl_getTextEncodingFromLocale() looks names up here with pair_search(),
 * so the rows must stay in ascending order with ASCII case ignored.
 * NOTE(review): "BIG5" and "Big5" compare equal once case is ignored, so
 * one of the two rows looks redundant (both map to the same encoding). */
static const Pair nl_language_list[] = {
    { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
    { "Big5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
    { "Big5-HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* locale: zh_CN.BIG5HK */
    { "Big5HKSCS", RTL_TEXTENCODING_BIG5_HKSCS }, /* deprecated */
    { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
    { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
    { "CTEXT", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "eucCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
    { "eucJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
    { "eucKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
    { "eucTW", RTL_TEXTENCODING_EUC_TW }, /* China - Traditional Chinese */
    { "GB18030", RTL_TEXTENCODING_GB_18030 }, /* locale: zh_CN.gb18030 */
    { "GB2312", RTL_TEXTENCODING_GB_2312 }, /* locale: zh_CN */
    { "ISO-2022-JP", RTL_TEXTENCODING_DONTKNOW }, /* */
    { "ISO-2022-JP-2", RTL_TEXTENCODING_DONTKNOW }, /* */
    { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
    { "ISO8859-13", RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
    { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
    { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
    { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
    { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
    { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
    { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
    { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
    { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
    { "PT154", RTL_TEXTENCODING_PT154 }, /* */
    { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
    { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
};
529 | | |
530 | | #elif defined(OPENBSD) |
531 | | |
/* OpenBSD: codeset names mapped to rtl text encodings.
 * osl_getTextEncodingFromLocale() looks names up here with pair_search(),
 * so the rows must stay in ascending order with ASCII case ignored. */
static const Pair nl_language_list[] = {
    { "ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "BIG5", RTL_TEXTENCODING_BIG5 }, /* China - Traditional Chinese */
    { "CP1251", RTL_TEXTENCODING_MS_1251 }, /* MS-CYRL */
    { "CP866", RTL_TEXTENCODING_IBM_866 }, /* CP866 866 */
    { "EUCCN", RTL_TEXTENCODING_EUC_CN }, /* China - Simplified Chinese */
    { "EUCJP", RTL_TEXTENCODING_EUC_JP }, /* Japan */
    { "EUCKR", RTL_TEXTENCODING_EUC_KR }, /* Korea */
    { "ISO8859-1", RTL_TEXTENCODING_ISO_8859_1 }, /* Western */
    { "ISO8859-15", RTL_TEXTENCODING_ISO_8859_15 }, /* Western Updated (w/Euro sign) */
    { "ISO8859-2", RTL_TEXTENCODING_ISO_8859_2 }, /* Central European */
    { "ISO8859-4", RTL_TEXTENCODING_ISO_8859_4 }, /* LATIN4 L4 */
    { "ISO8859-5", RTL_TEXTENCODING_ISO_8859_5 }, /* Cyrillic */
    { "ISO8859-7", RTL_TEXTENCODING_ISO_8859_7 }, /* Greek */
    { "ISO8859-9", RTL_TEXTENCODING_ISO_8859_9 }, /* Turkish */
    { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, /* KOI8-R */
    { "KOI8-U", RTL_TEXTENCODING_KOI8_U }, /* KOI8-U */
    { "SJIS", RTL_TEXTENCODING_SHIFT_JIS }, /* Japan */
    { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, /* US-ASCII */
    { "UTF-8", RTL_TEXTENCODING_UTF8 } /* ISO-10646/UTF-8 */
};
553 | | |
554 | | #else |
555 | | #error Unhandled individual LO_COMMON_NLS_ARCHS |
556 | | #endif // individual common NLS archs |
557 | | |
558 | | /***************************************************************************** |
559 | | return the text encoding corresponding to the given locale |
560 | | *****************************************************************************/ |
561 | | |
562 | | rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale ) |
563 | 16.5k | { |
564 | 16.5k | const Pair *language=nullptr; |
565 | | |
566 | 16.5k | char locale_buf[64] = ""; |
567 | 16.5k | char codeset_buf[64]; |
568 | | |
569 | 16.5k | char *codeset = nullptr; |
570 | | |
571 | | /* default to process locale if pLocale == NULL */ |
572 | 16.5k | if( pLocale == nullptr ) |
573 | 16.5k | osl_getProcessLocale( &pLocale ); |
574 | | |
575 | | /* convert rtl_Locale to locale string */ |
576 | 16.5k | compose_locale( pLocale, locale_buf, 64 ); |
577 | | |
578 | 16.5k | locale_t ctype_locale = newlocale( |
579 | 16.5k | LC_CTYPE_MASK, locale_buf, static_cast<locale_t>(0)); |
580 | 16.5k | if (ctype_locale == static_cast<locale_t>(0)) |
581 | 0 | { |
582 | 0 | return RTL_TEXTENCODING_DONTKNOW; |
583 | 0 | } |
584 | | |
585 | | /* get the charset as indicated by the LC_CTYPE locale */ |
586 | | #if defined(NETBSD) && !defined(CODESET) |
587 | | codeset = NULL; |
588 | | #else |
589 | 16.5k | codeset = nl_langinfo_l(CODESET, ctype_locale); |
590 | | // per SUSv4, the return value of nl_langinfo_l can be invalidated by a |
591 | | // subsequent call to nl_langinfo (not nl_langinfo_l) in any thread, but |
592 | | // we cannot guard against that (at least, no code in LO itself should |
593 | | // call nl_langinfo) |
594 | 16.5k | #endif |
595 | | |
596 | 16.5k | if ( codeset != nullptr ) |
597 | 16.5k | { |
598 | | /* get codeset into mt save memory */ |
599 | 16.5k | strncpy( codeset_buf, codeset, sizeof(codeset_buf) ); |
600 | 16.5k | codeset_buf[sizeof(codeset_buf) - 1] = 0; |
601 | 16.5k | codeset = codeset_buf; |
602 | 16.5k | } |
603 | | |
604 | 16.5k | freelocale(ctype_locale); |
605 | | |
606 | | /* search the codeset in our language list */ |
607 | 16.5k | if ( codeset != nullptr ) |
608 | 16.5k | { |
609 | 16.5k | language = pair_search (codeset, nl_language_list, SAL_N_ELEMENTS( nl_language_list ) ); |
610 | 16.5k | } |
611 | | |
612 | 16.5k | OSL_ASSERT( language && ( RTL_TEXTENCODING_DONTKNOW != language->value ) ); |
613 | | |
614 | | /* a matching item in our list provides a mapping from codeset to |
615 | | * rtl-codeset */ |
616 | 16.5k | if ( language != nullptr ) |
617 | 16.5k | return language->value; |
618 | | |
619 | 0 | return RTL_TEXTENCODING_DONTKNOW; |
620 | 16.5k | } |
621 | | |
622 | | /***************************************************************************** |
623 | | return the current process locale |
624 | | *****************************************************************************/ |
625 | | |
626 | | void imp_getProcessLocale( rtl_Locale ** ppLocale ) |
627 | 107 | { |
628 | 107 | char const * locale = getenv("LC_ALL"); |
629 | 107 | if (locale == nullptr || *locale == '\0') { |
630 | 107 | locale = getenv("LC_CTYPE"); |
631 | 107 | if (locale == nullptr || *locale == '\0') { |
632 | 107 | locale = getenv("LANG"); |
633 | 107 | if (locale == nullptr || *locale == '\0') { |
634 | 107 | locale = "C"; |
635 | 107 | } |
636 | 107 | } |
637 | 107 | } |
638 | | // coverity[overrun-buffer-val : FALSE] - coverity gets this very wrong |
639 | 107 | *ppLocale = parse_locale(locale); |
640 | 107 | } |
641 | | |
642 | | #else // !LO_COMMON_NLS_ARCHS |
643 | | |
644 | | /* |
645 | | * This implementation of osl_getTextEncodingFromLocale maps |
646 | | * from the ISO language codes. |
647 | | */ |
648 | | |
649 | | const Pair full_locale_list[] = { |
650 | | { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP }, |
651 | | { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP }, |
652 | | { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR }, |
653 | | { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN }, |
654 | | { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW } |
655 | | }; |
656 | | |
657 | | const Pair locale_extension_list[] = { |
658 | | { "big5", RTL_TEXTENCODING_BIG5 }, |
659 | | { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS }, |
660 | | { "gb18030", RTL_TEXTENCODING_GB_18030 }, |
661 | | { "euc", RTL_TEXTENCODING_EUC_JP }, |
662 | | { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 }, |
663 | | { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 }, |
664 | | { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 }, |
665 | | { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 }, |
666 | | { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 }, |
667 | | { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 }, |
668 | | { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 }, |
669 | | { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 }, |
670 | | { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 }, |
671 | | { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 }, |
672 | | { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 }, |
673 | | { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 }, |
674 | | { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 }, |
675 | | { "koi8-r", RTL_TEXTENCODING_KOI8_R }, |
676 | | { "koi8-u", RTL_TEXTENCODING_KOI8_U }, |
677 | | { "pck", RTL_TEXTENCODING_MS_932 }, |
678 | | #if (0) |
679 | | { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW }, |
680 | | #endif |
681 | | { "utf-16", RTL_TEXTENCODING_UNICODE }, |
682 | | { "utf-7", RTL_TEXTENCODING_UTF7 }, |
683 | | { "utf-8", RTL_TEXTENCODING_UTF8 } |
684 | | }; |
685 | | |
686 | | const Pair iso_language_list[] = { |
687 | | { "af", RTL_TEXTENCODING_ISO_8859_1 }, |
688 | | { "ar", RTL_TEXTENCODING_ISO_8859_6 }, |
689 | | { "az", RTL_TEXTENCODING_ISO_8859_9 }, |
690 | | { "be", RTL_TEXTENCODING_ISO_8859_5 }, |
691 | | { "bg", RTL_TEXTENCODING_ISO_8859_5 }, |
692 | | { "ca", RTL_TEXTENCODING_ISO_8859_1 }, |
693 | | { "cs", RTL_TEXTENCODING_ISO_8859_2 }, |
694 | | { "da", RTL_TEXTENCODING_ISO_8859_1 }, |
695 | | { "de", RTL_TEXTENCODING_ISO_8859_1 }, |
696 | | { "el", RTL_TEXTENCODING_ISO_8859_7 }, |
697 | | { "en", RTL_TEXTENCODING_ISO_8859_1 }, |
698 | | { "es", RTL_TEXTENCODING_ISO_8859_1 }, |
699 | | { "et", RTL_TEXTENCODING_ISO_8859_4 }, |
700 | | { "eu", RTL_TEXTENCODING_ISO_8859_1 }, |
701 | | { "fa", RTL_TEXTENCODING_ISO_8859_6 }, |
702 | | { "fi", RTL_TEXTENCODING_ISO_8859_1 }, |
703 | | { "fo", RTL_TEXTENCODING_ISO_8859_1 }, |
704 | | { "fr", RTL_TEXTENCODING_ISO_8859_1 }, |
705 | | { "gr", RTL_TEXTENCODING_ISO_8859_7 }, |
706 | | { "he", RTL_TEXTENCODING_ISO_8859_8 }, |
707 | | { "hi", RTL_TEXTENCODING_DONTKNOW }, |
708 | | { "hr", RTL_TEXTENCODING_ISO_8859_2 }, |
709 | | { "hu", RTL_TEXTENCODING_ISO_8859_2 }, |
710 | | { "hy", RTL_TEXTENCODING_DONTKNOW }, |
711 | | { "id", RTL_TEXTENCODING_ISO_8859_1 }, |
712 | | { "is", RTL_TEXTENCODING_ISO_8859_1 }, |
713 | | { "it", RTL_TEXTENCODING_ISO_8859_1 }, |
714 | | { "iw", RTL_TEXTENCODING_ISO_8859_8 }, |
715 | | { "ja", RTL_TEXTENCODING_EUC_JP }, |
716 | | { "ka", RTL_TEXTENCODING_DONTKNOW }, |
717 | | { "kk", RTL_TEXTENCODING_ISO_8859_5 }, |
718 | | { "ko", RTL_TEXTENCODING_EUC_KR }, |
719 | | { "lt", RTL_TEXTENCODING_ISO_8859_4 }, |
720 | | { "lv", RTL_TEXTENCODING_ISO_8859_4 }, |
721 | | { "mk", RTL_TEXTENCODING_ISO_8859_5 }, |
722 | | { "mr", RTL_TEXTENCODING_DONTKNOW }, |
723 | | { "ms", RTL_TEXTENCODING_ISO_8859_1 }, |
724 | | { "nl", RTL_TEXTENCODING_ISO_8859_1 }, |
725 | | { "no", RTL_TEXTENCODING_ISO_8859_1 }, |
726 | | { "pl", RTL_TEXTENCODING_ISO_8859_2 }, |
727 | | { "pt", RTL_TEXTENCODING_ISO_8859_1 }, |
728 | | { "ro", RTL_TEXTENCODING_ISO_8859_2 }, |
729 | | { "ru", RTL_TEXTENCODING_ISO_8859_5 }, |
730 | | { "sa", RTL_TEXTENCODING_DONTKNOW }, |
731 | | { "sk", RTL_TEXTENCODING_ISO_8859_2 }, |
732 | | { "sl", RTL_TEXTENCODING_ISO_8859_2 }, |
733 | | { "sq", RTL_TEXTENCODING_ISO_8859_2 }, |
734 | | { "sv", RTL_TEXTENCODING_ISO_8859_1 }, |
735 | | { "sw", RTL_TEXTENCODING_ISO_8859_1 }, |
736 | | { "ta", RTL_TEXTENCODING_DONTKNOW }, |
737 | | { "th", RTL_TEXTENCODING_DONTKNOW }, |
738 | | { "tr", RTL_TEXTENCODING_ISO_8859_9 }, |
739 | | { "tt", RTL_TEXTENCODING_ISO_8859_5 }, |
740 | | { "uk", RTL_TEXTENCODING_ISO_8859_5 }, |
741 | | { "ur", RTL_TEXTENCODING_ISO_8859_6 }, |
742 | | { "uz", RTL_TEXTENCODING_ISO_8859_9 }, |
743 | | { "vi", RTL_TEXTENCODING_DONTKNOW }, |
744 | | { "zh", RTL_TEXTENCODING_BIG5 } |
745 | | }; |
746 | | |
747 | | /***************************************************************************** |
748 | | return the text encoding corresponding to the given locale |
749 | | *****************************************************************************/ |
750 | | |
751 | | rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale ) |
752 | | { |
753 | | const Pair *language = nullptr; |
754 | | char locale_buf[64] = ""; |
755 | | |
756 | | /* default to process locale if pLocale == NULL */ |
757 | | if( nullptr == pLocale ) |
758 | | osl_getProcessLocale( &pLocale ); |
759 | | |
760 | | /* convert rtl_Locale to locale string */ |
761 | | if( compose_locale( pLocale, locale_buf, 64 ) ) |
762 | | { |
763 | | /* check special handling list (EUC) first */ |
764 | | language = pair_search( locale_buf, full_locale_list, SAL_N_ELEMENTS( full_locale_list ) ); |
765 | | |
766 | | if( nullptr == language ) |
767 | | { |
768 | | /* |
769 | | * check if there is a charset qualifier at the end of the given locale string |
770 | | * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what |
771 | | * charset to use |
772 | | */ |
773 | | char* cp = strrchr( locale_buf, '.' ); |
774 | | |
775 | | if( nullptr != cp ) |
776 | | { |
777 | | language = pair_search( cp + 1, locale_extension_list, SAL_N_ELEMENTS( locale_extension_list ) ); |
778 | | } |
779 | | } |
780 | | |
781 | | /* use iso language code to determine the charset */ |
782 | | if( nullptr == language ) |
783 | | { |
784 | | /* iso lang codes have 2 characters */ |
785 | | locale_buf[2] = '\0'; |
786 | | |
787 | | language = pair_search( locale_buf, iso_language_list, SAL_N_ELEMENTS( iso_language_list ) ); |
788 | | } |
789 | | } |
790 | | |
791 | | /* a matching item in our list provides a mapping from codeset to |
792 | | * rtl-codeset */ |
793 | | if ( language != nullptr ) |
794 | | return language->value; |
795 | | |
796 | | return RTL_TEXTENCODING_DONTKNOW; |
797 | | } |
798 | | |
799 | | #if defined(MACOSX) || defined(IOS) |
800 | | |
801 | | /***************************************************************************** |
802 | | return the current process locale |
803 | | *****************************************************************************/ |
804 | | |
805 | | void imp_getProcessLocale( rtl_Locale ** ppLocale ) |
806 | | { |
807 | | OUString loc16(macosx_getLocale()); |
808 | | OString locale; |
809 | | if (!loc16.convertToString( |
810 | | &locale, RTL_TEXTENCODING_UTF8, |
811 | | (RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR |
812 | | | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR))) |
813 | | { |
814 | | SAL_INFO("sal.osl", "Cannot convert \"" << loc16 << "\" to UTF-8"); |
815 | | } |
816 | | |
817 | | /* handle the case where OS specific method of finding locale fails */ |
818 | | if ( locale.isEmpty() ) |
819 | | { |
820 | | /* simulate behavior of setlocale */ |
821 | | locale = getenv( "LC_ALL" ); |
822 | | |
823 | | if( locale.isEmpty() ) |
824 | | locale = getenv( "LC_CTYPE" ); |
825 | | |
826 | | if( locale.isEmpty() ) |
827 | | locale = getenv( "LANG" ); |
828 | | |
829 | | if( locale.isEmpty() ) |
830 | | locale = "C"_ostr; |
831 | | } |
832 | | |
833 | | /* return the locale */ |
834 | | *ppLocale = parse_locale( locale.getStr() ); |
835 | | } |
836 | | |
837 | | #else // !MACOSX && !IOS |
838 | | |
839 | | /***************************************************************************** |
840 | | return the current process locale |
841 | | *****************************************************************************/ |
842 | | |
843 | | void imp_getProcessLocale( rtl_Locale ** ppLocale ) |
844 | | { |
845 | | #ifdef ANDROID |
846 | | /* No locale environment variables on Android, so why even bother |
847 | | * with getenv(). |
848 | | */ |
849 | | const char* locale = "en-US.UTF-8"; |
850 | | #else |
851 | | /* simulate behavior off setlocale */ |
852 | | const char* locale = getenv("LC_ALL"); |
853 | | |
854 | | if( NULL == locale ) |
855 | | locale = getenv( "LC_CTYPE" ); |
856 | | |
857 | | if( NULL == locale ) |
858 | | locale = getenv( "LANG" ); |
859 | | |
860 | | if( NULL == locale ) |
861 | | locale = "C"; |
862 | | |
863 | | #endif |
864 | | *ppLocale = parse_locale( locale ); |
865 | | } |
866 | | |
867 | | #endif // !MACOSX && !IOS |
868 | | #endif // !LO_COMMON_NLS_ARCHS |
869 | | |
870 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |