Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/sal/osl/unx/nlsupport.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sal/config.h>
21
22
#include <algorithm>
23
#include <cassert>
24
#include <cstring>
25
26
#include <osl/nlsupport.h>
27
#include <osl/diagnose.h>
28
#include <osl/process.h>
29
30
#include "nlsupport.hxx"
31
32
// these share a lot, so use one define
33
#if defined(LINUX) || defined(EMSCRIPTEN) || defined(__sun) || \
34
    defined(FREEBSD) || defined(OPENBSD) || defined(DRAGONFLY) || defined(NETBSD)
35
#define LO_COMMON_NLS_ARCHS 1
36
#else
37
#define LO_COMMON_NLS_ARCHS 0
38
#endif
39
40
#if LO_COMMON_NLS_ARCHS
41
#include <locale.h>
42
#include <langinfo.h>
43
#elif defined(MACOSX) || defined(IOS)
44
#include <osl/module.h>
45
#include <osl/thread.h>
46
#include <rtl/ustring.hxx>
47
#include <sal/log.hxx>
48
#include "system.hxx"
49
#endif
50
51
namespace {
52
53
struct Pair {
54
    const char              *key;
55
    const rtl_TextEncoding   value;
56
};
57
58
}
59
60
/*****************************************************************************
61
 compare function for binary search
62
 *****************************************************************************/
63
64
static int
65
pair_compare (const char *key, const Pair *pair)
66
115k
{
67
115k
    int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
68
115k
    return result;
69
115k
}
70
71
/*****************************************************************************
72
 binary search on encoding tables
73
 *****************************************************************************/
74
75
static const Pair*
76
pair_search (const char *key, const Pair *base, unsigned int member )
77
16.5k
{
78
16.5k
    unsigned int lower = 0;
79
16.5k
    unsigned int upper = member;
80
81
    /* check for validity of input */
82
16.5k
    if ( (key == nullptr) || (base == nullptr) || (member == 0) )
83
0
        return nullptr;
84
85
    /* binary search */
86
115k
    while ( lower < upper )
87
115k
    {
88
115k
        const unsigned int current = (lower + upper) / 2;
89
115k
        const int comparison = pair_compare( key, base + current );
90
115k
        if (comparison < 0)
91
99.1k
            upper = current;
92
16.5k
        else if (comparison > 0)
93
0
            lower = current + 1;
94
16.5k
        else
95
16.5k
            return base + current;
96
115k
    }
97
98
0
    return nullptr;
99
16.5k
}
100
101
/*****************************************************************************
102
 convert rtl_Locale to locale string
103
 *****************************************************************************/
104
105
static char * compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
106
16.5k
{
107
    /* check if a valid locale is specified */
108
16.5k
    if( pLocale && pLocale->Language &&
109
16.5k
            (pLocale->Language->length == 2 || pLocale->Language->length == 3) )
110
0
    {
111
0
        size_t offset = 0;
112
113
        /* convert language code to ascii */
114
0
        {
115
0
            rtl_String *pLanguage = nullptr;
116
117
0
            rtl_uString2String( &pLanguage,
118
0
                pLocale->Language->buffer, pLocale->Language->length,
119
0
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
120
121
0
            if( sal::static_int_cast<sal_uInt32>(pLanguage->length) < n )
122
0
            {
123
0
                strcpy( buffer, pLanguage->buffer );
124
0
                offset = pLanguage->length;
125
0
            }
126
127
0
            rtl_string_release( pLanguage );
128
0
        }
129
130
        /* convert country code to ascii */
131
0
        if( pLocale->Country && (pLocale->Country->length == 2) )
132
0
        {
133
0
            rtl_String *pCountry = nullptr;
134
135
0
            rtl_uString2String( &pCountry,
136
0
                pLocale->Country->buffer, pLocale->Country->length,
137
0
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
138
139
0
            if( offset + pCountry->length + 1 < n )
140
0
            {
141
0
                strcpy( buffer + offset++, "_" );
142
0
                strcpy( buffer + offset, pCountry->buffer );
143
0
                offset += pCountry->length;
144
0
            }
145
146
0
            rtl_string_release( pCountry );
147
0
        }
148
149
        /* convert variant to ascii - check if there is enough space for the variant string */
150
0
        if( pLocale->Variant && pLocale->Variant->length &&
151
0
            ( sal::static_int_cast<sal_uInt32>(pLocale->Variant->length) < n - 6 ) )
152
0
        {
153
0
            rtl_String *pVariant = nullptr;
154
155
0
            rtl_uString2String( &pVariant,
156
0
                pLocale->Variant->buffer, pLocale->Variant->length,
157
0
                RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );
158
159
0
            if( offset + pVariant->length + 1 < n )
160
0
            {
161
0
                strcpy( buffer + offset, pVariant->buffer );
162
0
            }
163
164
0
            rtl_string_release( pVariant );
165
0
        }
166
167
0
        return buffer;
168
0
    }
169
170
16.5k
    return nullptr;
171
16.5k
}
172
173
/*****************************************************************************
174
 convert locale string to rtl_Locale
175
 *****************************************************************************/
176
177
static rtl_Locale * parse_locale( const char * locale )
178
107
{
179
107
    assert(locale != nullptr);
180
181
107
    if (*locale == '\0' || std::strcmp(locale, "C") == 0
182
107
        || std::strcmp(locale, "POSIX") == 0)
183
107
    {
184
107
        return rtl_locale_register(u"C", u"", u"");
185
107
    }
186
187
0
    size_t len = strlen( locale );
188
189
0
    rtl_uString * pLanguage = nullptr;
190
0
    rtl_uString * pCountry  = nullptr;
191
0
    rtl_uString * pVariant  = nullptr;
192
193
0
    size_t offset = std::min<size_t>(len, 2);
194
195
0
    rtl_Locale * ret;
196
197
    /* language is a two or three letter code */
198
0
    if( (len > 3 && locale[3] == '_') || (len == 3 && locale[2] != '_') )
199
0
        offset = 3;
200
201
    /* convert language code to unicode */
202
0
    rtl_string2UString( &pLanguage, locale, offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
203
0
    OSL_ASSERT(pLanguage != nullptr);
204
205
    /* convert country code to unicode */
206
0
    if( len >= offset+3 && locale[offset] == '_' )
207
0
    {
208
0
        rtl_string2UString( &pCountry, locale + offset + 1, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
209
0
        OSL_ASSERT(pCountry != nullptr);
210
0
        offset += 3;
211
0
    }
212
213
    /* convert variant code to unicode - do not rely on "." as delimiter */
214
0
    if( len > offset ) {
215
0
        rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
216
0
        OSL_ASSERT(pVariant != nullptr);
217
0
    }
218
219
0
    ret =  rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : u"", pVariant ? pVariant->buffer : u"" );
220
221
0
    if (pVariant) rtl_uString_release(pVariant);
222
0
    if (pCountry) rtl_uString_release(pCountry);
223
0
    if (pLanguage) rtl_uString_release(pLanguage);
224
225
0
    return ret;
226
107
}
227
228
#if LO_COMMON_NLS_ARCHS
229
230
/*
231
 * This implementation of osl_getTextEncodingFromLocale maps
232
 * from nl_langinfo_l(CODESET) to rtl_textencoding defines.
233
 * nl_langinfo() is supported only on Linux, Solaris,
234
 * >= NetBSD 1.6 and >= FreeBSD 4.4
235
 *
236
 * nl_language_list[] is an array list of supported encodings. Because
237
 * we are using a binary search, the list has to be in ascending order.
238
 * We are comparing the encodings case insensitive, so the list has
239
 * to be completely upper or lowercase.
240
 */
241
242
#if defined(__sun)
243
244
/* The values in the below list can be obtained with a script like
245
 *  #!/bin/sh
246
 *  for i in `locale -a`; do
247
 *    LC_ALL=$i locale -k code_set_name
248
 *  done
249
 */
250
/* Code set names for the __sun build, each mapped to an rtl_TextEncoding
 * value.  The table is meant for binary search, so the keys have to stay
 * in ascending order; insert new rows at their sorted position. */
static const Pair nl_language_list[] = {
251
    { "5601",           RTL_TEXTENCODING_EUC_KR         }, /* ko_KR.EUC */
252
    { "646",            RTL_TEXTENCODING_ISO_8859_1     }, /* fake: ASCII_US */
253
    { "ANSI-1251",      RTL_TEXTENCODING_MS_1251        }, /* ru_RU.ANSI1251 */
254
    { "BIG5",           RTL_TEXTENCODING_BIG5           }, /* zh_CN.BIG5 */
255
    { "BIG5-HKSCS",     RTL_TEXTENCODING_BIG5_HKSCS     }, /* zh_CN.BIG5HK */
256
    { "CNS11643",       RTL_TEXTENCODING_EUC_TW         }, /* zh_TW.EUC */
257
    { "EUCJP",          RTL_TEXTENCODING_EUC_JP         }, /* ja_JP.eucjp */
258
    { "GB18030",        RTL_TEXTENCODING_GB_18030       }, /* zh_CN.GB18030 */
259
    { "GB2312",         RTL_TEXTENCODING_GB_2312        }, /* zh_CN */
260
    { "GBK",            RTL_TEXTENCODING_GBK            }, /* zh_CN.GBK */
261
    { "ISO8859-1",      RTL_TEXTENCODING_ISO_8859_1     },
262
    { "ISO8859-10",     RTL_TEXTENCODING_ISO_8859_10    },
263
    { "ISO8859-13",     RTL_TEXTENCODING_ISO_8859_13    }, /* lt_LT lv_LV */
264
    { "ISO8859-14",     RTL_TEXTENCODING_ISO_8859_14    },
265
    { "ISO8859-15",     RTL_TEXTENCODING_ISO_8859_15    },
266
    { "ISO8859-2",      RTL_TEXTENCODING_ISO_8859_2     },
267
    { "ISO8859-3",      RTL_TEXTENCODING_ISO_8859_3     },
268
    { "ISO8859-4",      RTL_TEXTENCODING_ISO_8859_4     },
269
    { "ISO8859-5",      RTL_TEXTENCODING_ISO_8859_5     },
270
    { "ISO8859-6",      RTL_TEXTENCODING_ISO_8859_6     },
271
    { "ISO8859-7",      RTL_TEXTENCODING_ISO_8859_7     },
272
    { "ISO8859-8",      RTL_TEXTENCODING_ISO_8859_8     },
273
    { "ISO8859-9",      RTL_TEXTENCODING_ISO_8859_9     },
274
    { "KOI8-R",         RTL_TEXTENCODING_KOI8_R         },
275
    { "KOI8-U",         RTL_TEXTENCODING_KOI8_U         },
276
    { "PCK",            RTL_TEXTENCODING_MS_932         },
277
    { "SUN_EU_GREEK",   RTL_TEXTENCODING_ISO_8859_7     }, /* 8859-7 + Euro */
278
    { "TIS620.2533",    RTL_TEXTENCODING_MS_874         }, /* th_TH.TIS620 */
279
    { "UTF-8",          RTL_TEXTENCODING_UTF8           }
280
};
281
282
/* XXX MS-874 is an extension to tis620, so this is not
283
 * really equivalent */
284
285
#elif defined(LINUX) || defined(EMSCRIPTEN)
286
287
#if !defined(CODESET)
288
#define CODESET _NL_CTYPE_CODESET_NAME
289
#endif
290
291
/* Code set names for the LINUX / EMSCRIPTEN build, each mapped to an
 * rtl_TextEncoding value; names without a matching encoding map to
 * RTL_TEXTENCODING_DONTKNOW.  The table is meant for binary search, so the
 * keys have to stay in ascending order; insert new rows at their sorted
 * position.
 * NOTE(review): rows such as "GB_1988-80" before "GBK" or "ISO_8859-SUPP"
 * before "IT" are ascending only if letters are compared in lower case
 * ('_' sorts after upper-case but before lower-case ASCII letters) --
 * confirm against rtl_str_compareIgnoreAsciiCase before reordering. */
const Pair nl_language_list[] = {
292
    { "ANSI_X3.110-1983",           RTL_TEXTENCODING_DONTKNOW   },  /* ISO-IR-99 NAPLPS */
293
    { "ANSI_X3.4-1968",             RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US */
294
    { "ASMO_449",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO_9036 ARABIC7 */
295
    { "BALTIC",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-179 */
296
    { "BIG5",                       RTL_TEXTENCODING_BIG5 },        /* locale: zh_TW */
297
    { "BIG5-HKSCS",                 RTL_TEXTENCODING_BIG5_HKSCS },  /* locale: zh_CN.BIG5HK */
298
    { "BIG5HKSCS",                  RTL_TEXTENCODING_BIG5_HKSCS },  /* deprecated */
299
    { "BS_4730",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-4 ISO646-GB */
300
    { "BS_VIEWDATA",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-47 */
301
    { "CP1250",                     RTL_TEXTENCODING_MS_1250 },     /* MS-EE */
302
    { "CP1251",                     RTL_TEXTENCODING_MS_1251 },     /* MS-CYRL */
303
    { "CP1252",                     RTL_TEXTENCODING_MS_1252 },     /* MS-ANSI */
304
    { "CP1253",                     RTL_TEXTENCODING_MS_1253 },     /* MS-GREEK */
305
    { "CP1254",                     RTL_TEXTENCODING_MS_1254 },     /* MS-TURK */
306
    { "CP1255",                     RTL_TEXTENCODING_MS_1255 },     /* MS-HEBR */
307
    { "CP1256",                     RTL_TEXTENCODING_MS_1256 },     /* MS-ARAB */
308
    { "CP1257",                     RTL_TEXTENCODING_MS_1257 },     /* WINBALTRIM */
309
    { "CSA_Z243.4-1985-1",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-121 */
310
    { "CSA_Z243.4-1985-2",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-122 CSA7-2 */
311
    { "CSA_Z243.4-1985-GR",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-123 */
312
    { "CSN_369103",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-139 */
313
    { "CWI",                        RTL_TEXTENCODING_DONTKNOW },    /* CWI-2 CP-HU */
314
    { "DEC-MCS",                    RTL_TEXTENCODING_DONTKNOW },    /* DEC */
315
    { "DIN_66003",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-21 */
316
    { "DS_2089",                    RTL_TEXTENCODING_DONTKNOW },    /* DS2089 ISO646-DK */
317
    { "EBCDIC-AT-DE",               RTL_TEXTENCODING_DONTKNOW },
318
    { "EBCDIC-AT-DE-A",             RTL_TEXTENCODING_DONTKNOW },
319
    { "EBCDIC-CA-FR",               RTL_TEXTENCODING_DONTKNOW },
320
    { "EBCDIC-DK-NO",               RTL_TEXTENCODING_DONTKNOW },
321
    { "EBCDIC-DK-NO-A",             RTL_TEXTENCODING_DONTKNOW },
322
    { "EBCDIC-ES",                  RTL_TEXTENCODING_DONTKNOW },
323
    { "EBCDIC-ES-A",                RTL_TEXTENCODING_DONTKNOW },
324
    { "EBCDIC-ES-S",                RTL_TEXTENCODING_DONTKNOW },
325
    { "EBCDIC-FI-SE",               RTL_TEXTENCODING_DONTKNOW },
326
    { "EBCDIC-FI-SE-A",             RTL_TEXTENCODING_DONTKNOW },
327
    { "EBCDIC-FR",                  RTL_TEXTENCODING_DONTKNOW },
328
    { "EBCDIC-IS-FRISS",            RTL_TEXTENCODING_DONTKNOW },    /*  FRISS */
329
    { "EBCDIC-IT",                  RTL_TEXTENCODING_DONTKNOW },
330
    { "EBCDIC-PT",                  RTL_TEXTENCODING_DONTKNOW },
331
    { "EBCDIC-UK",                  RTL_TEXTENCODING_DONTKNOW },
332
    { "EBCDIC-US",                  RTL_TEXTENCODING_DONTKNOW },
333
    { "ECMA-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-111 */
334
    { "ES",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-17 */
335
    { "ES2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-85 */
336
    { "EUC-JP",                     RTL_TEXTENCODING_EUC_JP },      /* locale: ja_JP.eucjp */
337
    { "EUC-KR",                     RTL_TEXTENCODING_EUC_KR },      /* locale: ko_KR.euckr */
338
    { "EUC-TW",                     RTL_TEXTENCODING_EUC_TW },      /* locale: zh_TW.euctw */
339
    { "GB18030",                    RTL_TEXTENCODING_GB_18030 },    /* locale: zh_CN.gb18030 */
340
    { "GB2312",                     RTL_TEXTENCODING_GB_2312 },     /* locale: zh_CN */
341
    { "GB_1988-80",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-57 */
342
    { "GBK",                        RTL_TEXTENCODING_GBK },         /* locale: zh_CN.GBK */
343
    { "GOST_19768-74",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-153 */
344
    { "GREEK-CCITT",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-150 */
345
    { "GREEK7",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-88 */
346
    { "GREEK7-OLD",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-18 */
347
    { "HP-ROMAN8",                  RTL_TEXTENCODING_DONTKNOW },    /* ROMAN8 R8 */
348
    { "IBM037",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[US|CA|WT] */
349
    { "IBM038",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT CP038 */
350
    { "IBM1004",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1004 OS2LATIN1 */
351
    { "IBM1026",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1026 1026 */
352
    { "IBM1047",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1047 1047 */
353
    { "IBM256",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT1 */
354
    { "IBM273",                     RTL_TEXTENCODING_DONTKNOW },    /* CP273 */
355
    { "IBM274",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BE CP274 */
356
    { "IBM275",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BR CP275 */
357
    { "IBM277",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[DK|NO] */
358
    { "IBM278",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[FISE]*/
359
    { "IBM280",                     RTL_TEXTENCODING_DONTKNOW },    /* CP280 EBCDIC-CP-IT*/
360
    { "IBM281",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-E CP281 */
361
    { "IBM284",                     RTL_TEXTENCODING_DONTKNOW },    /* CP284 EBCDIC-CP-ES */
362
    { "IBM285",                     RTL_TEXTENCODING_DONTKNOW },    /* CP285 EBCDIC-CP-GB */
363
    { "IBM290",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-KANA */
364
    { "IBM297",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-FR */
365
    { "IBM420",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-AR1 */
366
    { "IBM423",                     RTL_TEXTENCODING_DONTKNOW },    /* CP423 EBCDIC-CP-GR */
367
    { "IBM424",                     RTL_TEXTENCODING_DONTKNOW },    /* CP424 EBCDIC-CP-HE */
368
    { "IBM437",                     RTL_TEXTENCODING_IBM_437 },     /* CP437 437 */
369
    { "IBM500",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[BE|CH] */
370
    { "IBM850",                     RTL_TEXTENCODING_IBM_850 },     /* CP850 850 */
371
    { "IBM851",                     RTL_TEXTENCODING_DONTKNOW },    /* CP851 851 */
372
    { "IBM852",                     RTL_TEXTENCODING_IBM_852 },     /* CP852 852 */
373
    { "IBM855",                     RTL_TEXTENCODING_IBM_855 },     /* CP855 855 */
374
    { "IBM857",                     RTL_TEXTENCODING_IBM_857 },     /* CP857 857 */
375
    { "IBM860",                     RTL_TEXTENCODING_IBM_860 },     /* CP860 860 */
376
    { "IBM861",                     RTL_TEXTENCODING_IBM_861 },     /* CP861 861 CP-IS */
377
    { "IBM862",                     RTL_TEXTENCODING_IBM_862 },     /* CP862 862 */
378
    { "IBM863",                     RTL_TEXTENCODING_IBM_863 },     /* CP863 863 */
379
    { "IBM864",                     RTL_TEXTENCODING_IBM_864 },     /* CP864 */
380
    { "IBM865",                     RTL_TEXTENCODING_IBM_865 },     /* CP865 865 */
381
    { "IBM866",                     RTL_TEXTENCODING_IBM_866 },     /* CP866 866 */
382
    { "IBM868",                     RTL_TEXTENCODING_DONTKNOW },    /* CP868 CP-AR */
383
    { "IBM869",                     RTL_TEXTENCODING_IBM_869 },     /* CP869 869 CP-GR */
384
    { "IBM870",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[ROECE|YU] */
385
    { "IBM871",                     RTL_TEXTENCODING_DONTKNOW },    /* CP871 EBCDIC-CP-IS */
386
    { "IBM875",                     RTL_TEXTENCODING_DONTKNOW },    /* CP875 EBCDIC-GREEK */
387
    { "IBM880",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CYRILLIC */
388
    { "IBM891",                     RTL_TEXTENCODING_DONTKNOW },    /* CP891 */
389
    { "IBM903",                     RTL_TEXTENCODING_DONTKNOW },    /* CP903 */
390
    { "IBM904",                     RTL_TEXTENCODING_DONTKNOW },    /* CP904 904 */
391
    { "IBM905",                     RTL_TEXTENCODING_DONTKNOW },    /* CP905 EBCDIC-CP-TR */
392
    { "IBM918",                     RTL_TEXTENCODING_DONTKNOW },    /* CP918 EBCDIC-AR2 */
393
    { "IEC_P27-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-143 */
394
    { "INIS",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-49 */
395
    { "INIS-8",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-50 */
396
    { "INIS-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-51 */
397
    { "INVARIANT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-170 */
398
    { "ISO-8859-1",                 RTL_TEXTENCODING_ISO_8859_1 },  /* ISO-IR-100 CP819 */
399
    { "ISO-8859-10",                RTL_TEXTENCODING_ISO_8859_10 }, /* ISO-IR-157 LATIN6 */
400
    { "ISO-8859-13",                RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
401
    { "ISO-8859-14",                RTL_TEXTENCODING_ISO_8859_14 }, /* LATIN8 L8 */
402
    { "ISO-8859-15",                RTL_TEXTENCODING_ISO_8859_15 },
403
    { "ISO-8859-2",                 RTL_TEXTENCODING_ISO_8859_2 },  /* LATIN2 L2 */
404
    { "ISO-8859-3",                 RTL_TEXTENCODING_ISO_8859_3 },  /* LATIN3 L3 */
405
    { "ISO-8859-4",                 RTL_TEXTENCODING_ISO_8859_4 },  /* LATIN4 L4 */
406
    { "ISO-8859-5",                 RTL_TEXTENCODING_ISO_8859_5 },  /* CYRILLIC */
407
    { "ISO-8859-6",                 RTL_TEXTENCODING_ISO_8859_6 },  /* ECMA-114 ARABIC */
408
    { "ISO-8859-7",                 RTL_TEXTENCODING_ISO_8859_7 },  /* ECMA-118 GREEK8 */
409
    { "ISO-8859-8",                 RTL_TEXTENCODING_ISO_8859_8 },  /* ISO_8859-8 HEBREW */
410
    { "ISO-8859-9",                 RTL_TEXTENCODING_ISO_8859_9 },  /* ISO_8859-9 LATIN5 */
411
    { "ISO-IR-90",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO_6937-2:1983 */
412
    { "ISO_10367-BOX",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-155 */
413
    { "ISO_2033-1983",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-98 E13B */
414
    { "ISO_5427",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-37 KOI-7 */
415
    { "ISO_5427-EXT",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-54  */
416
    { "ISO_5428",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-55 */
417
    { "ISO_646.BASIC",              RTL_TEXTENCODING_ASCII_US },    /* REF */
418
    { "ISO_646.IRV",                RTL_TEXTENCODING_ASCII_US },    /* ISO-IR-2 IRV */
419
    { "ISO_646.IRV:1983",           RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US, used for "C" locale*/
420
    { "ISO_6937",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-156 ISO6937*/
421
    { "ISO_6937-2-25",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-152 */
422
    { "ISO_6937-2-ADD",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-142 */
423
    { "ISO_8859-SUPP",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-154 */
424
    { "IT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-15  */
425
    { "JIS_C6220-1969-JP",          RTL_TEXTENCODING_DONTKNOW },    /* KATAKANA X0201-7 */
426
    { "JIS_C6220-1969-RO",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-14 */
427
    { "JIS_C6229-1984-A",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-91 */
428
    { "JIS_C6229-1984-B",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-92 */
429
    { "JIS_C6229-1984-B-ADD",       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-93 */
430
    { "JIS_C6229-1984-HAND",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-94 */
431
    { "JIS_C6229-1984-HAND-ADD",    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-95 */
432
    { "JIS_C6229-1984-KANA",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-96 */
433
    { "JIS_X0201",                  RTL_TEXTENCODING_DONTKNOW },    /* X0201 */
434
    { "JUS_I.B1.002",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-141 */
435
    { "JUS_I.B1.003-MAC",           RTL_TEXTENCODING_DONTKNOW },    /* MACEDONIAN */
436
    { "JUS_I.B1.003-SERB",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-146 SERBIAN */
437
    { "KOI-8",                      RTL_TEXTENCODING_DONTKNOW },
438
    { "KOI8-R",                     RTL_TEXTENCODING_KOI8_R },
439
    { "KOI8-U",                     RTL_TEXTENCODING_KOI8_U },
440
    { "KSC5636",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO646-KR */
441
    { "LATIN-GREEK",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-19 */
442
    { "LATIN-GREEK-1",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-27 */
443
    { "MAC-IS",                     RTL_TEXTENCODING_APPLE_ROMAN },
444
    { "MAC-UK",                     RTL_TEXTENCODING_APPLE_ROMAN },
445
    { "MACINTOSH",                  RTL_TEXTENCODING_APPLE_ROMAN }, /* MAC */
446
    { "MSZ_7795.3",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-86 */
447
    { "NATS-DANO",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-1 */
448
    { "NATS-DANO-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-2 */
449
    { "NATS-SEFI",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-1 */
450
    { "NATS-SEFI-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-2 */
451
    { "NC_NC00-10",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-151 */
452
    { "NEXTSTEP",                   RTL_TEXTENCODING_DONTKNOW },    /* NEXT */
453
    { "NF_Z_62-010",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-69 */
454
    { "NF_Z_62-010_(1973)",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-25 */
455
    { "NS_4551-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-60 */
456
    { "NS_4551-2",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-61 */
457
    { "PT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-16 */
458
    { "PT2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-84 */
459
    { "SAMI",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-158 */
460
    { "SEN_850200_B",               RTL_TEXTENCODING_DONTKNOW },    /* ISO646-[FI|SE] */
461
    { "SEN_850200_C",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-11 */
462
    { "T.101-G2",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-128 */
463
    { "T.61-7BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-102 */
464
    { "T.61-8BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* T.61 ISO-IR-103 */
465
    { "TIS-620",                    RTL_TEXTENCODING_MS_874 },     /* locale: th_TH */
466
    { "UTF-8",                      RTL_TEXTENCODING_UTF8 },        /* ISO-10646/UTF-8 */
467
    { "VIDEOTEX-SUPPL",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-70 */
468
    { "WIN-SAMI-2",                 RTL_TEXTENCODING_DONTKNOW }     /* WS2 */
469
};
470
471
#elif defined(FREEBSD) || defined(DRAGONFLY)
472
473
/* Code set names for the FREEBSD / DRAGONFLY build, each mapped to an
 * rtl_TextEncoding value.  The table is meant for binary search, so the
 * keys have to stay in ascending order; insert new rows at their sorted
 * position. */
static const Pair nl_language_list[] = {
474
    { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
475
    { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
476
    { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
477
    { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
478
    { "EUCCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
479
    { "EUCJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
480
    { "EUCKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
481
    { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
482
    { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
483
    { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
484
    { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
485
    { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
486
    { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
487
    { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
488
    { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
489
    { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
490
    { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
491
    { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
492
    { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
493
};
494
495
#elif defined(NETBSD)
496
497
/* Code set names for the NETBSD build, each mapped to an rtl_TextEncoding
 * value.  The table is meant for binary search, so the keys have to stay
 * in ascending order; insert new rows at their sorted position.
 * NOTE(review): unlike the other tables this one contains mixed-case keys
 * ("Big5", "eucCN", ...); its order is ascending only under a
 * case-insensitive comparison, and "BIG5" / "Big5" would then compare
 * equal, so either row may be the one that is found -- confirm. */
static const Pair nl_language_list[] = {
498
    { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
499
    { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
500
    { "Big5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
501
    { "Big5-HKSCS",    RTL_TEXTENCODING_BIG5_HKSCS     }, /* locale: zh_CN.BIG5HK */
502
    { "Big5HKSCS",     RTL_TEXTENCODING_BIG5_HKSCS     }, /* deprecated */
503
    { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
504
    { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
505
    { "CTEXT",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
506
    { "eucCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
507
    { "eucJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
508
    { "eucKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
509
    { "eucTW",         RTL_TEXTENCODING_EUC_TW         }, /* China - Traditional Chinese */
510
    { "GB18030",       RTL_TEXTENCODING_GB_18030       }, /* locale: zh_CN.gb18030 */
511
    { "GB2312",        RTL_TEXTENCODING_GB_2312        }, /* locale: zh_CN */
512
    { "ISO-2022-JP",   RTL_TEXTENCODING_DONTKNOW       }, /* */
513
    { "ISO-2022-JP-2", RTL_TEXTENCODING_DONTKNOW       }, /* */
514
    { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
515
    { "ISO8859-13",    RTL_TEXTENCODING_ISO_8859_13    }, /* ISO-IR-179 LATIN7 */
516
    { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
517
    { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
518
    { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
519
    { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
520
    { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
521
    { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
522
    { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
523
    { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
524
    { "PT154",         RTL_TEXTENCODING_PT154          }, /* */
525
    { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
526
    { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
527
    { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
528
};
529
530
#elif defined(OPENBSD)
531
532
/* OpenBSD: codeset name -> rtl text encoding.  This is the table that
 * osl_getTextEncodingFromLocale() hands to pair_search(); the entries are
 * listed in ascending key order. */
static const Pair nl_language_list[] = {
    {"ASCII",      RTL_TEXTENCODING_ASCII_US},    /* US-ASCII */
    {"BIG5",       RTL_TEXTENCODING_BIG5},        /* China - Traditional Chinese */
    {"CP1251",     RTL_TEXTENCODING_MS_1251},     /* MS-CYRL */
    {"CP866",      RTL_TEXTENCODING_IBM_866},     /* CP866 866 */
    {"EUCCN",      RTL_TEXTENCODING_EUC_CN},      /* China - Simplified Chinese */
    {"EUCJP",      RTL_TEXTENCODING_EUC_JP},      /* Japan */
    {"EUCKR",      RTL_TEXTENCODING_EUC_KR},      /* Korea */
    {"ISO8859-1",  RTL_TEXTENCODING_ISO_8859_1},  /* Western */
    {"ISO8859-15", RTL_TEXTENCODING_ISO_8859_15}, /* Western Updated (w/Euro sign) */
    {"ISO8859-2",  RTL_TEXTENCODING_ISO_8859_2},  /* Central European */
    {"ISO8859-4",  RTL_TEXTENCODING_ISO_8859_4},  /* LATIN4 L4 */
    {"ISO8859-5",  RTL_TEXTENCODING_ISO_8859_5},  /* Cyrillic */
    {"ISO8859-7",  RTL_TEXTENCODING_ISO_8859_7},  /* Greek */
    {"ISO8859-9",  RTL_TEXTENCODING_ISO_8859_9},  /* Turkish */
    {"KOI8-R",     RTL_TEXTENCODING_KOI8_R},      /* KOI8-R */
    {"KOI8-U",     RTL_TEXTENCODING_KOI8_U},      /* KOI8-U */
    {"SJIS",       RTL_TEXTENCODING_SHIFT_JIS},   /* Japan */
    {"US-ASCII",   RTL_TEXTENCODING_ASCII_US},    /* US-ASCII */
    {"UTF-8",      RTL_TEXTENCODING_UTF8}         /* ISO-10646/UTF-8 */
};
553
554
#else
555
#error Unhandled individual LO_COMMON_NLS_ARCHS
556
#endif // individual common NLS archs
557
558
/*****************************************************************************
559
 return the text encoding corresponding to the given locale
560
 *****************************************************************************/
561
562
rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
{
    /* no locale given: use the one of the running process */
    if( pLocale == nullptr )
        osl_getProcessLocale( &pLocale );

    /* render the rtl_Locale as a locale name string; the buffer starts out
     * empty and the result of compose_locale is deliberately not inspected */
    char locale_name[64] = "";
    compose_locale( pLocale, locale_name, 64 );

    /* build a temporary LC_CTYPE-only locale object for that name */
    locale_t const ctype = newlocale(
        LC_CTYPE_MASK, locale_name, static_cast<locale_t>(0));
    if (ctype == static_cast<locale_t>(0))
        return RTL_TEXTENCODING_DONTKNOW;

    /* ask the LC_CTYPE locale for its charset name */
    char const * reported = nullptr;
#if !defined(NETBSD) || defined(CODESET)
    // per SUSv4, the return value of nl_langinfo_l can be invalidated by a
    // subsequent call to nl_langinfo (not nl_langinfo_l) in any thread, but
    // we cannot guard against that (at least, no code in LO itself should
    // call nl_langinfo)
    reported = nl_langinfo_l(CODESET, ctype);
#endif

    /* take a private, always terminated copy before the locale object
     * goes away */
    char codeset_copy[64];
    bool const have_codeset = (reported != nullptr);
    if ( have_codeset )
    {
        strncpy( codeset_copy, reported, sizeof(codeset_copy) );
        codeset_copy[sizeof(codeset_copy) - 1] = 0;
    }

    freelocale(ctype);

    /* map the charset name through the platform table */
    const Pair * const match = have_codeset
        ? pair_search( codeset_copy, nl_language_list, SAL_N_ELEMENTS( nl_language_list ) )
        : nullptr;

    OSL_ASSERT( match && ( RTL_TEXTENCODING_DONTKNOW != match->value ) );

    /* a table hit supplies the rtl encoding; everything else is unknown */
    return ( match != nullptr ) ? match->value : RTL_TEXTENCODING_DONTKNOW;
}
621
622
/*****************************************************************************
623
 return the current process locale
624
 *****************************************************************************/
625
626
void imp_getProcessLocale( rtl_Locale ** ppLocale )
{
    /* environment variables consulted, in order of precedence */
    static char const * const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };

    /* used when none of the variables is set to a non-empty value */
    char const * locale = "C";

    for (char const * name : variables) {
        char const * value = getenv(name);
        if (value != nullptr && *value != '\0') {
            locale = value;
            break;
        }
    }

    // coverity[overrun-buffer-val : FALSE] - coverity gets this very wrong
    *ppLocale = parse_locale(locale);
}
641
642
#else // !LO_COMMON_NLS_ARCHS
643
644
/*
645
 * This implementation of osl_getTextEncodingFromLocale maps
646
 * from the ISO language codes.
647
 */
648
649
/*
 * Complete locale names that need special (EUC) handling; consulted by
 * osl_getTextEncodingFromLocale() before any other table.
 *
 * Entries are kept in ascending key order like the other lookup tables in
 * this file ("ja_JP.EUC" is a prefix of "ja_JP.eucJP" when case is ignored
 * and also sorts first in plain byte order, so it has to come first).
 * NOTE(review): pair_search() is defined outside this part of the file; if
 * it performs an ordered search, a misplaced entry is unreachable - confirm.
 */
const Pair full_locale_list[] = {
    { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
    { "ja_JP.eucJP",  RTL_TEXTENCODING_EUC_JP      },
    { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
    { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
    { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
};
656
657
/*
 * Charset qualifiers (the part of a locale name after the last '.') and the
 * rtl text encoding each one selects.
 *
 * Entries are kept in ascending key order like the other lookup tables in
 * this file; "euc" therefore has to precede "gb18030".
 * NOTE(review): pair_search() is defined outside this part of the file; if
 * it performs an ordered search, a misplaced entry is unreachable - confirm.
 */
const Pair locale_extension_list[] = {
    { "big5",         RTL_TEXTENCODING_BIG5        },
    { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
    { "euc",          RTL_TEXTENCODING_EUC_JP      },
    { "gb18030",      RTL_TEXTENCODING_GB_18030    },
    { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
    { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 },
    { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 },
    { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
    { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
    { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
    { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
    { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
    { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
    { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
    { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
    { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
    { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
    { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
    { "koi8-u",       RTL_TEXTENCODING_KOI8_U      },
    { "pck",          RTL_TEXTENCODING_MS_932      },
#if (0)
    { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
#endif
    { "utf-16",       RTL_TEXTENCODING_UNICODE     },
    { "utf-7",        RTL_TEXTENCODING_UTF7        },
    { "utf-8",        RTL_TEXTENCODING_UTF8        }
};
685
686
/* Two-letter language code -> rtl text encoding; the last table tried by
 * osl_getTextEncodingFromLocale().  Entries are in ascending key order, and
 * languages without a usable mapping carry RTL_TEXTENCODING_DONTKNOW. */
const Pair iso_language_list[] = {
    {"af", RTL_TEXTENCODING_ISO_8859_1},
    {"ar", RTL_TEXTENCODING_ISO_8859_6},
    {"az", RTL_TEXTENCODING_ISO_8859_9},
    {"be", RTL_TEXTENCODING_ISO_8859_5},
    {"bg", RTL_TEXTENCODING_ISO_8859_5},
    {"ca", RTL_TEXTENCODING_ISO_8859_1},
    {"cs", RTL_TEXTENCODING_ISO_8859_2},
    {"da", RTL_TEXTENCODING_ISO_8859_1},
    {"de", RTL_TEXTENCODING_ISO_8859_1},
    {"el", RTL_TEXTENCODING_ISO_8859_7},
    {"en", RTL_TEXTENCODING_ISO_8859_1},
    {"es", RTL_TEXTENCODING_ISO_8859_1},
    {"et", RTL_TEXTENCODING_ISO_8859_4},
    {"eu", RTL_TEXTENCODING_ISO_8859_1},
    {"fa", RTL_TEXTENCODING_ISO_8859_6},
    {"fi", RTL_TEXTENCODING_ISO_8859_1},
    {"fo", RTL_TEXTENCODING_ISO_8859_1},
    {"fr", RTL_TEXTENCODING_ISO_8859_1},
    {"gr", RTL_TEXTENCODING_ISO_8859_7},
    {"he", RTL_TEXTENCODING_ISO_8859_8},
    {"hi", RTL_TEXTENCODING_DONTKNOW},
    {"hr", RTL_TEXTENCODING_ISO_8859_2},
    {"hu", RTL_TEXTENCODING_ISO_8859_2},
    {"hy", RTL_TEXTENCODING_DONTKNOW},
    {"id", RTL_TEXTENCODING_ISO_8859_1},
    {"is", RTL_TEXTENCODING_ISO_8859_1},
    {"it", RTL_TEXTENCODING_ISO_8859_1},
    {"iw", RTL_TEXTENCODING_ISO_8859_8},
    {"ja", RTL_TEXTENCODING_EUC_JP},
    {"ka", RTL_TEXTENCODING_DONTKNOW},
    {"kk", RTL_TEXTENCODING_ISO_8859_5},
    {"ko", RTL_TEXTENCODING_EUC_KR},
    {"lt", RTL_TEXTENCODING_ISO_8859_4},
    {"lv", RTL_TEXTENCODING_ISO_8859_4},
    {"mk", RTL_TEXTENCODING_ISO_8859_5},
    {"mr", RTL_TEXTENCODING_DONTKNOW},
    {"ms", RTL_TEXTENCODING_ISO_8859_1},
    {"nl", RTL_TEXTENCODING_ISO_8859_1},
    {"no", RTL_TEXTENCODING_ISO_8859_1},
    {"pl", RTL_TEXTENCODING_ISO_8859_2},
    {"pt", RTL_TEXTENCODING_ISO_8859_1},
    {"ro", RTL_TEXTENCODING_ISO_8859_2},
    {"ru", RTL_TEXTENCODING_ISO_8859_5},
    {"sa", RTL_TEXTENCODING_DONTKNOW},
    {"sk", RTL_TEXTENCODING_ISO_8859_2},
    {"sl", RTL_TEXTENCODING_ISO_8859_2},
    {"sq", RTL_TEXTENCODING_ISO_8859_2},
    {"sv", RTL_TEXTENCODING_ISO_8859_1},
    {"sw", RTL_TEXTENCODING_ISO_8859_1},
    {"ta", RTL_TEXTENCODING_DONTKNOW},
    {"th", RTL_TEXTENCODING_DONTKNOW},
    {"tr", RTL_TEXTENCODING_ISO_8859_9},
    {"tt", RTL_TEXTENCODING_ISO_8859_5},
    {"uk", RTL_TEXTENCODING_ISO_8859_5},
    {"ur", RTL_TEXTENCODING_ISO_8859_6},
    {"uz", RTL_TEXTENCODING_ISO_8859_9},
    {"vi", RTL_TEXTENCODING_DONTKNOW},
    {"zh", RTL_TEXTENCODING_BIG5}
};
746
747
/*****************************************************************************
748
 return the text encoding corresponding to the given locale
749
 *****************************************************************************/
750
751
/*
 * Map a locale to a text encoding using three tables in turn: the complete
 * locale name (full_locale_list), the charset qualifier after the last '.'
 * (locale_extension_list) and finally the leading two-letter language code
 * (iso_language_list).
 *
 * pLocale may be null, in which case the process locale is used.  Returns
 * the encoding of the first table hit, or RTL_TEXTENCODING_DONTKNOW when
 * the locale cannot be composed or no table matches.
 */
rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
{
    const Pair *language = nullptr;
    char locale_buf[64] = "";

    /* default to process locale if pLocale == NULL */
    if( nullptr == pLocale )
        osl_getProcessLocale( &pLocale );

    /* convert rtl_Locale to locale string */
    if( compose_locale( pLocale, locale_buf, 64 ) )
    {
        /* check special handling list (EUC) first */
        language = pair_search( locale_buf, full_locale_list, SAL_N_ELEMENTS( full_locale_list ) );

        if( nullptr == language )
        {
            /*
             *  check if there is a charset qualifier at the end of the given locale string
             *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
             *  charset to use
             */
            char* cp = strrchr( locale_buf, '.' );

            if( nullptr != cp )
            {
                /*
                 * Cut off a trailing "@modifier" so that only the charset
                 * name itself ("ISO8859-15", not "ISO8859-15@euro") is
                 * looked up.  Truncating locale_buf here is harmless: the
                 * only later use reads its first two characters.
                 */
                char* modifier = strchr( cp + 1, '@' );

                if( nullptr != modifier )
                    *modifier = '\0';

                language = pair_search( cp + 1, locale_extension_list, SAL_N_ELEMENTS( locale_extension_list ) );
            }
        }

        /* use iso language code to determine the charset */
        if( nullptr == language )
        {
            /* iso lang codes have 2 characters */
            locale_buf[2] = '\0';

            language = pair_search( locale_buf, iso_language_list, SAL_N_ELEMENTS( iso_language_list ) );
        }
    }

    /* a matching item in our list provides a mapping from codeset to
     * rtl-codeset */
    if ( language != nullptr )
        return language->value;

    return RTL_TEXTENCODING_DONTKNOW;
}
798
799
#if defined(MACOSX) || defined(IOS)
800
801
/*****************************************************************************
802
 return the current process locale
803
 *****************************************************************************/
804
805
void imp_getProcessLocale( rtl_Locale ** ppLocale )
{
    /* ask the OS for the locale name and convert it to UTF-8 */
    OUString const osLocale(macosx_getLocale());
    OString locale;
    bool const converted = osLocale.convertToString(
        &locale, RTL_TEXTENCODING_UTF8,
        (RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
         | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR));
    if (!converted)
    {
        SAL_INFO("sal.osl", "Cannot convert \"" << osLocale << "\" to UTF-8");
    }

    /* the OS specific lookup produced nothing: simulate the behavior of
     * setlocale by walking the usual environment variables */
    if ( locale.isEmpty() )
    {
        static char const * const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };

        for ( char const * name : variables )
        {
            locale = getenv( name );

            if( !locale.isEmpty() )
                break;
        }

        /* nothing usable in the environment either */
        if( locale.isEmpty() )
            locale = "C"_ostr;
    }

    /* hand back the parsed locale */
    *ppLocale = parse_locale( locale.getStr() );
}
836
837
#else // !MACOSX && !IOS
838
839
/*****************************************************************************
840
 return the current process locale
841
 *****************************************************************************/
842
843
/*
 * Determine the process locale and store the parse_locale() result in
 * *ppLocale.  On Android a fixed locale name is used; elsewhere the name is
 * taken from the environment, falling back to "C".
 */
void imp_getProcessLocale( rtl_Locale ** ppLocale )
{
#ifdef ANDROID
    /* No locale environment variables on Android, so why even bother
     * with getenv().
     */
    const char* locale = "en-US.UTF-8";
#else
    /* simulate behavior of setlocale: LC_ALL takes precedence over LC_CTYPE,
     * which takes precedence over LANG; a variable that is set but empty
     * counts as unset */
    const char* locale = getenv( "LC_ALL" );

    if( nullptr == locale || '\0' == *locale )
        locale = getenv( "LC_CTYPE" );

    if( nullptr == locale || '\0' == *locale )
        locale = getenv( "LANG" );

    if( nullptr == locale || '\0' == *locale )
        locale = "C";

#endif
    *ppLocale = parse_locale( locale );
}
866
867
#endif // !MACOSX && !IOS
868
#endif // !LO_COMMON_NLS_ARCHS
869
870
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */