/src/libreoffice/unotools/source/misc/wincodepage.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | */ |
9 | | |
10 | | #include <sal/config.h> |
11 | | |
12 | | #include <string_view> |
13 | | |
14 | | #include <unotools/wincodepage.hxx> |
15 | | #include <rtl/textenc.h> |
16 | | |
17 | | namespace{ |
18 | | |
/// One row of a language -> text encoding lookup table used by the
/// impl_getWinTextEncodingFromLangStr* functions in this file.
struct LangEncodingDef
{
    /// Language string that is compared, as a prefix, against the start of
    /// the language passed to the lookup functions.
    const std::u16string_view msLangStr;
    /// Text encoding returned by the lookup when msLangStr matches.
    rtl_TextEncoding meTextEncoding;
};
24 | | |
25 | | // See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756 |
26 | | rtl_TextEncoding impl_getWinTextEncodingFromLangStrANSI(const OUString& sLanguage) |
27 | 14.6k | { |
28 | 14.6k | static constexpr LangEncodingDef aLanguageTab[] = |
29 | 14.6k | { |
30 | 14.6k | { u"en", RTL_TEXTENCODING_MS_1252 }, // Most used -> first in list |
31 | 14.6k | { u"th", RTL_TEXTENCODING_MS_874 }, |
32 | 14.6k | { u"ja", RTL_TEXTENCODING_MS_932 }, |
33 | 14.6k | { u"zh-cn", RTL_TEXTENCODING_MS_936 }, // Chinese (simplified) - must go before "zh" |
34 | 14.6k | { u"ko", RTL_TEXTENCODING_MS_949 }, |
35 | 14.6k | { u"zh", RTL_TEXTENCODING_MS_950 }, // Chinese (traditional) |
36 | 14.6k | { u"bs", RTL_TEXTENCODING_MS_1250 }, |
37 | 14.6k | { u"cs", RTL_TEXTENCODING_MS_1250 }, |
38 | 14.6k | { u"hr", RTL_TEXTENCODING_MS_1250 }, |
39 | 14.6k | { u"hu", RTL_TEXTENCODING_MS_1250 }, |
40 | 14.6k | { u"pl", RTL_TEXTENCODING_MS_1250 }, |
41 | 14.6k | { u"ro", RTL_TEXTENCODING_MS_1250 }, |
42 | 14.6k | { u"sk", RTL_TEXTENCODING_MS_1250 }, |
43 | 14.6k | { u"sl", RTL_TEXTENCODING_MS_1250 }, |
44 | | // { "sr", RTL_TEXTENCODING_MS_1250 }, |
45 | 14.6k | { u"sq", RTL_TEXTENCODING_MS_1250 }, |
46 | 14.6k | { u"be", RTL_TEXTENCODING_MS_1251 }, |
47 | 14.6k | { u"bg", RTL_TEXTENCODING_MS_1251 }, |
48 | 14.6k | { u"mk", RTL_TEXTENCODING_MS_1251 }, |
49 | 14.6k | { u"ru", RTL_TEXTENCODING_MS_1251 }, |
50 | 14.6k | { u"sr", RTL_TEXTENCODING_MS_1251 }, |
51 | 14.6k | { u"uk", RTL_TEXTENCODING_MS_1251 }, |
52 | 14.6k | { u"es", RTL_TEXTENCODING_MS_1252 }, |
53 | 14.6k | { u"el", RTL_TEXTENCODING_MS_1253 }, |
54 | 14.6k | { u"tr", RTL_TEXTENCODING_MS_1254 }, |
55 | 14.6k | { u"he", RTL_TEXTENCODING_MS_1255 }, |
56 | 14.6k | { u"ar", RTL_TEXTENCODING_MS_1256 }, |
57 | 14.6k | { u"et", RTL_TEXTENCODING_MS_1257 }, |
58 | 14.6k | { u"lt", RTL_TEXTENCODING_MS_1257 }, |
59 | 14.6k | { u"lv", RTL_TEXTENCODING_MS_1257 }, |
60 | 14.6k | { u"vi", RTL_TEXTENCODING_MS_1258 }, |
61 | 14.6k | }; |
62 | | |
63 | 14.6k | for (auto& def : aLanguageTab) |
64 | 14.6k | { |
65 | 14.6k | if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr)) |
66 | 14.6k | return def.meTextEncoding; |
67 | 14.6k | } |
68 | | |
69 | 0 | return RTL_TEXTENCODING_MS_1252; |
70 | 14.6k | } |
71 | | |
72 | | /* ----------------------------------------------------------------------- */ |
73 | | |
74 | | // See https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756 |
75 | | // See http://shapelib.maptools.org/codepage.html |
76 | | rtl_TextEncoding impl_getWinTextEncodingFromLangStrOEM(const OUString& sLanguage) |
77 | 1.22k | { |
78 | 1.22k | static constexpr LangEncodingDef aLanguageTab[] = |
79 | 1.22k | { |
80 | 1.22k | { u"de", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
81 | 1.22k | { u"en-us", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
82 | 1.22k | { u"fi", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
83 | 1.22k | { u"fr-ca", RTL_TEXTENCODING_IBM_863 }, // OEM French Canadian; French Canadian (DOS) |
84 | 1.22k | { u"fr", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
85 | 1.22k | { u"it", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
86 | 1.22k | { u"nl", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
87 | 1.22k | { u"sv", RTL_TEXTENCODING_IBM_437 }, // OEM United States |
88 | 1.22k | { u"el", RTL_TEXTENCODING_IBM_737 }, // OEM Greek (formerly 437G); Greek (DOS) |
89 | 1.22k | { u"et", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) |
90 | 1.22k | { u"lt", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) |
91 | 1.22k | { u"lv", RTL_TEXTENCODING_IBM_775 }, // OEM Baltic; Baltic (DOS) |
92 | 1.22k | { u"en", RTL_TEXTENCODING_IBM_850 }, // OEM Multilingual Latin 1; Western European (DOS) |
93 | 1.22k | { u"bs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
94 | 1.22k | { u"cs", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
95 | 1.22k | { u"hr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
96 | 1.22k | { u"hu", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
97 | 1.22k | { u"pl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
98 | 1.22k | { u"ro", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
99 | 1.22k | { u"sk", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
100 | 1.22k | { u"sl", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
101 | | // { "sr", RTL_TEXTENCODING_IBM_852 }, // OEM Latin 2; Central European (DOS) |
102 | 1.22k | { u"bg", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) |
103 | 1.22k | { u"mk", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) |
104 | 1.22k | { u"sr", RTL_TEXTENCODING_IBM_855 }, // OEM Cyrillic (primarily Russian) |
105 | 1.22k | { u"tr", RTL_TEXTENCODING_IBM_857 }, // OEM Turkish; Turkish (DOS) |
106 | 1.22k | { u"pt", RTL_TEXTENCODING_IBM_860 }, // OEM Portuguese; Portuguese (DOS) |
107 | 1.22k | { u"is", RTL_TEXTENCODING_IBM_861 }, // OEM Icelandic; Icelandic (DOS) |
108 | 1.22k | { u"he", RTL_TEXTENCODING_IBM_862 }, // OEM Hebrew; Hebrew (DOS) |
109 | 1.22k | { u"ar", RTL_TEXTENCODING_IBM_864 }, // OEM Arabic; Arabic (864) |
110 | 1.22k | { u"da", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) |
111 | 1.22k | { u"nn", RTL_TEXTENCODING_IBM_865 }, // OEM Nordic; Nordic (DOS) |
112 | 1.22k | { u"be", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) |
113 | 1.22k | { u"ru", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) |
114 | 1.22k | { u"uk", RTL_TEXTENCODING_IBM_866 }, // OEM Russian; Cyrillic (DOS) |
115 | 1.22k | { u"th", RTL_TEXTENCODING_MS_874 }, // ANSI/OEM Thai (ISO 8859-11); Thai (Windows) |
116 | 1.22k | { u"ja", RTL_TEXTENCODING_MS_932 }, // ANSI/OEM Japanese; Japanese (Shift-JIS) |
117 | 1.22k | { u"zh-cn", RTL_TEXTENCODING_MS_936 }, // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) |
118 | 1.22k | { u"ko", RTL_TEXTENCODING_MS_949 }, // ANSI/OEM Korean (Unified Hangul Code) |
119 | 1.22k | { u"zh", RTL_TEXTENCODING_MS_950 }, // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) |
120 | 1.22k | { u"vi", RTL_TEXTENCODING_MS_1258 }, // ANSI/OEM Vietnamese; Vietnamese (Windows) |
121 | 1.22k | }; |
122 | | |
123 | 1.22k | for (auto& def : aLanguageTab) |
124 | 2.45k | { |
125 | 2.45k | if (sLanguage.startsWithIgnoreAsciiCase(def.msLangStr)) |
126 | 1.22k | return def.meTextEncoding; |
127 | 2.45k | } |
128 | | |
129 | 0 | return RTL_TEXTENCODING_IBM_850; |
130 | 1.22k | } |
131 | | |
132 | | } // namespace |
133 | | |
134 | | rtl_TextEncoding utl_getWinTextEncodingFromLangStr(const OUString& sLanguage, bool bOEM) |
135 | 15.8k | { |
136 | 15.8k | return bOEM ? |
137 | 1.22k | impl_getWinTextEncodingFromLangStrOEM(sLanguage) : |
138 | 15.8k | impl_getWinTextEncodingFromLangStrANSI(sLanguage); |
139 | 15.8k | } |
140 | | |
141 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |