/src/libreoffice/sal/textenc/tables.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <config_locales.h> |
21 | | |
22 | | #include <sal/config.h> |
23 | | |
24 | | #include <cassert> |
25 | | #include <cstddef> |
26 | | #include <iterator> |
27 | | |
28 | | #include <rtl/textenc.h> |
29 | | #include <rtl/tencinfo.h> |
30 | | #include <sal/types.h> |
31 | | |
/* Range markers for a converter that has no byte -> Unicode table.
   Note that the start value is greater than the end value; presumably
   this encodes an empty range so that range checks never match -- confirm
   against the users of these macros in the included .tab files. */
#define NOTABUNI_START      0xFF
#define NOTABUNI_END        0x00

/* Same idea for the Unicode -> byte direction (start > end as well). */
#define NOTABCHAR_START     0xFFFF
#define NOTABCHAR_END       0x0000
37 | | |
38 | | #define SAME8090UNI_START 0x80 |
39 | | #define SAME8090UNI_END 0x9F |
40 | | sal_uInt16 const aImpl8090SameToUniTab[SAME8090UNI_END |
41 | | - SAME8090UNI_START |
42 | | + 1] |
43 | | = { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 */ |
44 | | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, |
45 | | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 */ |
46 | | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F }; |
47 | | |
/* Inclusive range covered by aImpl8090SameToCharTab. */
#define SAME8090CHAR_START  0x0080
#define SAME8090CHAR_END    0x009F

/* Identity table for the range 0x0080..0x009F: the entry at index
   (value - SAME8090CHAR_START) holds the byte 0x80 + index, i.e. every value
   in the range maps to the byte with the same number.
   NOTE(review): presumably the reverse-direction counterpart of
   aImpl8090SameToUniTab -- confirm against the included .tab files. */
unsigned char const aImpl8090SameToCharTab[SAME8090CHAR_END - SAME8090CHAR_START + 1]
    = {
        /* 0x0080 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
        /* 0x0088 */ 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
        /* 0x0090 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
        /* 0x0098 */ 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
      };
57 | | |
58 | | sal_uInt16 const aImplDoubleByteIdentifierTab[1] = { 0 }; |
59 | | |
60 | | #include "tcvtarb1.tab" |
61 | | #include "tcvteas1.tab" |
62 | | #include "tcvtest1.tab" |
63 | | #include "tcvtjp1.tab" |
64 | | #include "tcvtjp2.tab" |
65 | | #include "tcvtjp3.tab" |
66 | | #include "tcvtjp4.tab" |
67 | | #include "tcvtjp5.tab" |
68 | | #include "tcvtjp6.tab" |
69 | | #include "tcvtkr1.tab" |
70 | | #include "tcvtkr2.tab" |
71 | | #include "tcvtkr4.tab" |
72 | | #include "tcvtkr5.tab" |
73 | | #include "tcvtkr6.tab" |
74 | | #include "tcvtlat1.tab" |
75 | | #include "tcvtscn1.tab" |
76 | | #include "tcvtscn2.tab" |
77 | | #include "tcvtscn3.tab" |
78 | | #include "tcvtscn4.tab" |
79 | | #include "tcvtscn5.tab" |
80 | | #include "tcvtscn6.tab" |
81 | | #include "tcvtsym1.tab" |
82 | | #include "tcvttcn1.tab" |
83 | | #include "tcvttcn2.tab" |
84 | | #include "tcvttcn6.tab" |
85 | | #include "tcvtuni1.tab" |
86 | | |
87 | | #include "convertadobe.tab" |
88 | | #include "convertbig5hkscs.tab" |
89 | | #include "converteuctw.tab" |
90 | | #include "convertgb18030.tab" |
91 | | #include "convertisciidevangari.tab" |
92 | | #include "convertiso2022cn.tab" |
93 | | #include "convertiso2022jp.tab" |
94 | | #include "convertiso2022kr.tab" |
95 | | |
96 | | extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const * |
97 | | sal_getFullTextEncodingData( rtl_TextEncoding nEncoding ) |
98 | 10.6M | { |
99 | 10.6M | assert( |
100 | 10.6M | nEncoding != RTL_TEXTENCODING_ASCII_US && |
101 | 10.6M | nEncoding != RTL_TEXTENCODING_ISO_8859_1 && |
102 | 10.6M | nEncoding != RTL_TEXTENCODING_JAVA_UTF8 && |
103 | 10.6M | nEncoding != RTL_TEXTENCODING_MS_1252 && |
104 | 10.6M | nEncoding != RTL_TEXTENCODING_UTF8); |
105 | | // handled by Impl_getTextEncodingData |
106 | 10.6M | static ImplTextEncodingData const * const aData[] |
107 | 10.6M | = { nullptr, /* DONTKNOW */ |
108 | 10.6M | nullptr, /* MS_1252, see above */ |
109 | 10.6M | &aImplAPPLEROMANTextEncodingData, /* APPLE_ROMAN */ |
110 | 10.6M | &aImplIBM437TextEncodingData, /* IBM_437 */ |
111 | 10.6M | &aImplIBM850TextEncodingData, /* IBM_850 */ |
112 | 10.6M | &aImplIBM860TextEncodingData, /* IBM_860 */ |
113 | 10.6M | &aImplIBM861TextEncodingData, /* IBM_861 */ |
114 | 10.6M | &aImplIBM863TextEncodingData, /* IBM_863 */ |
115 | 10.6M | &aImplIBM865TextEncodingData, /* IBM_865 */ |
116 | 10.6M | nullptr, /* reserved (SYSTEM) */ |
117 | 10.6M | &aImplSYMBOLTextEncodingData, /* SYMBOL */ |
118 | 10.6M | nullptr, /* ASCII_US, see above */ |
119 | 10.6M | nullptr, /* ISO_8859_1, see above */ |
120 | 10.6M | &aImplISO88592TextEncodingData, /* ISO_8859_2 */ |
121 | 10.6M | &aImplISO88593TextEncodingData, /* ISO_8859_3 */ |
122 | 10.6M | &aImplISO88594TextEncodingData, /* ISO_8859_4 */ |
123 | 10.6M | &aImplISO88595TextEncodingData, /* ISO_8859_5 */ |
124 | 10.6M | &aImplISO88596TextEncodingData, /* ISO_8859_6 */ |
125 | 10.6M | &aImplISO88597TextEncodingData, /* ISO_8859_7 */ |
126 | 10.6M | &aImplISO88598TextEncodingData, /* ISO_8859_8 */ |
127 | 10.6M | &aImplISO88599TextEncodingData, /* ISO_8859_9 */ |
128 | 10.6M | &aImplISO885914TextEncodingData, /* ISO_8859_14 */ |
129 | 10.6M | &aImplISO885915TextEncodingData, /* ISO_8859_15 */ |
130 | 10.6M | &aImplIBM737TextEncodingData, /* IBM_737 */ |
131 | 10.6M | &aImplIBM775TextEncodingData, /* IBM_775 */ |
132 | 10.6M | &aImplIBM852TextEncodingData, /* IBM_852 */ |
133 | 10.6M | &aImplIBM855TextEncodingData, /* IBM_855 */ |
134 | 10.6M | &aImplIBM857TextEncodingData, /* IBM_857 */ |
135 | 10.6M | &aImplIBM862TextEncodingData, /* IBM_862 */ |
136 | 10.6M | &aImplIBM864TextEncodingData, /* IBM_864 */ |
137 | 10.6M | &aImplIBM866TextEncodingData, /* IBM_866 */ |
138 | 10.6M | &aImplIBM869TextEncodingData, /* IBM_869 */ |
139 | 10.6M | &aImplMS874TextEncodingData, /* MS_874 */ |
140 | 10.6M | &aImplMS1250TextEncodingData, /* MS_1250 */ |
141 | 10.6M | &aImplMS1251TextEncodingData, /* MS_1251 */ |
142 | 10.6M | &aImplMS1253TextEncodingData, /* MS_1253 */ |
143 | 10.6M | &aImplMS1254TextEncodingData, /* MS_1254 */ |
144 | 10.6M | &aImplMS1255TextEncodingData, /* MS_1255 */ |
145 | 10.6M | &aImplMS1256TextEncodingData, /* MS_1256 */ |
146 | 10.6M | &aImplMS1257TextEncodingData, /* MS_1257 */ |
147 | 10.6M | &aImplMS1258TextEncodingData, /* MS_1258 */ |
148 | 10.6M | nullptr, /* TODO! APPLE_ARABIC */ |
149 | 10.6M | &aImplAPPLECENTEUROTextEncodingData, /* APPLE_CENTEURO */ |
150 | 10.6M | &aImplAPPLECROATIANTextEncodingData, /* APPLE_CROATIAN */ |
151 | 10.6M | &aImplAPPLECYRILLICTextEncodingData, /* APPLE_CYRILLIC */ |
152 | 10.6M | nullptr, /* TODO! APPLE_DEVANAGARI */ |
153 | 10.6M | nullptr, /* TODO! APPLE_FARSI */ |
154 | 10.6M | &aImplAPPLEGREEKTextEncodingData, /* APPLE_GREEK */ |
155 | 10.6M | nullptr, /* TODO! APPLE_GUJARATI */ |
156 | 10.6M | nullptr, /* TODO! APPLE_GURMUKHI */ |
157 | 10.6M | nullptr, /* TODO! APPLE_HEBREW */ |
158 | 10.6M | &aImplAPPLEICELANDTextEncodingData, /* APPLE_ICELAND */ |
159 | 10.6M | &aImplAPPLEROMANIANTextEncodingData, /* APPLE_ROMANIAN */ |
160 | 10.6M | nullptr, /* TODO! APPLE_THAI */ |
161 | 10.6M | &aImplAPPLETURKISHTextEncodingData, /* APPLE_TURKISH */ |
162 | 10.6M | &aImplAPPLEUKRAINIANTextEncodingData, /* APPLE_UKRAINIAN */ |
163 | | #if WITH_LOCALE_ALL || WITH_LOCALE_zh |
164 | | &aImplAPPLECHINSIMPTextEncodingData, /* APPLE_CHINSIMP */ |
165 | | &aImplAPPLECHINTRADTextEncodingData, /* APPLE_CHINTRAD */ |
166 | | #else |
167 | 10.6M | NULL, |
168 | 10.6M | NULL, |
169 | 10.6M | #endif |
170 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ja |
171 | | &aImplAPPLEJAPANESETextEncodingData, /* APPLE_JAPANESE */ |
172 | | #else |
173 | 10.6M | NULL, |
174 | 10.6M | #endif |
175 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ko |
176 | | &aImplAPPLEKOREANTextEncodingData, /* APPLE_KOREAN */ |
177 | | #else |
178 | 10.6M | NULL, |
179 | 10.6M | #endif |
180 | 10.6M | &aImplMS932TextEncodingData, /* MS_932 */ |
181 | 10.6M | &aImplMS936TextEncodingData, /* MS_936 */ |
182 | 10.6M | &aImplMS949TextEncodingData, /* MS_949 */ |
183 | 10.6M | &aImplMS950TextEncodingData, /* MS_950 */ |
184 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ja |
185 | | &aImplSJISTextEncodingData, /* SHIFT_JIS */ |
186 | | #else |
187 | 10.6M | NULL, |
188 | 10.6M | #endif |
189 | | #if WITH_LOCALE_ALL || WITH_LOCALE_zh |
190 | | &aImplGB2312TextEncodingData, /* GB_2312 */ |
191 | | &aImplGBT12345TextEncodingData, /* GBT_12345 */ |
192 | | &aImplGBKTextEncodingData, /* GBK */ |
193 | | &aImplBIG5TextEncodingData, /* BIG5 */ |
194 | | #else |
195 | 10.6M | NULL, |
196 | 10.6M | NULL, |
197 | 10.6M | NULL, |
198 | 10.6M | NULL, |
199 | 10.6M | #endif |
200 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ja |
201 | | &aImplEUCJPTextEncodingData, /* EUC_JP */ |
202 | | #else |
203 | 10.6M | NULL, |
204 | 10.6M | #endif |
205 | | #if WITH_LOCALE_ALL || WITH_LOCALE_zh |
206 | | &aImplEUCCNTextEncodingData, /* EUC_CN */ |
207 | | &aImplEucTwTextEncodingData, /* EUC_TW */ |
208 | | #else |
209 | 10.6M | NULL, |
210 | 10.6M | NULL, |
211 | 10.6M | #endif |
212 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ja |
213 | | &aImplIso2022JpTextEncodingData, /* ISO_2022_JP */ |
214 | | #else |
215 | 10.6M | NULL, |
216 | 10.6M | #endif |
217 | | #if WITH_LOCALE_ALL || WITH_LOCALE_zh |
218 | | &aImplIso2022CnTextEncodingData, /* ISO_2022_CN */ |
219 | | #else |
220 | 10.6M | NULL, |
221 | 10.6M | #endif |
222 | 10.6M | &aImplKOI8RTextEncodingData, /* KOI8_R */ |
223 | 10.6M | &aImplUTF7TextEncodingData, /* UTF7 */ |
224 | 10.6M | nullptr, /* UTF8, see above */ |
225 | 10.6M | &aImplISO885910TextEncodingData, /* ISO_8859_10 */ |
226 | 10.6M | &aImplISO885913TextEncodingData, /* ISO_8859_13 */ |
227 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ko |
228 | | &aImplEUCKRTextEncodingData, /* EUC_KR */ |
229 | | &aImplIso2022KrTextEncodingData, /* ISO_2022_KR */ |
230 | | #else |
231 | 10.6M | NULL, |
232 | 10.6M | NULL, |
233 | 10.6M | #endif |
234 | | #if WITH_LOCALE_ALL || WITH_LOCALE_ja |
235 | | &aImplJISX0201TextEncodingData, /* JIS_X_0201 */ |
236 | | &aImplJISX0208TextEncodingData, /* JIS_X_0208 */ |
237 | | &aImplJISX0212TextEncodingData, /* JIS_X_0212 */ |
238 | | #else |
239 | 10.6M | NULL, |
240 | 10.6M | NULL, |
241 | 10.6M | NULL, |
242 | 10.6M | #endif |
243 | 10.6M | &aImplMS1361TextEncodingData, /* MS_1361 */ |
244 | | #if WITH_LOCALE_ALL || WITH_LOCALE_zh |
245 | | &aImplGb18030TextEncodingData, /* GB_18030 */ |
246 | | &aImplBig5HkscsTextEncodingData, /* BIG5_HKSCS */ |
247 | | #else |
248 | 10.6M | NULL, |
249 | 10.6M | NULL, |
250 | 10.6M | #endif |
251 | 10.6M | &aImplTis620TextEncodingData, /* TIS_620 */ |
252 | 10.6M | &aImplKoi8UTextEncodingData, /* KOI8_U */ |
253 | | #if WITH_LOCALE_ALL || WITH_LOCALE_FOR_SCRIPT_Deva |
254 | | &aImplIsciiDevanagariTextEncodingData, /* ISCII_DEVANAGARI */ |
255 | | #else |
256 | 10.6M | NULL, |
257 | 10.6M | #endif |
258 | 10.6M | nullptr, /* JAVA_UTF8, see above */ |
259 | 10.6M | &adobeStandardEncodingData, /* ADOBE_STANDARD */ |
260 | 10.6M | &adobeSymbolEncodingData, /* ADOBE_SYMBOL */ |
261 | 10.6M | &aImplPT154TextEncodingData, /* PT154 */ |
262 | 10.6M | &adobeDingbatsEncodingData, /* ADOBE_DINGBATS */ |
263 | 10.6M | &kamenickyEncodingData, /* KAMENICKY */ |
264 | 10.6M | &mazoviaEncodingData }; /* MAZOVIA */ |
265 | | |
266 | 10.6M | static_assert( |
267 | 10.6M | SAL_N_ELEMENTS(aData) == RTL_TEXTENCODING_MAZOVIA + 1, |
268 | 10.6M | "update table above if a new encoding is added"); |
269 | | |
270 | 10.6M | return |
271 | 10.6M | nEncoding < SAL_N_ELEMENTS(aData) ? aData[nEncoding] : nullptr; |
272 | 10.6M | } |
273 | | |
274 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |