/src/libreoffice/i18npool/source/transliteration/transliterationImpl.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | |
21 | | #include <transliterationImpl.hxx> |
22 | | #include <servicename.hxx> |
23 | | |
24 | | #include <com/sun/star/i18n/LocaleData2.hpp> |
25 | | #include <com/sun/star/i18n/TransliterationType.hpp> |
26 | | #include <com/sun/star/i18n/TransliterationModulesExtra.hpp> |
27 | | |
28 | | #include <comphelper/sequence.hxx> |
29 | | #include <cppuhelper/supportsservice.hxx> |
30 | | #include <o3tl/string_view.hxx> |
31 | | #include <rtl/ustring.hxx> |
32 | | |
33 | | #include <algorithm> |
34 | | #include <mutex> |
35 | | #include <numeric> |
36 | | |
37 | | using namespace com::sun::star::uno; |
38 | | using namespace com::sun::star::i18n; |
39 | | using namespace com::sun::star::lang; |
40 | | |
41 | | |
42 | | namespace i18npool { |
43 | | |
44 | | #define TmItem1( name ) \ |
45 | | {TransliterationModules_##name, TransliterationModulesNew_##name, #name} |
46 | | |
47 | | #define TmItem2( name ) \ |
48 | | {TransliterationModules(0), TransliterationModulesNew_##name, #name} |
49 | | |
50 | | namespace { |
51 | | |
52 | | // Ignore Module list |
53 | | struct TMList { |
54 | | TransliterationModules tm; |
55 | | TransliterationModulesNew tmn; |
56 | | const char *implName; |
57 | | }; |
58 | | |
59 | | } |
60 | | |
61 | | TMList const TMlist[] = { // Modules ModulesNew |
62 | | TmItem1 (IGNORE_CASE), // 0. (1<<8 256) (7) |
63 | | TmItem1 (IGNORE_WIDTH), // 1. (1<<9 512) (8) |
64 | | TmItem1 (IGNORE_KANA), // 2. (1<<10 1024) (9) |
65 | | // No enum define for this trans. application has to use impl name to load it |
66 | | // TmItem1 (IGNORE_CASE_SIMPLE), // (1<<11 1024) (66) |
67 | | |
68 | | {TransliterationModules_IgnoreTraditionalKanji_ja_JP, |
69 | | TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP, "ignoreTraditionalKanji_ja_JP"}, |
70 | | // 3. (1<<12 4096) (10) |
71 | | {TransliterationModules_IgnoreTraditionalKana_ja_JP, |
72 | | TransliterationModulesNew_IgnoreTraditionalKana_ja_JP, "ignoreTraditionalKana_ja_JP"}, |
73 | | // 4. (1<<13 8192) (11) |
74 | | {TransliterationModules_IgnoreMinusSign_ja_JP, TransliterationModulesNew_IgnoreMinusSign_ja_JP, |
75 | | "ignoreMinusSign_ja_JP"}, // 5. (1<<13 16384) (12) |
76 | | {TransliterationModules_IgnoreIterationMark_ja_JP, |
77 | | TransliterationModulesNew_IgnoreIterationMark_ja_JP, "ignoreIterationMark_ja_JP"}, |
78 | | // 6. (1<<14 32768) (13) |
79 | | {TransliterationModules_IgnoreSeparator_ja_JP, TransliterationModulesNew_IgnoreSeparator_ja_JP, |
80 | | "ignoreSeparator_ja_JP"}, // 7. (1<<15 65536) (14) |
81 | | {TransliterationModules_IgnoreSize_ja_JP, TransliterationModulesNew_IgnoreSize_ja_JP, |
82 | | "ignoreSize_ja_JP"}, // 15. (1<<23 16777216) (22) |
83 | | {TransliterationModules_IgnoreMiddleDot_ja_JP, TransliterationModulesNew_IgnoreMiddleDot_ja_JP, |
84 | | "ignoreMiddleDot_ja_JP"}, // 17. (1<<25 67108864) (24) |
85 | | {TransliterationModules_IgnoreSpace_ja_JP, TransliterationModulesNew_IgnoreSpace_ja_JP, |
86 | | "ignoreSpace_ja_JP"}, // 18. (1<<26 134217728) (25) |
87 | | {TransliterationModules_IgnoreZiZu_ja_JP, TransliterationModulesNew_IgnoreZiZu_ja_JP, |
88 | | "ignoreZiZu_ja_JP"}, // 8. (1<<16 131072) (15) |
89 | | {TransliterationModules_IgnoreBaFa_ja_JP, TransliterationModulesNew_IgnoreBaFa_ja_JP, |
90 | | "ignoreBaFa_ja_JP"}, // 9. (1<<17 262144) (16) |
91 | | {TransliterationModules_IgnoreTiJi_ja_JP, TransliterationModulesNew_IgnoreTiJi_ja_JP, |
92 | | "ignoreTiJi_ja_JP"}, // 10. (1<<18 524288) (17) |
93 | | {TransliterationModules_IgnoreHyuByu_ja_JP, TransliterationModulesNew_IgnoreHyuByu_ja_JP, |
94 | | "ignoreHyuByu_ja_JP"}, // 11. (1<<19 1048576) (18) |
95 | | {TransliterationModules_IgnoreSeZe_ja_JP, TransliterationModulesNew_IgnoreSeZe_ja_JP, |
96 | | "ignoreSeZe_ja_JP"}, // 12. (1<<20 2097152) (19) |
97 | | {TransliterationModules_IgnoreIandEfollowedByYa_ja_JP, |
98 | | TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP, "ignoreIandEfollowedByYa_ja_JP"}, |
99 | | // 13. (1<<21 4194304) (20) |
100 | | {TransliterationModules_IgnoreKiKuFollowedBySa_ja_JP, |
101 | | TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP, "ignoreKiKuFollowedBySa_ja_JP"}, |
102 | | // 14. (1<<22 8388608) (21) |
103 | | {TransliterationModules_IgnoreProlongedSoundMark_ja_JP, |
104 | | TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP, "ignoreProlongedSoundMark_ja_JP"}, |
105 | | // 16. (1<<24 33554432) (23) |
106 | | |
107 | | TmItem1 (UPPERCASE_LOWERCASE), // 19. (1) (1) |
108 | | TmItem1 (LOWERCASE_UPPERCASE), // 20. (2) (2) |
109 | | TmItem1 (HALFWIDTH_FULLWIDTH), // 21. (3) (3) |
110 | | TmItem1 (FULLWIDTH_HALFWIDTH), // 22. (4) (4) |
111 | | TmItem1 (KATAKANA_HIRAGANA), // 23. (5) (5) |
112 | | TmItem1 (HIRAGANA_KATAKANA), // 24. (6) (6) |
113 | | |
114 | | {TransliterationModules_SmallToLarge_ja_JP, TransliterationModulesNew_SmallToLarge_ja_JP, |
115 | | "smallToLarge_ja_JP"}, // 25. (1<<27 268435456) (26) |
116 | | {TransliterationModules_LargeToSmall_ja_JP, TransliterationModulesNew_LargeToSmall_ja_JP, |
117 | | "largeToSmall_ja_JP"}, // 26. (1<<28 536870912) (27) |
118 | | TmItem2 (NumToTextLower_zh_CN), // 27. () (28) |
119 | | TmItem2 (NumToTextUpper_zh_CN), // 28. () (29) |
120 | | TmItem2 (NumToTextLower_zh_TW), // 29. () (30) |
121 | | TmItem2 (NumToTextUpper_zh_TW), // 30. () (31) |
122 | | TmItem2 (NumToTextFormalHangul_ko), // 31. () (32) |
123 | | TmItem2 (NumToTextFormalLower_ko), // 32. () (33) |
124 | | TmItem2 (NumToTextFormalUpper_ko), // 33. () (34) |
125 | | TmItem2 (NumToTextInformalHangul_ko), // 34. () (35) |
126 | | TmItem2 (NumToTextInformalLower_ko), // 35. () (36) |
127 | | TmItem2 (NumToTextInformalUpper_ko), // 36. () (37) |
128 | | TmItem2 (NumToCharLower_zh_CN), // 37. () (38) |
129 | | TmItem2 (NumToCharUpper_zh_CN), // 38. () (39) |
130 | | TmItem2 (NumToCharLower_zh_TW), // 39. () (40) |
131 | | TmItem2 (NumToCharUpper_zh_TW), // 40. () (41) |
132 | | TmItem2 (NumToCharHangul_ko), // 41. () (42) |
133 | | TmItem2 (NumToCharLower_ko), // 42. () (43) |
134 | | TmItem2 (NumToCharUpper_ko), // 43. () (44) |
135 | | TmItem2 (NumToCharFullwidth), // 44. () (45) |
136 | | TmItem2 (NumToCharKanjiShort_ja_JP), // 45. () (46) |
137 | | TmItem2 (TextToNumLower_zh_CN), // 46. () (47) |
138 | | TmItem2 (TextToNumUpper_zh_CN), // 47. () (48) |
139 | | TmItem2 (TextToNumLower_zh_TW), // 48. () (49) |
140 | | TmItem2 (TextToNumUpper_zh_TW), // 49. () (50) |
141 | | TmItem2 (TextToNumFormalHangul_ko), // 50. () (51) |
142 | | TmItem2 (TextToNumFormalLower_ko), // 51. () (52) |
143 | | TmItem2 (TextToNumFormalUpper_ko), // 52. () (53) |
144 | | TmItem2 (TextToNumInformalHangul_ko), // 53. () (54) |
145 | | TmItem2 (TextToNumInformalLower_ko), // 54. () (55) |
146 | | TmItem2 (TextToNumInformalUpper_ko), // 55. () (56) |
147 | | |
148 | | TmItem2 (CharToNumLower_zh_CN), // 56. () (59) |
149 | | TmItem2 (CharToNumUpper_zh_CN), // 57. () (60) |
150 | | TmItem2 (CharToNumLower_zh_TW), // 58. () (61) |
151 | | TmItem2 (CharToNumUpper_zh_TW), // 59. () (62) |
152 | | TmItem2 (CharToNumHangul_ko), // 60. () (63) |
153 | | TmItem2 (CharToNumLower_ko), // 61. () (64) |
154 | | TmItem2 (CharToNumUpper_ko), // 62. () (65) |
155 | | |
156 | | // no enum defined for these trans. application has to use impl name to load them |
157 | | // TmItem2 (NumToCharArabic_Indic), // () (67) |
158 | | // TmItem2 (NumToCharEstern_Arabic_Indic),// () (68) |
159 | | // TmItem2 (NumToCharIndic), // () (69) |
160 | | // TmItem2 (NumToCharThai), // () (70) |
161 | | {TransliterationModules(0), TransliterationModulesNew(0), nullptr} |
162 | | }; |
163 | | |
164 | | // Constructor/Destructor |
165 | 7.30k | TransliterationImpl::TransliterationImpl(const Reference <XComponentContext>& xContext) : mxContext(xContext) |
166 | 7.30k | { |
167 | 7.30k | numCascade = 0; |
168 | 7.30k | caseignoreOnly = true; |
169 | | |
170 | 7.30k | mxLocaledata.set(LocaleData2::create(xContext)); |
171 | 7.30k | } |
172 | | |
173 | | TransliterationImpl::~TransliterationImpl() |
174 | 7.30k | { |
175 | 7.30k | mxLocaledata.clear(); |
176 | 7.30k | clear(); |
177 | 7.30k | } |
178 | | |
179 | | |
180 | | // Methods |
181 | | OUString SAL_CALL |
182 | | TransliterationImpl::getName() |
183 | 0 | { |
184 | 0 | if (numCascade == 1 && bodyCascade[0].is()) |
185 | 0 | return bodyCascade[0]->getName(); |
186 | 0 | if (numCascade < 1) |
187 | 0 | return ( u"Not Loaded"_ustr); |
188 | 0 | throw RuntimeException(); |
189 | 0 | } |
190 | | |
191 | | sal_Int16 SAL_CALL |
192 | | TransliterationImpl::getType() |
193 | 0 | { |
194 | 0 | if (numCascade > 1) |
195 | 0 | return (TransliterationType::CASCADE|TransliterationType::IGNORE); |
196 | 0 | if (numCascade > 0 && bodyCascade[0].is()) |
197 | 0 | return bodyCascade[0]->getType(); |
198 | 0 | throw RuntimeException(); |
199 | 0 | } |
200 | | |
201 | 1.25M | static TransliterationModules operator&(TransliterationModules lhs, TransliterationModules rhs) { |
202 | 1.25M | return TransliterationModules(sal_Int32(lhs) & sal_Int32(rhs)); |
203 | 1.25M | } |
204 | 518k | static TransliterationModules operator|(TransliterationModules lhs, TransliterationModules rhs) { |
205 | 518k | return TransliterationModules(sal_Int32(lhs) | sal_Int32(rhs)); |
206 | 518k | } |
207 | | |
208 | | void SAL_CALL |
209 | | TransliterationImpl::loadModule( TransliterationModules modType, const Locale& rLocale ) |
210 | 129k | { |
211 | 129k | clear(); |
212 | 129k | if (bool(modType & TransliterationModules_IGNORE_MASK) && |
213 | 129k | bool(modType & TransliterationModules_NON_IGNORE_MASK)) |
214 | 0 | { |
215 | 0 | throw RuntimeException(); |
216 | 129k | } else if (bool(modType & TransliterationModules_IGNORE_MASK)) { |
217 | 259k | #define TransliterationModules_IGNORE_CASE_MASK (TransliterationModules_IGNORE_CASE | \ |
218 | 259k | TransliterationModules_IGNORE_WIDTH | \ |
219 | 259k | TransliterationModules_IGNORE_KANA) |
220 | 129k | TransliterationModules mask = ((modType & TransliterationModules_IGNORE_CASE_MASK) == modType) ? |
221 | 129k | TransliterationModules_IGNORE_CASE_MASK : TransliterationModules_IGNORE_MASK; |
222 | 395k | for (sal_Int16 i = 0; bool(TMlist[i].tm & mask); i++) { |
223 | 265k | if (bool(modType & TMlist[i].tm)) |
224 | 129k | if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), |
225 | 129k | bodyCascade[numCascade], rLocale)) |
226 | 68.0k | numCascade++; |
227 | 265k | } |
228 | | // additional transliterations from TransliterationModulesExtra (we cannot extend TransliterationModules) |
229 | 129k | if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_DIACRITICS_CTL))) |
230 | 0 | { |
231 | 0 | if (loadModuleByName(u"ignoreDiacritics_CTL", bodyCascade[numCascade], rLocale)) |
232 | 0 | numCascade++; |
233 | 0 | } |
234 | 129k | if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_KASHIDA_CTL))) |
235 | 0 | if (loadModuleByName(u"ignoreKashida_CTL", bodyCascade[numCascade], rLocale)) |
236 | 0 | numCascade++; |
237 | | |
238 | 129k | } else if (bool(modType & TransliterationModules_NON_IGNORE_MASK)) { |
239 | 0 | for (sal_Int16 i = 0; bool(TMlist[i].tm); i++) { |
240 | 0 | if (TMlist[i].tm == modType) { |
241 | 0 | if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), bodyCascade[numCascade], rLocale)) |
242 | 0 | numCascade++; |
243 | 0 | break; |
244 | 0 | } |
245 | 0 | } |
246 | 0 | } |
247 | 129k | } |
248 | | |
249 | | void SAL_CALL |
250 | | TransliterationImpl::loadModuleNew( const Sequence < TransliterationModulesNew > & modType, const Locale& rLocale ) |
251 | 0 | { |
252 | 0 | clear(); |
253 | 0 | TransliterationModules mask = TransliterationModules_END_OF_MODULE; |
254 | 0 | sal_Int32 count = modType.getLength(); |
255 | 0 | if (count > maxCascade) |
256 | 0 | throw RuntimeException(); // could not handle more than maxCascade |
257 | 0 | for (sal_Int32 i = 0; i < count; i++) { |
258 | 0 | for (sal_Int16 j = 0; bool(TMlist[j].tmn); j++) { |
259 | 0 | if (TMlist[j].tmn == modType[i]) { |
260 | 0 | if (mask == TransliterationModules_END_OF_MODULE) |
261 | 0 | mask = bool(TMlist[i].tm) && bool(TMlist[i].tm & TransliterationModules_IGNORE_MASK) ? |
262 | 0 | TransliterationModules_IGNORE_MASK : TransliterationModules_NON_IGNORE_MASK; |
263 | 0 | else if (mask == TransliterationModules_IGNORE_MASK && |
264 | 0 | (TMlist[i].tm&TransliterationModules_IGNORE_MASK) == TransliterationModules_END_OF_MODULE) |
265 | 0 | throw RuntimeException(); // could not mess up ignore trans. with non_ignore trans. |
266 | 0 | if (loadModuleByName(OUString::createFromAscii(TMlist[j].implName), bodyCascade[numCascade], rLocale)) |
267 | 0 | numCascade++; |
268 | 0 | break; |
269 | 0 | } |
270 | 0 | } |
271 | 0 | } |
272 | 0 | } |
273 | | |
274 | | void SAL_CALL |
275 | | TransliterationImpl::loadModuleByImplName(const OUString& implName, const Locale& rLocale) |
276 | 0 | { |
277 | 0 | clear(); |
278 | 0 | if (loadModuleByName(implName, bodyCascade[numCascade], rLocale)) |
279 | 0 | numCascade++; |
280 | 0 | } |
281 | | |
282 | | |
283 | | void SAL_CALL |
284 | | TransliterationImpl::loadModulesByImplNames(const Sequence< OUString >& implNameList, const Locale& rLocale ) |
285 | 0 | { |
286 | 0 | if (implNameList.getLength() > maxCascade || implNameList.getLength() <= 0) |
287 | 0 | throw RuntimeException(); |
288 | | |
289 | 0 | clear(); |
290 | 0 | for (const auto& rName : implNameList) |
291 | 0 | if (loadModuleByName(rName, bodyCascade[numCascade], rLocale)) |
292 | 0 | numCascade++; |
293 | 0 | } |
294 | | |
295 | | |
296 | | Sequence<OUString> SAL_CALL |
297 | | TransliterationImpl::getAvailableModules( const Locale& rLocale, sal_Int16 sType ) |
298 | 0 | { |
299 | 0 | const Sequence<OUString> translist = mxLocaledata->getTransliterations(rLocale); |
300 | 0 | std::vector<OUString> r; |
301 | 0 | r.reserve(translist.getLength()); |
302 | 0 | Reference<XExtendedTransliteration> body; |
303 | 0 | for (const auto& rTrans : translist) |
304 | 0 | { |
305 | 0 | if (loadModuleByName(rTrans, body, rLocale)) { |
306 | 0 | if (body->getType() & sType) |
307 | 0 | r.push_back(rTrans); |
308 | 0 | body.clear(); |
309 | 0 | } |
310 | 0 | } |
311 | 0 | return comphelper::containerToSequence(r); |
312 | 0 | } |
313 | | |
314 | | |
315 | | OUString SAL_CALL |
316 | | TransliterationImpl::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, |
317 | | Sequence< sal_Int32 >& offset ) |
318 | 1.31k | { |
319 | 1.31k | if (numCascade == 0) |
320 | 0 | return inStr; |
321 | | |
322 | 1.31k | if (numCascade == 1) |
323 | 1.31k | { |
324 | 1.31k | if ( startPos == 0 && nCount == inStr.getLength() ) |
325 | 1.31k | return bodyCascade[0]->transliterate( inStr, 0, nCount, offset); |
326 | 0 | else |
327 | 0 | { |
328 | 0 | OUString tmpStr = inStr.copy(startPos, nCount); |
329 | 0 | tmpStr = bodyCascade[0]->transliterate(tmpStr, 0, nCount, offset); |
330 | 0 | if ( startPos ) |
331 | 0 | { |
332 | 0 | for (sal_Int32 & j : asNonConstRange(offset)) |
333 | 0 | j += startPos; |
334 | 0 | } |
335 | 0 | return tmpStr; |
336 | 0 | } |
337 | 1.31k | } |
338 | 0 | else |
339 | 0 | { |
340 | 0 | OUString tmpStr = inStr.copy(startPos, nCount); |
341 | |
|
342 | 0 | auto [begin, end] = asNonConstRange(offset); |
343 | 0 | std::iota(begin, end, startPos); |
344 | |
|
345 | 0 | Sequence<sal_Int32> from(nCount); |
346 | 0 | Sequence<sal_Int32> to = offset; |
347 | 0 | for (sal_Int32 i = 0; i < numCascade; i++) { |
348 | 0 | tmpStr = bodyCascade[i]->transliterate(tmpStr, 0, nCount, from); |
349 | |
|
350 | 0 | nCount = tmpStr.getLength(); |
351 | |
|
352 | 0 | assert(from.getLength() == nCount); |
353 | 0 | from.swap(to); |
354 | 0 | for (sal_Int32& ix : asNonConstRange(to)) |
355 | 0 | ix = from[ix]; |
356 | 0 | } |
357 | 0 | offset = std::move(to); |
358 | 0 | return tmpStr; |
359 | 0 | } |
360 | 1.31k | } |
361 | | |
362 | | |
363 | | OUString SAL_CALL |
364 | | TransliterationImpl::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, |
365 | | Sequence< sal_Int32 >& offset ) |
366 | 2.36M | { |
367 | 2.36M | if (numCascade == 0) |
368 | 2.36M | return inStr; |
369 | | |
370 | 0 | if (offset.getLength() != nCount) |
371 | 0 | offset.realloc(nCount); |
372 | 0 | if (numCascade == 1) |
373 | 0 | { |
374 | 0 | if ( startPos == 0 && nCount == inStr.getLength() ) |
375 | 0 | return bodyCascade[0]->folding( inStr, 0, nCount, offset); |
376 | 0 | else |
377 | 0 | { |
378 | 0 | OUString tmpStr = inStr.copy(startPos, nCount); |
379 | 0 | tmpStr = bodyCascade[0]->folding(tmpStr, 0, nCount, offset); |
380 | 0 | if ( startPos ) |
381 | 0 | { |
382 | 0 | for (sal_Int32 & j : asNonConstRange(offset)) |
383 | 0 | j += startPos; |
384 | 0 | } |
385 | 0 | return tmpStr; |
386 | 0 | } |
387 | 0 | } |
388 | 0 | else |
389 | 0 | { |
390 | 0 | OUString tmpStr = inStr.copy(startPos, nCount); |
391 | |
|
392 | 0 | auto [begin, end] = asNonConstRange(offset); |
393 | 0 | std::iota(begin, end, startPos); |
394 | |
|
395 | 0 | Sequence<sal_Int32> from; |
396 | 0 | Sequence<sal_Int32> to = offset; |
397 | |
|
398 | 0 | for (sal_Int32 i = 0; i < numCascade; i++) { |
399 | 0 | tmpStr = bodyCascade[i]->folding(tmpStr, 0, nCount, from); |
400 | |
|
401 | 0 | nCount = tmpStr.getLength(); |
402 | |
|
403 | 0 | assert(from.getLength() == nCount); |
404 | 0 | from.swap(to); |
405 | 0 | for (sal_Int32& ix : asNonConstRange(to)) |
406 | 0 | ix = from[ix]; |
407 | 0 | } |
408 | 0 | offset = std::move(to); |
409 | 0 | return tmpStr; |
410 | 0 | } |
411 | 0 | } |
412 | | |
413 | | OUString SAL_CALL |
414 | | TransliterationImpl::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount ) |
415 | 3 | { |
416 | 3 | if (numCascade == 0) |
417 | 0 | return inStr; |
418 | 3 | else if (numCascade == 1) |
419 | 3 | return bodyCascade[0]->transliterateString2String( inStr, startPos, nCount); |
420 | 0 | else { |
421 | 0 | OUString tmpStr = bodyCascade[0]->transliterateString2String(inStr, startPos, nCount); |
422 | |
|
423 | 0 | for (sal_Int32 i = 1; i < numCascade; i++) |
424 | 0 | tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength()); |
425 | 0 | return tmpStr; |
426 | 0 | } |
427 | 3 | } |
428 | | |
429 | | OUString SAL_CALL |
430 | | TransliterationImpl::transliterateChar2String( sal_Unicode inChar ) |
431 | 0 | { |
432 | 0 | if (numCascade == 0) |
433 | 0 | return OUString(&inChar, 1); |
434 | 0 | else if (numCascade == 1) |
435 | 0 | return bodyCascade[0]->transliterateChar2String( inChar); |
436 | 0 | else { |
437 | 0 | OUString tmpStr = bodyCascade[0]->transliterateChar2String(inChar); |
438 | |
|
439 | 0 | for (sal_Int32 i = 1; i < numCascade; i++) |
440 | 0 | tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength()); |
441 | 0 | return tmpStr; |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | | sal_Unicode SAL_CALL |
446 | | TransliterationImpl::transliterateChar2Char( sal_Unicode inChar ) |
447 | 0 | { |
448 | 0 | sal_Unicode tmpChar = inChar; |
449 | 0 | for (sal_Int32 i = 0; i < numCascade; i++) |
450 | 0 | tmpChar = bodyCascade[i]->transliterateChar2Char(tmpChar); |
451 | 0 | return tmpChar; |
452 | 0 | } |
453 | | |
454 | | |
455 | | sal_Bool SAL_CALL |
456 | | TransliterationImpl::equals( |
457 | | const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, |
458 | | const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) |
459 | 5.55M | { |
460 | | // since this is an API function make it user fail safe |
461 | 5.55M | if ( nCount1 < 0 ) { |
462 | 0 | pos1 += nCount1; |
463 | 0 | nCount1 = -nCount1; |
464 | 0 | } |
465 | 5.55M | if ( nCount2 < 0 ) { |
466 | 0 | pos2 += nCount2; |
467 | 0 | nCount2 = -nCount2; |
468 | 0 | } |
469 | 5.55M | if ( !nCount1 || !nCount2 || |
470 | 3.20M | pos1 >= str1.getLength() || pos2 >= str2.getLength() || |
471 | 3.20M | pos1 < 0 || pos2 < 0 ) { |
472 | 2.34M | nMatch1 = nMatch2 = 0; |
473 | | // two empty strings return true, else false |
474 | 2.34M | return !nCount1 && !nCount2 && pos1 == str1.getLength() && pos2 == str2.getLength(); |
475 | 2.34M | } |
476 | 3.20M | if ( pos1 + nCount1 > str1.getLength() ) |
477 | 0 | nCount1 = str1.getLength() - pos1; |
478 | 3.20M | if ( pos2 + nCount2 > str2.getLength() ) |
479 | 0 | nCount2 = str2.getLength() - pos2; |
480 | | |
481 | 3.20M | if (caseignoreOnly && caseignore.is()) |
482 | 2.02M | return caseignore->equals(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2); |
483 | | |
484 | 1.18M | Sequence<sal_Int32> offset1, offset2; |
485 | | |
486 | 1.18M | OUString tmpStr1 = folding(str1, pos1, nCount1, offset1); |
487 | 1.18M | OUString tmpStr2 = folding(str2, pos2, nCount2, offset2); |
488 | | // Length of offset1 and offset2 may still be 0 if there was no folding |
489 | | // necessary! |
490 | | |
491 | 1.18M | const sal_Unicode *p1 = tmpStr1.getStr(); |
492 | 1.18M | const sal_Unicode *p2 = tmpStr2.getStr(); |
493 | 1.18M | sal_Int32 i, nLen = ::std::min( tmpStr1.getLength(), tmpStr2.getLength()); |
494 | 2.10M | for (i = 0; i < nLen; ++i, ++p1, ++p2 ) { |
495 | 2.03M | if (*p1 != *p2) { |
496 | | // return number of matched code points so far |
497 | 1.11M | nMatch1 = (i < offset1.getLength()) ? offset1[i] : i; |
498 | 1.11M | nMatch2 = (i < offset2.getLength()) ? offset2[i] : i; |
499 | 1.11M | return false; |
500 | 1.11M | } |
501 | 2.03M | } |
502 | | // i==nLen |
503 | 71.2k | if ( tmpStr1.getLength() != tmpStr2.getLength() ) { |
504 | | // return number of matched code points so far |
505 | 13.0k | nMatch1 = (i <= offset1.getLength()) ? offset1[i-1] + 1 : i; |
506 | 13.0k | nMatch2 = (i <= offset2.getLength()) ? offset2[i-1] + 1 : i; |
507 | 13.0k | return false; |
508 | 58.1k | } else { |
509 | 58.1k | nMatch1 = nCount1; |
510 | 58.1k | nMatch2 = nCount2; |
511 | 58.1k | return true; |
512 | 58.1k | } |
513 | 71.2k | } |
514 | | |
515 | | Sequence< OUString > |
516 | | TransliterationImpl::getRange(const Sequence< OUString > &inStrs, |
517 | | const sal_Int32 length, sal_Int16 _numCascade) |
518 | 0 | { |
519 | 0 | if (_numCascade >= numCascade || ! bodyCascade[_numCascade].is()) |
520 | 0 | return inStrs; |
521 | | |
522 | 0 | sal_Int32 j_tmp = 0; |
523 | 0 | constexpr sal_Int32 nMaxOutput = 2; |
524 | 0 | const sal_Int32 nMaxOutputLength = nMaxOutput*length; |
525 | 0 | std::vector<OUString> ostr; |
526 | 0 | ostr.reserve(nMaxOutputLength); |
527 | 0 | for (sal_Int32 j = 0; j < length; j+=2) { |
528 | 0 | const Sequence< OUString > temp = bodyCascade[_numCascade]->transliterateRange(inStrs[j], inStrs[j+1]); |
529 | |
|
530 | 0 | for (const auto& rStr : temp) { |
531 | 0 | if ( j_tmp++ >= nMaxOutputLength ) throw RuntimeException(); |
532 | 0 | ostr.push_back(rStr); |
533 | 0 | } |
534 | 0 | } |
535 | | |
536 | 0 | return getRange(comphelper::containerToSequence(ostr), j_tmp, ++_numCascade); |
537 | 0 | } |
538 | | |
539 | | |
540 | | Sequence< OUString > SAL_CALL |
541 | | TransliterationImpl::transliterateRange( const OUString& str1, const OUString& str2 ) |
542 | 0 | { |
543 | 0 | if (numCascade == 1) |
544 | 0 | return bodyCascade[0]->transliterateRange(str1, str2); |
545 | | |
546 | 0 | Sequence< OUString > ostr{ str1, str2 }; |
547 | |
|
548 | 0 | return getRange(ostr, 2, 0); |
549 | 0 | } |
550 | | |
551 | | |
552 | | sal_Int32 SAL_CALL |
553 | | TransliterationImpl::compareSubstring( |
554 | | const OUString& str1, sal_Int32 off1, sal_Int32 len1, |
555 | | const OUString& str2, sal_Int32 off2, sal_Int32 len2) |
556 | 0 | { |
557 | 0 | if (caseignoreOnly && caseignore.is()) |
558 | 0 | return caseignore->compareSubstring(str1, off1, len1, str2, off2, len2); |
559 | | |
560 | 0 | Sequence <sal_Int32> offset; |
561 | |
|
562 | 0 | OUString in_str1 = transliterate(str1, off1, len1, offset); |
563 | 0 | OUString in_str2 = transliterate(str2, off2, len2, offset); |
564 | 0 | const sal_Unicode* unistr1 = in_str1.getStr(); |
565 | 0 | const sal_Unicode* unistr2 = in_str2.getStr(); |
566 | 0 | sal_Int32 strlen1 = in_str1.getLength(); |
567 | 0 | sal_Int32 strlen2 = in_str2.getLength(); |
568 | |
|
569 | 0 | while (strlen1 && strlen2) { |
570 | 0 | if (*unistr1 != *unistr2) |
571 | 0 | return *unistr1 > *unistr2 ? 1 : -1; |
572 | | |
573 | 0 | unistr1++; unistr2++; strlen1--; strlen2--; |
574 | 0 | } |
575 | 0 | return strlen1 == strlen2 ? 0 : (strlen1 > strlen2 ? 1 : -1); |
576 | 0 | } |
577 | | |
578 | | |
579 | | sal_Int32 SAL_CALL |
580 | | TransliterationImpl::compareString(const OUString& str1, const OUString& str2 ) |
581 | 783 | { |
582 | 783 | if (caseignoreOnly && caseignore.is()) |
583 | 783 | return caseignore->compareString(str1, str2); |
584 | 0 | else |
585 | 0 | return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength()); |
586 | 783 | } |
587 | | |
588 | | |
589 | | void |
590 | | TransliterationImpl::clear() |
591 | 136k | { |
592 | 204k | for (sal_Int32 i = 0; i < numCascade; i++) |
593 | 68.0k | if (bodyCascade[i].is()) |
594 | 68.0k | bodyCascade[i].clear(); |
595 | 136k | numCascade = 0; |
596 | 136k | caseignore.clear(); |
597 | 136k | caseignoreOnly = true; |
598 | 136k | } |
599 | | |
600 | | namespace |
601 | | { |
602 | | /** structure to cache the last transliteration body used. */ |
603 | | struct TransBody |
604 | | { |
605 | | OUString Name; |
606 | | css::uno::Reference< css::i18n::XExtendedTransliteration > Body; |
607 | | }; |
608 | | } |
609 | | |
610 | | void TransliterationImpl::loadBody( OUString const &implName, Reference<XExtendedTransliteration>& body ) |
611 | 197k | { |
612 | 197k | assert(!implName.isEmpty()); |
613 | 197k | static std::mutex transBodyMutex; |
614 | 197k | std::unique_lock guard(transBodyMutex); |
615 | 197k | static TransBody lastTransBody; |
616 | 197k | if (implName != lastTransBody.Name) |
617 | 61.5k | { |
618 | 61.5k | lastTransBody.Body.set( |
619 | 61.5k | mxContext->getServiceManager()->createInstanceWithContext(implName, mxContext), UNO_QUERY_THROW); |
620 | 61.5k | lastTransBody.Name = implName; |
621 | 61.5k | } |
622 | 197k | body = lastTransBody.Body; |
623 | 197k | } |
624 | | |
625 | | bool |
626 | | TransliterationImpl::loadModuleByName( std::u16string_view implName, |
627 | | Reference<XExtendedTransliteration>& body, const Locale& rLocale) |
628 | 129k | { |
629 | 129k | OUString cname = OUString::Concat(TRLT_IMPLNAME_PREFIX) + implName; |
630 | 129k | loadBody(cname, body); |
631 | 129k | if (body.is()) { |
632 | 68.0k | body->loadModule(TransliterationModules(0), rLocale); // toUpper/toLoad need rLocale |
633 | | |
634 | | // if the module is ignore case/kana/width, load caseignore for equals/compareString mothed |
635 | 68.0k | for (sal_Int16 i = 0; i < 3; i++) { |
636 | 68.0k | if (o3tl::equalsAscii(implName, TMlist[i].implName)) { |
637 | 68.0k | if (i == 0) // current module is caseignore |
638 | 68.0k | body->loadModule(TMlist[0].tm, rLocale); // caseignore need to setup module name |
639 | 68.0k | if (! caseignore.is()) { |
640 | 68.0k | OUString bname = TRLT_IMPLNAME_PREFIX + |
641 | 68.0k | OUString::createFromAscii(TMlist[0].implName); |
642 | 68.0k | loadBody(bname, caseignore); |
643 | 68.0k | } |
644 | 68.0k | if (caseignore.is()) |
645 | 68.0k | caseignore->loadModule(TMlist[i].tm, rLocale); |
646 | 68.0k | return true; |
647 | 68.0k | } |
648 | 68.0k | } |
649 | 0 | caseignoreOnly = false; // has other module than just ignore case/kana/width |
650 | 0 | } |
651 | 61.5k | return body.is(); |
652 | 129k | } |
653 | | |
654 | | OUString SAL_CALL |
655 | | TransliterationImpl::getImplementationName() |
656 | 0 | { |
657 | 0 | return u"com.sun.star.i18n.Transliteration"_ustr; |
658 | 0 | } |
659 | | |
660 | | sal_Bool SAL_CALL |
661 | | TransliterationImpl::supportsService(const OUString& rServiceName) |
662 | 0 | { |
663 | 0 | return cppu::supportsService(this, rServiceName); |
664 | 0 | } |
665 | | |
666 | | Sequence< OUString > SAL_CALL |
667 | | TransliterationImpl::getSupportedServiceNames() |
668 | 0 | { |
669 | 0 | return { u"com.sun.star.i18n.Transliteration"_ustr }; |
670 | 0 | } |
671 | | |
672 | | } |
673 | | |
674 | | extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * |
675 | | com_sun_star_i18n_Transliteration_get_implementation( |
676 | | css::uno::XComponentContext *context, |
677 | | css::uno::Sequence<css::uno::Any> const &) |
678 | 7.30k | { |
679 | 7.30k | return cppu::acquire(new i18npool::TransliterationImpl(context)); |
680 | 7.30k | } |
681 | | |
682 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |