Coverage Report

Created: 2026-06-30 11:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/i18npool/source/transliteration/transliterationImpl.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
21
#include <transliterationImpl.hxx>
22
#include <servicename.hxx>
23
24
#include <com/sun/star/i18n/LocaleData2.hpp>
25
#include <com/sun/star/i18n/TransliterationType.hpp>
26
#include <com/sun/star/i18n/TransliterationModulesExtra.hpp>
27
28
#include <comphelper/sequence.hxx>
29
#include <cppuhelper/supportsservice.hxx>
30
#include <o3tl/string_view.hxx>
31
#include <rtl/ustring.hxx>
32
33
#include <algorithm>
34
#include <mutex>
35
#include <numeric>
36
37
using namespace com::sun::star::uno;
38
using namespace com::sun::star::i18n;
39
using namespace com::sun::star::lang;
40
41
42
namespace i18npool {
43
44
#define TmItem1( name ) \
45
  {TransliterationModules_##name, TransliterationModulesNew_##name, #name}
46
47
#define TmItem2( name ) \
48
  {TransliterationModules(0), TransliterationModulesNew_##name, #name}
49
50
namespace {
51
52
// Ignore Module list
53
struct TMList {
54
  TransliterationModules        tm;
55
  TransliterationModulesNew     tmn;
56
  const char                   *implName;
57
};
58
59
}
60
61
TMList const TMlist[] = {                //      Modules      ModulesNew
62
  TmItem1 (IGNORE_CASE),                        // 0. (1<<8        256) (7)
63
  TmItem1 (IGNORE_WIDTH),                       // 1. (1<<9        512) (8)
64
  TmItem1 (IGNORE_KANA),                        // 2. (1<<10      1024) (9)
65
// No enum define for this trans. application has to use impl name to load it
66
//  TmItem1 (IGNORE_CASE_SIMPLE),                       // (1<<11      1024) (66)
67
68
  {TransliterationModules_IgnoreTraditionalKanji_ja_JP,
69
   TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP, "ignoreTraditionalKanji_ja_JP"},
70
                                                // 3. (1<<12      4096) (10)
71
  {TransliterationModules_IgnoreTraditionalKana_ja_JP,
72
   TransliterationModulesNew_IgnoreTraditionalKana_ja_JP, "ignoreTraditionalKana_ja_JP"},
73
                                                // 4. (1<<13      8192) (11)
74
  {TransliterationModules_IgnoreMinusSign_ja_JP, TransliterationModulesNew_IgnoreMinusSign_ja_JP,
75
   "ignoreMinusSign_ja_JP"},                    // 5. (1<<13     16384) (12)
76
  {TransliterationModules_IgnoreIterationMark_ja_JP,
77
   TransliterationModulesNew_IgnoreIterationMark_ja_JP, "ignoreIterationMark_ja_JP"},
78
                                                // 6. (1<<14     32768) (13)
79
  {TransliterationModules_IgnoreSeparator_ja_JP, TransliterationModulesNew_IgnoreSeparator_ja_JP,
80
   "ignoreSeparator_ja_JP"},                    // 7. (1<<15     65536) (14)
81
  {TransliterationModules_IgnoreSize_ja_JP, TransliterationModulesNew_IgnoreSize_ja_JP,
82
   "ignoreSize_ja_JP"},                         // 15. (1<<23  16777216) (22)
83
  {TransliterationModules_IgnoreMiddleDot_ja_JP, TransliterationModulesNew_IgnoreMiddleDot_ja_JP,
84
   "ignoreMiddleDot_ja_JP"},                    // 17. (1<<25  67108864) (24)
85
  {TransliterationModules_IgnoreSpace_ja_JP, TransliterationModulesNew_IgnoreSpace_ja_JP,
86
   "ignoreSpace_ja_JP"},                        // 18. (1<<26 134217728) (25)
87
  {TransliterationModules_IgnoreZiZu_ja_JP, TransliterationModulesNew_IgnoreZiZu_ja_JP,
88
   "ignoreZiZu_ja_JP"},                         // 8. (1<<16    131072) (15)
89
  {TransliterationModules_IgnoreBaFa_ja_JP, TransliterationModulesNew_IgnoreBaFa_ja_JP,
90
   "ignoreBaFa_ja_JP"},                         // 9. (1<<17    262144) (16)
91
  {TransliterationModules_IgnoreTiJi_ja_JP, TransliterationModulesNew_IgnoreTiJi_ja_JP,
92
   "ignoreTiJi_ja_JP"},                         // 10. (1<<18    524288) (17)
93
  {TransliterationModules_IgnoreHyuByu_ja_JP, TransliterationModulesNew_IgnoreHyuByu_ja_JP,
94
   "ignoreHyuByu_ja_JP"},                       // 11. (1<<19   1048576) (18)
95
  {TransliterationModules_IgnoreSeZe_ja_JP, TransliterationModulesNew_IgnoreSeZe_ja_JP,
96
   "ignoreSeZe_ja_JP"},                         // 12. (1<<20   2097152) (19)
97
  {TransliterationModules_IgnoreIandEfollowedByYa_ja_JP,
98
   TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP, "ignoreIandEfollowedByYa_ja_JP"},
99
                                                // 13. (1<<21   4194304) (20)
100
  {TransliterationModules_IgnoreKiKuFollowedBySa_ja_JP,
101
   TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP, "ignoreKiKuFollowedBySa_ja_JP"},
102
                                                // 14. (1<<22   8388608) (21)
103
  {TransliterationModules_IgnoreProlongedSoundMark_ja_JP,
104
   TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP, "ignoreProlongedSoundMark_ja_JP"},
105
                                                // 16. (1<<24  33554432) (23)
106
107
  TmItem1 (UPPERCASE_LOWERCASE),        // 19. (1) (1)
108
  TmItem1 (LOWERCASE_UPPERCASE),        // 20. (2) (2)
109
  TmItem1 (HALFWIDTH_FULLWIDTH),        // 21. (3) (3)
110
  TmItem1 (FULLWIDTH_HALFWIDTH),        // 22. (4) (4)
111
  TmItem1 (KATAKANA_HIRAGANA),          // 23. (5) (5)
112
  TmItem1 (HIRAGANA_KATAKANA),          // 24. (6) (6)
113
114
  {TransliterationModules_SmallToLarge_ja_JP, TransliterationModulesNew_SmallToLarge_ja_JP,
115
   "smallToLarge_ja_JP"},               // 25. (1<<27 268435456) (26)
116
  {TransliterationModules_LargeToSmall_ja_JP, TransliterationModulesNew_LargeToSmall_ja_JP,
117
   "largeToSmall_ja_JP"},               // 26. (1<<28 536870912) (27)
118
  TmItem2 (NumToTextLower_zh_CN),       // 27. () (28)
119
  TmItem2 (NumToTextUpper_zh_CN),       // 28. () (29)
120
  TmItem2 (NumToTextLower_zh_TW),       // 29. () (30)
121
  TmItem2 (NumToTextUpper_zh_TW),       // 30. () (31)
122
  TmItem2 (NumToTextFormalHangul_ko),   // 31. () (32)
123
  TmItem2 (NumToTextFormalLower_ko),    // 32. () (33)
124
  TmItem2 (NumToTextFormalUpper_ko),    // 33. () (34)
125
  TmItem2 (NumToTextInformalHangul_ko), // 34. () (35)
126
  TmItem2 (NumToTextInformalLower_ko),  // 35. () (36)
127
  TmItem2 (NumToTextInformalUpper_ko),  // 36. () (37)
128
  TmItem2 (NumToCharLower_zh_CN),       // 37. () (38)
129
  TmItem2 (NumToCharUpper_zh_CN),       // 38. () (39)
130
  TmItem2 (NumToCharLower_zh_TW),       // 39. () (40)
131
  TmItem2 (NumToCharUpper_zh_TW),       // 40. () (41)
132
  TmItem2 (NumToCharHangul_ko),         // 41. () (42)
133
  TmItem2 (NumToCharLower_ko),          // 42. () (43)
134
  TmItem2 (NumToCharUpper_ko),          // 43. () (44)
135
  TmItem2 (NumToCharFullwidth),         // 44. () (45)
136
  TmItem2 (NumToCharKanjiShort_ja_JP),  // 45. () (46)
137
  TmItem2 (TextToNumLower_zh_CN),       // 46. () (47)
138
  TmItem2 (TextToNumUpper_zh_CN),       // 47. () (48)
139
  TmItem2 (TextToNumLower_zh_TW),       // 48. () (49)
140
  TmItem2 (TextToNumUpper_zh_TW),       // 49. () (50)
141
  TmItem2 (TextToNumFormalHangul_ko),   // 50. () (51)
142
  TmItem2 (TextToNumFormalLower_ko),    // 51. () (52)
143
  TmItem2 (TextToNumFormalUpper_ko),    // 52. () (53)
144
  TmItem2 (TextToNumInformalHangul_ko), // 53. () (54)
145
  TmItem2 (TextToNumInformalLower_ko),  // 54. () (55)
146
  TmItem2 (TextToNumInformalUpper_ko),  // 55. () (56)
147
148
  TmItem2 (CharToNumLower_zh_CN),       // 56. () (59)
149
  TmItem2 (CharToNumUpper_zh_CN),       // 57. () (60)
150
  TmItem2 (CharToNumLower_zh_TW),       // 58. () (61)
151
  TmItem2 (CharToNumUpper_zh_TW),       // 59. () (62)
152
  TmItem2 (CharToNumHangul_ko),         // 60. () (63)
153
  TmItem2 (CharToNumLower_ko),          // 61. () (64)
154
  TmItem2 (CharToNumUpper_ko),          // 62. () (65)
155
156
// no enum defined for these trans. application has to use impl name to load them
157
//  TmItem2 (NumToCharArabic_Indic),    // () (67)
158
//  TmItem2 (NumToCharEstern_Arabic_Indic),// () (68)
159
//  TmItem2 (NumToCharIndic),           // () (69)
160
//  TmItem2 (NumToCharThai),            // () (70)
161
  {TransliterationModules(0), TransliterationModulesNew(0),  nullptr}
162
};
163
164
// Constructor/Destructor
165
7.30k
TransliterationImpl::TransliterationImpl(const Reference <XComponentContext>& xContext) : mxContext(xContext)
166
7.30k
{
167
7.30k
    numCascade = 0;
168
7.30k
    caseignoreOnly = true;
169
170
7.30k
    mxLocaledata.set(LocaleData2::create(xContext));
171
7.30k
}
172
173
TransliterationImpl::~TransliterationImpl()
174
7.30k
{
175
7.30k
    mxLocaledata.clear();
176
7.30k
    clear();
177
7.30k
}
178
179
180
// Methods
181
OUString SAL_CALL
182
TransliterationImpl::getName()
183
0
{
184
0
    if (numCascade == 1 && bodyCascade[0].is())
185
0
        return bodyCascade[0]->getName();
186
0
    if (numCascade < 1)
187
0
        return ( u"Not Loaded"_ustr);
188
0
    throw RuntimeException();
189
0
}
190
191
sal_Int16 SAL_CALL
192
TransliterationImpl::getType()
193
0
{
194
0
    if (numCascade > 1)
195
0
        return (TransliterationType::CASCADE|TransliterationType::IGNORE);
196
0
    if (numCascade > 0 && bodyCascade[0].is())
197
0
        return bodyCascade[0]->getType();
198
0
    throw RuntimeException();
199
0
}
200
201
1.25M
static TransliterationModules operator&(TransliterationModules lhs, TransliterationModules rhs) {
202
1.25M
    return TransliterationModules(sal_Int32(lhs) & sal_Int32(rhs));
203
1.25M
}
204
518k
static TransliterationModules operator|(TransliterationModules lhs, TransliterationModules rhs) {
205
518k
    return TransliterationModules(sal_Int32(lhs) | sal_Int32(rhs));
206
518k
}
207
208
void SAL_CALL
209
TransliterationImpl::loadModule( TransliterationModules modType, const Locale& rLocale )
210
129k
{
211
129k
    clear();
212
129k
    if (bool(modType & TransliterationModules_IGNORE_MASK) &&
213
129k
        bool(modType & TransliterationModules_NON_IGNORE_MASK))
214
0
    {
215
0
        throw RuntimeException();
216
129k
    } else if (bool(modType & TransliterationModules_IGNORE_MASK)) {
217
259k
#define TransliterationModules_IGNORE_CASE_MASK (TransliterationModules_IGNORE_CASE | \
218
259k
                                                TransliterationModules_IGNORE_WIDTH | \
219
259k
                                                TransliterationModules_IGNORE_KANA)
220
129k
        TransliterationModules mask = ((modType & TransliterationModules_IGNORE_CASE_MASK) == modType) ?
221
129k
                TransliterationModules_IGNORE_CASE_MASK : TransliterationModules_IGNORE_MASK;
222
395k
        for (sal_Int16 i = 0; bool(TMlist[i].tm & mask); i++) {
223
265k
            if (bool(modType & TMlist[i].tm))
224
129k
                if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName),
225
129k
                                                bodyCascade[numCascade], rLocale))
226
68.0k
                    numCascade++;
227
265k
        }
228
        // additional transliterations from TransliterationModulesExtra (we cannot extend TransliterationModules)
229
129k
        if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_DIACRITICS_CTL)))
230
0
        {
231
0
            if (loadModuleByName(u"ignoreDiacritics_CTL", bodyCascade[numCascade], rLocale))
232
0
                numCascade++;
233
0
        }
234
129k
        if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_KASHIDA_CTL)))
235
0
            if (loadModuleByName(u"ignoreKashida_CTL", bodyCascade[numCascade], rLocale))
236
0
                numCascade++;
237
238
129k
    } else if (bool(modType & TransliterationModules_NON_IGNORE_MASK)) {
239
0
        for (sal_Int16 i = 0; bool(TMlist[i].tm); i++) {
240
0
            if (TMlist[i].tm == modType) {
241
0
                if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), bodyCascade[numCascade], rLocale))
242
0
                    numCascade++;
243
0
                break;
244
0
            }
245
0
        }
246
0
    }
247
129k
}
248
249
void SAL_CALL
250
TransliterationImpl::loadModuleNew( const Sequence < TransliterationModulesNew > & modType, const Locale& rLocale )
251
0
{
252
0
    clear();
253
0
    TransliterationModules mask = TransliterationModules_END_OF_MODULE;
254
0
    sal_Int32 count = modType.getLength();
255
0
    if (count > maxCascade)
256
0
        throw RuntimeException(); // could not handle more than maxCascade
257
0
    for (sal_Int32 i = 0; i < count; i++) {
258
0
        for (sal_Int16 j = 0; bool(TMlist[j].tmn); j++) {
259
0
            if (TMlist[j].tmn == modType[i]) {
260
0
                if (mask == TransliterationModules_END_OF_MODULE)
261
0
                    mask = bool(TMlist[i].tm) && bool(TMlist[i].tm & TransliterationModules_IGNORE_MASK) ?
262
0
                        TransliterationModules_IGNORE_MASK : TransliterationModules_NON_IGNORE_MASK;
263
0
                else if (mask == TransliterationModules_IGNORE_MASK &&
264
0
                        (TMlist[i].tm&TransliterationModules_IGNORE_MASK) == TransliterationModules_END_OF_MODULE)
265
0
                    throw RuntimeException(); // could not mess up ignore trans. with non_ignore trans.
266
0
                if (loadModuleByName(OUString::createFromAscii(TMlist[j].implName), bodyCascade[numCascade], rLocale))
267
0
                    numCascade++;
268
0
                break;
269
0
            }
270
0
        }
271
0
    }
272
0
}
273
274
void SAL_CALL
275
TransliterationImpl::loadModuleByImplName(const OUString& implName, const Locale& rLocale)
276
0
{
277
0
    clear();
278
0
    if (loadModuleByName(implName, bodyCascade[numCascade], rLocale))
279
0
        numCascade++;
280
0
}
281
282
283
void SAL_CALL
284
TransliterationImpl::loadModulesByImplNames(const Sequence< OUString >& implNameList, const Locale& rLocale )
285
0
{
286
0
    if (implNameList.getLength() > maxCascade || implNameList.getLength() <= 0)
287
0
        throw RuntimeException();
288
289
0
    clear();
290
0
    for (const auto& rName : implNameList)
291
0
        if (loadModuleByName(rName, bodyCascade[numCascade], rLocale))
292
0
            numCascade++;
293
0
}
294
295
296
Sequence<OUString> SAL_CALL
297
TransliterationImpl::getAvailableModules( const Locale& rLocale, sal_Int16 sType )
298
0
{
299
0
    const Sequence<OUString> translist = mxLocaledata->getTransliterations(rLocale);
300
0
    std::vector<OUString> r;
301
0
    r.reserve(translist.getLength());
302
0
    Reference<XExtendedTransliteration> body;
303
0
    for (const auto& rTrans : translist)
304
0
    {
305
0
        if (loadModuleByName(rTrans, body, rLocale)) {
306
0
            if (body->getType() & sType)
307
0
                r.push_back(rTrans);
308
0
            body.clear();
309
0
        }
310
0
    }
311
0
    return comphelper::containerToSequence(r);
312
0
}
313
314
315
OUString SAL_CALL
316
TransliterationImpl::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
317
                    Sequence< sal_Int32 >& offset )
318
1.31k
{
319
1.31k
    if (numCascade == 0)
320
0
        return inStr;
321
322
1.31k
    if (numCascade == 1)
323
1.31k
    {
324
1.31k
        if ( startPos == 0 && nCount == inStr.getLength() )
325
1.31k
            return bodyCascade[0]->transliterate( inStr, 0, nCount, offset);
326
0
        else
327
0
        {
328
0
            OUString tmpStr = inStr.copy(startPos, nCount);
329
0
            tmpStr = bodyCascade[0]->transliterate(tmpStr, 0, nCount, offset);
330
0
            if ( startPos )
331
0
            {
332
0
                for (sal_Int32 & j : asNonConstRange(offset))
333
0
                    j += startPos;
334
0
            }
335
0
            return tmpStr;
336
0
        }
337
1.31k
    }
338
0
    else
339
0
    {
340
0
        OUString tmpStr = inStr.copy(startPos, nCount);
341
342
0
        auto [begin, end] = asNonConstRange(offset);
343
0
        std::iota(begin, end, startPos);
344
345
0
        Sequence<sal_Int32> from(nCount);
346
0
        Sequence<sal_Int32> to = offset;
347
0
        for (sal_Int32 i = 0; i < numCascade; i++) {
348
0
            tmpStr = bodyCascade[i]->transliterate(tmpStr, 0, nCount, from);
349
350
0
            nCount = tmpStr.getLength();
351
352
0
            assert(from.getLength() == nCount);
353
0
            from.swap(to);
354
0
            for (sal_Int32& ix : asNonConstRange(to))
355
0
                ix = from[ix];
356
0
        }
357
0
        offset = std::move(to);
358
0
        return tmpStr;
359
0
    }
360
1.31k
}
361
362
363
OUString SAL_CALL
364
TransliterationImpl::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
365
        Sequence< sal_Int32 >& offset )
366
2.36M
{
367
2.36M
    if (numCascade == 0)
368
2.36M
        return inStr;
369
370
0
    if (offset.getLength() != nCount)
371
0
        offset.realloc(nCount);
372
0
    if (numCascade == 1)
373
0
    {
374
0
        if ( startPos == 0 && nCount == inStr.getLength() )
375
0
            return bodyCascade[0]->folding( inStr, 0, nCount, offset);
376
0
        else
377
0
        {
378
0
            OUString tmpStr = inStr.copy(startPos, nCount);
379
0
            tmpStr = bodyCascade[0]->folding(tmpStr, 0, nCount, offset);
380
0
            if ( startPos )
381
0
            {
382
0
                for (sal_Int32 & j : asNonConstRange(offset))
383
0
                    j += startPos;
384
0
            }
385
0
            return tmpStr;
386
0
        }
387
0
    }
388
0
    else
389
0
    {
390
0
        OUString tmpStr = inStr.copy(startPos, nCount);
391
392
0
        auto [begin, end] = asNonConstRange(offset);
393
0
        std::iota(begin, end, startPos);
394
395
0
        Sequence<sal_Int32> from;
396
0
        Sequence<sal_Int32> to = offset;
397
398
0
        for (sal_Int32 i = 0; i < numCascade; i++) {
399
0
            tmpStr = bodyCascade[i]->folding(tmpStr, 0, nCount, from);
400
401
0
            nCount = tmpStr.getLength();
402
403
0
            assert(from.getLength() == nCount);
404
0
            from.swap(to);
405
0
            for (sal_Int32& ix : asNonConstRange(to))
406
0
                ix = from[ix];
407
0
        }
408
0
        offset = std::move(to);
409
0
        return tmpStr;
410
0
    }
411
0
}
412
413
OUString SAL_CALL
414
TransliterationImpl::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount )
415
3
{
416
3
    if (numCascade == 0)
417
0
        return inStr;
418
3
    else if (numCascade == 1)
419
3
        return bodyCascade[0]->transliterateString2String( inStr, startPos, nCount);
420
0
    else {
421
0
        OUString tmpStr = bodyCascade[0]->transliterateString2String(inStr, startPos, nCount);
422
423
0
        for (sal_Int32 i = 1; i < numCascade; i++)
424
0
            tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength());
425
0
        return tmpStr;
426
0
    }
427
3
}
428
429
OUString SAL_CALL
430
TransliterationImpl::transliterateChar2String( sal_Unicode inChar )
431
0
{
432
0
    if (numCascade == 0)
433
0
        return OUString(&inChar, 1);
434
0
    else if (numCascade == 1)
435
0
        return bodyCascade[0]->transliterateChar2String( inChar);
436
0
    else {
437
0
        OUString tmpStr = bodyCascade[0]->transliterateChar2String(inChar);
438
439
0
        for (sal_Int32 i = 1; i < numCascade; i++)
440
0
            tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength());
441
0
        return tmpStr;
442
0
    }
443
0
}
444
445
sal_Unicode SAL_CALL
446
TransliterationImpl::transliterateChar2Char( sal_Unicode inChar )
447
0
{
448
0
    sal_Unicode tmpChar = inChar;
449
0
    for (sal_Int32 i = 0; i < numCascade; i++)
450
0
        tmpChar = bodyCascade[i]->transliterateChar2Char(tmpChar);
451
0
    return tmpChar;
452
0
}
453
454
455
sal_Bool SAL_CALL
456
TransliterationImpl::equals(
457
    const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1,
458
    const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2)
459
5.55M
{
460
    // since this is an API function make it user fail safe
461
5.55M
    if ( nCount1 < 0 ) {
462
0
        pos1 += nCount1;
463
0
        nCount1 = -nCount1;
464
0
    }
465
5.55M
    if ( nCount2 < 0 ) {
466
0
        pos2 += nCount2;
467
0
        nCount2 = -nCount2;
468
0
    }
469
5.55M
    if ( !nCount1 || !nCount2 ||
470
3.20M
            pos1 >= str1.getLength() || pos2 >= str2.getLength() ||
471
3.20M
            pos1 < 0 || pos2 < 0 ) {
472
2.34M
        nMatch1 = nMatch2 = 0;
473
        // two empty strings return true, else false
474
2.34M
        return !nCount1 && !nCount2 && pos1 == str1.getLength() && pos2 == str2.getLength();
475
2.34M
    }
476
3.20M
    if ( pos1 + nCount1 > str1.getLength() )
477
0
        nCount1 = str1.getLength() - pos1;
478
3.20M
    if ( pos2 + nCount2 > str2.getLength() )
479
0
        nCount2 = str2.getLength() - pos2;
480
481
3.20M
    if (caseignoreOnly && caseignore.is())
482
2.02M
        return caseignore->equals(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2);
483
484
1.18M
    Sequence<sal_Int32> offset1, offset2;
485
486
1.18M
    OUString tmpStr1 = folding(str1, pos1, nCount1, offset1);
487
1.18M
    OUString tmpStr2 = folding(str2, pos2, nCount2, offset2);
488
    // Length of offset1 and offset2 may still be 0 if there was no folding
489
    // necessary!
490
491
1.18M
    const sal_Unicode *p1 = tmpStr1.getStr();
492
1.18M
    const sal_Unicode *p2 = tmpStr2.getStr();
493
1.18M
    sal_Int32 i, nLen = ::std::min( tmpStr1.getLength(), tmpStr2.getLength());
494
2.10M
    for (i = 0; i < nLen; ++i, ++p1, ++p2 ) {
495
2.03M
        if (*p1 != *p2) {
496
            // return number of matched code points so far
497
1.11M
            nMatch1 = (i < offset1.getLength()) ? offset1[i] : i;
498
1.11M
            nMatch2 = (i < offset2.getLength()) ? offset2[i] : i;
499
1.11M
            return false;
500
1.11M
        }
501
2.03M
    }
502
    // i==nLen
503
71.2k
    if ( tmpStr1.getLength() != tmpStr2.getLength() ) {
504
        // return number of matched code points so far
505
13.0k
        nMatch1 = (i <= offset1.getLength()) ? offset1[i-1] + 1 : i;
506
13.0k
        nMatch2 = (i <= offset2.getLength()) ? offset2[i-1] + 1 : i;
507
13.0k
        return false;
508
58.1k
    } else {
509
58.1k
        nMatch1 = nCount1;
510
58.1k
        nMatch2 = nCount2;
511
58.1k
        return true;
512
58.1k
    }
513
71.2k
}
514
515
Sequence< OUString >
516
TransliterationImpl::getRange(const Sequence< OUString > &inStrs,
517
                const sal_Int32 length, sal_Int16 _numCascade)
518
0
{
519
0
    if (_numCascade >= numCascade || ! bodyCascade[_numCascade].is())
520
0
        return inStrs;
521
522
0
    sal_Int32 j_tmp = 0;
523
0
    constexpr sal_Int32 nMaxOutput = 2;
524
0
    const sal_Int32 nMaxOutputLength = nMaxOutput*length;
525
0
    std::vector<OUString> ostr;
526
0
    ostr.reserve(nMaxOutputLength);
527
0
    for (sal_Int32 j = 0; j < length; j+=2) {
528
0
        const Sequence< OUString > temp = bodyCascade[_numCascade]->transliterateRange(inStrs[j], inStrs[j+1]);
529
530
0
        for (const auto& rStr : temp) {
531
0
            if ( j_tmp++ >= nMaxOutputLength ) throw RuntimeException();
532
0
            ostr.push_back(rStr);
533
0
        }
534
0
    }
535
536
0
    return getRange(comphelper::containerToSequence(ostr), j_tmp, ++_numCascade);
537
0
}
538
539
540
Sequence< OUString > SAL_CALL
541
TransliterationImpl::transliterateRange( const OUString& str1, const OUString& str2 )
542
0
{
543
0
    if (numCascade == 1)
544
0
        return bodyCascade[0]->transliterateRange(str1, str2);
545
546
0
    Sequence< OUString > ostr{ str1, str2 };
547
548
0
    return getRange(ostr, 2, 0);
549
0
}
550
551
552
sal_Int32 SAL_CALL
553
TransliterationImpl::compareSubstring(
554
    const OUString& str1, sal_Int32 off1, sal_Int32 len1,
555
    const OUString& str2, sal_Int32 off2, sal_Int32 len2)
556
0
{
557
0
    if (caseignoreOnly && caseignore.is())
558
0
        return caseignore->compareSubstring(str1, off1, len1, str2, off2, len2);
559
560
0
    Sequence <sal_Int32> offset;
561
562
0
    OUString in_str1 = transliterate(str1, off1, len1, offset);
563
0
    OUString in_str2 = transliterate(str2, off2, len2, offset);
564
0
    const sal_Unicode* unistr1 = in_str1.getStr();
565
0
    const sal_Unicode* unistr2 = in_str2.getStr();
566
0
    sal_Int32 strlen1 = in_str1.getLength();
567
0
    sal_Int32 strlen2 = in_str2.getLength();
568
569
0
    while (strlen1 && strlen2) {
570
0
        if (*unistr1 != *unistr2)
571
0
           return *unistr1 > *unistr2 ? 1 : -1;
572
573
0
        unistr1++; unistr2++; strlen1--; strlen2--;
574
0
    }
575
0
    return strlen1 == strlen2 ? 0 : (strlen1 > strlen2 ? 1 : -1);
576
0
}
577
578
579
sal_Int32 SAL_CALL
580
TransliterationImpl::compareString(const OUString& str1, const OUString& str2 )
581
783
{
582
783
    if (caseignoreOnly && caseignore.is())
583
783
        return caseignore->compareString(str1, str2);
584
0
    else
585
0
        return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength());
586
783
}
587
588
589
void
590
TransliterationImpl::clear()
591
136k
{
592
204k
    for (sal_Int32 i = 0; i < numCascade; i++)
593
68.0k
        if (bodyCascade[i].is())
594
68.0k
            bodyCascade[i].clear();
595
136k
    numCascade = 0;
596
136k
    caseignore.clear();
597
136k
    caseignoreOnly = true;
598
136k
}
599
600
namespace
601
{
602
    /** structure to cache the last transliteration body used. */
603
    struct TransBody
604
    {
605
        OUString Name;
606
        css::uno::Reference< css::i18n::XExtendedTransliteration > Body;
607
    };
608
}
609
610
void TransliterationImpl::loadBody( OUString const &implName, Reference<XExtendedTransliteration>& body )
611
197k
{
612
197k
    assert(!implName.isEmpty());
613
197k
    static std::mutex transBodyMutex;
614
197k
    std::unique_lock guard(transBodyMutex);
615
197k
    static TransBody lastTransBody;
616
197k
    if (implName != lastTransBody.Name)
617
61.5k
    {
618
61.5k
        lastTransBody.Body.set(
619
61.5k
            mxContext->getServiceManager()->createInstanceWithContext(implName, mxContext), UNO_QUERY_THROW);
620
61.5k
        lastTransBody.Name = implName;
621
61.5k
    }
622
197k
    body = lastTransBody.Body;
623
197k
}
624
625
bool
626
TransliterationImpl::loadModuleByName( std::u16string_view implName,
627
        Reference<XExtendedTransliteration>& body, const Locale& rLocale)
628
129k
{
629
129k
    OUString cname = OUString::Concat(TRLT_IMPLNAME_PREFIX) + implName;
630
129k
    loadBody(cname, body);
631
129k
    if (body.is()) {
632
68.0k
        body->loadModule(TransliterationModules(0), rLocale); // toUpper/toLoad need rLocale
633
634
        // if the module is ignore case/kana/width, load caseignore for equals/compareString mothed
635
68.0k
        for (sal_Int16 i = 0; i < 3; i++) {
636
68.0k
            if (o3tl::equalsAscii(implName, TMlist[i].implName)) {
637
68.0k
                if (i == 0) // current module is caseignore
638
68.0k
                    body->loadModule(TMlist[0].tm, rLocale); // caseignore need to setup module name
639
68.0k
                if (! caseignore.is()) {
640
68.0k
                    OUString bname = TRLT_IMPLNAME_PREFIX +
641
68.0k
                                OUString::createFromAscii(TMlist[0].implName);
642
68.0k
                    loadBody(bname, caseignore);
643
68.0k
                }
644
68.0k
                if (caseignore.is())
645
68.0k
                    caseignore->loadModule(TMlist[i].tm, rLocale);
646
68.0k
                return true;
647
68.0k
            }
648
68.0k
        }
649
0
        caseignoreOnly = false; // has other module than just ignore case/kana/width
650
0
    }
651
61.5k
    return body.is();
652
129k
}
653
654
OUString SAL_CALL
655
TransliterationImpl::getImplementationName()
656
0
{
657
0
    return u"com.sun.star.i18n.Transliteration"_ustr;
658
0
}
659
660
sal_Bool SAL_CALL
661
TransliterationImpl::supportsService(const OUString& rServiceName)
662
0
{
663
0
    return cppu::supportsService(this, rServiceName);
664
0
}
665
666
Sequence< OUString > SAL_CALL
667
TransliterationImpl::getSupportedServiceNames()
668
0
{
669
0
    return { u"com.sun.star.i18n.Transliteration"_ustr };
670
0
}
671
672
}
673
674
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
675
com_sun_star_i18n_Transliteration_get_implementation(
676
    css::uno::XComponentContext *context,
677
    css::uno::Sequence<css::uno::Any> const &)
678
7.30k
{
679
7.30k
    return cppu::acquire(new i18npool::TransliterationImpl(context));
680
7.30k
}
681
682
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */