Coverage Report

Created: 2025-11-16 09:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/i18npool/source/transliteration/transliteration_body.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
// Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
20
#if defined __GNUC__ && !defined __clang__
21
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
22
#endif
23
24
#include <rtl/ref.hxx>
25
#include <i18nutil/casefolding.hxx>
26
#include <i18nutil/unicode.hxx>
27
#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
28
#include <com/sun/star/i18n/TransliterationType.hpp>
29
#include <comphelper/processfactory.hxx>
30
#include <comphelper/sequence.hxx>
31
#include <o3tl/temporary.hxx>
32
33
#include <characterclassificationImpl.hxx>
34
35
#include <transliteration_body.hxx>
36
#include <memory>
37
#include <numeric>
38
39
using namespace ::com::sun::star::uno;
40
using namespace ::com::sun::star::i18n;
41
using namespace ::com::sun::star::lang;
42
43
namespace i18npool {
44
45
Transliteration_body::Transliteration_body()
46
2.09M
{
47
2.09M
    nMappingType = MappingType::NONE;
48
2.09M
    transliterationName = "Transliteration_body";
49
2.09M
    implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
50
2.09M
}
51
52
sal_Int16 SAL_CALL Transliteration_body::getType()
53
0
{
54
0
    return TransliterationType::ONE_TO_ONE;
55
0
}
56
57
sal_Bool SAL_CALL Transliteration_body::equals(
58
    const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
59
    const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
60
0
{
61
0
    throw RuntimeException();
62
0
}
63
64
Sequence< OUString > SAL_CALL
65
Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
66
0
{
67
0
    return { str1, str2 };
68
0
}
69
70
static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar )
71
1.30G
{
72
1.30G
    MappingType nRes = nMappingType;
73
74
    // take care of TOGGLE_CASE transliteration:
75
    // nMappingType should not be a combination of flags, thuse we decide now
76
    // which one to use.
77
1.30G
    if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower))
78
0
    {
79
0
        const sal_Int16 nType = unicode::getUnicodeType( cChar );
80
0
        if (nType & 0x02 /* lower case*/)
81
0
            nRes = MappingType::LowerToUpper;
82
0
        else
83
0
        {
84
            // should also work properly for non-upper characters like white spaces, numbers, ...
85
0
            nRes = MappingType::UpperToLower;
86
0
        }
87
0
    }
88
89
1.30G
    return nRes;
90
1.30G
}
91
92
OUString
93
Transliteration_body::transliterateImpl(
94
    const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
95
    Sequence< sal_Int32 >* pOffset)
96
134M
{
97
134M
    const sal_Unicode *in = inStr.getStr() + startPos;
98
99
    // We could assume that most calls result in identical string lengths,
100
    // thus using a preallocated OUStringBuffer could be an easy way
101
    // to assemble the return string without too much hassle. However,
102
    // for single characters the OUStringBuffer::append() method is quite
103
    // expensive compared to a simple array operation, so it pays here
104
    // to copy the final result instead.
105
106
    // Allocate the max possible buffer. Try to use stack instead of heap,
107
    // which would have to be reallocated most times anyways.
108
134M
    constexpr sal_Int32 nLocalBuf = 2048;
109
134M
    sal_Unicode* out;
110
134M
    std::unique_ptr<sal_Unicode[]> pHeapBuf;
111
134M
    if (nCount <= nLocalBuf)
112
134M
        out = static_cast<sal_Unicode*>(alloca(nCount * NMAPPINGMAX * sizeof(sal_Unicode)));
113
4.18k
    else
114
4.18k
    {
115
4.18k
        pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
116
4.18k
        out = pHeapBuf.get();
117
4.18k
    }
118
119
134M
    sal_Int32 j = 0;
120
    // Two different blocks to eliminate the if(useOffset) condition inside the loop.
121
    // Yes, on massive use even such small things do count.
122
134M
    if ( pOffset )
123
1.43k
    {
124
1.43k
        sal_Int32* offsetData;
125
1.43k
        std::unique_ptr<sal_Int32[]> pOffsetHeapBuf;
126
1.43k
        sal_Int32 nOffsetCount = std::max<sal_Int32>(nLocalBuf, nCount);
127
1.43k
        if (nOffsetCount <= nLocalBuf)
128
1.43k
            offsetData = static_cast<sal_Int32*>(alloca(nOffsetCount * NMAPPINGMAX * sizeof(sal_Int32)));
129
0
        else
130
0
        {
131
0
            pOffsetHeapBuf.reset(new sal_Int32[ nOffsetCount * NMAPPINGMAX ]);
132
0
            offsetData = pOffsetHeapBuf.get();
133
0
        }
134
1.43k
        sal_Int32* offsetDataEnd = offsetData;
135
136
28.8k
        for (sal_Int32 i = 0; i < nCount; i++)
137
27.3k
        {
138
            // take care of TOGGLE_CASE transliteration:
139
27.3k
            MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
140
141
27.3k
            const i18nutil::Mapping map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
142
27.3k
            std::fill_n(offsetDataEnd, map.nmap, i + startPos);
143
27.3k
            offsetDataEnd += map.nmap;
144
27.3k
            std::copy_n(map.map, map.nmap, out + j);
145
27.3k
            j += map.nmap;
146
27.3k
        }
147
148
1.43k
        *pOffset = css::uno::Sequence< sal_Int32 >(offsetData, offsetDataEnd - offsetData);
149
1.43k
    }
150
134M
    else
151
134M
    {
152
1.44G
        for ( sal_Int32 i = 0; i < nCount; i++)
153
1.30G
        {
154
            // take care of TOGGLE_CASE transliteration:
155
1.30G
            MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
156
157
1.30G
            const i18nutil::Mapping map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
158
1.30G
            std::copy_n(map.map, map.nmap, out + j);
159
1.30G
            j += map.nmap;
160
1.30G
        }
161
134M
    }
162
163
134M
    return OUString(out, j);
164
134M
}
165
166
OUString SAL_CALL
167
Transliteration_body::transliterateChar2String( sal_Unicode inChar )
168
0
{
169
0
    const i18nutil::Mapping map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
170
0
    rtl_uString* pStr = rtl_uString_alloc(map.nmap);
171
0
    sal_Unicode* out = pStr->buffer;
172
0
    sal_Int32 i;
173
174
0
    for (i = 0; i < map.nmap; i++)
175
0
        out[i] = map.map[i];
176
0
    out[i] = 0;
177
178
0
    return OUString( pStr, SAL_NO_ACQUIRE );
179
0
}
180
181
sal_Unicode SAL_CALL
182
Transliteration_body::transliterateChar2Char( sal_Unicode inChar )
183
0
{
184
0
    const i18nutil::Mapping map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
185
0
    if (map.nmap > 1)
186
0
        throw MultipleCharsOutputException();
187
0
    return map.map[0];
188
0
}
189
190
OUString
191
Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
192
    Sequence< sal_Int32 >* pOffset)
193
0
{
194
0
    return transliterateImpl(inStr, startPos, nCount, pOffset);
195
0
}
196
197
Transliteration_casemapping::Transliteration_casemapping()
198
2.09M
{
199
2.09M
    nMappingType = MappingType::NONE;
200
2.09M
    transliterationName = "casemapping(generic)";
201
2.09M
    implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
202
2.09M
}
203
204
Transliteration_u2l::Transliteration_u2l()
205
0
{
206
0
    nMappingType = MappingType::UpperToLower;
207
0
    transliterationName = "upper_to_lower(generic)";
208
0
    implementationName = "com.sun.star.i18n.Transliteration.UPPERCASE_LOWERCASE";
209
0
}
210
211
Transliteration_l2u::Transliteration_l2u()
212
0
{
213
0
    nMappingType = MappingType::LowerToUpper;
214
0
    transliterationName = "lower_to_upper(generic)";
215
0
    implementationName = "com.sun.star.i18n.Transliteration.LOWERCASE_UPPERCASE";
216
0
}
217
218
Transliteration_togglecase::Transliteration_togglecase()
219
0
{
220
    // usually nMappingType must NOT be a combination of different flags here,
221
    // but we take care of that problem in Transliteration_body::transliterate above
222
    // before that value is used. There we will decide which of both is to be used on
223
    // a per character basis.
224
0
    nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
225
0
    transliterationName = "toggle(generic)";
226
0
    implementationName = "com.sun.star.i18n.Transliteration.TOGGLE_CASE";
227
0
}
228
229
Transliteration_titlecase::Transliteration_titlecase()
230
0
{
231
0
    nMappingType = MappingType::ToTitle;
232
0
    transliterationName = "title(generic)";
233
0
    implementationName = "com.sun.star.i18n.Transliteration.TITLE_CASE";
234
0
}
235
236
/// @throws RuntimeException
237
static OUString transliterate_titlecase_Impl(
238
    std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount,
239
    const Locale &rLocale,
240
    Sequence< sal_Int32 >* pOffset )
241
0
{
242
0
    const OUString aText( inStr.substr( startPos, nCount ) );
243
244
0
    OUString aRes;
245
0
    if (!aText.isEmpty())
246
0
    {
247
0
        const Reference< XComponentContext >& xContext = ::comphelper::getProcessComponentContext();
248
0
        rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) );
249
250
        // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
251
        // an exception we need to handle the first chara manually...
252
253
        // we don't want to change surrogates by accident, thuse we use proper code point iteration
254
0
        sal_uInt32 cFirstChar = aText.iterateCodePoints( &o3tl::temporary(sal_Int32(0)) );
255
0
        OUString aResolvedLigature( &cFirstChar, 1 );
256
        // toUpper can be used to properly resolve ligatures and characters like Beta
257
0
        aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
258
        // since toTitle will leave all-uppercase text unchanged we first need to
259
        // use toLower to bring possible 2nd and following chars in lowercase
260
0
        aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
261
0
        sal_Int32 nResolvedLen = aResolvedLigature.getLength();
262
263
        // now we can properly use toTitle to get the expected result for the resolved string.
264
        // The rest of the text should just become lowercase.
265
0
        aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
266
0
               xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale );
267
0
        if (pOffset)
268
0
        {
269
0
            pOffset->realloc( aRes.getLength() );
270
271
0
            auto [begin, end] = asNonConstRange(*pOffset);
272
0
            sal_Int32* pOffsetInt = std::fill_n(begin, nResolvedLen, 0);
273
0
            std::iota(pOffsetInt, end, 1);
274
0
        }
275
0
    }
276
0
    return aRes;
277
0
}
278
279
// this function expects to be called on a word-by-word basis,
280
// namely that startPos points to the first char of the word
281
OUString Transliteration_titlecase::transliterateImpl(
282
    const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
283
    Sequence< sal_Int32 >* pOffset )
284
0
{
285
0
    return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
286
0
}
287
288
Transliteration_sentencecase::Transliteration_sentencecase()
289
0
{
290
0
    nMappingType = MappingType::ToTitle;  // though only to be applied to the first word...
291
0
    transliterationName = "sentence(generic)";
292
0
    implementationName = "com.sun.star.i18n.Transliteration.SENTENCE_CASE";
293
0
}
294
295
// this function expects to be called on a sentence-by-sentence basis,
296
// namely that startPos points to the first word (NOT first char!) in the sentence
297
OUString Transliteration_sentencecase::transliterateImpl(
298
    const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
299
    Sequence< sal_Int32 >* pOffset )
300
0
{
301
0
    return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
302
0
}
303
304
}
305
306
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */