Coverage Report

Created: 2026-03-31 11:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/unotools/source/i18n/charclass.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <comphelper/processfactory.hxx>
21
#include <unotools/charclass.hxx>
22
#include <rtl/character.hxx>
23
#include <comphelper/diagnose_ex.hxx>
24
25
#include <com/sun/star/i18n/CharacterClassification.hpp>
26
#include <com/sun/star/i18n/DirectionProperty.hpp>
27
#include <com/sun/star/i18n/ParseResult.hpp>
28
#include <com/sun/star/i18n/UnicodeScript.hpp>
29
#include <utility>
30
31
using namespace ::com::sun::star;
32
using namespace ::com::sun::star::i18n;
33
using namespace ::com::sun::star::uno;
34
35
/** Construct for the given language tag, creating the
    CharacterClassification service from the supplied component context. */
CharClass::CharClass( const Reference< uno::XComponentContext > & rxContext,
                      LanguageTag aLanguageTag )
    : maLanguageTag( std::move( aLanguageTag ) )
{
    // The service reference is assigned in the body, after the tag is stored.
    xCC = CharacterClassification::create( rxContext );
}
43
44
/** Construct for the given language tag, creating the
    CharacterClassification service from the process component context. */
CharClass::CharClass( LanguageTag aLanguageTag )
    : maLanguageTag( std::move( aLanguageTag ) )
{
    xCC = CharacterClassification::create(
            comphelper::getProcessComponentContext() );
}
49
50
// Nothing to release by hand; members clean up after themselves.
CharClass::~CharClass() = default;
53
54
/** @return the language tag this instance was constructed with. */
const LanguageTag& CharClass::getLanguageTag() const
{
    const LanguageTag& rTag = maLanguageTag;
    return rTag;
}
58
59
/** @return the locale obtained from the stored language tag. */
const css::lang::Locale& CharClass::getMyLocale() const
{
    const css::lang::Locale& rLocale = maLanguageTag.getLocale();
    return rLocale;
}
63
64
// static
65
bool CharClass::isAsciiNumeric( std::u16string_view rStr )
66
3.16k
{
67
3.16k
    if ( rStr.empty() )
68
929
        return false;
69
2.23k
    const sal_Unicode* p = rStr.data();
70
2.23k
    const sal_Unicode* const pStop = p + rStr.size();
71
72
2.23k
    do
73
3.15k
    {
74
3.15k
        if ( !rtl::isAsciiDigit( *p ) )
75
1.44k
            return false;
76
3.15k
    }
77
2.23k
    while ( ++p < pStop );
78
79
790
    return true;
80
2.23k
}
81
82
// static
83
bool CharClass::isAsciiAlpha( std::u16string_view rStr )
84
0
{
85
0
    if ( rStr.empty() )
86
0
        return false;
87
0
    const sal_Unicode* p = rStr.data();
88
0
    const sal_Unicode* const pStop = p + rStr.size();
89
90
0
    do
91
0
    {
92
0
        if ( !rtl::isAsciiAlpha( *p ) )
93
0
            return false;
94
0
    }
95
0
    while ( ++p < pStop );
96
97
0
    return true;
98
0
}
99
100
bool CharClass::isAlpha( const OUString& rStr, sal_Int32 nPos ) const
101
0
{
102
0
    sal_Unicode c = rStr[nPos];
103
0
    if ( c < 128 )
104
0
        return rtl::isAsciiAlpha( c );
105
106
0
    try
107
0
    {
108
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
109
0
                 nCharClassAlphaType) != 0;
110
0
    }
111
0
    catch ( const Exception& )
112
0
    {
113
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
114
0
    }
115
0
    return false;
116
0
}
117
118
bool CharClass::isLetter( const OUString& rStr, sal_Int32 nPos ) const
119
72.8M
{
120
72.8M
    sal_Unicode c = rStr[nPos];
121
72.8M
    if ( c < 128 )
122
72.2M
        return rtl::isAsciiAlpha( c );
123
124
526k
    try
125
526k
    {
126
526k
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
127
526k
                 nCharClassLetterType) != 0;
128
526k
    }
129
526k
    catch ( const Exception& )
130
526k
    {
131
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
132
0
    }
133
0
    return false;
134
526k
}
135
136
bool CharClass::isLetter( const OUString& rStr ) const
137
0
{
138
0
    if (rStr.isEmpty())
139
0
        return false;
140
141
0
    try
142
0
    {
143
0
        sal_Int32 nPos = 0;
144
0
        while (nPos < rStr.getLength())
145
0
        {
146
0
            if (!isLetter( rStr, nPos))
147
0
                return false;
148
0
            rStr.iterateCodePoints( &nPos);
149
0
        }
150
0
        return true;
151
0
    }
152
0
    catch ( const Exception& )
153
0
    {
154
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
155
0
    }
156
0
    return false;
157
0
}
158
159
bool CharClass::isDigit( const OUString& rStr, sal_Int32 nPos ) const
160
3.12M
{
161
3.12M
    sal_Unicode c = rStr[ nPos ];
162
3.12M
    if ( c < 128 )
163
1.61M
        return rtl::isAsciiDigit( c );
164
165
1.50M
    try
166
1.50M
    {
167
1.50M
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
168
1.50M
                 KCharacterType::DIGIT) != 0;
169
1.50M
    }
170
1.50M
    catch ( const Exception& )
171
1.50M
    {
172
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
173
0
    }
174
0
    return false;
175
1.50M
}
176
177
bool CharClass::isNumeric( const OUString& rStr ) const
178
0
{
179
0
    if (rStr.isEmpty())
180
0
        return false;
181
182
0
    try
183
0
    {
184
0
        sal_Int32 nPos = 0;
185
0
        while (nPos < rStr.getLength())
186
0
        {
187
0
            if (!isDigit( rStr, nPos))
188
0
                return false;
189
0
            rStr.iterateCodePoints( &nPos);
190
0
        }
191
0
        return true;
192
0
    }
193
0
    catch ( const Exception& )
194
0
    {
195
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
196
0
    }
197
0
    return false;
198
0
}
199
200
bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
201
0
{
202
0
    sal_Unicode c = rStr[nPos];
203
0
    if ( c < 128 )
204
0
        return rtl::isAsciiAlphanumeric( c );
205
206
0
    try
207
0
    {
208
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
209
0
                (nCharClassAlphaType | nCharClassNumericType)) != 0;
210
0
    }
211
0
    catch ( const Exception& )
212
0
    {
213
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
214
0
    }
215
0
    return false;
216
0
}
217
218
bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
219
827k
{
220
827k
    sal_Unicode c = rStr[nPos];
221
827k
    if ( c < 128 )
222
713k
        return rtl::isAsciiAlphanumeric( c );
223
224
114k
    try
225
114k
    {
226
114k
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
227
114k
                (nCharClassLetterType | nCharClassNumericType)) != 0;
228
114k
    }
229
114k
    catch ( const Exception& )
230
114k
    {
231
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
232
0
    }
233
0
    return false;
234
114k
}
235
236
bool CharClass::isLetterNumeric( const OUString& rStr ) const
237
0
{
238
0
    if (rStr.isEmpty())
239
0
        return false;
240
241
0
    try
242
0
    {
243
0
        sal_Int32 nPos = 0;
244
0
        while (nPos < rStr.getLength())
245
0
        {
246
0
            if (!isLetterNumeric( rStr, nPos))
247
0
                return false;
248
0
            rStr.iterateCodePoints( &nPos);
249
0
        }
250
0
        return true;
251
0
    }
252
0
    catch ( const Exception& )
253
0
    {
254
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
255
0
    }
256
0
    return false;
257
0
}
258
259
bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
260
0
{
261
0
    sal_Unicode c = rStr[nPos];
262
0
    if ( c < 128 )
263
0
        return rtl::isAsciiAlphanumeric( c );
264
265
0
    try
266
0
    {
267
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0;
268
0
    }
269
0
    catch ( const Exception& )
270
0
    {
271
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
272
0
    }
273
0
    return false;
274
0
}
275
276
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
277
0
{
278
0
    sal_Unicode c = rStr[nPos];
279
0
    if ( c < 128 )
280
0
        return rtl::isAsciiUpperCase(c);
281
282
0
    try
283
0
    {
284
0
        return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
285
0
                KCharacterType::UPPER) != 0;
286
0
    }
287
0
    catch ( const Exception& )
288
0
    {
289
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
290
0
    }
291
0
    return false;
292
0
}
293
294
/** Forward to the service's toTitle() for nCount units of rStr starting at
    nPos, using this instance's locale.

    @return the service result; if the service throws, the exception is
            reported via TOOLS_WARN_EXCEPTION and rStr.copy(nPos, nCount)
            is returned instead. */
OUString CharClass::titlecase(const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->toTitle( rStr, nPos, nCount, rLocale );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }

    // Fallback: hand back the requested range unchanged.
    return rStr.copy( nPos, nCount );
}
306
307
/** Forward to the service's toUpper() for nCount units of rStr starting at
    nPos, using this instance's locale.

    @return the service result; if the service throws, the exception is
            reported via TOOLS_WARN_EXCEPTION and rStr.copy(nPos, nCount)
            is returned instead. */
OUString CharClass::uppercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->toUpper( rStr, nPos, nCount, rLocale );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }

    // Fallback: hand back the requested range unchanged.
    return rStr.copy( nPos, nCount );
}
319
320
/** Forward to the service's toLower() for nCount units of rStr starting at
    nPos, using this instance's locale.

    @return the service result; if the service throws, the exception is
            reported via TOOLS_WARN_EXCEPTION and rStr.copy(nPos, nCount)
            is returned instead. */
OUString CharClass::lowercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->toLower( rStr, nPos, nCount, rLocale );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }

    // Fallback: hand back the requested range unchanged.
    return rStr.copy( nPos, nCount );
}
332
333
/** Forward to the service's getType() for the character at nPos.

    @return the service result, or 0 if the service throws (the exception is
            reported via TOOLS_WARN_EXCEPTION). */
sal_Int16 CharClass::getType( const OUString& rStr, sal_Int32 nPos ) const
{
    sal_Int16 nResult = 0;
    try
    {
        nResult = xCC->getType( rStr, nPos );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return nResult;
}
345
346
/** Forward to the service's getCharacterDirection() for the character at
    nPos and cast the result to DirectionProperty.

    @return the converted service result, or DirectionProperty_LEFT_TO_RIGHT
            if the service throws (the exception is reported via
            TOOLS_WARN_EXCEPTION). */
css::i18n::DirectionProperty CharClass::getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const
{
    css::i18n::DirectionProperty eDirection = css::i18n::DirectionProperty_LEFT_TO_RIGHT;
    try
    {
        eDirection = static_cast<css::i18n::DirectionProperty>(
                        xCC->getCharacterDirection( rStr, nPos ));
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return eDirection;
}
358
359
/** Forward to the service's getScript() for the character at nPos and cast
    the result to UnicodeScript.

    @return the converted service result, or UnicodeScript_kBasicLatin if the
            service throws (the exception is reported via
            TOOLS_WARN_EXCEPTION). */
css::i18n::UnicodeScript CharClass::getScript( const OUString& rStr, sal_Int32 nPos ) const
{
    css::i18n::UnicodeScript eScript = UnicodeScript_kBasicLatin;
    try
    {
        eScript = static_cast<css::i18n::UnicodeScript>(
                    xCC->getScript( rStr, nPos ));
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return eScript;
}
371
372
/** Forward to the service's getCharacterType() for the character at nPos,
    using this instance's locale.

    @return the service result, or 0 if the service throws (the exception is
            reported via TOOLS_WARN_EXCEPTION). */
sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) const
{
    sal_Int32 nResult = 0;
    try
    {
        nResult = xCC->getCharacterType( rStr, nPos, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return nResult;
}
384
385
/** Forward to the service's parseAnyToken(), inserting this instance's
    locale after nPos; all other arguments are passed through unchanged.

    @return the service result, or a default-constructed ParseResult if the
            service throws (the exception is reported via
            TOOLS_WARN_EXCEPTION). */
css::i18n::ParseResult CharClass::parseAnyToken(
            const OUString& rStr,
            sal_Int32 nPos,
            sal_Int32 nStartCharFlags,
            const OUString& userDefinedCharactersStart,
            sal_Int32 nContCharFlags,
            const OUString& userDefinedCharactersCont ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->parseAnyToken(
                rStr, nPos, rLocale,
                nStartCharFlags, userDefinedCharactersStart,
                nContCharFlags, userDefinedCharactersCont );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION( "unotools.i18n", "parseAnyToken" );
    }

    // Fallback after a reported exception.
    return ParseResult();
}
405
406
/** Forward to the service's parsePredefinedToken(), inserting this
    instance's locale after nPos; all other arguments are passed through
    unchanged.

    @return the service result, or a default-constructed ParseResult if the
            service throws (the exception is reported via
            TOOLS_WARN_EXCEPTION). */
css::i18n::ParseResult CharClass::parsePredefinedToken(
            sal_Int32 nTokenType,
            const OUString& rStr,
            sal_Int32 nPos,
            sal_Int32 nStartCharFlags,
            const OUString& userDefinedCharactersStart,
            sal_Int32 nContCharFlags,
            const OUString& userDefinedCharactersCont ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->parsePredefinedToken(
                nTokenType, rStr, nPos, rLocale,
                nStartCharFlags, userDefinedCharactersStart,
                nContCharFlags, userDefinedCharactersCont );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION( "unotools.i18n", "parsePredefinedToken" );
    }

    // Fallback after a reported exception.
    return ParseResult();
}
427
428
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */