Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/unotools/source/i18n/charclass.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <comphelper/processfactory.hxx>
21
#include <unotools/charclass.hxx>
22
#include <rtl/character.hxx>
23
#include <comphelper/diagnose_ex.hxx>
24
25
#include <com/sun/star/i18n/CharacterClassification.hpp>
26
#include <utility>
27
28
using namespace ::com::sun::star;
29
using namespace ::com::sun::star::i18n;
30
using namespace ::com::sun::star::uno;
31
32
/** Construct a CharClass for the given language tag, creating the character
    classification service from the supplied component context.

    @param rxContext     component context handed to CharacterClassification::create()
    @param aLanguageTag  language tag; taken by value and moved into the member
 */
CharClass::CharClass( const Reference< uno::XComponentContext >& rxContext,
                      LanguageTag aLanguageTag )
    : maLanguageTag( std::move( aLanguageTag ) )
{
    auto xService = CharacterClassification::create( rxContext );
    xCC = xService;
}
40
41
/** Construct a CharClass for the given language tag, creating the character
    classification service from the process component context.

    @param aLanguageTag  language tag; taken by value and moved into the member
 */
CharClass::CharClass( LanguageTag aLanguageTag )
    : maLanguageTag( std::move( aLanguageTag ) )
{
    const Reference< uno::XComponentContext > xProcessContext
        = comphelper::getProcessComponentContext();
    xCC = CharacterClassification::create( xProcessContext );
}
46
47
/** Destructor; performs no work beyond destroying the members. */
CharClass::~CharClass() = default;
50
51
/** @return a reference to the language tag stored at construction. */
const LanguageTag& CharClass::getLanguageTag() const
{
    const LanguageTag& rTag = maLanguageTag;
    return rTag;
}
55
56
/** @return the locale obtained from the stored language tag's getLocale(). */
const css::lang::Locale& CharClass::getMyLocale() const
{
    const css::lang::Locale& rLocale = maLanguageTag.getLocale();
    return rLocale;
}
60
61
// static
62
bool CharClass::isAsciiNumeric( std::u16string_view rStr )
63
87.3k
{
64
87.3k
    if ( rStr.empty() )
65
350
        return false;
66
87.0k
    const sal_Unicode* p = rStr.data();
67
87.0k
    const sal_Unicode* const pStop = p + rStr.size();
68
69
87.0k
    do
70
88.4k
    {
71
88.4k
        if ( !rtl::isAsciiDigit( *p ) )
72
86.2k
            return false;
73
88.4k
    }
74
87.0k
    while ( ++p < pStop );
75
76
788
    return true;
77
87.0k
}
78
79
// static
80
bool CharClass::isAsciiAlpha( std::u16string_view rStr )
81
0
{
82
0
    if ( rStr.empty() )
83
0
        return false;
84
0
    const sal_Unicode* p = rStr.data();
85
0
    const sal_Unicode* const pStop = p + rStr.size();
86
87
0
    do
88
0
    {
89
0
        if ( !rtl::isAsciiAlpha( *p ) )
90
0
            return false;
91
0
    }
92
0
    while ( ++p < pStop );
93
94
0
    return true;
95
0
}
96
97
bool CharClass::isAlpha( const OUString& rStr, sal_Int32 nPos ) const
98
0
{
99
0
    sal_Unicode c = rStr[nPos];
100
0
    if ( c < 128 )
101
0
        return rtl::isAsciiAlpha( c );
102
103
0
    try
104
0
    {
105
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
106
0
                 nCharClassAlphaType) != 0;
107
0
    }
108
0
    catch ( const Exception& )
109
0
    {
110
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
111
0
    }
112
0
    return false;
113
0
}
114
115
bool CharClass::isLetter( const OUString& rStr, sal_Int32 nPos ) const
116
81.0M
{
117
81.0M
    sal_Unicode c = rStr[nPos];
118
81.0M
    if ( c < 128 )
119
80.5M
        return rtl::isAsciiAlpha( c );
120
121
508k
    try
122
508k
    {
123
508k
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
124
508k
                 nCharClassLetterType) != 0;
125
508k
    }
126
508k
    catch ( const Exception& )
127
508k
    {
128
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
129
0
    }
130
0
    return false;
131
508k
}
132
133
bool CharClass::isLetter( const OUString& rStr ) const
134
0
{
135
0
    if (rStr.isEmpty())
136
0
        return false;
137
138
0
    try
139
0
    {
140
0
        sal_Int32 nPos = 0;
141
0
        while (nPos < rStr.getLength())
142
0
        {
143
0
            if (!isLetter( rStr, nPos))
144
0
                return false;
145
0
            rStr.iterateCodePoints( &nPos);
146
0
        }
147
0
        return true;
148
0
    }
149
0
    catch ( const Exception& )
150
0
    {
151
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
152
0
    }
153
0
    return false;
154
0
}
155
156
bool CharClass::isDigit( const OUString& rStr, sal_Int32 nPos ) const
157
3.11M
{
158
3.11M
    sal_Unicode c = rStr[ nPos ];
159
3.11M
    if ( c < 128 )
160
1.63M
        return rtl::isAsciiDigit( c );
161
162
1.47M
    try
163
1.47M
    {
164
1.47M
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
165
1.47M
                 KCharacterType::DIGIT) != 0;
166
1.47M
    }
167
1.47M
    catch ( const Exception& )
168
1.47M
    {
169
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
170
0
    }
171
0
    return false;
172
1.47M
}
173
174
bool CharClass::isNumeric( const OUString& rStr ) const
175
0
{
176
0
    if (rStr.isEmpty())
177
0
        return false;
178
179
0
    try
180
0
    {
181
0
        sal_Int32 nPos = 0;
182
0
        while (nPos < rStr.getLength())
183
0
        {
184
0
            if (!isDigit( rStr, nPos))
185
0
                return false;
186
0
            rStr.iterateCodePoints( &nPos);
187
0
        }
188
0
        return true;
189
0
    }
190
0
    catch ( const Exception& )
191
0
    {
192
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
193
0
    }
194
0
    return false;
195
0
}
196
197
bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
198
0
{
199
0
    sal_Unicode c = rStr[nPos];
200
0
    if ( c < 128 )
201
0
        return rtl::isAsciiAlphanumeric( c );
202
203
0
    try
204
0
    {
205
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
206
0
                (nCharClassAlphaType | nCharClassNumericType)) != 0;
207
0
    }
208
0
    catch ( const Exception& )
209
0
    {
210
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
211
0
    }
212
0
    return false;
213
0
}
214
215
bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
216
1.01M
{
217
1.01M
    sal_Unicode c = rStr[nPos];
218
1.01M
    if ( c < 128 )
219
916k
        return rtl::isAsciiAlphanumeric( c );
220
221
99.0k
    try
222
99.0k
    {
223
99.0k
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
224
99.0k
                (nCharClassLetterType | nCharClassNumericType)) != 0;
225
99.0k
    }
226
99.0k
    catch ( const Exception& )
227
99.0k
    {
228
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
229
0
    }
230
0
    return false;
231
99.0k
}
232
233
bool CharClass::isLetterNumeric( const OUString& rStr ) const
234
2.94k
{
235
2.94k
    if (rStr.isEmpty())
236
0
        return false;
237
238
2.94k
    try
239
2.94k
    {
240
2.94k
        sal_Int32 nPos = 0;
241
4.35k
        while (nPos < rStr.getLength())
242
2.94k
        {
243
2.94k
            if (!isLetterNumeric( rStr, nPos))
244
1.54k
                return false;
245
1.40k
            rStr.iterateCodePoints( &nPos);
246
1.40k
        }
247
1.40k
        return true;
248
2.94k
    }
249
2.94k
    catch ( const Exception& )
250
2.94k
    {
251
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
252
0
    }
253
0
    return false;
254
2.94k
}
255
256
bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
257
0
{
258
0
    sal_Unicode c = rStr[nPos];
259
0
    if ( c < 128 )
260
0
        return rtl::isAsciiAlphanumeric( c );
261
262
0
    try
263
0
    {
264
0
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0;
265
0
    }
266
0
    catch ( const Exception& )
267
0
    {
268
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
269
0
    }
270
0
    return false;
271
0
}
272
273
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
274
0
{
275
0
    sal_Unicode c = rStr[nPos];
276
0
    if ( c < 128 )
277
0
        return rtl::isAsciiUpperCase(c);
278
279
0
    try
280
0
    {
281
0
        return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
282
0
                KCharacterType::UPPER) != 0;
283
0
    }
284
0
    catch ( const Exception& )
285
0
    {
286
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
287
0
    }
288
0
    return false;
289
0
}
290
291
/** Convert a substring to title case via the character classification service.

    @param rStr    source string
    @param nPos    start index of the substring
    @param nCount  length of the substring
    @return the result of toTitle() for the stored locale; if the service
            call raises an exception it is logged and the unconverted
            substring rStr.copy( nPos, nCount ) is returned instead.
 */
OUString CharClass::titlecase(const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount) const
{
    try
    {
        return xCC->toTitle( rStr, nPos, nCount, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
        // Fall back to the untouched substring.
        return rStr.copy( nPos, nCount );
    }
}
303
304
/** Convert a substring to upper case via the character classification service.

    @param rStr    source string
    @param nPos    start index of the substring
    @param nCount  length of the substring
    @return the result of toUpper() for the stored locale; if the service
            call raises an exception it is logged and the unconverted
            substring rStr.copy( nPos, nCount ) is returned instead.
 */
OUString CharClass::uppercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        return xCC->toUpper( rStr, nPos, nCount, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
        // Fall back to the untouched substring.
        return rStr.copy( nPos, nCount );
    }
}
316
317
/** Convert a substring to lower case via the character classification service.

    @param rStr    source string
    @param nPos    start index of the substring
    @param nCount  length of the substring
    @return the result of toLower() for the stored locale; if the service
            call raises an exception it is logged and the unconverted
            substring rStr.copy( nPos, nCount ) is returned instead.
 */
OUString CharClass::lowercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        return xCC->toLower( rStr, nPos, nCount, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
        // Fall back to the untouched substring.
        return rStr.copy( nPos, nCount );
    }
}
329
330
/** Query the service's getType() for the character at nPos.

    @param rStr  string containing the character
    @param nPos  index of the character within rStr
    @return the value reported by the service, or 0 if the call raised an
            exception (which is logged).
 */
sal_Int16 CharClass::getType( const OUString& rStr, sal_Int32 nPos ) const
{
    sal_Int16 nResult = 0;
    try
    {
        nResult = xCC->getType( rStr, nPos );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return nResult;
}
342
343
/** Query the direction property of the character at nPos.

    @param rStr  string containing the character
    @param nPos  index of the character within rStr
    @return the service's result cast to DirectionProperty, or
            DirectionProperty_LEFT_TO_RIGHT if the call raised an exception
            (which is logged).
 */
css::i18n::DirectionProperty CharClass::getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const
{
    css::i18n::DirectionProperty eDirection = css::i18n::DirectionProperty_LEFT_TO_RIGHT;
    try
    {
        eDirection = static_cast<css::i18n::DirectionProperty>(
            xCC->getCharacterDirection( rStr, nPos ));
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return eDirection;
}
355
356
/** Query the Unicode script of the character at nPos.

    @param rStr  string containing the character
    @param nPos  index of the character within rStr
    @return the service's result cast to UnicodeScript, or
            UnicodeScript_kBasicLatin if the call raised an exception (which
            is logged).
 */
css::i18n::UnicodeScript CharClass::getScript( const OUString& rStr, sal_Int32 nPos ) const
{
    css::i18n::UnicodeScript eScript = UnicodeScript_kBasicLatin;
    try
    {
        eScript = static_cast<css::i18n::UnicodeScript>(xCC->getScript( rStr, nPos ));
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return eScript;
}
368
369
/** Query the character type flags of the character at nPos for the stored locale.

    @param rStr  string containing the character
    @param nPos  index of the character within rStr
    @return the value reported by the service, or 0 if the call raised an
            exception (which is logged).
 */
sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) const
{
    sal_Int32 nFlags = 0;
    try
    {
        nFlags = xCC->getCharacterType( rStr, nPos, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return nFlags;
}
381
382
/** Forward to the service's parseAnyToken() using the stored locale.

    All arguments are passed through unchanged, with the locale from
    getMyLocale() inserted after nPos.

    @return the service's ParseResult, or a default-constructed ParseResult
            if the call raised an exception (which is logged).
 */
css::i18n::ParseResult CharClass::parseAnyToken(
            const OUString& rStr,
            sal_Int32 nPos,
            sal_Int32 nStartCharFlags,
            const OUString& userDefinedCharactersStart,
            sal_Int32 nContCharFlags,
            const OUString& userDefinedCharactersCont ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->parseAnyToken( rStr, nPos, rLocale,
                                   nStartCharFlags, userDefinedCharactersStart,
                                   nContCharFlags, userDefinedCharactersCont );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION( "unotools.i18n", "parseAnyToken" );
        return ParseResult();
    }
}
402
403
/** Forward to the service's parsePredefinedToken() using the stored locale.

    All arguments are passed through unchanged, with the locale from
    getMyLocale() inserted after nPos.

    @return the service's ParseResult, or a default-constructed ParseResult
            if the call raised an exception (which is logged).
 */
css::i18n::ParseResult CharClass::parsePredefinedToken(
            sal_Int32 nTokenType,
            const OUString& rStr,
            sal_Int32 nPos,
            sal_Int32 nStartCharFlags,
            const OUString& userDefinedCharactersStart,
            sal_Int32 nContCharFlags,
            const OUString& userDefinedCharactersCont ) const
{
    try
    {
        const css::lang::Locale& rLocale = getMyLocale();
        return xCC->parsePredefinedToken( nTokenType, rStr, nPos, rLocale,
                                          nStartCharFlags, userDefinedCharactersStart,
                                          nContCharFlags, userDefinedCharactersCont );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION( "unotools.i18n", "parsePredefinedToken" );
        return ParseResult();
    }
}
424
425
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */