/src/libreoffice/unotools/source/i18n/charclass.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <comphelper/processfactory.hxx> |
21 | | #include <unotools/charclass.hxx> |
22 | | #include <rtl/character.hxx> |
23 | | #include <comphelper/diagnose_ex.hxx> |
24 | | |
25 | | #include <com/sun/star/i18n/CharacterClassification.hpp> |
26 | | #include <com/sun/star/i18n/DirectionProperty.hpp> |
27 | | #include <com/sun/star/i18n/ParseResult.hpp> |
28 | | #include <com/sun/star/i18n/UnicodeScript.hpp> |
29 | | #include <utility> |
30 | | |
31 | | using namespace ::com::sun::star; |
32 | | using namespace ::com::sun::star::i18n; |
33 | | using namespace ::com::sun::star::uno; |
34 | | |
35 | | CharClass::CharClass( |
36 | | const Reference< uno::XComponentContext > & rxContext, |
37 | | LanguageTag aLanguageTag |
38 | | ) |
39 | 635k | : maLanguageTag(std::move( aLanguageTag)) |
40 | 635k | { |
41 | 635k | xCC = CharacterClassification::create( rxContext ); |
42 | 635k | } |
43 | | |
44 | | CharClass::CharClass( LanguageTag aLanguageTag ) |
45 | 65.5k | : maLanguageTag(std::move( aLanguageTag)) |
46 | 65.5k | { |
47 | 65.5k | xCC = CharacterClassification::create( comphelper::getProcessComponentContext() ); |
48 | 65.5k | } |
49 | | |
50 | | CharClass::~CharClass() |
51 | 700k | { |
52 | 700k | } |
53 | | |
54 | | const LanguageTag& CharClass::getLanguageTag() const |
55 | 8.46M | { |
56 | 8.46M | return maLanguageTag; |
57 | 8.46M | } |
58 | | |
59 | | const css::lang::Locale& CharClass::getMyLocale() const |
60 | 151M | { |
61 | 151M | return maLanguageTag.getLocale(); |
62 | 151M | } |
63 | | |
64 | | // static |
65 | | bool CharClass::isAsciiNumeric( std::u16string_view rStr ) |
66 | 3.16k | { |
67 | 3.16k | if ( rStr.empty() ) |
68 | 929 | return false; |
69 | 2.23k | const sal_Unicode* p = rStr.data(); |
70 | 2.23k | const sal_Unicode* const pStop = p + rStr.size(); |
71 | | |
72 | 2.23k | do |
73 | 3.15k | { |
74 | 3.15k | if ( !rtl::isAsciiDigit( *p ) ) |
75 | 1.44k | return false; |
76 | 3.15k | } |
77 | 2.23k | while ( ++p < pStop ); |
78 | | |
79 | 790 | return true; |
80 | 2.23k | } |
81 | | |
82 | | // static |
83 | | bool CharClass::isAsciiAlpha( std::u16string_view rStr ) |
84 | 0 | { |
85 | 0 | if ( rStr.empty() ) |
86 | 0 | return false; |
87 | 0 | const sal_Unicode* p = rStr.data(); |
88 | 0 | const sal_Unicode* const pStop = p + rStr.size(); |
89 | |
|
90 | 0 | do |
91 | 0 | { |
92 | 0 | if ( !rtl::isAsciiAlpha( *p ) ) |
93 | 0 | return false; |
94 | 0 | } |
95 | 0 | while ( ++p < pStop ); |
96 | | |
97 | 0 | return true; |
98 | 0 | } |
99 | | |
100 | | bool CharClass::isAlpha( const OUString& rStr, sal_Int32 nPos ) const |
101 | 0 | { |
102 | 0 | sal_Unicode c = rStr[nPos]; |
103 | 0 | if ( c < 128 ) |
104 | 0 | return rtl::isAsciiAlpha( c ); |
105 | | |
106 | 0 | try |
107 | 0 | { |
108 | 0 | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & |
109 | 0 | nCharClassAlphaType) != 0; |
110 | 0 | } |
111 | 0 | catch ( const Exception& ) |
112 | 0 | { |
113 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
114 | 0 | } |
115 | 0 | return false; |
116 | 0 | } |
117 | | |
118 | | bool CharClass::isLetter( const OUString& rStr, sal_Int32 nPos ) const |
119 | 72.8M | { |
120 | 72.8M | sal_Unicode c = rStr[nPos]; |
121 | 72.8M | if ( c < 128 ) |
122 | 72.2M | return rtl::isAsciiAlpha( c ); |
123 | | |
124 | 526k | try |
125 | 526k | { |
126 | 526k | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & |
127 | 526k | nCharClassLetterType) != 0; |
128 | 526k | } |
129 | 526k | catch ( const Exception& ) |
130 | 526k | { |
131 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
132 | 0 | } |
133 | 0 | return false; |
134 | 526k | } |
135 | | |
136 | | bool CharClass::isLetter( const OUString& rStr ) const |
137 | 0 | { |
138 | 0 | if (rStr.isEmpty()) |
139 | 0 | return false; |
140 | | |
141 | 0 | try |
142 | 0 | { |
143 | 0 | sal_Int32 nPos = 0; |
144 | 0 | while (nPos < rStr.getLength()) |
145 | 0 | { |
146 | 0 | if (!isLetter( rStr, nPos)) |
147 | 0 | return false; |
148 | 0 | rStr.iterateCodePoints( &nPos); |
149 | 0 | } |
150 | 0 | return true; |
151 | 0 | } |
152 | 0 | catch ( const Exception& ) |
153 | 0 | { |
154 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
155 | 0 | } |
156 | 0 | return false; |
157 | 0 | } |
158 | | |
159 | | bool CharClass::isDigit( const OUString& rStr, sal_Int32 nPos ) const |
160 | 3.12M | { |
161 | 3.12M | sal_Unicode c = rStr[ nPos ]; |
162 | 3.12M | if ( c < 128 ) |
163 | 1.61M | return rtl::isAsciiDigit( c ); |
164 | | |
165 | 1.50M | try |
166 | 1.50M | { |
167 | 1.50M | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & |
168 | 1.50M | KCharacterType::DIGIT) != 0; |
169 | 1.50M | } |
170 | 1.50M | catch ( const Exception& ) |
171 | 1.50M | { |
172 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
173 | 0 | } |
174 | 0 | return false; |
175 | 1.50M | } |
176 | | |
177 | | bool CharClass::isNumeric( const OUString& rStr ) const |
178 | 0 | { |
179 | 0 | if (rStr.isEmpty()) |
180 | 0 | return false; |
181 | | |
182 | 0 | try |
183 | 0 | { |
184 | 0 | sal_Int32 nPos = 0; |
185 | 0 | while (nPos < rStr.getLength()) |
186 | 0 | { |
187 | 0 | if (!isDigit( rStr, nPos)) |
188 | 0 | return false; |
189 | 0 | rStr.iterateCodePoints( &nPos); |
190 | 0 | } |
191 | 0 | return true; |
192 | 0 | } |
193 | 0 | catch ( const Exception& ) |
194 | 0 | { |
195 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
196 | 0 | } |
197 | 0 | return false; |
198 | 0 | } |
199 | | |
200 | | bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const |
201 | 0 | { |
202 | 0 | sal_Unicode c = rStr[nPos]; |
203 | 0 | if ( c < 128 ) |
204 | 0 | return rtl::isAsciiAlphanumeric( c ); |
205 | | |
206 | 0 | try |
207 | 0 | { |
208 | 0 | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & |
209 | 0 | (nCharClassAlphaType | nCharClassNumericType)) != 0; |
210 | 0 | } |
211 | 0 | catch ( const Exception& ) |
212 | 0 | { |
213 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
214 | 0 | } |
215 | 0 | return false; |
216 | 0 | } |
217 | | |
218 | | bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const |
219 | 827k | { |
220 | 827k | sal_Unicode c = rStr[nPos]; |
221 | 827k | if ( c < 128 ) |
222 | 713k | return rtl::isAsciiAlphanumeric( c ); |
223 | | |
224 | 114k | try |
225 | 114k | { |
226 | 114k | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & |
227 | 114k | (nCharClassLetterType | nCharClassNumericType)) != 0; |
228 | 114k | } |
229 | 114k | catch ( const Exception& ) |
230 | 114k | { |
231 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
232 | 0 | } |
233 | 0 | return false; |
234 | 114k | } |
235 | | |
236 | | bool CharClass::isLetterNumeric( const OUString& rStr ) const |
237 | 0 | { |
238 | 0 | if (rStr.isEmpty()) |
239 | 0 | return false; |
240 | | |
241 | 0 | try |
242 | 0 | { |
243 | 0 | sal_Int32 nPos = 0; |
244 | 0 | while (nPos < rStr.getLength()) |
245 | 0 | { |
246 | 0 | if (!isLetterNumeric( rStr, nPos)) |
247 | 0 | return false; |
248 | 0 | rStr.iterateCodePoints( &nPos); |
249 | 0 | } |
250 | 0 | return true; |
251 | 0 | } |
252 | 0 | catch ( const Exception& ) |
253 | 0 | { |
254 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
255 | 0 | } |
256 | 0 | return false; |
257 | 0 | } |
258 | | |
259 | | bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const |
260 | 0 | { |
261 | 0 | sal_Unicode c = rStr[nPos]; |
262 | 0 | if ( c < 128 ) |
263 | 0 | return rtl::isAsciiAlphanumeric( c ); |
264 | | |
265 | 0 | try |
266 | 0 | { |
267 | 0 | return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0; |
268 | 0 | } |
269 | 0 | catch ( const Exception& ) |
270 | 0 | { |
271 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
272 | 0 | } |
273 | 0 | return false; |
274 | 0 | } |
275 | | |
276 | | bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const |
277 | 0 | { |
278 | 0 | sal_Unicode c = rStr[nPos]; |
279 | 0 | if ( c < 128 ) |
280 | 0 | return rtl::isAsciiUpperCase(c); |
281 | | |
282 | 0 | try |
283 | 0 | { |
284 | 0 | return (xCC->getCharacterType( rStr, nPos, getMyLocale()) & |
285 | 0 | KCharacterType::UPPER) != 0; |
286 | 0 | } |
287 | 0 | catch ( const Exception& ) |
288 | 0 | { |
289 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
290 | 0 | } |
291 | 0 | return false; |
292 | 0 | } |
293 | | |
294 | | OUString CharClass::titlecase(const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount) const |
295 | 0 | { |
296 | 0 | try |
297 | 0 | { |
298 | 0 | return xCC->toTitle( rStr, nPos, nCount, getMyLocale() ); |
299 | 0 | } |
300 | 0 | catch ( const Exception& ) |
301 | 0 | { |
302 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
303 | 0 | } |
304 | 0 | return rStr.copy( nPos, nCount ); |
305 | 0 | } |
306 | | |
307 | | OUString CharClass::uppercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const |
308 | 129M | { |
309 | 129M | try |
310 | 129M | { |
311 | 129M | return xCC->toUpper( rStr, nPos, nCount, getMyLocale() ); |
312 | 129M | } |
313 | 129M | catch ( const Exception& ) |
314 | 129M | { |
315 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
316 | 0 | } |
317 | 0 | return rStr.copy( nPos, nCount ); |
318 | 129M | } |
319 | | |
320 | | OUString CharClass::lowercase( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const |
321 | 2.57M | { |
322 | 2.57M | try |
323 | 2.57M | { |
324 | 2.57M | return xCC->toLower( rStr, nPos, nCount, getMyLocale() ); |
325 | 2.57M | } |
326 | 2.57M | catch ( const Exception& ) |
327 | 2.57M | { |
328 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
329 | 0 | } |
330 | 0 | return rStr.copy( nPos, nCount ); |
331 | 2.57M | } |
332 | | |
333 | | sal_Int16 CharClass::getType( const OUString& rStr, sal_Int32 nPos ) const |
334 | 8.84M | { |
335 | 8.84M | try |
336 | 8.84M | { |
337 | 8.84M | return xCC->getType( rStr, nPos ); |
338 | 8.84M | } |
339 | 8.84M | catch ( const Exception& ) |
340 | 8.84M | { |
341 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
342 | 0 | } |
343 | 0 | return 0; |
344 | 8.84M | } |
345 | | |
346 | | css::i18n::DirectionProperty CharClass::getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const |
347 | 0 | { |
348 | 0 | try |
349 | 0 | { |
350 | 0 | return static_cast<css::i18n::DirectionProperty>(xCC->getCharacterDirection( rStr, nPos )); |
351 | 0 | } |
352 | 0 | catch ( const Exception& ) |
353 | 0 | { |
354 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
355 | 0 | } |
356 | 0 | return css::i18n::DirectionProperty_LEFT_TO_RIGHT; |
357 | 0 | } |
358 | | |
359 | | css::i18n::UnicodeScript CharClass::getScript( const OUString& rStr, sal_Int32 nPos ) const |
360 | 862 | { |
361 | 862 | try |
362 | 862 | { |
363 | 862 | return static_cast<css::i18n::UnicodeScript>(xCC->getScript( rStr, nPos )); |
364 | 862 | } |
365 | 862 | catch ( const Exception& ) |
366 | 862 | { |
367 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
368 | 0 | } |
369 | 0 | return UnicodeScript_kBasicLatin; |
370 | 862 | } |
371 | | |
372 | | sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) const |
373 | 13.0k | { |
374 | 13.0k | try |
375 | 13.0k | { |
376 | 13.0k | return xCC->getCharacterType( rStr, nPos, getMyLocale() ); |
377 | 13.0k | } |
378 | 13.0k | catch ( const Exception& ) |
379 | 13.0k | { |
380 | 0 | TOOLS_WARN_EXCEPTION("unotools.i18n", "" ); |
381 | 0 | } |
382 | 0 | return 0; |
383 | 13.0k | } |
384 | | |
385 | | css::i18n::ParseResult CharClass::parseAnyToken( |
386 | | const OUString& rStr, |
387 | | sal_Int32 nPos, |
388 | | sal_Int32 nStartCharFlags, |
389 | | const OUString& userDefinedCharactersStart, |
390 | | sal_Int32 nContCharFlags, |
391 | | const OUString& userDefinedCharactersCont ) const |
392 | 9.75M | { |
393 | 9.75M | try |
394 | 9.75M | { |
395 | 9.75M | return xCC->parseAnyToken( rStr, nPos, getMyLocale(), |
396 | 9.75M | nStartCharFlags, userDefinedCharactersStart, |
397 | 9.75M | nContCharFlags, userDefinedCharactersCont ); |
398 | 9.75M | } |
399 | 9.75M | catch ( const Exception& ) |
400 | 9.75M | { |
401 | 0 | TOOLS_WARN_EXCEPTION( "unotools.i18n", "parseAnyToken" ); |
402 | 0 | } |
403 | 0 | return ParseResult(); |
404 | 9.75M | } |
405 | | |
406 | | css::i18n::ParseResult CharClass::parsePredefinedToken( |
407 | | sal_Int32 nTokenType, |
408 | | const OUString& rStr, |
409 | | sal_Int32 nPos, |
410 | | sal_Int32 nStartCharFlags, |
411 | | const OUString& userDefinedCharactersStart, |
412 | | sal_Int32 nContCharFlags, |
413 | | const OUString& userDefinedCharactersCont ) const |
414 | 8.14M | { |
415 | 8.14M | try |
416 | 8.14M | { |
417 | 8.14M | return xCC->parsePredefinedToken( nTokenType, rStr, nPos, getMyLocale(), |
418 | 8.14M | nStartCharFlags, userDefinedCharactersStart, |
419 | 8.14M | nContCharFlags, userDefinedCharactersCont ); |
420 | 8.14M | } |
421 | 8.14M | catch ( const Exception& ) |
422 | 8.14M | { |
423 | 0 | TOOLS_WARN_EXCEPTION( "unotools.i18n", "parsePredefinedToken" ); |
424 | 0 | } |
425 | 0 | return ParseResult(); |
426 | 8.14M | } |
427 | | |
428 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |