/src/libreoffice/linguistic/source/misc.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <memory> |
21 | | #include <optional> |
22 | | #include <sal/log.hxx> |
23 | | #include <svl/lngmisc.hxx> |
24 | | #include <ucbhelper/content.hxx> |
25 | | #include <i18nlangtag/languagetag.hxx> |
26 | | #include <com/sun/star/beans/XPropertySet.hpp> |
27 | | #include <com/sun/star/beans/XFastPropertySet.hpp> |
28 | | #include <com/sun/star/beans/PropertyValues.hpp> |
29 | | #include <com/sun/star/frame/Desktop.hpp> |
30 | | #include <com/sun/star/frame/XStorable.hpp> |
31 | | #include <com/sun/star/linguistic2/DictionaryType.hpp> |
32 | | #include <com/sun/star/linguistic2/DictionaryList.hpp> |
33 | | #include <com/sun/star/linguistic2/LinguProperties.hpp> |
34 | | #include <com/sun/star/ucb/XCommandEnvironment.hpp> |
35 | | #include <com/sun/star/uno/Sequence.hxx> |
36 | | #include <com/sun/star/uno/Reference.h> |
37 | | #include <comphelper/lok.hxx> |
38 | | #include <comphelper/processfactory.hxx> |
39 | | #include <comphelper/sequence.hxx> |
40 | | #include <unotools/charclass.hxx> |
41 | | #include <unotools/linguprops.hxx> |
42 | | #include <unotools/localedatawrapper.hxx> |
43 | | #include <svtools/strings.hrc> |
44 | | #include <unotools/resmgr.hxx> |
45 | | #include <o3tl/string_view.hxx> |
46 | | |
47 | | #include <linguistic/misc.hxx> |
48 | | #include <linguistic/hyphdta.hxx> |
49 | | |
50 | | using namespace osl; |
51 | | using namespace com::sun::star; |
52 | | using namespace com::sun::star::beans; |
53 | | using namespace com::sun::star::lang; |
54 | | using namespace com::sun::star::uno; |
55 | | using namespace com::sun::star::i18n; |
56 | | using namespace com::sun::star::linguistic2; |
57 | | |
58 | | namespace linguistic |
59 | | { |
60 | | |
61 | | //!! multi-thread safe mutex for all platforms !! |
62 | | osl::Mutex & GetLinguMutex() |
63 | 0 | { |
64 | 0 | static osl::Mutex SINGLETON; |
65 | 0 | return SINGLETON; |
66 | 0 | } |
67 | | |
68 | | const LocaleDataWrapper & GetLocaleDataWrapper( LanguageType nLang ) |
69 | 0 | { |
70 | 0 | static std::optional<LocaleDataWrapper> oLclDtaWrp; |
71 | 0 | if (!oLclDtaWrp || oLclDtaWrp->getLoadedLanguageTag().getLanguageType() != nLang) |
72 | 0 | oLclDtaWrp.emplace(LanguageTag( nLang )); |
73 | 0 | return *oLclDtaWrp; |
74 | 0 | } |
75 | | |
76 | | LanguageType LinguLocaleToLanguage( const css::lang::Locale& rLocale ) |
77 | 0 | { |
78 | 0 | if ( rLocale.Language.isEmpty() ) |
79 | 0 | return LANGUAGE_NONE; |
80 | 0 | return LanguageTag::convertToLanguageType( rLocale ); |
81 | 0 | } |
82 | | |
83 | | css::lang::Locale LinguLanguageToLocale( LanguageType nLanguage ) |
84 | 0 | { |
85 | 0 | if (nLanguage == LANGUAGE_NONE) |
86 | 0 | return css::lang::Locale(); |
87 | 0 | return LanguageTag::convertToLocale( nLanguage); |
88 | 0 | } |
89 | | |
90 | | bool LinguIsUnspecified( LanguageType nLanguage ) |
91 | 0 | { |
92 | 0 | return nLanguage.anyOf( |
93 | 0 | LANGUAGE_NONE, |
94 | 0 | LANGUAGE_UNDETERMINED, |
95 | 0 | LANGUAGE_MULTIPLE); |
96 | 0 | } |
97 | | |
98 | | // When adding anything keep both LinguIsUnspecified() methods in sync! |
99 | | // For mappings between language code string and LanguageType see |
100 | | // i18nlangtag/source/isolang/isolang.cxx |
101 | | |
102 | | bool LinguIsUnspecified( std::u16string_view rBcp47 ) |
103 | 0 | { |
104 | 0 | if (rBcp47.size() != 3) |
105 | 0 | return false; |
106 | 0 | return rBcp47 == u"zxx" || rBcp47 == u"und" || rBcp47 == u"mul"; |
107 | 0 | } |
108 | | |
109 | | static sal_Int32 Minimum( sal_Int32 n1, sal_Int32 n2, sal_Int32 n3 ) |
110 | 0 | { |
111 | 0 | return std::min(std::min(n1, n2), n3); |
112 | 0 | } |
113 | | |
114 | | namespace { |
115 | | |
116 | | class IntArray2D |
117 | | { |
118 | | private: |
119 | | std::unique_ptr<sal_Int32[]> pData; |
120 | | int n1, n2; |
121 | | |
122 | | public: |
123 | | IntArray2D( int nDim1, int nDim2 ); |
124 | | |
125 | | sal_Int32 & Value( int i, int k ); |
126 | | }; |
127 | | |
128 | | } |
129 | | |
130 | | IntArray2D::IntArray2D( int nDim1, int nDim2 ) |
131 | 0 | { |
132 | 0 | n1 = nDim1; |
133 | 0 | n2 = nDim2; |
134 | 0 | pData.reset( new sal_Int32[n1 * n2] ); |
135 | 0 | } |
136 | | |
137 | | sal_Int32 & IntArray2D::Value( int i, int k ) |
138 | 0 | { |
139 | 0 | assert( (0 <= i && i < n1) && "first index out of range" ); |
140 | 0 | assert( (0 <= k && k < n2) && "second index out of range" ); |
141 | 0 | assert( (i * n2 + k < n1 * n2) && "index out of range" ); |
142 | 0 | return pData[ i * n2 + k ]; |
143 | 0 | } |
144 | | |
145 | | sal_Int32 LevDistance( std::u16string_view rTxt1, std::u16string_view rTxt2 ) |
146 | 0 | { |
147 | 0 | sal_Int32 nLen1 = rTxt1.size(); |
148 | 0 | sal_Int32 nLen2 = rTxt2.size(); |
149 | |
|
150 | 0 | if (nLen1 == 0) |
151 | 0 | return nLen2; |
152 | 0 | if (nLen2 == 0) |
153 | 0 | return nLen1; |
154 | | |
155 | 0 | IntArray2D aData( nLen1 + 1, nLen2 + 1 ); |
156 | |
|
157 | 0 | sal_Int32 i, k; |
158 | 0 | for (i = 0; i <= nLen1; ++i) |
159 | 0 | aData.Value(i, 0) = i; |
160 | 0 | for (k = 0; k <= nLen2; ++k) |
161 | 0 | aData.Value(0, k) = k; |
162 | 0 | for (i = 1; i <= nLen1; ++i) |
163 | 0 | { |
164 | 0 | for (k = 1; k <= nLen2; ++k) |
165 | 0 | { |
166 | 0 | sal_Unicode c1i = rTxt1[i - 1]; |
167 | 0 | sal_Unicode c2k = rTxt2[k - 1]; |
168 | 0 | sal_Int32 nCost = c1i == c2k ? 0 : 1; |
169 | 0 | sal_Int32 nNew = Minimum( aData.Value(i-1, k ) + 1, |
170 | 0 | aData.Value(i , k-1) + 1, |
171 | 0 | aData.Value(i-1, k-1) + nCost ); |
172 | | // take transposition (exchange with left or right char) in account |
173 | 0 | if (2 < i && 2 < k) |
174 | 0 | { |
175 | 0 | int nT = aData.Value(i-2, k-2) + 1; |
176 | 0 | if (rTxt1[i - 2] != c1i) |
177 | 0 | ++nT; |
178 | 0 | if (rTxt2[k - 2] != c2k) |
179 | 0 | ++nT; |
180 | 0 | if (nT < nNew) |
181 | 0 | nNew = nT; |
182 | 0 | } |
183 | |
|
184 | 0 | aData.Value(i, k) = nNew; |
185 | 0 | } |
186 | 0 | } |
187 | 0 | sal_Int32 nDist = aData.Value(nLen1, nLen2); |
188 | 0 | return nDist; |
189 | 0 | } |
190 | | |
191 | | bool IsUseDicList( const PropertyValues &rProperties, |
192 | | const uno::Reference< XPropertySet > &rxProp ) |
193 | 0 | { |
194 | 0 | bool bRes = true; |
195 | |
|
196 | 0 | const PropertyValue *pVal = std::find_if(rProperties.begin(), rProperties.end(), |
197 | 0 | [](const PropertyValue& rVal) { return UPH_IS_USE_DICTIONARY_LIST == rVal.Handle; }); |
198 | |
|
199 | 0 | if (pVal != rProperties.end()) |
200 | 0 | { |
201 | 0 | pVal->Value >>= bRes; |
202 | 0 | } |
203 | 0 | else // no temporary value found in 'rProperties' |
204 | 0 | { |
205 | 0 | uno::Reference< XFastPropertySet > xFast( rxProp, UNO_QUERY ); |
206 | 0 | if (xFast.is()) |
207 | 0 | xFast->getFastPropertyValue( UPH_IS_USE_DICTIONARY_LIST ) >>= bRes; |
208 | 0 | } |
209 | |
|
210 | 0 | return bRes; |
211 | 0 | } |
212 | | |
213 | | bool IsIgnoreControlChars( const PropertyValues &rProperties, |
214 | | const uno::Reference< XPropertySet > &rxProp ) |
215 | 0 | { |
216 | 0 | bool bRes = true; |
217 | |
|
218 | 0 | const PropertyValue *pVal = std::find_if(rProperties.begin(), rProperties.end(), |
219 | 0 | [](const PropertyValue& rVal) { return UPH_IS_IGNORE_CONTROL_CHARACTERS == rVal.Handle; }); |
220 | |
|
221 | 0 | if (pVal != rProperties.end()) |
222 | 0 | { |
223 | 0 | pVal->Value >>= bRes; |
224 | 0 | } |
225 | 0 | else // no temporary value found in 'rProperties' |
226 | 0 | { |
227 | 0 | uno::Reference< XFastPropertySet > xFast( rxProp, UNO_QUERY ); |
228 | 0 | if (xFast.is()) |
229 | 0 | xFast->getFastPropertyValue( UPH_IS_IGNORE_CONTROL_CHARACTERS ) >>= bRes; |
230 | 0 | } |
231 | |
|
232 | 0 | return bRes; |
233 | 0 | } |
234 | | |
235 | | static bool lcl_HasHyphInfo( const uno::Reference<XDictionaryEntry> &xEntry ) |
236 | 0 | { |
237 | 0 | bool bRes = false; |
238 | 0 | if (xEntry.is()) |
239 | 0 | { |
240 | | // there has to be (at least one) '=' or '[' denoting a hyphenation position |
241 | | // and it must not be before any character of the word |
242 | 0 | sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' ); |
243 | 0 | if (nIdx == -1) |
244 | 0 | nIdx = xEntry->getDictionaryWord().indexOf( '[' ); |
245 | 0 | bRes = nIdx != -1 && nIdx != 0; |
246 | 0 | } |
247 | 0 | return bRes; |
248 | 0 | } |
249 | | |
250 | | uno::Reference< XDictionaryEntry > SearchDicList( |
251 | | const uno::Reference< XSearchableDictionaryList > &xDicList, |
252 | | const OUString &rWord, LanguageType nLanguage, |
253 | | bool bSearchPosDics, bool bSearchSpellEntry ) |
254 | 0 | { |
255 | 0 | MutexGuard aGuard( GetLinguMutex() ); |
256 | |
|
257 | 0 | uno::Reference< XDictionaryEntry > xEntry; |
258 | |
|
259 | 0 | if (!xDicList.is()) |
260 | 0 | return xEntry; |
261 | | |
262 | 0 | const uno::Sequence< uno::Reference< XDictionary > > |
263 | 0 | aDics( xDicList->getDictionaries() ); |
264 | 0 | const uno::Reference< XDictionary > |
265 | 0 | *pDic = aDics.getConstArray(); |
266 | 0 | sal_Int32 nDics = xDicList->getCount(); |
267 | |
|
268 | 0 | sal_Int32 i; |
269 | 0 | for (i = 0; i < nDics; i++) |
270 | 0 | { |
271 | 0 | uno::Reference< XDictionary > axDic = pDic[i]; |
272 | |
|
273 | 0 | DictionaryType eType = axDic->getDictionaryType(); |
274 | 0 | LanguageType nLang = LinguLocaleToLanguage( axDic->getLocale() ); |
275 | |
|
276 | 0 | if ( axDic.is() && axDic->isActive() |
277 | 0 | && (nLang == nLanguage || LinguIsUnspecified( nLang)) ) |
278 | 0 | { |
279 | | // DictionaryType_MIXED is deprecated |
280 | 0 | SAL_WARN_IF(eType == DictionaryType_MIXED, "linguistic", "unexpected dictionary type"); |
281 | | |
282 | 0 | if ( (!bSearchPosDics && eType == DictionaryType_NEGATIVE) |
283 | 0 | || ( bSearchPosDics && eType == DictionaryType_POSITIVE)) |
284 | 0 | { |
285 | 0 | xEntry = axDic->getEntry( rWord ); |
286 | 0 | if ( xEntry.is() && (bSearchSpellEntry || lcl_HasHyphInfo( xEntry )) ) |
287 | 0 | break; |
288 | 0 | xEntry = nullptr; |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | 0 | return xEntry; |
294 | 0 | } |
295 | | |
296 | | bool SaveDictionaries( const uno::Reference< XSearchableDictionaryList > &xDicList ) |
297 | 0 | { |
298 | 0 | if (!xDicList.is()) |
299 | 0 | return true; |
300 | | |
301 | 0 | bool bRet = true; |
302 | |
|
303 | 0 | const Sequence< uno::Reference< XDictionary > > aDics( xDicList->getDictionaries() ); |
304 | 0 | for (const uno::Reference<XDictionary>& rDic : aDics) |
305 | 0 | { |
306 | 0 | try |
307 | 0 | { |
308 | 0 | uno::Reference< frame::XStorable > xStor( rDic, UNO_QUERY ); |
309 | 0 | if (xStor.is()) |
310 | 0 | { |
311 | 0 | if (!xStor->isReadonly() && xStor->hasLocation()) |
312 | 0 | xStor->store(); |
313 | 0 | } |
314 | 0 | } |
315 | 0 | catch(uno::Exception &) |
316 | 0 | { |
317 | 0 | bRet = false; |
318 | 0 | } |
319 | 0 | } |
320 | |
|
321 | 0 | return bRet; |
322 | 0 | } |
323 | | |
324 | | DictionaryError AddEntryToDic( |
325 | | uno::Reference< XDictionary > const &rxDic, |
326 | | const OUString &rWord, bool bIsNeg, |
327 | | const OUString &rRplcTxt, |
328 | | bool bStripDot ) |
329 | 0 | { |
330 | 0 | if (!rxDic.is()) |
331 | 0 | return DictionaryError::NOT_EXISTS; |
332 | | |
333 | 0 | OUString aTmp( rWord ); |
334 | 0 | if (bStripDot) |
335 | 0 | { |
336 | 0 | sal_Int32 nLen = rWord.getLength(); |
337 | 0 | if (nLen > 0 && '.' == rWord[ nLen - 1]) |
338 | 0 | { |
339 | | // remove trailing '.' |
340 | | // (this is the official way to do this :-( ) |
341 | 0 | aTmp = aTmp.copy( 0, nLen - 1 ); |
342 | 0 | } |
343 | 0 | } |
344 | 0 | bool bAddOk = rxDic->add( aTmp, bIsNeg, rRplcTxt ); |
345 | |
|
346 | 0 | DictionaryError nRes = DictionaryError::NONE; |
347 | 0 | if (!bAddOk) |
348 | 0 | { |
349 | 0 | if (rxDic->isFull()) |
350 | 0 | nRes = DictionaryError::FULL; |
351 | 0 | else |
352 | 0 | { |
353 | 0 | uno::Reference< frame::XStorable > xStor( rxDic, UNO_QUERY ); |
354 | 0 | if (xStor.is() && xStor->isReadonly()) |
355 | 0 | nRes = DictionaryError::READONLY; |
356 | 0 | else |
357 | 0 | nRes = DictionaryError::UNKNOWN; |
358 | 0 | } |
359 | 0 | } |
360 | |
|
361 | 0 | return nRes; |
362 | 0 | } |
363 | | |
364 | | std::vector< LanguageType > |
365 | | LocaleSeqToLangVec( uno::Sequence< Locale > const &rLocaleSeq ) |
366 | 0 | { |
367 | 0 | std::vector< LanguageType > aLangs; |
368 | 0 | aLangs.reserve(rLocaleSeq.getLength()); |
369 | |
|
370 | 0 | std::transform(rLocaleSeq.begin(), rLocaleSeq.end(), std::back_inserter(aLangs), |
371 | 0 | [](const Locale& rLocale) { return LinguLocaleToLanguage(rLocale); }); |
372 | |
|
373 | 0 | return aLangs; |
374 | 0 | } |
375 | | |
376 | | uno::Sequence< sal_Int16 > |
377 | | LocaleSeqToLangSeq( uno::Sequence< Locale > const &rLocaleSeq ) |
378 | 0 | { |
379 | 0 | std::vector<sal_Int16> aLangs; |
380 | 0 | aLangs.reserve(rLocaleSeq.getLength()); |
381 | |
|
382 | 0 | std::transform(rLocaleSeq.begin(), rLocaleSeq.end(), std::back_inserter(aLangs), |
383 | 0 | [](const Locale& rLocale) { return static_cast<sal_uInt16>(LinguLocaleToLanguage(rLocale)); }); |
384 | |
|
385 | 0 | return comphelper::containerToSequence(aLangs); |
386 | 0 | } |
387 | | bool IsReadOnly( const OUString &rURL, bool *pbExist ) |
388 | 0 | { |
389 | 0 | bool bRes = false; |
390 | 0 | bool bExists = false; |
391 | |
|
392 | 0 | if (!rURL.isEmpty()) |
393 | 0 | { |
394 | 0 | try |
395 | 0 | { |
396 | 0 | uno::Reference< css::ucb::XCommandEnvironment > xCmdEnv; |
397 | 0 | ::ucbhelper::Content aContent( rURL, xCmdEnv, comphelper::getProcessComponentContext() ); |
398 | |
|
399 | 0 | bExists = aContent.isDocument(); |
400 | 0 | if (bExists) |
401 | 0 | { |
402 | 0 | Any aAny( aContent.getPropertyValue( u"IsReadOnly"_ustr ) ); |
403 | 0 | aAny >>= bRes; |
404 | 0 | } |
405 | 0 | } |
406 | 0 | catch (Exception &) |
407 | 0 | { |
408 | 0 | bRes = true; |
409 | 0 | } |
410 | 0 | } |
411 | |
|
412 | 0 | if (pbExist) |
413 | 0 | *pbExist = bExists; |
414 | 0 | return bRes; |
415 | 0 | } |
416 | | |
417 | | static bool GetAltSpelling( sal_Int16 &rnChgPos, sal_Int16 &rnChgLen, OUString &rRplc, |
418 | | uno::Reference< XHyphenatedWord > const &rxHyphWord ) |
419 | 0 | { |
420 | 0 | bool bRes = rxHyphWord->isAlternativeSpelling(); |
421 | 0 | if (bRes) |
422 | 0 | { |
423 | 0 | OUString aWord( rxHyphWord->getWord() ), |
424 | 0 | aHyphenatedWord( rxHyphWord->getHyphenatedWord() ); |
425 | 0 | sal_Int16 nHyphenationPos = rxHyphWord->getHyphenationPos(); |
426 | 0 | /*sal_Int16 nHyphenPos = rxHyphWord->getHyphenPos()*/; |
427 | 0 | const sal_Unicode *pWord = aWord.getStr(), |
428 | 0 | *pAltWord = aHyphenatedWord.getStr(); |
429 | | |
430 | | // at least char changes directly left or right to the hyphen |
431 | | // should(!) be handled properly... |
432 | | //! nHyphenationPos and nHyphenPos differ at most by 1 (see above) |
433 | | //! Beware: eg "Schiffahrt" in German (pre spelling reform) |
434 | | //! proves to be a bit nasty (nChgPosLeft and nChgPosRight overlap |
435 | | //! to an extend.) |
436 | | |
437 | | // find first different char from left |
438 | 0 | sal_Int32 nPosL = 0, |
439 | 0 | nAltPosL = 0; |
440 | 0 | for (sal_Int16 i = 0 ; pWord[ nPosL ] == pAltWord[ nAltPosL ]; nPosL++, nAltPosL++, i++) |
441 | 0 | { |
442 | | // restrict changes area beginning to the right to |
443 | | // the char immediately following the hyphen. |
444 | | //! serves to insert the additional "f" in "Schiffahrt" at |
445 | | //! position 5 rather than position 6. |
446 | 0 | if (i >= nHyphenationPos + 1) |
447 | 0 | break; |
448 | 0 | } |
449 | | |
450 | | // find first different char from right |
451 | 0 | sal_Int32 nPosR = aWord.getLength() - 1, |
452 | 0 | nAltPosR = aHyphenatedWord.getLength() - 1; |
453 | 0 | for ( ; nPosR >= nPosL && nAltPosR >= nAltPosL |
454 | 0 | && pWord[ nPosR ] == pAltWord[ nAltPosR ]; |
455 | 0 | nPosR--, nAltPosR--) |
456 | 0 | ; |
457 | |
|
458 | 0 | rnChgPos = sal::static_int_cast< sal_Int16 >(nPosL); |
459 | 0 | rnChgLen = sal::static_int_cast< sal_Int16 >(nAltPosR - nPosL); |
460 | 0 | assert( rnChgLen >= 0 && "nChgLen < 0"); |
461 | |
|
462 | 0 | sal_Int32 nTxtStart = nPosL; |
463 | 0 | sal_Int32 nTxtLen = nAltPosR - nPosL + 1; |
464 | 0 | rRplc = aHyphenatedWord.copy( nTxtStart, nTxtLen ); |
465 | 0 | } |
466 | 0 | return bRes; |
467 | 0 | } |
468 | | |
469 | | static sal_Int16 GetOrigWordPos( std::u16string_view rOrigWord, sal_Int16 nPos ) |
470 | 0 | { |
471 | 0 | sal_Int32 nLen = rOrigWord.size(); |
472 | 0 | sal_Int32 i = -1; |
473 | 0 | while (nPos >= 0 && i++ < nLen) |
474 | 0 | { |
475 | 0 | sal_Unicode cChar = rOrigWord[i]; |
476 | 0 | bool bSkip = IsHyphen( cChar ) || IsControlChar( cChar ); |
477 | 0 | if (!bSkip) |
478 | 0 | --nPos; |
479 | 0 | } |
480 | 0 | return sal::static_int_cast< sal_Int16 >((0 <= i && i < nLen) ? i : -1); |
481 | 0 | } |
482 | | |
483 | | sal_Int32 GetPosInWordToCheck( std::u16string_view rTxt, sal_Int32 nPos ) |
484 | 0 | { |
485 | 0 | sal_Int32 nRes = -1; |
486 | 0 | sal_Int32 nLen = rTxt.size(); |
487 | 0 | if (0 <= nPos && nPos < nLen) |
488 | 0 | { |
489 | 0 | nRes = 0; |
490 | 0 | for (sal_Int32 i = 0; i < nPos; ++i) |
491 | 0 | { |
492 | 0 | sal_Unicode cChar = rTxt[i]; |
493 | 0 | bool bSkip = IsHyphen( cChar ) || IsControlChar( cChar ); |
494 | 0 | if (!bSkip) |
495 | 0 | ++nRes; |
496 | 0 | } |
497 | 0 | } |
498 | 0 | return nRes; |
499 | 0 | } |
500 | | |
501 | | rtl::Reference< HyphenatedWord > RebuildHyphensAndControlChars( |
502 | | const OUString &rOrigWord, |
503 | | uno::Reference< XHyphenatedWord > const &rxHyphWord ) |
504 | 0 | { |
505 | 0 | if (rOrigWord.isEmpty() || !rxHyphWord.is()) |
506 | 0 | return nullptr; |
507 | | |
508 | 0 | sal_Int16 nChgPos = 0, |
509 | 0 | nChgLen = 0; |
510 | 0 | OUString aRplc; |
511 | 0 | bool bAltSpelling = GetAltSpelling( nChgPos, nChgLen, aRplc, rxHyphWord ); |
512 | |
|
513 | 0 | OUString aOrigHyphenatedWord; |
514 | 0 | sal_Int16 nOrigHyphenPos = -1; |
515 | 0 | sal_Int16 nOrigHyphenationPos = -1; |
516 | 0 | if (!bAltSpelling) |
517 | 0 | { |
518 | 0 | aOrigHyphenatedWord = rOrigWord; |
519 | 0 | nOrigHyphenPos = GetOrigWordPos( rOrigWord, rxHyphWord->getHyphenPos() ); |
520 | 0 | nOrigHyphenationPos = GetOrigWordPos( rOrigWord, rxHyphWord->getHyphenationPos() ); |
521 | 0 | } |
522 | 0 | else |
523 | 0 | { |
524 | | //! should at least work with the German words |
525 | | //! B-"u-c-k-er and Sc-hif-fah-rt |
526 | |
|
527 | 0 | sal_Int16 nPos = GetOrigWordPos( rOrigWord, nChgPos ); |
528 | | |
529 | | // get words like Sc-hif-fah-rt to work correct |
530 | 0 | sal_Int16 nHyphenationPos = rxHyphWord->getHyphenationPos(); |
531 | 0 | if (nChgPos > nHyphenationPos) |
532 | 0 | --nPos; |
533 | |
|
534 | 0 | std::u16string_view aLeft = rOrigWord.subView( 0, nPos ); |
535 | 0 | std::u16string_view aRight = rOrigWord.subView( nPos ); // FIXME: changes at the right side |
536 | |
|
537 | 0 | aOrigHyphenatedWord = aLeft + aRplc + aRight; |
538 | |
|
539 | 0 | nOrigHyphenPos = sal::static_int_cast< sal_Int16 >(aLeft.size() + |
540 | 0 | rxHyphWord->getHyphenPos() - nChgPos); |
541 | 0 | nOrigHyphenationPos = GetOrigWordPos( rOrigWord, nHyphenationPos ); |
542 | 0 | } |
543 | |
|
544 | 0 | if (nOrigHyphenPos != -1 && nOrigHyphenationPos != -1) |
545 | 0 | { |
546 | 0 | SAL_INFO( "linguistic", "failed to get nOrigHyphenPos or nOrigHyphenationPos" ); |
547 | 0 | return nullptr; |
548 | 0 | } |
549 | | |
550 | 0 | LanguageType nLang = LinguLocaleToLanguage( rxHyphWord->getLocale() ); |
551 | 0 | return new HyphenatedWord( |
552 | 0 | rOrigWord, nLang, nOrigHyphenationPos, |
553 | 0 | aOrigHyphenatedWord, nOrigHyphenPos ); |
554 | |
|
555 | 0 | } |
556 | | |
557 | | bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, LanguageType nLanguage ) |
558 | 0 | { |
559 | 0 | assert(nPos >= 0 && nLen > 0); |
560 | 0 | CharClass aCC(( LanguageTag( nLanguage ) )); |
561 | |
|
562 | 0 | bool bCaseIsAlwaysUppercase = false; |
563 | 0 | const sal_Int32 nEnd = std::min(nPos + nLen, rText.getLength()); |
564 | 0 | while (nPos < nEnd) |
565 | 0 | { |
566 | | // only consider characters that have case-status |
567 | 0 | if (aCC.isAlpha(rText, nPos)) |
568 | 0 | { |
569 | 0 | if (aCC.isUpper(rText, nPos)) |
570 | 0 | bCaseIsAlwaysUppercase = true; |
571 | 0 | else |
572 | 0 | return false; |
573 | 0 | } |
574 | 0 | rText.iterateCodePoints(&nPos); |
575 | 0 | } |
576 | | |
577 | 0 | return bCaseIsAlwaysUppercase; |
578 | 0 | } |
579 | | |
580 | | CapType capitalType(const OUString& aTerm, CharClass const * pCC) |
581 | 0 | { |
582 | 0 | sal_Int32 tlen = aTerm.getLength(); |
583 | 0 | if (!pCC || !tlen) |
584 | 0 | return CapType::UNKNOWN; |
585 | | |
586 | 0 | sal_Int32 nc = 0; |
587 | 0 | for (sal_Int32 tindex = 0; tindex < tlen; ++tindex) |
588 | 0 | { |
589 | 0 | if (pCC->getCharacterType(aTerm,tindex) & |
590 | 0 | css::i18n::KCharacterType::UPPER) nc++; |
591 | 0 | } |
592 | |
|
593 | 0 | if (nc == 0) |
594 | 0 | return CapType::NOCAP; |
595 | 0 | if (nc == tlen) |
596 | 0 | return CapType::ALLCAP; |
597 | 0 | if ((nc == 1) && (pCC->getCharacterType(aTerm,0) & |
598 | 0 | css::i18n::KCharacterType::UPPER)) |
599 | 0 | return CapType::INITCAP; |
600 | | |
601 | 0 | return CapType::MIXED; |
602 | 0 | } |
603 | | |
604 | | // sorted(!) array of unicode ranges for code points that are exclusively(!) used as numbers |
605 | | // and thus may NOT not be part of names or words like the Chinese/Japanese number characters |
606 | | const sal_uInt32 the_aDigitZeroes [] = |
607 | | { |
608 | | 0x00000030, //0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE |
609 | | 0x00000660, //0669 ; Decimal # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE |
610 | | 0x000006F0, //06F9 ; Decimal # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE |
611 | | 0x000007C0, //07C9 ; Decimal # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE |
612 | | 0x00000966, //096F ; Decimal # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE |
613 | | 0x000009E6, //09EF ; Decimal # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE |
614 | | 0x00000A66, //0A6F ; Decimal # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE |
615 | | 0x00000AE6, //0AEF ; Decimal # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE |
616 | | 0x00000B66, //0B6F ; Decimal # Nd [10] ODIA DIGIT ZERO..ODIA DIGIT NINE |
617 | | 0x00000BE6, //0BEF ; Decimal # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE |
618 | | 0x00000C66, //0C6F ; Decimal # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE |
619 | | 0x00000CE6, //0CEF ; Decimal # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE |
620 | | 0x00000D66, //0D6F ; Decimal # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE |
621 | | 0x00000E50, //0E59 ; Decimal # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE |
622 | | 0x00000ED0, //0ED9 ; Decimal # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE |
623 | | 0x00000F20, //0F29 ; Decimal # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE |
624 | | 0x00001040, //1049 ; Decimal # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE |
625 | | 0x00001090, //1099 ; Decimal # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE |
626 | | 0x000017E0, //17E9 ; Decimal # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE |
627 | | 0x00001810, //1819 ; Decimal # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE |
628 | | 0x00001946, //194F ; Decimal # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE |
629 | | 0x000019D0, //19D9 ; Decimal # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE |
630 | | 0x00001B50, //1B59 ; Decimal # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE |
631 | | 0x00001BB0, //1BB9 ; Decimal # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE |
632 | | 0x00001C40, //1C49 ; Decimal # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE |
633 | | 0x00001C50, //1C59 ; Decimal # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE |
634 | | 0x0000A620, //A629 ; Decimal # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE |
635 | | 0x0000A8D0, //A8D9 ; Decimal # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE |
636 | | 0x0000A900, //A909 ; Decimal # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE |
637 | | 0x0000AA50, //AA59 ; Decimal # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE |
638 | | 0x0000FF10, //FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE |
639 | | 0x000104A0, //104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE |
640 | | 0x0001D7CE //1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE |
641 | | }; |
642 | | |
643 | | bool HasDigits( std::u16string_view rText ) |
644 | 0 | { |
645 | 0 | const sal_Int32 nLen = rText.size(); |
646 | |
|
647 | 0 | sal_Int32 i = 0; |
648 | 0 | while (i < nLen) // for all characters ... |
649 | 0 | { |
650 | 0 | const sal_uInt32 nCodePoint = o3tl::iterateCodePoints( rText, &i ); // handle unicode surrogates correctly... |
651 | 0 | for (unsigned int nDigitZero : the_aDigitZeroes) // ... check in all 0..9 ranges |
652 | 0 | { |
653 | 0 | if (nDigitZero > nCodePoint) |
654 | 0 | break; |
655 | 0 | if (/*nDigitZero <= nCodePoint &&*/ nCodePoint <= nDigitZero + 9) |
656 | 0 | return true; |
657 | 0 | } |
658 | 0 | } |
659 | 0 | return false; |
660 | 0 | } |
661 | | |
662 | | bool IsNumeric( std::u16string_view rText ) |
663 | 0 | { |
664 | 0 | bool bRes = false; |
665 | 0 | if (!rText.empty()) |
666 | 0 | { |
667 | 0 | sal_Int32 nLen = rText.size(); |
668 | 0 | bRes = true; |
669 | 0 | for(sal_Int32 i = 0; i < nLen; ++i) |
670 | 0 | { |
671 | 0 | sal_Unicode cChar = rText[ i ]; |
672 | 0 | if ( '0' > cChar || cChar > '9' ) |
673 | 0 | { |
674 | 0 | bRes = false; |
675 | 0 | break; |
676 | 0 | } |
677 | 0 | } |
678 | 0 | } |
679 | 0 | return bRes; |
680 | 0 | } |
681 | | |
682 | | uno::Reference< XLinguProperties > GetLinguProperties() |
683 | 0 | { |
684 | 0 | return LinguProperties::create( comphelper::getProcessComponentContext() ); |
685 | 0 | } |
686 | | |
687 | | uno::Reference< XSearchableDictionaryList > GetDictionaryList() |
688 | 0 | { |
689 | 0 | const uno::Reference< XComponentContext >& xContext( comphelper::getProcessComponentContext() ); |
690 | 0 | uno::Reference< XSearchableDictionaryList > xRef; |
691 | 0 | try |
692 | 0 | { |
693 | 0 | xRef = DictionaryList::create(xContext); |
694 | 0 | } |
695 | 0 | catch (const uno::Exception &) |
696 | 0 | { |
697 | 0 | SAL_WARN( "linguistic", "createInstance failed" ); |
698 | 0 | } |
699 | | |
700 | 0 | return xRef; |
701 | 0 | } |
702 | | |
703 | | uno::Reference< XDictionary > GetIgnoreAllList() |
704 | 0 | { |
705 | 0 | uno::Reference< XDictionary > xRes; |
706 | 0 | uno::Reference< XSearchableDictionaryList > xDL( GetDictionaryList() ); |
707 | 0 | if (xDL.is()) |
708 | 0 | { |
709 | 0 | const LanguageTag tag = comphelper::LibreOfficeKit::isActive() |
710 | 0 | ? LanguageTag(u"en-US"_ustr) |
711 | 0 | : SvtSysLocale().GetUILanguageTag(); |
712 | 0 | std::locale loc(Translate::Create("svt", tag)); |
713 | 0 | xRes = xDL->getDictionaryByName( Translate::get(STR_DESCRIPTION_IGNOREALLLIST, loc) ); |
714 | 0 | } |
715 | 0 | return xRes; |
716 | 0 | } |
717 | | |
718 | | AppExitListener::AppExitListener() |
719 | 0 | { |
720 | | // add object to Desktop EventListeners in order to properly call |
721 | | // the AtExit function at application exit. |
722 | 0 | const uno::Reference< XComponentContext >& xContext( comphelper::getProcessComponentContext() ); |
723 | |
|
724 | 0 | try |
725 | 0 | { |
726 | 0 | xDesktop = frame::Desktop::create(xContext); |
727 | 0 | } |
728 | 0 | catch (const uno::Exception &) |
729 | 0 | { |
730 | 0 | SAL_WARN( "linguistic", "createInstance failed" ); |
731 | 0 | } |
732 | 0 | } |
733 | | |
734 | | AppExitListener::~AppExitListener() |
735 | 0 | { |
736 | 0 | } |
737 | | |
738 | | void AppExitListener::Activate() |
739 | 0 | { |
740 | 0 | if (xDesktop.is()) |
741 | 0 | xDesktop->addTerminateListener( this ); |
742 | 0 | } |
743 | | |
744 | | void AppExitListener::Deactivate() |
745 | 0 | { |
746 | 0 | if (xDesktop.is()) |
747 | 0 | xDesktop->removeTerminateListener( this ); |
748 | 0 | } |
749 | | |
750 | | void SAL_CALL |
751 | | AppExitListener::disposing( const EventObject& rEvtSource ) |
752 | 0 | { |
753 | 0 | MutexGuard aGuard( GetLinguMutex() ); |
754 | |
|
755 | 0 | if (xDesktop.is() && rEvtSource.Source == xDesktop) |
756 | 0 | { |
757 | 0 | xDesktop = nullptr; //! release reference to desktop |
758 | 0 | } |
759 | 0 | } |
760 | | |
761 | | void SAL_CALL |
762 | | AppExitListener::queryTermination( const EventObject& /*rEvtSource*/ ) |
763 | 0 | { |
764 | 0 | } |
765 | | |
766 | | void SAL_CALL |
767 | | AppExitListener::notifyTermination( const EventObject& rEvtSource ) |
768 | 0 | { |
769 | 0 | MutexGuard aGuard( GetLinguMutex() ); |
770 | |
|
771 | 0 | if (xDesktop.is() && rEvtSource.Source == xDesktop) |
772 | 0 | { |
773 | 0 | AtExit(); |
774 | 0 | } |
775 | 0 | } |
776 | | |
777 | | } // namespace linguistic |
778 | | |
779 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |