/src/libreoffice/lingucomponent/source/languageguessing/guesslang.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <iostream> |
21 | | #include <mutex> |
22 | | #include <string_view> |
23 | | |
24 | | #include <osl/file.hxx> |
25 | | #include <tools/debug.hxx> |
26 | | |
27 | | #include <sal/config.h> |
28 | | #include <cppuhelper/factory.hxx> |
29 | | #include <cppuhelper/implbase.hxx> |
30 | | #include <cppuhelper/supportsservice.hxx> |
31 | | |
32 | | #include "simpleguesser.hxx" |
33 | | #include "guess.hxx" |
34 | | |
35 | | #include <com/sun/star/lang/IllegalArgumentException.hpp> |
36 | | #include <com/sun/star/lang/XServiceInfo.hpp> |
37 | | #include <com/sun/star/linguistic2/XLanguageGuessing.hpp> |
38 | | #include <unotools/pathoptions.hxx> |
39 | | #include <osl/thread.h> |
40 | | |
41 | | #include <sal/macros.h> |
42 | | |
43 | | #ifdef SYSTEM_LIBEXTTEXTCAT |
44 | | #include <libexttextcat/textcat.h> |
45 | | #else |
46 | | #include <textcat.h> |
47 | | #endif |
48 | | |
49 | | using namespace ::osl; |
50 | | using namespace ::cppu; |
51 | | using namespace ::com::sun::star; |
52 | | using namespace ::com::sun::star::uno; |
53 | | using namespace ::com::sun::star::lang; |
54 | | using namespace ::com::sun::star::linguistic2; |
55 | | |
/// Returns the one mutex shared by the language-guessing methods in this file.
/// It is a function-local static, so every call yields the same object.
/// Note that std::mutex is not recursive.
static std::mutex & GetLangGuessMutex()
{
    static std::mutex s_aLangGuessMutex;
    return s_aLangGuessMutex;
}
61 | | |
62 | | namespace { |
63 | | |
64 | | class LangGuess_Impl : |
65 | | public ::cppu::WeakImplHelper< |
66 | | XLanguageGuessing, |
67 | | XServiceInfo > |
68 | | { |
69 | | SimpleGuesser m_aGuesser; |
70 | | bool m_bInitialized; |
71 | | |
72 | 0 | virtual ~LangGuess_Impl() override {} |
73 | | void EnsureInitialized(); |
74 | | |
75 | | public: |
76 | | LangGuess_Impl(); |
77 | | LangGuess_Impl(const LangGuess_Impl&) = delete; |
78 | | LangGuess_Impl& operator=(const LangGuess_Impl&) = delete; |
79 | | |
80 | | // XServiceInfo implementation |
81 | | virtual OUString SAL_CALL getImplementationName( ) override; |
82 | | virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override; |
83 | | virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) override; |
84 | | |
85 | | // XLanguageGuessing implementation |
86 | | virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override; |
87 | | virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; |
88 | | virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override; |
89 | | virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages( ) override; |
90 | | virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages( ) override; |
91 | | virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages( ) override; |
92 | | |
93 | | // implementation specific |
94 | | /// @throws RuntimeException |
95 | | void SetFingerPrintsDB( std::u16string_view fileName ); |
96 | | }; |
97 | | |
98 | | } |
99 | | |
100 | | LangGuess_Impl::LangGuess_Impl() : |
101 | 0 | m_bInitialized( false ) |
102 | 0 | { |
103 | 0 | } |
104 | | |
105 | | void LangGuess_Impl::EnsureInitialized() |
106 | 0 | { |
107 | 0 | if (m_bInitialized) |
108 | 0 | return; |
109 | | |
110 | | // set this to true at the very start to prevent loops because of |
111 | | // implicitly called functions below |
112 | 0 | m_bInitialized = true; |
113 | | |
114 | | // set default fingerprint path to where those get installed |
115 | 0 | OUString aPhysPath; |
116 | 0 | OUString aURL( SvtPathOptions().GetFingerprintPath() ); |
117 | 0 | osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath ); |
118 | | #ifdef _WIN32 |
119 | | aPhysPath += "\\"; |
120 | | #else |
121 | 0 | aPhysPath += "/"; |
122 | 0 | #endif |
123 | |
|
124 | 0 | SetFingerPrintsDB( aPhysPath ); |
125 | |
|
126 | | #if !defined(EXTTEXTCAT_VERSION_MAJOR) |
127 | | |
128 | | // disable currently not functional languages... |
129 | | struct LangCountry |
130 | | { |
131 | | const char *pLang; |
132 | | const char *pCountry; |
133 | | }; |
134 | | LangCountry aDisable[] = |
135 | | { |
136 | | // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0 |
137 | | // which is the first with EXTTEXTCAT_VERSION_MAJOR defined |
138 | | {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, |
139 | | {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""}, |
140 | | {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""} |
141 | | }; |
142 | | sal_Int32 nNum = SAL_N_ELEMENTS(aDisable); |
143 | | Sequence< Locale > aDisableSeq( nNum ); |
144 | | Locale *pDisableSeq = aDisableSeq.getArray(); |
145 | | for (sal_Int32 i = 0; i < nNum; ++i) |
146 | | { |
147 | | Locale aLocale; |
148 | | aLocale.Language = OUString::createFromAscii( aDisable[i].pLang ); |
149 | | aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry ); |
150 | | pDisableSeq[i] = aLocale; |
151 | | } |
152 | | disableLanguages( aDisableSeq ); |
153 | | DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" ); |
154 | | #endif |
155 | 0 | } |
156 | | |
157 | | Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage( |
158 | | const OUString& rText, |
159 | | ::sal_Int32 nStartPos, |
160 | | ::sal_Int32 nLen ) |
161 | 0 | { |
162 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
163 | |
|
164 | 0 | EnsureInitialized(); |
165 | |
|
166 | 0 | if (nStartPos < 0 || nLen < 0 || nStartPos + nLen > rText.getLength()) |
167 | 0 | throw lang::IllegalArgumentException(); |
168 | | |
169 | 0 | OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) ); |
170 | 0 | Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr()); |
171 | 0 | lang::Locale aRes; |
172 | 0 | aRes.Language = OUString::createFromAscii( g.GetLanguage() ); |
173 | 0 | aRes.Country = OUString::createFromAscii( g.GetCountry() ); |
174 | 0 | return aRes; |
175 | 0 | } |
176 | | |
177 | 0 | #define DEFAULT_CONF_FILE_NAME "fpdb.conf" |
178 | | |
179 | | void LangGuess_Impl::SetFingerPrintsDB( |
180 | | std::u16string_view filePath ) |
181 | 0 | { |
182 | | //! text encoding for file name / path needs to be in the same encoding the OS uses |
183 | 0 | OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() ); |
184 | 0 | OString conf_file_path = path + DEFAULT_CONF_FILE_NAME; |
185 | |
|
186 | 0 | m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr()); |
187 | 0 | } |
188 | | |
189 | | uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( ) |
190 | 0 | { |
191 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
192 | |
|
193 | 0 | EnsureInitialized(); |
194 | |
|
195 | 0 | Sequence< css::lang::Locale > aRes; |
196 | 0 | std::vector<Guess> gs = m_aGuesser.GetAllManagedLanguages(); |
197 | 0 | aRes.realloc(gs.size()); |
198 | |
|
199 | 0 | css::lang::Locale *pRes = aRes.getArray(); |
200 | |
|
201 | 0 | for(size_t i = 0; i < gs.size() ; i++ ){ |
202 | 0 | css::lang::Locale current_aRes; |
203 | 0 | current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); |
204 | 0 | current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); |
205 | 0 | pRes[i] = std::move(current_aRes); |
206 | 0 | } |
207 | |
|
208 | 0 | return aRes; |
209 | 0 | } |
210 | | |
211 | | uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( ) |
212 | 0 | { |
213 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
214 | |
|
215 | 0 | EnsureInitialized(); |
216 | |
|
217 | 0 | Sequence< css::lang::Locale > aRes; |
218 | 0 | std::vector<Guess> gs = m_aGuesser.GetAvailableLanguages(); |
219 | 0 | aRes.realloc(gs.size()); |
220 | |
|
221 | 0 | css::lang::Locale *pRes = aRes.getArray(); |
222 | |
|
223 | 0 | for(size_t i = 0; i < gs.size() ; i++ ){ |
224 | 0 | css::lang::Locale current_aRes; |
225 | 0 | current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); |
226 | 0 | current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); |
227 | 0 | pRes[i] = std::move(current_aRes); |
228 | 0 | } |
229 | |
|
230 | 0 | return aRes; |
231 | 0 | } |
232 | | |
233 | | uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( ) |
234 | 0 | { |
235 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
236 | |
|
237 | 0 | EnsureInitialized(); |
238 | |
|
239 | 0 | Sequence< css::lang::Locale > aRes; |
240 | 0 | std::vector<Guess> gs = m_aGuesser.GetUnavailableLanguages(); |
241 | 0 | aRes.realloc(gs.size()); |
242 | |
|
243 | 0 | css::lang::Locale *pRes = aRes.getArray(); |
244 | |
|
245 | 0 | for(size_t i = 0; i < gs.size() ; i++ ){ |
246 | 0 | css::lang::Locale current_aRes; |
247 | 0 | current_aRes.Language = OUString::createFromAscii( gs[i].GetLanguage() ); |
248 | 0 | current_aRes.Country = OUString::createFromAscii( gs[i].GetCountry() ); |
249 | 0 | pRes[i] = std::move(current_aRes); |
250 | 0 | } |
251 | |
|
252 | 0 | return aRes; |
253 | 0 | } |
254 | | |
255 | | void SAL_CALL LangGuess_Impl::disableLanguages( |
256 | | const uno::Sequence< Locale >& rLanguages ) |
257 | 0 | { |
258 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
259 | |
|
260 | 0 | EnsureInitialized(); |
261 | |
|
262 | 0 | for (const Locale& rLanguage : rLanguages) |
263 | 0 | { |
264 | 0 | std::string language; |
265 | |
|
266 | 0 | OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); |
267 | 0 | OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); |
268 | |
|
269 | 0 | language += l.getStr(); |
270 | 0 | language += "-"; |
271 | 0 | language += c.getStr(); |
272 | 0 | m_aGuesser.DisableLanguage(language); |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | | void SAL_CALL LangGuess_Impl::enableLanguages( |
277 | | const uno::Sequence< Locale >& rLanguages ) |
278 | 0 | { |
279 | 0 | std::scoped_lock aGuard( GetLangGuessMutex() ); |
280 | |
|
281 | 0 | EnsureInitialized(); |
282 | |
|
283 | 0 | for (const Locale& rLanguage : rLanguages) |
284 | 0 | { |
285 | 0 | std::string language; |
286 | |
|
287 | 0 | OString l = OUStringToOString( rLanguage.Language, RTL_TEXTENCODING_ASCII_US ); |
288 | 0 | OString c = OUStringToOString( rLanguage.Country, RTL_TEXTENCODING_ASCII_US ); |
289 | |
|
290 | 0 | language += l.getStr(); |
291 | 0 | language += "-"; |
292 | 0 | language += c.getStr(); |
293 | 0 | m_aGuesser.EnableLanguage(language); |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | | OUString SAL_CALL LangGuess_Impl::getImplementationName( ) |
298 | 0 | { |
299 | 0 | return u"com.sun.star.lingu2.LanguageGuessing"_ustr; |
300 | 0 | } |
301 | | |
302 | | sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName ) |
303 | 0 | { |
304 | 0 | return cppu::supportsService(this, ServiceName); |
305 | 0 | } |
306 | | |
307 | | Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( ) |
308 | 0 | { |
309 | 0 | return { u"com.sun.star.linguistic2.LanguageGuessing"_ustr }; |
310 | 0 | } |
311 | | |
312 | | extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* |
313 | | lingucomponent_LangGuess_get_implementation( |
314 | | css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) |
315 | 0 | { |
316 | 0 | return cppu::acquire(new LangGuess_Impl()); |
317 | 0 | } |
318 | | |
319 | | |
320 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |