Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/lingucomponent/source/languageguessing/guesslang.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <iostream>
21
#include <mutex>
22
#include <string_view>
23
24
#include <osl/file.hxx>
25
#include <tools/debug.hxx>
26
27
#include <sal/config.h>
28
#include <cppuhelper/factory.hxx>
29
#include <cppuhelper/implbase.hxx>
30
#include <cppuhelper/supportsservice.hxx>
31
32
#include "simpleguesser.hxx"
33
#include "guess.hxx"
34
35
#include <com/sun/star/lang/IllegalArgumentException.hpp>
36
#include <com/sun/star/lang/XServiceInfo.hpp>
37
#include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
38
#include <unotools/pathoptions.hxx>
39
#include <osl/thread.h>
40
41
#include <sal/macros.h>
42
43
#ifdef SYSTEM_LIBEXTTEXTCAT
44
#include <libexttextcat/textcat.h>
45
#else
46
#include <textcat.h>
47
#endif
48
49
using namespace ::osl;
50
using namespace ::cppu;
51
using namespace ::com::sun::star;
52
using namespace ::com::sun::star::uno;
53
using namespace ::com::sun::star::lang;
54
using namespace ::com::sun::star::linguistic2;
55
56
/// Returns the single mutex shared by all language-guessing calls in this file.
/// The mutex is a function-local static, so every call yields the same object.
static std::mutex & GetLangGuessMutex()
{
    static std::mutex s_aLangGuessMutex;
    return s_aLangGuessMutex;
}
61
62
namespace {
63
64
class LangGuess_Impl :
65
    public ::cppu::WeakImplHelper<
66
        XLanguageGuessing,
67
        XServiceInfo >
68
{
69
    SimpleGuesser   m_aGuesser;
70
    bool            m_bInitialized;
71
72
0
    virtual ~LangGuess_Impl() override {}
73
    void    EnsureInitialized();
74
75
public:
76
    LangGuess_Impl();
77
    LangGuess_Impl(const LangGuess_Impl&) = delete;
78
    LangGuess_Impl& operator=(const LangGuess_Impl&) = delete;
79
80
    // XServiceInfo implementation
81
    virtual OUString SAL_CALL getImplementationName(  ) override;
82
    virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) override;
83
    virtual Sequence< OUString > SAL_CALL getSupportedServiceNames(  ) override;
84
85
    // XLanguageGuessing implementation
86
    virtual css::lang::Locale SAL_CALL guessPrimaryLanguage( const OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) override;
87
    virtual void SAL_CALL disableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
88
    virtual void SAL_CALL enableLanguages( const css::uno::Sequence< css::lang::Locale >& aLanguages ) override;
89
    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getAvailableLanguages(  ) override;
90
    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getEnabledLanguages(  ) override;
91
    virtual css::uno::Sequence< css::lang::Locale > SAL_CALL getDisabledLanguages(  ) override;
92
93
    // implementation specific
94
    /// @throws RuntimeException
95
    void SetFingerPrintsDB( std::u16string_view fileName );
96
};
97
98
}
99
100
/// Constructs the component in the "not yet initialized" state; the actual
/// guesser setup is deferred to EnsureInitialized().
LangGuess_Impl::LangGuess_Impl()
    : m_bInitialized( false )
{
}
104
105
void LangGuess_Impl::EnsureInitialized()
106
0
{
107
0
    if (m_bInitialized)
108
0
        return;
109
110
    // set this to true at the very start to prevent loops because of
111
    // implicitly called functions below
112
0
    m_bInitialized = true;
113
114
    // set default fingerprint path to where those get installed
115
0
    OUString aPhysPath;
116
0
    OUString aURL( SvtPathOptions().GetFingerprintPath() );
117
0
    osl::FileBase::getSystemPathFromFileURL( aURL, aPhysPath );
118
#ifdef _WIN32
119
    aPhysPath += "\\";
120
#else
121
0
    aPhysPath += "/";
122
0
#endif
123
124
0
    SetFingerPrintsDB( aPhysPath );
125
126
#if !defined(EXTTEXTCAT_VERSION_MAJOR)
127
128
    // disable currently not functional languages...
129
    struct LangCountry
130
    {
131
        const char *pLang;
132
        const char *pCountry;
133
    };
134
    LangCountry aDisable[] =
135
    {
136
        // not functional in modified libtextcat, but fixed in >= libexttextcat 3.1.0
137
        // which is the first with EXTTEXTCAT_VERSION_MAJOR defined
138
        {"sco", ""}, {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""},
139
        {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""}, {"sa", ""},
140
        {"ta", ""}, {"th", ""}, {"qu", ""}, {"yi", ""}
141
    };
142
    sal_Int32 nNum = SAL_N_ELEMENTS(aDisable);
143
    Sequence< Locale > aDisableSeq( nNum );
144
    Locale *pDisableSeq = aDisableSeq.getArray();
145
    for (sal_Int32 i = 0;  i < nNum;  ++i)
146
    {
147
        Locale aLocale;
148
        aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
149
        aLocale.Country  = OUString::createFromAscii( aDisable[i].pCountry );
150
        pDisableSeq[i] = aLocale;
151
    }
152
    disableLanguages( aDisableSeq );
153
    DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
154
#endif
155
0
}
156
157
/// Guesses the language of the sub-range [nStartPos, nStartPos + nLen) of rText.
///
/// The range is converted to UTF-8 and passed to
/// SimpleGuesser::GuessPrimaryLanguage(); the language and country of the
/// returned Guess are copied into the resulting Locale.
///
/// @throws lang::IllegalArgumentException
///     if nStartPos or nLen is negative, or the range extends past the end
///     of rText.
Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
        const OUString& rText,
        ::sal_Int32 nStartPos,
        ::sal_Int32 nLen )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    // The upper bound is checked as a subtraction: nStartPos + nLen could
    // overflow sal_Int32 for large arguments, whereas getLength() - nStartPos
    // cannot once nStartPos is known to be non-negative. A start position
    // beyond the end makes the right-hand side negative, so it is rejected too.
    if (nStartPos < 0 || nLen < 0 || nLen > rText.getLength() - nStartPos)
        throw lang::IllegalArgumentException();

    OString o( OUStringToOString( rText.subView(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
    Guess g = m_aGuesser.GuessPrimaryLanguage(o.getStr());
    lang::Locale aRes;
    aRes.Language   = OUString::createFromAscii( g.GetLanguage() );
    aRes.Country    = OUString::createFromAscii( g.GetCountry() );
    return aRes;
}
176
177
0
#define DEFAULT_CONF_FILE_NAME "fpdb.conf"
178
179
void LangGuess_Impl::SetFingerPrintsDB(
180
        std::u16string_view filePath )
181
0
{
182
    //! text encoding for file name / path needs to be in the same encoding the OS uses
183
0
    OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
184
0
    OString conf_file_path = path + DEFAULT_CONF_FILE_NAME;
185
186
0
    m_aGuesser.SetDBPath(conf_file_path.getStr(), path.getStr());
187
0
}
188
189
/// Returns one Locale (language and country) for every entry reported by
/// SimpleGuesser::GetAllManagedLanguages().
uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages(  )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    std::vector<Guess> aGuesses = m_aGuesser.GetAllManagedLanguages();

    Sequence< css::lang::Locale > aLocales;
    aLocales.realloc(aGuesses.size());

    // fill the freshly sized sequence in place, one slot per guess
    css::lang::Locale *pSlot = aLocales.getArray();
    for (Guess& rGuess : aGuesses)
    {
        pSlot->Language = OUString::createFromAscii( rGuess.GetLanguage() );
        pSlot->Country  = OUString::createFromAscii( rGuess.GetCountry() );
        ++pSlot;
    }

    return aLocales;
}
210
211
/// Returns one Locale (language and country) for every entry reported by
/// SimpleGuesser::GetAvailableLanguages().
uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages(  )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    std::vector<Guess> aGuesses = m_aGuesser.GetAvailableLanguages();

    Sequence< css::lang::Locale > aLocales;
    aLocales.realloc(aGuesses.size());

    // fill the freshly sized sequence in place, one slot per guess
    css::lang::Locale *pSlot = aLocales.getArray();
    for (Guess& rGuess : aGuesses)
    {
        pSlot->Language = OUString::createFromAscii( rGuess.GetLanguage() );
        pSlot->Country  = OUString::createFromAscii( rGuess.GetCountry() );
        ++pSlot;
    }

    return aLocales;
}
232
233
/// Returns one Locale (language and country) for every entry reported by
/// SimpleGuesser::GetUnavailableLanguages().
uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages(  )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    std::vector<Guess> aGuesses = m_aGuesser.GetUnavailableLanguages();

    Sequence< css::lang::Locale > aLocales;
    aLocales.realloc(aGuesses.size());

    // fill the freshly sized sequence in place, one slot per guess
    css::lang::Locale *pSlot = aLocales.getArray();
    for (Guess& rGuess : aGuesses)
    {
        pSlot->Language = OUString::createFromAscii( rGuess.GetLanguage() );
        pSlot->Country  = OUString::createFromAscii( rGuess.GetCountry() );
        ++pSlot;
    }

    return aLocales;
}
254
255
/// Disables each given locale in the guesser. Every locale is passed to
/// SimpleGuesser::DisableLanguage() as the key "<Language>-<Country>".
void SAL_CALL LangGuess_Impl::disableLanguages(
        const uno::Sequence< Locale >& rLanguages )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    for (const Locale& rLocale : rLanguages)
    {
        const OString aLang = OUStringToOString( rLocale.Language, RTL_TEXTENCODING_ASCII_US );
        const OString aCountry = OUStringToOString( rLocale.Country, RTL_TEXTENCODING_ASCII_US );

        std::string aKey( aLang.getStr() );
        aKey.append( "-" ).append( aCountry.getStr() );
        m_aGuesser.DisableLanguage(aKey);
    }
}
275
276
/// Enables each given locale in the guesser. Every locale is passed to
/// SimpleGuesser::EnableLanguage() as the key "<Language>-<Country>".
void SAL_CALL LangGuess_Impl::enableLanguages(
        const uno::Sequence< Locale >& rLanguages )
{
    std::scoped_lock aGuard( GetLangGuessMutex() );

    EnsureInitialized();

    for (const Locale& rLocale : rLanguages)
    {
        const OString aLang = OUStringToOString( rLocale.Language, RTL_TEXTENCODING_ASCII_US );
        const OString aCountry = OUStringToOString( rLocale.Country, RTL_TEXTENCODING_ASCII_US );

        std::string aKey( aLang.getStr() );
        aKey.append( "-" ).append( aCountry.getStr() );
        m_aGuesser.EnableLanguage(aKey);
    }
}
296
297
/// XServiceInfo: returns the fixed implementation name of this component.
OUString SAL_CALL LangGuess_Impl::getImplementationName(  )
{
    OUString aImplName( u"com.sun.star.lingu2.LanguageGuessing"_ustr );
    return aImplName;
}
301
302
/// XServiceInfo: forwards the lookup of ServiceName to cppu::supportsService().
sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
{
    const sal_Bool bSupported = cppu::supportsService(this, ServiceName);
    return bSupported;
}
306
307
/// XServiceInfo: returns the single service name this component supports.
Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames(  )
{
    Sequence<OUString> aServiceNames{ u"com.sun.star.linguistic2.LanguageGuessing"_ustr };
    return aServiceNames;
}
311
312
/// Exported C entry point that creates a new LangGuess_Impl and returns it
/// via cppu::acquire(). Both arguments are ignored.
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_LangGuess_get_implementation(
    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
{
    LangGuess_Impl* pInstance = new LangGuess_Impl();
    return cppu::acquire(pInstance);
}
318
319
320
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */