Coverage Report

Created: 2025-11-16 09:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/lingucomponent/source/lingutil/lingutil.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#if defined(_WIN32)
21
#if !defined WIN32_LEAN_AND_MEAN
22
# define WIN32_LEAN_AND_MEAN
23
#endif
24
#include <windows.h>
25
#endif
26
27
#include <osl/diagnose.h>
28
#include <osl/thread.h>
29
#include <osl/file.hxx>
30
#include <osl/process.h>
31
#include <tools/debug.hxx>
32
#include <tools/urlobj.hxx>
33
#include <i18nlangtag/languagetag.hxx>
34
#include <i18nlangtag/mslangid.hxx>
35
#include <unotools/bootstrap.hxx>
36
#include <unotools/lingucfg.hxx>
37
#include <unotools/pathoptions.hxx>
38
#include <rtl/bootstrap.hxx>
39
#include <rtl/ustring.hxx>
40
#include <rtl/string.hxx>
41
#include <rtl/tencinfo.h>
42
#include <linguistic/misc.hxx>
43
44
#include <set>
45
#include <vector>
46
#include <string.h>
47
48
#include "lingutil.hxx"
49
50
#include <sal/macros.h>
51
52
using namespace ::com::sun::star;
53
54
#if defined(_WIN32)
55
// Return rPathName carrying the Win32 long-path prefix ("\\?\").
// A path that already starts with the prefix is returned unchanged, so the
// prefix is never added twice.
OString Win_AddLongPathPrefix( const OString &rPathName )
{
    constexpr OString aLongPathPrefix = "\\\\?\\"_ostr;

    if (rPathName.match(aLongPathPrefix))
        return rPathName;

    return aLongPathPrefix + rPathName;
}
61
#endif //defined(_WIN32)
62
63
#if defined SYSTEM_DICTS || defined IOS
64
// find old style dictionaries in system directories
65
static void GetOldStyleDicsInDir(
66
    OUString const & aSystemDir, OUString const & aFormatName,
67
    std::u16string_view aSystemSuffix, std::u16string_view aSystemPrefix,
68
    std::set< OUString >& aDicLangInUse,
69
    std::vector< SvtLinguConfigDictionaryEntry >& aRes )
70
0
{
71
0
    osl::Directory aSystemDicts(aSystemDir);
72
0
    if (aSystemDicts.open() != osl::FileBase::E_None)
73
0
        return;
74
75
0
    osl::DirectoryItem aItem;
76
0
    osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
77
0
    while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
78
0
    {
79
0
        aItem.getFileStatus(aFileStatus);
80
0
        OUString sPath = aFileStatus.getFileURL();
81
0
        if (sPath.endsWith(aSystemSuffix))
82
0
        {
83
0
            sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
84
0
            if (!sPath.match(aSystemPrefix, nStartIndex))
85
0
                continue;
86
0
            OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.size(),
87
0
                sPath.getLength() - aSystemSuffix.size() -
88
0
                nStartIndex - aSystemPrefix.size());
89
0
            if (sChunk.isEmpty())
90
0
                continue;
91
92
            // We prefer (now) to use language tags.
93
            // Avoid feeding in the older LANG_REGION scheme to the BCP47
94
            // ctor as that triggers use of liblangtag and initializes its
95
            // database which we do not want during startup. Convert
96
            // instead.
97
0
            sChunk = sChunk.replace( '_', '-');
98
99
            // There's a known exception to the rule, the dreaded
100
            // hu_HU_u8.dic of the myspell-hu package, see
101
            // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
102
            // This was ignored because unknown in the old implementation,
103
            // truncate to the known locale and either insert because hu_HU
104
            // wasn't encountered yet, or skip because it was. It doesn't
105
            // really matter because the proper new-style hu_HU dictionary
106
            // will take precedence anyway if installed with a Hungarian
107
            // languagepack. Again, this is only to not pull in all
108
            // liblangtag and stuff during startup, the result would be
109
            // !isValidBcp47() and the dictionary ignored.
110
0
            if (sChunk == "hu-HU-u8")
111
0
                sChunk = "hu-HU";
112
113
0
            LanguageTag aLangTag(sChunk, true);
114
0
            if (!aLangTag.isValidBcp47())
115
0
                continue;
116
117
            // Thus we first get the language of the dictionary
118
0
            const OUString& aLocaleName(aLangTag.getBcp47());
119
120
0
            if (aDicLangInUse.insert(aLocaleName).second)
121
0
            {
122
                // add the dictionary to the resulting vector
123
0
                SvtLinguConfigDictionaryEntry aDicEntry;
124
0
                aDicEntry.aLocations = { sPath };
125
0
                aDicEntry.aFormatName = aFormatName;
126
0
                if (aLocaleName == u"ar")
127
0
                    aDicEntry.aLocaleNames = {
128
0
                        aLocaleName,
129
0
                        u"ar-AE"_ustr, u"ar-BH"_ustr, u"ar-DJ"_ustr, u"ar-DZ"_ustr, u"ar-EG"_ustr,
130
0
                        u"ar-ER"_ustr, u"ar-IL"_ustr, u"ar-IQ"_ustr, u"ar-JO"_ustr, u"ar-KM"_ustr,
131
0
                        u"ar-KW"_ustr, u"ar-LB"_ustr, u"ar-LY"_ustr, u"ar-MA"_ustr, u"ar-MR"_ustr,
132
0
                        u"ar-OM"_ustr, u"ar-PS"_ustr, u"ar-QA"_ustr, u"ar-SA"_ustr, u"ar-SD"_ustr,
133
0
                        u"ar-SO"_ustr, u"ar-SY"_ustr, u"ar-TD"_ustr, u"ar-TN"_ustr, u"ar-YE"_ustr
134
0
                    };
135
0
                else
136
0
                    aDicEntry.aLocaleNames = { aLocaleName };
137
0
                aRes.push_back(std::move(aDicEntry));
138
0
            }
139
0
        }
140
0
    }
141
0
}
142
#endif
143
144
// build list of old style dictionaries (not as extensions) to use.
145
// User installed dictionaries (the ones residing in the user paths)
146
// will get precedence over system installed ones for the same language.
147
std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
148
0
{
149
0
    std::vector< SvtLinguConfigDictionaryEntry > aRes;
150
151
0
    if (!pDicType)
152
0
        return aRes;
153
154
0
    OUString aFormatName;
155
0
    OUString aDicExtension;
156
0
#if defined SYSTEM_DICTS || defined IOS
157
0
    OUString aSystemDir;
158
0
    OUString aSystemPrefix;
159
0
    OUString aSystemSuffix;
160
0
#endif
161
0
    if (strcmp( pDicType, "DICT" ) == 0)
162
0
    {
163
0
        aFormatName     = "DICT_SPELL";
164
0
        aDicExtension   = ".dic";
165
0
#ifdef SYSTEM_DICTS
166
0
        aSystemDir      = DICT_SYSTEM_DIR;
167
0
        aSystemSuffix   = aDicExtension;
168
#elif defined IOS
169
        aSystemDir      = "$BRAND_BASE_DIR/share/spell";
170
        rtl::Bootstrap::expandMacros(aSystemDir);
171
        aSystemSuffix   = ".dic";
172
#endif
173
0
    }
174
0
    else if (strcmp( pDicType, "HYPH" ) == 0)
175
0
    {
176
0
        aFormatName     = "DICT_HYPH";
177
0
        aDicExtension   = ".dic";
178
0
#ifdef SYSTEM_DICTS
179
0
        aSystemDir      = HYPH_SYSTEM_DIR;
180
0
        aSystemPrefix   = "hyph_";
181
0
        aSystemSuffix   = aDicExtension;
182
0
#endif
183
0
    }
184
0
    else if (strcmp( pDicType, "THES" ) == 0)
185
0
    {
186
0
        aFormatName     = "DICT_THES";
187
0
        aDicExtension   = ".dat";
188
0
#ifdef SYSTEM_DICTS
189
0
        aSystemDir      = THES_SYSTEM_DIR;
190
0
        aSystemPrefix   = "th_";
191
0
        aSystemSuffix   = "_v2.dat";
192
#elif defined IOS
193
        aSystemDir      = "$BRAND_BASE_DIR/share/thes";
194
        rtl::Bootstrap::expandMacros(aSystemDir);
195
        aSystemPrefix   = "th_";
196
        aSystemSuffix   = "_v2.dat";
197
#endif
198
0
    }
199
200
0
    if (aFormatName.isEmpty() || aDicExtension.isEmpty())
201
0
        return aRes;
202
203
0
#if defined SYSTEM_DICTS || defined IOS
204
    // set of languages to remember the language where it is already
205
    // decided to make use of the dictionary.
206
0
    std::set< OUString > aDicLangInUse;
207
208
0
#ifndef IOS
209
    // follow the hunspell tool's example and check DICPATH for preferred dictionaries
210
0
    rtl_uString * pSearchPath = nullptr;
211
0
    osl_getEnvironment(u"DICPATH"_ustr.pData, &pSearchPath);
212
213
0
    if (pSearchPath)
214
0
    {
215
0
        OUString aSearchPath(pSearchPath);
216
0
        rtl_uString_release(pSearchPath);
217
218
0
        sal_Int32 nIndex = 0;
219
0
        do
220
0
        {
221
0
            OUString aSystem( aSearchPath.getToken(0, ':', nIndex) );
222
0
            OUString aCWD;
223
0
            OUString aRelative;
224
0
            OUString aAbsolute;
225
226
0
            if (!utl::Bootstrap::getProcessWorkingDir(aCWD))
227
0
                continue;
228
0
            if (osl::FileBase::getFileURLFromSystemPath(aSystem, aRelative)
229
0
                    != osl::FileBase::E_None)
230
0
                continue;
231
0
            if (osl::FileBase::getAbsoluteFileURL(aCWD, aRelative, aAbsolute)
232
0
                    != osl::FileBase::E_None)
233
0
                continue;
234
235
            // GetOldStyleDicsInDir will make sure the dictionary is the right
236
            // type based on its prefix, that way hyphen, mythes and regular
237
            // dictionaries can live in one directory
238
0
            GetOldStyleDicsInDir(aAbsolute, aFormatName, aSystemSuffix,
239
0
                aSystemPrefix, aDicLangInUse, aRes);
240
0
        }
241
0
        while (nIndex != -1);
242
0
    }
243
0
#endif
244
245
    // load system directories last so that DICPATH prevails
246
0
    GetOldStyleDicsInDir(aSystemDir, aFormatName, aSystemSuffix, aSystemPrefix,
247
0
        aDicLangInUse, aRes);
248
0
#endif
249
250
0
    return aRes;
251
0
}
252
253
void MergeNewStyleDicsAndOldStyleDics(
254
    std::vector< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
255
    const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
256
0
{
257
    // get list of languages supported by new style dictionaries
258
0
    std::set< OUString > aNewStyleLanguages;
259
0
    for (auto const& newStyleDic : rNewStyleDics)
260
0
    {
261
0
        const uno::Sequence< OUString > aLocaleNames(newStyleDic.aLocaleNames);
262
0
        sal_Int32 nLocaleNames = aLocaleNames.getLength();
263
0
        for (sal_Int32 k = 0;  k < nLocaleNames; ++k)
264
0
        {
265
0
            aNewStyleLanguages.insert( aLocaleNames[k] );
266
0
        }
267
0
    }
268
269
    // now check all old style dictionaries if they will add a not yet
270
    // added language. If so add them to the resulting vector
271
0
    for (auto const& oldStyleDic : rOldStyleDics)
272
0
    {
273
0
        sal_Int32 nOldStyleDics = oldStyleDic.aLocaleNames.getLength();
274
275
        // old style dics should only have one language listed...
276
0
        DBG_ASSERT( nOldStyleDics, "old style dictionary with more than one language found!");
277
0
        if (nOldStyleDics > 0)
278
0
        {
279
0
            if (linguistic::LinguIsUnspecified( oldStyleDic.aLocaleNames[0]))
280
0
            {
281
0
                OSL_FAIL( "old style dictionary with invalid language found!" );
282
0
                continue;
283
0
            }
284
285
            // language not yet added?
286
0
            if (aNewStyleLanguages.find( oldStyleDic.aLocaleNames[0] ) == aNewStyleLanguages.end())
287
0
                rNewStyleDics.push_back(oldStyleDic);
288
0
        }
289
0
        else
290
0
        {
291
0
            OSL_FAIL( "old style dictionary with no language found!" );
292
0
        }
293
0
    }
294
0
}
295
296
// Map a charset name to an rtl_TextEncoding.
//
// The name is looked up as a MIME charset first, then as a Unix charset, and
// finally compared against the literal "ISCII-DEVANAGARI".
// Returns RTL_TEXTENCODING_DONTKNOW when pCharset is null or none of the
// lookups recognizes it.
rtl_TextEncoding getTextEncodingFromCharset(const char* pCharset)
{
    if (!pCharset)
        return RTL_TEXTENCODING_DONTKNOW;

    const rtl_TextEncoding eFromMime = rtl_getTextEncodingFromMimeCharset(pCharset);
    if (eFromMime != RTL_TEXTENCODING_DONTKNOW)
        return eFromMime;

    const rtl_TextEncoding eFromUnix = rtl_getTextEncodingFromUnixCharset(pCharset);
    if (eFromUnix != RTL_TEXTENCODING_DONTKNOW)
        return eFromUnix;

    // neither table knows the name; handle the one extra charset explicitly
    if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
        return RTL_TEXTENCODING_ISCII_DEVANAGARI;

    return RTL_TEXTENCODING_DONTKNOW;
}
314
315
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */