Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/unotools/source/i18n/textsearch.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <sal/config.h>
21
22
#include <cstdlib>
23
#include <string_view>
24
25
#include <i18nlangtag/languagetag.hxx>
26
#include <i18nutil/searchopt.hxx>
27
#include <i18nutil/transliteration.hxx>
28
#include <com/sun/star/util/TextSearch2.hpp>
29
#include <com/sun/star/util/SearchAlgorithms2.hpp>
30
#include <com/sun/star/util/SearchFlags.hpp>
31
#include <unotools/charclass.hxx>
32
#include <comphelper/processfactory.hxx>
33
#include <unotools/textsearch.hxx>
34
#include <rtl/ustrbuf.hxx>
35
#include <comphelper/diagnose_ex.hxx>
36
#include <mutex>
37
38
using namespace ::com::sun::star::util;
39
using namespace ::com::sun::star::uno;
40
using namespace ::com::sun::star::lang;
41
42
namespace utl
43
{
44
45
SearchParam::SearchParam( const OUString &rText,
46
                                SearchType eType,
47
                                bool bCaseSensitive,
48
                                sal_uInt32 cWildEscChar,
49
                                bool bWildMatchSel )
50
48
{
51
48
    sSrchStr        = rText;
52
48
    m_eSrchType     = eType;
53
54
48
    m_cWildEscChar  = cWildEscChar;
55
56
48
    m_bCaseSense    = bCaseSensitive;
57
48
    m_bWildMatchSel = bWildMatchSel;
58
48
}
59
60
SearchParam::SearchParam( const SearchParam& rParam )
61
0
{
62
0
    sSrchStr        = rParam.sSrchStr;
63
0
    m_eSrchType     = rParam.m_eSrchType;
64
65
0
    m_cWildEscChar  = rParam.m_cWildEscChar;
66
67
0
    m_bCaseSense    = rParam.m_bCaseSense;
68
0
    m_bWildMatchSel = rParam.m_bWildMatchSel;
69
0
}
70
71
48
SearchParam::~SearchParam() {}
72
73
static bool lcl_Equals( const i18nutil::SearchOptions2& rSO1, const i18nutil::SearchOptions2& rSO2 )
74
48
{
75
48
    return
76
48
        rSO1.AlgorithmType2 == rSO2.AlgorithmType2 &&
77
46
        rSO1.WildcardEscapeCharacter == rSO2.WildcardEscapeCharacter &&
78
46
        rSO1.searchFlag == rSO2.searchFlag &&
79
46
        rSO1.searchString == rSO2.searchString &&
80
45
        rSO1.replaceString == rSO2.replaceString &&
81
45
        rSO1.changedChars == rSO2.changedChars &&
82
45
        rSO1.deletedChars == rSO2.deletedChars &&
83
45
        rSO1.insertedChars == rSO2.insertedChars &&
84
45
        rSO1.Locale.Language == rSO2.Locale.Language &&
85
45
        rSO1.Locale.Country == rSO2.Locale.Country &&
86
45
        rSO1.Locale.Variant == rSO2.Locale.Variant &&
87
45
        rSO1.transliterateFlags == rSO2.transliterateFlags;
88
48
}
89
90
namespace
91
{
92
    struct CachedTextSearch
93
    {
94
        std::mutex mutex;
95
        i18nutil::SearchOptions2 Options;
96
        css::uno::Reference< css::util::XTextSearch2 > xTextSearch;
97
    };
98
}
99
100
Reference<XTextSearch2> TextSearch::getXTextSearch( const i18nutil::SearchOptions2& rPara )
101
48
{
102
48
    static CachedTextSearch theCachedTextSearch;
103
104
48
    std::scoped_lock aGuard(theCachedTextSearch.mutex);
105
106
48
    if ( lcl_Equals(theCachedTextSearch.Options, rPara) )
107
45
        return theCachedTextSearch.xTextSearch;
108
109
3
    const Reference< XComponentContext >& xContext = ::comphelper::getProcessComponentContext();
110
3
    theCachedTextSearch.xTextSearch.set( ::TextSearch2::create(xContext) );
111
3
    theCachedTextSearch.xTextSearch->setOptions2( rPara.toUnoSearchOptions2() );
112
3
    theCachedTextSearch.Options = rPara;
113
114
3
    return theCachedTextSearch.xTextSearch;
115
48
}
116
117
TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
118
0
{
119
0
    if( LANGUAGE_NONE == eLang )
120
0
        eLang = LANGUAGE_SYSTEM;
121
0
    css::lang::Locale aLocale( LanguageTag::convertToLocale( eLang ) );
122
123
0
    Init( rParam, aLocale);
124
0
}
125
126
TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
127
48
{
128
48
    Init( rParam, rCClass.getLanguageTag().getLocale() );
129
48
}
130
131
TextSearch::TextSearch( const i18nutil::SearchOptions2& rPara )
132
0
{
133
0
    xTextSearch = getXTextSearch( rPara );
134
0
}
135
136
void TextSearch::Init( const SearchParam & rParam,
137
                        const css::lang::Locale& rLocale )
138
48
{
139
    // convert SearchParam to the UNO SearchOptions2
140
48
    i18nutil::SearchOptions2 aSOpt;
141
142
48
    switch( rParam.GetSrchType() )
143
48
    {
144
16
    case SearchParam::SearchType::Wildcard:
145
16
        aSOpt.AlgorithmType2 = SearchAlgorithms2::WILDCARD;
146
16
        aSOpt.WildcardEscapeCharacter = rParam.GetWildEscChar();
147
16
        if (rParam.IsWildMatchSel())
148
16
            aSOpt.searchFlag |= SearchFlags::WILD_MATCH_SELECTION;
149
16
        break;
150
151
0
    case SearchParam::SearchType::Regexp:
152
0
        aSOpt.AlgorithmType2 = SearchAlgorithms2::REGEXP;
153
0
        break;
154
155
32
    case SearchParam::SearchType::Normal:
156
32
        aSOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
157
32
        break;
158
159
0
    default:
160
0
        for (;;) std::abort();
161
48
    }
162
48
    aSOpt.searchString = rParam.GetSrchStr();
163
48
    aSOpt.replaceString = "";
164
48
    aSOpt.Locale = rLocale;
165
48
    aSOpt.transliterateFlags = TransliterationFlags::NONE;
166
48
    if( !rParam.IsCaseSensitive() )
167
48
    {
168
48
        aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
169
48
        aSOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
170
48
    }
171
172
48
    xTextSearch = getXTextSearch( aSOpt );
173
48
}
174
175
void TextSearch::SetLocale( const i18nutil::SearchOptions2& rOptions,
176
                            const css::lang::Locale& rLocale )
177
0
{
178
0
    i18nutil::SearchOptions2 aSOpt( rOptions );
179
0
    aSOpt.Locale = rLocale;
180
181
0
    xTextSearch = getXTextSearch( aSOpt );
182
0
}
183
184
TextSearch::~TextSearch()
185
48
{
186
48
}
187
188
/*
189
 * General search methods. These methods will call the respective
190
 * methods, such as ordinary string searching or regular expression
191
 * matching, using the method pointer.
192
 */
193
bool TextSearch::SearchForward( const OUString &rStr,
194
                    sal_Int32* pStart, sal_Int32* pEnd,
195
                    css::util::SearchResult* pRes)
196
1.19k
{
197
1.19k
    bool bRet = false;
198
1.19k
    try
199
1.19k
    {
200
1.19k
        if( xTextSearch.is() )
201
1.19k
        {
202
1.19k
            SearchResult aRet( xTextSearch->searchForward( rStr, *pStart, *pEnd ));
203
1.19k
            if( aRet.subRegExpressions > 0 )
204
210
            {
205
210
                bRet = true;
206
                // the XTextsearch returns in startOffset the higher position
207
                // and the endposition is always exclusive.
208
                // The caller of this function will have in startPos the
209
                // lower pos. and end
210
210
                *pStart = aRet.startOffset[ 0 ];
211
210
                *pEnd = aRet.endOffset[ 0 ];
212
210
                if( pRes )
213
0
                    *pRes = std::move(aRet);
214
210
            }
215
1.19k
        }
216
1.19k
    }
217
1.19k
    catch ( Exception& )
218
1.19k
    {
219
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
220
0
    }
221
1.19k
    return bRet;
222
1.19k
}
223
224
bool TextSearch::searchForward( const OUString &rStr )
225
0
{
226
0
    sal_Int32 pStart = 0;
227
0
    sal_Int32 pEnd = rStr.getLength();
228
229
0
    bool bResult = SearchForward(rStr, &pStart, &pEnd);
230
231
0
    return bResult;
232
0
}
233
234
bool TextSearch::SearchBackward( const OUString & rStr, sal_Int32* pStart,
235
                                sal_Int32* pEnd, SearchResult* pRes )
236
0
{
237
0
    bool bRet = false;
238
0
    try
239
0
    {
240
0
        if( xTextSearch.is() )
241
0
        {
242
0
            SearchResult aRet( xTextSearch->searchBackward( rStr, *pStart, *pEnd ));
243
0
            if( aRet.subRegExpressions )
244
0
            {
245
0
                bRet = true;
246
                // the XTextsearch returns in startOffset the higher position
247
                // and the endposition is always exclusive.
248
                // The caller of this function will have in startPos the
249
                // lower pos. and end
250
0
                *pEnd = aRet.startOffset[ 0 ];
251
0
                *pStart = aRet.endOffset[ 0 ];
252
0
                if( pRes )
253
0
                    *pRes = std::move(aRet);
254
0
            }
255
0
        }
256
0
    }
257
0
    catch ( Exception& )
258
0
    {
259
0
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
260
0
    }
261
0
    return bRet;
262
0
}
263
264
// static
265
void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const SearchResult& rResult )
266
0
{
267
0
    if( rResult.subRegExpressions <= 0 )
268
0
        return;
269
270
0
    sal_Unicode sFndChar;
271
0
    sal_Int32 i;
272
0
    OUStringBuffer sBuff(rReplaceStr.getLength()*4);
273
0
    for(i = 0; i < rReplaceStr.getLength(); i++)
274
0
    {
275
0
        if( rReplaceStr[i] == '&')
276
0
        {
277
0
            sal_Int32 nStart = rResult.startOffset[0];
278
0
            sal_Int32 nLength = rResult.endOffset[0] - rResult.startOffset[0];
279
0
            sBuff.append(rStr.substr(nStart, nLength));
280
0
        }
281
0
        else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '$')
282
0
        {
283
0
            sFndChar = rReplaceStr[ i + 1 ];
284
0
            switch(sFndChar)
285
0
            {   // placeholder for a backward reference?
286
0
            case '0':
287
0
            case '1':
288
0
            case '2':
289
0
            case '3':
290
0
            case '4':
291
0
            case '5':
292
0
            case '6':
293
0
            case '7':
294
0
            case '8':
295
0
            case '9':
296
0
                {
297
0
                    int j = sFndChar - '0'; // index
298
0
                    if(j < rResult.subRegExpressions)
299
0
                    {
300
0
                        sal_Int32 nSttReg = rResult.startOffset[j];
301
0
                        sal_Int32 nRegLen = rResult.endOffset[j];
302
0
                        if (nSttReg < 0 || nRegLen < 0) // A "not found" optional capture
303
0
                        {
304
0
                            nSttReg = nRegLen = 0; // Copy empty string
305
0
                        }
306
0
                        else if (nRegLen >= nSttReg)
307
0
                        {
308
0
                            nRegLen = nRegLen - nSttReg;
309
0
                        }
310
0
                        else
311
0
                        {
312
0
                            nRegLen = nSttReg - nRegLen;
313
0
                            nSttReg = rResult.endOffset[j];
314
0
                        }
315
                        // Copy reference from found string
316
0
                        sBuff.append(rStr.substr(nSttReg, nRegLen));
317
0
                    }
318
0
                    i += 1;
319
0
                }
320
0
                break;
321
0
            default:
322
0
                sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1]));
323
0
                i += 1;
324
0
                break;
325
0
            }
326
0
        }
327
0
        else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '\\')
328
0
        {
329
0
            sFndChar = rReplaceStr[ i+1 ];
330
0
            switch(sFndChar)
331
0
            {
332
0
            case '\\':
333
0
            case '&':
334
0
            case '$':
335
0
                sBuff.append(sFndChar);
336
0
                i+=1;
337
0
                break;
338
0
            case 't':
339
0
                sBuff.append('\t');
340
0
                i += 1;
341
0
                break;
342
0
            default:
343
0
                sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1]));
344
0
                i += 1;
345
0
                break;
346
0
            }
347
0
        }
348
0
        else
349
0
        {
350
0
            sBuff.append(rReplaceStr[i]);
351
0
        }
352
0
    }
353
0
    rReplaceStr = sBuff.makeStringAndClear();
354
0
}
355
356
bool TextSearch::SimilaritySearch(const OUString& rString, const OUString& rSearchString,
357
                                  ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore)
358
0
{
359
0
    sal_Int32 nScore = 0;
360
0
    sal_Int32 nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, true);
361
0
    if (nFirstScore == -1)
362
0
        nFirstScore = GetSubstringSimilarity(rString, rSearchString, nScore, false);
363
0
    if (nFirstScore == -1)
364
0
    {
365
0
        if (rSearchString.getLength() == 1)
366
0
        {
367
0
            if (rString.startsWith(rSearchString))
368
0
                nFirstScore = nScore;
369
0
            else if (rString.endsWith(rSearchString))
370
0
                nFirstScore = nScore + 1;
371
0
            nScore += 2;
372
0
        }
373
0
        else if (rString.getLength() == 1 && rSearchString.getLength() < SMALL_STRING_THRESHOLD)
374
0
        {
375
0
            if (rSearchString.startsWith(rString))
376
0
                nFirstScore = nScore;
377
0
            else if (rSearchString.endsWith(rString))
378
0
                nFirstScore = nScore + 1;
379
0
            nScore += 2;
380
0
        }
381
0
    }
382
0
    sal_Int32 nSecondScore = GetWeightedLevenshteinDistance(rString, rSearchString);
383
384
0
    if (nFirstScore == -1 && nSecondScore >= WLD_THRESHOLD)
385
0
        return false;
386
387
0
    rSimilarityScore.first = (nFirstScore == -1) ? nScore : nFirstScore;
388
0
    rSimilarityScore.second = nSecondScore;
389
0
    return true;
390
0
}
391
392
sal_Int32 TextSearch::GetSubstringSimilarity(std::u16string_view rString,
393
                                             std::u16string_view rSearchString,
394
                                             sal_Int32& nInitialScore, const bool bFromStart)
395
0
{
396
0
    sal_Int32 nScore = -1;
397
0
    for (sal_Int32 length = rSearchString.length(); length > 1; length--)
398
0
    {
399
0
        sal_Int32 nStartPos = bFromStart ? 0 : rSearchString.length() - length;
400
0
        std::u16string_view rSearchSubString = rSearchString.substr(nStartPos, length);
401
0
        if (rString.starts_with(rSearchSubString))
402
0
        {
403
0
            nScore = nInitialScore;
404
0
            break;
405
0
        }
406
0
        else if (rString.ends_with(rSearchSubString))
407
0
        {
408
0
            nScore = nInitialScore + 1;
409
0
            break;
410
0
        }
411
0
        else if (rString.find(rSearchSubString) != std::u16string_view::npos)
412
0
        {
413
0
            nScore = nInitialScore + 2;
414
0
            break;
415
0
        }
416
0
        nInitialScore += 3;
417
0
    }
418
0
    return nScore;
419
0
}
420
421
sal_Int32 TextSearch::GetWeightedLevenshteinDistance(const OUString& rString,
422
                                                     const OUString& rSearchString)
423
0
{
424
0
    sal_Int32 n = rString.getLength();
425
0
    sal_Int32 m = rSearchString.getLength();
426
0
    std::vector<std::vector<sal_Int32>> ScoreDP(n + 1, std::vector<sal_Int32>(m + 1));
427
428
0
    for (sal_Int32 i = 0; i <= n; i++)
429
0
    {
430
0
        ScoreDP[i][0] = i;
431
0
    }
432
0
    for (sal_Int32 j = 0; j <= m; j++)
433
0
    {
434
0
        ScoreDP[0][j] = j;
435
0
    }
436
437
0
    for (sal_Int32 i = 1; i <= n; i++)
438
0
    {
439
0
        for (sal_Int32 j = 1; j <= m; j++)
440
0
        {
441
0
            sal_Int32& minE = ScoreDP[i][j];
442
0
            minE = ScoreDP[i - 1][j] + 1;
443
0
            minE = std::min(minE, ScoreDP[i][j - 1] + 1);
444
0
            if (rString[i - 1] != rSearchString[j - 1])
445
0
                minE = std::min(minE, ScoreDP[i - 1][j - 1] + 2);
446
0
            else
447
0
                minE = std::min(minE, ScoreDP[i - 1][j - 1]);
448
0
        }
449
0
    }
450
0
    return ScoreDP[n][m];
451
0
}
452
453
}   // namespace utl
454
455
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */