Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/i18npool/source/search/textsearch.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include "textsearch.hxx"
21
#include "levdis.hxx"
22
#include <com/sun/star/i18n/BreakIterator.hpp>
23
#include <com/sun/star/util/SearchAlgorithms2.hpp>
24
#include <com/sun/star/util/SearchFlags.hpp>
25
#include <com/sun/star/i18n/WordType.hpp>
26
#include <com/sun/star/i18n/ScriptType.hpp>
27
#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
28
#include <com/sun/star/i18n/CharacterClassification.hpp>
29
#include <com/sun/star/i18n/KCharacterType.hpp>
30
#include <com/sun/star/i18n/Transliteration.hpp>
31
#include <cppuhelper/supportsservice.hxx>
32
#include <cppuhelper/weak.hxx>
33
#include <i18nutil/transliteration.hxx>
34
#include <rtl/ustrbuf.hxx>
35
#include <sal/log.hxx>
36
37
#include <unicode/regex.h>
38
39
using namespace ::com::sun::star::util;
40
using namespace ::com::sun::star::uno;
41
using namespace ::com::sun::star::lang;
42
using namespace ::com::sun::star::i18n;
43
using namespace ::com::sun::star;
44
45
const TransliterationFlags COMPLEX_TRANS_MASK =
46
    TransliterationFlags::ignoreBaFa_ja_JP |
47
    TransliterationFlags::ignoreIterationMark_ja_JP |
48
    TransliterationFlags::ignoreTiJi_ja_JP |
49
    TransliterationFlags::ignoreHyuByu_ja_JP |
50
    TransliterationFlags::ignoreSeZe_ja_JP |
51
    TransliterationFlags::ignoreIandEfollowedByYa_ja_JP |
52
    TransliterationFlags::ignoreKiKuFollowedBySa_ja_JP |
53
    TransliterationFlags::ignoreProlongedSoundMark_ja_JP;
54
55
namespace
56
{
57
TransliterationFlags maskComplexTrans( TransliterationFlags n )
58
0
{
59
    // IGNORE_KANA and FULLWIDTH_HALFWIDTH are simple but need to take effect
60
    // in complex transliteration.
61
0
    return
62
0
        n & (COMPLEX_TRANS_MASK |                       // all set ignore bits
63
0
        TransliterationFlags::IGNORE_KANA |            // plus IGNORE_KANA bit
64
0
        TransliterationFlags::FULLWIDTH_HALFWIDTH);    // and the FULLWIDTH_HALFWIDTH value
65
0
}
66
67
bool isComplexTrans( TransliterationFlags n )
68
6
{
69
6
    return bool(n & COMPLEX_TRANS_MASK);
70
6
}
71
72
TransliterationFlags maskSimpleTrans( TransliterationFlags n )
73
12
{
74
12
    return n & ~COMPLEX_TRANS_MASK;
75
12
}
76
77
bool isSimpleTrans( TransliterationFlags n )
78
9
{
79
9
    return bool(maskSimpleTrans(n));
80
9
}
81
82
// Regex patterns are case sensitive.
83
TransliterationFlags maskSimpleRegexTrans( TransliterationFlags n )
84
0
{
85
0
    TransliterationFlags m = (n & TransliterationFlags::IGNORE_MASK) & ~TransliterationFlags::IGNORE_CASE;
86
0
    TransliterationFlags v = n & TransliterationFlags::NON_IGNORE_MASK;
87
0
    if (v == TransliterationFlags::UPPERCASE_LOWERCASE || v == TransliterationFlags::LOWERCASE_UPPERCASE)
88
0
        v = TransliterationFlags::NONE;
89
0
    return (m | v) & ~COMPLEX_TRANS_MASK;
90
0
}
91
92
bool isSimpleRegexTrans( TransliterationFlags n )
93
0
{
94
0
    return bool(maskSimpleRegexTrans(n));
95
0
}
96
97
bool isReplacePunctuation( std::u16string_view rStr )
98
0
{
99
0
    return rStr.find(u'\u2018') != std::u16string_view::npos ||
100
0
           rStr.find(u'\u2019') != std::u16string_view::npos ||
101
0
           rStr.find(u'\u201A') != std::u16string_view::npos ||
102
0
           rStr.find(u'\u201B') != std::u16string_view::npos ||
103
0
           rStr.find(u'\u201C') != std::u16string_view::npos ||
104
0
           rStr.find(u'\u201D') != std::u16string_view::npos ||
105
0
           rStr.find(u'\u201E') != std::u16string_view::npos ||
106
0
           rStr.find(u'\u201F') != std::u16string_view::npos;
107
0
}
108
109
OUString replacePunctuation( const OUString &rStr )
110
0
{
111
0
    return rStr.replace(u'\u2018', '\'')
112
0
               .replace(u'\u2019', '\'')
113
0
               .replace(u'\u201A', '\'')
114
0
               .replace(u'\u201B', '\'')
115
0
               .replace(u'\u201C', '"')
116
0
               .replace(u'\u201D', '"')
117
0
               .replace(u'\u201E', '"')
118
0
               .replace(u'\u201F', '"');
119
0
}
120
};
121
122
TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
123
3
        : m_xContext( rxContext )
124
3
{
125
3
    SearchOptions2 aOpt;
126
3
    aOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
127
3
    aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
128
3
    aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
129
    //aOpt.Locale = ???;
130
3
    setOptions( aOpt );
131
3
}
132
133
TextSearch::~TextSearch()
134
3
{
135
3
    pRegexMatcher.reset();
136
3
    pWLD.reset();
137
3
    pJumpTable.reset();
138
3
    pJumpTable2.reset();
139
3
}
140
141
void TextSearch::setOptions2( const SearchOptions2& rOptions )
142
6
{
143
6
    std::unique_lock g(m_aMutex);
144
145
6
    aSrchPara = rOptions;
146
147
6
    pRegexMatcher.reset();
148
6
    pWLD.reset();
149
6
    pJumpTable.reset();
150
6
    pJumpTable2.reset();
151
6
    maWildcardReversePattern.clear();
152
6
    maWildcardReversePattern2.clear();
153
6
    TransliterationFlags transliterateFlags = static_cast<TransliterationFlags>(aSrchPara.transliterateFlags);
154
6
    bSearchApostrophe = false;
155
6
    bool bReplaceApostrophe = false;
156
6
    if (aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
157
0
    {
158
        // RESrchPrepare will consider aSrchPara.transliterateFlags when
159
        // picking the actual regex pattern
160
        // (sSrchStr|sSrchStr2|SearchOptions2::searchString) and setting
161
        // case-insensitivity. Create transliteration instance, if any, without
162
        // ignore-case so later in TextSearch::searchForward() the string to
163
        // match is not case-altered, leave case-(in)sensitive to regex engine.
164
0
        transliterateFlags &= ~TransliterationFlags::IGNORE_CASE;
165
0
    }
166
6
    else if ( aSrchPara.searchString.indexOf('\'') > - 1 || aSrchPara.searchString.indexOf('"') > - 1 )
167
0
    {
168
0
        bSearchApostrophe = true;
169
0
        bReplaceApostrophe = isReplacePunctuation(aSrchPara.searchString);
170
0
    }
171
172
    // Create Transliteration class
173
6
    if( isSimpleTrans( transliterateFlags) )
174
3
    {
175
3
        if( !xTranslit.is() )
176
3
            xTranslit.set( Transliteration::create( m_xContext ) );
177
3
        xTranslit->loadModule(
178
3
             static_cast<TransliterationModules>(maskSimpleTrans(transliterateFlags)),
179
3
             aSrchPara.Locale);
180
3
    }
181
3
    else if( xTranslit.is() )
182
0
        xTranslit = nullptr;
183
184
    // Create Transliteration for 2<->1, 2<->2 transliteration
185
6
    if ( isComplexTrans( transliterateFlags) )
186
0
    {
187
0
        if( !xTranslit2.is() )
188
0
            xTranslit2.set( Transliteration::create( m_xContext ) );
189
        // Load transliteration module
190
0
        xTranslit2->loadModule(
191
0
             static_cast<TransliterationModules>(maskComplexTrans(transliterateFlags)),
192
0
             aSrchPara.Locale);
193
0
    }
194
195
6
    if ( !xBreak.is() )
196
3
        xBreak = css::i18n::BreakIterator::create( m_xContext );
197
198
6
    sSrchStr = aSrchPara.searchString;
199
200
    // Transliterate search string.
201
6
    if (aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
202
0
    {
203
0
        if (isSimpleRegexTrans(transliterateFlags))
204
0
        {
205
0
            if (maskSimpleRegexTrans(transliterateFlags) !=
206
0
                    maskSimpleTrans(transliterateFlags))
207
0
            {
208
0
                css::uno::Reference< XExtendedTransliteration > xTranslitPattern(
209
0
                         Transliteration::create( m_xContext ));
210
0
                if (xTranslitPattern.is())
211
0
                {
212
0
                    xTranslitPattern->loadModule(
213
0
                            static_cast<TransliterationModules>(maskSimpleRegexTrans(transliterateFlags)),
214
0
                            aSrchPara.Locale);
215
0
                    sSrchStr = xTranslitPattern->transliterateString2String(
216
0
                            aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
217
0
                }
218
0
            }
219
0
            else
220
0
            {
221
0
                if (xTranslit.is())
222
0
                    sSrchStr = xTranslit->transliterateString2String(
223
0
                            aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
224
0
            }
225
            // xTranslit2 complex transliterated sSrchStr2 is not used in
226
            // regex, see TextSearch::searchForward() and
227
            // TextSearch::searchBackward()
228
0
        }
229
0
    }
230
6
    else
231
6
    {
232
6
        if ( xTranslit.is() && isSimpleTrans(transliterateFlags) )
233
3
            sSrchStr = xTranslit->transliterateString2String(
234
3
                    aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
235
236
6
        if ( xTranslit2.is() && isComplexTrans(transliterateFlags) )
237
0
            sSrchStr2 = xTranslit2->transliterateString2String(
238
0
                    aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
239
6
    }
240
241
6
    if ( bReplaceApostrophe )
242
0
        sSrchStr = replacePunctuation(sSrchStr);
243
244
    // Take the new SearchOptions2::AlgorithmType2 field and ignore
245
    // SearchOptions::algorithmType
246
6
    switch( aSrchPara.AlgorithmType2)
247
6
    {
248
0
        case SearchAlgorithms2::REGEXP:
249
0
            fnForward = &TextSearch::RESrchFrwrd;
250
0
            fnBackward = &TextSearch::RESrchBkwrd;
251
0
            RESrchPrepare( aSrchPara);
252
0
            break;
253
254
0
        case SearchAlgorithms2::APPROXIMATE:
255
0
            fnForward = &TextSearch::ApproxSrchFrwrd;
256
0
            fnBackward = &TextSearch::ApproxSrchBkwrd;
257
258
0
            pWLD.reset( new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
259
0
                    aSrchPara.insertedChars, aSrchPara.deletedChars,
260
0
                    0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ) );
261
262
0
            nLimit = pWLD->GetLimit();
263
0
            break;
264
265
2
        case SearchAlgorithms2::WILDCARD:
266
2
            mcWildcardEscapeChar = static_cast<sal_uInt32>(aSrchPara.WildcardEscapeCharacter);
267
2
            mbWildcardAllowSubstring = ((aSrchPara.searchFlag & SearchFlags::WILD_MATCH_SELECTION) == 0);
268
2
            fnForward = &TextSearch::WildcardSrchFrwrd;
269
2
            fnBackward = &TextSearch::WildcardSrchBkwrd;
270
2
            break;
271
272
0
        default:
273
0
            SAL_WARN("i18npool","TextSearch::setOptions2 - default what?");
274
0
            [[fallthrough]];
275
4
        case SearchAlgorithms2::ABSOLUTE:
276
4
            fnForward = &TextSearch::NSrchFrwrd;
277
4
            fnBackward = &TextSearch::NSrchBkwrd;
278
4
            break;
279
6
    }
280
6
}
281
282
void TextSearch::setOptions( const SearchOptions& rOptions )
283
3
{
284
3
    sal_Int16 nAlgorithmType2;
285
3
    switch (rOptions.algorithmType)
286
3
    {
287
0
        case SearchAlgorithms_REGEXP:
288
0
            nAlgorithmType2 = SearchAlgorithms2::REGEXP;
289
0
            break;
290
0
        case SearchAlgorithms_APPROXIMATE:
291
0
            nAlgorithmType2 = SearchAlgorithms2::APPROXIMATE;
292
0
            break;
293
0
        default:
294
0
            SAL_WARN("i18npool","TextSearch::setOptions - default what?");
295
0
            [[fallthrough]];
296
3
        case SearchAlgorithms_ABSOLUTE:
297
3
            nAlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
298
3
            break;
299
3
    }
300
    // It would be nice if an inherited struct had a ctor that takes an
301
    // instance of the object the struct derived from...
302
3
    SearchOptions2 aOptions2(
303
3
            rOptions.algorithmType,
304
3
            rOptions.searchFlag,
305
3
            rOptions.searchString,
306
3
            rOptions.replaceString,
307
3
            rOptions.Locale,
308
3
            rOptions.changedChars,
309
3
            rOptions.deletedChars,
310
3
            rOptions.insertedChars,
311
3
            rOptions.transliterateFlags,
312
3
            nAlgorithmType2,
313
3
            0   // no wildcard search, no escape character...
314
3
            );
315
3
    setOptions2( aOptions2);
316
3
}
317
318
static sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
319
0
{
320
0
    auto pOff = std::find_if(rOff.begin(), rOff.end(),
321
0
        [nPos](const sal_Int32 nOff) { return nOff >= nPos; });
322
0
    return static_cast<sal_Int32>(std::distance(rOff.begin(), pOff));
323
0
}
324
325
SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
326
1.31k
{
327
1.31k
    std::unique_lock g(m_aMutex);
328
329
1.31k
    SearchResult sres;
330
331
1.31k
    OUString in_str(searchStr);
332
333
    // in non-regex mode, allow searching typographical apostrophe with the ASCII one
334
    // to avoid regression after using automatic conversion to U+2019 during typing in Writer
335
1.31k
    bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str);
336
337
1.31k
    bUsePrimarySrchStr = true;
338
339
1.31k
    if ( xTranslit.is() )
340
1.31k
    {
341
        // apply normal transliteration (1<->1, 1<->0)
342
343
1.31k
        sal_Int32 nInStartPos = startPos;
344
1.31k
        if (pRegexMatcher && startPos > 0)
345
0
        {
346
            // tdf#89665, tdf#75806: An optimization to avoid transliterating the whole string, yet
347
            // transliterate enough of the leading text to allow sensible look-behind assertions.
348
            // 100 is chosen arbitrarily in the hope that look-behind assertions would largely fit.
349
            // See http://userguide.icu-project.org/strings/regexp for look-behind assertion syntax.
350
            // When search regex doesn't start with an assertion, 3 is to allow startPos to be in
351
            // the middle of a surrogate pair, preceded by another surrogate pair.
352
0
            const sal_Int32 nMaxLeadingLen = aSrchPara.searchString.startsWith("(?") ? 100 : 3;
353
0
            nInStartPos -= std::min(nMaxLeadingLen, startPos);
354
0
        }
355
1.31k
        sal_Int32 nInEndPos = endPos;
356
1.31k
        if (pRegexMatcher && endPos < searchStr.getLength())
357
0
        {
358
            // tdf#65038: ditto for look-ahead assertions
359
0
            const sal_Int32 nMaxTrailingLen = aSrchPara.searchString.endsWith(")") ? 100 : 3;
360
0
            nInEndPos += std::min(nMaxTrailingLen, searchStr.getLength() - endPos);
361
0
        }
362
363
1.31k
        css::uno::Sequence<sal_Int32> offset(nInEndPos - nInStartPos);
364
1.31k
        in_str = xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
365
366
1.31k
        if ( bReplaceApostrophe )
367
0
            in_str = replacePunctuation(in_str);
368
369
        // JP 20.6.2001: also the start and end positions must be corrected!
370
1.31k
        sal_Int32 newStartPos =
371
1.31k
            (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
372
373
1.31k
        sal_Int32 newEndPos = (endPos < searchStr.getLength())
374
1.31k
            ? FindPosInSeq_Impl( offset, endPos )
375
1.31k
            : in_str.getLength();
376
377
1.31k
        sres = (this->*fnForward)( g, in_str, newStartPos, newEndPos );
378
379
        // Map offsets back to untransliterated string.
380
1.31k
        const sal_Int32 nOffsets = offset.getLength();
381
1.31k
        if (nOffsets)
382
1.31k
        {
383
1.31k
            auto sres_startOffsetRange = asNonConstRange(sres.startOffset);
384
1.31k
            auto sres_endOffsetRange = asNonConstRange(sres.endOffset);
385
            // For regex nGroups is the number of groups+1 with group 0 being
386
            // the entire match.
387
1.31k
            const sal_Int32 nGroups = sres.startOffset.getLength();
388
1.50k
            for ( sal_Int32 k = 0; k < nGroups; k++ )
389
185
            {
390
185
                const sal_Int32 nStart = sres.startOffset[k];
391
                // Result offsets are negative (-1) if a group expression was
392
                // not matched.
393
185
                if (nStart >= 0)
394
185
                    sres_startOffsetRange[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
395
                // JP 20.6.2001: end is ever exclusive and then don't return
396
                //               the position of the next character - return the
397
                //               next position behind the last found character!
398
                //               "a b c" find "b" must return 2,3 and not 2,4!!!
399
185
                const sal_Int32 nStop = sres.endOffset[k];
400
185
                if (nStop >= 0)
401
185
                {
402
185
                    if (nStop > 0)
403
185
                        sres_endOffsetRange[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
404
0
                    else
405
0
                        sres_endOffsetRange[k] = offset[0];
406
185
                }
407
185
            }
408
1.31k
        }
409
1.31k
    }
410
0
    else
411
0
    {
412
0
        if ( bReplaceApostrophe )
413
0
            in_str = in_str.replace(u'\u2019', '\'');
414
415
0
        sres = (this->*fnForward)( g, in_str, startPos, endPos );
416
0
    }
417
418
1.31k
    if ( xTranslit2.is() && aSrchPara.AlgorithmType2 != SearchAlgorithms2::REGEXP)
419
0
    {
420
0
        SearchResult sres2;
421
422
0
        in_str = searchStr;
423
0
        css::uno::Sequence <sal_Int32> offset( in_str.getLength());
424
425
0
        in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
426
427
0
        if( startPos )
428
0
            startPos = FindPosInSeq_Impl( offset, startPos );
429
430
0
        if( endPos < searchStr.getLength() )
431
0
            endPos = FindPosInSeq_Impl( offset, endPos );
432
0
        else
433
0
            endPos = in_str.getLength();
434
435
0
        bUsePrimarySrchStr = false;
436
0
        sres2 = (this->*fnForward)( g, in_str, startPos, endPos );
437
0
        auto sres2_startOffsetRange = asNonConstRange(sres2.startOffset);
438
0
        auto sres2_endOffsetRange = asNonConstRange(sres2.endOffset);
439
440
0
        for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
441
0
        {
442
0
            if (sres2.startOffset[k])
443
0
                sres2_startOffsetRange[k] = offset[sres2.startOffset[k]-1] + 1;
444
0
            if (sres2.endOffset[k])
445
0
                sres2_endOffsetRange[k] = offset[sres2.endOffset[k]-1] + 1;
446
0
        }
447
448
        // pick first and long one
449
0
        if ( sres.subRegExpressions == 0)
450
0
            return sres2;
451
0
        if ( sres2.subRegExpressions == 1)
452
0
        {
453
0
            if ( sres.startOffset[0] > sres2.startOffset[0])
454
0
                return sres2;
455
0
            else if ( sres.startOffset[0] == sres2.startOffset[0] &&
456
0
                    sres.endOffset[0] < sres2.endOffset[0])
457
0
                return sres2;
458
0
        }
459
0
    }
460
461
1.31k
    return sres;
462
1.31k
}
463
464
SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
465
0
{
466
0
    std::unique_lock g(m_aMutex);
467
468
0
    SearchResult sres;
469
470
0
    OUString in_str(searchStr);
471
472
    // in non-regex mode, allow searching typographical apostrophe with the ASCII one
473
    // to avoid regression after using automatic conversion to U+2019 during typing in Writer
474
0
    bool bReplaceApostrophe = bSearchApostrophe && isReplacePunctuation(in_str);
475
476
0
    bUsePrimarySrchStr = true;
477
478
0
    if ( xTranslit.is() )
479
0
    {
480
        // apply only simple 1<->1 transliteration here
481
0
        css::uno::Sequence<sal_Int32> offset(startPos - endPos);
482
0
        in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
483
484
0
        if ( bReplaceApostrophe )
485
0
            in_str = replacePunctuation(in_str);
486
487
        // JP 20.6.2001: also the start and end positions must be corrected!
488
0
        sal_Int32 const newStartPos = (startPos < searchStr.getLength())
489
0
            ? FindPosInSeq_Impl( offset, startPos )
490
0
            : in_str.getLength();
491
492
0
        sal_Int32 const newEndPos =
493
0
            (endPos == 0) ? 0 : FindPosInSeq_Impl( offset, endPos );
494
495
        // TODO: this would need nExtraOffset handling to avoid $ matching
496
        // if (pRegexMatcher && startPos < searchStr.getLength())
497
        // but that appears to be impossible with ICU regex
498
499
0
        sres = (this->*fnBackward)( g, in_str, newStartPos, newEndPos );
500
501
        // Map offsets back to untransliterated string.
502
0
        const sal_Int32 nOffsets = offset.getLength();
503
0
        if (nOffsets)
504
0
        {
505
0
            auto sres_startOffsetRange = asNonConstRange(sres.startOffset);
506
0
            auto sres_endOffsetRange = asNonConstRange(sres.endOffset);
507
            // For regex nGroups is the number of groups+1 with group 0 being
508
            // the entire match.
509
0
            const sal_Int32 nGroups = sres.startOffset.getLength();
510
0
            for ( sal_Int32 k = 0; k < nGroups; k++ )
511
0
            {
512
0
                const sal_Int32 nStart = sres.startOffset[k];
513
                // Result offsets are negative (-1) if a group expression was
514
                // not matched.
515
0
                if (nStart >= 0)
516
0
                {
517
0
                    if (nStart > 0)
518
0
                        sres_startOffsetRange[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
519
0
                    else
520
0
                        sres_startOffsetRange[k] = offset[0];
521
0
                }
522
                // JP 20.6.2001: end is ever exclusive and then don't return
523
                //               the position of the next character - return the
524
                //               next position behind the last found character!
525
                //               "a b c" find "b" must return 2,3 and not 2,4!!!
526
0
                const sal_Int32 nStop = sres.endOffset[k];
527
0
                if (nStop >= 0)
528
0
                    sres_endOffsetRange[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
529
0
            }
530
0
        }
531
0
    }
532
0
    else
533
0
    {
534
0
        if ( bReplaceApostrophe )
535
0
            in_str = replacePunctuation(in_str);
536
537
0
        sres = (this->*fnBackward)( g, in_str, startPos, endPos );
538
0
    }
539
540
0
    if ( xTranslit2.is() && aSrchPara.AlgorithmType2 != SearchAlgorithms2::REGEXP )
541
0
    {
542
0
        SearchResult sres2;
543
544
0
        in_str = searchStr;
545
0
        css::uno::Sequence <sal_Int32> offset( in_str.getLength());
546
547
0
        in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
548
549
0
        if( startPos < searchStr.getLength() )
550
0
            startPos = FindPosInSeq_Impl( offset, startPos );
551
0
        else
552
0
            startPos = in_str.getLength();
553
554
0
        if( endPos )
555
0
            endPos = FindPosInSeq_Impl( offset, endPos );
556
557
0
        bUsePrimarySrchStr = false;
558
0
        sres2 = (this->*fnBackward)( g, in_str, startPos, endPos );
559
0
        auto sres2_startOffsetRange = asNonConstRange(sres2.startOffset);
560
0
        auto sres2_endOffsetRange = asNonConstRange(sres2.endOffset);
561
562
0
        for( int k = 0; k < sres2.startOffset.getLength(); k++ )
563
0
        {
564
0
            if (sres2.startOffset[k])
565
0
                sres2_startOffsetRange[k] = offset[sres2.startOffset[k]-1]+1;
566
0
            if (sres2.endOffset[k])
567
0
                sres2_endOffsetRange[k] = offset[sres2.endOffset[k]-1]+1;
568
0
        }
569
570
        // pick last and long one
571
0
        if ( sres.subRegExpressions == 0 )
572
0
            return sres2;
573
0
        if ( sres2.subRegExpressions == 1 )
574
0
        {
575
0
            if ( sres.startOffset[0] < sres2.startOffset[0] )
576
0
                return sres2;
577
0
            if ( sres.startOffset[0] == sres2.startOffset[0] &&
578
0
                    sres.endOffset[0] > sres2.endOffset[0] )
579
0
                return sres2;
580
0
        }
581
0
    }
582
583
0
    return sres;
584
0
}
585
586
587
bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
588
0
{
589
0
    bool bRet = true;
590
0
    if( '\x7f' != rStr[nPos])
591
0
    {
592
0
        if ( !xCharClass.is() )
593
0
             xCharClass = CharacterClassification::create( m_xContext );
594
0
        sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
595
0
                aSrchPara.Locale );
596
0
        if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
597
0
                        KCharacterType::LETTER ) & nCType ) )
598
0
            bRet = false;
599
0
    }
600
0
    return bRet;
601
0
}
602
603
// --------- helper methods for Boyer-Moore like text searching ----------
604
// TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
605
606
void TextSearch::MakeForwardTab()
607
24
{
608
    // create the jumptable for the search text
609
610
24
    if( pJumpTable && bIsForwardTab )
611
23
    {
612
23
        return; // the jumpTable is ok
613
23
    }
614
1
    bIsForwardTab = true;
615
616
1
    sal_Int32 n, nLen = sSrchStr.getLength();
617
1
    pJumpTable.reset( new TextSearchJumpTable );
618
619
9
    for( n = 0; n < nLen - 1; ++n )
620
8
    {
621
8
        sal_Unicode cCh = sSrchStr[n];
622
8
        sal_Int32 nDiff = nLen - n - 1;
623
8
        TextSearchJumpTable::value_type aEntry( cCh, nDiff );
624
625
8
        ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
626
8
            pJumpTable->insert( aEntry );
627
8
        if ( !aPair.second )
628
1
            (*(aPair.first)).second = nDiff;
629
8
    }
630
1
}
631
632
void TextSearch::MakeForwardTab2()
633
0
{
634
    // create the jumptable for the search text
635
0
    if( pJumpTable2 && bIsForwardTab )
636
0
    {
637
0
        return;        // the jumpTable is ok
638
0
    }
639
0
    bIsForwardTab = true;
640
641
0
    sal_Int32 n, nLen = sSrchStr2.getLength();
642
0
    pJumpTable2.reset( new TextSearchJumpTable );
643
644
0
    for( n = 0; n < nLen - 1; ++n )
645
0
    {
646
0
        sal_Unicode cCh = sSrchStr2[n];
647
0
        sal_Int32 nDiff = nLen - n - 1;
648
649
0
        TextSearchJumpTable::value_type aEntry( cCh, nDiff );
650
0
        ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
651
0
            pJumpTable2->insert( aEntry );
652
0
        if ( !aPair.second )
653
0
            (*(aPair.first)).second = nDiff;
654
0
    }
655
0
}
656
657
void TextSearch::MakeBackwardTab()
658
0
{
659
    // create the jumptable for the search text
660
0
    if( pJumpTable && !bIsForwardTab)
661
0
    {
662
0
        return;   // the jumpTable is ok
663
0
    }
664
0
    bIsForwardTab = false;
665
666
0
    sal_Int32 n, nLen = sSrchStr.getLength();
667
0
    pJumpTable.reset( new TextSearchJumpTable );
668
669
0
    for( n = nLen-1; n > 0; --n )
670
0
    {
671
0
        sal_Unicode cCh = sSrchStr[n];
672
0
        TextSearchJumpTable::value_type aEntry( cCh, n );
673
0
        ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
674
0
            pJumpTable->insert( aEntry );
675
0
        if ( !aPair.second )
676
0
            (*(aPair.first)).second = n;
677
0
    }
678
0
}
679
680
void TextSearch::MakeBackwardTab2()
681
0
{
682
    // create the jumptable for the search text
683
0
    if( pJumpTable2 && !bIsForwardTab )
684
0
    {
685
0
        return;    // the jumpTable is ok
686
0
    }
687
0
    bIsForwardTab = false;
688
689
0
    sal_Int32 n, nLen = sSrchStr2.getLength();
690
0
    pJumpTable2.reset( new TextSearchJumpTable );
691
692
0
    for( n = nLen-1; n > 0; --n )
693
0
    {
694
0
        sal_Unicode cCh = sSrchStr2[n];
695
0
        TextSearchJumpTable::value_type aEntry( cCh, n );
696
0
        ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
697
0
            pJumpTable2->insert( aEntry );
698
0
        if ( !aPair.second )
699
0
            (*(aPair.first)).second = n;
700
0
    }
701
0
}
702
703
sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
704
165
{
705
165
    TextSearchJumpTable *pJump;
706
165
    OUString sSearchKey;
707
708
165
    if ( bUsePrimarySrchStr ) {
709
165
        pJump = pJumpTable.get();
710
165
        sSearchKey = sSrchStr;
711
165
    } else {
712
0
        pJump = pJumpTable2.get();
713
0
        sSearchKey = sSrchStr2;
714
0
    }
715
716
165
    TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
717
165
    if ( iLook == pJump->end() )
718
120
        return sSearchKey.getLength();
719
45
    return (*iLook).second;
720
165
}
721
722
723
SearchResult TextSearch::NSrchFrwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
724
24
{
725
24
    SearchResult aRet;
726
24
    aRet.subRegExpressions = 0;
727
728
24
    OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
729
730
24
    sal_Int32 nSuchIdx = searchStr.getLength();
731
24
    sal_Int32 nEnd = endPos;
732
24
    if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
733
0
        return aRet;
734
735
736
24
    if( nEnd < sSearchKey.getLength() )   // position inside the search region ?
737
0
        return aRet;
738
739
24
    nEnd -= sSearchKey.getLength();
740
741
24
    if (bUsePrimarySrchStr)
742
24
      MakeForwardTab();                   // create the jumptable
743
0
    else
744
0
      MakeForwardTab2();
745
746
24
    for (sal_Int32 nCmpIdx = startPos; // start position for the search
747
189
            nCmpIdx <= nEnd;
748
165
            nCmpIdx += GetDiff( searchStr[nCmpIdx + sSearchKey.getLength()-1]))
749
180
    {
750
180
        nSuchIdx = sSearchKey.getLength() - 1;
751
315
        while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == searchStr[nCmpIdx + nSuchIdx])
752
150
        {
753
150
            if( nSuchIdx == 0 )
754
15
            {
755
15
                if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
756
0
                {
757
0
                    sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
758
0
                    bool bAtStart = !nCmpIdx;
759
0
                    bool bAtEnd = nFndEnd == endPos;
760
0
                    bool bDelimBefore = bAtStart || IsDelimiter( searchStr, nCmpIdx-1 );
761
0
                    bool bDelimBehind = bAtEnd || IsDelimiter(  searchStr, nFndEnd );
762
                    //  *       1 -> only one word in the paragraph
763
                    //  *       2 -> at begin of paragraph
764
                    //  *       3 -> at end of paragraph
765
                    //  *       4 -> inside the paragraph
766
0
                    if( !(  ( bAtStart && bAtEnd ) ||           // 1
767
0
                                ( bAtStart && bDelimBehind ) ||     // 2
768
0
                                ( bAtEnd && bDelimBefore ) ||       // 3
769
0
                                ( bDelimBefore && bDelimBehind )))  // 4
770
0
                        break;
771
0
                }
772
773
15
                aRet.subRegExpressions = 1;
774
15
                aRet.startOffset = { nCmpIdx };
775
15
                aRet.endOffset = { nCmpIdx + sSearchKey.getLength() };
776
777
15
                return aRet;
778
15
            }
779
135
            else
780
135
                nSuchIdx--;
781
150
        }
782
180
    }
783
9
    return aRet;
784
24
}
785
786
SearchResult TextSearch::NSrchBkwrd( std::unique_lock<std::mutex>& /*rGuard*/,const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
787
0
{
788
0
    SearchResult aRet;
789
0
    aRet.subRegExpressions = 0;
790
791
0
    OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
792
793
0
    sal_Int32 nSuchIdx = searchStr.getLength();
794
0
    sal_Int32 nEnd = endPos;
795
0
    if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx)
796
0
        return aRet;
797
798
0
    if (bUsePrimarySrchStr)
799
0
        MakeBackwardTab();                  // create the jumptable
800
0
    else
801
0
        MakeBackwardTab2();
802
803
0
    if( nEnd == nSuchIdx )                  // end position for the search
804
0
        nEnd = sSearchKey.getLength();
805
0
    else
806
0
        nEnd += sSearchKey.getLength();
807
808
0
    sal_Int32 nCmpIdx = startPos;          // start position for the search
809
810
0
    while (nCmpIdx >= nEnd)
811
0
    {
812
0
        nSuchIdx = 0;
813
0
        while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
814
0
                searchStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
815
0
            nSuchIdx++;
816
0
        if( nSuchIdx >= sSearchKey.getLength() )
817
0
        {
818
0
            if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
819
0
            {
820
0
                sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
821
0
                bool bAtStart = !nFndStt;
822
0
                bool bAtEnd = nCmpIdx == startPos;
823
0
                bool bDelimBehind = bAtEnd || IsDelimiter( searchStr, nCmpIdx );
824
0
                bool bDelimBefore = bAtStart || // begin of paragraph
825
0
                    IsDelimiter( searchStr, nFndStt-1 );
826
                //  *       1 -> only one word in the paragraph
827
                //  *       2 -> at begin of paragraph
828
                //  *       3 -> at end of paragraph
829
                //  *       4 -> inside the paragraph
830
0
                if( ( bAtStart && bAtEnd ) ||           // 1
831
0
                        ( bAtStart && bDelimBehind ) ||     // 2
832
0
                        ( bAtEnd && bDelimBefore ) ||       // 3
833
0
                        ( bDelimBefore && bDelimBehind ))   // 4
834
0
                {
835
0
                    aRet.subRegExpressions = 1;
836
0
                    aRet.startOffset = { nCmpIdx };
837
0
                    aRet.endOffset = { nCmpIdx - sSearchKey.getLength() };
838
0
                    return aRet;
839
0
                }
840
0
            }
841
0
            else
842
0
            {
843
0
                aRet.subRegExpressions = 1;
844
0
                aRet.startOffset = { nCmpIdx };
845
0
                aRet.endOffset = { nCmpIdx - sSearchKey.getLength() };
846
0
                return aRet;
847
0
            }
848
0
        }
849
0
        nSuchIdx = GetDiff( searchStr[nCmpIdx - sSearchKey.getLength()] );
850
0
        if( nCmpIdx < nSuchIdx )
851
0
            return aRet;
852
0
        nCmpIdx -= nSuchIdx;
853
0
    }
854
0
    return aRet;
855
0
}
856
857
void TextSearch::RESrchPrepare( const css::util::SearchOptions2& rOptions)
858
0
{
859
0
    TransliterationFlags transliterateFlags = static_cast<TransliterationFlags>(rOptions.transliterateFlags);
860
    // select the transliterated pattern string
861
0
    const OUString& rPatternStr =
862
0
        (isSimpleTrans(transliterateFlags) ? sSrchStr
863
0
        : (isComplexTrans(transliterateFlags) ? sSrchStr2 : rOptions.searchString));
864
865
0
    sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
866
    // map css::util::SearchFlags to ICU uregex.h flags
867
    // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
868
    // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
869
    // REG_NOSUB is not used anywhere => not implemented
870
    // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
871
    // LEV_RELAXED is only used for SearchAlgorithm==Approximate
872
    // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
873
    // probably because the transliteration flag IGNORE_CASE handles it as well.
874
0
    if( (rOptions.searchFlag & css::util::SearchFlags::ALL_IGNORE_CASE) != 0
875
0
    ||  (transliterateFlags & TransliterationFlags::IGNORE_CASE))
876
0
        nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
877
0
    UErrorCode nIcuErr = U_ZERO_ERROR;
878
    // assumption: transliteration didn't mangle regexp control chars
879
0
    icu::UnicodeString aIcuSearchPatStr( reinterpret_cast<const UChar*>(rPatternStr.getStr()), rPatternStr.getLength());
880
0
#ifndef DISABLE_WORDBOUND_EMULATION
881
    // for convenience specific syntax elements of the old regex engine are emulated
882
    // - by replacing \< with "word-break followed by a look-ahead word-char"
883
0
    static const icu::UnicodeString aChevronPatternB( "\\\\<", -1, icu::UnicodeString::kInvariant);
884
0
    static const icu::UnicodeString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, icu::UnicodeString::kInvariant);
885
0
    static icu::RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
886
0
    aChevronMatcherB.reset( aIcuSearchPatStr);
887
0
    aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
888
0
    aChevronMatcherB.reset();
889
    // - by replacing \> with "look-behind word-char followed by a word-break"
890
0
    static const icu::UnicodeString aChevronPatternE( "\\\\>", -1, icu::UnicodeString::kInvariant);
891
0
    static const icu::UnicodeString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, icu::UnicodeString::kInvariant);
892
0
    static icu::RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
893
0
    aChevronMatcherE.reset( aIcuSearchPatStr);
894
0
    aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
895
0
    aChevronMatcherE.reset();
896
0
#endif
897
0
    pRegexMatcher.reset( new icu::RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr) );
898
0
    if (nIcuErr)
899
0
    {
900
0
        SAL_INFO( "i18npool", "TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
901
0
        pRegexMatcher.reset();
902
0
    }
903
0
    else
904
0
    {
905
        // Pathological patterns may result in exponential run time making the
906
        // application appear to be frozen. Limit that. Documentation for this
907
        // call says
908
        // https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
909
        // "The units of the limit are steps of the match engine.
910
        // Correspondence with actual processor time will depend on the speed
911
        // of the processor and the details of the specific pattern, but will
912
        // typically be on the order of milliseconds."
913
        // Just what is a good value? 42 is always an answer ... the 23 enigma
914
        // as well... which on the dev's machine is roughly 50 seconds with the
915
        // pattern of fdo#70627.
916
        /* TODO: make this a configuration settable value and possibly take
917
         * complexity of expression into account and maybe even length of text
918
         * to be matched; currently (2013-11-25) that is at most one 64k
919
         * paragraph per RESrchFrwrd()/RESrchBkwrd() call. */
920
0
        pRegexMatcher->setTimeLimit( 23*1000, nIcuErr);
921
0
    }
922
0
}
923
924
925
static bool lcl_findRegex(std::unique_ptr<icu::RegexMatcher> const& pRegexMatcher,
926
                          sal_Int32 nStartPos, sal_Int32 nEndPos, UErrorCode& rIcuErr)
927
0
{
928
0
    pRegexMatcher->region(nStartPos, nEndPos, rIcuErr);
929
0
    pRegexMatcher->useAnchoringBounds(false); // use whole text's anchoring bounds, not region's
930
0
    pRegexMatcher->useTransparentBounds(true); // take text outside of the region into account for
931
                                               // look-ahead/behind assertions
932
933
0
    if (!pRegexMatcher->find(rIcuErr))
934
0
    {
935
        /* TODO: future versions could pass the UErrorCode or translations
936
         * thereof to the caller, for example to inform the user of
937
         * U_REGEX_TIME_OUT. The strange thing though is that an error is set
938
         * only after the second call that returns immediately and not if
939
         * timeout occurred on the first call?!? */
940
0
        SAL_INFO( "i18npool", "lcl_findRegex UErrorCode " << rIcuErr);
941
0
        return false;
942
0
    }
943
0
    return true;
944
0
}
945
946
SearchResult TextSearch::RESrchFrwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr,
947
                                      sal_Int32 startPos, sal_Int32 endPos )
948
0
{
949
0
    SearchResult aRet;
950
0
    aRet.subRegExpressions = 0;
951
0
    if( !pRegexMatcher)
952
0
        return aRet;
953
954
0
    if( endPos > searchStr.getLength())
955
0
        endPos = searchStr.getLength();
956
957
    // use the ICU RegexMatcher to find the matches
958
0
    UErrorCode nIcuErr = U_ZERO_ERROR;
959
0
    const icu::UnicodeString aSearchTargetStr(false, reinterpret_cast<const UChar*>(searchStr.getStr()),
960
0
                                        searchStr.getLength());
961
0
    pRegexMatcher->reset( aSearchTargetStr);
962
    // search until there is a valid match
963
0
    for(;;)
964
0
    {
965
0
        if (!lcl_findRegex( pRegexMatcher, startPos, endPos, nIcuErr))
966
0
            return aRet;
967
968
        // #i118887# ignore zero-length matches e.g. "a*" in "bc"
969
0
        int nStartOfs = pRegexMatcher->start( nIcuErr);
970
0
        int nEndOfs = pRegexMatcher->end( nIcuErr);
971
0
        if( nStartOfs < nEndOfs)
972
0
            break;
973
        // If the zero-length match is behind the string, do not match it again
974
        // and again until startPos reaches there. A match behind the string is
975
        // a "$" anchor.
976
0
        if (nStartOfs == endPos)
977
0
            break;
978
        // try at next position if there was a zero-length match
979
0
        if( ++startPos >= endPos)
980
0
            return aRet;
981
0
    }
982
983
    // extract the result of the search
984
0
    const int nGroupCount = pRegexMatcher->groupCount();
985
0
    aRet.subRegExpressions = nGroupCount + 1;
986
0
    aRet.startOffset.realloc( aRet.subRegExpressions);
987
0
    auto pstartOffset = aRet.startOffset.getArray();
988
0
    aRet.endOffset.realloc( aRet.subRegExpressions);
989
0
    auto pendOffset = aRet.endOffset.getArray();
990
0
    pstartOffset[0] = pRegexMatcher->start( nIcuErr);
991
0
    pendOffset[0]   = pRegexMatcher->end( nIcuErr);
992
0
    for( int i = 1; i <= nGroupCount; ++i) {
993
0
        pstartOffset[i] = pRegexMatcher->start( i, nIcuErr);
994
0
        pendOffset[i]   = pRegexMatcher->end( i, nIcuErr);
995
0
    }
996
997
0
    return aRet;
998
0
}
999
1000
SearchResult TextSearch::RESrchBkwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr,
1001
                                      sal_Int32 startPos, sal_Int32 endPos )
1002
0
{
1003
    // NOTE: for backwards search callers provide startPos/endPos inverted!
1004
0
    SearchResult aRet;
1005
0
    aRet.subRegExpressions = 0;
1006
0
    if( !pRegexMatcher)
1007
0
        return aRet;
1008
1009
0
    if( startPos > searchStr.getLength())
1010
0
        startPos = searchStr.getLength();
1011
1012
    // use the ICU RegexMatcher to find the matches
1013
    // TODO: use ICU's backward searching once it becomes available
1014
    //       as its replacement using forward search is not as good as the real thing
1015
0
    UErrorCode nIcuErr = U_ZERO_ERROR;
1016
0
    const icu::UnicodeString aSearchTargetStr(false, reinterpret_cast<const UChar*>(searchStr.getStr()),
1017
0
                                        searchStr.getLength());
1018
0
    pRegexMatcher->reset( aSearchTargetStr);
1019
0
    if (!lcl_findRegex( pRegexMatcher, endPos, startPos, nIcuErr))
1020
0
        return aRet;
1021
1022
    // find the last match
1023
0
    int nLastPos = 0;
1024
0
    int nFoundEnd = 0;
1025
0
    int nGoodPos = 0, nGoodEnd = 0;
1026
0
    bool bFirst = true;
1027
0
    do {
1028
0
        nLastPos = pRegexMatcher->start( nIcuErr);
1029
0
        nFoundEnd = pRegexMatcher->end( nIcuErr);
1030
0
        if (nLastPos < nFoundEnd)
1031
0
        {
1032
            // remember last non-zero-length match
1033
0
            nGoodPos = nLastPos;
1034
0
            nGoodEnd = nFoundEnd;
1035
0
        }
1036
0
        if( nFoundEnd >= startPos)
1037
0
            break;
1038
0
        bFirst = false;
1039
0
        if( nFoundEnd == nLastPos)
1040
0
            ++nFoundEnd;
1041
0
    } while( lcl_findRegex( pRegexMatcher, nFoundEnd, startPos, nIcuErr));
1042
1043
    // Ignore all zero-length matches except "$" anchor on first match.
1044
0
    if (nGoodPos == nGoodEnd)
1045
0
    {
1046
0
        if (bFirst && nLastPos == startPos)
1047
0
            nGoodPos = nLastPos;
1048
0
        else
1049
0
            return aRet;
1050
0
    }
1051
1052
    // find last match again to get its details
1053
0
    lcl_findRegex( pRegexMatcher, nGoodPos, startPos, nIcuErr);
1054
1055
    // fill in the details of the last match
1056
0
    const int nGroupCount = pRegexMatcher->groupCount();
1057
0
    aRet.subRegExpressions = nGroupCount + 1;
1058
0
    aRet.startOffset.realloc( aRet.subRegExpressions);
1059
0
    auto pstartOffset = aRet.startOffset.getArray();
1060
0
    aRet.endOffset.realloc( aRet.subRegExpressions);
1061
0
    auto pendOffset = aRet.endOffset.getArray();
1062
    // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
1063
0
    pstartOffset[0] = pRegexMatcher->end( nIcuErr);
1064
0
    pendOffset[0]   = pRegexMatcher->start( nIcuErr);
1065
0
    for( int i = 1; i <= nGroupCount; ++i) {
1066
0
        pstartOffset[i] = pRegexMatcher->end( i, nIcuErr);
1067
0
        pendOffset[i]   = pRegexMatcher->start( i, nIcuErr);
1068
0
    }
1069
1070
0
    return aRet;
1071
0
}
1072
1073
1074
// search for words phonetically
1075
SearchResult TextSearch::ApproxSrchFrwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr,
1076
                                          sal_Int32 startPos, sal_Int32 endPos )
1077
0
{
1078
0
    SearchResult aRet;
1079
0
    aRet.subRegExpressions = 0;
1080
1081
0
    if( !xBreak.is() )
1082
0
        return aRet;
1083
1084
0
    sal_Int32 nStt, nEnd;
1085
1086
0
    Boundary aWBnd = xBreak->getWordBoundary( searchStr, startPos,
1087
0
            aSrchPara.Locale,
1088
0
            WordType::ANYWORD_IGNOREWHITESPACES, true );
1089
1090
0
    do
1091
0
    {
1092
0
        if( aWBnd.startPos >= endPos )
1093
0
            break;
1094
0
        nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
1095
0
        nEnd = std::min(aWBnd.endPos, endPos);
1096
1097
0
        if( nStt < nEnd &&
1098
0
                pWLD->WLD( searchStr.getStr() + nStt, nEnd - nStt ) <= nLimit )
1099
0
        {
1100
0
            aRet.subRegExpressions = 1;
1101
0
            aRet.startOffset = { nStt };
1102
0
            aRet.endOffset = { nEnd };
1103
0
            break;
1104
0
        }
1105
1106
0
        nStt = nEnd - 1;
1107
0
        aWBnd = xBreak->nextWord( searchStr, nStt, aSrchPara.Locale,
1108
0
                WordType::ANYWORD_IGNOREWHITESPACES);
1109
0
    } while( aWBnd.startPos != aWBnd.endPos ||
1110
0
            (aWBnd.endPos != searchStr.getLength() && aWBnd.endPos != nEnd) );
1111
    // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
1112
    // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
1113
    // and nextWord() does also => don't loop forever.
1114
0
    return aRet;
1115
0
}
1116
1117
SearchResult TextSearch::ApproxSrchBkwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr,
1118
                                          sal_Int32 startPos, sal_Int32 endPos )
1119
0
{
1120
0
    SearchResult aRet;
1121
0
    aRet.subRegExpressions = 0;
1122
1123
0
    if( !xBreak.is() )
1124
0
        return aRet;
1125
1126
0
    sal_Int32 nStt, nEnd;
1127
1128
0
    Boundary aWBnd = xBreak->getWordBoundary( searchStr, startPos,
1129
0
            aSrchPara.Locale,
1130
0
            WordType::ANYWORD_IGNOREWHITESPACES, true );
1131
1132
0
    do
1133
0
    {
1134
0
        if( aWBnd.endPos <= endPos )
1135
0
            break;
1136
0
        nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
1137
0
        nEnd = std::min(aWBnd.endPos, startPos);
1138
1139
0
        if( nStt < nEnd &&
1140
0
                pWLD->WLD( searchStr.getStr() + nStt, nEnd - nStt ) <= nLimit )
1141
0
        {
1142
0
            aRet.subRegExpressions = 1;
1143
0
            aRet.startOffset = { nEnd };
1144
0
            aRet.endOffset = { nStt };
1145
0
            break;
1146
0
        }
1147
0
        if( !nStt )
1148
0
            break;
1149
1150
0
        aWBnd = xBreak->previousWord( searchStr, nStt, aSrchPara.Locale,
1151
0
                WordType::ANYWORD_IGNOREWHITESPACES);
1152
0
    } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != searchStr.getLength() );
1153
0
    return aRet;
1154
0
}
1155
1156
1157
namespace {
1158
void setWildcardMatch( css::util::SearchResult& rRes, sal_Int32 nStartOffset, sal_Int32 nEndOffset )
1159
170
{
1160
170
    rRes.subRegExpressions = 1;
1161
170
    rRes.startOffset = { nStartOffset };
1162
170
    rRes.endOffset = { nEndOffset };
1163
170
}
1164
}
1165
1166
SearchResult TextSearch::WildcardSrchFrwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr, sal_Int32 nStartPos, sal_Int32 nEndPos )
1167
1.29k
{
1168
1.29k
    SearchResult aRes;
1169
1.29k
    aRes.subRegExpressions = 0;     // no match
1170
1.29k
    sal_Int32 nStartOffset = nStartPos;
1171
1.29k
    sal_Int32 nEndOffset = nEndPos;
1172
1173
1.29k
    const sal_Int32 nStringLen = searchStr.getLength();
1174
1175
    // Forward nStartPos inclusive, nEndPos exclusive, but allow for empty
1176
    // string match with [0,0).
1177
1.29k
    if (nStartPos < 0 || nEndPos > nStringLen || nEndPos < nStartPos ||
1178
1.29k
            (nStartPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
1179
0
        return aRes;
1180
1181
1.29k
    const OUString& rPattern = (bUsePrimarySrchStr ? sSrchStr : sSrchStr2);
1182
1.29k
    const sal_Int32 nPatternLen = rPattern.getLength();
1183
1184
    // Handle special cases empty pattern and/or string outside of the loop to
1185
    // not add performance penalties there and simplify.
1186
1.29k
    if (nStartPos == nEndPos)
1187
0
    {
1188
0
        sal_Int32 i = 0;
1189
0
        while (i < nPatternLen && rPattern[i] == '*')
1190
0
            ++i;
1191
0
        if (i == nPatternLen)
1192
0
            setWildcardMatch( aRes, nStartOffset, nEndOffset);
1193
0
        return aRes;
1194
0
    }
1195
1196
    // Empty pattern does not match any non-empty string.
1197
1.29k
    if (!nPatternLen)
1198
0
        return aRes;
1199
1200
1.29k
    bool bRewind = false;
1201
1.29k
    sal_uInt32 cPattern = 0;
1202
1.29k
    sal_Int32 nPattern = 0;
1203
1.29k
    sal_Int32 nAfterFakePattern = nPattern;
1204
1.29k
    if (mbWildcardAllowSubstring)
1205
0
    {
1206
        // Fake a leading '*' wildcard.
1207
0
        cPattern = '*';
1208
0
        bRewind = true;
1209
        // Assume a non-'*' pattern character follows. If it is a '*' instead
1210
        // that will be handled in the loop by setting nPat.
1211
0
        sal_uInt32 cu = rPattern.iterateCodePoints( &nAfterFakePattern);
1212
0
        if (cu == mcWildcardEscapeChar && mcWildcardEscapeChar && nAfterFakePattern < nPatternLen)
1213
0
            rPattern.iterateCodePoints( &nAfterFakePattern);
1214
0
    }
1215
1216
1.29k
    sal_Int32 nString = nStartPos, nPat = -1, nStr = -1, nLastAsterisk = -1;
1217
1.29k
    sal_uInt32 cPatternAfterAsterisk = 0;
1218
1.29k
    bool bEscaped = false, bEscapedAfterAsterisk = false;
1219
1220
    // The loop code tries to avoid costly calls to iterateCodePoints() when
1221
    // possible.
1222
1223
1.29k
    do
1224
3.32k
    {
1225
3.32k
        if (bRewind)
1226
0
        {
1227
            // Reuse cPattern after '*', nPattern was correspondingly
1228
            // incremented to point behind cPattern.
1229
0
            bRewind = false;
1230
0
        }
1231
3.32k
        else if (nPattern < nPatternLen)
1232
3.32k
        {
1233
            // nPattern will be incremented by iterateCodePoints().
1234
3.32k
            cPattern = rPattern.iterateCodePoints( &nPattern);
1235
3.32k
            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern < nPatternLen)
1236
0
            {
1237
0
                bEscaped = true;
1238
0
                cPattern = rPattern.iterateCodePoints( &nPattern);
1239
0
            }
1240
3.32k
        }
1241
0
        else
1242
0
        {
1243
            // A trailing '*' is handled below.
1244
0
            if (mbWildcardAllowSubstring)
1245
0
            {
1246
                // If the pattern is consumed and substring match allowed we're good.
1247
0
                setWildcardMatch( aRes, nStartOffset, nString);
1248
0
                return aRes;
1249
0
            }
1250
0
            else if (nString < nEndPos && nLastAsterisk >= 0)
1251
0
            {
1252
                // If substring match is not allowed try a greedy '*' match.
1253
0
                nPattern = nLastAsterisk;
1254
0
                continue;   // do
1255
0
            }
1256
0
            else
1257
0
                return aRes;
1258
0
        }
1259
1260
3.32k
        if (cPattern == '*' && !bEscaped)
1261
170
        {
1262
            // '*' is one code unit, so not using iterateCodePoints() is ok.
1263
170
            while (nPattern < nPatternLen && rPattern[nPattern] == '*')
1264
0
                ++nPattern;
1265
1266
170
            if (nPattern >= nPatternLen)
1267
170
            {
1268
                // Last pattern is '*', remaining string matches.
1269
170
                setWildcardMatch( aRes, nStartOffset, nEndOffset);
1270
170
                return aRes;
1271
170
            }
1272
1273
0
            nLastAsterisk = nPattern;   // Remember last encountered '*'.
1274
1275
            // cPattern will be the next non-'*' character, nPattern
1276
            // incremented.
1277
0
            cPattern = rPattern.iterateCodePoints( &nPattern);
1278
0
            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern < nPatternLen)
1279
0
            {
1280
0
                bEscaped = true;
1281
0
                cPattern = rPattern.iterateCodePoints( &nPattern);
1282
0
            }
1283
1284
0
            cPatternAfterAsterisk = cPattern;
1285
0
            bEscapedAfterAsterisk = bEscaped;
1286
0
            nPat = nPattern;    // Remember position of pattern behind '*', already incremented.
1287
0
            nStr = nString;     // Remember the current string to be matched.
1288
0
        }
1289
1290
3.15k
        if (nString >= nEndPos)
1291
            // Whatever follows in pattern, string will not match.
1292
0
            return aRes;
1293
1294
        // nString will be incremented by iterateCodePoints().
1295
3.15k
        sal_uInt32 cString = searchStr.iterateCodePoints( &nString);
1296
1297
3.15k
        if ((cPattern != '?' || bEscaped) && cPattern != cString)
1298
1.12k
        {
1299
1.12k
            if (nPat == -1)
1300
                // Non-match already without any '*' pattern.
1301
1.12k
                return aRes;
1302
1303
0
            bRewind = true;
1304
0
            nPattern = nPat;                    // Rewind pattern to character behind '*', already incremented.
1305
0
            cPattern = cPatternAfterAsterisk;
1306
0
            bEscaped = bEscapedAfterAsterisk;
1307
0
            searchStr.iterateCodePoints( &nStr);
1308
0
            nString = nStr;                     // Restore incremented remembered string position.
1309
0
            if (nPat == nAfterFakePattern)
1310
0
            {
1311
                // Next start offset will be the next character.
1312
0
                nStartOffset = nString;
1313
0
            }
1314
0
        }
1315
2.02k
        else
1316
2.02k
        {
1317
            // An unescaped '?' pattern matched any character, or characters
1318
            // matched. Reset only escaped state.
1319
2.02k
            bEscaped = false;
1320
2.02k
        }
1321
3.15k
    }
1322
2.02k
    while (nString < nEndPos);
1323
1324
0
    if (bRewind)
1325
0
        return aRes;
1326
1327
    // Eat trailing '*' pattern that matches anything, including nothing.
1328
    // '*' is one code unit, so not using iterateCodePoints() is ok.
1329
0
    while (nPattern < nPatternLen && rPattern[nPattern] == '*')
1330
0
        ++nPattern;
1331
1332
0
    if (nPattern == nPatternLen)
1333
0
        setWildcardMatch( aRes, nStartOffset, nEndOffset);
1334
0
    return aRes;
1335
0
}
1336
1337
SearchResult TextSearch::WildcardSrchBkwrd( std::unique_lock<std::mutex>& /*rGuard*/, const OUString& searchStr, sal_Int32 nStartPos, sal_Int32 nEndPos )
1338
0
{
1339
0
    SearchResult aRes;
1340
0
    aRes.subRegExpressions = 0;     // no match
1341
1342
0
    sal_Int32 nStartOffset = nStartPos;
1343
0
    sal_Int32 nEndOffset = nEndPos;
1344
1345
0
    const sal_Int32 nStringLen = searchStr.getLength();
1346
1347
    // Backward nStartPos exclusive, nEndPos inclusive, but allow for empty
1348
    // string match with (0,0].
1349
0
    if (nStartPos > nStringLen || nEndPos < 0 || nStartPos < nEndPos ||
1350
0
            (nEndPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
1351
0
        return aRes;
1352
1353
0
    const OUString& rPattern = (bUsePrimarySrchStr ? sSrchStr : sSrchStr2);
1354
0
    sal_Int32 nPatternLen = rPattern.getLength();
1355
1356
    // Handle special cases empty pattern and/or string outside of the loop to
1357
    // not add performance penalties there and simplify.
1358
0
    if (nStartPos == nEndPos)
1359
0
    {
1360
0
        sal_Int32 i = 0;
1361
0
        while (i < nPatternLen && rPattern[i] == '*')
1362
0
            ++i;
1363
0
        if (i == nPatternLen)
1364
0
            setWildcardMatch( aRes, nStartOffset, nEndOffset);
1365
0
        return aRes;
1366
0
    }
1367
1368
    // Empty pattern does not match any non-empty string.
1369
0
    if (!nPatternLen)
1370
0
        return aRes;
1371
1372
    // Reverse escaped patterns to ease the handling of escapes, keeping escape
1373
    // and following character as one sequence in backward direction.
1374
0
    if ((bUsePrimarySrchStr && maWildcardReversePattern.isEmpty()) ||
1375
0
            (!bUsePrimarySrchStr && maWildcardReversePattern2.isEmpty()))
1376
0
    {
1377
0
        OUStringBuffer aPatternBuf( rPattern);
1378
0
        sal_Int32 nIndex = 0;
1379
0
        while (nIndex < nPatternLen)
1380
0
        {
1381
0
            const sal_Int32 nOld = nIndex;
1382
0
            const sal_uInt32 cu = rPattern.iterateCodePoints( &nIndex);
1383
0
            if (cu == mcWildcardEscapeChar)
1384
0
            {
1385
0
                if (nIndex < nPatternLen)
1386
0
                {
1387
0
                    if (nIndex - nOld == 1)
1388
0
                    {
1389
                        // Simply move code units, we already memorized the one
1390
                        // in 'cu'.
1391
0
                        const sal_Int32 nOld2 = nIndex;
1392
0
                        rPattern.iterateCodePoints( &nIndex);
1393
0
                        for (sal_Int32 i=0; i < nIndex - nOld2; ++i)
1394
0
                            aPatternBuf[nOld+i] = rPattern[nOld2+i];
1395
0
                        aPatternBuf[nIndex-1] = static_cast<sal_Unicode>(cu);
1396
0
                    }
1397
0
                    else
1398
0
                    {
1399
                        // Copy the escape character code units first in the
1400
                        // unlikely case that it would not be of BMP.
1401
0
                        assert(nIndex - nOld == 2);  // it's UTF-16, so...
1402
0
                        sal_Unicode buf[2];
1403
0
                        buf[0] = rPattern[nOld];
1404
0
                        buf[1] = rPattern[nOld+1];
1405
0
                        const sal_Int32 nOld2 = nIndex;
1406
0
                        rPattern.iterateCodePoints( &nIndex);
1407
0
                        for (sal_Int32 i=0; i < nIndex - nOld2; ++i)
1408
0
                            aPatternBuf[nOld+i] = rPattern[nOld2+i];
1409
0
                        aPatternBuf[nIndex-2] = buf[0];
1410
0
                        aPatternBuf[nIndex-1] = buf[1];
1411
0
                    }
1412
0
                }
1413
0
                else
1414
0
                {
1415
                    // Trailing escape would become leading escape, do what?
1416
                    // Eliminate.
1417
0
                    aPatternBuf.remove( nOld, nIndex - nOld);
1418
0
                }
1419
0
            }
1420
0
        }
1421
0
        if (bUsePrimarySrchStr)
1422
0
            maWildcardReversePattern = aPatternBuf.makeStringAndClear();
1423
0
        else
1424
0
            maWildcardReversePattern2 = aPatternBuf.makeStringAndClear();
1425
0
    }
1426
0
    const OUString& rReversePattern = (bUsePrimarySrchStr ? maWildcardReversePattern : maWildcardReversePattern2);
1427
0
    nPatternLen = rReversePattern.getLength();
1428
1429
0
    bool bRewind = false;
1430
0
    sal_uInt32 cPattern = 0;
1431
0
    sal_Int32 nPattern = nPatternLen;
1432
0
    sal_Int32 nAfterFakePattern = nPattern;
1433
0
    if (mbWildcardAllowSubstring)
1434
0
    {
1435
        // Fake a trailing '*' wildcard.
1436
0
        cPattern = '*';
1437
0
        bRewind = true;
1438
        // Assume a non-'*' pattern character follows. If it is a '*' instead
1439
        // that will be handled in the loop by setting nPat.
1440
0
        sal_uInt32 cu = rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
1441
0
        if (cu == mcWildcardEscapeChar && mcWildcardEscapeChar && nAfterFakePattern > 0)
1442
0
            rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
1443
0
    }
1444
1445
0
    sal_Int32 nString = nStartPos, nPat = -1, nStr = -1, nLastAsterisk = -1;
1446
0
    sal_uInt32 cPatternAfterAsterisk = 0;
1447
0
    bool bEscaped = false, bEscapedAfterAsterisk = false;
1448
1449
    // The loop code tries to avoid costly calls to iterateCodePoints() when
1450
    // possible.
1451
1452
0
    do
1453
0
    {
1454
0
        if (bRewind)
1455
0
        {
1456
            // Reuse cPattern after '*', nPattern was correspondingly
1457
            // decremented to point before cPattern.
1458
0
            bRewind = false;
1459
0
        }
1460
0
        else if (nPattern > 0)
1461
0
        {
1462
            // nPattern will be decremented by iterateCodePoints().
1463
0
            cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1464
0
            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern > 0)
1465
0
            {
1466
0
                bEscaped = true;
1467
0
                cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1468
0
            }
1469
0
        }
1470
0
        else
1471
0
        {
1472
            // A trailing '*' is handled below.
1473
0
            if (mbWildcardAllowSubstring)
1474
0
            {
1475
                // If the pattern is consumed and substring match allowed we're good.
1476
0
                setWildcardMatch( aRes, nStartOffset, nString);
1477
0
                return aRes;
1478
0
            }
1479
0
            else if (nString > nEndPos && nLastAsterisk >= 0)
1480
0
            {
1481
                // If substring match is not allowed try a greedy '*' match.
1482
0
                nPattern = nLastAsterisk;
1483
0
                continue;   // do
1484
0
            }
1485
0
            else
1486
0
                return aRes;
1487
0
        }
1488
1489
0
        if (cPattern == '*' && !bEscaped)
1490
0
        {
1491
            // '*' is one code unit, so not using iterateCodePoints() is ok.
1492
0
            while (nPattern > 0 && rReversePattern[nPattern-1] == '*')
1493
0
                --nPattern;
1494
1495
0
            if (nPattern <= 0)
1496
0
            {
1497
                // First pattern is '*', remaining string matches.
1498
0
                setWildcardMatch( aRes, nStartOffset, nEndOffset);
1499
0
                return aRes;
1500
0
            }
1501
1502
0
            nLastAsterisk = nPattern;   // Remember last encountered '*'.
1503
1504
            // cPattern will be the previous non-'*' character, nPattern
1505
            // decremented.
1506
0
            cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1507
0
            if (cPattern == mcWildcardEscapeChar && mcWildcardEscapeChar && nPattern > 0)
1508
0
            {
1509
0
                bEscaped = true;
1510
0
                cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1511
0
            }
1512
1513
0
            cPatternAfterAsterisk = cPattern;
1514
0
            bEscapedAfterAsterisk = bEscaped;
1515
0
            nPat = nPattern;    // Remember position of pattern before '*', already decremented.
1516
0
            nStr = nString;     // Remember the current string to be matched.
1517
0
        }
1518
1519
0
        if (nString <= nEndPos)
1520
            // Whatever leads in pattern, string will not match.
1521
0
            return aRes;
1522
1523
        // nString will be decremented by iterateCodePoints().
1524
0
        sal_uInt32 cString = searchStr.iterateCodePoints( &nString, -1);
1525
1526
0
        if ((cPattern != '?' || bEscaped) && cPattern != cString)
1527
0
        {
1528
0
            if (nPat == -1)
1529
                // Non-match already without any '*' pattern.
1530
0
                return aRes;
1531
1532
0
            bRewind = true;
1533
0
            nPattern = nPat;                    // Rewind pattern to character before '*', already decremented.
1534
0
            cPattern = cPatternAfterAsterisk;
1535
0
            bEscaped = bEscapedAfterAsterisk;
1536
0
            searchStr.iterateCodePoints( &nStr, -1);
1537
0
            nString = nStr;                     // Restore decremented remembered string position.
1538
0
            if (nPat == nAfterFakePattern)
1539
0
            {
1540
                // Next start offset will be this character (exclusive).
1541
0
                nStartOffset = nString;
1542
0
            }
1543
0
        }
1544
0
        else
1545
0
        {
1546
            // An unescaped '?' pattern matched any character, or characters
1547
            // matched. Reset only escaped state.
1548
0
            bEscaped = false;
1549
0
        }
1550
0
    }
1551
0
    while (nString > nEndPos);
1552
1553
0
    if (bRewind)
1554
0
        return aRes;
1555
1556
    // Eat leading '*' pattern that matches anything, including nothing.
1557
    // '*' is one code unit, so not using iterateCodePoints() is ok.
1558
0
    while (nPattern > 0 && rReversePattern[nPattern-1] == '*')
1559
0
        --nPattern;
1560
1561
0
    if (nPattern == 0)
1562
0
        setWildcardMatch( aRes, nStartOffset, nEndOffset);
1563
0
    return aRes;
1564
0
}
1565
1566
1567
OUString SAL_CALL
1568
TextSearch::getImplementationName()
1569
0
{
1570
0
    return u"com.sun.star.util.TextSearch_i18n"_ustr;
1571
0
}
1572
1573
sal_Bool SAL_CALL TextSearch::supportsService(const OUString& rServiceName)
1574
0
{
1575
0
    return cppu::supportsService(this, rServiceName);
1576
0
}
1577
1578
Sequence< OUString > SAL_CALL
1579
TextSearch::getSupportedServiceNames()
1580
0
{
1581
0
    return { u"com.sun.star.util.TextSearch"_ustr, u"com.sun.star.util.TextSearch2"_ustr };
1582
0
}
1583
1584
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
1585
i18npool_TextSearch_get_implementation(
1586
    css::uno::XComponentContext* context , css::uno::Sequence<css::uno::Any> const&)
1587
3
{
1588
3
    return cppu::acquire(new TextSearch(context));
1589
3
}
1590
1591
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */