Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/include/unotools/textsearch.hxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
21
#define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
22
23
#include <unotools/unotoolsdllapi.h>
24
#include <i18nlangtag/lang.h>
25
#include <rtl/ustring.hxx>
26
#include <com/sun/star/uno/Reference.h>
27
28
#include <ostream>
29
30
0
// NOTE(review): presumably a threshold for the weighted Levenshtein distance
// (cf. TextSearch::GetWeightedLevenshteinDistance); not used in this header -
// confirm against the implementation.
#define WLD_THRESHOLD 3
31
0
// NOTE(review): presumably the length up to which a string counts as "small"
// for the similarity search; not used in this header - confirm against the
// implementation.
#define SMALL_STRING_THRESHOLD 4
32
33
class CharClass;
34
35
namespace com::sun::star::lang { struct Locale; }
36
namespace com::sun::star::util { class XTextSearch2; }
37
namespace com::sun::star::util { struct SearchResult; }
38
namespace i18nutil {
39
    struct SearchOptions2;
40
}
41
42
namespace utl
43
{
44
45
// Utility class for searching
46
class UNOTOOLS_DLLPUBLIC SearchParam
47
{
48
public:
49
    enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 };
50
51
    /** Convert configuration and document boolean settings to SearchType.
52
        If bWildcard is true it takes precedence over rbRegExp.
53
        @param  rbRegExp
54
                If true and bWildcard is also true, rbRegExp is set to false to
55
                adapt the caller's settings.
56
     */
57
    static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp )
58
0
    {
59
0
        if (bWildcard)
60
0
        {
61
0
            if (rbRegExp)
62
0
                rbRegExp = false;
63
0
            return SearchType::Wildcard;
64
0
        }
65
0
        return rbRegExp ? SearchType::Regexp : SearchType::Normal;
66
0
    }
67
68
    /** Convert SearchType to configuration and document boolean settings.
69
     */
70
    static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp )
71
20.1k
    {
72
20.1k
        switch (eSearchType)
73
20.1k
        {
74
0
            case SearchType::Wildcard:
75
0
                rbWildcard = true;
76
0
                rbRegExp = false;
77
0
                break;
78
19.5k
            case SearchType::Regexp:
79
19.5k
                rbWildcard = false;
80
19.5k
                rbRegExp = true;
81
19.5k
                break;
82
614
            default:
83
614
                rbWildcard = false;
84
614
                rbRegExp = false;
85
614
                break;
86
20.1k
        }
87
20.1k
    }
88
89
private:
90
    OUString sSrchStr;            // the search string
91
92
    SearchType m_eSrchType;       // search normal/regular/LevDist
93
94
    sal_uInt32 m_cWildEscChar;      // wildcard escape character
95
96
    bool m_bCaseSense  : 1;
97
    bool m_bWildMatchSel : 1;       // wildcard pattern must match entire selection
98
99
public:
100
    SearchParam( const OUString &rText,
101
                    SearchType eSrchType,
102
                    bool bCaseSensitive = true,
103
                    sal_uInt32 cWildEscChar = '\\',
104
                    bool bWildMatchSel = false );
105
106
    SearchParam( const SearchParam& );
107
108
    ~SearchParam();
109
110
63
    const OUString& GetSrchStr() const          { return sSrchStr; }
111
63
    SearchType      GetSrchType() const         { return m_eSrchType; }
112
113
63
    bool            IsCaseSensitive() const     { return m_bCaseSense; }
114
15
    bool            IsWildMatchSel() const      { return m_bWildMatchSel; }
115
116
    // signed return for API use
117
15
    sal_Int32       GetWildEscChar() const      { return static_cast<sal_Int32>(m_cWildEscChar); }
118
};
119
120
// For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
121
template<typename charT, typename traits>
122
inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType)
123
{
124
    switch (eType)
125
    {
126
    case SearchParam::SearchType::Normal:
127
        stream << "N";
128
        break;
129
    case SearchParam::SearchType::Regexp:
130
        stream << "RE";
131
        break;
132
    case SearchParam::SearchType::Wildcard:
133
        stream << "WC";
134
        break;
135
    case SearchParam::SearchType::Unknown:
136
        stream << "UNK";
137
        break;
138
    default:
139
        stream << static_cast<int>(eType) << '?';
140
        break;
141
    }
142
143
    return stream;
144
}
145
146
//  Utility class for searching a substring in a string.
147
//  The following metrics are supported
148
//      - ordinary text (Boyer/Moore)
149
//      - regular expressions
150
//      - weighted Levenshtein distance
151
//      - wildcards '*' and '?'
152
153
//  This class allows forward and backward searching!
154
155
class UNOTOOLS_DLLPUBLIC TextSearch
156
{
157
    static css::uno::Reference< css::util::XTextSearch2 >
158
        getXTextSearch( const i18nutil::SearchOptions2& rPara );
159
160
    css::uno::Reference < css::util::XTextSearch2 >
161
            xTextSearch;
162
163
    void Init( const SearchParam & rParam,
164
               const css::lang::Locale& rLocale );
165
166
public:
167
    // rText is the string being searched for
168
    // this first two CTORs are deprecated!
169
    TextSearch( const SearchParam & rPara, LanguageType nLanguage );
170
    TextSearch( const SearchParam & rPara, const CharClass& rCClass );
171
172
    TextSearch( const i18nutil::SearchOptions2& rPara );
173
    ~TextSearch();
174
175
    /* search in the (selected) text the search string:
176
        rScrTxt - the text, in which we search
177
        pStart  - start position for the search
178
        pEnd    - end position for the search
179
180
        RETURN values   ==  true: something is found
181
                        - pStart start pos of the found text,
182
                        - pEnd end pos of the found text,
183
                        - pSrchResult - the search result with all found
184
                             positions. Is only filled with more positions
185
                             if the regular expression handles groups.
186
187
                        == false: nothing found, pStart, pEnd unchanged.
188
189
        Definitions: start pos always inclusive, end pos always exclusive!
190
                     The position must always in the right direction!
191
                    search forward: start <= end
192
                    search backward: end <= start
193
    */
194
    bool SearchForward( const OUString &rStr,
195
                        sal_Int32* pStart, sal_Int32* pEnd,
196
                        css::util::SearchResult* pRes = nullptr );
197
    /**
198
     * @brief searchForward Search forward beginning from the start to the end
199
     *        of the given text
200
     * @param rStr The text in which we search
201
     * @return True if the search term is found in the text
202
     */
203
    bool searchForward( const OUString &rStr );
204
    bool SearchBackward( const OUString &rStr,
205
                        sal_Int32* pStart, sal_Int32* pEnd,
206
                        css::util::SearchResult* pRes = nullptr );
207
208
    void SetLocale( const i18nutil::SearchOptions2& rOpt,
209
                    const css::lang::Locale& rLocale );
210
211
    /* replace back references in the replace string by the sub expressions from the search result */
212
    static void ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const css::util::SearchResult& rResult );
213
214
    /**
215
     * @brief Search for a string in a another one based on similarity
216
     * @param rString The string we compare with
217
     * @param rSearchString The search term
218
     * @param rSimilarityScore The similarity score (sent by reference to be filled)
219
     * @return True if the search term is found, false otherwise
220
     */
221
    static bool SimilaritySearch(const OUString& rString, const OUString& rSearchString,
222
                                 ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore);
223
    /**
224
     * @brief Get similarity score between two strings
225
     *        according to the length of the common substring and its position
226
     * @param rString The string we compare with
227
     * @param rSearchString The search term
228
     * @param nInitialScore The initial score
229
     * @param bFromStart True if the search is from the start
230
     * @return Score if the search term is found in the text, -1 otherwise
231
     */
232
    static sal_Int32 GetSubstringSimilarity(std::u16string_view rString,
233
                                            std::u16string_view rSearchString,
234
                                            sal_Int32& nInitialScore, const bool bFromStart);
235
    static sal_Int32 GetWeightedLevenshteinDistance(const OUString& rString,
236
                                                    const OUString& rSearchString);
237
};
238
239
}   // namespace utl
240
241
#endif
242
243
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */