/src/libreoffice/include/unotools/textsearch.hxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX |
21 | | #define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX |
22 | | |
23 | | #include <unotools/unotoolsdllapi.h> |
24 | | #include <i18nlangtag/lang.h> |
25 | | #include <rtl/ustring.hxx> |
26 | | #include <com/sun/star/uno/Reference.h> |
27 | | |
28 | | #include <ostream> |
29 | | |
30 | 0 | #define WLD_THRESHOLD 3 /* NOTE(review): not referenced in this header; presumably a weighted-Levenshtein-distance threshold used by the implementation - confirm */ |
31 | 0 | #define SMALL_STRING_THRESHOLD 4 /* NOTE(review): not referenced in this header; presumably a string-length cutoff used by the similarity search - confirm */ |
32 | | |
33 | | class CharClass; |
34 | | |
35 | | namespace com::sun::star::lang { struct Locale; } |
36 | | namespace com::sun::star::util { class XTextSearch2; } |
37 | | namespace com::sun::star::util { struct SearchResult; } |
38 | | namespace i18nutil { |
39 | | struct SearchOptions2; |
40 | | } |
41 | | |
42 | | namespace utl |
43 | | { |
44 | | |
45 | | // Utility class bundling the parameters of a search: search string, search type, case sensitivity and wildcard settings |
46 | | class UNOTOOLS_DLLPUBLIC SearchParam |
47 | | { |
48 | | public: |
49 | | enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 }; |
50 | | |
51 | | /** Convert configuration and document boolean settings to SearchType. |
52 | | If bWildcard is true it takes precedence over rbRegExp. |
53 | | @param rbRegExp |
54 | | If true and bWildcard is also true, rbRegExp is set to false to |
55 | | adapt the caller's settings. |
56 | | @return Wildcard if bWildcard is true, otherwise Regexp if rbRegExp is true, otherwise Normal. */ |
57 | | static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp ) |
58 | 0 | { |
59 | 0 | if (bWildcard) |
60 | 0 | { |
61 | 0 | if (rbRegExp) |
62 | 0 | rbRegExp = false; |
63 | 0 | return SearchType::Wildcard; |
64 | 0 | } |
65 | 0 | return rbRegExp ? SearchType::Regexp : SearchType::Normal; |
66 | 0 | } |
67 | | |
68 | | /** Convert SearchType to configuration and document boolean settings. |
69 | | Wildcard and Regexp each set only their own flag; any other type clears both flags. */ |
70 | | static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp ) |
71 | 20.1k | { |
72 | 20.1k | switch (eSearchType) |
73 | 20.1k | { |
74 | 0 | case SearchType::Wildcard: |
75 | 0 | rbWildcard = true; |
76 | 0 | rbRegExp = false; |
77 | 0 | break; |
78 | 19.5k | case SearchType::Regexp: |
79 | 19.5k | rbWildcard = false; |
80 | 19.5k | rbRegExp = true; |
81 | 19.5k | break; |
82 | 614 | default: |
83 | 614 | rbWildcard = false; |
84 | 614 | rbRegExp = false; |
85 | 614 | break; |
86 | 20.1k | } |
87 | 20.1k | } |
88 | | |
89 | | private: |
90 | | OUString sSrchStr; // the search string |
91 | | |
92 | | SearchType m_eSrchType; // search type: normal/regexp/wildcard |
93 | | |
94 | | sal_uInt32 m_cWildEscChar; // wildcard escape character |
95 | | |
96 | | bool m_bCaseSense : 1; |
97 | | bool m_bWildMatchSel : 1; // wildcard pattern must match entire selection |
98 | | |
99 | | public: |
100 | | SearchParam( const OUString &rText, |
101 | | SearchType eSrchType, |
102 | | bool bCaseSensitive = true, |
103 | | sal_uInt32 cWildEscChar = '\\', |
104 | | bool bWildMatchSel = false ); |
105 | | |
106 | | SearchParam( const SearchParam& ); |
107 | | |
108 | | ~SearchParam(); |
109 | | |
110 | 63 | const OUString& GetSrchStr() const { return sSrchStr; } |
111 | 63 | SearchType GetSrchType() const { return m_eSrchType; } |
112 | | |
113 | 63 | bool IsCaseSensitive() const { return m_bCaseSense; } |
114 | 15 | bool IsWildMatchSel() const { return m_bWildMatchSel; } |
115 | | |
116 | | // returned as signed sal_Int32 for API use; stored as sal_uInt32 |
117 | 15 | sal_Int32 GetWildEscChar() const { return static_cast<sal_Int32>(m_cWildEscChar); } |
118 | | }; |
119 | | |
120 | | // Streams a SearchType as a short tag (N, RE, WC, UNK; any other value as its integer followed by '?'). For use in SAL_DEBUG etc. Output format not guaranteed to be stable. |
121 | | template<typename charT, typename traits> |
122 | | inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType) |
123 | | { |
124 | | switch (eType) |
125 | | { |
126 | | case SearchParam::SearchType::Normal: |
127 | | stream << "N"; |
128 | | break; |
129 | | case SearchParam::SearchType::Regexp: |
130 | | stream << "RE"; |
131 | | break; |
132 | | case SearchParam::SearchType::Wildcard: |
133 | | stream << "WC"; |
134 | | break; |
135 | | case SearchParam::SearchType::Unknown: |
136 | | stream << "UNK"; |
137 | | break; |
138 | | default: |
139 | | stream << static_cast<int>(eType) << '?'; |
140 | | break; |
141 | | } |
142 | | |
143 | | return stream; |
144 | | } |
145 | | |
146 | | // Utility class for searching a substring in a string. |
147 | | // The following metrics are supported |
148 | | // - ordinary text (Boyer-Moore) |
149 | | // - regular expressions |
150 | | // - weighted Levenshtein distance |
151 | | // - wildcards '*' and '?' |
152 | | |
153 | | // This class allows forward and backward searching! |
154 | | |
155 | | class UNOTOOLS_DLLPUBLIC TextSearch |
156 | | { |
157 | | static css::uno::Reference< css::util::XTextSearch2 > |
158 | | getXTextSearch( const i18nutil::SearchOptions2& rPara ); |
159 | | |
160 | | css::uno::Reference < css::util::XTextSearch2 > |
161 | | xTextSearch; |
162 | | |
163 | | void Init( const SearchParam & rParam, |
164 | | const css::lang::Locale& rLocale ); |
165 | | |
166 | | public: |
167 | | // rText is the string being searched for |
168 | | // the first two CTORs are deprecated! |
169 | | TextSearch( const SearchParam & rPara, LanguageType nLanguage ); |
170 | | TextSearch( const SearchParam & rPara, const CharClass& rCClass ); |
171 | | |
172 | | TextSearch( const i18nutil::SearchOptions2& rPara ); |
173 | | ~TextSearch(); |
174 | | |
175 | | /* search in the (selected) text the search string: |
176 | | rStr - the text, in which we search |
177 | | pStart - start position for the search |
178 | | pEnd - end position for the search |
179 | | |
180 | | RETURN values == true: something is found |
181 | | - pStart start pos of the found text, |
182 | | - pEnd end pos of the found text, |
183 | | - pRes - the search result with all found |
184 | | positions. Is only filled with more positions |
185 | | if the regular expression handles groups. |
186 | | |
187 | | == false: nothing found, pStart, pEnd unchanged. |
188 | | |
189 | | Definitions: start pos always inclusive, end pos always exclusive! |
190 | | The positions must always be in the right direction! |
191 | | search forward: start <= end |
192 | | search backward: end <= start |
193 | | */ |
194 | | bool SearchForward( const OUString &rStr, |
195 | | sal_Int32* pStart, sal_Int32* pEnd, |
196 | | css::util::SearchResult* pRes = nullptr ); |
197 | | /** |
198 | | * @brief searchForward Search forward beginning from the start to the end |
199 | | * of the given text |
200 | | * @param rStr The text in which we search |
201 | | * @return True if the search term is found in the text |
202 | | */ |
203 | | bool searchForward( const OUString &rStr ); |
204 | | bool SearchBackward( const OUString &rStr, |
205 | | sal_Int32* pStart, sal_Int32* pEnd, |
206 | | css::util::SearchResult* pRes = nullptr ); |
207 | | |
208 | | void SetLocale( const i18nutil::SearchOptions2& rOpt, |
209 | | const css::lang::Locale& rLocale ); |
210 | | |
211 | | /* replace back references in the replace string by the sub expressions from the search result */ |
212 | | static void ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const css::util::SearchResult& rResult ); |
213 | | |
214 | | /** |
215 | | * @brief Search for a string in another one based on similarity |
216 | | * @param rString The string we compare with |
217 | | * @param rSearchString The search term |
218 | | * @param rSimilarityScore The similarity score (sent by reference to be filled) |
219 | | * @return True if the search term is found, false otherwise |
220 | | */ |
221 | | static bool SimilaritySearch(const OUString& rString, const OUString& rSearchString, |
222 | | ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore); |
223 | | /** |
224 | | * @brief Get similarity score between two strings |
225 | | * according to the length of the common substring and its position |
226 | | * @param rString The string we compare with |
227 | | * @param rSearchString The search term |
228 | | * @param nInitialScore The initial score |
229 | | * @param bFromStart True if the search is from the start |
230 | | * @return Score if the search term is found in the text, -1 otherwise |
231 | | */ |
232 | | static sal_Int32 GetSubstringSimilarity(std::u16string_view rString, |
233 | | std::u16string_view rSearchString, |
234 | | sal_Int32& nInitialScore, const bool bFromStart); |
235 | | static sal_Int32 GetWeightedLevenshteinDistance(const OUString& rString, |
236 | | const OUString& rSearchString); |
237 | | }; |
238 | | |
239 | | } // namespace utl |
240 | | |
241 | | #endif |
242 | | |
243 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |