/src/libreoffice/comphelper/source/misc/string.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <sal/config.h> |
21 | | |
22 | | #include <cassert> |
23 | | #include <cstddef> |
24 | | #include <string_view> |
25 | | #include <utility> |
26 | | #include <vector> |
27 | | #include <algorithm> |
28 | | |
29 | | #include <o3tl/safeint.hxx> |
30 | | #include <o3tl/string_view.hxx> |
31 | | #include <rtl/character.hxx> |
32 | | #include <rtl/ustring.hxx> |
33 | | #include <rtl/ustrbuf.hxx> |
34 | | #include <rtl/string.hxx> |
35 | | #include <rtl/strbuf.hxx> |
36 | | #include <sal/log.hxx> |
37 | | #include <sal/types.h> |
38 | | |
39 | | #include <comphelper/string.hxx> |
40 | | #include <comphelper/stl_types.hxx> |
41 | | #include <comphelper/sequence.hxx> |
42 | | |
43 | | #include <com/sun/star/i18n/BreakIterator.hpp> |
44 | | #include <com/sun/star/i18n/CharType.hpp> |
45 | | #include <com/sun/star/i18n/Collator.hpp> |
46 | | |
47 | | |
48 | | namespace comphelper::string { |
49 | | |
50 | | namespace |
51 | | { |
/** Drop every leading occurrence of cRemove from a string-view-like value.

    @param rIn      input; must offer empty(), size(), operator[] and substr()
    @param cRemove  the character to strip from the front
    @return rIn itself when it is empty, otherwise the part of rIn starting
            at the first character that differs from cRemove (empty when
            every character equals cRemove)
*/
template <typename T, typename C> T tmpl_stripStart(const T &rIn,
                                                    const C cRemove)
{
    if (rIn.empty())
        return rIn;

    const typename T::size_type nLen = rIn.size();
    typename T::size_type nSkip = 0;
    // advance past the run of cRemove at the front
    while (nSkip != nLen && rIn[nSkip] == cRemove)
        ++nSkip;

    return rIn.substr(nSkip);
}
69 | | template <typename T, typename C> T tmpl_stripStartString(const T &rIn, |
70 | | const C cRemove) |
71 | 127k | { |
72 | 127k | if (rIn.isEmpty()) |
73 | 1.01k | return rIn; |
74 | | |
75 | 126k | sal_Int32 i = 0; |
76 | | |
77 | 128k | while (i < rIn.getLength()) |
78 | 127k | { |
79 | 127k | if (rIn[i] != cRemove) |
80 | 124k | break; |
81 | 2.19k | ++i; |
82 | 2.19k | } |
83 | | |
84 | 126k | return rIn.copy(i); |
85 | 127k | } Unexecuted instantiation: string.cxx:rtl::OString comphelper::string::(anonymous namespace)::tmpl_stripStartString<rtl::OString, char>(rtl::OString const&, char) string.cxx:rtl::OUString comphelper::string::(anonymous namespace)::tmpl_stripStartString<rtl::OUString, char16_t>(rtl::OUString const&, char16_t) Line | Count | Source | 71 | 127k | { | 72 | 127k | if (rIn.isEmpty()) | 73 | 1.01k | return rIn; | 74 | | | 75 | 126k | sal_Int32 i = 0; | 76 | | | 77 | 128k | while (i < rIn.getLength()) | 78 | 127k | { | 79 | 127k | if (rIn[i] != cRemove) | 80 | 124k | break; | 81 | 2.19k | ++i; | 82 | 2.19k | } | 83 | | | 84 | 126k | return rIn.copy(i); | 85 | 127k | } |
|
86 | | } |
87 | | |
88 | | OString stripStart(const OString& rIn, char c) |
89 | 0 | { |
90 | 0 | return tmpl_stripStartString<OString, char>(rIn, c); |
91 | 0 | } |
92 | | |
93 | | std::string_view stripStart(std::string_view rIn, char c) |
94 | 0 | { |
95 | 0 | return tmpl_stripStart<std::string_view, char>(rIn, c); |
96 | 0 | } |
97 | | |
98 | | OUString stripStart(const OUString& rIn, sal_Unicode c) |
99 | 127k | { |
100 | 127k | return tmpl_stripStartString<OUString, sal_Unicode>(rIn, c); |
101 | 127k | } |
102 | | |
103 | | std::u16string_view stripStart(std::u16string_view rIn, sal_Unicode c) |
104 | 0 | { |
105 | 0 | return tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c); |
106 | 0 | } |
107 | | |
108 | | namespace |
109 | | { |
/** Drop every trailing occurrence of cRemove from a string-view-like value.

    @param rIn      input; must offer empty(), size(), operator[] and substr()
    @param cRemove  the character to strip from the back
    @return rIn itself when it is empty, otherwise the prefix of rIn that ends
            at the last character differing from cRemove (empty when every
            character equals cRemove)
*/
template <typename T, typename C> T tmpl_stripEnd(const T &rIn,
                                                  const C cRemove)
{
    if (rIn.empty())
        return rIn;

    typename T::size_type nKeep = rIn.size();
    // shrink the kept prefix while its last character is cRemove
    while (nKeep != 0 && rIn[nKeep - 1] == cRemove)
        --nKeep;

    return rIn.substr(0, nKeep);
}
|
127 | | template <typename T, typename C> T tmpl_stripEndString(const T &rIn, |
128 | | const C cRemove) |
129 | 1.07M | { |
130 | 1.07M | if (rIn.isEmpty()) |
131 | 48.5k | return rIn; |
132 | | |
133 | 1.02M | sal_Int32 i = rIn.getLength(); |
134 | | |
135 | 1.51M | while (i > 0) |
136 | 1.51M | { |
137 | 1.51M | if (rIn[i-1] != cRemove) |
138 | 1.01M | break; |
139 | 491k | --i; |
140 | 491k | } |
141 | | |
142 | 1.02M | return rIn.copy(0, i); |
143 | 1.07M | } string.cxx:rtl::OString comphelper::string::(anonymous namespace)::tmpl_stripEndString<rtl::OString, char>(rtl::OString const&, char) Line | Count | Source | 129 | 993k | { | 130 | 993k | if (rIn.isEmpty()) | 131 | 299 | return rIn; | 132 | | | 133 | 993k | sal_Int32 i = rIn.getLength(); | 134 | | | 135 | 993k | while (i > 0) | 136 | 993k | { | 137 | 993k | if (rIn[i-1] != cRemove) | 138 | 993k | break; | 139 | 727 | --i; | 140 | 727 | } | 141 | | | 142 | 993k | return rIn.copy(0, i); | 143 | 993k | } |
string.cxx:rtl::OUString comphelper::string::(anonymous namespace)::tmpl_stripEndString<rtl::OUString, char16_t>(rtl::OUString const&, char16_t) Line | Count | Source | 129 | 77.2k | { | 130 | 77.2k | if (rIn.isEmpty()) | 131 | 48.2k | return rIn; | 132 | | | 133 | 29.0k | sal_Int32 i = rIn.getLength(); | 134 | | | 135 | 520k | while (i > 0) | 136 | 516k | { | 137 | 516k | if (rIn[i-1] != cRemove) | 138 | 25.1k | break; | 139 | 491k | --i; | 140 | 491k | } | 141 | | | 142 | 29.0k | return rIn.copy(0, i); | 143 | 77.2k | } |
|
144 | | } |
145 | | |
146 | | OString stripEnd(const OString& rIn, char c) |
147 | 993k | { |
148 | 993k | return tmpl_stripEndString<OString, char>(rIn, c); |
149 | 993k | } |
150 | | |
151 | | std::string_view stripEnd(std::string_view rIn, char c) |
152 | 0 | { |
153 | 0 | return tmpl_stripEnd<std::string_view, char>(rIn, c); |
154 | 0 | } |
155 | | |
156 | | OUString stripEnd(const OUString& rIn, sal_Unicode c) |
157 | 77.2k | { |
158 | 77.2k | return tmpl_stripEndString<OUString, sal_Unicode>(rIn, c); |
159 | 77.2k | } |
160 | | |
161 | | std::u16string_view stripEnd(std::u16string_view rIn, sal_Unicode c) |
162 | 2.84M | { |
163 | 2.84M | return tmpl_stripEnd<std::u16string_view, sal_Unicode>(rIn, c); |
164 | 2.84M | } |
165 | | |
166 | | namespace |
167 | | { |
/** Drop every leading and trailing occurrence of cRemove from a
    string-view-like value.

    @param rIn      input; must offer empty(), size(), operator[] and substr()
    @param cRemove  the character to strip from both ends
    @return rIn itself when it is empty, otherwise the sub-range of rIn between
            the first and the last character that differ from cRemove (empty
            when every character equals cRemove)
*/
template <typename T, typename C> T tmpl_strip(const T &rIn,
                                               const C cRemove)
{
    if (rIn.empty())
        return rIn;

    // trim the back first ...
    typename T::size_type nLast = rIn.size();
    while (nLast != 0 && rIn[nLast - 1] == cRemove)
        --nLast;

    // ... then the front, never crossing the already trimmed end
    typename T::size_type nFirst = 0;
    while (nFirst != nLast && rIn[nFirst] == cRemove)
        ++nFirst;

    return rIn.substr(nFirst, nLast - nFirst);
}
|
192 | | template <typename T, typename C> T tmpl_stripString(const T &rIn, |
193 | | const C cRemove) |
194 | 3.82M | { |
195 | 3.82M | if (rIn.isEmpty()) |
196 | 1.70k | return rIn; |
197 | | |
198 | 3.82M | sal_Int32 end = rIn.getLength(); |
199 | 3.96M | while (end > 0) |
200 | 3.95M | { |
201 | 3.95M | if (rIn[end-1] != cRemove) |
202 | 3.81M | break; |
203 | 142k | --end; |
204 | 142k | } |
205 | 3.82M | sal_Int32 start = 0; |
206 | 3.90M | while (start < end) |
207 | 3.90M | { |
208 | 3.90M | if (rIn[start] != cRemove) |
209 | 3.81M | break; |
210 | 88.0k | ++start; |
211 | 88.0k | } |
212 | | |
213 | 3.82M | return rIn.copy(start, end - start); |
214 | 3.82M | } Unexecuted instantiation: string.cxx:rtl::OString comphelper::string::(anonymous namespace)::tmpl_stripString<rtl::OString, char>(rtl::OString const&, char) string.cxx:rtl::OUString comphelper::string::(anonymous namespace)::tmpl_stripString<rtl::OUString, char16_t>(rtl::OUString const&, char16_t) Line | Count | Source | 194 | 3.82M | { | 195 | 3.82M | if (rIn.isEmpty()) | 196 | 1.70k | return rIn; | 197 | | | 198 | 3.82M | sal_Int32 end = rIn.getLength(); | 199 | 3.96M | while (end > 0) | 200 | 3.95M | { | 201 | 3.95M | if (rIn[end-1] != cRemove) | 202 | 3.81M | break; | 203 | 142k | --end; | 204 | 142k | } | 205 | 3.82M | sal_Int32 start = 0; | 206 | 3.90M | while (start < end) | 207 | 3.90M | { | 208 | 3.90M | if (rIn[start] != cRemove) | 209 | 3.81M | break; | 210 | 88.0k | ++start; | 211 | 88.0k | } | 212 | | | 213 | 3.82M | return rIn.copy(start, end - start); | 214 | 3.82M | } |
|
215 | | } |
216 | | |
217 | | OString strip(const OString& rIn, char c) |
218 | 0 | { |
219 | 0 | return tmpl_stripString<OString, char>(rIn, c); |
220 | 0 | } |
221 | | |
222 | | std::string_view strip(std::string_view rIn, char c) |
223 | 0 | { |
224 | 0 | return tmpl_strip<std::string_view, char>(rIn, c); |
225 | 0 | } |
226 | | |
227 | | OUString strip(const OUString& rIn, sal_Unicode c) |
228 | 3.82M | { |
229 | 3.82M | return tmpl_stripString<OUString, sal_Unicode>(rIn, c); |
230 | 3.82M | } |
231 | | |
232 | | std::u16string_view strip(std::u16string_view rIn, sal_Unicode c) |
233 | 8.04k | { |
234 | 8.04k | return tmpl_strip<std::u16string_view, sal_Unicode>(rIn, c); |
235 | 8.04k | } |
236 | | |
237 | | namespace |
238 | | { |
239 | | template <typename T, typename C> sal_Int32 tmpl_getTokenCount( T rIn, |
240 | | C cTok) |
241 | 400k | { |
242 | | // Empty String: TokenCount by Definition is 0 |
243 | 400k | if (rIn.empty()) |
244 | 181k | return 0; |
245 | | |
246 | 219k | sal_Int32 nTokCount = 1; |
247 | 7.04M | for (typename T::size_type i = 0; i < rIn.size(); ++i) |
248 | 6.82M | { |
249 | 6.82M | if (rIn[i] == cTok) |
250 | 522k | ++nTokCount; |
251 | 6.82M | } |
252 | 219k | return nTokCount; |
253 | 400k | } Unexecuted instantiation: string.cxx:int comphelper::string::(anonymous namespace)::tmpl_getTokenCount<std::__1::basic_string_view<char, std::__1::char_traits<char> >, char>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, char) string.cxx:int comphelper::string::(anonymous namespace)::tmpl_getTokenCount<std::__1::basic_string_view<char16_t, std::__1::char_traits<char16_t> >, char16_t>(std::__1::basic_string_view<char16_t, std::__1::char_traits<char16_t> >, char16_t) Line | Count | Source | 241 | 400k | { | 242 | | // Empty String: TokenCount by Definition is 0 | 243 | 400k | if (rIn.empty()) | 244 | 181k | return 0; | 245 | | | 246 | 219k | sal_Int32 nTokCount = 1; | 247 | 7.04M | for (typename T::size_type i = 0; i < rIn.size(); ++i) | 248 | 6.82M | { | 249 | 6.82M | if (rIn[i] == cTok) | 250 | 522k | ++nTokCount; | 251 | 6.82M | } | 252 | 219k | return nTokCount; | 253 | 400k | } |
|
254 | | } |
255 | | |
256 | | sal_Int32 getTokenCount(std::string_view rIn, char cTok) |
257 | 0 | { |
258 | 0 | return tmpl_getTokenCount<std::string_view, char>(rIn, cTok); |
259 | 0 | } |
260 | | |
261 | | sal_Int32 getTokenCount(std::u16string_view rIn, sal_Unicode cTok) |
262 | 400k | { |
263 | 400k | return tmpl_getTokenCount<std::u16string_view, sal_Unicode>(rIn, cTok); |
264 | 400k | } |
265 | | |
266 | | sal_uInt32 decimalStringToNumber(std::u16string_view str) |
267 | 0 | { |
268 | 0 | sal_uInt32 result = 0; |
269 | 0 | for( std::size_t i = 0; i < str.size(); ) |
270 | 0 | { |
271 | 0 | sal_uInt32 c = o3tl::iterateCodePoints(str, &i); |
272 | 0 | sal_uInt32 value = 0; |
273 | 0 | if( c <= 0x0039) // ASCII decimal digits, most common |
274 | 0 | value = c - 0x0030; |
275 | 0 | else if( c >= 0x1D7F6 ) // mathematical monospace digits |
276 | 0 | value = c - 0x1D7F6; |
277 | 0 | else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits |
278 | 0 | value = c - 0x1D7EC; |
279 | 0 | else if( c >= 0x1D7E2 ) // mathematical sans-serif digits |
280 | 0 | value = c - 0x1D7E2; |
281 | 0 | else if( c >= 0x1D7D8 ) // mathematical double-struck digits |
282 | 0 | value = c - 0x1D7D8; |
283 | 0 | else if( c >= 0x1D7CE ) // mathematical bold digits |
284 | 0 | value = c - 0x1D7CE; |
285 | 0 | else if( c >= 0x11066 ) // brahmi digits |
286 | 0 | value = c - 0x11066; |
287 | 0 | else if( c >= 0x104A0 ) // osmanya digits |
288 | 0 | value = c - 0x104A0; |
289 | 0 | else if( c >= 0xFF10 ) // fullwidth digits |
290 | 0 | value = c - 0xFF10; |
291 | 0 | else if( c >= 0xABF0 ) // meetei mayek digits |
292 | 0 | value = c - 0xABF0; |
293 | 0 | else if( c >= 0xAA50 ) // cham digits |
294 | 0 | value = c - 0xAA50; |
295 | 0 | else if( c >= 0xA9D0 ) // javanese digits |
296 | 0 | value = c - 0xA9D0; |
297 | 0 | else if( c >= 0xA900 ) // kayah li digits |
298 | 0 | value = c - 0xA900; |
299 | 0 | else if( c >= 0xA8D0 ) // saurashtra digits |
300 | 0 | value = c - 0xA8D0; |
301 | 0 | else if( c >= 0xA620 ) // vai digits |
302 | 0 | value = c - 0xA620; |
303 | 0 | else if( c >= 0x1C50 ) // ol chiki digits |
304 | 0 | value = c - 0x1C50; |
305 | 0 | else if( c >= 0x1C40 ) // lepcha digits |
306 | 0 | value = c - 0x1C40; |
307 | 0 | else if( c >= 0x1BB0 ) // sundanese digits |
308 | 0 | value = c - 0x1BB0; |
309 | 0 | else if( c >= 0x1B50 ) // balinese digits |
310 | 0 | value = c - 0x1B50; |
311 | 0 | else if( c >= 0x1A90 ) // tai tham tham digits |
312 | 0 | value = c - 0x1A90; |
313 | 0 | else if( c >= 0x1A80 ) // tai tham hora digits |
314 | 0 | value = c - 0x1A80; |
315 | 0 | else if( c >= 0x19D0 ) // new tai lue digits |
316 | 0 | value = c - 0x19D0; |
317 | 0 | else if( c >= 0x1946 ) // limbu digits |
318 | 0 | value = c - 0x1946; |
319 | 0 | else if( c >= 0x1810 ) // mongolian digits |
320 | 0 | value = c - 0x1810; |
321 | 0 | else if( c >= 0x17E0 ) // khmer digits |
322 | 0 | value = c - 0x17E0; |
323 | 0 | else if( c >= 0x1090 ) // myanmar shan digits |
324 | 0 | value = c - 0x1090; |
325 | 0 | else if( c >= 0x1040 ) // myanmar digits |
326 | 0 | value = c - 0x1040; |
327 | 0 | else if( c >= 0x0F20 ) // tibetan digits |
328 | 0 | value = c - 0x0F20; |
329 | 0 | else if( c >= 0x0ED0 ) // lao digits |
330 | 0 | value = c - 0x0ED0; |
331 | 0 | else if( c >= 0x0E50 ) // thai digits |
332 | 0 | value = c - 0x0E50; |
333 | 0 | else if( c >= 0x0D66 ) // malayalam digits |
334 | 0 | value = c - 0x0D66; |
335 | 0 | else if( c >= 0x0CE6 ) // kannada digits |
336 | 0 | value = c - 0x0CE6; |
337 | 0 | else if( c >= 0x0C66 ) // telugu digits |
338 | 0 | value = c - 0x0C66; |
339 | 0 | else if( c >= 0x0BE6 ) // tamil digits |
340 | 0 | value = c - 0x0BE6; |
341 | 0 | else if( c >= 0x0B66 ) // odia digits |
342 | 0 | value = c - 0x0B66; |
343 | 0 | else if( c >= 0x0AE6 ) // gujarati digits |
344 | 0 | value = c - 0x0AE6; |
345 | 0 | else if( c >= 0x0A66 ) // gurmukhi digits |
346 | 0 | value = c - 0x0A66; |
347 | 0 | else if( c >= 0x09E6 ) // bengali digits |
348 | 0 | value = c - 0x09E6; |
349 | 0 | else if( c >= 0x0966 ) // devanagari digit |
350 | 0 | value = c - 0x0966; |
351 | 0 | else if( c >= 0x07C0 ) // nko digits |
352 | 0 | value = c - 0x07C0; |
353 | 0 | else if( c >= 0x06F0 ) // extended arabic-indic digits |
354 | 0 | value = c - 0x06F0; |
355 | 0 | else if( c >= 0x0660 ) // arabic-indic digits |
356 | 0 | value = c - 0x0660; |
357 | 0 | result = result * 10 + value; |
358 | 0 | } |
359 | 0 | return result; |
360 | 0 | } |
361 | | |
362 | | using namespace ::com::sun::star; |
363 | | |
364 | | // convert between sequence of string and comma separated string |
365 | | |
366 | | OUString convertCommaSeparated( |
367 | | uno::Sequence< OUString > const& i_rSeq) |
368 | 4.71k | { |
369 | 4.71k | OUStringBuffer buf; |
370 | 4.71k | ::comphelper::intersperse( |
371 | 4.71k | i_rSeq.begin(), i_rSeq.end(), ::comphelper::OUStringBufferAppender(buf), u", "_ustr); |
372 | 4.71k | return buf.makeStringAndClear(); |
373 | 4.71k | } |
374 | | |
375 | | std::vector<OUString> |
376 | | split(std::u16string_view rStr, sal_Unicode cSeparator) |
377 | 166k | { |
378 | 166k | std::vector< OUString > vec; |
379 | 166k | std::size_t idx = 0; |
380 | 166k | do |
381 | 185k | { |
382 | 185k | std::u16string_view kw = o3tl::getToken(rStr, cSeparator, idx); |
383 | 185k | kw = o3tl::trim(kw); |
384 | 185k | if (!kw.empty()) |
385 | 182k | { |
386 | 182k | vec.push_back(OUString(kw)); |
387 | 182k | } |
388 | | |
389 | 185k | } while (idx != std::u16string_view::npos); |
390 | | |
391 | 166k | return vec; |
392 | 166k | } |
393 | | |
394 | | uno::Sequence< OUString > |
395 | | convertCommaSeparated( std::u16string_view i_rString ) |
396 | 5.41k | { |
397 | 5.41k | std::vector< OUString > vec = split(i_rString, ','); |
398 | 5.41k | return comphelper::containerToSequence(vec); |
399 | 5.41k | } |
400 | | |
401 | | OString join(std::string_view rSeparator, const std::vector<OString>& rSequence) |
402 | 0 | { |
403 | 0 | OStringBuffer aBuffer; |
404 | 0 | for (size_t i = 0; i < rSequence.size(); ++i) |
405 | 0 | { |
406 | 0 | if (i != 0) |
407 | 0 | aBuffer.append(rSeparator); |
408 | 0 | aBuffer.append(rSequence[i]); |
409 | 0 | } |
410 | 0 | return aBuffer.makeStringAndClear(); |
411 | 0 | } |
412 | | |
/** Compare two strings so that embedded runs of decimal digits are compared
    by numeric value instead of character by character.

    The strings are walked in alternating chunks: a non-digit chunk, compared
    with rCollator->compareSubstring, followed by a digit chunk, converted
    with decimalStringToNumber and compared as sal_uInt32. The first chunk
    that differs decides the result.

    @param rLHS       left-hand string
    @param rRHS       right-hand string
    @param rCollator  collator used for the non-digit chunks
    @param rBI        break iterator used to locate the digit chunks
    @param rLocale    locale handed to the break iterator
    @return -1 or 1 when a digit chunk decides, the collator's result when a
            non-digit chunk decides, 0 when no chunk differs
*/
sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
    const uno::Reference< i18n::XCollator > &rCollator,
    const uno::Reference< i18n::XBreakIterator > &rBI,
    const lang::Locale &rLocale )
{
    sal_Int32 nRet = 0;

    // Start of the current non-digit chunk / start of the current digit chunk
    sal_Int32 nLHSLastNonDigitPos = 0;
    sal_Int32 nRHSLastNonDigitPos = 0;
    sal_Int32 nLHSFirstDigitPos = 0;
    sal_Int32 nRHSFirstDigitPos = 0;

    // Check if the string starts with a digit
    // (a result > 0 is taken to be the end of a digit run starting at
    // position 0 -- presumably endOfCharBlock returns a non-positive value
    // otherwise; confirm against the XBreakIterator documentation)
    sal_Int32 nStartsDigitLHS = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
    sal_Int32 nStartsDigitRHS = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);

    if (nStartsDigitLHS > 0 && nStartsDigitRHS > 0)
    {
        // Both start with a number: compare the two leading numbers by value
        sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(0, nStartsDigitLHS));
        sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(0, nStartsDigitRHS));

        if (nLHS != nRHS)
            return nLHS < nRHS ? -1 : 1;
        // Equal leading numbers: continue with the text that follows them
        nLHSLastNonDigitPos = nStartsDigitLHS;
        nRHSLastNonDigitPos = nStartsDigitRHS;
    }
    else if (nStartsDigitLHS > 0)
        return -1; // only the left string starts with a number: it sorts first
    else if (nStartsDigitRHS > 0)
        return 1; // only the right string starts with a number: it sorts first

    while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength())
    {
        sal_Int32 nLHSChunkLen;
        sal_Int32 nRHSChunkLen;

        //Compare non digit block as normal strings
        nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
        nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);

        // -1 is treated as "no further digit block": the non-digit chunk
        // then extends to the end of the string
        if (nLHSFirstDigitPos == -1)
            nLHSFirstDigitPos = rLHS.getLength();

        if (nRHSFirstDigitPos == -1)
            nRHSFirstDigitPos = rRHS.getLength();

        nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos;
        nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos;

        nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
        if (nRet != 0)
            break;

        //Compare digit block as one number vs another
        nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
        nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
        // -1 is mapped to the end of the string here as well
        if (nLHSLastNonDigitPos == -1)
            nLHSLastNonDigitPos = rLHS.getLength();
        if (nRHSLastNonDigitPos == -1)
            nRHSLastNonDigitPos = rRHS.getLength();
        nLHSChunkLen = nLHSLastNonDigitPos - nLHSFirstDigitPos;
        nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos;

        //To-Do: Possibly scale down those unicode codepoints that relate to
        //numbers outside of the normal 0-9 range, e.g. see LocalizeDigitsInString in
        //vcl

        // An empty digit chunk converts to 0 in decimalStringToNumber
        sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen));
        sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(nRHSFirstDigitPos, nRHSChunkLen));

        if (nLHS != nRHS)
        {
            nRet = (nLHS < nRHS) ? -1 : 1;
            break;
        }
    }

    return nRet;
}
492 | | |
/** Set up the collator and break iterator for the given locale.

    @param rContext  component context used to create the i18n services
    @param aLocale   locale to sort for; taken by value and moved into m_aLocale
*/
NaturalStringSorter::NaturalStringSorter(
    const uno::Reference< uno::XComponentContext > &rContext,
    lang::Locale aLocale) : m_aLocale(std::move(aLocale))
{
    // The collator must exist before the locale's default collation is loaded into it
    m_xCollator = i18n::Collator::create( rContext );
    m_xCollator->loadDefaultCollator(m_aLocale, 0);
    m_xBI = i18n::BreakIterator::create( rContext );
}
501 | | |
502 | | bool isdigitAsciiString(std::string_view rString) |
503 | 663 | { |
504 | 663 | return std::all_of( |
505 | 663 | rString.data(), rString.data() + rString.size(), |
506 | 1.26k | [](unsigned char c){ return rtl::isAsciiDigit(c); }); |
507 | 663 | } |
508 | | |
509 | | bool isdigitAsciiString(std::u16string_view rString) |
510 | 0 | { |
511 | 0 | return std::all_of( |
512 | 0 | rString.data(), rString.data() + rString.size(), |
513 | 0 | [](sal_Unicode c){ return rtl::isAsciiDigit(c); }); |
514 | 0 | } |
515 | | |
516 | | bool isValidAsciiFilename(std::u16string_view rString) |
517 | 0 | { |
518 | 0 | if (rString.empty() || rString[0] == ' ' || rString[rString.size() - 1] == ' ') |
519 | 0 | return false; |
520 | | |
521 | 0 | bool bRet = std::all_of( |
522 | 0 | rString.data(), rString.data() + rString.size(), |
523 | 0 | [](sal_Unicode c) |
524 | 0 | { |
525 | 0 | if (!rtl::isAscii(c)) |
526 | 0 | return false; |
527 | 0 | switch (c) |
528 | 0 | { |
529 | 0 | case '<': |
530 | 0 | case '>': |
531 | 0 | case ':': |
532 | 0 | case '"': |
533 | 0 | case '\\': |
534 | 0 | case '/': |
535 | 0 | case '?': |
536 | 0 | case '%': |
537 | 0 | case '*': |
538 | 0 | case '|': |
539 | 0 | return false; |
540 | 0 | default: |
541 | 0 | return true; |
542 | 0 | } |
543 | 0 | }); |
544 | 0 | return bRet; |
545 | 0 | } |
546 | | |
547 | | OUString reverseString(std::u16string_view rStr) |
548 | 0 | { |
549 | 0 | if (rStr.empty()) |
550 | 0 | return OUString(); |
551 | | |
552 | 0 | std::size_t i = rStr.size(); |
553 | 0 | OUStringBuffer sBuf(static_cast<sal_Int32>(i)); |
554 | 0 | while (i) |
555 | 0 | sBuf.append(rStr[--i]); |
556 | 0 | return sBuf.makeStringAndClear(); |
557 | 0 | } |
558 | | |
559 | 0 | OUString reverseCodePoints(std::u16string_view str) { |
560 | 0 | auto const len = str.size(); |
561 | 0 | OUStringBuffer buf(len); |
562 | 0 | for (sal_Int32 i = len; i != 0;) { |
563 | 0 | buf.appendUtf32(o3tl::iterateCodePoints(str, &i, -1)); |
564 | 0 | } |
565 | 0 | return buf.makeStringAndClear(); |
566 | 0 | } |
567 | | |
568 | | sal_Int32 indexOfAny(std::u16string_view rIn, |
569 | | sal_Unicode const*const pChars, sal_Int32 const nPos) |
570 | 6.74k | { |
571 | 866k | for (std::u16string_view::size_type i = nPos; i < rIn.size(); ++i) |
572 | 864k | { |
573 | 864k | sal_Unicode const c = rIn[i]; |
574 | 2.58M | for (sal_Unicode const* pChar = pChars; *pChar; ++pChar) |
575 | 1.72M | { |
576 | 1.72M | if (c == *pChar) |
577 | 4.12k | { |
578 | 4.12k | return i; |
579 | 4.12k | } |
580 | 1.72M | } |
581 | 864k | } |
582 | 2.61k | return -1; |
583 | 6.74k | } |
584 | | |
585 | | OUString removeAny(std::u16string_view rIn, |
586 | | sal_Unicode const*const pChars) |
587 | 35.4k | { |
588 | 35.4k | OUStringBuffer buf; |
589 | 35.4k | bool isFound(false); |
590 | 9.12M | for (std::u16string_view::size_type i = 0; i < rIn.size(); ++i) |
591 | 9.08M | { |
592 | 9.08M | sal_Unicode const c = rIn[i]; |
593 | 9.08M | bool removeC(false); |
594 | 72.7M | for (sal_Unicode const* pChar = pChars; *pChar; ++pChar) |
595 | 63.6M | { |
596 | 63.6M | if (c == *pChar) |
597 | 81 | { |
598 | 81 | removeC = true; |
599 | 81 | break; |
600 | 81 | } |
601 | 63.6M | } |
602 | 9.08M | if (removeC) |
603 | 81 | { |
604 | 81 | if (!isFound) |
605 | 71 | { |
606 | 71 | if (i > 0) |
607 | 4 | { |
608 | 4 | buf.append(rIn.substr(0, i)); |
609 | 4 | } |
610 | 71 | isFound = true; |
611 | 71 | } |
612 | 81 | } |
613 | 9.08M | else if (isFound) |
614 | 84 | { |
615 | 84 | buf.append(c); |
616 | 84 | } |
617 | 9.08M | } |
618 | 35.4k | return isFound ? buf.makeStringAndClear() : OUString(rIn); |
619 | 35.4k | } |
620 | | |
621 | | OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok, |
622 | | std::u16string_view rNewToken) |
623 | 3.02k | { |
624 | 3.02k | sal_Int32 nLen = rIn.getLength(); |
625 | 3.02k | sal_Int32 nTok = 0; |
626 | 3.02k | sal_Int32 nFirstChar = 0; |
627 | 3.02k | sal_Int32 i = 0; |
628 | | |
629 | | // Determine token position and length |
630 | 167k | while ( i < nLen ) |
631 | 167k | { |
632 | | // Increase token count if match |
633 | 167k | if (rIn[i] == cTok) |
634 | 51.3k | { |
635 | 51.3k | ++nTok; |
636 | | |
637 | 51.3k | if (nTok == nToken) |
638 | 2.85k | nFirstChar = i+1; |
639 | 48.5k | else if (nTok > nToken) |
640 | 2.94k | break; |
641 | 51.3k | } |
642 | | |
643 | 164k | ++i; |
644 | 164k | } |
645 | | |
646 | 3.02k | if (nTok >= nToken) |
647 | 3.02k | return rIn.replaceAt(nFirstChar, i-nFirstChar, rNewToken); |
648 | 0 | return rIn; |
649 | 3.02k | } |
650 | | |
651 | | /** Similar to OUString::replaceAt, but for an OUStringBuffer. |
652 | | |
653 | | Replace n = count characters |
654 | | from position index in this string with newStr. |
655 | | */ |
656 | | void replaceAt(OUStringBuffer& rIn, sal_Int32 nIndex, sal_Int32 nCount, std::u16string_view newStr ) |
657 | 0 | { |
658 | 0 | assert(nIndex >= 0 && nIndex <= rIn.getLength()); |
659 | 0 | assert(nCount >= 0); |
660 | 0 | assert(nCount <= rIn.getLength() - nIndex); |
661 | | |
662 | | /* Append? */ |
663 | 0 | const sal_Int32 nOldLength = rIn.getLength(); |
664 | 0 | if ( nIndex == nOldLength ) |
665 | 0 | { |
666 | 0 | rIn.append(newStr); |
667 | 0 | return; |
668 | 0 | } |
669 | | |
670 | 0 | sal_Int32 nNewLength = nOldLength + newStr.size() - nCount; |
671 | 0 | if (newStr.size() > o3tl::make_unsigned(nCount)) |
672 | 0 | rIn.ensureCapacity(nOldLength + newStr.size() - nCount); |
673 | |
|
674 | 0 | sal_Unicode* pStr = const_cast<sal_Unicode*>(rIn.getStr()); |
675 | 0 | memmove(pStr + nIndex + newStr.size(), pStr + nIndex + nCount, nOldLength - nIndex + nCount); |
676 | 0 | memcpy(pStr + nIndex, newStr.data(), newStr.size()); |
677 | |
|
678 | 0 | rIn.setLength(nNewLength); |
679 | 0 | } |
680 | | |
681 | | OUString sanitizeStringSurrogates(const OUString& rString) |
682 | 1.23M | { |
683 | 1.23M | sal_Int32 i=0; |
684 | 2.38M | while (i < rString.getLength()) |
685 | 1.14M | { |
686 | 1.14M | sal_Unicode c = rString[i]; |
687 | 1.14M | if (rtl::isHighSurrogate(c)) |
688 | 224 | { |
689 | 224 | if (i+1 == rString.getLength() |
690 | 223 | || !rtl::isLowSurrogate(rString[i+1])) |
691 | 221 | { |
692 | 221 | SAL_WARN("comphelper", "Surrogate error: high without low"); |
693 | 221 | return rString.copy(0, i); |
694 | 221 | } |
695 | 3 | ++i; //skip correct low |
696 | 3 | } |
697 | 1.14M | if (rtl::isLowSurrogate(c)) //bare low without preceding high |
698 | 174 | { |
699 | 174 | SAL_WARN("comphelper", "Surrogate error: low without high"); |
700 | 174 | return rString.copy(0, i); |
701 | 174 | } |
702 | 1.14M | ++i; |
703 | 1.14M | } |
704 | 1.23M | return rString; |
705 | 1.23M | } |
706 | | |
707 | | } |
708 | | |
709 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |