Coverage Report

Created: 2026-02-14 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/include/rtl/character.hxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
/*
21
 * This file is part of LibreOffice published API.
22
 */
23
24
#ifndef INCLUDED_RTL_CHARACTER_HXX
25
#define INCLUDED_RTL_CHARACTER_HXX
26
27
#include "sal/config.h"
28
29
#include <cassert>
30
#include <cstddef>
31
32
#include "sal/types.h"
33
34
#if defined LIBO_INTERNAL_ONLY
35
#include <type_traits>
36
#endif
37
38
namespace rtl
39
{
40
/** Check for Unicode code point.
41
42
    @param code  An integer.
43
44
    @return  True if code is a Unicode code point.
45
46
    @since LibreOffice 5.2
47
*/
48
99.6M
inline SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
49
50
/** Check for ASCII character.
51
52
    @param code  A Unicode code point.
53
54
    @return  True if code is an ASCII character (0x00--0x7F).
55
56
    @since LibreOffice 4.1
57
 */
58
inline SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
59
55.9M
{
60
55.9M
    assert(isUnicodeCodePoint(code));
61
55.9M
    return code <= 0x7F;
62
55.9M
}
63
64
#if defined LIBO_INTERNAL_ONLY
65
bool isAscii(char) = delete;
66
bool isAscii(signed char) = delete;
67
template <typename T>
68
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
69
isAscii(T code)
70
22.1k
{
71
22.1k
    return isAscii(sal_uInt32(code));
72
22.1k
}
_ZN3rtl7isAsciiIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
70
22.1k
{
71
22.1k
    return isAscii(sal_uInt32(code));
72
22.1k
}
Unexecuted instantiation: _ZN3rtl7isAsciiIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
73
#endif
74
75
/** Check for ASCII lower case character.
76
77
    @param code  A Unicode code point.
78
79
    @return  True if code is an ASCII lower case alphabetic character (ASCII
80
    'a'--'z').
81
82
    @since LibreOffice 4.1
83
 */
84
inline SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
85
271M
{
86
271M
    assert(isUnicodeCodePoint(code));
87
271M
    return code >= 'a' && code <= 'z';
88
271M
}
89
90
#if defined LIBO_INTERNAL_ONLY
91
bool isAsciiLowerCase(char) = delete;
92
bool isAsciiLowerCase(signed char) = delete;
93
template <typename T>
94
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
95
isAsciiLowerCase(T code)
96
963k
{
97
963k
    return isAsciiLowerCase(sal_uInt32(code));
98
963k
}
Unexecuted instantiation: _ZN3rtl16isAsciiLowerCaseIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Unexecuted instantiation: _ZN3rtl16isAsciiLowerCaseIjEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
_ZN3rtl16isAsciiLowerCaseIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
96
963k
{
97
963k
    return isAsciiLowerCase(sal_uInt32(code));
98
963k
}
99
#endif
100
101
/** Check for ASCII upper case character.
102
103
    @param code  A Unicode code point.
104
105
    @return  True if code is an ASCII upper case alphabetic character (ASCII
106
    'A'--'Z').
107
108
    @since LibreOffice 4.1
109
 */
110
inline SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
111
698M
{
112
698M
    assert(isUnicodeCodePoint(code));
113
698M
    return code >= 'A' && code <= 'Z';
114
698M
}
115
116
#if defined LIBO_INTERNAL_ONLY
117
bool isAsciiUpperCase(char) = delete;
118
bool isAsciiUpperCase(signed char) = delete;
119
template <typename T>
120
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
121
isAsciiUpperCase(T code)
122
833k
{
123
833k
    return isAsciiUpperCase(sal_uInt32(code));
124
833k
}
_ZN3rtl16isAsciiUpperCaseIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
122
713k
{
123
713k
    return isAsciiUpperCase(sal_uInt32(code));
124
713k
}
Unexecuted instantiation: _ZN3rtl16isAsciiUpperCaseIjEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
_ZN3rtl16isAsciiUpperCaseIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
122
120k
{
123
120k
    return isAsciiUpperCase(sal_uInt32(code));
124
120k
}
125
#endif
126
127
/** Check for ASCII alphabetic character.
128
129
    @param code  A Unicode code point.
130
131
    @return  True if code is an ASCII alphabetic character (ASCII 'A'--'Z' or
132
    'a'--'z').
133
134
    @since LibreOffice 4.1
135
 */
136
inline SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
137
211M
{
138
211M
    assert(isUnicodeCodePoint(code));
139
211M
    return isAsciiLowerCase(code) || isAsciiUpperCase(code);
140
211M
}
141
142
#if defined LIBO_INTERNAL_ONLY
143
bool isAsciiAlpha(char) = delete;
144
bool isAsciiAlpha(signed char) = delete;
145
template <typename T>
146
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
147
isAsciiAlpha(T code)
148
137M
{
149
137M
    return isAsciiAlpha(sal_uInt32(code));
150
137M
}
_ZN3rtl12isAsciiAlphaIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
148
30.7M
{
149
30.7M
    return isAsciiAlpha(sal_uInt32(code));
150
30.7M
}
_ZN3rtl12isAsciiAlphaIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
148
104M
{
149
104M
    return isAsciiAlpha(sal_uInt32(code));
150
104M
}
_ZN3rtl12isAsciiAlphaIiEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
148
1.57M
{
149
1.57M
    return isAsciiAlpha(sal_uInt32(code));
150
1.57M
}
151
#endif
152
153
/** Check for ASCII digit character.
154
155
    @param code  A Unicode code point.
156
157
    @return  True if code is an ASCII (decimal) digit character (ASCII
158
    '0'--'9').
159
160
    @since LibreOffice 4.1
161
 */
162
inline SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
163
174M
{
164
174M
    assert(isUnicodeCodePoint(code));
165
174M
    return code >= '0' && code <= '9';
166
174M
}
167
168
#if defined LIBO_INTERNAL_ONLY
169
bool isAsciiDigit(char) = delete;
170
bool isAsciiDigit(signed char) = delete;
171
template <typename T>
172
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
173
isAsciiDigit(T code)
174
119M
{
175
119M
    return isAsciiDigit(sal_uInt32(code));
176
119M
}
_ZN3rtl12isAsciiDigitIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
174
19.2M
{
175
19.2M
    return isAsciiDigit(sal_uInt32(code));
176
19.2M
}
_ZN3rtl12isAsciiDigitIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
174
98.9M
{
175
98.9M
    return isAsciiDigit(sal_uInt32(code));
176
98.9M
}
_ZN3rtl12isAsciiDigitItEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
174
26.3k
{
175
26.3k
    return isAsciiDigit(sal_uInt32(code));
176
26.3k
}
_ZN3rtl12isAsciiDigitIiEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
174
1.01M
{
175
1.01M
    return isAsciiDigit(sal_uInt32(code));
176
1.01M
}
177
#endif
178
179
/** Check for ASCII alphanumeric character.
180
181
    @param code  A Unicode code point.
182
183
    @return  True if code is an ASCII alphanumeric character (ASCII '0'--'9',
184
    'A'--'Z', or 'a'--'z').
185
186
    @since LibreOffice 4.1
187
 */
188
inline SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
189
28.2M
{
190
28.2M
    assert(isUnicodeCodePoint(code));
191
28.2M
    return isAsciiDigit(code) || isAsciiAlpha(code);
192
28.2M
}
193
194
#if defined LIBO_INTERNAL_ONLY
195
bool isAsciiAlphanumeric(char) = delete;
196
bool isAsciiAlphanumeric(signed char) = delete;
197
template <typename T>
198
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
199
isAsciiAlphanumeric(T code)
200
27.9M
{
201
27.9M
    return isAsciiAlphanumeric(sal_uInt32(code));
202
27.9M
}
_ZN3rtl19isAsciiAlphanumericIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
200
27.0M
{
201
27.0M
    return isAsciiAlphanumeric(sal_uInt32(code));
202
27.0M
}
_ZN3rtl19isAsciiAlphanumericIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
200
859k
{
201
859k
    return isAsciiAlphanumeric(sal_uInt32(code));
202
859k
}
203
#endif
204
205
/** Check for ASCII canonic hexadecimal digit character.
206
207
    @param code  A Unicode code point.
208
209
    @return  True if code is an ASCII canonic (i.e., upper case) hexadecimal
210
    digit character (ASCII '0'--'9' or 'A'--'F').
211
212
    @since LibreOffice 4.1
213
 */
214
inline SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
215
785k
{
216
785k
    assert(isUnicodeCodePoint(code));
217
785k
    return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
218
785k
}
219
220
#if defined LIBO_INTERNAL_ONLY
221
bool isAsciiCanonicHexDigit(char) = delete;
222
bool isAsciiCanonicHexDigit(signed char) = delete;
223
template <typename T>
224
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
225
isAsciiCanonicHexDigit(T code)
226
7.38k
{
227
7.38k
    return isAsciiCanonicHexDigit(sal_uInt32(code));
228
7.38k
}
229
#endif
230
231
/** Check for ASCII hexadecimal digit character.
232
233
    @param code  A Unicode code point.
234
235
    @return  True if code is an ASCII hexadecimal digit character (ASCII
236
    '0'--'9', 'A'--'F', or 'a'--'f').
237
238
    @since LibreOffice 4.1
239
 */
240
inline SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
241
777k
{
242
777k
    assert(isUnicodeCodePoint(code));
243
777k
    return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
244
777k
}
245
246
#if defined LIBO_INTERNAL_ONLY
247
bool isAsciiHexDigit(char) = delete;
248
bool isAsciiHexDigit(signed char) = delete;
249
template <typename T>
250
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
251
isAsciiHexDigit(T code)
252
676k
{
253
676k
    return isAsciiHexDigit(sal_uInt32(code));
254
676k
}
_ZN3rtl15isAsciiHexDigitIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
252
444k
{
253
444k
    return isAsciiHexDigit(sal_uInt32(code));
254
444k
}
_ZN3rtl15isAsciiHexDigitIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
252
231k
{
253
231k
    return isAsciiHexDigit(sal_uInt32(code));
254
231k
}
255
#endif
256
257
/** Check for ASCII octal digit character.
258
259
    @param code  A Unicode code point.
260
261
    @return  True if code is an ASCII octal digit character (ASCII '0'--'7').
262
263
    @since LibreOffice 5.0
264
 */
265
inline SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
266
0
{
267
0
    assert(isUnicodeCodePoint(code));
268
0
    return code >= '0' && code <= '7';
269
0
}
270
271
#if defined LIBO_INTERNAL_ONLY
272
bool isAsciiOctalDigit(char) = delete;
273
bool isAsciiOctalDigit(signed char) = delete;
274
template <typename T>
275
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
276
isAsciiOctalDigit(T code)
277
0
{
278
0
    return isAsciiOctalDigit(sal_uInt32(code));
279
0
}
280
#endif
281
282
/** Check for ASCII white space character.
283
284
    @param code  A Unicode code point.
285
286
    @return  True if code is an ASCII white space character as defined by C for
287
    isspace in the "C" locale (ASCII ' ', '\\f', '\\n', '\\r', '\\t', or '\\v').
288
289
    @since LibreOffice 5.4
290
*/
291
inline SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
292
22.9M
{
293
22.9M
    assert(isUnicodeCodePoint(code));
294
22.9M
    return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
295
20.4M
           || code == '\v';
296
22.9M
}
297
298
#if defined LIBO_INTERNAL_ONLY
299
bool isAsciiWhiteSpace(char) = delete;
300
bool isAsciiWhiteSpace(signed char) = delete;
301
template <typename T>
302
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32), bool>
303
isAsciiWhiteSpace(T code)
304
9.73M
{
305
9.73M
    return isAsciiWhiteSpace(sal_uInt32(code));
306
9.73M
}
_ZN3rtl17isAsciiWhiteSpaceIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
304
1.54M
{
305
1.54M
    return isAsciiWhiteSpace(sal_uInt32(code));
306
1.54M
}
_ZN3rtl17isAsciiWhiteSpaceIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEbE4typeES3_
Line
Count
Source
304
8.18M
{
305
8.18M
    return isAsciiWhiteSpace(sal_uInt32(code));
306
8.18M
}
307
#endif
308
309
/** Convert a character, if ASCII, to upper case.
310
311
    @param code  A Unicode code point.
312
313
    @return  code converted to ASCII upper case.
314
315
    @since LibreOffice 4.2
316
*/
317
inline SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
318
34.2M
{
319
34.2M
    assert(isUnicodeCodePoint(code));
320
34.2M
    return isAsciiLowerCase(code) ? code - 32 : code;
321
34.2M
}
322
323
#if defined LIBO_INTERNAL_ONLY
324
sal_uInt32 toAsciiUpperCase(char) = delete;
325
sal_uInt32 toAsciiUpperCase(signed char) = delete;
326
template <typename T>
327
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32),
328
                                  sal_uInt32>
329
toAsciiUpperCase(T code)
330
5.80M
{
331
5.80M
    return toAsciiUpperCase(sal_uInt32(code));
332
5.80M
}
_ZN3rtl16toAsciiUpperCaseIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
Line
Count
Source
330
5.52M
{
331
5.52M
    return toAsciiUpperCase(sal_uInt32(code));
332
5.52M
}
Unexecuted instantiation: _ZN3rtl16toAsciiUpperCaseIjEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
_ZN3rtl16toAsciiUpperCaseIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
Line
Count
Source
330
276k
{
331
276k
    return toAsciiUpperCase(sal_uInt32(code));
332
276k
}
333
#endif
334
335
/** Convert a character, if ASCII, to lower case.
336
337
    @param code  A Unicode code point.
338
339
    @return  code converted to ASCII lower case.
340
341
    @since LibreOffice 4.2
342
*/
343
inline SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
344
550M
{
345
550M
    assert(isUnicodeCodePoint(code));
346
550M
    return isAsciiUpperCase(code) ? code + 32 : code;
347
550M
}
348
349
#if defined LIBO_INTERNAL_ONLY
350
sal_uInt32 toAsciiLowerCase(char) = delete;
351
sal_uInt32 toAsciiLowerCase(signed char) = delete;
352
template <typename T>
353
inline constexpr std::enable_if_t<std::is_integral_v<T> && sizeof(T) <= sizeof(sal_uInt32),
354
                                  sal_uInt32>
355
toAsciiLowerCase(T code)
356
11.5M
{
357
11.5M
    return toAsciiLowerCase(sal_uInt32(code));
358
11.5M
}
_ZN3rtl16toAsciiLowerCaseIDsEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
Line
Count
Source
356
10.6M
{
357
10.6M
    return toAsciiLowerCase(sal_uInt32(code));
358
10.6M
}
_ZN3rtl16toAsciiLowerCaseIjEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
Line
Count
Source
356
277k
{
357
277k
    return toAsciiLowerCase(sal_uInt32(code));
358
277k
}
_ZN3rtl16toAsciiLowerCaseIhEENSt3__19enable_ifIXaasr3stdE13is_integral_vIT_ElestS3_Lm4EEjE4typeES3_
Line
Count
Source
356
562k
{
357
562k
    return toAsciiLowerCase(sal_uInt32(code));
358
562k
}
359
#endif
360
361
/** Compare two characters ignoring ASCII case.
362
363
    @param code1  A Unicode code point.
364
365
    @param code2  A Unicode code point.
366
367
    @return  0 if both code points are equal when ignoring ASCII case,
368
             < 0 if code1 is less than code2,
369
             > 0 if code1 is greater than code2.
370
371
    @since LibreOffice 4.2
372
 */
373
inline SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
374
244M
{
375
244M
    assert(isUnicodeCodePoint(code1));
376
244M
    assert(isUnicodeCodePoint(code2));
377
244M
    return static_cast<sal_Int32>(toAsciiLowerCase(code1))
378
244M
           - static_cast<sal_Int32>(toAsciiLowerCase(code2));
379
244M
}
380
381
/// @cond INTERNAL
382
namespace detail
383
{
384
sal_uInt32 const surrogatesHighFirst = 0xD800;
385
sal_uInt32 const surrogatesHighLast = 0xDBFF;
386
sal_uInt32 const surrogatesLowFirst = 0xDC00;
387
sal_uInt32 const surrogatesLowLast = 0xDFFF;
388
}
389
/// @endcond
390
391
/** Check for surrogate.
392
393
    @param code  A Unicode code point.
394
395
    @return  True if code is a surrogate code point (0xD800--0xDFFF).
396
397
    @since LibreOffice 6.0
398
*/
399
inline SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
400
171M
{
401
171M
    assert(isUnicodeCodePoint(code));
402
171M
    return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
403
171M
}
404
405
/** Check for high surrogate.
406
407
    @param code  A Unicode code point.
408
409
    @return  True if code is a high surrogate code point (0xD800--0xDBFF).
410
411
    @since LibreOffice 5.0
412
*/
413
inline SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
414
8.57G
{
415
8.57G
    assert(isUnicodeCodePoint(code));
416
8.57G
    return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
417
8.57G
}
418
419
/** Check for low surrogate.
420
421
    @param code  A Unicode code point.
422
423
    @return  True if code is a low surrogate code point (0xDC00--0xDFFF).
424
425
    @since LibreOffice 5.0
426
*/
427
inline SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
428
396M
{
429
396M
    assert(isUnicodeCodePoint(code));
430
396M
    return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
431
396M
}
432
433
/** Get high surrogate half of a non-BMP Unicode code point.
434
435
    @param code  A non-BMP Unicode code point.
436
437
    @return  The UTF-16 high surrogate half for the given code point.
438
439
    @since LibreOffice 5.0
440
 */
441
inline SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
442
2.98M
{
443
2.98M
    assert(isUnicodeCodePoint(code));
444
2.98M
    assert(code >= 0x10000);
445
2.98M
    return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
446
2.98M
}
447
448
/** Get low surrogate half of a non-BMP Unicode code point.
449
450
    @param code  A non-BMP Unicode code point.
451
452
    @return  The UTF-16 low surrogate half for the given code point.
453
454
    @since LibreOffice 5.0
455
 */
456
inline SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
457
2.98M
{
458
2.98M
    assert(isUnicodeCodePoint(code));
459
2.98M
    assert(code >= 0x10000);
460
2.98M
    return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
461
2.98M
}
462
463
/** Combine surrogates to form a code point.
464
465
    @param high  A high surrogate code point.
466
467
    @param low  A low surrogate code point.
468
469
    @return  The code point represented by the surrogate pair.
470
471
    @since LibreOffice 5.0
472
*/
473
inline SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
474
2.06M
{
475
2.06M
    assert(isHighSurrogate(high));
476
2.06M
    assert(isLowSurrogate(low));
477
2.06M
    return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
478
2.06M
           + 0x10000;
479
2.06M
}
480
481
/** Split a Unicode code point into UTF-16 code units.
482
483
    @param code  A Unicode code point.
484
485
    @param output  A non-null pointer to an array with space for at least two
486
    sal_Unicode UTF-16 code units.
487
488
    @return  The number of UTF-16 code units placed into the output (either one
489
    or two).
490
491
    @since LibreOffice 5.3
492
*/
493
inline SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
494
1.18G
{
495
1.18G
    assert(isUnicodeCodePoint(code));
496
1.18G
    assert(output != NULL);
497
1.18G
    if (code < 0x10000)
498
1.18G
    {
499
1.18G
        output[0] = code;
500
1.18G
        return 1;
501
1.18G
    }
502
2.98M
    else
503
2.98M
    {
504
2.98M
        output[0] = getHighSurrogate(code);
505
2.98M
        output[1] = getLowSurrogate(code);
506
2.98M
        return 2;
507
2.98M
    }
508
1.18G
}
509
510
/** Check for Unicode scalar value.
511
512
    @param code  An integer.
513
514
    @return  True if code is a Unicode scalar value.
515
516
    @since LibreOffice 6.0
517
*/
518
inline SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
519
96.6M
{
520
96.6M
    return isUnicodeCodePoint(code) && !isSurrogate(code);
521
96.6M
}
522
}
523
524
#endif
525
526
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */