/src/icu/source/common/ustr_imp.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*    | 
4  |  | **********************************************************************  | 
5  |  | *   Copyright (C) 1999-2015, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | **********************************************************************  | 
8  |  | *   file name:  ustr_imp.h  | 
9  |  | *   encoding:   UTF-8  | 
10  |  | *   tab size:   8 (not used)  | 
11  |  | *   indentation:4  | 
12  |  | *  | 
13  |  | *   created on: 2001jan30  | 
14  |  | *   created by: Markus W. Scherer  | 
15  |  | */  | 
16  |  |  | 
17  |  | #ifndef __USTR_IMP_H__  | 
18  |  | #define __USTR_IMP_H__  | 
19  |  |  | 
20  |  | #include "unicode/utypes.h"  | 
21  |  | #include "unicode/utf8.h"  | 
22  |  |  | 
23  |  | /**  | 
24  |  |  * Internal option flag (value 0x1000) for unorm_cmpEquivFold() that selects strncmp style.  | 
25  |  |  * If set, the comparison checks for both string length and terminating NUL.  | 
26  |  |  * NOTE(review): names beginning with an underscore followed by an uppercase letter are reserved identifiers in C and C++.  */  | 
27  | 0  | #define _STRNCMP_STYLE 0x1000  | 
28  |  |  | 
29  |  | /**  | 
30  |  |  * Compare two UChar strings (s1 with length1, s2 with length2) in code point order or code unit order.  | 
31  |  |  * Works in strcmp style (both lengths -1),  | 
32  |  |  * strncmp style (lengths equal and >=0, strncmpStyle flag true),  | 
33  |  |  * and memcmp/UnicodeString style (at least one length >=0).  | 
34  |  |  * NOTE(review): presumably codePointOrder=true selects code point order and the int32_t result uses the usual negative/zero/positive convention; neither is stated in this header -- confirm against the implementation.  */  | 
35  |  | U_CFUNC int32_t U_EXPORT2  | 
36  |  | uprv_strCompare(const UChar *s1, int32_t length1,  | 
37  |  |                 const UChar *s2, int32_t length2,  | 
38  |  |                 UBool strncmpStyle, UBool codePointOrder);  | 
39  |  |  | 
40  |  | U_CAPI int32_t U_EXPORT2   | 
41  |  | ustr_hashUCharsN(const UChar *str, int32_t length);  /* NOTE(review): undocumented in this header; presumably returns a hash code for the `length` UChars at str -- confirm against the implementation */  | 
42  |  |  | 
43  |  | U_CAPI int32_t U_EXPORT2   | 
44  |  | ustr_hashCharsN(const char *str, int32_t length);  /* NOTE(review): undocumented in this header; presumably returns a hash code for the `length` chars at str -- confirm against the implementation */  | 
45  |  |  | 
46  |  | U_CAPI int32_t U_EXPORT2  | 
47  |  | ustr_hashICharsN(const char *str, int32_t length);  /* NOTE(review): undocumented in this header; the "I" presumably means a case-insensitive hash of the `length` chars at str -- confirm against the implementation */  | 
48  |  |  | 
49  |  | /**  | 
50  |  |  * Convert an ASCII-range lowercase character to uppercase.  | 
51  |  |  *   | 
52  |  |  * @param c The UChar to convert.  | 
53  |  |  * @return If c is a lowercase ASCII character, returns the uppercase version.  | 
54  |  |  *         Otherwise, returns the input character unchanged.  | 
55  |  |  */  | 
56  |  | U_CAPI UChar U_EXPORT2  | 
57  |  | u_asciiToUpper(UChar c);  | 
58  |  |  | 
59  |  | // TODO: Add u_asciiToLower if/when there is a need for it.  | 
60  |  |  | 
61  |  | /**  | 
62  |  |  * NUL-terminate a UChar * string if possible.  | 
63  |  |  * If length  < destCapacity then NUL-terminate.  | 
64  |  |  * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING.  | 
65  |  |  * If length  > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR.  | 
66  |  |  *  | 
67  |  |  * @param dest Destination buffer, can be NULL if destCapacity==0.  | 
68  |  |  * @param destCapacity Number of UChars available at dest.  | 
69  |  |  * @param length Number of UChars that were (to be) written to dest.  | 
70  |  |  * @param pErrorCode ICU error code; receives the warning or error described in the cases above.  | 
71  |  |  * @return The length argument.  | 
72  |  |  */  | 
73  |  | U_CAPI int32_t U_EXPORT2  | 
74  |  | u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);  | 
75  |  |  | 
76  |  | /**  | 
77  |  |  * NUL-terminate a char * string if possible.  | 
78  |  |  * Same as u_terminateUChars() but dest is a char * buffer; see u_terminateUChars() for the length/destCapacity cases.  | 
79  |  |  */  | 
80  |  | U_CAPI int32_t U_EXPORT2  | 
81  |  | u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);  | 
82  |  |  | 
83  |  | /**  | 
84  |  |  * NUL-terminate a UChar32 * string if possible.  | 
85  |  |  * Same as u_terminateUChars() but dest is a UChar32 * buffer; see u_terminateUChars() for the length/destCapacity cases.  | 
86  |  |  */  | 
87  |  | U_CAPI int32_t U_EXPORT2  | 
88  |  | u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);  | 
89  |  |  | 
90  |  | /**  | 
91  |  |  * NUL-terminate a wchar_t * string if possible.  | 
92  |  |  * Same as u_terminateUChars() but dest is a wchar_t * buffer; see u_terminateUChars() for the length/destCapacity cases.  | 
93  |  |  */  | 
94  |  | U_CAPI int32_t U_EXPORT2  | 
95  |  | u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode);  | 
96  |  |  | 
97  |  | /**  | 
98  |  |  * Counts the bytes of any whole valid sequence for a UTF-8 lead byte.  | 
99  |  |  * Returns 1 for ASCII 0..0x7f; all other values are delegated to U8_COUNT_BYTES_NON_ASCII().  | 
100  |  |  * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff.  | 
101  |  |  * leadByte might be evaluated multiple times, so it should not have side effects.  | 
102  |  |  *  | 
103  |  |  * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.  | 
104  |  |  * @return 0..4  | 
105  |  |  */  | 
106  |  | #define U8_COUNT_BYTES(leadByte) \  | 
107  |  |     (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte))  | 
108  |  |  | 
109  |  | /**  | 
110  |  |  * Counts the bytes of any whole valid sequence for a UTF-8 lead byte, without the single-byte (ASCII) case.  | 
111  |  |  * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff.  | 
112  |  |  * leadByte might be evaluated multiple times, so it should not have side effects.  | 
113  |  |  *  | 
114  |  |  * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.  | 
115  |  |  * @return 0 or 2..4 (when U8_IS_LEAD(leadByte) holds: 2, plus 1 if leadByte>=0xe0, plus 1 more if leadByte>=0xf0)  | 
116  |  |  */  | 
117  |  | #define U8_COUNT_BYTES_NON_ASCII(leadByte) \  | 
118  |  |     (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0)  | 
119  |  |  | 
120  |  | #ifdef __cplusplus  | 
121  |  |  | 
122  |  | U_NAMESPACE_BEGIN  | 
123  |  |  | 
124  |  | class UTF8 { /* Static-only UTF-8 helper class: the constructor is deleted, so it cannot be instantiated. */ | 
125  |  | public:  | 
126  |  |     UTF8() = delete;  // all static  | 
127  |  |  | 
128  |  |     /**  | 
129  |  |      * Is t a valid UTF-8 trail byte at index i of a byte sequence of the given length?  | 
130  |  |      *  | 
131  |  |      * @param prev Must be the preceding lead byte if i==1 and length>=3;  | 
132  |  |      *             otherwise ignored.  | 
133  |  |      * @param t The i-th byte following the lead byte.  | 
134  |  |      * @param i The index (1..3) of byte t in the byte sequence. 0<i<length  | 
135  |  |      * @param length The length (2..4) of the byte sequence according to the lead byte.  | 
136  |  |      * @return true if t is a valid trail byte in this context.  | 
137  |  |      * NOTE(review): length is not range-checked here; when i<=1, any length that is neither <=2 nor ==3 takes the 4-byte branch.  */  | 
138  | 0  |     static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) { | 
139  | 0  |         // The first trail byte after a 3- or 4-byte lead byte  | 
140  | 0  |         // needs to be validated together with its lead byte (prev); all other trail bytes get the generic check.  | 
141  | 0  |         if (length <= 2 || i > 1) { /* 2-byte sequence, or 2nd/3rd trail byte: prev is ignored */ | 
142  | 0  |             return U8_IS_TRAIL(t);  | 
143  | 0  |         } else if (length == 3) { /* first trail byte of a 3-byte sequence: checked together with prev */ | 
144  | 0  |             return U8_IS_VALID_LEAD3_AND_T1(prev, t);  | 
145  | 0  |         } else {  // length == 4: first trail byte of a 4-byte sequence, checked together with prev | 
146  | 0  |             return U8_IS_VALID_LEAD4_AND_T1(prev, t);  | 
147  | 0  |         }  | 
148  | 0  |     }  | 
149  |  | };  | 
150  |  |  | 
151  |  | U_NAMESPACE_END  | 
152  |  |  | 
153  |  | #endif  // __cplusplus  | 
154  |  |  | 
155  |  | #endif  |