/src/icu/source/i18n/collation.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 2010-2014, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * collation.cpp  | 
9  |  | *  | 
10  |  | * created on: 2010oct27  | 
11  |  | * created by: Markus W. Scherer  | 
12  |  | */  | 
13  |  |  | 
14  |  | #include "unicode/utypes.h"  | 
15  |  |  | 
16  |  | #if !UCONFIG_NO_COLLATION  | 
17  |  |  | 
18  |  | #include "collation.h"  | 
19  |  | #include "uassert.h"  | 
20  |  |  | 
21  |  | U_NAMESPACE_BEGIN  | 
22  |  |  | 
23  |  | // Some compilers don't care if constants are defined in the .cpp file.  | 
24  |  | // MS Visual C++ does not like it, but gcc requires it. clang does not care.  | 
25  |  | #ifndef _MSC_VER  | 
26  |  | const uint8_t Collation::LEVEL_SEPARATOR_BYTE;  | 
27  |  | const uint8_t Collation::MERGE_SEPARATOR_BYTE;  | 
28  |  | const uint32_t Collation::ONLY_TERTIARY_MASK;  | 
29  |  | const uint32_t Collation::CASE_AND_TERTIARY_MASK;  | 
30  |  | #endif  | 
31  |  |  | 
32  |  | uint32_t  | 
33  | 0  | Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { | 
34  |  |     // Extract the second byte, minus the minimum byte value,  | 
35  |  |     // plus the offset, modulo the number of usable byte values, plus the minimum.  | 
36  |  |     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.  | 
37  | 0  |     uint32_t primary;  | 
38  | 0  |     if(isCompressible) { | 
39  | 0  |         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;  | 
40  | 0  |         primary = (uint32_t)((offset % 251) + 4) << 16;  | 
41  | 0  |         offset /= 251;  | 
42  | 0  |     } else { | 
43  | 0  |         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;  | 
44  | 0  |         primary = (uint32_t)((offset % 254) + 2) << 16;  | 
45  | 0  |         offset /= 254;  | 
46  | 0  |     }  | 
47  |  |     // First byte, assume no further overflow.  | 
48  | 0  |     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));  | 
49  | 0  | }  | 
50  |  |  | 
51  |  | uint32_t  | 
52  | 0  | Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { | 
53  |  |     // Extract the third byte, minus the minimum byte value,  | 
54  |  |     // plus the offset, modulo the number of usable byte values, plus the minimum.  | 
55  | 0  |     offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;  | 
56  | 0  |     uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;  | 
57  | 0  |     offset /= 254;  | 
58  |  |     // Same with the second byte,  | 
59  |  |     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.  | 
60  | 0  |     if(isCompressible) { | 
61  | 0  |         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;  | 
62  | 0  |         primary |= (uint32_t)((offset % 251) + 4) << 16;  | 
63  | 0  |         offset /= 251;  | 
64  | 0  |     } else { | 
65  | 0  |         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;  | 
66  | 0  |         primary |= (uint32_t)((offset % 254) + 2) << 16;  | 
67  | 0  |         offset /= 254;  | 
68  | 0  |     }  | 
69  |  |     // First byte, assume no further overflow.  | 
70  | 0  |     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));  | 
71  | 0  | }  | 
72  |  |  | 
73  |  | uint32_t  | 
74  | 0  | Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { | 
75  |  |     // Extract the second byte, minus the minimum byte value,  | 
76  |  |     // minus the step, modulo the number of usable byte values, plus the minimum.  | 
77  |  |     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.  | 
78  |  |     // Assume no further underflow for the first byte.  | 
79  | 0  |     U_ASSERT(0 < step && step <= 0x7f);  | 
80  | 0  |     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;  | 
81  | 0  |     if(isCompressible) { | 
82  | 0  |         if(byte2 < 4) { | 
83  | 0  |             byte2 += 251;  | 
84  | 0  |             basePrimary -= 0x1000000;  | 
85  | 0  |         }  | 
86  | 0  |     } else { | 
87  | 0  |         if(byte2 < 2) { | 
88  | 0  |             byte2 += 254;  | 
89  | 0  |             basePrimary -= 0x1000000;  | 
90  | 0  |         }  | 
91  | 0  |     }  | 
92  | 0  |     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);  | 
93  | 0  | }  | 
94  |  |  | 
95  |  | uint32_t  | 
96  | 0  | Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { | 
97  |  |     // Extract the third byte, minus the minimum byte value,  | 
98  |  |     // minus the step, modulo the number of usable byte values, plus the minimum.  | 
99  | 0  |     U_ASSERT(0 < step && step <= 0x7f);  | 
100  | 0  |     int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;  | 
101  | 0  |     if(byte3 >= 2) { | 
102  | 0  |         return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);  | 
103  | 0  |     }  | 
104  | 0  |     byte3 += 254;  | 
105  |  |     // Same with the second byte,  | 
106  |  |     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.  | 
107  | 0  |     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;  | 
108  | 0  |     if(isCompressible) { | 
109  | 0  |         if(byte2 < 4) { | 
110  | 0  |             byte2 = 0xfe;  | 
111  | 0  |             basePrimary -= 0x1000000;  | 
112  | 0  |         }  | 
113  | 0  |     } else { | 
114  | 0  |         if(byte2 < 2) { | 
115  | 0  |             byte2 = 0xff;  | 
116  | 0  |             basePrimary -= 0x1000000;  | 
117  | 0  |         }  | 
118  | 0  |     }  | 
119  |  |     // First byte, assume no further underflow.  | 
120  | 0  |     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);  | 
121  | 0  | }  | 
122  |  |  | 
123  |  | uint32_t  | 
124  | 0  | Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { | 
125  | 0  |     uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00  | 
126  | 0  |     int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)  | 
127  | 0  |     int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment  | 
128  | 0  |     UBool isCompressible = (lower32 & 0x80) != 0;  | 
129  | 0  |     return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);  | 
130  | 0  | }  | 
131  |  |  | 
132  |  | uint32_t  | 
133  | 0  | Collation::unassignedPrimaryFromCodePoint(UChar32 c) { | 
134  |  |     // Create a gap before U+0000. Use c=-1 for [first unassigned].  | 
135  | 0  |     ++c;  | 
136  |  |     // Fourth byte: 18 values, every 14th byte value (gap of 13).  | 
137  | 0  |     uint32_t primary = 2 + (c % 18) * 14;  | 
138  | 0  |     c /= 18;  | 
139  |  |     // Third byte: 254 values.  | 
140  | 0  |     primary |= (2 + (c % 254)) << 8;  | 
141  | 0  |     c /= 254;  | 
142  |  |     // Second byte: 251 values 04..FE excluding the primary compression bytes.  | 
143  | 0  |     primary |= (4 + (c % 251)) << 16;  | 
144  |  |     // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).  | 
145  | 0  |     return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);  | 
146  | 0  | }  | 
147  |  |  | 
148  |  | U_NAMESPACE_END  | 
149  |  |  | 
150  |  | #endif  // !UCONFIG_NO_COLLATION  |