Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/i18n/collation.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2010-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collation.cpp
9
*
10
* created on: 2010oct27
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "collation.h"
19
#include "uassert.h"
20
21
U_NAMESPACE_BEGIN
22
23
// Some compilers don't care if constants are defined in the .cpp file.
24
// MS Visual C++ does not like it, but gcc requires it. clang does not care.
25
#ifndef _MSC_VER
26
// Namespace-scope definitions for static const data members of Collation.
// They carry no initializers here, so the values must be supplied at the
// in-class declarations (presumably in collation.h -- not visible from this file).
// The whole group is compiled out when _MSC_VER is defined.
const uint8_t Collation::LEVEL_SEPARATOR_BYTE;
27
const uint8_t Collation::MERGE_SEPARATOR_BYTE;
28
const uint32_t Collation::ONLY_TERTIARY_MASK;
29
const uint32_t Collation::CASE_AND_TERTIARY_MASK;
30
#endif
31
32
uint32_t
33
0
Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
34
    // Extract the second byte, minus the minimum byte value,
35
    // plus the offset, modulo the number of usable byte values, plus the minimum.
36
    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
37
0
    uint32_t primary;
38
0
    if(isCompressible) {
39
0
        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
40
0
        primary = (uint32_t)((offset % 251) + 4) << 16;
41
0
        offset /= 251;
42
0
    } else {
43
0
        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
44
0
        primary = (uint32_t)((offset % 254) + 2) << 16;
45
0
        offset /= 254;
46
0
    }
47
    // First byte, assume no further overflow.
48
0
    return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
49
0
}
50
51
uint32_t
52
0
Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
53
    // Extract the third byte, minus the minimum byte value,
54
    // plus the offset, modulo the number of usable byte values, plus the minimum.
55
0
    offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
56
0
    uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
57
0
    offset /= 254;
58
    // Same with the second byte,
59
    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
60
0
    if(isCompressible) {
61
0
        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
62
0
        primary |= (uint32_t)((offset % 251) + 4) << 16;
63
0
        offset /= 251;
64
0
    } else {
65
0
        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
66
0
        primary |= (uint32_t)((offset % 254) + 2) << 16;
67
0
        offset /= 254;
68
0
    }
69
    // First byte, assume no further overflow.
70
0
    return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
71
0
}
72
73
uint32_t
74
0
Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
75
    // Extract the second byte, minus the minimum byte value,
76
    // minus the step, modulo the number of usable byte values, plus the minimum.
77
    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
78
    // Assume no further underflow for the first byte.
79
0
    U_ASSERT(0 < step && step <= 0x7f);
80
0
    int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
81
0
    if(isCompressible) {
82
0
        if(byte2 < 4) {
83
0
            byte2 += 251;
84
0
            basePrimary -= 0x1000000;
85
0
        }
86
0
    } else {
87
0
        if(byte2 < 2) {
88
0
            byte2 += 254;
89
0
            basePrimary -= 0x1000000;
90
0
        }
91
0
    }
92
0
    return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
93
0
}
94
95
uint32_t
96
0
Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
97
    // Extract the third byte, minus the minimum byte value,
98
    // minus the step, modulo the number of usable byte values, plus the minimum.
99
0
    U_ASSERT(0 < step && step <= 0x7f);
100
0
    int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
101
0
    if(byte3 >= 2) {
102
0
        return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
103
0
    }
104
0
    byte3 += 254;
105
    // Same with the second byte,
106
    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
107
0
    int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
108
0
    if(isCompressible) {
109
0
        if(byte2 < 4) {
110
0
            byte2 = 0xfe;
111
0
            basePrimary -= 0x1000000;
112
0
        }
113
0
    } else {
114
0
        if(byte2 < 2) {
115
0
            byte2 = 0xff;
116
0
            basePrimary -= 0x1000000;
117
0
        }
118
0
    }
119
    // First byte, assume no further underflow.
120
0
    return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
121
0
}
122
123
uint32_t
124
0
Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
125
0
    uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00
126
0
    int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
127
0
    int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
128
0
    UBool isCompressible = (lower32 & 0x80) != 0;
129
0
    return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
130
0
}
131
132
uint32_t
133
0
Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
134
    // Create a gap before U+0000. Use c=-1 for [first unassigned].
135
0
    ++c;
136
    // Fourth byte: 18 values, every 14th byte value (gap of 13).
137
0
    uint32_t primary = 2 + (c % 18) * 14;
138
0
    c /= 18;
139
    // Third byte: 254 values.
140
0
    primary |= (2 + (c % 254)) << 8;
141
0
    c /= 254;
142
    // Second byte: 251 values 04..FE excluding the primary compression bytes.
143
0
    primary |= (4 + (c % 251)) << 16;
144
    // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
145
0
    return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
146
0
}
147
148
U_NAMESPACE_END
149
150
#endif  // !UCONFIG_NO_COLLATION