Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/collationkeys.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2012-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationkeys.h
9
*
10
* created on: 2012sep02
11
* created by: Markus W. Scherer
12
*/
13
14
#ifndef __COLLATIONKEYS_H__
15
#define __COLLATIONKEYS_H__
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_COLLATION
20
21
#include "unicode/bytestream.h"
22
#include "unicode/ucol.h"
23
#include "charstr.h"
24
#include "collation.h"
25
26
U_NAMESPACE_BEGIN
27
28
class CollationIterator;
29
struct CollationDataReader;
30
struct CollationSettings;
31
32
class SortKeyByteSink : public ByteSink {
33
public:
34
    SortKeyByteSink(char *dest, int32_t destCapacity)
35
0
            : buffer_(dest), capacity_(destCapacity),
36
0
              appended_(0), ignore_(0) {}
37
    virtual ~SortKeyByteSink();
38
39
0
    void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
40
41
    virtual void Append(const char *bytes, int32_t n);
42
0
    void Append(uint32_t b) {
43
0
        if (ignore_ > 0) {
44
0
            --ignore_;
45
0
        } else {
46
0
            if (appended_ < capacity_ || Resize(1, appended_)) {
47
0
                buffer_[appended_] = (char)b;
48
0
            }
49
0
            ++appended_;
50
0
        }
51
0
    }
52
    virtual char *GetAppendBuffer(int32_t min_capacity,
53
                                  int32_t desired_capacity_hint,
54
                                  char *scratch, int32_t scratch_capacity,
55
                                  int32_t *result_capacity);
56
0
    int32_t NumberOfBytesAppended() const { return appended_; }
57
58
    /**
59
     * @return how many bytes can be appended (including ignored ones)
60
     *         without reallocation
61
     */
62
0
    int32_t GetRemainingCapacity() const {
63
        // Either ignore_ or appended_ should be 0.
64
0
        return ignore_ + capacity_ - appended_;
65
0
    }
66
67
0
    UBool Overflowed() const { return appended_ > capacity_; }
68
    /** @return false if memory allocation failed */
69
0
    UBool IsOk() const { return buffer_ != NULL; }
70
71
protected:
72
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
73
    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
74
75
0
    void SetNotOk() {
76
0
        buffer_ = NULL;
77
0
        capacity_ = 0;
78
0
    }
79
80
    char *buffer_;
81
    int32_t capacity_;
82
    int32_t appended_;
83
    int32_t ignore_;
84
85
private:
86
    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
87
    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
88
};
89
90
class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
91
public:
92
    class LevelCallback : public UMemory {
93
    public:
94
        virtual ~LevelCallback();
95
        /**
96
         * @param level The next level about to be written to the ByteSink.
97
         * @return true if the level is to be written
98
         *         (the base class implementation always returns true)
99
         */
100
        virtual UBool needToWrite(Collation::Level level);
101
    };
102
103
    /**
104
     * Writes the sort key bytes for minLevel up to the iterator data's strength.
105
     * Optionally writes the case level.
106
     * Stops writing levels when callback.needToWrite(level) returns false.
107
     * Separates levels with the LEVEL_SEPARATOR_BYTE
108
     * but does not write a TERMINATOR_BYTE.
109
     */
110
    static void writeSortKeyUpToQuaternary(CollationIterator &iter,
111
                                           const UBool *compressibleBytes,
112
                                           const CollationSettings &settings,
113
                                           SortKeyByteSink &sink,
114
                                           Collation::Level minLevel, LevelCallback &callback,
115
                                           UBool preflight, UErrorCode &errorCode);
116
private:
117
    friend struct CollationDataReader;
118
119
    CollationKeys();  // no instantiation
120
121
    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
122
    static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
123
    static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
124
    static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
125
    static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
126
127
    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
128
    static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
129
    static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
130
    static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
131
    static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
132
133
    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
134
    static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
135
    static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
136
    static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
137
138
    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
139
    static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
140
    static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
141
    static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
142
    static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
143
144
    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
145
    static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
146
    static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
147
    static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
148
    static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
149
150
    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
151
    static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
152
    static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
153
    static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
154
    static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
155
156
    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
157
    static const uint32_t QUAT_COMMON_LOW = 0x1c;
158
    static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
159
    static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
160
    static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
161
    // Primary weights shifted to quaternary level must be encoded with
162
    // a lead byte below the common-weight compression range.
163
    static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
164
};
165
166
U_NAMESPACE_END
167
168
#endif  // !UCONFIG_NO_COLLATION
169
#endif  // __COLLATIONKEYS_H__