Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/i18n/collationsets.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2013-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationsets.h
9
*
10
* created on: 2013feb09
11
* created by: Markus W. Scherer
12
*/
13
14
#ifndef __COLLATIONSETS_H__
15
#define __COLLATIONSETS_H__
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_COLLATION
20
21
#include "unicode/uniset.h"
22
#include "collation.h"
23
24
U_NAMESPACE_BEGIN
25
26
struct CollationData;  // forward declaration: this header only uses pointers to CollationData
27
28
/**
29
 * Finds the set of characters and strings that sort differently in the tailoring
30
 * from the base data.
31
 *
32
 * Every mapping in the tailoring needs to be compared to the base,
33
 * because some mappings are copied for optimization, and
34
 * all contractions for a character are copied if any contractions for that character
35
 * are added, modified or removed.
36
 *
37
 * It might be simpler to re-parse the rule string, but:
38
 * - That would require duplicating some of the from-rules builder code.
39
 * - That would make the runtime code depend on the builder.
40
 * - That would only work if we have the rule string, and we allow users to
41
 *   omit the rule string from data files.
42
 */
43
class TailoredSet : public UMemory {
44
public:
45
    TailoredSet(UnicodeSet *t)  // t is stored as-is in `tailored`; no copy of the set is made here
46
            : data(NULL), baseData(NULL),
47
              tailored(t),
48
              suffix(NULL),
49
0
              errorCode(U_ZERO_ERROR) {}  // unreversedPrefix is default-constructed
50
51
    void forData(const CollationData *d, UErrorCode &errorCode);  // declared only; NOTE(review): presumably the entry point that fills `tailored` from d -- confirm in the implementation file
52
53
    /** Handles the code point range start..end together with its ce32 value.
54
     * @return U_SUCCESS(errorCode) in C++ (the Java counterpart returns void)
55
     * @internal public only so that a callback can reach it; not meant for general callers
56
     */
57
    UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58
59
private:
60
    void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);  // takes two CE32 values for c; NOTE(review): presumably tailoring vs. base -- confirm
61
    void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62
    void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63
64
    void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65
    void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66
    void addContractions(UChar32 c, const UChar *p);
67
    void addSuffix(UChar32 c, const UnicodeString &sfx);
68
    void add(UChar32 c);
69
70
    /** Prefixes are reversed in the data structure; setPrefix() keeps a copy of pfx with its order reversed. */
71
0
    void setPrefix(const UnicodeString &pfx) {
72
0
        unreversedPrefix = pfx;  // copy first: pfx is const and must stay untouched
73
0
        unreversedPrefix.reverse();  // reverse the copy in place
74
0
    }
75
0
    void resetPrefix() {  // empties the prefix stored by setPrefix()
76
0
        unreversedPrefix.remove();
77
0
    }
78
79
    const CollationData *data;  // NULL after construction
80
    const CollationData *baseData;  // NULL after construction
81
    UnicodeSet *tailored;  // the pointer passed to the constructor
82
    UnicodeString unreversedPrefix;  // written by setPrefix(), emptied by resetPrefix()
83
    const UnicodeString *suffix;  // NULL after construction
84
    UErrorCode errorCode;  // starts as U_ZERO_ERROR
85
};
86
87
class ContractionsAndExpansions : public UMemory {
88
public:
89
    class CESink : public UMemory {  // abstract callback interface: both handlers are pure virtual
90
    public:
91
        virtual ~CESink();  // declared here; the definition is not in this header
92
        virtual void handleCE(int64_t ce) = 0;  // receives a single 64-bit CE
93
        virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;  // receives an array of CEs plus its length
94
    };
95
96
    ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)  // the three pointers are stored as-is
97
            : data(NULL),
98
              contractions(con), expansions(exp),
99
              sink(s),
100
              addPrefixes(prefixes),
101
              checkTailored(0),
102
              suffix(NULL),
103
0
              errorCode(U_ZERO_ERROR) {}  // tailored, ranges and unreversedPrefix are default-constructed; ces is left uninitialized
104
105
    void forData(const CollationData *d, UErrorCode &errorCode);
106
    void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107
108
    // all following: @internal, only public for access by callback (no private: section follows, so the data members below are public as well)
109
110
    void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111
112
    void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113
    void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114
115
    void addExpansions(UChar32 start, UChar32 end);
116
    void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117
118
    /** Prefixes are reversed in the data structure; setPrefix() keeps a copy of pfx with its order reversed. */
119
0
    void setPrefix(const UnicodeString &pfx) {
120
0
        unreversedPrefix = pfx;  // copy first: pfx is const and must stay untouched
121
0
        unreversedPrefix.reverse();  // reverse the copy in place
122
0
    }
123
0
    void resetPrefix() {  // empties the prefix stored by setPrefix()
124
0
        unreversedPrefix.remove();
125
0
    }
126
127
    const CollationData *data;  // NULL after construction
128
    UnicodeSet *contractions;  // constructor argument `con`
129
    UnicodeSet *expansions;  // constructor argument `exp`
130
    CESink *sink;  // constructor argument `s`
131
    UBool addPrefixes;  // constructor argument `prefixes`
132
    int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored  (the constructor sets 0)
133
    UnicodeSet tailored;  // held by value, unlike the contractions/expansions pointers
134
    UnicodeSet ranges;  // held by value
135
    UnicodeString unreversedPrefix;  // written by setPrefix(), emptied by resetPrefix()
136
    const UnicodeString *suffix;  // NULL after construction
137
    int64_t ces[Collation::MAX_EXPANSION_LENGTH];  // fixed-size array of 64-bit CEs; not initialized by the constructor
138
    UErrorCode errorCode;  // starts as U_ZERO_ERROR
139
};
140
141
U_NAMESPACE_END
142
143
#endif  // !UCONFIG_NO_COLLATION
144
#endif  // __COLLATIONSETS_H__