/src/icu/source/i18n/collationsets.h

Source (jump to first uncovered line)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsets.h
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/

#ifndef __COLLATIONSETS_H__
#define __COLLATIONSETS_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/uniset.h"
#include "collation.h"

U_NAMESPACE_BEGIN

struct CollationData;

/**
 * Finds the set of characters and strings that sort differently in the tailoring
 * from the base data.
 *
 * Every mapping in the tailoring needs to be compared to the base,
 * because some mappings are copied for optimization, and
 * all contractions for a character are copied if any contractions for that character
 * are added, modified or removed.
 *
 * It might be simpler to re-parse the rule string, but:
 * - That would require duplicating some of the from-rules builder code.
 * - That would make the runtime code depend on the builder.
 * - That would only work if we have the rule string, and we allow users to
 *   omit the rule string from data files.
 */
class TailoredSet : public UMemory {
public:
    /**
     * Starts in a clean state: no collation data attached, no suffix,
     * an empty prefix and errorCode == U_ZERO_ERROR.
     * Only the pointer t is stored; this class declares no destructor,
     * so the set is never deleted from here.
     */
    TailoredSet(UnicodeSet *t)
            : data(NULL),
              baseData(NULL),
              tailored(t),
              suffix(NULL),
              errorCode(U_ZERO_ERROR) {}

    /**
     * Main entry point, defined out of line.
     * NOTE(review): presumably d is the tailoring data whose differences
     * from its base end up in the set given to the constructor -- confirm
     * against the implementation.
     */
    void forData(const CollationData *d, UErrorCode &errorCode);

    /**
     * Callback for one code point range [start, end] and its ce32.
     *
     * @return U_SUCCESS(errorCode) in C++, void in Java
     * @internal only public for access by callback
     */
    UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);

private:
    // Comparison helpers (tailoring vs. base); defined out of line.
    void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
    void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
    void compareContractions(UChar32 c, const UChar *p, const UChar *q);

    // Helpers that add to the result; defined out of line.
    void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
    void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
    void addContractions(UChar32 c, const UChar *p);
    void addSuffix(UChar32 c, const UnicodeString &sfx);
    void add(UChar32 c);

    /**
     * Prefixes are reversed in the data structure.
     * Copies pfx and reverses the copy, so that unreversedPrefix
     * holds the prefix in its un-reversed order.
     */
    void setPrefix(const UnicodeString &pfx) {
        (unreversedPrefix = pfx).reverse();
    }

    /** Empties unreversedPrefix again. */
    void resetPrefix() {
        unreversedPrefix.remove();
    }

    // Both data pointers start out NULL; they are set outside this header.
    const CollationData *data;
    const CollationData *baseData;
    // Result set handed to the constructor.
    UnicodeSet *tailored;
    // Maintained by setPrefix()/resetPrefix().
    UnicodeString unreversedPrefix;
    // NULL until set by the implementation.
    const UnicodeString *suffix;
    // Internal error state, initially U_ZERO_ERROR.
    UErrorCode errorCode;
};

/**
 * Walks collation data and reports into caller-supplied outputs:
 * two UnicodeSet pointers (contractions, expansions) and a CESink.
 * NOTE(review): which of these outputs may be NULL is decided by the
 * out-of-line implementation -- confirm there before relying on it.
 */
class ContractionsAndExpansions : public UMemory {
public:
    /** Abstract receiver for single CEs and for whole expansions. */
    class CESink : public UMemory {
    public:
        virtual ~CESink();
        /** Receives one collation element. */
        virtual void handleCE(int64_t ce) = 0;
        /** Receives an array of length collation elements. */
        virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
    };

    /**
     * Stores the given output pointers and the prefixes flag as-is.
     * data and suffix start as NULL, checkTailored as 0 and errorCode as
     * U_ZERO_ERROR. tailored, ranges and unreversedPrefix are
     * default-constructed; the ces buffer is not initialized here.
     */
    ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
            : data(NULL),
              contractions(con),
              expansions(exp),
              sink(s),
              addPrefixes(prefixes),
              checkTailored(0),
              suffix(NULL),
              errorCode(U_ZERO_ERROR) {}

    // Entry points, defined out of line.
    void forData(const CollationData *d, UErrorCode &errorCode);
    void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);

    // all following: @internal, only public for access by callback

    void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);

    void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
    void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);

    void addExpansions(UChar32 start, UChar32 end);
    void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);

    /**
     * Prefixes are reversed in the data structure.
     * Copies pfx and reverses the copy, so that unreversedPrefix
     * holds the prefix in its un-reversed order.
     */
    void setPrefix(const UnicodeString &pfx) {
        (unreversedPrefix = pfx).reverse();
    }

    /** Empties unreversedPrefix again. */
    void resetPrefix() {
        unreversedPrefix.remove();
    }

    // NULL until set by the implementation.
    const CollationData *data;
    // Output sets and sink, exactly as passed to the constructor.
    UnicodeSet *contractions;
    UnicodeSet *expansions;
    CESink *sink;
    // Copy of the constructor's prefixes argument.
    UBool addPrefixes;
    int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
    UnicodeSet tailored;
    UnicodeSet ranges;
    // Maintained by setPrefix()/resetPrefix().
    UnicodeString unreversedPrefix;
    // NULL until set by the implementation.
    const UnicodeString *suffix;
    // Buffer sized for the longest expansion; left uninitialized by the constructor.
    int64_t ces[Collation::MAX_EXPANSION_LENGTH];
    // Internal error state, initially U_ZERO_ERROR.
    UErrorCode errorCode;
};

U_NAMESPACE_END

#endif  // !UCONFIG_NO_COLLATION
#endif  // __COLLATIONSETS_H__

Line	Count	Source (jump to first uncovered line)
1		// © 2016 and later: Unicode, Inc. and others.
2		// License & terms of use: http://www.unicode.org/copyright.html
3		/*
4		*******************************************************************************
5		* Copyright (C) 2013-2014, International Business Machines
6		* Corporation and others. All Rights Reserved.
7		*******************************************************************************
8		* collationsets.h
9		*
10		* created on: 2013feb09
11		* created by: Markus W. Scherer
12		*/
13
14		#ifndef __COLLATIONSETS_H__
15		#define __COLLATIONSETS_H__
16
17		#include "unicode/utypes.h"
18
19		#if !UCONFIG_NO_COLLATION
20
21		#include "unicode/uniset.h"
22		#include "collation.h"
23
24		U_NAMESPACE_BEGIN
25
26		struct CollationData;
27
28		/**
29		* Finds the set of characters and strings that sort differently in the tailoring
30		* from the base data.
31		*
32		* Every mapping in the tailoring needs to be compared to the base,
33		* because some mappings are copied for optimization, and
34		* all contractions for a character are copied if any contractions for that character
35		* are added, modified or removed.
36		*
37		* It might be simpler to re-parse the rule string, but:
38		* - That would require duplicating some of the from-rules builder code.
39		* - That would make the runtime code depend on the builder.
40		* - That would only work if we have the rule string, and we allow users to
41		* omit the rule string from data files.
42		*/
43		class TailoredSet : public UMemory {
44		public:
45		TailoredSet(UnicodeSet *t)
46		: data(NULL), baseData(NULL),
47		tailored(t),
48		suffix(NULL),
49	0	errorCode(U_ZERO_ERROR) {}
50
51		void forData(const CollationData *d, UErrorCode &errorCode);
52
53		/**
54		* @return U_SUCCESS(errorCode) in C++, void in Java
55		* @internal only public for access by callback
56		*/
57		UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58
59		private:
60		void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
61		void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62		void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63
64		void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65		void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66		void addContractions(UChar32 c, const UChar *p);
67		void addSuffix(UChar32 c, const UnicodeString &sfx);
68		void add(UChar32 c);
69
70		/** Prefixes are reversed in the data structure. */
71	0	void setPrefix(const UnicodeString &pfx) {
72	0	unreversedPrefix = pfx;
73	0	unreversedPrefix.reverse();
74	0	}
75	0	void resetPrefix() {
76	0	unreversedPrefix.remove();
77	0	}
78
79		const CollationData *data;
80		const CollationData *baseData;
81		UnicodeSet *tailored;
82		UnicodeString unreversedPrefix;
83		const UnicodeString *suffix;
84		UErrorCode errorCode;
85		};
86
87		class ContractionsAndExpansions : public UMemory {
88		public:
89		class CESink : public UMemory {
90		public:
91		virtual ~CESink();
92		virtual void handleCE(int64_t ce) = 0;
93		virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
94		};
95
96		ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
97		: data(NULL),
98		contractions(con), expansions(exp),
99		sink(s),
100		addPrefixes(prefixes),
101		checkTailored(0),
102		suffix(NULL),
103	0	errorCode(U_ZERO_ERROR) {}
104
105		void forData(const CollationData *d, UErrorCode &errorCode);
106		void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107
108		// all following: @internal, only public for access by callback
109
110		void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111
112		void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113		void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114
115		void addExpansions(UChar32 start, UChar32 end);
116		void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117
118		/** Prefixes are reversed in the data structure. */
119	0	void setPrefix(const UnicodeString &pfx) {
120	0	unreversedPrefix = pfx;
121	0	unreversedPrefix.reverse();
122	0	}
123	0	void resetPrefix() {
124	0	unreversedPrefix.remove();
125	0	}
126
127		const CollationData *data;
128		UnicodeSet *contractions;
129		UnicodeSet *expansions;
130		CESink *sink;
131		UBool addPrefixes;
132		int8_t checkTailored; // -1: collected tailored +1: exclude tailored
133		UnicodeSet tailored;
134		UnicodeSet ranges;
135		UnicodeString unreversedPrefix;
136		const UnicodeString *suffix;
137		int64_t ces[Collation::MAX_EXPANSION_LENGTH];
138		UErrorCode errorCode;
139		};
140
141		U_NAMESPACE_END
142
143		#endif // !UCONFIG_NO_COLLATION
144		#endif // __COLLATIONSETS_H__

Coverage Report

Created: 2023-02-22 06:51