/src/icu/source/common/unicode/ucharstriebuilder.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *   Copyright (C) 2010-2016, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | *   file name:  ucharstriebuilder.h  | 
9  |  | *   encoding:   UTF-8  | 
10  |  | *   tab size:   8 (not used)  | 
11  |  | *   indentation:4  | 
12  |  | *  | 
13  |  | *   created on: 2010nov14  | 
14  |  | *   created by: Markus W. Scherer  | 
15  |  | */  | 
16  |  |  | 
17  |  | #ifndef __UCHARSTRIEBUILDER_H__  | 
18  |  | #define __UCHARSTRIEBUILDER_H__  | 
19  |  |  | 
20  |  | #include "unicode/utypes.h"  | 
21  |  |  | 
22  |  | #if U_SHOW_CPLUSPLUS_API  | 
23  |  |  | 
24  |  | #include "unicode/stringtriebuilder.h"  | 
25  |  | #include "unicode/ucharstrie.h"  | 
26  |  | #include "unicode/unistr.h"  | 
27  |  |  | 
28  |  | /**  | 
29  |  |  * \file  | 
30  |  |  * \brief C++ API: Builder for icu::UCharsTrie  | 
31  |  |  */  | 
32  |  |  | 
33  |  | U_NAMESPACE_BEGIN  | 
34  |  |  | 
35  |  | class UCharsTrieElement;  /* forward declaration only; this header uses the type solely through a pointer member (elements) */  | 
36  |  |  | 
37  |  | /**  | 
38  |  |  * Builder class for UCharsTrie.  | 
39  |  |  *  | 
40  |  |  * This class is not intended for public subclassing.  | 
41  |  |  * @stable ICU 4.8  | 
42  |  |  */  | 
43  |  | class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder { | 
44  |  | public:  | 
45  |  |     /**  | 
46  |  |      * Constructs an empty builder.  | 
47  |  |      * @param errorCode Standard ICU error code.  | 
48  |  |      * @stable ICU 4.8  | 
49  |  |      */  | 
50  |  |     UCharsTrieBuilder(UErrorCode &errorCode);  | 
51  |  |  | 
52  |  |     /**  | 
53  |  |      * Destructor.  | 
54  |  |      * @stable ICU 4.8  | 
55  |  |      */  | 
56  |  |     virtual ~UCharsTrieBuilder();  | 
57  |  |  | 
58  |  |     /**  | 
59  |  |      * Adds a (string, value) pair.  | 
60  |  |      * The string must be unique.  | 
61  |  |      * The string contents will be copied; the builder does not keep  | 
62  |  |      * a reference to the input UnicodeString or its buffer.  | 
63  |  |      * @param s The input string.  | 
64  |  |      * @param value The value associated with this string.  | 
65  |  |      * @param errorCode Standard ICU error code. Its input value must  | 
66  |  |      *                  pass the U_SUCCESS() test, or else the function returns  | 
67  |  |      *                  immediately. Check for U_FAILURE() on output or use with  | 
68  |  |      *                  function chaining. (See User Guide for details.)  | 
69  |  |      * @return *this  | 
70  |  |      * @stable ICU 4.8  | 
71  |  |      */  | 
72  |  |     UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);  | 
73  |  |  | 
74  |  |     /**  | 
75  |  |      * Builds a UCharsTrie for the add()ed data.  | 
76  |  |      * Once built, no further data can be add()ed until clear() is called.  | 
77  |  |      *  | 
78  |  |      * A UCharsTrie cannot be empty. At least one (string, value) pair  | 
79  |  |      * must have been add()ed.  | 
80  |  |      *  | 
81  |  |      * This method passes ownership of the builder's internal result array to the new trie object.  | 
82  |  |      * Another call to any build() variant will re-serialize the trie.  | 
83  |  |      * After clear() has been called, a new array will be used as well.  | 
84  |  |      * @param buildOption Build option, see UStringTrieBuildOption.  | 
85  |  |      * @param errorCode Standard ICU error code. Its input value must  | 
86  |  |      *                  pass the U_SUCCESS() test, or else the function returns  | 
87  |  |      *                  immediately. Check for U_FAILURE() on output or use with  | 
88  |  |      *                  function chaining. (See User Guide for details.)  | 
89  |  |      * @return A new UCharsTrie for the add()ed data.  | 
90  |  |      * @stable ICU 4.8  | 
91  |  |      */  | 
92  |  |     UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);  | 
93  |  |  | 
94  |  |     /**  | 
95  |  |      * Builds a UCharsTrie for the add()ed data and char16_t-serializes it.  | 
96  |  |      * Once built, no further data can be add()ed until clear() is called.  | 
97  |  |      *  | 
98  |  |      * A UCharsTrie cannot be empty. At least one (string, value) pair  | 
99  |  |      * must have been add()ed.  | 
100  |  |      *  | 
101  |  |      * Multiple calls to buildUnicodeString() set the UnicodeStrings to the  | 
102  |  |      * builder's same char16_t array, without rebuilding.  | 
103  |  |      * If buildUnicodeString() is called after build(), the trie will be  | 
104  |  |      * re-serialized into a new array (because build() passes on ownership).  | 
105  |  |      * If build() is called after buildUnicodeString(), the trie object returned  | 
106  |  |      * by build() will become the owner of the underlying data for the  | 
107  |  |      * previously returned UnicodeString.  | 
108  |  |      * After clear() has been called, a new array will be used as well.  | 
109  |  |      * @param buildOption Build option, see UStringTrieBuildOption.  | 
110  |  |      * @param result A UnicodeString which will be set to the char16_t-serialized  | 
111  |  |      *               UCharsTrie for the add()ed data.  | 
112  |  |      * @param errorCode Standard ICU error code. Its input value must  | 
113  |  |      *                  pass the U_SUCCESS() test, or else the function returns  | 
114  |  |      *                  immediately. Check for U_FAILURE() on output or use with  | 
115  |  |      *                  function chaining. (See User Guide for details.)  | 
116  |  |      * @return result  | 
117  |  |      * @stable ICU 4.8  | 
118  |  |      */  | 
119  |  |     UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,  | 
120  |  |                                       UErrorCode &errorCode);  | 
121  |  |  | 
122  |  |     /**  | 
123  |  |      * Removes all (string, value) pairs.  | 
124  |  |      * New data can then be add()ed and a new trie can be built. Empties the stored strings and zeroes the element and serialization lengths; the capacity fields are not modified.  | 
125  |  |      * @return *this  | 
126  |  |      * @stable ICU 4.8  | 
127  |  |      */  | 
128  | 0  |     UCharsTrieBuilder &clear() { | 
129  | 0  |         strings.remove();  /* empty the strings member */  | 
130  | 0  |         elementsLength=0;  /* no elements in use; elementsCapacity is not touched here */  | 
131  | 0  |         ucharsLength=0;  /* forget the serialized length; ucharsCapacity is not touched here */  | 
132  | 0  |         return *this;  /* returns the builder itself so calls can be chained */  | 
133  | 0  |     }  | 
134  |  |  | 
135  |  | private:  | 
136  |  |     UCharsTrieBuilder(const UCharsTrieBuilder &other);  // no copy constructor  | 
137  |  |     UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other);  // no assignment operator  | 
138  |  |  | 
139  |  |     void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);  /* NOTE(review): presumably the serialization step shared by build() and buildUnicodeString() -- confirm in the .cpp */  | 
140  |  |  | 
141  |  |     virtual int32_t getElementStringLength(int32_t i) const;  /* NOTE(review): this and the virtuals below look like overrides of StringTrieBuilder hooks (no 'override' keyword is used) -- confirm against stringtriebuilder.h */  | 
142  |  |     virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const;  | 
143  |  |     virtual int32_t getElementValue(int32_t i) const;  | 
144  |  |  | 
145  |  |     virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const;  | 
146  |  |  | 
147  |  |     virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const;  | 
148  |  |     virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const;  | 
149  |  |     virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const;  | 
150  |  |  | 
151  | 0  |     virtual UBool matchNodesCanHaveValues() const { return true; }  /* unconditionally true for this builder */ | 
152  |  |  | 
153  | 0  |     virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; }  /* forwards the UCharsTrie constant */ | 
154  | 0  |     virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; }  /* forwards the UCharsTrie constant */ | 
155  | 0  |     virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; }  /* forwards the UCharsTrie constant */ | 
156  |  |  | 
157  |  |     class UCTLinearMatchNode : public LinearMatchNode {  /* LinearMatchNode subclass that additionally stores a char16_t pointer (member s) */ | 
158  |  |     public:  | 
159  |  |         UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode);  | 
160  |  |         virtual bool operator==(const Node &other) const;  | 
161  |  |         virtual void write(StringTrieBuilder &builder);  | 
162  |  |     private:  | 
163  |  |         const char16_t *s;  /* NOTE(review): ownership/lifetime is not visible in this header; presumably points into storage held by the builder -- confirm */  | 
164  |  |     };  | 
165  |  |  | 
166  |  |     virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,  | 
167  |  |                                         Node *nextNode) const;  | 
168  |  |  | 
169  |  |     UBool ensureCapacity(int32_t length);  /* NOTE(review): presumably grows the uchars buffer and reports success -- confirm in the .cpp */  | 
170  |  |     virtual int32_t write(int32_t unit);  | 
171  |  |     int32_t write(const char16_t *s, int32_t length);  /* non-virtual overload taking a pointer plus length */  | 
172  |  |     virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length);  | 
173  |  |     virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);  | 
174  |  |     virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);  | 
175  |  |     virtual int32_t writeDeltaTo(int32_t jumpTarget);  | 
176  |  |  | 
177  |  |     UnicodeString strings;  /* emptied by clear() */  | 
178  |  |     UCharsTrieElement *elements;  /* element array; its size bookkeeping is in the two fields below */  | 
179  |  |     int32_t elementsCapacity;  | 
180  |  |     int32_t elementsLength;  /* reset to 0 by clear() */  | 
181  |  |  | 
182  |  |     // char16_t serialization of the trie.  | 
183  |  |     // Grows from the back: ucharsLength measures from the end of the buffer!  | 
184  |  |     char16_t *uchars;  | 
185  |  |     int32_t ucharsCapacity;  | 
186  |  |     int32_t ucharsLength;  /* reset to 0 by clear() */  | 
187  |  | };  | 
188  |  |  | 
189  |  | U_NAMESPACE_END  | 
190  |  |  | 
191  |  | #endif /* U_SHOW_CPLUSPLUS_API */  | 
192  |  |  | 
193  |  | #endif  // __UCHARSTRIEBUILDER_H__  |