Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/unicode/stringtriebuilder.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2012,2014, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  stringtriebuilder.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010dec24
14
*   created by: Markus W. Scherer
15
*/
16
17
#ifndef __STRINGTRIEBUILDER_H__
18
#define __STRINGTRIEBUILDER_H__
19
20
#include "unicode/utypes.h"
21
#include "unicode/uobject.h"
22
23
/**
24
 * \file
25
 * \brief C++ API: Builder API for trie builders
26
 */
27
28
// Forward declaration.
29
struct UHashtable;
30
typedef struct UHashtable UHashtable;
31
32
/**
33
 * Build options for BytesTrieBuilder and CharsTrieBuilder.
34
 * @stable ICU 4.8
35
 */
36
enum UStringTrieBuildOption {
37
    /**
38
     * Builds a trie quickly.
39
     * @stable ICU 4.8
40
     */
41
    USTRINGTRIE_BUILD_FAST,
42
    /**
43
     * Builds a trie more slowly, attempting to generate
44
     * a shorter but equivalent serialization.
45
     * This build option also uses more memory.
46
     *
47
     * This option can be effective when many integer values are the same
48
     * and string/byte sequence suffixes can be shared.
49
     * Runtime speed is not expected to improve.
50
     * @stable ICU 4.8
51
     */
52
    USTRINGTRIE_BUILD_SMALL
53
};
54
55
U_NAMESPACE_BEGIN
56
57
/**
58
 * Base class for string trie builder classes.
59
 *
60
 * This class is not intended for public subclassing.
61
 * @stable ICU 4.8
62
 */
63
class U_COMMON_API StringTrieBuilder : public UObject {
64
public:
65
#ifndef U_HIDE_INTERNAL_API
66
    /** @internal */
67
    static UBool hashNode(const void *node);
68
    /** @internal */
69
    static UBool equalNodes(const void *left, const void *right);
70
#endif  /* U_HIDE_INTERNAL_API */
71
72
protected:
73
    // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
74
    // or else the compiler will create a public default constructor.
75
    /** @internal */
76
    StringTrieBuilder();
77
    /** @internal */
78
    virtual ~StringTrieBuilder();
79
80
#ifndef U_HIDE_INTERNAL_API
81
    /** @internal */
82
    void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
83
    /** @internal */
84
    void deleteCompactBuilder();
85
86
    /** @internal */
87
    void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
88
89
    /** @internal */
90
    int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
91
    /** @internal */
92
    int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
93
#endif  /* U_HIDE_INTERNAL_API */
94
95
    class Node;
96
97
#ifndef U_HIDE_INTERNAL_API
98
    /** @internal */
99
    Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
100
    /** @internal */
101
    Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
102
                            int32_t length, UErrorCode &errorCode);
103
#endif  /* U_HIDE_INTERNAL_API */
104
105
    /** @internal */
106
    virtual int32_t getElementStringLength(int32_t i) const = 0;
107
    /** @internal */
108
    virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
109
    /** @internal */
110
    virtual int32_t getElementValue(int32_t i) const = 0;
111
112
    // Finds the first unit index after this one where
113
    // the first and last element have different units again.
114
    /** @internal */
115
    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
116
117
    // Number of different units at unitIndex.
118
    /** @internal */
119
    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
120
    /** @internal */
121
    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
122
    /** @internal */
123
    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
124
125
    /** @internal */
126
    virtual UBool matchNodesCanHaveValues() const = 0;
127
128
    /** @internal */
129
    virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
130
    /** @internal */
131
    virtual int32_t getMinLinearMatch() const = 0;
132
    /** @internal */
133
    virtual int32_t getMaxLinearMatchLength() const = 0;
134
135
#ifndef U_HIDE_INTERNAL_API
136
    // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
137
    /** @internal */
138
    static const int32_t kMaxBranchLinearSubNodeLength=5;
139
140
    // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
141
    // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
142
    /** @internal */
143
    static const int32_t kMaxSplitBranchLevels=14;
144
145
    /**
146
     * Makes sure that there is only one unique node registered that is
147
     * equivalent to newNode.
148
     * @param newNode Input node. The builder takes ownership.
149
     * @param errorCode ICU in/out UErrorCode.
150
                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
151
     * @return newNode if it is the first of its kind, or
152
     *         an equivalent node if newNode is a duplicate.
153
     * @internal
154
     */
155
    Node *registerNode(Node *newNode, UErrorCode &errorCode);
156
    /**
157
     * Makes sure that there is only one unique FinalValueNode registered
158
     * with this value.
159
     * Avoids creating a node if the value is a duplicate.
160
     * @param value A final value.
161
     * @param errorCode ICU in/out UErrorCode.
162
                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
163
     * @return A FinalValueNode with the given value.
164
     * @internal
165
     */
166
    Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
167
#endif  /* U_HIDE_INTERNAL_API */
168
169
    /*
170
     * C++ note:
171
     * registerNode() and registerFinalValue() take ownership of their input nodes,
172
     * and only return owned nodes.
173
     * If they see a failure UErrorCode, they will delete the input node.
174
     * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
175
     * If there is a failure, they return NULL.
176
     *
177
     * NULL Node pointers can be safely passed into other Nodes because
178
     * they call the static Node::hashCode() which checks for a NULL pointer first.
179
     *
180
     * Therefore, as long as builder functions register a new node,
181
     * they need to check for failures only before explicitly dereferencing
182
     * a Node pointer, or before setting a new UErrorCode.
183
     */
184
185
    // Hash set of nodes, maps from nodes to integer 1.
186
    /** @internal */
187
    UHashtable *nodes;
188
189
    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
190
    // it is needed for layout of other objects.
191
    /** @internal */
192
    class Node : public UObject {
193
    public:
194
        Node(int32_t initialHash) : hash(initialHash), offset(0) {}
195
        inline int32_t hashCode() const { return hash; }
196
        // Handles node==NULL.
197
        static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
198
        // Base class operator==() compares the actual class types.
199
        virtual UBool operator==(const Node &other) const;
200
        inline UBool operator!=(const Node &other) const { return !operator==(other); }
201
        /**
202
         * Traverses the Node graph and numbers branch edges, with rightmost edges first.
203
         * This is to avoid writing a duplicate node twice.
204
         *
205
         * Branch nodes in this trie data structure are not symmetric.
206
         * Most branch edges "jump" to other nodes but the rightmost branch edges
207
         * just continue without a jump.
208
         * Therefore, write() must write the rightmost branch edge last
209
         * (trie units are written backwards), and must write it at that point even if
210
         * it is a duplicate of a node previously written elsewhere.
211
         *
212
         * This function visits and marks right branch edges first.
213
         * Edges are numbered with increasingly negative values because we share the
214
         * offset field which gets positive values when nodes are written.
215
         * A branch edge also remembers the first number for any of its edges.
216
         *
217
         * When a further-left branch edge has a number in the range of the rightmost
218
         * edge's numbers, then it will be written as part of the required right edge
219
         * and we can avoid writing it first.
220
         *
221
         * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
222
         * edge numbers.
223
         *
224
         * @param edgeNumber The first edge number for this node and its sub-nodes.
225
         * @return An edge number that is at least the maximum-negative
226
         *         of the input edge number and the numbers of this node and all of its sub-nodes.
227
         */
228
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
229
        // write() must set the offset to a positive value.
230
        virtual void write(StringTrieBuilder &builder) = 0;
231
        // See markRightEdgesFirst.
232
        inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
233
0
                                               StringTrieBuilder &builder) {
234
0
            // Note: Edge numbers are negative, lastRight<=firstRight.
235
0
            // If offset>0 then this node and its sub-nodes have been written already
236
0
            // and we need not write them again.
237
0
            // If this node is part of the unwritten right branch edge,
238
0
            // then we wait until that is written.
239
0
            if(offset<0 && (offset<lastRight || firstRight<offset)) {
240
0
                write(builder);
241
0
            }
242
0
        }
243
0
        inline int32_t getOffset() const { return offset; }
244
    protected:
245
        int32_t hash;
246
        int32_t offset;
247
    };
248
249
#ifndef U_HIDE_INTERNAL_API
250
    // This class should not be overridden because
251
    // registerFinalValue() compares a stack-allocated FinalValueNode
252
    // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
253
    // with the input node, and the
254
    // !Node::operator==(other) used inside FinalValueNode::operator==(other)
255
    // will be false if the typeid's are different.
256
    /** @internal */
257
    class FinalValueNode : public Node {
258
    public:
259
0
        FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
260
        virtual UBool operator==(const Node &other) const;
261
        virtual void write(StringTrieBuilder &builder);
262
    protected:
263
        int32_t value;
264
    };
265
#endif  /* U_HIDE_INTERNAL_API */
266
267
    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
268
    // it is needed for layout of other objects.
269
    /**
270
     * @internal 
271
     */
272
    class ValueNode : public Node {
273
    public:
274
        ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
275
        virtual UBool operator==(const Node &other) const;
276
0
        void setValue(int32_t v) {
277
0
            hasValue=TRUE;
278
0
            value=v;
279
0
            hash=hash*37u+v;
280
0
        }
281
    protected:
282
        UBool hasValue;
283
        int32_t value;
284
    };
285
286
#ifndef U_HIDE_INTERNAL_API
287
    /** 
288
     * @internal 
289
     */
290
    class IntermediateValueNode : public ValueNode {
291
    public:
292
        IntermediateValueNode(int32_t v, Node *nextNode)
293
0
                : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
294
        virtual UBool operator==(const Node &other) const;
295
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
296
        virtual void write(StringTrieBuilder &builder);
297
    protected:
298
        Node *next;
299
    };
300
#endif  /* U_HIDE_INTERNAL_API */
301
302
    // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
303
    // it is needed for layout of other objects.
304
    /**
305
     * @internal 
306
     */
307
    class LinearMatchNode : public ValueNode {
308
    public:
309
        LinearMatchNode(int32_t len, Node *nextNode)
310
                : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
311
                  length(len), next(nextNode) {}
312
        virtual UBool operator==(const Node &other) const;
313
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
314
    protected:
315
        int32_t length;
316
        Node *next;
317
    };
318
319
#ifndef U_HIDE_INTERNAL_API
320
    /**
321
     * @internal 
322
     */
323
    class BranchNode : public Node {
324
    public:
325
0
        BranchNode(int32_t initialHash) : Node(initialHash) {}
326
    protected:
327
        int32_t firstEdgeNumber;
328
    };
329
330
    /**
331
     * @internal 
332
     */
333
    class ListBranchNode : public BranchNode {
334
    public:
335
0
        ListBranchNode() : BranchNode(0x444444), length(0) {}
336
        virtual UBool operator==(const Node &other) const;
337
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
338
        virtual void write(StringTrieBuilder &builder);
339
        // Adds a unit with a final value.
340
0
        void add(int32_t c, int32_t value) {
341
0
            units[length]=(char16_t)c;
342
0
            equal[length]=NULL;
343
0
            values[length]=value;
344
0
            ++length;
345
0
            hash=(hash*37u+c)*37u+value;
346
0
        }
347
        // Adds a unit which leads to another match node.
348
0
        void add(int32_t c, Node *node) {
349
0
            units[length]=(char16_t)c;
350
0
            equal[length]=node;
351
0
            values[length]=0;
352
0
            ++length;
353
0
            hash=(hash*37u+c)*37u+hashCode(node);
354
0
        }
355
    protected:
356
        Node *equal[kMaxBranchLinearSubNodeLength];  // NULL means "has final value".
357
        int32_t length;
358
        int32_t values[kMaxBranchLinearSubNodeLength];
359
        char16_t units[kMaxBranchLinearSubNodeLength];
360
    };
361
362
    /**
363
     * @internal 
364
     */
365
    class SplitBranchNode : public BranchNode {
366
    public:
367
        SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
368
                : BranchNode(((0x555555u*37u+middleUnit)*37u+
369
                              hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
370
0
                  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
371
        virtual UBool operator==(const Node &other) const;
372
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
373
        virtual void write(StringTrieBuilder &builder);
374
    protected:
375
        char16_t unit;
376
        Node *lessThan;
377
        Node *greaterOrEqual;
378
    };
379
380
    // Branch head node, for writing the actual node lead unit.
381
    /** @internal */
382
    class BranchHeadNode : public ValueNode {
383
    public:
384
        BranchHeadNode(int32_t len, Node *subNode)
385
                : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
386
0
                  length(len), next(subNode) {}
387
        virtual UBool operator==(const Node &other) const;
388
        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
389
        virtual void write(StringTrieBuilder &builder);
390
    protected:
391
        int32_t length;
392
        Node *next;  // A branch sub-node.
393
    };
394
#endif  /* U_HIDE_INTERNAL_API */
395
396
    /** @internal */
397
    virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
398
                                        Node *nextNode) const = 0;
399
400
    /** @internal */
401
    virtual int32_t write(int32_t unit) = 0;
402
    /** @internal */
403
    virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
404
    /** @internal */
405
    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
406
    /** @internal */
407
    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
408
    /** @internal */
409
    virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
410
};
411
412
U_NAMESPACE_END
413
414
#endif  // __STRINGTRIEBUILDER_H__