/src/icu/icu4c/source/common/uniquecharstr.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2020 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // uniquecharstr.h  | 
5  |  | // created: 2020sep01 Frank Yung-Fong Tang  | 
6  |  |  | 
7  |  | #ifndef __UNIQUECHARSTR_H__  | 
8  |  | #define __UNIQUECHARSTR_H__  | 
9  |  |  | 
10  |  | #include "charstr.h"  | 
11  |  | #include "uassert.h"  | 
12  |  | #include "uhash.h"  | 
13  |  | #include "cmemory.h"  | 
14  |  |  | 
15  |  | U_NAMESPACE_BEGIN  | 
16  |  |  | 
17  |  | /**  | 
18  |  |  * Stores NUL-terminated strings with duplicate elimination.  | 
19  |  |  * Checks for unique UTF-16 string pointers and converts to invariant characters.  | 
20  |  |  *  | 
21  |  |  * Intended to be stack-allocated. Add strings, get a unique number for each,  | 
22  |  |  * freeze the object, get a char * pointer for each string,  | 
23  |  |  * call orphanCharStrings() to capture the string storage, and let this object go out of scope.  | 
24  |  |  */  | 
25  |  | class UniqueCharStrings { | 
26  |  | public:  | 
27  | 0  |     UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { | 
28  |  |         // Note: We hash on string contents but store stable char16_t * pointers.  | 
29  |  |         // If the strings are stored in resource bundles which should be built with  | 
30  |  |         // duplicate elimination, then we should be able to hash on just the pointer values.  | 
31  | 0  |         uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);  | 
32  | 0  |         if (U_FAILURE(errorCode)) { return; } | 
33  | 0  |         strings = new CharString();  | 
34  | 0  |         if (strings == nullptr) { | 
35  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
36  | 0  |         }  | 
37  | 0  |     }  | 
38  | 0  |     ~UniqueCharStrings() { | 
39  | 0  |         uhash_close(&map);  | 
40  | 0  |         delete strings;  | 
41  | 0  |     }  | 
42  |  |  | 
43  |  |     /** Returns/orphans the CharString that contains all strings. */  | 
44  | 0  |     CharString *orphanCharStrings() { | 
45  | 0  |         CharString *result = strings;  | 
46  | 0  |         strings = nullptr;  | 
47  | 0  |         return result;  | 
48  | 0  |     }  | 
49  |  |  | 
50  |  |     /**  | 
51  |  |      * Adds a NUL-terminated string and returns a unique number for it.  | 
52  |  |      * The string must not change, nor move around in memory,  | 
53  |  |      * while this UniqueCharStrings is in use.  | 
54  |  |      *  | 
55  |  |      * Best used with string data in a stable storage, such as strings returned  | 
56  |  |      * by resource bundle functions.  | 
57  |  |      */  | 
58  | 0  |     int32_t add(const char16_t*p, UErrorCode &errorCode) { | 
59  | 0  |         if (U_FAILURE(errorCode)) { return -1; } | 
60  | 0  |         if (isFrozen) { | 
61  | 0  |             errorCode = U_NO_WRITE_PERMISSION;  | 
62  | 0  |             return -1;  | 
63  | 0  |         }  | 
64  |  |         // The string points into the resource bundle.  | 
65  | 0  |         int32_t oldIndex = uhash_geti(&map, p);  | 
66  | 0  |         if (oldIndex != 0) {  // found duplicate | 
67  | 0  |             return oldIndex;  | 
68  | 0  |         }  | 
69  |  |         // Explicit NUL terminator for the previous string.  | 
70  |  |         // The strings object is also terminated with one implicit NUL.  | 
71  | 0  |         strings->append(0, errorCode);  | 
72  | 0  |         int32_t newIndex = strings->length();  | 
73  | 0  |         strings->appendInvariantChars(p, u_strlen(p), errorCode);  | 
74  | 0  |         uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);  | 
75  | 0  |         return newIndex;  | 
76  | 0  |     }  | 
77  |  |  | 
78  |  |     /**  | 
79  |  |      * Adds a unicode string by value and returns a unique number for it.  | 
80  |  |      */  | 
81  | 0  |     int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { | 
82  | 0  |         if (U_FAILURE(errorCode)) { return -1; } | 
83  | 0  |         if (isFrozen) { | 
84  | 0  |             errorCode = U_NO_WRITE_PERMISSION;  | 
85  | 0  |             return -1;  | 
86  | 0  |         }  | 
87  | 0  |         int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());  | 
88  | 0  |         if (oldIndex != 0) {  // found duplicate | 
89  | 0  |             return oldIndex;  | 
90  | 0  |         }  | 
91  |  |         // We need to store the string content of the UnicodeString.  | 
92  | 0  |         UnicodeString *key = keyStore.create(s);  | 
93  | 0  |         if (key == nullptr) { | 
94  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
95  | 0  |             return -1;  | 
96  | 0  |         }  | 
97  | 0  |         return add(key->getTerminatedBuffer(), errorCode);  | 
98  | 0  |     }  | 
99  |  |  | 
100  | 0  |     void freeze() { isFrozen = true; } | 
101  |  |  | 
102  |  |     /**  | 
103  |  |      * Returns a string pointer for its unique number, if this object is frozen.  | 
104  |  |      * Otherwise nullptr.  | 
105  |  |      */  | 
106  | 0  |     const char *get(int32_t i) const { | 
107  | 0  |         U_ASSERT(isFrozen);  | 
108  | 0  |         return isFrozen && i > 0 ? strings->data() + i : nullptr;  | 
109  | 0  |     }  | 
110  |  |  | 
111  |  | private:  | 
112  |  |     UHashtable map;  | 
113  |  |     CharString *strings;  | 
114  |  |     MemoryPool<UnicodeString> keyStore;  | 
115  |  |     bool isFrozen = false;  | 
116  |  | };  | 
117  |  |  | 
118  |  | U_NAMESPACE_END  | 
119  |  |  | 
120  |  | #endif  // __UNIQUECHARSTR_H__  |