/src/icu/icu4c/source/common/uniquecharstr.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2020 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // uniquecharstr.h |
5 | | // created: 2020sep01 Frank Yung-Fong Tang |
6 | | |
7 | | #ifndef __UNIQUECHARSTR_H__ |
8 | | #define __UNIQUECHARSTR_H__ |
9 | | |
10 | | #include "charstr.h" |
11 | | #include "uassert.h" |
12 | | #include "uhash.h" |
13 | | #include "cmemory.h" |
14 | | |
15 | | U_NAMESPACE_BEGIN |
16 | | |
17 | | /** |
18 | | * Stores NUL-terminated strings with duplicate elimination. |
19 | | * Checks for unique UTF-16 string pointers and converts to invariant characters. |
 | | * Each string that is not yet in the map is appended once to a single |
 | | * heap-allocated CharString. |
20 | | * |
21 | | * Intended to be stack-allocated. Add strings, get a unique number for each, |
22 | | * freeze the object, get a char * pointer for each string, |
23 | | * call orphanCharStrings() to capture the string storage, and let this object go out of scope. |
 | | * |
 | | * The unique number is the length of the CharString at the point where the string is |
 | | * appended, that is, the offset that get() later adds to strings->data(). |
 | | * add() appends a NUL before every string, so the first string does not start at offset 0; |
 | | * add() and addByValue() treat a uhash_geti() result of 0 as meaning that the map |
 | | * has no entry for the key. |
 | | * |
 | | * The constructor reports problems through its UErrorCode argument. The strings member |
 | | * stays nullptr if errorCode is a failure after uhash_init(), or if the allocation |
 | | * yields nullptr (errorCode is then set to U_MEMORY_ALLOCATION_ERROR). |
 | | * The destructor closes the map and deletes the CharString unless it has been orphaned. |
 | | * |
 | | * Members: map goes from the stored char16_t pointer to the unique number; keyStore holds |
 | | * the UnicodeString copies created by addByValue(); isFrozen is set by freeze(). |
 | | * |
 | | * NOTE(review): the destructor calls uhash_close(&map) even when the constructor |
 | | * returned early. Whether that is safe depends on what uhash_init() leaves in the map |
 | | * on failure, which is not visible in this file -- confirm. |
24 | | */ |
25 | | class UniqueCharStrings { |
26 | | public: |
27 | 2 | UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { |
28 | | // Note: We hash on string contents but store stable char16_t * pointers. |
29 | | // If the strings are stored in resource bundles which should be built with |
30 | | // duplicate elimination, then we should be able to hash on just the pointer values. |
31 | 2 | uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); |
32 | 2 | if (U_FAILURE(errorCode)) { return; } |
33 | 2 | strings = new CharString(); |
34 | 2 | if (strings == nullptr) { |
35 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
36 | 0 | } |
37 | 2 | } |
38 | 2 | ~UniqueCharStrings() { |
39 | 2 | uhash_close(&map); |
40 | 2 | delete strings; |
41 | 2 | } |
42 | | |
43 | | /** Returns/orphans the CharString that contains all strings. The member pointer is reset to nullptr, so the destructor no longer deletes the returned object, and a later get() on a frozen object with i > 0 would dereference nullptr. */ |
44 | 2 | CharString *orphanCharStrings() { |
45 | 2 | CharString *result = strings; |
46 | 2 | strings = nullptr; |
47 | 2 | return result; |
48 | 2 | } |
49 | | |
50 | | /** |
51 | | * Adds a NUL-terminated string and returns a unique number for it. |
52 | | * The string must not change, nor move around in memory, |
53 | | * while this UniqueCharStrings is in use. |
54 | | * |
55 | | * Best used with string data in a stable storage, such as strings returned |
56 | | * by resource bundle functions. |
 | | * |
 | | * If the map already has a nonzero number for p, that number is returned and nothing |
 | | * is appended. Otherwise a NUL is appended, the resulting length becomes the new number, |
 | | * the string is appended via appendInvariantChars(), and the pointer p itself |
 | | * (not a copy) is stored in the map as the key for that number. |
 | | * |
 | | * Returns -1 without doing anything if errorCode is already a failure on entry. |
 | | * Returns -1 and sets errorCode to U_NO_WRITE_PERMISSION if this object is frozen. |
 | | * |
 | | * NOTE(review): failures reported by append(), appendInvariantChars() or uhash_puti() |
 | | * do not change the return value; the new number is returned regardless, |
 | | * so callers have to check errorCode. |
57 | | */ |
58 | 9.23k | int32_t add(const char16_t*p, UErrorCode &errorCode) { |
59 | 9.23k | if (U_FAILURE(errorCode)) { return -1; } |
60 | 9.23k | if (isFrozen) { |
61 | 0 | errorCode = U_NO_WRITE_PERMISSION; |
62 | 0 | return -1; |
63 | 0 | } |
64 | | // The string points into the resource bundle. |
65 | 9.23k | int32_t oldIndex = uhash_geti(&map, p); |
66 | 9.23k | if (oldIndex != 0) { // found duplicate |
67 | 508 | return oldIndex; |
68 | 508 | } |
69 | | // Explicit NUL terminator for the previous string. |
70 | | // The strings object is also terminated with one implicit NUL. |
71 | 8.72k | strings->append(0, errorCode); |
72 | 8.72k | int32_t newIndex = strings->length(); |
73 | 8.72k | strings->appendInvariantChars(p, u_strlen(p), errorCode); |
74 | 8.72k | uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); |
75 | 8.72k | return newIndex; |
76 | 9.23k | } |
77 | | |
78 | | /** |
79 | | * Adds a unicode string by value and returns a unique number for it. |
 | | * |
 | | * Looks up the terminated buffer of s in the map first and returns the stored number |
 | | * if it is nonzero. Otherwise a UnicodeString is created from s in keyStore and the |
 | | * terminated buffer of that new object is passed to add(), whose result is returned. |
 | | * |
 | | * Returns -1 if errorCode is already a failure on entry, if this object is frozen |
 | | * (errorCode becomes U_NO_WRITE_PERMISSION), or if keyStore.create() returns nullptr |
 | | * (errorCode becomes U_MEMORY_ALLOCATION_ERROR). |
80 | | */ |
81 | 22.5k | int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { |
82 | 22.5k | if (U_FAILURE(errorCode)) { return -1; } |
83 | 22.5k | if (isFrozen) { |
84 | 0 | errorCode = U_NO_WRITE_PERMISSION; |
85 | 0 | return -1; |
86 | 0 | } |
87 | 22.5k | int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer()); |
88 | 22.5k | if (oldIndex != 0) { // found duplicate |
89 | 15.1k | return oldIndex; |
90 | 15.1k | } |
91 | | // We need to store the string content of the UnicodeString. |
92 | 7.36k | UnicodeString *key = keyStore.create(s); |
93 | 7.36k | if (key == nullptr) { |
94 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
95 | 0 | return -1; |
96 | 0 | } |
97 | 7.36k | return add(key->getTerminatedBuffer(), errorCode); |
98 | 7.36k | } |
99 | | |
100 | 2 | void freeze() { isFrozen = true; } |
101 | | |
102 | | /** |
103 | | * Returns a string pointer for its unique number, if this object is frozen |
 | | * and the number is greater than 0. |
104 | | * Otherwise nullptr. |
 | | * |
 | | * The result is strings->data() + i; i is not checked against the length of the |
 | | * CharString, so it has to be a number returned by add() or addByValue(). |
 | | * U_ASSERT(isFrozen) additionally flags calls on an unfrozen object |
 | | * (presumably only in builds where that macro is active -- confirm). |
 | | * |
 | | * NOTE(review): strings is dereferenced without a null check, so this has to be |
 | | * called before orphanCharStrings(), which resets strings to nullptr. |
105 | | */ |
106 | 24.3k | const char *get(int32_t i) const { |
107 | 24.3k | U_ASSERT(isFrozen); |
108 | 24.3k | return isFrozen && i > 0 ? strings->data() + i : nullptr; |
109 | 24.3k | } |
110 | | |
111 | | private: |
112 | | UHashtable map; |
113 | | CharString *strings; |
114 | | MemoryPool<UnicodeString> keyStore; |
115 | | bool isFrozen = false; |
116 | | }; |
117 | | |
118 | | U_NAMESPACE_END |
119 | | |
120 | | #endif // __UNIQUECHARSTR_H__ |