/src/icu/source/common/uniquecharstr.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2020 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // uniquecharstr.h  | 
5  |  | // created: 2020sep01 Frank Yung-Fong Tang  | 
6  |  |  | 
7  |  | #ifndef __UNIQUECHARSTR_H__  | 
8  |  | #define __UNIQUECHARSTR_H__  | 
9  |  |  | 
10  |  | #include "charstr.h"  | 
11  |  | #include "uassert.h"  | 
12  |  | #include "uhash.h"  | 
13  |  |  | 
14  |  | U_NAMESPACE_BEGIN  | 
15  |  |  | 
16  |  | /**  | 
17  |  |  * Stores NUL-terminated strings with duplicate elimination.  | 
18  |  |  * Checks for unique UTF-16 string pointers and converts to invariant characters.  | 
19  |  |  *  | 
20  |  |  * Intended to be stack-allocated. Add strings, get a unique number for each,  | 
21  |  |  * freeze the object, get a char * pointer for each string,  | 
22  |  |  * call orphanCharStrings() to capture the string storage, and let this object go out of scope.  | 
23  |  |  */  | 
24  |  | class UniqueCharStrings { | 
25  |  | public:  | 
26  | 0  |     UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { | 
27  |  |         // Note: We hash on string contents but store stable char16_t * pointers.  | 
28  |  |         // If the strings are stored in resource bundles which should be built with  | 
29  |  |         // duplicate elimination, then we should be able to hash on just the pointer values.  | 
30  | 0  |         uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);  | 
31  | 0  |         if (U_FAILURE(errorCode)) { return; } | 
32  | 0  |         strings = new CharString();  | 
33  | 0  |         if (strings == nullptr) { | 
34  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
35  | 0  |         }  | 
36  | 0  |     }  | 
37  | 0  |     ~UniqueCharStrings() { | 
38  | 0  |         uhash_close(&map);  | 
39  | 0  |         delete strings;  | 
40  | 0  |     }  | 
41  |  |  | 
42  |  |     /** Returns/orphans the CharString that contains all strings. */  | 
43  | 0  |     CharString *orphanCharStrings() { | 
44  | 0  |         CharString *result = strings;  | 
45  | 0  |         strings = nullptr;  | 
46  | 0  |         return result;  | 
47  | 0  |     }  | 
48  |  |  | 
49  |  |     /**  | 
50  |  |      * Adds a string and returns a unique number for it.  | 
51  |  |      * The string's buffer contents must not change, nor move around in memory,  | 
52  |  |      * while this UniqueCharStrings is in use.  | 
53  |  |      * The string contents must be NUL-terminated exactly at s.length().  | 
54  |  |      *  | 
55  |  |      * Best used with read-only-alias UnicodeString objects that point to  | 
56  |  |      * stable storage, such as strings returned by resource bundle functions.  | 
57  |  |      */  | 
58  | 0  |     int32_t add(const UnicodeString &s, UErrorCode &errorCode) { | 
59  | 0  |         if (U_FAILURE(errorCode)) { return 0; } | 
60  | 0  |         if (isFrozen) { | 
61  | 0  |             errorCode = U_NO_WRITE_PERMISSION;  | 
62  | 0  |             return 0;  | 
63  | 0  |         }  | 
64  |  |         // The string points into the resource bundle.  | 
65  | 0  |         const char16_t *p = s.getBuffer();  | 
66  | 0  |         int32_t oldIndex = uhash_geti(&map, p);  | 
67  | 0  |         if (oldIndex != 0) {  // found duplicate | 
68  | 0  |             return oldIndex;  | 
69  | 0  |         }  | 
70  |  |         // Explicit NUL terminator for the previous string.  | 
71  |  |         // The strings object is also terminated with one implicit NUL.  | 
72  | 0  |         strings->append(0, errorCode);  | 
73  | 0  |         int32_t newIndex = strings->length();  | 
74  | 0  |         strings->appendInvariantChars(s, errorCode);  | 
75  | 0  |         uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);  | 
76  | 0  |         return newIndex;  | 
77  | 0  |     }  | 
78  |  |  | 
79  | 0  |     void freeze() { isFrozen = true; } | 
80  |  |  | 
81  |  |     /**  | 
82  |  |      * Returns a string pointer for its unique number, if this object is frozen.  | 
83  |  |      * Otherwise nullptr.  | 
84  |  |      */  | 
85  | 0  |     const char *get(int32_t i) const { | 
86  | 0  |         U_ASSERT(isFrozen);  | 
87  | 0  |         return isFrozen && i > 0 ? strings->data() + i : nullptr;  | 
88  | 0  |     }  | 
89  |  |  | 
90  |  | private:  | 
91  |  |     UHashtable map;  | 
92  |  |     CharString *strings;  | 
93  |  |     bool isFrozen = false;  | 
94  |  | };  | 
95  |  |  | 
96  |  | U_NAMESPACE_END  | 
97  |  |  | 
98  |  | #endif  // __UNIQUECHARSTR_H__  |