Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/uniquecharstr.h
Line
Count
Source (jump to first uncovered line)
1
// © 2020 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// uniquecharstr.h
5
// created: 2020sep01 Frank Yung-Fong Tang
6
7
#ifndef __UNIQUECHARSTR_H__
8
#define __UNIQUECHARSTR_H__
9
10
#include "charstr.h"
11
#include "uassert.h"
12
#include "uhash.h"
13
#include "cmemory.h"
14
15
U_NAMESPACE_BEGIN
16
17
/**
18
 * Stores NUL-terminated strings with duplicate elimination.
19
 * Checks for unique UTF-16 string pointers and converts to invariant characters.
20
 *
21
 * Intended to be stack-allocated. Add strings, get a unique number for each,
22
 * freeze the object, get a char * pointer for each string,
23
 * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
24
 */
25
class UniqueCharStrings {
26
public:
27
1
    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
28
        // Note: We hash on string contents but store stable char16_t * pointers.
29
        // If the strings are stored in resource bundles which should be built with
30
        // duplicate elimination, then we should be able to hash on just the pointer values.
31
1
        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
32
1
        if (U_FAILURE(errorCode)) { return; }
33
1
        strings = new CharString();
34
1
        if (strings == nullptr) {
35
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
36
0
        }
37
1
    }
38
1
    ~UniqueCharStrings() {
39
1
        uhash_close(&map);
40
1
        delete strings;
41
1
    }
42
43
    /** Returns/orphans the CharString that contains all strings. */
44
1
    CharString *orphanCharStrings() {
45
1
        CharString *result = strings;
46
1
        strings = nullptr;
47
1
        return result;
48
1
    }
49
50
    /**
51
     * Adds a NUL-terminated string and returns a unique number for it.
52
     * The string must not change, nor move around in memory,
53
     * while this UniqueCharStrings is in use.
54
     *
55
     * Best used with string data in a stable storage, such as strings returned
56
     * by resource bundle functions.
57
     */
58
1.29k
    int32_t add(const char16_t*p, UErrorCode &errorCode) {
59
1.29k
        if (U_FAILURE(errorCode)) { return -1; }
60
1.29k
        if (isFrozen) {
61
0
            errorCode = U_NO_WRITE_PERMISSION;
62
0
            return -1;
63
0
        }
64
        // The string points into the resource bundle.
65
1.29k
        int32_t oldIndex = uhash_geti(&map, p);
66
1.29k
        if (oldIndex != 0) {  // found duplicate
67
476
            return oldIndex;
68
476
        }
69
        // Explicit NUL terminator for the previous string.
70
        // The strings object is also terminated with one implicit NUL.
71
814
        strings->append(0, errorCode);
72
814
        int32_t newIndex = strings->length();
73
814
        strings->appendInvariantChars(p, u_strlen(p), errorCode);
74
814
        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
75
814
        return newIndex;
76
1.29k
    }
77
78
    /**
79
     * Adds a unicode string by value and returns a unique number for it.
80
     */
81
0
    int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
82
0
        if (U_FAILURE(errorCode)) { return -1; }
83
0
        if (isFrozen) {
84
0
            errorCode = U_NO_WRITE_PERMISSION;
85
0
            return -1;
86
0
        }
87
0
        int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
88
0
        if (oldIndex != 0) {  // found duplicate
89
0
            return oldIndex;
90
0
        }
91
        // We need to store the string content of the UnicodeString.
92
0
        UnicodeString *key = keyStore.create(s);
93
0
        if (key == nullptr) {
94
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
95
0
            return -1;
96
0
        }
97
0
        return add(key->getTerminatedBuffer(), errorCode);
98
0
    }
99
100
1
    void freeze() { isFrozen = true; }
101
102
    /**
103
     * Returns a string pointer for its unique number, if this object is frozen.
104
     * Otherwise nullptr.
105
     */
106
1.29k
    const char *get(int32_t i) const {
107
1.29k
        U_ASSERT(isFrozen);
108
1.29k
        return isFrozen && i > 0 ? strings->data() + i : nullptr;
109
1.29k
    }
110
111
private:
112
    UHashtable map;
113
    CharString *strings;
114
    MemoryPool<UnicodeString> keyStore;
115
    bool isFrozen = false;
116
};
117
118
U_NAMESPACE_END
119
120
#endif  // __UNIQUECHARSTR_H__