Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/uniquecharstr.h
Line
Count
Source (jump to first uncovered line)
1
// © 2020 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// uniquecharstr.h
5
// created: 2020sep01 Frank Yung-Fong Tang
6
7
#ifndef __UNIQUECHARSTR_H__
8
#define __UNIQUECHARSTR_H__
9
10
#include "charstr.h"
11
#include "uassert.h"
12
#include "uhash.h"
13
14
U_NAMESPACE_BEGIN
15
16
/**
17
 * Stores NUL-terminated strings with duplicate elimination.
18
 * Checks for unique UTF-16 string pointers and converts to invariant characters.
19
 *
20
 * Intended to be stack-allocated. Add strings, get a unique number for each,
21
 * freeze the object, get a char * pointer for each string,
22
 * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
23
 */
24
class UniqueCharStrings {
25
public:
26
0
    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
27
        // Note: We hash on string contents but store stable char16_t * pointers.
28
        // If the strings are stored in resource bundles which should be built with
29
        // duplicate elimination, then we should be able to hash on just the pointer values.
30
0
        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
31
0
        if (U_FAILURE(errorCode)) { return; }
32
0
        strings = new CharString();
33
0
        if (strings == nullptr) {
34
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
35
0
        }
36
0
    }
37
0
    ~UniqueCharStrings() {
38
0
        uhash_close(&map);
39
0
        delete strings;
40
0
    }
41
42
    /** Returns/orphans the CharString that contains all strings. */
43
0
    CharString *orphanCharStrings() {
44
0
        CharString *result = strings;
45
0
        strings = nullptr;
46
0
        return result;
47
0
    }
48
49
    /**
50
     * Adds a string and returns a unique number for it.
51
     * The string's buffer contents must not change, nor move around in memory,
52
     * while this UniqueCharStrings is in use.
53
     * The string contents must be NUL-terminated exactly at s.length().
54
     *
55
     * Best used with read-only-alias UnicodeString objects that point to
56
     * stable storage, such as strings returned by resource bundle functions.
57
     */
58
0
    int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
59
0
        if (U_FAILURE(errorCode)) { return 0; }
60
0
        if (isFrozen) {
61
0
            errorCode = U_NO_WRITE_PERMISSION;
62
0
            return 0;
63
0
        }
64
        // The string points into the resource bundle.
65
0
        const char16_t *p = s.getBuffer();
66
0
        int32_t oldIndex = uhash_geti(&map, p);
67
0
        if (oldIndex != 0) {  // found duplicate
68
0
            return oldIndex;
69
0
        }
70
        // Explicit NUL terminator for the previous string.
71
        // The strings object is also terminated with one implicit NUL.
72
0
        strings->append(0, errorCode);
73
0
        int32_t newIndex = strings->length();
74
0
        strings->appendInvariantChars(s, errorCode);
75
0
        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
76
0
        return newIndex;
77
0
    }
78
79
0
    void freeze() { isFrozen = true; }
80
81
    /**
82
     * Returns a string pointer for its unique number, if this object is frozen.
83
     * Otherwise nullptr.
84
     */
85
0
    const char *get(int32_t i) const {
86
0
        U_ASSERT(isFrozen);
87
0
        return isFrozen && i > 0 ? strings->data() + i : nullptr;
88
0
    }
89
90
private:
91
    UHashtable map;
92
    CharString *strings;
93
    bool isFrozen = false;
94
};
95
96
U_NAMESPACE_END
97
98
#endif  // __UNIQUECHARSTR_H__