Coverage Report

Created: 2026-05-16 09:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/svl/source/misc/sharedstringpool.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#include <svl/sharedstringpool.hxx>
11
#include <svl/sharedstring.hxx>
12
#include <unotools/charclass.hxx>
13
14
#include <mutex>
15
#include <unordered_map>
16
#include <unordered_set>
17
18
/** create a key class that caches the hashcode */
19
namespace
20
{
21
struct StringWithHash
22
{
23
    OUString str;
24
    sal_Int32 hashCode;
25
    StringWithHash(OUString s)
26
9.21M
        : str(std::move(s))
27
9.21M
        , hashCode(str.hashCode())
28
9.21M
    {
29
9.21M
    }
30
31
    bool operator==(StringWithHash const& rhs) const
32
6.50M
    {
33
6.50M
        if (hashCode != rhs.hashCode)
34
0
            return false;
35
6.50M
        return str == rhs.str;
36
6.50M
    }
37
};
38
}
39
40
namespace std
41
{
42
template <> struct hash<StringWithHash>
43
{
44
10.1M
    std::size_t operator()(const StringWithHash& k) const { return k.hashCode; }
45
};
46
}
47
48
namespace svl
49
{
50
namespace
51
{
52
0
/**
 * Returns the refCount field of the given string with its two most
 * significant bits masked off.
 *
 * NOTE(review): presumably those two bits are flags rather than part of the
 * count - confirm against the rtl_uString definition.
 */
sal_Int32 getRefCount(const rtl_uString* p)
{
    constexpr sal_Int32 nCountMask = 0x3FFFFFFF;
    return p->refCount & nCountMask;
}
53
}
54
55
struct SharedStringPool::Impl
56
{
57
    mutable std::mutex maMutex;
58
    // We use this map for two purposes - to store lower->upper case mappings
59
    // and to retrieve a shared uppercase object, so the management logic
60
    // is quite complex.
61
    std::unordered_map<StringWithHash, OUString> maStrMap;
62
    const CharClass& mrCharClass;
63
64
    explicit Impl(const CharClass& rCharClass)
65
165k
        : mrCharClass(rCharClass)
66
165k
    {
67
165k
    }
68
};
69
70
/**
 * Creates a pool that upper-cases strings with the given CharClass and
 * immediately interns the empty string.
 */
SharedStringPool::SharedStringPool(const CharClass& rCharClass)
    : mpImpl(new Impl(rCharClass))
{
    // The single empty string instance has to be shared through this pool too.
    intern(SharedString::EMPTY_STRING);

    // Interning it again must hand back the global empty SharedString
    // (only evaluated when assertions are enabled).
    assert(intern(SharedString::EMPTY_STRING) == SharedString::getEmptyString());
}
77
78
165k
// Defined in this file, where Impl is a complete type.
SharedStringPool::~SharedStringPool() = default;
79
80
/**
 * Interns a string in the pool.
 *
 * The returned SharedString is built from two string data pointers: the
 * string as stored in (or passed to) the pool, and its upper-case form as
 * produced by the pool's CharClass.  Strings with the same upper-case form
 * end up pointing at one shared upper-case string object.
 *
 * The map is only accessed while the pool's mutex is held.
 *
 * @param rStr string to intern
 * @return SharedString referring to the pooled original and upper-case data
 */
SharedString SharedStringPool::intern(const OUString& rStr)
{
    // Build the key (which computes the hash) before taking the lock.
    StringWithHash aStrWithHash(rStr);
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);

    auto& rMap = mpImpl->maStrMap;

    // try_emplace does not construct a map node when the key is already
    // present, which is the common case; the key is moved in because it is
    // not needed afterwards.
    auto [mapIt, bInserted] = rMap.try_emplace(std::move(aStrWithHash), rStr);
    if (!bInserted)
        // there is already a mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);

    // This is a new string insertion. Establish mapping to upper-case variant.
    OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
    if (aUpper == rStr)
        // no need to do anything more, because we inserted an upper->upper mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);

    // We need to insert a lower->upper mapping, so also insert
    // an upper->upper mapping, which we can use both for when an upper string
    // is interned, and to look up a shared upper string.
    StringWithHash aUpperWithHash(aUpper);
    auto mapIt2 = rMap.find(aUpperWithHash);
    if (mapIt2 != rMap.end())
    {
        // there is an already existing upper string, share that object
        mapIt->second = mapIt2->first.str;
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);
    }

    // There is no already existing upper string.
    // First, update using the iterator, can't do this later because
    // the insertion below may invalidate it.
    mapIt->second = aUpper;
    // The key object is not used after this point, so move it into the map;
    // aUpper itself is still needed for the return value and is copied.
    rMap.emplace_hint(mapIt2, std::move(aUpperWithHash), aUpper);
    return SharedString(rStr.pData, aUpper.pData);
}
115
116
void SharedStringPool::purge()
117
0
{
118
0
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
119
120
    // Because we can have an uppercase entry mapped to itself,
121
    // and then a bunch of lowercase entries mapped to that same
122
    // upper-case entry, we need to scan the map twice - the first
123
    // time to remove lowercase entries, and then only can we
124
    // check for unused uppercase entries.
125
126
0
    auto it = mpImpl->maStrMap.begin();
127
0
    auto itEnd = mpImpl->maStrMap.end();
128
0
    while (it != itEnd)
129
0
    {
130
0
        rtl_uString* p1 = it->first.str.pData;
131
0
        rtl_uString* p2 = it->second.pData;
132
0
        if (p1 != p2)
133
0
        {
134
            // normal case - lowercase mapped to uppercase, which
135
            // means that the lowercase entry has one ref-counted
136
            // entry as the key in the map
137
0
            if (getRefCount(p1) == 1)
138
0
            {
139
0
                it = mpImpl->maStrMap.erase(it);
140
0
                continue;
141
0
            }
142
0
        }
143
0
        ++it;
144
0
    }
145
146
0
    it = mpImpl->maStrMap.begin();
147
0
    itEnd = mpImpl->maStrMap.end();
148
0
    while (it != itEnd)
149
0
    {
150
0
        rtl_uString* p1 = it->first.str.pData;
151
0
        rtl_uString* p2 = it->second.pData;
152
0
        if (p1 == p2)
153
0
        {
154
            // uppercase which is mapped to itself, which means
155
            // one ref-counted entry as the key in the map, and
156
            // one ref-counted entry in the value in the map
157
0
            if (getRefCount(p1) == 2)
158
0
            {
159
0
                it = mpImpl->maStrMap.erase(it);
160
0
                continue;
161
0
            }
162
0
        }
163
0
        ++it;
164
0
    }
165
0
}
166
167
size_t SharedStringPool::getCount() const
168
0
{
169
0
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
170
0
    return mpImpl->maStrMap.size();
171
0
}
172
173
size_t SharedStringPool::getCountIgnoreCase() const
174
0
{
175
0
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
176
    // this is only called from unit tests, so no need to be efficient
177
0
    std::unordered_set<OUString> aUpperSet;
178
0
    for (auto const& pair : mpImpl->maStrMap)
179
0
        aUpperSet.insert(pair.second);
180
0
    return aUpperSet.size();
181
0
}
182
}
183
184
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */