Coverage Report

Created: 2025-12-31 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mysql-server/strings/collations_internal.h
Line
Count
Source
1
/* Copyright (c) 2020, 2025, Oracle and/or its affiliates.
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License, version 2.0,
5
   as published by the Free Software Foundation.
6
7
   This program is designed to work with certain software (including
8
   but not limited to OpenSSL) that is licensed under separate terms,
9
   as designated in a particular file or component or in included license
10
   documentation.  The authors of MySQL hereby grant you an additional
11
   permission to link the program and your derivative works with the
12
   separately licensed software that they have either included with
13
   the program or referenced in the documentation.
14
15
   This program is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
   GNU General Public License, version 2.0, for more details.
19
20
   You should have received a copy of the GNU General Public License
21
   along with this program; if not, write to the Free Software
22
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23
24
#ifndef STRINGS_COLLATIONS_INTERNAL_H_
25
#define STRINGS_COLLATIONS_INTERNAL_H_
26
27
#include <cstddef>
28
#include <functional>
29
#include <mutex>
30
#include <string>
31
#include <string_view>
32
#include <unordered_map>
33
#include <utility>
34
35
#include "mysql/strings/m_ctype.h"
36
37
/// File name of the character set index ("Index.xml"). The Collations
/// constructor documentation refers to the directory containing this file.
constexpr char MY_CHARSET_INDEX[]{"Index.xml"};
38
39
/// Integer type of the optional mysys-specific `flags` arguments accepted by
/// the Collations lookup and initialization functions declared below.
typedef int myf;
40
41
namespace mysql {
42
43
namespace collation {
44
/// Forward declaration: this header only passes Name by const reference, so
/// the full definition is not needed here.
class Name;
45
}  // namespace collation
46
47
namespace collation_internals {
48
49
/// Hash map keyed by an unsigned collation id, holding CHARSET_INFO pointers.
using id_hash_map = std::unordered_map<unsigned, CHARSET_INFO *>;
50
51
/**
  Hash functor for string-keyed hash maps.

  Every key is hashed through std::hash<std::string_view>, so any argument
  that converts to std::string_view produces the same hash value as the
  equivalent std::string_view.
*/
struct string_hash {
  /// Nested marker type that declares this functor as transparent.
  using is_transparent = void;

  /**
    @param txt  Text to hash

    @returns hash value of @p txt as computed by std::hash<std::string_view>
  */
  [[nodiscard]] size_t operator()(std::string_view txt) const {
    const std::hash<std::string_view> hasher{};
    return hasher(txt);
  }
};
57
58
/**
  Hash map from std::string keys to CHARSET_INFO pointers.

  The map is instantiated with string_hash and std::equal_to<>, i.e. with a
  hash functor that declares is_transparent and a transparent equality
  predicate.
*/
using sv_hash_map = std::unordered_map<std::string, CHARSET_INFO *, string_hash,
59
                                       std::equal_to<>>;
60
61
/**
62
  Helper class: implementation of character set/collation library
63
64
  @see mysql::collation_internals::entry.
65
*/
66
class Collations final {
67
 public:
68
  Collations(const Collations &) = delete;
69
  Collations &operator=(const Collations &) = delete;
70
71
  /**
72
    Constructor
73
74
    @param charset_dir  Optional "/\0"-terminated path to the directory
75
                        containing Index.xml
76
    @param loader       Optional user-specified hooks to the character
77
                        set/collation parser/initializer.
78
  */
79
  explicit Collations(const char *charset_dir,
80
                      MY_CHARSET_LOADER *loader = nullptr);
81
82
  ~Collations();
83
84
  /**
85
    Finds collation by its name
86
87
    @note Forces collation parsing/initialization if not done yet.
88
89
    @param name         Collation name
90
91
    @param flags        Optional mysys-specific flags
92
93
    @param [out] errmsg Optional buffer to return error message from
94
                        collation parser/initializer
95
96
    @returns pointer to a collation object on success, nullptr if not found
97
  */
98
  CHARSET_INFO *find_by_name(const mysql::collation::Name &name, myf flags = 0,
99
                             MY_CHARSET_ERRMSG *errmsg = nullptr);
100
101
  /**
102
    Finds collation by its number
103
104
    @note Forces collation parsing/initialization if not done yet.
105
106
    @param id           Collation id (hardcoded in library sources or
107
                        specified in Index.xml)
108
109
    @param flags        Optional mysys-specific flags
110
111
    @param [out] errmsg Optional buffer to return error message from
112
                        collation parser/initializer
113
114
    @returns pointer to a collation object on success, nullptr if not found
115
  */
116
  CHARSET_INFO *find_by_id(unsigned id, myf flags = 0,
117
                           MY_CHARSET_ERRMSG *errmsg = nullptr);
118
119
  /**
120
    Finds primary collation by its character set name
121
122
    @note Forces collation parsing/initialization if not done yet.
123
124
    @param cs_name      Character set name
125
126
    @param flags        Optional mysys-specific flags
127
128
    @param [out] errmsg Optional buffer to return error message from
129
                        collation parser/initializer
130
131
    @returns pointer to a collation object on success, nullptr if not found
132
  */
133
  CHARSET_INFO *find_primary(const mysql::collation::Name &cs_name,
134
                             myf flags = 0,
135
                             MY_CHARSET_ERRMSG *errmsg = nullptr);
136
137
  /**
138
    Finds binary collation by its character set name
139
140
    @note Forces collation parsing/initialization if not done yet.
141
142
    @param cs_name      Character set name
143
144
    @param flags        Optional mysys-specific flags
145
146
    @param [out] errmsg Optional buffer to return error message from
147
                        collation parser/initializer
148
149
    @returns pointer to a collation object on success, nullptr if not found
150
  */
151
  CHARSET_INFO *find_default_binary(const mysql::collation::Name &cs_name,
152
                                    myf flags = 0,
153
                                    MY_CHARSET_ERRMSG *errmsg = nullptr);
154
155
  /**
156
    Finds collation by its name and returns its id
157
158
    @param name         Collation name
159
160
    @returns collation id
161
  */
162
  unsigned get_collation_id(const mysql::collation::Name &name) const;
163
164
  /**
165
    Finds character set by its name and returns an id of its primary collation
166
167
    @param name         Collation name
168
169
    @returns primary collation id
170
  */
171
  unsigned get_primary_collation_id(const mysql::collation::Name &name) const;
172
173
  /**
174
    Finds character set by its name and returns an id of its default binary
175
    collation
176
177
    @param name         Collation name
178
179
    @returns default binary collation id
180
  */
181
  unsigned get_default_binary_collation_id(
182
      const mysql::collation::Name &name) const;
183
184
  /**
185
    If not done yet, force collation parsing/initialization under m_mutex lock
186
187
    @param cs           Pointer to collation object
188
189
    @param flags        Optional mysys-specific flags
190
191
    @param [out] errmsg Optional buffer to return error message from
192
                        collation parser/initializer
193
194
    @returns @p cs on success, otherwise nullptr
195
  */
196
  CHARSET_INFO *safe_init_when_necessary(CHARSET_INFO *cs, myf flags = 0,
197
                                         MY_CHARSET_ERRMSG *errmsg = nullptr);
198
199
  /**
200
    Like find_by_name but without initialization of return value
201
202
    @param name         Collation name
203
204
    @returns Pointer to CHARSET_INFO object on success, nullptr if not found.
205
             The resulting value can point to a half-initialized object.
206
             Moreover, further initialization of that object or parsing
207
             of its collation XML can fail.
208
  */
209
  CHARSET_INFO *find_by_name_unsafe(const mysql::collation::Name &name);
210
211
  /**
212
    For registering compile-time collations
213
214
    @param cs Collation object
215
216
    @returns false on success, otherwise true.
217
  */
218
  bool add_internal_collation(CHARSET_INFO *cs);
219
220
  /**
221
    Iterate over all collation objects known to the library
222
223
    @param f    Closure to execute on each collation object known to the library
224
  */
225
2
  void iterate(const std::function<void(const CHARSET_INFO *)> &f) {
226
572
    for (const auto &i : m_all_by_collation_name) {
227
572
      f(i.second);
228
572
    }
229
2
  }
230
231
 protected:
232
  /**
233
    Internals of safe_init_when_necessary()
234
235
    This function is similar to safe_init_when_necessary, but, unlike
236
    safe_init_when_necessary(), it doesn't acquire locks.
237
238
    @param cs           Pointer to collation object
239
240
    @param flags        Optional mysys-specific flags
241
242
    @param [out] errmsg Optional buffer to return error message from
243
                        collation parser/initializer
244
245
    @returns @p cs on success, otherwise nullptr
246
  */
247
  CHARSET_INFO *unsafe_init(CHARSET_INFO *cs, myf flags,
248
                            MY_CHARSET_ERRMSG *errmsg);
249
250
  /**
251
    Optional '/'-terminated path to the directory containing Index.xml
252
  */
253
  const std::string m_charset_dir;
254
255
  /**
256
    Maps collation ids to CHARSET_INFO object pointers
257
  */
258
  id_hash_map m_all_by_id;
259
260
  /**
261
    Maps normalized strings of all known character set names, collation names,
262
    and their aliases to CHARSET_INFO object pointers
263
264
    @note see old_conv and get_old_charset_by_name() for exclusions
265
    @see old_conv(), get_old_charset_by_name()
266
  */
267
  sv_hash_map m_all_by_collation_name;
268
269
  /**
270
    Maps normalized strings of character set names to CHARSET_INFO object
271
    pointers
272
273
    @note In MySQL, CHARSET_INFO object of character set is also an object
274
    of its primary collation.
275
  */
276
  sv_hash_map m_primary_by_cs_name;
277
278
  /**
279
    Maps normalized strings of character set names to CHARSET_INFO objects
280
    of preferred binary collations
281
282
    @note utf8mb4 has two separate binary collations, so m_binary_by_cs_name
283
          contains a reference to utf8mb4_bin only.
284
  */
285
  sv_hash_map m_binary_by_cs_name;
286
287
  /**
288
    False if m_loader references external MY_CHARSET_LOADER, otherwise true.
289
  */
290
  const bool m_owns_loader;
291
292
  /**
293
    Shared MY_CHARSET_LOADER implementation for use in collation parser and
294
    initializer
295
296
    By default references an instance of mysql::collation_internals::Loader.
297
  */
298
  MY_CHARSET_LOADER *m_loader;
299
300
 private:
301
  /**
302
    Collation parser/initializer mutex
303
304
    The library parses collations and initializes CHARSET_INFO objects in
305
    depth on demand, so m_mutex is necessary to guarantee a safety of
306
    concurrent find_... function calls.
307
  */
308
  std::mutex m_mutex;
309
};
310
311
/**
  Global entry point to character set/collation library internals

  This is a declaration only (extern); the pointer is defined outside this
  header.
*/
314
extern Collations *entry;
315
316
}  // namespace collation_internals
317
}  // namespace mysql
318
319
#endif  // STRINGS_COLLATIONS_INTERNAL_H_