/src/mysql-server/strings/collations_internal.h
Line | Count | Source |
1 | | /* Copyright (c) 2020, 2025, Oracle and/or its affiliates. |
2 | | |
3 | | This program is free software; you can redistribute it and/or modify |
4 | | it under the terms of the GNU General Public License, version 2.0, |
5 | | as published by the Free Software Foundation. |
6 | | |
7 | | This program is designed to work with certain software (including |
8 | | but not limited to OpenSSL) that is licensed under separate terms, |
9 | | as designated in a particular file or component or in included license |
10 | | documentation. The authors of MySQL hereby grant you an additional |
11 | | permission to link the program and your derivative works with the |
12 | | separately licensed software that they have either included with |
13 | | the program or referenced in the documentation. |
14 | | |
15 | | This program is distributed in the hope that it will be useful, |
16 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | GNU General Public License, version 2.0, for more details. |
19 | | |
20 | | You should have received a copy of the GNU General Public License |
21 | | along with this program; if not, write to the Free Software |
22 | | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
23 | | |
24 | | #ifndef STRINGS_COLLATIONS_INTERNAL_H_ |
25 | | #define STRINGS_COLLATIONS_INTERNAL_H_ |
26 | | |
27 | | #include <cstddef> |
28 | | #include <functional> |
29 | | #include <mutex> |
30 | | #include <string> |
31 | | #include <string_view> |
32 | | #include <unordered_map> |
33 | | #include <utility> |
34 | | |
35 | | #include "mysql/strings/m_ctype.h" |
36 | | |
/// Name of the character set index file (see the charset_dir parameter of
/// mysql::collation_internals::Collations).
constexpr char MY_CHARSET_INDEX[] = "Index.xml";

/// Integer type of the "flags" arguments taken by the finder functions.
using myf = int;
40 | | |
41 | | namespace mysql { |
42 | | |
namespace collation {
/// Forward declaration only: the finder functions declared in this header
/// take their name arguments as `const mysql::collation::Name &`.
class Name;
}  // namespace collation
46 | | |
47 | | namespace collation_internals { |
48 | | |
49 | | using id_hash_map = std::unordered_map<unsigned, CHARSET_INFO *>; |
50 | | |
/**
  Hash functor for string keys

  Hashes any argument convertible to std::string_view by delegating to
  std::hash<std::string_view>. It is the hasher of sv_hash_map, where it is
  paired with std::equal_to<>.
*/
struct string_hash {
  /// Tags the functor as transparent.
  /// NOTE(review): presumably here to enable heterogeneous lookup in the
  /// unordered containers (a C++20 library feature) -- confirm.
  using is_transparent = void;

  /**
    Computes the hash of a string

    @param txt  Text to hash

    @returns the value std::hash<std::string_view> yields for @p txt
  */
  [[nodiscard]] size_t operator()(std::string_view txt) const {
    const std::hash<std::string_view> hasher{};
    return hasher(txt);
  }
};
57 | | |
58 | | using sv_hash_map = std::unordered_map<std::string, CHARSET_INFO *, string_hash, |
59 | | std::equal_to<>>; |
60 | | |
61 | | /** |
62 | | Helper class: implementation of character set/collation library |
63 | | |
64 | | @see mysql::collation_internals::entry. |
65 | | */ |
66 | | class Collations final { |
67 | | public: |
68 | | Collations(const Collations &) = delete; |
69 | | Collations &operator=(const Collations &) = delete; |
70 | | |
71 | | /** |
72 | | Constructor |
73 | | |
74 | | @param charset_dir Optional "/\0"-terminated path to the directory |
75 | | containing Index.xml |
76 | | @param loader Optional user-specified hooks to the character |
77 | | set/collation parser/initializer. |
78 | | */ |
79 | | explicit Collations(const char *charset_dir, |
80 | | MY_CHARSET_LOADER *loader = nullptr); |
81 | | |
82 | | ~Collations(); |
83 | | |
84 | | /** |
85 | | Finds collation by its name |
86 | | |
87 | | @note Forces collation parsing/initialization if not done yet. |
88 | | |
89 | | @param name Collation name |
90 | | |
91 | | @param flags Optional mysys-specific flags |
92 | | |
93 | | @param [out] errmsg Optional buffer to return error message from |
94 | | collation parser/initializer |
95 | | |
96 | | @returns pointer to a collation object on success, nullptr if not found |
97 | | */ |
98 | | CHARSET_INFO *find_by_name(const mysql::collation::Name &name, myf flags = 0, |
99 | | MY_CHARSET_ERRMSG *errmsg = nullptr); |
100 | | |
101 | | /** |
102 | | Finds collation by its number |
103 | | |
104 | | @note Forces collation parsing/initialization if not done yet. |
105 | | |
106 | | @param id Collation id (hardcoded in library sources or |
107 | | specified in Index.xml) |
108 | | |
109 | | @param flags Optional mysys-specific flags |
110 | | |
111 | | @param [out] errmsg Optional buffer to return error message from |
112 | | collation parser/initializer |
113 | | |
114 | | @returns pointer to a collation object on success, nullptr if not found |
115 | | */ |
116 | | CHARSET_INFO *find_by_id(unsigned id, myf flags = 0, |
117 | | MY_CHARSET_ERRMSG *errmsg = nullptr); |
118 | | |
119 | | /** |
120 | | Finds primary collation by its character set name |
121 | | |
122 | | @note Forces collation parsing/initialization if not done yet. |
123 | | |
124 | | @param cs_name Character set name |
125 | | |
126 | | @param flags Optional mysys-specific flags |
127 | | |
128 | | @param [out] errmsg Optional buffer to return error message from |
129 | | collation parser/initializer |
130 | | |
131 | | @returns pointer to a collation object on success, nullptr if not found |
132 | | */ |
133 | | CHARSET_INFO *find_primary(const mysql::collation::Name &cs_name, |
134 | | myf flags = 0, |
135 | | MY_CHARSET_ERRMSG *errmsg = nullptr); |
136 | | |
137 | | /** |
138 | | Finds binary collation by its character set name |
139 | | |
140 | | @note Forces collation parsing/initialization if not done yet. |
141 | | |
142 | | @param cs_name Character set name |
143 | | |
144 | | @param flags Optional mysys-specific flags |
145 | | |
146 | | @param [out] errmsg Optional buffer to return error message from |
147 | | collation parser/initializer |
148 | | |
149 | | @returns pointer to a collation object on success, nullptr if not found |
150 | | */ |
151 | | CHARSET_INFO *find_default_binary(const mysql::collation::Name &cs_name, |
152 | | myf flags = 0, |
153 | | MY_CHARSET_ERRMSG *errmsg = nullptr); |
154 | | |
155 | | /** |
156 | | Finds collation by its name and returns its id |
157 | | |
158 | | @param name Collation name |
159 | | |
160 | | @returns collation id |
161 | | */ |
162 | | unsigned get_collation_id(const mysql::collation::Name &name) const; |
163 | | |
164 | | /** |
165 | | Finds character set by its name and returns an id of its primary collation |
166 | | |
167 | | @param name Collation name |
168 | | |
169 | | @returns primary collation id |
170 | | */ |
171 | | unsigned get_primary_collation_id(const mysql::collation::Name &name) const; |
172 | | |
173 | | /** |
174 | | Finds character set by its name and returns an id of its default binary |
175 | | collation |
176 | | |
177 | | @param name Collation name |
178 | | |
179 | | @returns default binary collation id |
180 | | */ |
181 | | unsigned get_default_binary_collation_id( |
182 | | const mysql::collation::Name &name) const; |
183 | | |
184 | | /** |
185 | | If not done yet, force collation parsing/initialization under m_mutex lock |
186 | | |
187 | | @param cs Pointer to collation object |
188 | | |
189 | | @param flags Optional mysys-specific flags |
190 | | |
191 | | @param [out] errmsg Optional buffer to return error message from |
192 | | collation parser/initializer |
193 | | |
194 | | @returns @p cs on success, otherwise nullptr |
195 | | */ |
196 | | CHARSET_INFO *safe_init_when_necessary(CHARSET_INFO *cs, myf flags = 0, |
197 | | MY_CHARSET_ERRMSG *errmsg = nullptr); |
198 | | |
199 | | /** |
200 | | Like find_by_name but without initialization of return value |
201 | | |
202 | | @param name Collation name |
203 | | |
204 | | @returns Pointer to CHARSET_INFO object on success, nullptr if not found. |
205 | | The resulting value can point to a half-initialized object. |
206 | | Moreover, further initialization of that object or parsing |
207 | | of its collation XML can fail. |
208 | | */ |
209 | | CHARSET_INFO *find_by_name_unsafe(const mysql::collation::Name &name); |
210 | | |
211 | | /** |
212 | | For registering compile-time collations |
213 | | |
214 | | @param cs Collation object |
215 | | |
216 | | @returns false on success, otherwise true. |
217 | | */ |
218 | | bool add_internal_collation(CHARSET_INFO *cs); |
219 | | |
220 | | /** |
221 | | Iterate over all collation objects known to the library |
222 | | |
223 | | @param f Closure to execute on each collation object known to the library |
224 | | */ |
225 | 2 | void iterate(const std::function<void(const CHARSET_INFO *)> &f) { |
226 | 572 | for (const auto &i : m_all_by_collation_name) { |
227 | 572 | f(i.second); |
228 | 572 | } |
229 | 2 | } |
230 | | |
231 | | protected: |
232 | | /** |
233 | | Internals of safe_init_when_necessary() |
234 | | |
235 | | This function is similar to safe_init_when_necessary, but, unlike |
236 | | safe_init_when_necessary(), it doesn't acquire locks. |
237 | | |
238 | | @param cs Pointer to collation object |
239 | | |
240 | | @param flags Optional mysys-specific flags |
241 | | |
242 | | @param [out] errmsg Optional buffer to return error message from |
243 | | collation parser/initializer |
244 | | |
245 | | @returns @p cs on success, otherwise nullptr |
246 | | */ |
247 | | CHARSET_INFO *unsafe_init(CHARSET_INFO *cs, myf flags, |
248 | | MY_CHARSET_ERRMSG *errmsg); |
249 | | |
250 | | /** |
251 | | Optional '/'-terminated path to the directory containing Index.xml |
252 | | */ |
253 | | const std::string m_charset_dir; |
254 | | |
255 | | /** |
256 | | Maps collation ids to CHARSET_INFO object pointers |
257 | | */ |
258 | | id_hash_map m_all_by_id; |
259 | | |
260 | | /** |
261 | | Maps normalized strings of all known character set names, collation names, |
262 | | and their aliases to CHARSET_INFO object pointers |
263 | | |
264 | | @note see old_conv and get_old_charset_by_name() for exclusions |
265 | | @see old_conv(), get_old_charset_by_name() |
266 | | */ |
267 | | sv_hash_map m_all_by_collation_name; |
268 | | |
269 | | /** |
270 | | Maps normalized strings of character set names to CHARSET_INFO object |
271 | | pointers |
272 | | |
273 | | @note In MySQL, CHARSET_INFO object of character set is also an object |
274 | | of its primary collation. |
275 | | */ |
276 | | sv_hash_map m_primary_by_cs_name; |
277 | | |
278 | | /** |
279 | | Maps normalized strings of character set names to CHARSET_INFO objects |
280 | | of preferred binary collations |
281 | | |
282 | | @note utf8mb4 has two separate binary collations, so m_binary_by_cs_name |
283 | | contains a reference to utf8mb4_bin only. |
284 | | */ |
285 | | sv_hash_map m_binary_by_cs_name; |
286 | | |
287 | | /** |
288 | | False if m_loader references external MY_CHARSET_LOADER, otherwise true. |
289 | | */ |
290 | | const bool m_owns_loader; |
291 | | |
292 | | /** |
293 | | Shared MY_CHARSET_LOADER implementation for use in collation parser and |
294 | | initializer |
295 | | |
296 | | By default references an instance of mysql::collation_internals::Loader. |
297 | | */ |
298 | | MY_CHARSET_LOADER *m_loader; |
299 | | |
300 | | private: |
301 | | /** |
302 | | Collation parser/initializer mutex |
303 | | |
304 | | The library parses collations and initializes CHARSET_INFO objects in |
305 | | depth on demand, so m_mutex is necessary to guarantee a safety of |
306 | | concurrent find_... function calls. |
307 | | */ |
308 | | std::mutex m_mutex; |
309 | | }; |
310 | | |
311 | | /** |
312 | | Global entry point to character set/collation library internals |
313 | | */ |
314 | | extern Collations *entry; |
315 | | |
316 | | } // namespace collation_internals |
317 | | } // namespace mysql |
318 | | |
319 | | #endif // STRINGS_COLLATIONS_INTERNAL_H_ |