/src/server/mysys/charset.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2000, 2011, Oracle and/or its affiliates |
3 | | Copyright (c) 2009, 2020, MariaDB Corporation. |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; version 2 of the License. |
8 | | |
9 | | This program is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with this program; if not, write to the Free Software |
16 | | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ |
17 | | |
18 | | #include "mysys_priv.h" |
19 | | #include "mysys_err.h" |
20 | | #include <m_ctype.h> |
21 | | #include <m_string.h> |
22 | | #include <my_dir.h> |
23 | | #include <hash.h> |
24 | | #include <my_xml.h> |
25 | | #ifdef HAVE_LANGINFO_H |
26 | | #include <langinfo.h> |
27 | | #endif |
28 | | #ifdef HAVE_LOCALE_H |
29 | | #include <locale.h> |
30 | | #endif |
31 | | |
/*
  Name lookup tables. Both are set up in init_available_charsets():
  charset_name_hash is keyed by cs_name (see get_charset_key()) and
  collation_name_hash is keyed by coll_name (see get_collation_key()).
*/
static HASH charset_name_hash;
static HASH collation_name_hash;
34 | | |
35 | | /* |
36 | | The code below implements this functionality: |
37 | | |
38 | | - Initializing charset related structures |
39 | | - Loading dynamic charsets |
40 | | - Searching for a proper CHARSET_INFO |
41 | | using charset name, collation name or collation ID |
42 | | - Setting server default character set |
43 | | */ |
44 | | |
45 | | static uint |
46 | | get_collation_number_internal(const char *name) |
47 | 0 | { |
48 | 0 | CHARSET_INFO *cs= (CHARSET_INFO*) my_hash_search(&collation_name_hash, |
49 | 0 | (uchar*) name, strlen(name)); |
50 | 0 | return cs ? cs->number : 0; |
51 | 0 | } |
52 | | |
53 | | |
54 | | static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch) |
55 | 0 | { |
56 | 0 | int chlen= my_ci_charlen(cs, &ch, &ch + 1); |
57 | 0 | return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE; |
58 | 0 | } |
59 | | |
60 | | static my_bool init_state_maps(struct charset_info_st *cs) |
61 | 0 | { |
62 | 0 | uint i; |
63 | 0 | uchar *state_map; |
64 | 0 | uchar *ident_map; |
65 | |
|
66 | 0 | if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256*2, MYF(MY_WME)))) |
67 | 0 | return 1; |
68 | | |
69 | 0 | cs->ident_map= ident_map= state_map + 256; |
70 | | |
71 | | /* Fill state_map with states to get a faster parser */ |
72 | 0 | for (i=0; i < 256 ; i++) |
73 | 0 | { |
74 | 0 | if (my_isalpha(cs,i)) |
75 | 0 | state_map[i]=(uchar) MY_LEX_IDENT; |
76 | 0 | else if (my_isdigit(cs,i)) |
77 | 0 | state_map[i]=(uchar) MY_LEX_NUMBER_IDENT; |
78 | 0 | else if (is_multi_byte_ident(cs, i)) |
79 | 0 | state_map[i]=(uchar) MY_LEX_IDENT; |
80 | 0 | else if (my_isspace(cs,i)) |
81 | 0 | state_map[i]=(uchar) MY_LEX_SKIP; |
82 | 0 | else |
83 | 0 | state_map[i]=(uchar) MY_LEX_CHAR; |
84 | 0 | } |
85 | 0 | state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT; |
86 | 0 | state_map[(uchar)'\'']=(uchar) MY_LEX_STRING; |
87 | 0 | state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT; |
88 | 0 | state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP; |
89 | 0 | state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP; |
90 | 0 | state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL; |
91 | 0 | state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT; |
92 | 0 | state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON; |
93 | 0 | state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR; |
94 | 0 | state_map[0]=(uchar) MY_LEX_EOL; |
95 | 0 | state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE; |
96 | 0 | state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT; |
97 | 0 | state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT; |
98 | 0 | state_map[(uchar)'@']= (uchar) MY_LEX_USER_END; |
99 | 0 | state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER; |
100 | 0 | state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER; |
101 | 0 | state_map[(uchar)'-']= (uchar) MY_LEX_MINUS_OR_COMMENT; |
102 | 0 | state_map[(uchar)',']= (uchar) MY_LEX_COMMA; |
103 | 0 | state_map[(uchar)'?']= (uchar) MY_LEX_PLACEHOLDER; |
104 | | |
105 | | /* |
106 | | Create a second map to make it faster to find identifiers |
107 | | */ |
108 | 0 | for (i=0; i < 256 ; i++) |
109 | 0 | { |
110 | 0 | ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT || |
111 | 0 | state_map[i] == MY_LEX_NUMBER_IDENT); |
112 | 0 | } |
113 | | |
114 | | /* Special handling of hex and binary strings */ |
115 | 0 | state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX; |
116 | 0 | state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN; |
117 | 0 | state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR; |
118 | 0 | return 0; |
119 | 0 | } |
120 | | |
121 | | |
122 | | static MY_COLLATION_HANDLER *get_simple_collation_handler_by_flags(uint flags) |
123 | 0 | { |
124 | 0 | return flags & MY_CS_BINSORT ? |
125 | 0 | (flags & MY_CS_NOPAD ? |
126 | 0 | &my_collation_8bit_nopad_bin_handler : |
127 | 0 | &my_collation_8bit_bin_handler) : |
128 | 0 | (flags & MY_CS_NOPAD ? |
129 | 0 | &my_collation_8bit_simple_nopad_ci_handler : |
130 | 0 | &my_collation_8bit_simple_ci_handler); |
131 | 0 | } |
132 | | |
133 | | |
134 | | static void simple_cs_init_functions(struct charset_info_st *cs) |
135 | 0 | { |
136 | 0 | cs->coll= get_simple_collation_handler_by_flags(cs->state); |
137 | 0 | cs->cset= &my_charset_8bit_handler; |
138 | 0 | } |
139 | | |
140 | | |
141 | | |
142 | | static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) |
143 | 0 | { |
144 | 0 | to->number= from->number ? from->number : to->number; |
145 | | |
146 | | /* Don't replace csname if already set */ |
147 | 0 | if (from->cs_name.str && !to->cs_name.str) |
148 | 0 | { |
149 | 0 | if (!(to->cs_name.str= my_once_memdup(from->cs_name.str, |
150 | 0 | from->cs_name.length + 1, |
151 | 0 | MYF(MY_WME)))) |
152 | 0 | goto err; |
153 | 0 | to->cs_name.length= from->cs_name.length; |
154 | 0 | } |
155 | | |
156 | 0 | if (from->coll_name.str) |
157 | 0 | { |
158 | 0 | if (!(to->coll_name.str= my_once_memdup(from->coll_name.str, |
159 | 0 | from->coll_name.length + 1, |
160 | 0 | MYF(MY_WME)))) |
161 | 0 | goto err; |
162 | 0 | to->coll_name.length= from->coll_name.length; |
163 | 0 | } |
164 | | |
165 | 0 | if (from->comment) |
166 | 0 | if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) |
167 | 0 | goto err; |
168 | | |
169 | 0 | if (from->m_ctype) |
170 | 0 | { |
171 | 0 | if (!(to->m_ctype= (uchar*) my_once_memdup((char*) from->m_ctype, |
172 | 0 | MY_CS_CTYPE_TABLE_SIZE, |
173 | 0 | MYF(MY_WME)))) |
174 | 0 | goto err; |
175 | 0 | if (init_state_maps(to)) |
176 | 0 | goto err; |
177 | 0 | } |
178 | 0 | if (from->to_lower) |
179 | 0 | if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower, |
180 | 0 | MY_CS_TO_LOWER_TABLE_SIZE, |
181 | 0 | MYF(MY_WME)))) |
182 | 0 | goto err; |
183 | | |
184 | 0 | if (from->to_upper) |
185 | 0 | if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper, |
186 | 0 | MY_CS_TO_UPPER_TABLE_SIZE, |
187 | 0 | MYF(MY_WME)))) |
188 | 0 | goto err; |
189 | 0 | if (from->sort_order) |
190 | 0 | { |
191 | 0 | if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order, |
192 | 0 | MY_CS_SORT_ORDER_TABLE_SIZE, |
193 | 0 | MYF(MY_WME)))) |
194 | 0 | goto err; |
195 | |
|
196 | 0 | } |
197 | 0 | if (from->tab_to_uni) |
198 | 0 | { |
199 | 0 | uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16); |
200 | 0 | if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni, |
201 | 0 | sz, MYF(MY_WME)))) |
202 | 0 | goto err; |
203 | 0 | } |
204 | 0 | if (from->tailoring) |
205 | 0 | if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME)))) |
206 | 0 | goto err; |
207 | | |
208 | 0 | return 0; |
209 | | |
210 | 0 | err: |
211 | 0 | return 1; |
212 | 0 | } |
213 | | |
214 | | |
215 | | static my_bool simple_8bit_charset_data_is_full(CHARSET_INFO *cs) |
216 | 0 | { |
217 | 0 | return cs->m_ctype && cs->to_upper && cs->to_lower && cs->tab_to_uni; |
218 | 0 | } |
219 | | |
220 | | |
221 | | /** |
222 | | Inherit missing 8bit charset data from another collation. |
223 | | Arrays pointed by refcs must be in the permanent memory already, |
224 | | e.g. static memory, or allocated by my_once_xxx(). |
225 | | */ |
226 | | static void |
227 | | inherit_charset_data(struct charset_info_st *cs, CHARSET_INFO *refcs) |
228 | 0 | { |
229 | 0 | if (!cs->to_upper) |
230 | 0 | cs->to_upper= refcs->to_upper; |
231 | 0 | if (!cs->to_lower) |
232 | 0 | cs->to_lower= refcs->to_lower; |
233 | 0 | if (!cs->m_ctype) |
234 | 0 | cs->m_ctype= refcs->m_ctype; |
235 | 0 | if (!cs->tab_to_uni) |
236 | 0 | cs->tab_to_uni= refcs->tab_to_uni; |
237 | 0 | } |
238 | | |
239 | | |
240 | | static my_bool simple_8bit_collation_data_is_full(CHARSET_INFO *cs) |
241 | 0 | { |
242 | 0 | return cs->sort_order || (cs->state & MY_CS_BINSORT); |
243 | 0 | } |
244 | | |
245 | | |
246 | | /** |
247 | | Inherit 8bit simple collation data from another collation. |
248 | | refcs->sort_order must be in the permanent memory already, |
249 | | e.g. static memory, or allocated by my_once_xxx(). |
250 | | */ |
251 | | static void |
252 | | inherit_collation_data(struct charset_info_st *cs, CHARSET_INFO *refcs) |
253 | 0 | { |
254 | 0 | if (!simple_8bit_collation_data_is_full(cs)) |
255 | 0 | cs->sort_order= refcs->sort_order; |
256 | 0 | } |
257 | | |
258 | | |
259 | | static my_bool simple_cs_is_full(CHARSET_INFO *cs) |
260 | 0 | { |
261 | 0 | return cs->number && cs->cs_name.str && cs->coll_name.str && |
262 | 0 | simple_8bit_charset_data_is_full(cs) && |
263 | 0 | (simple_8bit_collation_data_is_full(cs) || cs->tailoring); |
264 | 0 | } |
265 | | |
266 | | |
/*
  copy_uca_collation() is called from add_collation() for ucs2, utf8mb3,
  utf8mb4, utf16 and utf32, so it must be compiled whenever any of these
  character sets is available together with UCA collations.
*/
#if defined(HAVE_UCA_COLLATIONS) && \
    (defined(HAVE_CHARSET_ucs2)    || defined(HAVE_CHARSET_utf8mb3) || \
     defined(HAVE_CHARSET_utf8mb4) || defined(HAVE_CHARSET_utf16)   || \
     defined(HAVE_CHARSET_utf32))
/**
  Initialize a loaded collation.
  @param [OUT] to     - The new charset_info_st structure to initialize.
  @param [IN]  from   - A template collation, to fill the missing data from.
  @param [IN]  loaded - The collation data loaded from the LDML file.
                        Some data may be missing in "loaded".
*/
static void
copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from,
                   CHARSET_INFO *loaded)
{
  to->cset= from->cset;
  to->coll= from->coll;
  /*
    Single-level UCA collations have strxfrm_multiply=8.
    In case of a multi-level UCA collation we use strxfrm_multiply=4.
    That means the collation handler will request the caller to allocate
    a smaller buffer for each level, for performance purposes,
    and to fit longer VARCHARs to @@max_sort_length.
    This makes filesort produce non-precise order for some rare Unicode
    characters that produce more than 4 weights (long expansions).
    UCA requires 2 bytes per weight multiplied by the number of levels.
    In case of a 2-level collation, each character requires 4*2=8 bytes.
    Therefore, the longest VARCHAR that fits into the default @@max_sort_length
    is 1024/8=VARCHAR(128). With strxfrm_multiply==8, only VARCHAR(64)
    would fit.
    Note, the built-in collation utf8_thai_520_w2 also uses strxfrm_multiply=4,
    for the same purpose.
    TODO: we could add a new LDML syntax to choose strxfrm_multiply value.
  */
  to->strxfrm_multiply= loaded->levels_for_order > 1 ?
                        4 : from->strxfrm_multiply;
  to->min_sort_char= from->min_sort_char;
  to->max_sort_char= from->max_sort_char;
  to->mbminlen= from->mbminlen;
  to->mbmaxlen= from->mbmaxlen;
  to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
              MY_CS_STRNXFRM  | MY_CS_UNICODE;
}
#endif
308 | | |
309 | | |
310 | | static int add_collation(struct charset_info_st *cs) |
311 | 0 | { |
312 | 0 | if (cs->coll_name.str && |
313 | 0 | (cs->number || |
314 | 0 | (cs->number=get_collation_number_internal(cs->coll_name.str))) && |
315 | 0 | cs->number < array_elements(all_charsets)) |
316 | 0 | { |
317 | 0 | struct charset_info_st *newcs; |
318 | 0 | if (!(newcs= (struct charset_info_st*) all_charsets[cs->number])) |
319 | 0 | { |
320 | 0 | if (!(all_charsets[cs->number]= newcs= |
321 | 0 | (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) |
322 | 0 | return MY_XML_ERROR; |
323 | 0 | bzero(newcs,sizeof(CHARSET_INFO)); |
324 | 0 | } |
325 | 0 | else |
326 | 0 | { |
327 | | /* Don't allow change of csname */ |
328 | 0 | if (newcs->cs_name.str && strcmp(newcs->cs_name.str, cs->cs_name.str)) |
329 | 0 | { |
330 | 0 | my_error(EE_DUPLICATE_CHARSET, MYF(ME_WARNING), |
331 | 0 | cs->number, cs->cs_name.str, newcs->cs_name.str); |
332 | | /* |
333 | | Continue parsing rest of Index.xml. We got an warning in the log |
334 | | so the user can fix the wrong character set definition. |
335 | | */ |
336 | 0 | return MY_XML_OK; |
337 | 0 | } |
338 | 0 | } |
339 | | |
340 | 0 | if (cs->primary_number == cs->number) |
341 | 0 | cs->state |= MY_CS_PRIMARY; |
342 | | |
343 | 0 | if (cs->binary_number == cs->number) |
344 | 0 | cs->state |= MY_CS_BINSORT; |
345 | | |
346 | 0 | newcs->state|= cs->state; |
347 | | |
348 | 0 | if (!(newcs->state & MY_CS_COMPILED)) |
349 | 0 | { |
350 | 0 | if (cs_copy_data(newcs,cs)) |
351 | 0 | return MY_XML_ERROR; |
352 | | |
353 | 0 | newcs->levels_for_order= 1; |
354 | | |
355 | 0 | if (!strcmp(cs->cs_name.str,"ucs2") ) |
356 | 0 | { |
357 | 0 | #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) |
358 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
359 | 0 | &my_charset_ucs2_unicode_nopad_ci : |
360 | 0 | &my_charset_ucs2_unicode_ci, |
361 | 0 | cs); |
362 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
363 | 0 | #endif |
364 | 0 | } |
365 | 0 | else if (!strcmp(cs->cs_name.str, "utf8") || |
366 | 0 | !strcmp(cs->cs_name.str, "utf8mb3")) |
367 | 0 | { |
368 | 0 | #if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS) |
369 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
370 | 0 | &my_charset_utf8mb3_unicode_nopad_ci : |
371 | 0 | &my_charset_utf8mb3_unicode_ci, |
372 | 0 | cs); |
373 | 0 | newcs->m_ctype= my_charset_utf8mb3_unicode_ci.m_ctype; |
374 | 0 | if (init_state_maps(newcs)) |
375 | 0 | return MY_XML_ERROR; |
376 | 0 | #endif |
377 | 0 | } |
378 | 0 | else if (!strcmp(cs->cs_name.str, "utf8mb4")) |
379 | 0 | { |
380 | 0 | #if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS) |
381 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
382 | 0 | &my_charset_utf8mb4_unicode_nopad_ci : |
383 | 0 | &my_charset_utf8mb4_unicode_ci, |
384 | 0 | cs); |
385 | 0 | newcs->m_ctype= my_charset_utf8mb4_unicode_ci.m_ctype; |
386 | 0 | if (init_state_maps(newcs)) |
387 | 0 | return MY_XML_ERROR; |
388 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; |
389 | 0 | #endif |
390 | 0 | } |
391 | 0 | else if (!strcmp(cs->cs_name.str, "utf16")) |
392 | 0 | { |
393 | 0 | #if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS) |
394 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
395 | 0 | &my_charset_utf16_unicode_nopad_ci : |
396 | 0 | &my_charset_utf16_unicode_ci, |
397 | 0 | cs); |
398 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
399 | 0 | #endif |
400 | 0 | } |
401 | 0 | else if (!strcmp(cs->cs_name.str, "utf32")) |
402 | 0 | { |
403 | 0 | #if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS) |
404 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
405 | 0 | &my_charset_utf32_unicode_nopad_ci : |
406 | 0 | &my_charset_utf32_unicode_ci, |
407 | 0 | cs); |
408 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
409 | 0 | #endif |
410 | 0 | } |
411 | 0 | else |
412 | 0 | { |
413 | 0 | simple_cs_init_functions(newcs); |
414 | 0 | newcs->mbminlen= 1; |
415 | 0 | newcs->mbmaxlen= 1; |
416 | 0 | newcs->strxfrm_multiply= 1; |
417 | 0 | if (simple_cs_is_full(newcs)) |
418 | 0 | { |
419 | 0 | newcs->state |= MY_CS_LOADED; |
420 | 0 | } |
421 | 0 | } |
422 | 0 | add_compiled_extra_collation(newcs); |
423 | 0 | } |
424 | 0 | else |
425 | 0 | { |
426 | | /* |
427 | | We need the below to make get_charset_name() |
428 | | and get_charset_number() working even if a |
429 | | character set has not been really incompiled. |
430 | | The above functions are used for example |
431 | | in error message compiler extra/comp_err.c. |
432 | | If a character set was compiled, this information |
433 | | will get lost and overwritten in add_compiled_collation(). |
434 | | */ |
435 | 0 | newcs->number= cs->number; |
436 | 0 | if (cs->comment) |
437 | 0 | if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) |
438 | 0 | return MY_XML_ERROR; |
439 | 0 | if (cs->cs_name.str && ! newcs->cs_name.str) |
440 | 0 | { |
441 | 0 | if (!(newcs->cs_name.str= my_once_memdup(cs->cs_name.str, |
442 | 0 | cs->cs_name.length+1, |
443 | 0 | MYF(MY_WME)))) |
444 | 0 | return MY_XML_ERROR; |
445 | 0 | newcs->cs_name.length= cs->cs_name.length; |
446 | 0 | } |
447 | 0 | if (cs->coll_name.str) |
448 | 0 | { |
449 | 0 | if (!(newcs->coll_name.str= my_once_memdup(cs->coll_name.str, |
450 | 0 | cs->coll_name.length+1, |
451 | 0 | MYF(MY_WME)))) |
452 | 0 | return MY_XML_ERROR; |
453 | 0 | newcs->coll_name.length= cs->coll_name.length; |
454 | 0 | } |
455 | 0 | } |
456 | 0 | cs->number= 0; |
457 | 0 | cs->primary_number= 0; |
458 | 0 | cs->binary_number= 0; |
459 | 0 | cs->coll_name.str= 0; |
460 | 0 | cs->coll_name.length= 0; |
461 | 0 | cs->state= 0; |
462 | 0 | cs->sort_order= NULL; |
463 | 0 | cs->tailoring= NULL; |
464 | 0 | } |
465 | 0 | return MY_XML_OK; |
466 | 0 | } |
467 | | |
468 | | |
469 | | /** |
470 | | Report character set initialization errors and warnings. |
471 | | Be silent by default: no warnings on the client side. |
472 | | */ |
473 | | ATTRIBUTE_FORMAT(printf, 2, 3) static void |
474 | | default_reporter(enum loglevel level __attribute__ ((unused)), |
475 | | const char *format __attribute__ ((unused)), |
476 | | ...) |
477 | 0 | { |
478 | 0 | } |
479 | | my_error_reporter my_charset_error_reporter= default_reporter; |
480 | | |
481 | | |
482 | | /** |
483 | | Wrappers for memory functions my_malloc (and friends) |
484 | | with C-compatbile API without extra "myf" argument. |
485 | | */ |
486 | | static void * |
487 | | my_once_alloc_c(size_t size) |
488 | 0 | { return my_once_alloc(size, MYF(MY_WME)); } |
489 | | |
490 | | |
491 | | static void * |
492 | | my_malloc_c(size_t size) |
493 | 0 | { return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); } |
494 | | |
495 | | |
496 | | static void * |
497 | | my_realloc_c(void *old, size_t size) |
498 | 0 | { return my_realloc(key_memory_charset_loader, old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); } |
499 | | |
500 | | |
501 | | /** |
502 | | Initialize character set loader to use mysys memory management functions. |
503 | | @param loader Loader to initialize |
504 | | */ |
505 | | void |
506 | | my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader) |
507 | 0 | { |
508 | 0 | loader->error[0]= '\0'; |
509 | 0 | loader->once_alloc= my_once_alloc_c; |
510 | 0 | loader->malloc= my_malloc_c; |
511 | 0 | loader->realloc= my_realloc_c; |
512 | 0 | loader->free= my_free; |
513 | 0 | loader->reporter= my_charset_error_reporter; |
514 | 0 | loader->add_collation= add_collation; |
515 | 0 | } |
516 | | |
517 | | |
/*
  Size limit, in bytes, that my_read_charset_file() applies to a charset
  definition file. Parenthesized so that the expansion stays a single
  operand in any expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* File name that init_available_charsets() appends to the charsets dir */
#define MY_CHARSET_INDEX "Index.xml"

/* When not NULL, get_charsets_dir() uses this directory as is */
const char *charsets_dir= NULL;
522 | | |
523 | | |
524 | | static my_bool |
525 | | my_read_charset_file(MY_CHARSET_LOADER *loader, |
526 | | const char *filename, |
527 | | myf myflags) |
528 | 0 | { |
529 | 0 | uchar *buf; |
530 | 0 | int fd; |
531 | 0 | size_t len, tmp_len; |
532 | 0 | MY_STAT stat_info; |
533 | | |
534 | 0 | if (!my_stat(filename, &stat_info, MYF(myflags)) || |
535 | 0 | ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) || |
536 | 0 | !(buf= (uchar*) my_malloc(key_memory_charset_loader,len,myflags))) |
537 | 0 | return TRUE; |
538 | | |
539 | 0 | if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0) |
540 | 0 | goto error; |
541 | 0 | tmp_len= mysql_file_read(fd, buf, len, myflags); |
542 | 0 | mysql_file_close(fd, myflags); |
543 | 0 | if (tmp_len != len) |
544 | 0 | goto error; |
545 | | |
546 | 0 | if (my_parse_charset_xml(loader, (char *) buf, len)) |
547 | 0 | { |
548 | 0 | my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n", |
549 | 0 | MYF(0), filename, loader->error); |
550 | 0 | goto error; |
551 | 0 | } |
552 | | |
553 | 0 | my_free(buf); |
554 | 0 | return FALSE; |
555 | | |
556 | 0 | error: |
557 | 0 | my_free(buf); |
558 | 0 | return TRUE; |
559 | 0 | } |
560 | | |
561 | | |
562 | | char *get_charsets_dir(char *buf) |
563 | 0 | { |
564 | 0 | const char *sharedir= SHAREDIR; |
565 | 0 | char *res; |
566 | 0 | DBUG_ENTER("get_charsets_dir"); |
567 | |
|
568 | 0 | if (charsets_dir != NULL) |
569 | 0 | strmake(buf, charsets_dir, FN_REFLEN-1); |
570 | 0 | else |
571 | 0 | { |
572 | 0 | if (test_if_hard_path(sharedir) || |
573 | 0 | is_prefix(sharedir, DEFAULT_CHARSET_HOME)) |
574 | 0 | strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); |
575 | 0 | else |
576 | 0 | strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, |
577 | 0 | NullS); |
578 | 0 | } |
579 | 0 | res= convert_dirname(buf,buf,NullS); |
580 | 0 | DBUG_PRINT("info",("charsets dir: '%s'", buf)); |
581 | 0 | DBUG_RETURN(res); |
582 | 0 | } |
583 | | |
/* Table of all known collations, indexed by collation ID */
CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
/* Character set used by default; starts out as latin1 */
CHARSET_INFO *default_charset_info = &my_charset_latin1;
586 | | |
587 | | |
588 | | /* |
589 | | Add standard character set compiled into the application |
590 | | All related character sets should share same cname |
591 | | */ |
592 | | |
593 | | int add_compiled_collation(struct charset_info_st *cs) |
594 | 0 | { |
595 | 0 | DBUG_ASSERT(cs->number < array_elements(all_charsets)); |
596 | 0 | all_charsets[cs->number]= cs; |
597 | 0 | cs->state|= MY_CS_AVAILABLE; |
598 | 0 | if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) |
599 | 0 | { |
600 | | #ifndef DBUG_OFF |
601 | | CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, |
602 | | (uchar*) cs->cs_name.str, |
603 | | cs->cs_name.length); |
604 | | DBUG_ASSERT(org); |
605 | | DBUG_ASSERT(org->cs_name.str == cs->cs_name.str); |
606 | | DBUG_ASSERT(org->cs_name.length == strlen(cs->cs_name.str)); |
607 | | #endif |
608 | 0 | } |
609 | 0 | if (cs->coll_name.str) |
610 | 0 | my_hash_insert(&collation_name_hash, (uchar*) cs); |
611 | 0 | return 0; |
612 | 0 | } |
613 | | |
614 | | |
615 | | /* |
616 | | Add optional characters sets from ctype-extra.c |
617 | | |
618 | | If cname is already in use, replace csname in new object with a pointer to |
619 | | the already used csname to ensure that all csname's points to the same string |
620 | | for the same character set. |
621 | | */ |
622 | | |
623 | | |
624 | | void add_compiled_extra_collation(struct charset_info_st *cs) |
625 | 0 | { |
626 | 0 | DBUG_ASSERT(cs->number < array_elements(all_charsets)); |
627 | 0 | all_charsets[cs->number]= cs; |
628 | 0 | cs->state|= MY_CS_AVAILABLE; |
629 | 0 | if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) |
630 | 0 | { |
631 | 0 | CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, |
632 | 0 | (uchar*) cs->cs_name.str, |
633 | 0 | cs->cs_name.length); |
634 | 0 | cs->cs_name= org->cs_name; |
635 | 0 | } |
636 | 0 | if (cs->coll_name.str) |
637 | 0 | my_hash_insert(&collation_name_hash, (uchar*) cs); |
638 | 0 | } |
639 | | |
640 | | |
/* Once-control guarding init_available_charsets() */
static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
/* Pristine once-control; free_charsets() copies it back to re-arm the init */
static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;

/* Usage statistics kept for one collation */
typedef struct
{
  ulonglong use_count;
} MY_COLLATION_STATISTICS;


/* One statistics slot per collation ID, parallel to all_charsets[] */
static MY_COLLATION_STATISTICS my_collation_statistics[MY_ALL_CHARSETS_SIZE];
651 | | |
652 | | |
653 | | my_bool my_collation_is_known_id(uint id) |
654 | 0 | { |
655 | 0 | return id > 0 && id < array_elements(all_charsets) && all_charsets[id] ? |
656 | 0 | TRUE : FALSE; |
657 | 0 | } |
658 | | |
659 | | |
660 | | /* |
661 | | Collation use statistics functions do not lock |
662 | | counters to avoid mutex contention. This can lose |
663 | | some counter increments with high thread concurrency. |
664 | | But this should be Ok, as we don't need exact numbers. |
665 | | */ |
666 | | static inline void my_collation_statistics_inc_use_count(uint id) |
667 | 0 | { |
668 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
669 | 0 | my_collation_statistics[id].use_count++; |
670 | 0 | } |
671 | | |
672 | | |
673 | | ulonglong my_collation_statistics_get_use_count(uint id) |
674 | 0 | { |
675 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
676 | 0 | return my_collation_statistics[id].use_count; |
677 | 0 | } |
678 | | |
679 | | |
680 | | const char *my_collation_get_tailoring(uint id) |
681 | 0 | { |
682 | | /* all_charsets[id]->tailoring is never changed after server startup. */ |
683 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
684 | 0 | return all_charsets[id]->tailoring; |
685 | 0 | } |
686 | | |
687 | | |
688 | | static const uchar *get_charset_key(const void *object, size_t *size, |
689 | | my_bool not_used __attribute__((unused))) |
690 | 0 | { |
691 | 0 | CHARSET_INFO *cs= object; |
692 | 0 | *size= cs->cs_name.length; |
693 | 0 | return (const uchar*) cs->cs_name.str; |
694 | 0 | } |
695 | | |
696 | | static const uchar *get_collation_key(const void *object, size_t *length, |
697 | | my_bool not_used __attribute__((unused))) |
698 | 0 | { |
699 | 0 | CHARSET_INFO *cs= (CHARSET_INFO*) object; |
700 | 0 | *length= cs->coll_name.length; |
701 | 0 | return (const uchar*) cs->coll_name.str; |
702 | 0 | } |
703 | | |
704 | | static void init_available_charsets(void) |
705 | 0 | { |
706 | 0 | char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
707 | 0 | struct charset_info_st **cs; |
708 | 0 | MY_CHARSET_LOADER loader; |
709 | 0 | DBUG_ENTER("init_available_charsets"); |
710 | |
|
711 | 0 | bzero((char*) &all_charsets,sizeof(all_charsets)); |
712 | 0 | bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics)); |
713 | |
|
714 | 0 | my_hash_init2(key_memory_charsets, &charset_name_hash, 16, |
715 | 0 | &my_charset_latin1, 64, 0, 0, get_charset_key, |
716 | 0 | 0, 0, HASH_UNIQUE); |
717 | |
|
718 | 0 | my_hash_init2(key_memory_charsets, &collation_name_hash, 16, |
719 | 0 | &my_charset_latin1, 64, 0, 0, get_collation_key, |
720 | 0 | 0, 0, HASH_UNIQUE); |
721 | |
|
722 | 0 | init_compiled_charsets(MYF(0)); |
723 | | |
724 | | /* Copy compiled charsets */ |
725 | 0 | for (cs= (struct charset_info_st**) all_charsets; |
726 | 0 | cs < (struct charset_info_st**) all_charsets + |
727 | 0 | array_elements(all_charsets)-1 ; |
728 | 0 | cs++) |
729 | 0 | { |
730 | 0 | if (*cs) |
731 | 0 | { |
732 | 0 | DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN); |
733 | 0 | if (cs[0]->m_ctype && !cs[0]->state_map) |
734 | 0 | if (init_state_maps(*cs)) |
735 | 0 | *cs= NULL; |
736 | 0 | } |
737 | 0 | } |
738 | |
|
739 | 0 | my_charset_loader_init_mysys(&loader); |
740 | 0 | strmov(get_charsets_dir(fname), MY_CHARSET_INDEX); |
741 | 0 | my_read_charset_file(&loader, fname, MYF(0)); |
742 | 0 | DBUG_VOID_RETURN; |
743 | 0 | } |
744 | | |
745 | | |
746 | | void free_charsets(void) |
747 | 0 | { |
748 | 0 | charsets_initialized= charsets_template; |
749 | 0 | my_hash_free(&charset_name_hash); |
750 | 0 | my_hash_free(&collation_name_hash); |
751 | 0 | } |
752 | | |
753 | | |
754 | | static const char* |
755 | | get_collation_name_alias(const char *name, char *buf, size_t bufsize, myf flags) |
756 | 0 | { |
757 | 0 | if (!strncasecmp(name, "utf8_", 5)) |
758 | 0 | { |
759 | 0 | my_snprintf(buf, bufsize, "utf8mb%c_%s", |
760 | 0 | flags & MY_UTF8_IS_UTF8MB3 ? '3' : '4', name + 5); |
761 | 0 | return buf; |
762 | 0 | } |
763 | 0 | return NULL; |
764 | 0 | } |
765 | | |
766 | | |
767 | | uint get_collation_number(const char *name, myf flags) |
768 | 0 | { |
769 | 0 | uint id; |
770 | 0 | char alias[64]; |
771 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
772 | 0 | if ((id= get_collation_number_internal(name))) |
773 | 0 | return id; |
774 | 0 | if ((name= get_collation_name_alias(name, alias, sizeof(alias),flags))) |
775 | 0 | return get_collation_number_internal(name); |
776 | 0 | return 0; |
777 | 0 | } |
778 | | |
779 | | |
780 | | static uint |
781 | | get_charset_number_internal(const char *charset_name, uint cs_flags) |
782 | 0 | { |
783 | 0 | CHARSET_INFO **cs; |
784 | | |
785 | 0 | for (cs= all_charsets; |
786 | 0 | cs < all_charsets + array_elements(all_charsets); |
787 | 0 | cs++) |
788 | 0 | { |
789 | 0 | if ( cs[0] && cs[0]->cs_name.str && (cs[0]->state & cs_flags) && |
790 | 0 | !my_strcasecmp_latin1(cs[0]->cs_name.str, charset_name)) |
791 | 0 | return cs[0]->number; |
792 | 0 | } |
793 | 0 | return 0; |
794 | 0 | } |
795 | | |
796 | | |
797 | | uint get_charset_number(const char *charset_name, uint cs_flags, myf flags) |
798 | 0 | { |
799 | 0 | uint id; |
800 | 0 | const char *new_charset_name= flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" : |
801 | 0 | "utf8mb4"; |
802 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
803 | 0 | if ((id= get_charset_number_internal(charset_name, cs_flags))) |
804 | 0 | return id; |
805 | 0 | if ((charset_name= !my_strcasecmp_latin1(charset_name, "utf8") ? |
806 | 0 | new_charset_name : NULL)) |
807 | 0 | return get_charset_number_internal(charset_name, cs_flags); |
808 | 0 | return 0; |
809 | 0 | } |
810 | | |
811 | | |
812 | | const char *get_charset_name(uint charset_number) |
813 | 0 | { |
814 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
815 | |
|
816 | 0 | if (charset_number < array_elements(all_charsets)) |
817 | 0 | { |
818 | 0 | CHARSET_INFO *cs= all_charsets[charset_number]; |
819 | |
|
820 | 0 | if (cs && (cs->number == charset_number) && cs->coll_name.str) |
821 | 0 | return cs->coll_name.str; |
822 | 0 | } |
823 | | |
824 | 0 | return "?"; /* this mimics find_type() */ |
825 | 0 | } |
826 | | |
827 | | |
828 | | static CHARSET_INFO *inheritance_source_by_id(CHARSET_INFO *cs, uint refid) |
829 | 0 | { |
830 | 0 | CHARSET_INFO *refcs; |
831 | 0 | return refid && refid != cs->number && |
832 | 0 | (refcs= all_charsets[refid]) && |
833 | 0 | (refcs->state & MY_CS_AVAILABLE) ? refcs : NULL; |
834 | 0 | } |
835 | | |
836 | | |
837 | | static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs, myf flags) |
838 | 0 | { |
839 | 0 | const char *beg, *end; |
840 | 0 | if (cs->tailoring && |
841 | 0 | !strncmp(cs->tailoring, "[import ", 8) && |
842 | 0 | (end= strchr(cs->tailoring + 8, ']')) && |
843 | 0 | (beg= cs->tailoring + 8) + MY_CS_COLLATION_NAME_SIZE > end) |
844 | 0 | { |
845 | 0 | char name[MY_CS_COLLATION_NAME_SIZE + 1]; |
846 | 0 | memcpy(name, beg, end - beg); |
847 | 0 | name[end - beg]= '\0'; |
848 | 0 | return inheritance_source_by_id(cs, get_collation_number(name,MYF(flags))); |
849 | 0 | } |
850 | 0 | return NULL; |
851 | 0 | } |
852 | | |
853 | | |
854 | | static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs) |
855 | 0 | { |
856 | 0 | uint refid= get_charset_number_internal(cs->cs_name.str, MY_CS_PRIMARY); |
857 | 0 | return inheritance_source_by_id(cs, refid); |
858 | 0 | } |
859 | | |
860 | | |
861 | | static CHARSET_INFO * |
862 | | get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags) |
863 | 0 | { |
864 | 0 | char buf[FN_REFLEN]; |
865 | 0 | struct charset_info_st *cs; |
866 | |
|
867 | 0 | DBUG_ASSERT(cs_number < array_elements(all_charsets)); |
868 | |
|
869 | 0 | if ((cs= (struct charset_info_st*) all_charsets[cs_number])) |
870 | 0 | { |
871 | 0 | if (cs->state & MY_CS_READY) /* if CS is already initialized */ |
872 | 0 | { |
873 | 0 | my_collation_statistics_inc_use_count(cs_number); |
874 | 0 | return cs; |
875 | 0 | } |
876 | | |
877 | | /* |
878 | | To make things thread safe we are not allowing other threads to interfere |
879 | | while we may changing the cs_info_table |
880 | | */ |
881 | 0 | mysql_mutex_lock(&THR_LOCK_charset); |
882 | |
|
883 | 0 | if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */ |
884 | 0 | { |
885 | 0 | MY_CHARSET_LOADER loader; |
886 | 0 | strxmov(get_charsets_dir(buf), cs->cs_name.str, ".xml", NullS); |
887 | 0 | my_charset_loader_init_mysys(&loader); |
888 | 0 | my_read_charset_file(&loader, buf, flags); |
889 | 0 | } |
890 | |
|
891 | 0 | if (cs->state & MY_CS_AVAILABLE) |
892 | 0 | { |
893 | 0 | if (!(cs->state & MY_CS_READY)) |
894 | 0 | { |
895 | 0 | if (!simple_8bit_charset_data_is_full(cs)) |
896 | 0 | { |
897 | 0 | CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs); |
898 | 0 | if (refcs) |
899 | 0 | inherit_charset_data(cs, refcs); |
900 | 0 | } |
901 | 0 | if (!simple_8bit_collation_data_is_full(cs)) |
902 | 0 | { |
903 | 0 | CHARSET_INFO *refcl= find_collation_data_inheritance_source(cs, flags); |
904 | 0 | if (refcl) |
905 | 0 | inherit_collation_data(cs, refcl); |
906 | 0 | } |
907 | |
|
908 | 0 | if (my_ci_init_charset(cs, loader) || |
909 | 0 | my_ci_init_collation(cs, loader)) |
910 | 0 | { |
911 | 0 | cs= NULL; |
912 | 0 | } |
913 | 0 | else |
914 | 0 | cs->state|= MY_CS_READY; |
915 | 0 | } |
916 | 0 | my_collation_statistics_inc_use_count(cs_number); |
917 | 0 | } |
918 | 0 | else |
919 | 0 | cs= NULL; |
920 | |
|
921 | 0 | mysql_mutex_unlock(&THR_LOCK_charset); |
922 | 0 | } |
923 | 0 | return cs; |
924 | 0 | } |
925 | | |
926 | | |
927 | | CHARSET_INFO *get_charset(uint cs_number, myf flags) |
928 | 0 | { |
929 | 0 | CHARSET_INFO *cs= NULL; |
930 | |
|
931 | 0 | if (cs_number == default_charset_info->number) |
932 | 0 | return default_charset_info; |
933 | | |
934 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
935 | |
|
936 | 0 | if (cs_number < array_elements(all_charsets)) |
937 | 0 | { |
938 | 0 | MY_CHARSET_LOADER loader; |
939 | 0 | my_charset_loader_init_mysys(&loader); |
940 | 0 | cs= get_internal_charset(&loader, cs_number, flags); |
941 | 0 | } |
942 | |
|
943 | 0 | if (!cs && (flags & MY_WME)) |
944 | 0 | { |
945 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23]; |
946 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
947 | 0 | cs_string[0]='#'; |
948 | 0 | int10_to_str(cs_number, cs_string+1, 10); |
949 | 0 | my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); |
950 | 0 | } |
951 | 0 | return cs; |
952 | 0 | } |
953 | | |
954 | | |
955 | | /** |
956 | | Find collation by name: extended version of get_charset_by_name() |
957 | | to return error messages to the caller. |
958 | | @param loader Character set loader |
959 | | @param name Collation name |
960 | | @param flags Flags |
961 | | @return NULL on error, pointer to collation on success |
962 | | */ |
963 | | |
964 | | CHARSET_INFO * |
965 | | my_collation_get_by_name(MY_CHARSET_LOADER *loader, |
966 | | const char *name, myf flags) |
967 | 0 | { |
968 | 0 | uint cs_number; |
969 | 0 | CHARSET_INFO *cs; |
970 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
971 | |
|
972 | 0 | cs_number= get_collation_number(name,flags); |
973 | 0 | my_charset_loader_init_mysys(loader); |
974 | 0 | cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; |
975 | |
|
976 | 0 | if (!cs && (flags & MY_WME)) |
977 | 0 | { |
978 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
979 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
980 | 0 | my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file); |
981 | 0 | } |
982 | 0 | return cs; |
983 | 0 | } |
984 | | |
985 | | |
986 | | CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) |
987 | 0 | { |
988 | 0 | MY_CHARSET_LOADER loader; |
989 | 0 | my_charset_loader_init_mysys(&loader); |
990 | 0 | return my_collation_get_by_name(&loader, cs_name, flags); |
991 | 0 | } |
992 | | |
993 | | |
994 | | /** |
995 | | Find character set by name: extended version of get_charset_by_csname() |
996 | | to return error messages to the caller. |
997 | | @param loader Character set loader |
998 | | @param name Collation name |
999 | | @param cs_flags Character set flags (e.g. default or binary collation) |
1000 | | @param flags Flags |
1001 | | @return NULL on error, pointer to collation on success |
1002 | | */ |
1003 | | CHARSET_INFO * |
1004 | | my_charset_get_by_name(MY_CHARSET_LOADER *loader, |
1005 | | const char *cs_name, uint cs_flags, myf flags) |
1006 | 0 | { |
1007 | 0 | uint cs_number; |
1008 | 0 | CHARSET_INFO *cs; |
1009 | 0 | DBUG_ENTER("get_charset_by_csname"); |
1010 | 0 | DBUG_PRINT("enter",("name: '%s'", cs_name)); |
1011 | |
|
1012 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
1013 | |
|
1014 | 0 | cs_number= get_charset_number(cs_name, cs_flags, flags); |
1015 | 0 | cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; |
1016 | |
|
1017 | 0 | if (!cs && (flags & MY_WME)) |
1018 | 0 | { |
1019 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
1020 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
1021 | 0 | my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); |
1022 | 0 | } |
1023 | |
|
1024 | 0 | DBUG_RETURN(cs); |
1025 | 0 | } |
1026 | | |
1027 | | |
1028 | | CHARSET_INFO * |
1029 | | get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) |
1030 | 0 | { |
1031 | 0 | MY_CHARSET_LOADER loader; |
1032 | 0 | my_charset_loader_init_mysys(&loader); |
1033 | 0 | return my_charset_get_by_name(&loader, cs_name, cs_flags, flags); |
1034 | 0 | } |
1035 | | |
1036 | | |
1037 | | /** |
1038 | | Resolve character set by the character set name (utf8, latin1, ...). |
1039 | | |
1040 | | The function tries to resolve character set by the specified name. If |
1041 | | there is character set with the given name, it is assigned to the "cs" |
1042 | | parameter and FALSE is returned. If there is no such character set, |
1043 | | "default_cs" is assigned to the "cs" and TRUE is returned. |
1044 | | |
1045 | | @param[in] cs_name Character set name. |
1046 | | @param[in] default_cs Default character set. |
1047 | | @param[out] cs Variable to store character set. |
1048 | | |
1049 | | @return FALSE if character set was resolved successfully; TRUE if there |
1050 | | is no character set with given name. |
1051 | | */ |
1052 | | |
1053 | | my_bool resolve_charset(const char *cs_name, |
1054 | | CHARSET_INFO *default_cs, |
1055 | | CHARSET_INFO **cs, |
1056 | | myf flags) |
1057 | 0 | { |
1058 | 0 | *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, flags); |
1059 | |
|
1060 | 0 | if (*cs == NULL) |
1061 | 0 | { |
1062 | 0 | *cs= default_cs; |
1063 | 0 | return TRUE; |
1064 | 0 | } |
1065 | | |
1066 | 0 | return FALSE; |
1067 | 0 | } |
1068 | | |
1069 | | |
1070 | | /** |
1071 | | Resolve collation by the collation name (utf8_general_ci, ...). |
1072 | | |
1073 | | The function tries to resolve collation by the specified name. If there |
1074 | | is collation with the given name, it is assigned to the "cl" parameter |
1075 | | and FALSE is returned. If there is no such collation, "default_cl" is |
1076 | | assigned to the "cl" and TRUE is returned. |
1077 | | |
1078 | | @param[out] cl Variable to store collation. |
1079 | | @param[in] cl_name Collation name. |
1080 | | @param[in] default_cl Default collation. |
1081 | | |
1082 | | @return FALSE if collation was resolved successfully; TRUE if there is no |
1083 | | collation with given name. |
1084 | | */ |
1085 | | |
1086 | | my_bool resolve_collation(const char *cl_name, |
1087 | | CHARSET_INFO *default_cl, |
1088 | | CHARSET_INFO **cl, |
1089 | | myf my_flags) |
1090 | 0 | { |
1091 | 0 | *cl= get_charset_by_name(cl_name, my_flags); |
1092 | |
|
1093 | 0 | if (*cl == NULL) |
1094 | 0 | { |
1095 | 0 | *cl= default_cl; |
1096 | 0 | return TRUE; |
1097 | 0 | } |
1098 | | |
1099 | 0 | return FALSE; |
1100 | 0 | } |
1101 | | |
1102 | | |
1103 | | /* |
1104 | | Escape string with backslashes (\) |
1105 | | |
1106 | | SYNOPSIS |
1107 | | escape_string_for_mysql() |
1108 | | charset_info Charset of the strings |
1109 | | to Buffer for escaped string |
1110 | | to_length Length of destination buffer, or 0 |
1111 | | from The string to escape |
1112 | | length The length of the string to escape |
1113 | | overflow Set to 1 if the escaped string did not fit in |
1114 | | the to buffer |
1115 | | |
1116 | | DESCRIPTION |
1117 | | This escapes the contents of a string by adding backslashes before special |
1118 | | characters, and turning others into specific escape sequences, such as |
1119 | | turning newlines into \n and null bytes into \0. |
1120 | | |
1121 | | NOTE |
1122 | | To maintain compatibility with the old C API, to_length may be 0 to mean |
1123 | | "big enough" |
1124 | | |
1125 | | RETURN VALUES |
1126 | | # The length of the escaped string |
1127 | | */ |
1128 | | |
1129 | | size_t escape_string_for_mysql(CHARSET_INFO *charset_info, |
1130 | | char *to, size_t to_length, |
1131 | | const char *from, size_t length, |
1132 | | my_bool *overflow) |
1133 | 0 | { |
1134 | 0 | const char *to_start= to; |
1135 | 0 | const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
1136 | 0 | *overflow= FALSE; |
1137 | 0 | for (end= from + length; from < end; from++) |
1138 | 0 | { |
1139 | 0 | char escape= 0; |
1140 | 0 | #ifdef USE_MB |
1141 | 0 | int tmp_length= my_ci_charlen(charset_info, (const uchar *) from, (const uchar *) end); |
1142 | 0 | if (tmp_length > 1) |
1143 | 0 | { |
1144 | 0 | if (to + tmp_length > to_end) |
1145 | 0 | { |
1146 | 0 | *overflow= TRUE; |
1147 | 0 | break; |
1148 | 0 | } |
1149 | 0 | while (tmp_length--) |
1150 | 0 | *to++= *from++; |
1151 | 0 | from--; |
1152 | 0 | continue; |
1153 | 0 | } |
1154 | | /* |
1155 | | If the next character appears to begin a multi-byte character, we |
1156 | | escape that first byte of that apparent multi-byte character. (The |
1157 | | character just looks like a multi-byte character -- if it were actually |
1158 | | a multi-byte character, it would have been passed through in the test |
1159 | | above.) |
1160 | | |
1161 | | Without this check, we can create a problem by converting an invalid |
1162 | | multi-byte character into a valid one. For example, 0xbf27 is not |
1163 | | a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) |
1164 | | */ |
1165 | 0 | if (tmp_length < 1) /* Bad byte sequence */ |
1166 | 0 | escape= *from; |
1167 | 0 | else |
1168 | 0 | #endif |
1169 | 0 | switch (*from) { |
1170 | 0 | case 0: /* Must be escaped for 'mysql' */ |
1171 | 0 | escape= '0'; |
1172 | 0 | break; |
1173 | 0 | case '\n': /* Must be escaped for logs */ |
1174 | 0 | escape= 'n'; |
1175 | 0 | break; |
1176 | 0 | case '\r': |
1177 | 0 | escape= 'r'; |
1178 | 0 | break; |
1179 | 0 | case '\\': |
1180 | 0 | escape= '\\'; |
1181 | 0 | break; |
1182 | 0 | case '\'': |
1183 | 0 | escape= '\''; |
1184 | 0 | break; |
1185 | 0 | case '"': /* Better safe than sorry */ |
1186 | 0 | escape= '"'; |
1187 | 0 | break; |
1188 | 0 | case '\032': /* This gives problems on Win32 */ |
1189 | 0 | escape= 'Z'; |
1190 | 0 | break; |
1191 | 0 | } |
1192 | 0 | if (escape) |
1193 | 0 | { |
1194 | 0 | if (to + 2 > to_end) |
1195 | 0 | { |
1196 | 0 | *overflow= TRUE; |
1197 | 0 | break; |
1198 | 0 | } |
1199 | 0 | *to++= '\\'; |
1200 | 0 | *to++= escape; |
1201 | 0 | } |
1202 | 0 | else |
1203 | 0 | { |
1204 | 0 | if (to + 1 > to_end) |
1205 | 0 | { |
1206 | 0 | *overflow= TRUE; |
1207 | 0 | break; |
1208 | 0 | } |
1209 | 0 | *to++= *from; |
1210 | 0 | } |
1211 | 0 | } |
1212 | 0 | *to= 0; |
1213 | 0 | return (size_t) (to - to_start); |
1214 | 0 | } |
1215 | | |
1216 | | |
#ifdef BACKSLASH_MBTAIL
/*
  Return the character set used for the file system. The choice is made on
  the first call and cached: cp932 when it is compiled in and GetACP()
  reports 932, otherwise my_charset_bin.
*/
CHARSET_INFO *fs_character_set()
{
  static CHARSET_INFO *fs_cset_cache;

  if (!fs_cset_cache)
  {
#ifdef HAVE_CHARSET_cp932
    if (GetACP() == 932)
      fs_cset_cache= &my_charset_cp932_japanese_ci;
    else
#endif
      fs_cset_cache= &my_charset_bin;
  }
  return fs_cset_cache;
}
#endif
1231 | | |
1232 | | /* |
1233 | | Escape apostrophes by doubling them up |
1234 | | |
1235 | | SYNOPSIS |
1236 | | escape_quotes_for_mysql() |
1237 | | charset_info Charset of the strings |
1238 | | to Buffer for escaped string |
1239 | | to_length Length of destination buffer, or 0 |
1240 | | from The string to escape |
1241 | | length The length of the string to escape |
1242 | | overflow Set to 1 if the buffer overflows |
1243 | | |
1244 | | DESCRIPTION |
1245 | | This escapes the contents of a string by doubling up any apostrophes that |
1246 | | it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in |
1247 | | effect on the server. |
1248 | | |
1249 | | NOTE |
1250 | | To be consistent with escape_string_for_mysql(), to_length may be 0 to |
1251 | | mean "big enough" |
1252 | | |
1253 | | RETURN VALUES |
1254 | | The length of the escaped string |
1255 | | */ |
1256 | | |
1257 | | size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info, |
1258 | | char *to, size_t to_length, |
1259 | | const char *from, size_t length, |
1260 | | my_bool *overflow) |
1261 | 0 | { |
1262 | 0 | const char *to_start= to; |
1263 | 0 | const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
1264 | 0 | #ifdef USE_MB |
1265 | 0 | my_bool use_mb_flag= my_ci_use_mb(charset_info); |
1266 | 0 | #endif |
1267 | 0 | *overflow= FALSE; |
1268 | 0 | for (end= from + length; from < end; from++) |
1269 | 0 | { |
1270 | 0 | #ifdef USE_MB |
1271 | 0 | int tmp_length; |
1272 | 0 | if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
1273 | 0 | { |
1274 | 0 | if (to + tmp_length > to_end) |
1275 | 0 | { |
1276 | 0 | *overflow= TRUE; |
1277 | 0 | break; |
1278 | 0 | } |
1279 | 0 | while (tmp_length--) |
1280 | 0 | *to++= *from++; |
1281 | 0 | from--; |
1282 | 0 | continue; |
1283 | 0 | } |
1284 | | /* |
1285 | | We don't have the same issue here with a non-multi-byte character being |
1286 | | turned into a multi-byte character by the addition of an escaping |
1287 | | character, because we are only escaping the ' character with itself. |
1288 | | */ |
1289 | 0 | #endif |
1290 | 0 | if (*from == '\'') |
1291 | 0 | { |
1292 | 0 | if (to + 2 > to_end) |
1293 | 0 | { |
1294 | 0 | *overflow= TRUE; |
1295 | 0 | break; |
1296 | 0 | } |
1297 | 0 | *to++= '\''; |
1298 | 0 | *to++= '\''; |
1299 | 0 | } |
1300 | 0 | else |
1301 | 0 | { |
1302 | 0 | if (to + 1 > to_end) |
1303 | 0 | { |
1304 | 0 | *overflow= TRUE; |
1305 | 0 | break; |
1306 | 0 | } |
1307 | 0 | *to++= *from; |
1308 | 0 | } |
1309 | 0 | } |
1310 | 0 | *to= 0; |
1311 | 0 | return (size_t) (to - to_start); |
1312 | 0 | } |
1313 | | |
1314 | | |
/* How well an OS character set name matches the MySQL one it is mapped to */
typedef enum my_cs_match_type_enum
{
  /* MySQL and OS charsets are fully compatible */
  my_cs_exact,
  /* MySQL charset is very close to OS charset */
  my_cs_approx,
  /*
    MySQL knows this charset, but it is not supported as client character set.
  */
  my_cs_unsupp
} my_cs_match_type;


/* One row of the OS name -> MySQL name mapping table */
typedef struct str2str_st
{
  const char* os_name;          /* Name as reported by the OS */
  const char* my_name;          /* Corresponding MySQL character set name */
  my_cs_match_type param;       /* Quality of the match */
} MY_CSET_OS_NAME;

/*
  Mapping from OS character set names to MySQL character set names.
  On Windows the OS names have the form "cpNNNN" (codepage number).
  The list is terminated by an entry with os_name == NULL.
*/
static const MY_CSET_OS_NAME charsets[] =
{
#ifdef _WIN32
  {"cp437",          "cp850",    my_cs_approx},
  {"cp850",          "cp850",    my_cs_exact},
  {"cp852",          "cp852",    my_cs_exact},
  {"cp858",          "cp850",    my_cs_approx},
  {"cp866",          "cp866",    my_cs_exact},
  {"cp874",          "tis620",   my_cs_approx},
  {"cp932",          "cp932",    my_cs_exact},
  {"cp936",          "gbk",      my_cs_approx},
  {"cp949",          "euckr",    my_cs_approx},
  {"cp950",          "big5",     my_cs_exact},
  {"cp1200",         "utf16le",  my_cs_unsupp},
  {"cp1201",         "utf16",    my_cs_unsupp},
  {"cp1250",         "cp1250",   my_cs_exact},
  {"cp1251",         "cp1251",   my_cs_exact},
  {"cp1252",         "latin1",   my_cs_exact},
  {"cp1253",         "greek",    my_cs_exact},
  {"cp1254",         "latin5",   my_cs_exact},
  {"cp1255",         "hebrew",   my_cs_approx},
  {"cp1256",         "cp1256",   my_cs_exact},
  {"cp1257",         "cp1257",   my_cs_exact},
  {"cp10000",        "macroman", my_cs_exact},
  {"cp10001",        "sjis",     my_cs_approx},
  {"cp10002",        "big5",     my_cs_approx},
  {"cp10008",        "gb2312",   my_cs_approx},
  {"cp10021",        "tis620",   my_cs_approx},
  {"cp10029",        "macce",    my_cs_exact},
  {"cp12001",        "utf32",    my_cs_unsupp},
  {"cp20107",        "swe7",     my_cs_exact},
  {"cp20127",        "latin1",   my_cs_approx},
  {"cp20866",        "koi8r",    my_cs_exact},
  {"cp20932",        "ujis",     my_cs_exact},
  {"cp20936",        "gb2312",   my_cs_approx},
  {"cp20949",        "euckr",    my_cs_approx},
  {"cp21866",        "koi8u",    my_cs_exact},
  {"cp28591",        "latin1",   my_cs_approx},
  {"cp28592",        "latin2",   my_cs_exact},
  {"cp28597",        "greek",    my_cs_exact},
  {"cp28598",        "hebrew",   my_cs_exact},
  {"cp28599",        "latin5",   my_cs_exact},
  {"cp28603",        "latin7",   my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"cp28605",        "latin9",   my_cs_exact},
#endif
  {"cp38598",        "hebrew",   my_cs_exact},
  {"cp51932",        "ujis",     my_cs_exact},
  {"cp51936",        "gb2312",   my_cs_exact},
  {"cp51949",        "euckr",    my_cs_exact},
  {"cp51950",        "big5",     my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"cp54936",        "gb18030",  my_cs_exact},
#endif
  /* Two rows for the same codepage: exact for utf8mb4, approx for utf8mb3 */
  {"cp65001",        "utf8mb4",  my_cs_exact},
  {"cp65001",        "utf8mb3",  my_cs_approx},
#else /* not Windows */

  {"646",            "latin1",   my_cs_approx}, /* Default on Solaris */
  {"ANSI_X3.4-1968", "latin1",   my_cs_approx},
  {"ansi1251",       "cp1251",   my_cs_exact},
  {"armscii8",       "armscii8", my_cs_exact},
  {"armscii-8",      "armscii8", my_cs_exact},
  {"ASCII",          "latin1",   my_cs_approx},
  {"Big5",           "big5",     my_cs_exact},
  {"cp1251",         "cp1251",   my_cs_exact},
  {"cp1255",         "hebrew",   my_cs_approx},
  {"CP866",          "cp866",    my_cs_exact},
  {"eucCN",          "gb2312",   my_cs_exact},
  {"euc-CN",         "gb2312",   my_cs_exact},
  {"eucJP",          "ujis",     my_cs_exact},
  {"euc-JP",         "ujis",     my_cs_exact},
  {"eucKR",          "euckr",    my_cs_exact},
  {"euc-KR",         "euckr",    my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"gb18030",        "gb18030",  my_cs_exact},
#endif
  {"gb2312",         "gb2312",   my_cs_exact},
  {"gbk",            "gbk",      my_cs_exact},
  {"georgianps",     "geostd8",  my_cs_exact},
  {"georgian-ps",    "geostd8",  my_cs_exact},
  /*
    Codepage 1252 is named "latin1" on the MySQL side (see the Windows
    "cp1252" row above); "cp1252" is not used as a target name anywhere
    else in this table.
  */
  {"IBM-1252",       "latin1",   my_cs_exact},

  {"iso88591",       "latin1",   my_cs_approx},
  {"ISO_8859-1",     "latin1",   my_cs_approx},
  {"ISO8859-1",      "latin1",   my_cs_approx},
  {"ISO-8859-1",     "latin1",   my_cs_approx},

  {"iso885913",      "latin7",   my_cs_exact},
  {"ISO_8859-13",    "latin7",   my_cs_exact},
  {"ISO8859-13",     "latin7",   my_cs_exact},
  {"ISO-8859-13",    "latin7",   my_cs_exact},

#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"iso885915",      "latin9",   my_cs_exact},
  {"ISO_8859-15",    "latin9",   my_cs_exact},
  {"ISO8859-15",     "latin9",   my_cs_exact},
  {"ISO-8859-15",    "latin9",   my_cs_exact},
#endif

  {"iso88592",       "latin2",   my_cs_exact},
  {"ISO_8859-2",     "latin2",   my_cs_exact},
  {"ISO8859-2",      "latin2",   my_cs_exact},
  {"ISO-8859-2",     "latin2",   my_cs_exact},

  {"iso88597",       "greek",    my_cs_exact},
  {"ISO_8859-7",     "greek",    my_cs_exact},
  {"ISO8859-7",      "greek",    my_cs_exact},
  {"ISO-8859-7",     "greek",    my_cs_exact},

  {"iso88598",       "hebrew",   my_cs_exact},
  {"ISO_8859-8",     "hebrew",   my_cs_exact},
  {"ISO8859-8",      "hebrew",   my_cs_exact},
  {"ISO-8859-8",     "hebrew",   my_cs_exact},

  {"iso88599",       "latin5",   my_cs_exact},
  {"ISO_8859-9",     "latin5",   my_cs_exact},
  {"ISO8859-9",      "latin5",   my_cs_exact},
  {"ISO-8859-9",     "latin5",   my_cs_exact},

  {"koi8r",          "koi8r",    my_cs_exact},
  {"KOI8-R",         "koi8r",    my_cs_exact},
  {"koi8u",          "koi8u",    my_cs_exact},
  {"KOI8-U",         "koi8u",    my_cs_exact},

  {"roman8",         "hp8",      my_cs_exact}, /* Default on HP UX */

  {"Shift_JIS",      "sjis",     my_cs_exact},
  {"SJIS",           "sjis",     my_cs_exact},
  {"shiftjisx0213",  "sjis",     my_cs_exact},

  {"tis620",         "tis620",   my_cs_exact},
  {"tis-620",        "tis620",   my_cs_exact},

  {"ujis",           "ujis",     my_cs_exact},

  {"US-ASCII",       "latin1",   my_cs_approx},

  {"utf8",           "utf8mb4",  my_cs_exact},
  {"utf-8",          "utf8mb4",  my_cs_exact},
#endif
  {NULL,             NULL,       0}
};
1478 | | |
1479 | | |
1480 | | static const char* |
1481 | | my_os_charset_to_mysql_charset(const char* csname) |
1482 | 0 | { |
1483 | 0 | const MY_CSET_OS_NAME* csp; |
1484 | 0 | for (csp = charsets; csp->os_name; csp++) |
1485 | 0 | { |
1486 | 0 | if (!strcasecmp(csp->os_name, csname)) |
1487 | 0 | { |
1488 | 0 | switch (csp->param) |
1489 | 0 | { |
1490 | 0 | case my_cs_exact: |
1491 | 0 | return csp->my_name; |
1492 | | |
1493 | 0 | case my_cs_approx: |
1494 | | /* |
1495 | | Maybe we should print a warning eventually: |
1496 | | character set correspondence is not exact. |
1497 | | */ |
1498 | 0 | return csp->my_name; |
1499 | | |
1500 | 0 | default: |
1501 | 0 | return NULL; |
1502 | 0 | } |
1503 | 0 | } |
1504 | 0 | } |
1505 | 0 | return NULL; |
1506 | 0 | } |
1507 | | |
1508 | | const char* my_default_csname() |
1509 | 0 | { |
1510 | 0 | const char* csname = NULL; |
1511 | | #ifdef _WIN32 |
1512 | | char cpbuf[64]; |
1513 | | UINT cp; |
1514 | | if (GetACP() == CP_UTF8) |
1515 | | cp= CP_UTF8; |
1516 | | else |
1517 | | { |
1518 | | cp= GetConsoleCP(); |
1519 | | if (cp == 0) |
1520 | | cp= GetACP(); |
1521 | | } |
1522 | | snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp); |
1523 | | csname = my_os_charset_to_mysql_charset(cpbuf); |
1524 | | #elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) |
1525 | 0 | if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET))) |
1526 | 0 | csname = my_os_charset_to_mysql_charset(csname); |
1527 | 0 | #endif |
1528 | 0 | return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME; |
1529 | 0 | } |
1530 | | |
1531 | | |
1532 | | #ifdef _WIN32 |
1533 | | /** |
1534 | | Extract codepage number from "cpNNNN" string, |
1535 | | and check that this codepage is supported. |
1536 | | |
1537 | | @return 0 - invalid codepage(or unsupported) |
1538 | | > 0 - valid codepage number. |
1539 | | */ |
1540 | | static UINT get_codepage(const char *s) |
1541 | | { |
1542 | | UINT cp; |
1543 | | if (s[0] != 'c' || s[1] != 'p') |
1544 | | { |
1545 | | DBUG_ASSERT(0); |
1546 | | return 0; |
1547 | | } |
1548 | | cp= strtoul(s + 2, NULL, 10); |
1549 | | if (!IsValidCodePage(cp)) |
1550 | | { |
1551 | | /* |
1552 | | Can happen also with documented CP, i.e 51936 |
1553 | | Perhaps differs from one machine to another. |
1554 | | */ |
1555 | | return 0; |
1556 | | } |
1557 | | return cp; |
1558 | | } |
1559 | | |
1560 | | static UINT mysql_charset_to_codepage(const char *my_cs_name) |
1561 | | { |
1562 | | const MY_CSET_OS_NAME *csp; |
1563 | | UINT cp=0,tmp; |
1564 | | for (csp= charsets; csp->os_name; csp++) |
1565 | | { |
1566 | | if (!strcasecmp(csp->my_name, my_cs_name)) |
1567 | | { |
1568 | | switch (csp->param) |
1569 | | { |
1570 | | case my_cs_exact: |
1571 | | tmp= get_codepage(csp->os_name); |
1572 | | if (tmp) |
1573 | | return tmp; |
1574 | | break; |
1575 | | case my_cs_approx: |
1576 | | /* |
1577 | | don't return just yet, perhaps there is a better |
1578 | | (exact) match later. |
1579 | | */ |
1580 | | if (!cp) |
1581 | | cp= get_codepage(csp->os_name); |
1582 | | continue; |
1583 | | |
1584 | | default: |
1585 | | return 0; |
1586 | | } |
1587 | | } |
1588 | | } |
1589 | | return cp; |
1590 | | } |
1591 | | |
1592 | | /** Set console codepage for MariaDB's charset name */ |
1593 | | int my_set_console_cp(const char *csname) |
1594 | | { |
1595 | | UINT cp; |
1596 | | if (fileno(stdout) < 0 || !isatty(fileno(stdout))) |
1597 | | return 0; |
1598 | | cp= mysql_charset_to_codepage(csname); |
1599 | | if (!cp) |
1600 | | { |
1601 | | /* No compatible os charset.*/ |
1602 | | return -1; |
1603 | | } |
1604 | | |
1605 | | if (GetConsoleOutputCP() != cp && !SetConsoleOutputCP(cp)) |
1606 | | { |
1607 | | return -1; |
1608 | | } |
1609 | | |
1610 | | if (GetConsoleCP() != cp && !SetConsoleCP(cp)) |
1611 | | { |
1612 | | return -1; |
1613 | | } |
1614 | | return 0; |
1615 | | } |
1616 | | #endif |