/src/server/mysys/charset.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2000, 2011, Oracle and/or its affiliates |
3 | | Copyright (c) 2009, 2020, MariaDB Corporation. |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; version 2 of the License. |
8 | | |
9 | | This program is distributed in the hope that it will be useful, |
10 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | GNU General Public License for more details. |
13 | | |
14 | | You should have received a copy of the GNU General Public License |
15 | | along with this program; if not, write to the Free Software |
16 | | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ |
17 | | |
18 | | #include "mysys_priv.h" |
19 | | #include "mysys_err.h" |
20 | | #include <m_ctype.h> |
21 | | #include <m_string.h> |
22 | | #include <my_dir.h> |
23 | | #include <hash.h> |
24 | | #include <my_xml.h> |
25 | | #ifdef HAVE_LANGINFO_H |
26 | | #include <langinfo.h> |
27 | | #endif |
28 | | #ifdef HAVE_LOCALE_H |
29 | | #include <locale.h> |
30 | | #endif |
31 | | |
32 | | extern HASH charset_name_hash; |
33 | | |
34 | | /* |
35 | | The code below implements this functionality: |
36 | | |
37 | | - Initializing charset related structures |
38 | | - Loading dynamic charsets |
39 | | - Searching for a proper CHARSET_INFO |
40 | | using charset name, collation name or collation ID |
41 | | - Setting server default character set |
42 | | */ |
43 | | |
44 | | static uint |
45 | | get_collation_number_internal(const char *name) |
46 | 0 | { |
47 | |
|
48 | 0 | CHARSET_INFO **cs; |
49 | 0 | for (cs= all_charsets; |
50 | 0 | cs < all_charsets + array_elements(all_charsets); |
51 | 0 | cs++) |
52 | 0 | { |
53 | 0 | if (cs[0] && cs[0]->coll_name.str && |
54 | 0 | !my_strcasecmp_latin1(cs[0]->coll_name.str, name)) |
55 | 0 | return cs[0]->number; |
56 | 0 | } |
57 | 0 | return 0; |
58 | 0 | } |
59 | | |
60 | | |
61 | | static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch) |
62 | 0 | { |
63 | 0 | int chlen= my_ci_charlen(cs, &ch, &ch + 1); |
64 | 0 | return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE; |
65 | 0 | } |
66 | | |
67 | | static my_bool init_state_maps(struct charset_info_st *cs) |
68 | 0 | { |
69 | 0 | uint i; |
70 | 0 | uchar *state_map; |
71 | 0 | uchar *ident_map; |
72 | |
|
73 | 0 | if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256*2, MYF(MY_WME)))) |
74 | 0 | return 1; |
75 | | |
76 | 0 | cs->ident_map= ident_map= state_map + 256; |
77 | | |
78 | | /* Fill state_map with states to get a faster parser */ |
79 | 0 | for (i=0; i < 256 ; i++) |
80 | 0 | { |
81 | 0 | if (my_isalpha(cs,i)) |
82 | 0 | state_map[i]=(uchar) MY_LEX_IDENT; |
83 | 0 | else if (my_isdigit(cs,i)) |
84 | 0 | state_map[i]=(uchar) MY_LEX_NUMBER_IDENT; |
85 | 0 | else if (is_multi_byte_ident(cs, i)) |
86 | 0 | state_map[i]=(uchar) MY_LEX_IDENT; |
87 | 0 | else if (my_isspace(cs,i)) |
88 | 0 | state_map[i]=(uchar) MY_LEX_SKIP; |
89 | 0 | else |
90 | 0 | state_map[i]=(uchar) MY_LEX_CHAR; |
91 | 0 | } |
92 | 0 | state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT; |
93 | 0 | state_map[(uchar)'\'']=(uchar) MY_LEX_STRING; |
94 | 0 | state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT; |
95 | 0 | state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP; |
96 | 0 | state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP; |
97 | 0 | state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL; |
98 | 0 | state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT; |
99 | 0 | state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON; |
100 | 0 | state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR; |
101 | 0 | state_map[0]=(uchar) MY_LEX_EOL; |
102 | 0 | state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE; |
103 | 0 | state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT; |
104 | 0 | state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT; |
105 | 0 | state_map[(uchar)'@']= (uchar) MY_LEX_USER_END; |
106 | 0 | state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER; |
107 | 0 | state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER; |
108 | 0 | state_map[(uchar)'-']= (uchar) MY_LEX_MINUS_OR_COMMENT; |
109 | 0 | state_map[(uchar)',']= (uchar) MY_LEX_COMMA; |
110 | 0 | state_map[(uchar)'?']= (uchar) MY_LEX_PLACEHOLDER; |
111 | | |
112 | | /* |
113 | | Create a second map to make it faster to find identifiers |
114 | | */ |
115 | 0 | for (i=0; i < 256 ; i++) |
116 | 0 | { |
117 | 0 | ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT || |
118 | 0 | state_map[i] == MY_LEX_NUMBER_IDENT); |
119 | 0 | } |
120 | | |
121 | | /* Special handling of hex and binary strings */ |
122 | 0 | state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX; |
123 | 0 | state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN; |
124 | 0 | state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR; |
125 | 0 | return 0; |
126 | 0 | } |
127 | | |
128 | | |
129 | | static MY_COLLATION_HANDLER *get_simple_collation_handler_by_flags(uint flags) |
130 | 0 | { |
131 | 0 | return flags & MY_CS_BINSORT ? |
132 | 0 | (flags & MY_CS_NOPAD ? |
133 | 0 | &my_collation_8bit_nopad_bin_handler : |
134 | 0 | &my_collation_8bit_bin_handler) : |
135 | 0 | (flags & MY_CS_NOPAD ? |
136 | 0 | &my_collation_8bit_simple_nopad_ci_handler : |
137 | 0 | &my_collation_8bit_simple_ci_handler); |
138 | 0 | } |
139 | | |
140 | | |
141 | | static void simple_cs_init_functions(struct charset_info_st *cs) |
142 | 0 | { |
143 | 0 | cs->coll= get_simple_collation_handler_by_flags(cs->state); |
144 | 0 | cs->cset= &my_charset_8bit_handler; |
145 | 0 | } |
146 | | |
147 | | |
148 | | |
149 | | static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) |
150 | 0 | { |
151 | 0 | to->number= from->number ? from->number : to->number; |
152 | | |
153 | | /* Don't replace csname if already set */ |
154 | 0 | if (from->cs_name.str && !to->cs_name.str) |
155 | 0 | { |
156 | 0 | if (!(to->cs_name.str= my_once_memdup(from->cs_name.str, |
157 | 0 | from->cs_name.length + 1, |
158 | 0 | MYF(MY_WME)))) |
159 | 0 | goto err; |
160 | 0 | to->cs_name.length= from->cs_name.length; |
161 | 0 | } |
162 | | |
163 | 0 | if (from->coll_name.str) |
164 | 0 | { |
165 | 0 | if (!(to->coll_name.str= my_once_memdup(from->coll_name.str, |
166 | 0 | from->coll_name.length + 1, |
167 | 0 | MYF(MY_WME)))) |
168 | 0 | goto err; |
169 | 0 | to->coll_name.length= from->coll_name.length; |
170 | 0 | } |
171 | | |
172 | 0 | if (from->comment) |
173 | 0 | if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) |
174 | 0 | goto err; |
175 | | |
176 | 0 | if (from->m_ctype) |
177 | 0 | { |
178 | 0 | if (!(to->m_ctype= (uchar*) my_once_memdup((char*) from->m_ctype, |
179 | 0 | MY_CS_CTYPE_TABLE_SIZE, |
180 | 0 | MYF(MY_WME)))) |
181 | 0 | goto err; |
182 | 0 | if (init_state_maps(to)) |
183 | 0 | goto err; |
184 | 0 | } |
185 | 0 | if (from->to_lower) |
186 | 0 | if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower, |
187 | 0 | MY_CS_TO_LOWER_TABLE_SIZE, |
188 | 0 | MYF(MY_WME)))) |
189 | 0 | goto err; |
190 | | |
191 | 0 | if (from->to_upper) |
192 | 0 | if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper, |
193 | 0 | MY_CS_TO_UPPER_TABLE_SIZE, |
194 | 0 | MYF(MY_WME)))) |
195 | 0 | goto err; |
196 | 0 | if (from->sort_order) |
197 | 0 | { |
198 | 0 | if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order, |
199 | 0 | MY_CS_SORT_ORDER_TABLE_SIZE, |
200 | 0 | MYF(MY_WME)))) |
201 | 0 | goto err; |
202 | |
|
203 | 0 | } |
204 | 0 | if (from->tab_to_uni) |
205 | 0 | { |
206 | 0 | uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16); |
207 | 0 | if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni, |
208 | 0 | sz, MYF(MY_WME)))) |
209 | 0 | goto err; |
210 | 0 | } |
211 | 0 | if (from->tailoring) |
212 | 0 | if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME)))) |
213 | 0 | goto err; |
214 | | |
215 | 0 | return 0; |
216 | | |
217 | 0 | err: |
218 | 0 | return 1; |
219 | 0 | } |
220 | | |
221 | | |
222 | | static my_bool simple_8bit_charset_data_is_full(CHARSET_INFO *cs) |
223 | 0 | { |
224 | 0 | return cs->m_ctype && cs->to_upper && cs->to_lower && cs->tab_to_uni; |
225 | 0 | } |
226 | | |
227 | | |
228 | | /** |
229 | | Inherit missing 8bit charset data from another collation. |
230 | | Arrays pointed by refcs must be in the permanent memory already, |
231 | | e.g. static memory, or allocated by my_once_xxx(). |
232 | | */ |
233 | | static void |
234 | | inherit_charset_data(struct charset_info_st *cs, CHARSET_INFO *refcs) |
235 | 0 | { |
236 | 0 | if (!cs->to_upper) |
237 | 0 | cs->to_upper= refcs->to_upper; |
238 | 0 | if (!cs->to_lower) |
239 | 0 | cs->to_lower= refcs->to_lower; |
240 | 0 | if (!cs->m_ctype) |
241 | 0 | cs->m_ctype= refcs->m_ctype; |
242 | 0 | if (!cs->tab_to_uni) |
243 | 0 | cs->tab_to_uni= refcs->tab_to_uni; |
244 | 0 | } |
245 | | |
246 | | |
247 | | static my_bool simple_8bit_collation_data_is_full(CHARSET_INFO *cs) |
248 | 0 | { |
249 | 0 | return cs->sort_order || (cs->state & MY_CS_BINSORT); |
250 | 0 | } |
251 | | |
252 | | |
253 | | /** |
254 | | Inherit 8bit simple collation data from another collation. |
255 | | refcs->sort_order must be in the permanent memory already, |
256 | | e.g. static memory, or allocated by my_once_xxx(). |
257 | | */ |
258 | | static void |
259 | | inherit_collation_data(struct charset_info_st *cs, CHARSET_INFO *refcs) |
260 | 0 | { |
261 | 0 | if (!simple_8bit_collation_data_is_full(cs)) |
262 | 0 | cs->sort_order= refcs->sort_order; |
263 | 0 | } |
264 | | |
265 | | |
266 | | static my_bool simple_cs_is_full(CHARSET_INFO *cs) |
267 | 0 | { |
268 | 0 | return cs->number && cs->cs_name.str && cs->coll_name.str && |
269 | 0 | simple_8bit_charset_data_is_full(cs) && |
270 | 0 | (simple_8bit_collation_data_is_full(cs) || cs->tailoring); |
271 | 0 | } |
272 | | |
273 | | |
/*
  The guard lists every character set under which add_collation() calls
  copy_uca_collation(), so that the function is also defined in builds
  that have e.g. utf8mb4 but neither ucs2 nor utf8mb3.
*/
#if defined(HAVE_UCA_COLLATIONS) && \
    (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8mb3) || \
     defined(HAVE_CHARSET_utf8mb4) || defined(HAVE_CHARSET_utf16) || \
     defined(HAVE_CHARSET_utf32))
/**
  Initialize a loaded collation.
  @param [OUT] to     - The new charset_info_st structure to initialize.
  @param [IN]  from   - A template collation, to fill the missing data from.
  @param [IN]  loaded - The collation data loaded from the LDML file.
                        Some data may be missing in "loaded".
*/
static void
copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from,
                   CHARSET_INFO *loaded)
{
  to->cset= from->cset;
  to->coll= from->coll;
  /*
    Single-level UCA collations have strxfrm_multiply=8.
    In case of a multi-level UCA collation we use strxfrm_multiply=4.
    That means MY_COLLATION_HANDLER::strnxfrmlen() will request the caller
    to allocate a smaller buffer for each level, for performance purposes,
    and to fit longer VARCHARs to @@max_sort_length.
    This makes filesort produce non-precise order for some rare Unicode
    characters that produce more than 4 weights (long expansions).
    UCA requires 2 bytes per weight multiplied by the number of levels.
    In case of a 2-level collation, each character requires 4*2=8 bytes.
    Therefore, the longest VARCHAR that fits into the default @@max_sort_length
    is 1024/8=VARCHAR(128). With strxfrm_multiply==8, only VARCHAR(64)
    would fit.
    Note, the built-in collation utf8_thai_520_w2 also uses strxfrm_multiply=4,
    for the same purpose.
    TODO: we could add a new LDML syntax to choose strxfrm_multiply value.
  */
  to->strxfrm_multiply= loaded->levels_for_order > 1 ?
                        4 : from->strxfrm_multiply;
  to->min_sort_char= from->min_sort_char;
  to->max_sort_char= from->max_sort_char;
  to->mbminlen= from->mbminlen;
  to->mbmaxlen= from->mbmaxlen;
  to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
              MY_CS_STRNXFRM  | MY_CS_UNICODE;
}
#endif
315 | | |
316 | | |
317 | | static int add_collation(struct charset_info_st *cs) |
318 | 0 | { |
319 | 0 | if (cs->coll_name.str && |
320 | 0 | (cs->number || |
321 | 0 | (cs->number=get_collation_number_internal(cs->coll_name.str))) && |
322 | 0 | cs->number < array_elements(all_charsets)) |
323 | 0 | { |
324 | 0 | struct charset_info_st *newcs; |
325 | 0 | if (!(newcs= (struct charset_info_st*) all_charsets[cs->number])) |
326 | 0 | { |
327 | 0 | if (!(all_charsets[cs->number]= newcs= |
328 | 0 | (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) |
329 | 0 | return MY_XML_ERROR; |
330 | 0 | bzero(newcs,sizeof(CHARSET_INFO)); |
331 | 0 | } |
332 | 0 | else |
333 | 0 | { |
334 | | /* Don't allow change of csname */ |
335 | 0 | if (newcs->cs_name.str && strcmp(newcs->cs_name.str, cs->cs_name.str)) |
336 | 0 | { |
337 | 0 | my_error(EE_DUPLICATE_CHARSET, MYF(ME_WARNING), |
338 | 0 | cs->number, cs->cs_name.str, newcs->cs_name.str); |
339 | | /* |
340 | | Continue parsing rest of Index.xml. We got an warning in the log |
341 | | so the user can fix the wrong character set definition. |
342 | | */ |
343 | 0 | return MY_XML_OK; |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | 0 | if (cs->primary_number == cs->number) |
348 | 0 | cs->state |= MY_CS_PRIMARY; |
349 | | |
350 | 0 | if (cs->binary_number == cs->number) |
351 | 0 | cs->state |= MY_CS_BINSORT; |
352 | | |
353 | 0 | newcs->state|= cs->state; |
354 | | |
355 | 0 | if (!(newcs->state & MY_CS_COMPILED)) |
356 | 0 | { |
357 | 0 | if (cs_copy_data(newcs,cs)) |
358 | 0 | return MY_XML_ERROR; |
359 | | |
360 | 0 | newcs->levels_for_order= 1; |
361 | | |
362 | 0 | if (!strcmp(cs->cs_name.str,"ucs2") ) |
363 | 0 | { |
364 | 0 | #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) |
365 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
366 | 0 | &my_charset_ucs2_unicode_nopad_ci : |
367 | 0 | &my_charset_ucs2_unicode_ci, |
368 | 0 | cs); |
369 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
370 | 0 | #endif |
371 | 0 | } |
372 | 0 | else if (!strcmp(cs->cs_name.str, "utf8") || |
373 | 0 | !strcmp(cs->cs_name.str, "utf8mb3")) |
374 | 0 | { |
375 | 0 | #if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS) |
376 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
377 | 0 | &my_charset_utf8mb3_unicode_nopad_ci : |
378 | 0 | &my_charset_utf8mb3_unicode_ci, |
379 | 0 | cs); |
380 | 0 | newcs->m_ctype= my_charset_utf8mb3_unicode_ci.m_ctype; |
381 | 0 | if (init_state_maps(newcs)) |
382 | 0 | return MY_XML_ERROR; |
383 | 0 | #endif |
384 | 0 | } |
385 | 0 | else if (!strcmp(cs->cs_name.str, "utf8mb4")) |
386 | 0 | { |
387 | 0 | #if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS) |
388 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
389 | 0 | &my_charset_utf8mb4_unicode_nopad_ci : |
390 | 0 | &my_charset_utf8mb4_unicode_ci, |
391 | 0 | cs); |
392 | 0 | newcs->m_ctype= my_charset_utf8mb4_unicode_ci.m_ctype; |
393 | 0 | if (init_state_maps(newcs)) |
394 | 0 | return MY_XML_ERROR; |
395 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; |
396 | 0 | #endif |
397 | 0 | } |
398 | 0 | else if (!strcmp(cs->cs_name.str, "utf16")) |
399 | 0 | { |
400 | 0 | #if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS) |
401 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
402 | 0 | &my_charset_utf16_unicode_nopad_ci : |
403 | 0 | &my_charset_utf16_unicode_ci, |
404 | 0 | cs); |
405 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
406 | 0 | #endif |
407 | 0 | } |
408 | 0 | else if (!strcmp(cs->cs_name.str, "utf32")) |
409 | 0 | { |
410 | 0 | #if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS) |
411 | 0 | copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ? |
412 | 0 | &my_charset_utf32_unicode_nopad_ci : |
413 | 0 | &my_charset_utf32_unicode_ci, |
414 | 0 | cs); |
415 | 0 | newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; |
416 | 0 | #endif |
417 | 0 | } |
418 | 0 | else |
419 | 0 | { |
420 | 0 | simple_cs_init_functions(newcs); |
421 | 0 | newcs->mbminlen= 1; |
422 | 0 | newcs->mbmaxlen= 1; |
423 | 0 | newcs->strxfrm_multiply= 1; |
424 | 0 | if (simple_cs_is_full(newcs)) |
425 | 0 | { |
426 | 0 | newcs->state |= MY_CS_LOADED; |
427 | 0 | } |
428 | 0 | } |
429 | 0 | add_compiled_extra_collation(newcs); |
430 | 0 | } |
431 | 0 | else |
432 | 0 | { |
433 | | /* |
434 | | We need the below to make get_charset_name() |
435 | | and get_charset_number() working even if a |
436 | | character set has not been really incompiled. |
437 | | The above functions are used for example |
438 | | in error message compiler extra/comp_err.c. |
439 | | If a character set was compiled, this information |
440 | | will get lost and overwritten in add_compiled_collation(). |
441 | | */ |
442 | 0 | newcs->number= cs->number; |
443 | 0 | if (cs->comment) |
444 | 0 | if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) |
445 | 0 | return MY_XML_ERROR; |
446 | 0 | if (cs->cs_name.str && ! newcs->cs_name.str) |
447 | 0 | { |
448 | 0 | if (!(newcs->cs_name.str= my_once_memdup(cs->cs_name.str, |
449 | 0 | cs->cs_name.length+1, |
450 | 0 | MYF(MY_WME)))) |
451 | 0 | return MY_XML_ERROR; |
452 | 0 | newcs->cs_name.length= cs->cs_name.length; |
453 | 0 | } |
454 | 0 | if (cs->coll_name.str) |
455 | 0 | { |
456 | 0 | if (!(newcs->coll_name.str= my_once_memdup(cs->coll_name.str, |
457 | 0 | cs->coll_name.length+1, |
458 | 0 | MYF(MY_WME)))) |
459 | 0 | return MY_XML_ERROR; |
460 | 0 | newcs->coll_name.length= cs->coll_name.length; |
461 | 0 | } |
462 | 0 | } |
463 | 0 | cs->number= 0; |
464 | 0 | cs->primary_number= 0; |
465 | 0 | cs->binary_number= 0; |
466 | 0 | cs->coll_name.str= 0; |
467 | 0 | cs->coll_name.length= 0; |
468 | 0 | cs->state= 0; |
469 | 0 | cs->sort_order= NULL; |
470 | 0 | cs->tailoring= NULL; |
471 | 0 | } |
472 | 0 | return MY_XML_OK; |
473 | 0 | } |
474 | | |
475 | | |
476 | | /** |
477 | | Report character set initialization errors and warnings. |
478 | | Be silent by default: no warnings on the client side. |
479 | | */ |
480 | | static void |
481 | | default_reporter(enum loglevel level __attribute__ ((unused)), |
482 | | const char *format __attribute__ ((unused)), |
483 | | ...) |
484 | 0 | { |
485 | 0 | } |
486 | | my_error_reporter my_charset_error_reporter= default_reporter; |
487 | | |
488 | | |
489 | | /** |
490 | | Wrappers for memory functions my_malloc (and friends) |
491 | | with C-compatbile API without extra "myf" argument. |
492 | | */ |
493 | | static void * |
494 | | my_once_alloc_c(size_t size) |
495 | 0 | { return my_once_alloc(size, MYF(MY_WME)); } |
496 | | |
497 | | |
498 | | static void * |
499 | | my_malloc_c(size_t size) |
500 | 0 | { return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); } |
501 | | |
502 | | |
503 | | static void * |
504 | | my_realloc_c(void *old, size_t size) |
505 | 0 | { return my_realloc(key_memory_charset_loader, old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); } |
506 | | |
507 | | |
508 | | /** |
509 | | Initialize character set loader to use mysys memory management functions. |
510 | | @param loader Loader to initialize |
511 | | */ |
512 | | void |
513 | | my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader) |
514 | 0 | { |
515 | 0 | loader->error[0]= '\0'; |
516 | 0 | loader->once_alloc= my_once_alloc_c; |
517 | 0 | loader->malloc= my_malloc_c; |
518 | 0 | loader->realloc= my_realloc_c; |
519 | 0 | loader->free= my_free; |
520 | 0 | loader->reporter= my_charset_error_reporter; |
521 | 0 | loader->add_collation= add_collation; |
522 | 0 | } |
523 | | |
524 | | |
/*
  Largest charset definition file that my_read_charset_file() accepts,
  in bytes. Parenthesized so that the macro expands safely inside
  any expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the index file in the character sets directory */
#define MY_CHARSET_INDEX "Index.xml"
527 | | |
528 | | const char *charsets_dir= NULL; |
529 | | |
530 | | |
531 | | static my_bool |
532 | | my_read_charset_file(MY_CHARSET_LOADER *loader, |
533 | | const char *filename, |
534 | | myf myflags) |
535 | 0 | { |
536 | 0 | uchar *buf; |
537 | 0 | int fd; |
538 | 0 | size_t len, tmp_len; |
539 | 0 | MY_STAT stat_info; |
540 | | |
541 | 0 | if (!my_stat(filename, &stat_info, MYF(myflags)) || |
542 | 0 | ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) || |
543 | 0 | !(buf= (uchar*) my_malloc(key_memory_charset_loader,len,myflags))) |
544 | 0 | return TRUE; |
545 | | |
546 | 0 | if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0) |
547 | 0 | goto error; |
548 | 0 | tmp_len= mysql_file_read(fd, buf, len, myflags); |
549 | 0 | mysql_file_close(fd, myflags); |
550 | 0 | if (tmp_len != len) |
551 | 0 | goto error; |
552 | | |
553 | 0 | if (my_parse_charset_xml(loader, (char *) buf, len)) |
554 | 0 | { |
555 | 0 | my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n", |
556 | 0 | MYF(0), filename, loader->error); |
557 | 0 | goto error; |
558 | 0 | } |
559 | | |
560 | 0 | my_free(buf); |
561 | 0 | return FALSE; |
562 | | |
563 | 0 | error: |
564 | 0 | my_free(buf); |
565 | 0 | return TRUE; |
566 | 0 | } |
567 | | |
568 | | |
569 | | char *get_charsets_dir(char *buf) |
570 | 0 | { |
571 | 0 | const char *sharedir= SHAREDIR; |
572 | 0 | char *res; |
573 | 0 | DBUG_ENTER("get_charsets_dir"); |
574 | |
|
575 | 0 | if (charsets_dir != NULL) |
576 | 0 | strmake(buf, charsets_dir, FN_REFLEN-1); |
577 | 0 | else |
578 | 0 | { |
579 | 0 | if (test_if_hard_path(sharedir) || |
580 | 0 | is_prefix(sharedir, DEFAULT_CHARSET_HOME)) |
581 | 0 | strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); |
582 | 0 | else |
583 | 0 | strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, |
584 | 0 | NullS); |
585 | 0 | } |
586 | 0 | res= convert_dirname(buf,buf,NullS); |
587 | 0 | DBUG_PRINT("info",("charsets dir: '%s'", buf)); |
588 | 0 | DBUG_RETURN(res); |
589 | 0 | } |
590 | | |
591 | | CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL}; |
592 | | CHARSET_INFO *default_charset_info = &my_charset_latin1; |
593 | | |
594 | | |
595 | | /* |
596 | | Add standard character set compiled into the application |
597 | | All related character sets should share same cname |
598 | | */ |
599 | | |
600 | | void add_compiled_collation(struct charset_info_st *cs) |
601 | 0 | { |
602 | 0 | DBUG_ASSERT(cs->number < array_elements(all_charsets)); |
603 | 0 | all_charsets[cs->number]= cs; |
604 | 0 | cs->state|= MY_CS_AVAILABLE; |
605 | 0 | if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) |
606 | 0 | { |
607 | | #ifndef DBUG_OFF |
608 | | CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, |
609 | | (uchar*) cs->cs_name.str, |
610 | | cs->cs_name.length); |
611 | | DBUG_ASSERT(org); |
612 | | DBUG_ASSERT(org->cs_name.str == cs->cs_name.str); |
613 | | DBUG_ASSERT(org->cs_name.length == strlen(cs->cs_name.str)); |
614 | | #endif |
615 | 0 | } |
616 | 0 | } |
617 | | |
618 | | |
619 | | /* |
620 | | Add optional characters sets from ctype-extra.c |
621 | | |
622 | | If cname is already in use, replace csname in new object with a pointer to |
623 | | the already used csname to ensure that all csname's points to the same string |
624 | | for the same character set. |
625 | | */ |
626 | | |
627 | | |
628 | | void add_compiled_extra_collation(struct charset_info_st *cs) |
629 | 0 | { |
630 | 0 | DBUG_ASSERT(cs->number < array_elements(all_charsets)); |
631 | 0 | all_charsets[cs->number]= cs; |
632 | 0 | cs->state|= MY_CS_AVAILABLE; |
633 | 0 | if ((my_hash_insert(&charset_name_hash, (uchar*) cs))) |
634 | 0 | { |
635 | 0 | CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash, |
636 | 0 | (uchar*) cs->cs_name.str, |
637 | 0 | cs->cs_name.length); |
638 | 0 | cs->cs_name= org->cs_name; |
639 | 0 | } |
640 | 0 | } |
641 | | |
642 | | |
643 | | |
644 | | static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT; |
645 | | static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT; |
646 | | |
647 | | typedef struct |
648 | | { |
649 | | ulonglong use_count; |
650 | | } MY_COLLATION_STATISTICS; |
651 | | |
652 | | |
653 | | static MY_COLLATION_STATISTICS my_collation_statistics[MY_ALL_CHARSETS_SIZE]; |
654 | | |
655 | | |
656 | | my_bool my_collation_is_known_id(uint id) |
657 | 0 | { |
658 | 0 | return id > 0 && id < array_elements(all_charsets) && all_charsets[id] ? |
659 | 0 | TRUE : FALSE; |
660 | 0 | } |
661 | | |
662 | | |
663 | | /* |
664 | | Collation use statistics functions do not lock |
665 | | counters to avoid mutex contention. This can lose |
666 | | some counter increments with high thread concurrency. |
667 | | But this should be Ok, as we don't need exact numbers. |
668 | | */ |
669 | | static inline void my_collation_statistics_inc_use_count(uint id) |
670 | 0 | { |
671 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
672 | 0 | my_collation_statistics[id].use_count++; |
673 | 0 | } |
674 | | |
675 | | |
676 | | ulonglong my_collation_statistics_get_use_count(uint id) |
677 | 0 | { |
678 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
679 | 0 | return my_collation_statistics[id].use_count; |
680 | 0 | } |
681 | | |
682 | | |
683 | | const char *my_collation_get_tailoring(uint id) |
684 | 0 | { |
685 | | /* all_charsets[id]->tailoring is never changed after server startup. */ |
686 | 0 | DBUG_ASSERT(my_collation_is_known_id(id)); |
687 | 0 | return all_charsets[id]->tailoring; |
688 | 0 | } |
689 | | |
690 | | |
691 | | HASH charset_name_hash; |
692 | | |
693 | | static uchar *get_charset_key(const uchar *object, |
694 | | size_t *size, |
695 | | my_bool not_used __attribute__((unused))) |
696 | 0 | { |
697 | 0 | CHARSET_INFO *cs= (CHARSET_INFO*) object; |
698 | 0 | *size= cs->cs_name.length; |
699 | 0 | return (uchar*) cs->cs_name.str; |
700 | 0 | } |
701 | | |
702 | | static void init_available_charsets(void) |
703 | 0 | { |
704 | 0 | char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
705 | 0 | struct charset_info_st **cs; |
706 | 0 | MY_CHARSET_LOADER loader; |
707 | 0 | DBUG_ENTER("init_available_charsets"); |
708 | |
|
709 | 0 | bzero((char*) &all_charsets,sizeof(all_charsets)); |
710 | 0 | bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics)); |
711 | |
|
712 | 0 | my_hash_init2(key_memory_charsets, &charset_name_hash, 16, |
713 | 0 | &my_charset_latin1, 64, 0, 0, get_charset_key, |
714 | 0 | 0, 0, HASH_UNIQUE); |
715 | |
|
716 | 0 | init_compiled_charsets(MYF(0)); |
717 | | |
718 | | /* Copy compiled charsets */ |
719 | 0 | for (cs= (struct charset_info_st**) all_charsets; |
720 | 0 | cs < (struct charset_info_st**) all_charsets + |
721 | 0 | array_elements(all_charsets)-1 ; |
722 | 0 | cs++) |
723 | 0 | { |
724 | 0 | if (*cs) |
725 | 0 | { |
726 | 0 | DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN); |
727 | 0 | if (cs[0]->m_ctype) |
728 | 0 | if (init_state_maps(*cs)) |
729 | 0 | *cs= NULL; |
730 | 0 | } |
731 | 0 | } |
732 | |
|
733 | 0 | my_charset_loader_init_mysys(&loader); |
734 | 0 | strmov(get_charsets_dir(fname), MY_CHARSET_INDEX); |
735 | 0 | my_read_charset_file(&loader, fname, MYF(0)); |
736 | 0 | DBUG_VOID_RETURN; |
737 | 0 | } |
738 | | |
739 | | |
740 | | void free_charsets(void) |
741 | 0 | { |
742 | 0 | charsets_initialized= charsets_template; |
743 | 0 | my_hash_free(&charset_name_hash); |
744 | 0 | } |
745 | | |
746 | | |
747 | | static const char* |
748 | | get_collation_name_alias(const char *name, char *buf, size_t bufsize, myf flags) |
749 | 0 | { |
750 | 0 | if (!strncasecmp(name, "utf8_", 5)) |
751 | 0 | { |
752 | 0 | my_snprintf(buf, bufsize, "utf8mb%c_%s", |
753 | 0 | flags & MY_UTF8_IS_UTF8MB3 ? '3' : '4', name + 5); |
754 | 0 | return buf; |
755 | 0 | } |
756 | 0 | return NULL; |
757 | 0 | } |
758 | | |
759 | | |
760 | | uint get_collation_number(const char *name, myf flags) |
761 | 0 | { |
762 | 0 | uint id; |
763 | 0 | char alias[64]; |
764 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
765 | 0 | if ((id= get_collation_number_internal(name))) |
766 | 0 | return id; |
767 | 0 | if ((name= get_collation_name_alias(name, alias, sizeof(alias),flags))) |
768 | 0 | return get_collation_number_internal(name); |
769 | 0 | return 0; |
770 | 0 | } |
771 | | |
772 | | |
773 | | static uint |
774 | | get_charset_number_internal(const char *charset_name, uint cs_flags) |
775 | 0 | { |
776 | 0 | CHARSET_INFO **cs; |
777 | | |
778 | 0 | for (cs= all_charsets; |
779 | 0 | cs < all_charsets + array_elements(all_charsets); |
780 | 0 | cs++) |
781 | 0 | { |
782 | 0 | if ( cs[0] && cs[0]->cs_name.str && (cs[0]->state & cs_flags) && |
783 | 0 | !my_strcasecmp_latin1(cs[0]->cs_name.str, charset_name)) |
784 | 0 | return cs[0]->number; |
785 | 0 | } |
786 | 0 | return 0; |
787 | 0 | } |
788 | | |
789 | | |
790 | | uint get_charset_number(const char *charset_name, uint cs_flags, myf flags) |
791 | 0 | { |
792 | 0 | uint id; |
793 | 0 | const char *new_charset_name= flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" : |
794 | 0 | "utf8mb4"; |
795 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
796 | 0 | if ((id= get_charset_number_internal(charset_name, cs_flags))) |
797 | 0 | return id; |
798 | 0 | if ((charset_name= !my_strcasecmp_latin1(charset_name, "utf8") ? |
799 | 0 | new_charset_name : NULL)) |
800 | 0 | return get_charset_number_internal(charset_name, cs_flags); |
801 | 0 | return 0; |
802 | 0 | } |
803 | | |
804 | | |
805 | | const char *get_charset_name(uint charset_number) |
806 | 0 | { |
807 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
808 | |
|
809 | 0 | if (charset_number < array_elements(all_charsets)) |
810 | 0 | { |
811 | 0 | CHARSET_INFO *cs= all_charsets[charset_number]; |
812 | |
|
813 | 0 | if (cs && (cs->number == charset_number) && cs->coll_name.str) |
814 | 0 | return cs->coll_name.str; |
815 | 0 | } |
816 | | |
817 | 0 | return "?"; /* this mimics find_type() */ |
818 | 0 | } |
819 | | |
820 | | |
821 | | static CHARSET_INFO *inheritance_source_by_id(CHARSET_INFO *cs, uint refid) |
822 | 0 | { |
823 | 0 | CHARSET_INFO *refcs; |
824 | 0 | return refid && refid != cs->number && |
825 | 0 | (refcs= all_charsets[refid]) && |
826 | 0 | (refcs->state & MY_CS_AVAILABLE) ? refcs : NULL; |
827 | 0 | } |
828 | | |
829 | | |
830 | | static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs, myf flags) |
831 | 0 | { |
832 | 0 | const char *beg, *end; |
833 | 0 | if (cs->tailoring && |
834 | 0 | !strncmp(cs->tailoring, "[import ", 8) && |
835 | 0 | (end= strchr(cs->tailoring + 8, ']')) && |
836 | 0 | (beg= cs->tailoring + 8) + MY_CS_COLLATION_NAME_SIZE > end) |
837 | 0 | { |
838 | 0 | char name[MY_CS_COLLATION_NAME_SIZE + 1]; |
839 | 0 | memcpy(name, beg, end - beg); |
840 | 0 | name[end - beg]= '\0'; |
841 | 0 | return inheritance_source_by_id(cs, get_collation_number(name,MYF(flags))); |
842 | 0 | } |
843 | 0 | return NULL; |
844 | 0 | } |
845 | | |
846 | | |
847 | | static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs) |
848 | 0 | { |
849 | 0 | uint refid= get_charset_number_internal(cs->cs_name.str, MY_CS_PRIMARY); |
850 | 0 | return inheritance_source_by_id(cs, refid); |
851 | 0 | } |
852 | | |
853 | | |
854 | | static CHARSET_INFO * |
855 | | get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags) |
856 | 0 | { |
857 | 0 | char buf[FN_REFLEN]; |
858 | 0 | struct charset_info_st *cs; |
859 | |
|
860 | 0 | DBUG_ASSERT(cs_number < array_elements(all_charsets)); |
861 | |
|
862 | 0 | if ((cs= (struct charset_info_st*) all_charsets[cs_number])) |
863 | 0 | { |
864 | 0 | if (cs->state & MY_CS_READY) /* if CS is already initialized */ |
865 | 0 | { |
866 | 0 | my_collation_statistics_inc_use_count(cs_number); |
867 | 0 | return cs; |
868 | 0 | } |
869 | | |
870 | | /* |
871 | | To make things thread safe we are not allowing other threads to interfere |
872 | | while we may changing the cs_info_table |
873 | | */ |
874 | 0 | mysql_mutex_lock(&THR_LOCK_charset); |
875 | |
|
876 | 0 | if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */ |
877 | 0 | { |
878 | 0 | MY_CHARSET_LOADER loader; |
879 | 0 | strxmov(get_charsets_dir(buf), cs->cs_name.str, ".xml", NullS); |
880 | 0 | my_charset_loader_init_mysys(&loader); |
881 | 0 | my_read_charset_file(&loader, buf, flags); |
882 | 0 | } |
883 | |
|
884 | 0 | if (cs->state & MY_CS_AVAILABLE) |
885 | 0 | { |
886 | 0 | if (!(cs->state & MY_CS_READY)) |
887 | 0 | { |
888 | 0 | if (!simple_8bit_charset_data_is_full(cs)) |
889 | 0 | { |
890 | 0 | CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs); |
891 | 0 | if (refcs) |
892 | 0 | inherit_charset_data(cs, refcs); |
893 | 0 | } |
894 | 0 | if (!simple_8bit_collation_data_is_full(cs)) |
895 | 0 | { |
896 | 0 | CHARSET_INFO *refcl= find_collation_data_inheritance_source(cs, flags); |
897 | 0 | if (refcl) |
898 | 0 | inherit_collation_data(cs, refcl); |
899 | 0 | } |
900 | |
|
901 | 0 | if (my_ci_init_charset(cs, loader) || |
902 | 0 | my_ci_init_collation(cs, loader)) |
903 | 0 | { |
904 | 0 | cs= NULL; |
905 | 0 | } |
906 | 0 | else |
907 | 0 | cs->state|= MY_CS_READY; |
908 | 0 | } |
909 | 0 | my_collation_statistics_inc_use_count(cs_number); |
910 | 0 | } |
911 | 0 | else |
912 | 0 | cs= NULL; |
913 | |
|
914 | 0 | mysql_mutex_unlock(&THR_LOCK_charset); |
915 | 0 | } |
916 | 0 | return cs; |
917 | 0 | } |
918 | | |
919 | | |
920 | | CHARSET_INFO *get_charset(uint cs_number, myf flags) |
921 | 0 | { |
922 | 0 | CHARSET_INFO *cs= NULL; |
923 | |
|
924 | 0 | if (cs_number == default_charset_info->number) |
925 | 0 | return default_charset_info; |
926 | | |
927 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
928 | |
|
929 | 0 | if (cs_number < array_elements(all_charsets)) |
930 | 0 | { |
931 | 0 | MY_CHARSET_LOADER loader; |
932 | 0 | my_charset_loader_init_mysys(&loader); |
933 | 0 | cs= get_internal_charset(&loader, cs_number, flags); |
934 | 0 | } |
935 | |
|
936 | 0 | if (!cs && (flags & MY_WME)) |
937 | 0 | { |
938 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23]; |
939 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
940 | 0 | cs_string[0]='#'; |
941 | 0 | int10_to_str(cs_number, cs_string+1, 10); |
942 | 0 | my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); |
943 | 0 | } |
944 | 0 | return cs; |
945 | 0 | } |
946 | | |
947 | | |
948 | | /** |
949 | | Find collation by name: extended version of get_charset_by_name() |
950 | | to return error messages to the caller. |
951 | | @param loader Character set loader |
952 | | @param name Collation name |
953 | | @param flags Flags |
954 | | @return NULL on error, pointer to collation on success |
955 | | */ |
956 | | |
957 | | CHARSET_INFO * |
958 | | my_collation_get_by_name(MY_CHARSET_LOADER *loader, |
959 | | const char *name, myf flags) |
960 | 0 | { |
961 | 0 | uint cs_number; |
962 | 0 | CHARSET_INFO *cs; |
963 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
964 | |
|
965 | 0 | cs_number= get_collation_number(name,flags); |
966 | 0 | my_charset_loader_init_mysys(loader); |
967 | 0 | cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; |
968 | |
|
969 | 0 | if (!cs && (flags & MY_WME)) |
970 | 0 | { |
971 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
972 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
973 | 0 | my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file); |
974 | 0 | } |
975 | 0 | return cs; |
976 | 0 | } |
977 | | |
978 | | |
979 | | CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) |
980 | 0 | { |
981 | 0 | MY_CHARSET_LOADER loader; |
982 | 0 | my_charset_loader_init_mysys(&loader); |
983 | 0 | return my_collation_get_by_name(&loader, cs_name, flags); |
984 | 0 | } |
985 | | |
986 | | |
987 | | /** |
988 | | Find character set by name: extended version of get_charset_by_csname() |
989 | | to return error messages to the caller. |
990 | | @param loader Character set loader |
991 | | @param name Collation name |
992 | | @param cs_flags Character set flags (e.g. default or binary collation) |
993 | | @param flags Flags |
994 | | @return NULL on error, pointer to collation on success |
995 | | */ |
996 | | CHARSET_INFO * |
997 | | my_charset_get_by_name(MY_CHARSET_LOADER *loader, |
998 | | const char *cs_name, uint cs_flags, myf flags) |
999 | 0 | { |
1000 | 0 | uint cs_number; |
1001 | 0 | CHARSET_INFO *cs; |
1002 | 0 | DBUG_ENTER("get_charset_by_csname"); |
1003 | 0 | DBUG_PRINT("enter",("name: '%s'", cs_name)); |
1004 | |
|
1005 | 0 | my_pthread_once(&charsets_initialized, init_available_charsets); |
1006 | |
|
1007 | 0 | cs_number= get_charset_number(cs_name, cs_flags, flags); |
1008 | 0 | cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; |
1009 | |
|
1010 | 0 | if (!cs && (flags & MY_WME)) |
1011 | 0 | { |
1012 | 0 | char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; |
1013 | 0 | strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); |
1014 | 0 | my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); |
1015 | 0 | } |
1016 | |
|
1017 | 0 | DBUG_RETURN(cs); |
1018 | 0 | } |
1019 | | |
1020 | | |
1021 | | CHARSET_INFO * |
1022 | | get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) |
1023 | 0 | { |
1024 | 0 | MY_CHARSET_LOADER loader; |
1025 | 0 | my_charset_loader_init_mysys(&loader); |
1026 | 0 | return my_charset_get_by_name(&loader, cs_name, cs_flags, flags); |
1027 | 0 | } |
1028 | | |
1029 | | |
1030 | | /** |
1031 | | Resolve character set by the character set name (utf8, latin1, ...). |
1032 | | |
1033 | | The function tries to resolve character set by the specified name. If |
1034 | | there is character set with the given name, it is assigned to the "cs" |
1035 | | parameter and FALSE is returned. If there is no such character set, |
1036 | | "default_cs" is assigned to the "cs" and TRUE is returned. |
1037 | | |
1038 | | @param[in] cs_name Character set name. |
1039 | | @param[in] default_cs Default character set. |
1040 | | @param[out] cs Variable to store character set. |
1041 | | |
1042 | | @return FALSE if character set was resolved successfully; TRUE if there |
1043 | | is no character set with given name. |
1044 | | */ |
1045 | | |
1046 | | my_bool resolve_charset(const char *cs_name, |
1047 | | CHARSET_INFO *default_cs, |
1048 | | CHARSET_INFO **cs, |
1049 | | myf flags) |
1050 | 0 | { |
1051 | 0 | *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, flags); |
1052 | |
|
1053 | 0 | if (*cs == NULL) |
1054 | 0 | { |
1055 | 0 | *cs= default_cs; |
1056 | 0 | return TRUE; |
1057 | 0 | } |
1058 | | |
1059 | 0 | return FALSE; |
1060 | 0 | } |
1061 | | |
1062 | | |
1063 | | /** |
1064 | | Resolve collation by the collation name (utf8_general_ci, ...). |
1065 | | |
1066 | | The function tries to resolve collation by the specified name. If there |
1067 | | is collation with the given name, it is assigned to the "cl" parameter |
1068 | | and FALSE is returned. If there is no such collation, "default_cl" is |
1069 | | assigned to the "cl" and TRUE is returned. |
1070 | | |
1071 | | @param[out] cl Variable to store collation. |
1072 | | @param[in] cl_name Collation name. |
1073 | | @param[in] default_cl Default collation. |
1074 | | |
1075 | | @return FALSE if collation was resolved successfully; TRUE if there is no |
1076 | | collation with given name. |
1077 | | */ |
1078 | | |
1079 | | my_bool resolve_collation(const char *cl_name, |
1080 | | CHARSET_INFO *default_cl, |
1081 | | CHARSET_INFO **cl, |
1082 | | myf my_flags) |
1083 | 0 | { |
1084 | 0 | *cl= get_charset_by_name(cl_name, my_flags); |
1085 | |
|
1086 | 0 | if (*cl == NULL) |
1087 | 0 | { |
1088 | 0 | *cl= default_cl; |
1089 | 0 | return TRUE; |
1090 | 0 | } |
1091 | | |
1092 | 0 | return FALSE; |
1093 | 0 | } |
1094 | | |
1095 | | |
1096 | | /* |
1097 | | Escape string with backslashes (\) |
1098 | | |
1099 | | SYNOPSIS |
1100 | | escape_string_for_mysql() |
1101 | | charset_info Charset of the strings |
1102 | | to Buffer for escaped string |
1103 | | to_length Length of destination buffer, or 0 |
1104 | | from The string to escape |
1105 | | length The length of the string to escape |
1106 | | overflow Set to 1 if the escaped string did not fit in |
1107 | | the to buffer |
1108 | | |
1109 | | DESCRIPTION |
1110 | | This escapes the contents of a string by adding backslashes before special |
1111 | | characters, and turning others into specific escape sequences, such as |
1112 | | turning newlines into \n and null bytes into \0. |
1113 | | |
1114 | | NOTE |
1115 | | To maintain compatibility with the old C API, to_length may be 0 to mean |
1116 | | "big enough" |
1117 | | |
1118 | | RETURN VALUES |
1119 | | # The length of the escaped string |
1120 | | */ |
1121 | | |
1122 | | size_t escape_string_for_mysql(CHARSET_INFO *charset_info, |
1123 | | char *to, size_t to_length, |
1124 | | const char *from, size_t length, |
1125 | | my_bool *overflow) |
1126 | 0 | { |
1127 | 0 | const char *to_start= to; |
1128 | 0 | const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
1129 | 0 | *overflow= FALSE; |
1130 | 0 | for (end= from + length; from < end; from++) |
1131 | 0 | { |
1132 | 0 | char escape= 0; |
1133 | 0 | #ifdef USE_MB |
1134 | 0 | int tmp_length= my_ci_charlen(charset_info, (const uchar *) from, (const uchar *) end); |
1135 | 0 | if (tmp_length > 1) |
1136 | 0 | { |
1137 | 0 | if (to + tmp_length > to_end) |
1138 | 0 | { |
1139 | 0 | *overflow= TRUE; |
1140 | 0 | break; |
1141 | 0 | } |
1142 | 0 | while (tmp_length--) |
1143 | 0 | *to++= *from++; |
1144 | 0 | from--; |
1145 | 0 | continue; |
1146 | 0 | } |
1147 | | /* |
1148 | | If the next character appears to begin a multi-byte character, we |
1149 | | escape that first byte of that apparent multi-byte character. (The |
1150 | | character just looks like a multi-byte character -- if it were actually |
1151 | | a multi-byte character, it would have been passed through in the test |
1152 | | above.) |
1153 | | |
1154 | | Without this check, we can create a problem by converting an invalid |
1155 | | multi-byte character into a valid one. For example, 0xbf27 is not |
1156 | | a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) |
1157 | | */ |
1158 | 0 | if (tmp_length < 1) /* Bad byte sequence */ |
1159 | 0 | escape= *from; |
1160 | 0 | else |
1161 | 0 | #endif |
1162 | 0 | switch (*from) { |
1163 | 0 | case 0: /* Must be escaped for 'mysql' */ |
1164 | 0 | escape= '0'; |
1165 | 0 | break; |
1166 | 0 | case '\n': /* Must be escaped for logs */ |
1167 | 0 | escape= 'n'; |
1168 | 0 | break; |
1169 | 0 | case '\r': |
1170 | 0 | escape= 'r'; |
1171 | 0 | break; |
1172 | 0 | case '\\': |
1173 | 0 | escape= '\\'; |
1174 | 0 | break; |
1175 | 0 | case '\'': |
1176 | 0 | escape= '\''; |
1177 | 0 | break; |
1178 | 0 | case '"': /* Better safe than sorry */ |
1179 | 0 | escape= '"'; |
1180 | 0 | break; |
1181 | 0 | case '\032': /* This gives problems on Win32 */ |
1182 | 0 | escape= 'Z'; |
1183 | 0 | break; |
1184 | 0 | } |
1185 | 0 | if (escape) |
1186 | 0 | { |
1187 | 0 | if (to + 2 > to_end) |
1188 | 0 | { |
1189 | 0 | *overflow= TRUE; |
1190 | 0 | break; |
1191 | 0 | } |
1192 | 0 | *to++= '\\'; |
1193 | 0 | *to++= escape; |
1194 | 0 | } |
1195 | 0 | else |
1196 | 0 | { |
1197 | 0 | if (to + 1 > to_end) |
1198 | 0 | { |
1199 | 0 | *overflow= TRUE; |
1200 | 0 | break; |
1201 | 0 | } |
1202 | 0 | *to++= *from; |
1203 | 0 | } |
1204 | 0 | } |
1205 | 0 | *to= 0; |
1206 | 0 | return (size_t) (to - to_start); |
1207 | 0 | } |
1208 | | |
1209 | | |
#ifdef BACKSLASH_MBTAIL
/*
  Return the character set chosen for the file system: cp932 if it is
  compiled in and GetACP() reports code page 932, my_charset_bin otherwise.
  The choice is made on the first call and cached in a static variable.
*/
CHARSET_INFO *fs_character_set()
{
  static CHARSET_INFO *fs_cset_cache;

  if (!fs_cset_cache)
  {
#ifdef HAVE_CHARSET_cp932
    if (GetACP() == 932)
      fs_cset_cache= &my_charset_cp932_japanese_ci;
    else
#endif
      fs_cset_cache= &my_charset_bin;
  }
  return fs_cset_cache;
}
#endif
1224 | | |
1225 | | /* |
1226 | | Escape apostrophes by doubling them up |
1227 | | |
1228 | | SYNOPSIS |
1229 | | escape_quotes_for_mysql() |
1230 | | charset_info Charset of the strings |
1231 | | to Buffer for escaped string |
1232 | | to_length Length of destination buffer, or 0 |
1233 | | from The string to escape |
1234 | | length The length of the string to escape |
1235 | | overflow Set to 1 if the buffer overflows |
1236 | | |
1237 | | DESCRIPTION |
1238 | | This escapes the contents of a string by doubling up any apostrophes that |
1239 | | it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in |
1240 | | effect on the server. |
1241 | | |
1242 | | NOTE |
1243 | | To be consistent with escape_string_for_mysql(), to_length may be 0 to |
1244 | | mean "big enough" |
1245 | | |
1246 | | RETURN VALUES |
1247 | | The length of the escaped string |
1248 | | */ |
1249 | | |
1250 | | size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info, |
1251 | | char *to, size_t to_length, |
1252 | | const char *from, size_t length, |
1253 | | my_bool *overflow) |
1254 | 0 | { |
1255 | 0 | const char *to_start= to; |
1256 | 0 | const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); |
1257 | 0 | #ifdef USE_MB |
1258 | 0 | my_bool use_mb_flag= my_ci_use_mb(charset_info); |
1259 | 0 | #endif |
1260 | 0 | *overflow= FALSE; |
1261 | 0 | for (end= from + length; from < end; from++) |
1262 | 0 | { |
1263 | 0 | #ifdef USE_MB |
1264 | 0 | int tmp_length; |
1265 | 0 | if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) |
1266 | 0 | { |
1267 | 0 | if (to + tmp_length > to_end) |
1268 | 0 | { |
1269 | 0 | *overflow= TRUE; |
1270 | 0 | break; |
1271 | 0 | } |
1272 | 0 | while (tmp_length--) |
1273 | 0 | *to++= *from++; |
1274 | 0 | from--; |
1275 | 0 | continue; |
1276 | 0 | } |
1277 | | /* |
1278 | | We don't have the same issue here with a non-multi-byte character being |
1279 | | turned into a multi-byte character by the addition of an escaping |
1280 | | character, because we are only escaping the ' character with itself. |
1281 | | */ |
1282 | 0 | #endif |
1283 | 0 | if (*from == '\'') |
1284 | 0 | { |
1285 | 0 | if (to + 2 > to_end) |
1286 | 0 | { |
1287 | 0 | *overflow= TRUE; |
1288 | 0 | break; |
1289 | 0 | } |
1290 | 0 | *to++= '\''; |
1291 | 0 | *to++= '\''; |
1292 | 0 | } |
1293 | 0 | else |
1294 | 0 | { |
1295 | 0 | if (to + 1 > to_end) |
1296 | 0 | { |
1297 | 0 | *overflow= TRUE; |
1298 | 0 | break; |
1299 | 0 | } |
1300 | 0 | *to++= *from; |
1301 | 0 | } |
1302 | 0 | } |
1303 | 0 | *to= 0; |
1304 | 0 | return (size_t) (to - to_start); |
1305 | 0 | } |
1306 | | |
1307 | | |
/*
  How well a MySQL character set corresponds to the OS character set
  it is paired with in the charsets[] table.
*/
typedef enum my_cs_match_type_enum
{
  /* MySQL and OS charsets are fully compatible */
  my_cs_exact,
  /* MySQL charset is very close to OS charset */
  my_cs_approx,
  /*
    MySQL knows this charset, but it is not supported as client character set.
  */
  my_cs_unsupp
} my_cs_match_type;
1319 | | |
1320 | | |
/* One entry of the OS charset name <-> MySQL charset name mapping */
typedef struct str2str_st
{
  const char* os_name;     /* Charset name as known to the OS */
  const char* my_name;     /* Corresponding MySQL charset name */
  my_cs_match_type param;  /* How close the two charsets are */
} MY_CSET_OS_NAME;
1327 | | |
/*
  Mapping between OS character set names and MySQL character set names.

  On Windows the OS names are code pages written as "cpNNNN"; elsewhere
  they are the names compared against nl_langinfo(CODESET).
  The table ends with an entry whose os_name is NULL.

  The order of entries matters: lookups by OS name return the first
  matching entry.
*/
static const MY_CSET_OS_NAME charsets[] =
{
#ifdef _WIN32
  {"cp437", "cp850", my_cs_approx},
  {"cp850", "cp850", my_cs_exact},
  {"cp852", "cp852", my_cs_exact},
  {"cp858", "cp850", my_cs_approx},
  {"cp866", "cp866", my_cs_exact},
  {"cp874", "tis620", my_cs_approx},
  {"cp932", "cp932", my_cs_exact},
  {"cp936", "gbk", my_cs_approx},
  {"cp949", "euckr", my_cs_approx},
  {"cp950", "big5", my_cs_exact},
  {"cp1200", "utf16le", my_cs_unsupp},
  {"cp1201", "utf16", my_cs_unsupp},
  {"cp1250", "cp1250", my_cs_exact},
  {"cp1251", "cp1251", my_cs_exact},
  {"cp1252", "latin1", my_cs_exact},
  {"cp1253", "greek", my_cs_exact},
  {"cp1254", "latin5", my_cs_exact},
  {"cp1255", "hebrew", my_cs_approx},
  {"cp1256", "cp1256", my_cs_exact},
  {"cp1257", "cp1257", my_cs_exact},
  {"cp10000", "macroman", my_cs_exact},
  {"cp10001", "sjis", my_cs_approx},
  {"cp10002", "big5", my_cs_approx},
  {"cp10008", "gb2312", my_cs_approx},
  {"cp10021", "tis620", my_cs_approx},
  {"cp10029", "macce", my_cs_exact},
  {"cp12001", "utf32", my_cs_unsupp},
  {"cp20107", "swe7", my_cs_exact},
  {"cp20127", "latin1", my_cs_approx},
  {"cp20866", "koi8r", my_cs_exact},
  {"cp20932", "ujis", my_cs_exact},
  {"cp20936", "gb2312", my_cs_approx},
  {"cp20949", "euckr", my_cs_approx},
  {"cp21866", "koi8u", my_cs_exact},
  {"cp28591", "latin1", my_cs_approx},
  {"cp28592", "latin2", my_cs_exact},
  {"cp28597", "greek", my_cs_exact},
  {"cp28598", "hebrew", my_cs_exact},
  {"cp28599", "latin5", my_cs_exact},
  {"cp28603", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"cp28605", "latin9", my_cs_exact},
#endif
  {"cp38598", "hebrew", my_cs_exact},
  {"cp51932", "ujis", my_cs_exact},
  {"cp51936", "gb2312", my_cs_exact},
  {"cp51949", "euckr", my_cs_exact},
  {"cp51950", "big5", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"cp54936", "gb18030", my_cs_exact},
#endif
  /* cp65001 is listed twice: utf8mb4 comes first and wins lookups by OS name */
  {"cp65001", "utf8mb4", my_cs_exact},
  {"cp65001", "utf8mb3", my_cs_approx},
#else /* not Windows */

  {"646", "latin1", my_cs_approx}, /* Default on Solaris */
  {"ANSI_X3.4-1968", "latin1", my_cs_approx},
  {"ansi1251", "cp1251", my_cs_exact},
  {"armscii8", "armscii8", my_cs_exact},
  {"armscii-8", "armscii8", my_cs_exact},
  {"ASCII", "latin1", my_cs_approx},
  {"Big5", "big5", my_cs_exact},
  {"cp1251", "cp1251", my_cs_exact},
  {"cp1255", "hebrew", my_cs_approx},
  {"CP866", "cp866", my_cs_exact},
  {"eucCN", "gb2312", my_cs_exact},
  {"euc-CN", "gb2312", my_cs_exact},
  {"eucJP", "ujis", my_cs_exact},
  {"euc-JP", "ujis", my_cs_exact},
  {"eucKR", "euckr", my_cs_exact},
  {"euc-KR", "euckr", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"gb18030", "gb18030", my_cs_exact},
#endif
  {"gb2312", "gb2312", my_cs_exact},
  {"gbk", "gbk", my_cs_exact},
  {"georgianps", "geostd8", my_cs_exact},
  {"georgian-ps", "geostd8", my_cs_exact},
  {"IBM-1252", "cp1252", my_cs_exact},

  {"iso88591", "latin1", my_cs_approx},
  {"ISO_8859-1", "latin1", my_cs_approx},
  {"ISO8859-1", "latin1", my_cs_approx},
  {"ISO-8859-1", "latin1", my_cs_approx},

  {"iso885913", "latin7", my_cs_exact},
  {"ISO_8859-13", "latin7", my_cs_exact},
  {"ISO8859-13", "latin7", my_cs_exact},
  {"ISO-8859-13", "latin7", my_cs_exact},

#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"iso885915", "latin9", my_cs_exact},
  {"ISO_8859-15", "latin9", my_cs_exact},
  {"ISO8859-15", "latin9", my_cs_exact},
  {"ISO-8859-15", "latin9", my_cs_exact},
#endif

  {"iso88592", "latin2", my_cs_exact},
  {"ISO_8859-2", "latin2", my_cs_exact},
  {"ISO8859-2", "latin2", my_cs_exact},
  {"ISO-8859-2", "latin2", my_cs_exact},

  {"iso88597", "greek", my_cs_exact},
  {"ISO_8859-7", "greek", my_cs_exact},
  {"ISO8859-7", "greek", my_cs_exact},
  {"ISO-8859-7", "greek", my_cs_exact},

  {"iso88598", "hebrew", my_cs_exact},
  {"ISO_8859-8", "hebrew", my_cs_exact},
  {"ISO8859-8", "hebrew", my_cs_exact},
  {"ISO-8859-8", "hebrew", my_cs_exact},

  {"iso88599", "latin5", my_cs_exact},
  {"ISO_8859-9", "latin5", my_cs_exact},
  {"ISO8859-9", "latin5", my_cs_exact},
  {"ISO-8859-9", "latin5", my_cs_exact},

  {"koi8r", "koi8r", my_cs_exact},
  {"KOI8-R", "koi8r", my_cs_exact},
  {"koi8u", "koi8u", my_cs_exact},
  {"KOI8-U", "koi8u", my_cs_exact},

  {"roman8", "hp8", my_cs_exact}, /* Default on HP UX */

  {"Shift_JIS", "sjis", my_cs_exact},
  {"SJIS", "sjis", my_cs_exact},
  {"shiftjisx0213", "sjis", my_cs_exact},

  {"tis620", "tis620", my_cs_exact},
  {"tis-620", "tis620", my_cs_exact},

  {"ujis", "ujis", my_cs_exact},

  {"US-ASCII", "latin1", my_cs_approx},

  {"utf8", "utf8", my_cs_exact},
  {"utf-8", "utf8", my_cs_exact},
#endif
  /* End-of-table marker: os_name is NULL */
  {NULL, NULL, 0}
};
1471 | | |
1472 | | |
1473 | | static const char* |
1474 | | my_os_charset_to_mysql_charset(const char* csname) |
1475 | 0 | { |
1476 | 0 | const MY_CSET_OS_NAME* csp; |
1477 | 0 | for (csp = charsets; csp->os_name; csp++) |
1478 | 0 | { |
1479 | 0 | if (!strcasecmp(csp->os_name, csname)) |
1480 | 0 | { |
1481 | 0 | switch (csp->param) |
1482 | 0 | { |
1483 | 0 | case my_cs_exact: |
1484 | 0 | return csp->my_name; |
1485 | | |
1486 | 0 | case my_cs_approx: |
1487 | | /* |
1488 | | Maybe we should print a warning eventually: |
1489 | | character set correspondence is not exact. |
1490 | | */ |
1491 | 0 | return csp->my_name; |
1492 | | |
1493 | 0 | default: |
1494 | 0 | return NULL; |
1495 | 0 | } |
1496 | 0 | } |
1497 | 0 | } |
1498 | 0 | return NULL; |
1499 | 0 | } |
1500 | | |
1501 | | const char* my_default_csname() |
1502 | 0 | { |
1503 | 0 | const char* csname = NULL; |
1504 | | #ifdef _WIN32 |
1505 | | char cpbuf[64]; |
1506 | | UINT cp; |
1507 | | if (GetACP() == CP_UTF8) |
1508 | | cp= CP_UTF8; |
1509 | | else |
1510 | | { |
1511 | | cp= GetConsoleCP(); |
1512 | | if (cp == 0) |
1513 | | cp= GetACP(); |
1514 | | } |
1515 | | snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp); |
1516 | | csname = my_os_charset_to_mysql_charset(cpbuf); |
1517 | | #elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) |
1518 | 0 | if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET))) |
1519 | 0 | csname = my_os_charset_to_mysql_charset(csname); |
1520 | 0 | #endif |
1521 | 0 | return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME; |
1522 | 0 | } |
1523 | | |
1524 | | |
1525 | | #ifdef _WIN32 |
1526 | | /** |
1527 | | Extract codepage number from "cpNNNN" string, |
1528 | | and check that this codepage is supported. |
1529 | | |
1530 | | @return 0 - invalid codepage(or unsupported) |
1531 | | > 0 - valid codepage number. |
1532 | | */ |
1533 | | static UINT get_codepage(const char *s) |
1534 | | { |
1535 | | UINT cp; |
1536 | | if (s[0] != 'c' || s[1] != 'p') |
1537 | | { |
1538 | | DBUG_ASSERT(0); |
1539 | | return 0; |
1540 | | } |
1541 | | cp= strtoul(s + 2, NULL, 10); |
1542 | | if (!IsValidCodePage(cp)) |
1543 | | { |
1544 | | /* |
1545 | | Can happen also with documented CP, i.e 51936 |
1546 | | Perhaps differs from one machine to another. |
1547 | | */ |
1548 | | return 0; |
1549 | | } |
1550 | | return cp; |
1551 | | } |
1552 | | |
1553 | | static UINT mysql_charset_to_codepage(const char *my_cs_name) |
1554 | | { |
1555 | | const MY_CSET_OS_NAME *csp; |
1556 | | UINT cp=0,tmp; |
1557 | | for (csp= charsets; csp->os_name; csp++) |
1558 | | { |
1559 | | if (!strcasecmp(csp->my_name, my_cs_name)) |
1560 | | { |
1561 | | switch (csp->param) |
1562 | | { |
1563 | | case my_cs_exact: |
1564 | | tmp= get_codepage(csp->os_name); |
1565 | | if (tmp) |
1566 | | return tmp; |
1567 | | break; |
1568 | | case my_cs_approx: |
1569 | | /* |
1570 | | don't return just yet, perhaps there is a better |
1571 | | (exact) match later. |
1572 | | */ |
1573 | | if (!cp) |
1574 | | cp= get_codepage(csp->os_name); |
1575 | | continue; |
1576 | | |
1577 | | default: |
1578 | | return 0; |
1579 | | } |
1580 | | } |
1581 | | } |
1582 | | return cp; |
1583 | | } |
1584 | | |
1585 | | /** Set console codepage for MariaDB's charset name */ |
1586 | | int my_set_console_cp(const char *csname) |
1587 | | { |
1588 | | UINT cp; |
1589 | | if (fileno(stdout) < 0 || !isatty(fileno(stdout))) |
1590 | | return 0; |
1591 | | cp= mysql_charset_to_codepage(csname); |
1592 | | if (!cp) |
1593 | | { |
1594 | | /* No compatible os charset.*/ |
1595 | | return -1; |
1596 | | } |
1597 | | |
1598 | | if (GetConsoleOutputCP() != cp && !SetConsoleOutputCP(cp)) |
1599 | | { |
1600 | | return -1; |
1601 | | } |
1602 | | |
1603 | | if (GetConsoleCP() != cp && !SetConsoleCP(cp)) |
1604 | | { |
1605 | | return -1; |
1606 | | } |
1607 | | return 0; |
1608 | | } |
1609 | | #endif |