/src/server/mysys/charset.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |    Copyright (c) 2000, 2011, Oracle and/or its affiliates  | 
3  |  |    Copyright (c) 2009, 2020, MariaDB Corporation.  | 
4  |  |  | 
5  |  |    This program is free software; you can redistribute it and/or modify  | 
6  |  |    it under the terms of the GNU General Public License as published by  | 
7  |  |    the Free Software Foundation; version 2 of the License.  | 
8  |  |  | 
9  |  |    This program is distributed in the hope that it will be useful,  | 
10  |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
11  |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
12  |  |    GNU General Public License for more details.  | 
13  |  |  | 
14  |  |    You should have received a copy of the GNU General Public License  | 
15  |  |    along with this program; if not, write to the Free Software  | 
16  |  |    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */  | 
17  |  |  | 
18  |  | #include "mysys_priv.h"  | 
19  |  | #include "mysys_err.h"  | 
20  |  | #include <m_ctype.h>  | 
21  |  | #include <m_string.h>  | 
22  |  | #include <my_dir.h>  | 
23  |  | #include <hash.h>  | 
24  |  | #include <my_xml.h>  | 
25  |  | #ifdef HAVE_LANGINFO_H  | 
26  |  | #include <langinfo.h>  | 
27  |  | #endif  | 
28  |  | #ifdef HAVE_LOCALE_H  | 
29  |  | #include <locale.h>  | 
30  |  | #endif  | 
31  |  |  | 
32  |  | extern HASH charset_name_hash;  | 
33  |  |  | 
34  |  | /*  | 
35  |  |   The code below implements this functionality:  | 
36  |  |     | 
37  |  |     - Initializing charset related structures  | 
38  |  |     - Loading dynamic charsets  | 
39  |  |     - Searching for a proper CHARSET_INFO   | 
40  |  |       using charset name, collation name or collation ID  | 
41  |  |     - Setting server default character set  | 
42  |  | */  | 
43  |  |  | 
44  |  | static uint  | 
45  |  | get_collation_number_internal(const char *name)  | 
46  | 0  | { | 
47  |  | 
  | 
48  | 0  |   CHARSET_INFO **cs;  | 
49  | 0  |   for (cs= all_charsets;  | 
50  | 0  |        cs < all_charsets + array_elements(all_charsets);  | 
51  | 0  |        cs++)  | 
52  | 0  |   { | 
53  | 0  |     if (cs[0] && cs[0]->coll_name.str &&  | 
54  | 0  |         !my_strcasecmp_latin1(cs[0]->coll_name.str, name))  | 
55  | 0  |       return cs[0]->number;  | 
56  | 0  |   }    | 
57  | 0  |   return 0;  | 
58  | 0  | }  | 
59  |  |  | 
60  |  |  | 
61  |  | static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch)  | 
62  | 0  | { | 
63  | 0  |   int chlen= my_ci_charlen(cs, &ch, &ch + 1);  | 
64  | 0  |   return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE;  | 
65  | 0  | }  | 
66  |  |  | 
67  |  | static my_bool init_state_maps(struct charset_info_st *cs)  | 
68  | 0  | { | 
69  | 0  |   uint i;  | 
70  | 0  |   uchar *state_map;  | 
71  | 0  |   uchar *ident_map;  | 
72  |  | 
  | 
73  | 0  |   if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256*2, MYF(MY_WME))))  | 
74  | 0  |     return 1;  | 
75  |  |       | 
76  | 0  |   cs->ident_map= ident_map= state_map + 256;  | 
77  |  |  | 
78  |  |   /* Fill state_map with states to get a faster parser */  | 
79  | 0  |   for (i=0; i < 256 ; i++)  | 
80  | 0  |   { | 
81  | 0  |     if (my_isalpha(cs,i))  | 
82  | 0  |       state_map[i]=(uchar) MY_LEX_IDENT;  | 
83  | 0  |     else if (my_isdigit(cs,i))  | 
84  | 0  |       state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;  | 
85  | 0  |     else if (is_multi_byte_ident(cs, i))  | 
86  | 0  |       state_map[i]=(uchar) MY_LEX_IDENT;  | 
87  | 0  |     else if (my_isspace(cs,i))  | 
88  | 0  |       state_map[i]=(uchar) MY_LEX_SKIP;  | 
89  | 0  |     else  | 
90  | 0  |       state_map[i]=(uchar) MY_LEX_CHAR;  | 
91  | 0  |   }  | 
92  | 0  |   state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;  | 
93  | 0  |   state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;  | 
94  | 0  |   state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;  | 
95  | 0  |   state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;  | 
96  | 0  |   state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;  | 
97  | 0  |   state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;  | 
98  | 0  |   state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;  | 
99  | 0  |   state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;  | 
100  | 0  |   state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;  | 
101  | 0  |   state_map[0]=(uchar) MY_LEX_EOL;  | 
102  | 0  |   state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;  | 
103  | 0  |   state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;  | 
104  | 0  |   state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;  | 
105  | 0  |   state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;  | 
106  | 0  |   state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;  | 
107  | 0  |   state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;  | 
108  | 0  |   state_map[(uchar)'-']= (uchar) MY_LEX_MINUS_OR_COMMENT;  | 
109  | 0  |   state_map[(uchar)',']= (uchar) MY_LEX_COMMA;  | 
110  | 0  |   state_map[(uchar)'?']= (uchar) MY_LEX_PLACEHOLDER;  | 
111  |  |  | 
112  |  |   /*  | 
113  |  |     Create a second map to make it faster to find identifiers  | 
114  |  |   */  | 
115  | 0  |   for (i=0; i < 256 ; i++)  | 
116  | 0  |   { | 
117  | 0  |     ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||  | 
118  | 0  |          state_map[i] == MY_LEX_NUMBER_IDENT);  | 
119  | 0  |   }  | 
120  |  |  | 
121  |  |   /* Special handling of hex and binary strings */  | 
122  | 0  |   state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;  | 
123  | 0  |   state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;  | 
124  | 0  |   state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;  | 
125  | 0  |   return 0;  | 
126  | 0  | }  | 
127  |  |  | 
128  |  |  | 
129  |  | static MY_COLLATION_HANDLER *get_simple_collation_handler_by_flags(uint flags)  | 
130  | 0  | { | 
131  | 0  |   return flags & MY_CS_BINSORT ?  | 
132  | 0  |            (flags & MY_CS_NOPAD ?  | 
133  | 0  |             &my_collation_8bit_nopad_bin_handler :  | 
134  | 0  |             &my_collation_8bit_bin_handler) :  | 
135  | 0  |            (flags & MY_CS_NOPAD ?  | 
136  | 0  |             &my_collation_8bit_simple_nopad_ci_handler :  | 
137  | 0  |             &my_collation_8bit_simple_ci_handler);  | 
138  | 0  | }  | 
139  |  |  | 
140  |  |  | 
141  |  | static void simple_cs_init_functions(struct charset_info_st *cs)  | 
142  | 0  | { | 
143  | 0  |   cs->coll= get_simple_collation_handler_by_flags(cs->state);  | 
144  | 0  |   cs->cset= &my_charset_8bit_handler;  | 
145  | 0  | }  | 
146  |  |  | 
147  |  |  | 
148  |  |  | 
149  |  | static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)  | 
150  | 0  | { | 
151  | 0  |   to->number= from->number ? from->number : to->number;  | 
152  |  |  | 
153  |  |   /* Don't replace csname if already set */  | 
154  | 0  |   if (from->cs_name.str && !to->cs_name.str)  | 
155  | 0  |   { | 
156  | 0  |     if (!(to->cs_name.str= my_once_memdup(from->cs_name.str,  | 
157  | 0  |                                           from->cs_name.length + 1,  | 
158  | 0  |                                           MYF(MY_WME))))  | 
159  | 0  |       goto err;  | 
160  | 0  |     to->cs_name.length= from->cs_name.length;  | 
161  | 0  |   }  | 
162  |  |     | 
163  | 0  |   if (from->coll_name.str)  | 
164  | 0  |   { | 
165  | 0  |     if (!(to->coll_name.str= my_once_memdup(from->coll_name.str,  | 
166  | 0  |                                             from->coll_name.length + 1,  | 
167  | 0  |                                             MYF(MY_WME))))  | 
168  | 0  |       goto err;  | 
169  | 0  |     to->coll_name.length= from->coll_name.length;  | 
170  | 0  |   }  | 
171  |  |     | 
172  | 0  |   if (from->comment)  | 
173  | 0  |     if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))  | 
174  | 0  |       goto err;  | 
175  |  |     | 
176  | 0  |   if (from->m_ctype)  | 
177  | 0  |   { | 
178  | 0  |     if (!(to->m_ctype= (uchar*) my_once_memdup((char*) from->m_ctype,  | 
179  | 0  |                                                MY_CS_CTYPE_TABLE_SIZE,  | 
180  | 0  |                                                MYF(MY_WME))))  | 
181  | 0  |       goto err;  | 
182  | 0  |     if (init_state_maps(to))  | 
183  | 0  |       goto err;  | 
184  | 0  |   }  | 
185  | 0  |   if (from->to_lower)  | 
186  | 0  |     if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,  | 
187  | 0  |             MY_CS_TO_LOWER_TABLE_SIZE,  | 
188  | 0  |             MYF(MY_WME))))  | 
189  | 0  |       goto err;  | 
190  |  |  | 
191  | 0  |   if (from->to_upper)  | 
192  | 0  |     if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,  | 
193  | 0  |             MY_CS_TO_UPPER_TABLE_SIZE,  | 
194  | 0  |             MYF(MY_WME))))  | 
195  | 0  |       goto err;  | 
196  | 0  |   if (from->sort_order)  | 
197  | 0  |   { | 
198  | 0  |     if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,  | 
199  | 0  |               MY_CS_SORT_ORDER_TABLE_SIZE,  | 
200  | 0  |               MYF(MY_WME))))  | 
201  | 0  |       goto err;  | 
202  |  | 
  | 
203  | 0  |   }  | 
204  | 0  |   if (from->tab_to_uni)  | 
205  | 0  |   { | 
206  | 0  |     uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);  | 
207  | 0  |     if (!(to->tab_to_uni= (uint16*)  my_once_memdup((char*)from->tab_to_uni,  | 
208  | 0  |                 sz, MYF(MY_WME))))  | 
209  | 0  |       goto err;  | 
210  | 0  |   }  | 
211  | 0  |   if (from->tailoring)  | 
212  | 0  |     if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))  | 
213  | 0  |       goto err;  | 
214  |  |  | 
215  | 0  |   return 0;  | 
216  |  |  | 
217  | 0  | err:  | 
218  | 0  |   return 1;  | 
219  | 0  | }  | 
220  |  |  | 
221  |  |  | 
222  |  | static my_bool simple_8bit_charset_data_is_full(CHARSET_INFO *cs)  | 
223  | 0  | { | 
224  | 0  |   return cs->m_ctype && cs->to_upper && cs->to_lower && cs->tab_to_uni;  | 
225  | 0  | }  | 
226  |  |  | 
227  |  |  | 
228  |  | /**  | 
229  |  |   Inherit missing 8bit charset data from another collation.  | 
230  |  |   Arrays pointed by refcs must be in the permanent memory already,  | 
231  |  |   e.g. static memory, or allocated by my_once_xxx().  | 
232  |  | */  | 
233  |  | static void  | 
234  |  | inherit_charset_data(struct charset_info_st *cs, CHARSET_INFO *refcs)  | 
235  | 0  | { | 
236  | 0  |   if (!cs->to_upper)  | 
237  | 0  |     cs->to_upper= refcs->to_upper;  | 
238  | 0  |   if (!cs->to_lower)  | 
239  | 0  |     cs->to_lower= refcs->to_lower;  | 
240  | 0  |   if (!cs->m_ctype)  | 
241  | 0  |     cs->m_ctype= refcs->m_ctype;  | 
242  | 0  |   if (!cs->tab_to_uni)  | 
243  | 0  |     cs->tab_to_uni= refcs->tab_to_uni;  | 
244  | 0  | }  | 
245  |  |  | 
246  |  |  | 
247  |  | static my_bool simple_8bit_collation_data_is_full(CHARSET_INFO *cs)  | 
248  | 0  | { | 
249  | 0  |   return cs->sort_order || (cs->state & MY_CS_BINSORT);  | 
250  | 0  | }  | 
251  |  |  | 
252  |  |  | 
253  |  | /**  | 
254  |  |   Inherit 8bit simple collation data from another collation.  | 
255  |  |   refcs->sort_order must be in the permanent memory already,  | 
256  |  |   e.g. static memory, or allocated by my_once_xxx().  | 
257  |  | */  | 
258  |  | static void  | 
259  |  | inherit_collation_data(struct charset_info_st *cs, CHARSET_INFO *refcs)  | 
260  | 0  | { | 
261  | 0  |   if (!simple_8bit_collation_data_is_full(cs))  | 
262  | 0  |     cs->sort_order= refcs->sort_order;  | 
263  | 0  | }  | 
264  |  |  | 
265  |  |  | 
266  |  | static my_bool simple_cs_is_full(CHARSET_INFO *cs)  | 
267  | 0  | { | 
268  | 0  |   return  cs->number && cs->cs_name.str && cs->coll_name.str &&  | 
269  | 0  |           simple_8bit_charset_data_is_full(cs) &&  | 
270  | 0  |           (simple_8bit_collation_data_is_full(cs) || cs->tailoring);  | 
271  | 0  | }  | 
272  |  |  | 
273  |  |  | 
/*
  copy_uca_collation() is called for ucs2, utf8mb3, utf8mb4, utf16 and utf32
  collations, so it has to be compiled in when any of them is available
  together with UCA support.
*/
#if defined(HAVE_UCA_COLLATIONS) && \
    (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8mb3) || \
     defined(HAVE_CHARSET_utf8mb4) || defined(HAVE_CHARSET_utf16) || \
     defined(HAVE_CHARSET_utf32))
/**
  Initialize a loaded collation.
  @param [OUT] to     - The new charset_info_st structure to initialize.
  @param [IN]  from   - A template collation, to fill the missing data from.
  @param [IN]  loaded - The collation data loaded from the LDML file.
                        Some data may be missing in "loaded".
*/
static void
copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from,
                   CHARSET_INFO *loaded)
{
  to->cset= from->cset;
  to->coll= from->coll;
  /*
    Single-level UCA collations have strxfrm_multiply=8.
    In case of a multi-level UCA collation we use strxfrm_multiply=4.
    That means the collation handler will request the caller to allocate
    a smaller buffer for each level, for performance purposes,
    and to fit longer VARCHARs into @@max_sort_length.
    This makes filesort produce a non-precise order for some rare Unicode
    characters that produce more than 4 weights (long expansions).
    UCA requires 2 bytes per weight multiplied by the number of levels.
    In case of a 2-level collation, each character requires 4*2=8 bytes.
    Therefore, the longest VARCHAR that fits into the default
    @@max_sort_length is 1024/8=VARCHAR(128). With strxfrm_multiply==8,
    only VARCHAR(64) would fit.
    Note, the built-in collation utf8_thai_520_w2 also uses
    strxfrm_multiply=4, for the same purpose.
    TODO: we could add a new LDML syntax to choose the strxfrm_multiply value.
  */
  to->strxfrm_multiply= loaded->levels_for_order > 1 ?
                        4 : from->strxfrm_multiply;
  to->min_sort_char= from->min_sort_char;
  to->max_sort_char= from->max_sort_char;
  to->mbminlen= from->mbminlen;
  to->mbmaxlen= from->mbmaxlen;
  to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
              MY_CS_STRNXFRM  | MY_CS_UNICODE;
}
#endif
315  |  |  | 
316  |  |  | 
317  |  | static int add_collation(struct charset_info_st *cs)  | 
318  | 0  | { | 
319  | 0  |   if (cs->coll_name.str &&  | 
320  | 0  |       (cs->number ||  | 
321  | 0  |        (cs->number=get_collation_number_internal(cs->coll_name.str))) &&  | 
322  | 0  |       cs->number < array_elements(all_charsets))  | 
323  | 0  |   { | 
324  | 0  |     struct charset_info_st *newcs;  | 
325  | 0  |     if (!(newcs= (struct charset_info_st*) all_charsets[cs->number]))  | 
326  | 0  |     { | 
327  | 0  |       if (!(all_charsets[cs->number]= newcs=  | 
328  | 0  |          (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))  | 
329  | 0  |         return MY_XML_ERROR;  | 
330  | 0  |       bzero(newcs,sizeof(CHARSET_INFO));  | 
331  | 0  |     }  | 
332  | 0  |     else  | 
333  | 0  |     { | 
334  |  |       /* Don't allow change of csname */  | 
335  | 0  |       if (newcs->cs_name.str && strcmp(newcs->cs_name.str, cs->cs_name.str))  | 
336  | 0  |       { | 
337  | 0  |         my_error(EE_DUPLICATE_CHARSET, MYF(ME_WARNING),  | 
338  | 0  |                  cs->number, cs->cs_name.str, newcs->cs_name.str);  | 
339  |  |         /*  | 
340  |  |           Continue parsing rest of Index.xml. We got an warning in the log  | 
341  |  |           so the user can fix the wrong character set definition.  | 
342  |  |         */  | 
343  | 0  |         return MY_XML_OK;  | 
344  | 0  |       }  | 
345  | 0  |     }  | 
346  |  |  | 
347  | 0  |     if (cs->primary_number == cs->number)  | 
348  | 0  |       cs->state |= MY_CS_PRIMARY;  | 
349  |  |         | 
350  | 0  |     if (cs->binary_number == cs->number)  | 
351  | 0  |       cs->state |= MY_CS_BINSORT;  | 
352  |  |       | 
353  | 0  |     newcs->state|= cs->state;  | 
354  |  |       | 
355  | 0  |     if (!(newcs->state & MY_CS_COMPILED))  | 
356  | 0  |     { | 
357  | 0  |       if (cs_copy_data(newcs,cs))  | 
358  | 0  |         return MY_XML_ERROR;  | 
359  |  |  | 
360  | 0  |       newcs->levels_for_order= 1;  | 
361  |  |         | 
362  | 0  |       if (!strcmp(cs->cs_name.str,"ucs2") )  | 
363  | 0  |       { | 
364  | 0  | #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)  | 
365  | 0  |         copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?  | 
366  | 0  |                                   &my_charset_ucs2_unicode_nopad_ci :  | 
367  | 0  |                                   &my_charset_ucs2_unicode_ci,  | 
368  | 0  |                                   cs);  | 
369  | 0  |         newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;  | 
370  | 0  | #endif          | 
371  | 0  |       }  | 
372  | 0  |       else if (!strcmp(cs->cs_name.str, "utf8") ||  | 
373  | 0  |                !strcmp(cs->cs_name.str, "utf8mb3"))  | 
374  | 0  |       { | 
375  | 0  | #if defined (HAVE_CHARSET_utf8mb3) && defined(HAVE_UCA_COLLATIONS)  | 
376  | 0  |         copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?  | 
377  | 0  |                                   &my_charset_utf8mb3_unicode_nopad_ci :  | 
378  | 0  |                                   &my_charset_utf8mb3_unicode_ci,  | 
379  | 0  |                                   cs);  | 
380  | 0  |         newcs->m_ctype= my_charset_utf8mb3_unicode_ci.m_ctype;  | 
381  | 0  |         if (init_state_maps(newcs))  | 
382  | 0  |           return MY_XML_ERROR;  | 
383  | 0  | #endif  | 
384  | 0  |       }  | 
385  | 0  |       else if (!strcmp(cs->cs_name.str, "utf8mb4"))  | 
386  | 0  |       { | 
387  | 0  | #if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)  | 
388  | 0  |         copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?  | 
389  | 0  |                                   &my_charset_utf8mb4_unicode_nopad_ci :  | 
390  | 0  |                                   &my_charset_utf8mb4_unicode_ci,  | 
391  | 0  |                                   cs);  | 
392  | 0  |         newcs->m_ctype= my_charset_utf8mb4_unicode_ci.m_ctype;  | 
393  | 0  |         if (init_state_maps(newcs))  | 
394  | 0  |           return MY_XML_ERROR;  | 
395  | 0  |         newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;  | 
396  | 0  | #endif  | 
397  | 0  |       }  | 
398  | 0  |       else if (!strcmp(cs->cs_name.str, "utf16"))  | 
399  | 0  |       { | 
400  | 0  | #if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)  | 
401  | 0  |         copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?  | 
402  | 0  |                                   &my_charset_utf16_unicode_nopad_ci :  | 
403  | 0  |                                   &my_charset_utf16_unicode_ci,  | 
404  | 0  |                                   cs);  | 
405  | 0  |         newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;  | 
406  | 0  | #endif  | 
407  | 0  |       }  | 
408  | 0  |       else if (!strcmp(cs->cs_name.str, "utf32"))  | 
409  | 0  |       { | 
410  | 0  | #if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)  | 
411  | 0  |         copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?  | 
412  | 0  |                                   &my_charset_utf32_unicode_nopad_ci :  | 
413  | 0  |                                   &my_charset_utf32_unicode_ci,  | 
414  | 0  |                                   cs);  | 
415  | 0  |         newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;  | 
416  | 0  | #endif  | 
417  | 0  |       }  | 
418  | 0  |       else  | 
419  | 0  |       { | 
420  | 0  |         simple_cs_init_functions(newcs);  | 
421  | 0  |         newcs->mbminlen= 1;  | 
422  | 0  |         newcs->mbmaxlen= 1;  | 
423  | 0  |         newcs->strxfrm_multiply= 1;  | 
424  | 0  |         if (simple_cs_is_full(newcs))  | 
425  | 0  |         { | 
426  | 0  |           newcs->state |= MY_CS_LOADED;  | 
427  | 0  |         }  | 
428  | 0  |       }  | 
429  | 0  |       add_compiled_extra_collation(newcs);  | 
430  | 0  |     }  | 
431  | 0  |     else  | 
432  | 0  |     { | 
433  |  |       /*  | 
434  |  |         We need the below to make get_charset_name()  | 
435  |  |         and get_charset_number() working even if a  | 
436  |  |         character set has not been really incompiled.  | 
437  |  |         The above functions are used for example  | 
438  |  |         in error message compiler extra/comp_err.c.  | 
439  |  |         If a character set was compiled, this information  | 
440  |  |         will get lost and overwritten in add_compiled_collation().  | 
441  |  |       */  | 
442  | 0  |       newcs->number= cs->number;  | 
443  | 0  |       if (cs->comment)  | 
444  | 0  |   if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME))))  | 
445  | 0  |     return MY_XML_ERROR;  | 
446  | 0  |       if (cs->cs_name.str && ! newcs->cs_name.str)  | 
447  | 0  |       { | 
448  | 0  |         if (!(newcs->cs_name.str= my_once_memdup(cs->cs_name.str,  | 
449  | 0  |                                                  cs->cs_name.length+1,  | 
450  | 0  |                                                  MYF(MY_WME))))  | 
451  | 0  |     return MY_XML_ERROR;  | 
452  | 0  |         newcs->cs_name.length= cs->cs_name.length;  | 
453  | 0  |       }  | 
454  | 0  |       if (cs->coll_name.str)  | 
455  | 0  |       { | 
456  | 0  |   if (!(newcs->coll_name.str= my_once_memdup(cs->coll_name.str,  | 
457  | 0  |                                                    cs->coll_name.length+1,  | 
458  | 0  |                                                   MYF(MY_WME))))  | 
459  | 0  |     return MY_XML_ERROR;  | 
460  | 0  |         newcs->coll_name.length= cs->coll_name.length;  | 
461  | 0  |       }  | 
462  | 0  |     }  | 
463  | 0  |     cs->number= 0;  | 
464  | 0  |     cs->primary_number= 0;  | 
465  | 0  |     cs->binary_number= 0;  | 
466  | 0  |     cs->coll_name.str= 0;  | 
467  | 0  |     cs->coll_name.length= 0;  | 
468  | 0  |     cs->state= 0;  | 
469  | 0  |     cs->sort_order= NULL;  | 
470  | 0  |     cs->tailoring= NULL;  | 
471  | 0  |   }  | 
472  | 0  |   return MY_XML_OK;  | 
473  | 0  | }  | 
474  |  |  | 
475  |  |  | 
476  |  | /**  | 
477  |  |   Report character set initialization errors and warnings.  | 
478  |  |   Be silent by default: no warnings on the client side.  | 
479  |  | */  | 
480  |  | static void  | 
481  |  | default_reporter(enum loglevel level  __attribute__ ((unused)),  | 
482  |  |                  const char *format  __attribute__ ((unused)),  | 
483  |  |                  ...)  | 
484  | 0  | { | 
485  | 0  | }  | 
486  |  | my_error_reporter my_charset_error_reporter= default_reporter;  | 
487  |  |  | 
488  |  |  | 
489  |  | /**  | 
490  |  |   Wrappers for memory functions my_malloc (and friends)  | 
491  |  |   with C-compatbile API without extra "myf" argument.  | 
492  |  | */  | 
493  |  | static void *  | 
494  |  | my_once_alloc_c(size_t size)  | 
495  | 0  | { return my_once_alloc(size, MYF(MY_WME)); } | 
496  |  |  | 
497  |  |  | 
498  |  | static void *  | 
499  |  | my_malloc_c(size_t size)  | 
500  | 0  | { return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); } | 
501  |  |  | 
502  |  |  | 
503  |  | static void *  | 
504  |  | my_realloc_c(void *old, size_t size)  | 
505  | 0  | { return my_realloc(key_memory_charset_loader, old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); } | 
506  |  |  | 
507  |  |  | 
508  |  | /**  | 
509  |  |   Initialize character set loader to use mysys memory management functions.  | 
510  |  |   @param loader  Loader to initialize  | 
511  |  | */  | 
512  |  | void  | 
513  |  | my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)  | 
514  | 0  | { | 
515  | 0  |   loader->error[0]= '\0';  | 
516  | 0  |   loader->once_alloc= my_once_alloc_c;  | 
517  | 0  |   loader->malloc= my_malloc_c;  | 
518  | 0  |   loader->realloc= my_realloc_c;  | 
519  | 0  |   loader->free= my_free;  | 
520  | 0  |   loader->reporter= my_charset_error_reporter;  | 
521  | 0  |   loader->add_collation= add_collation;  | 
522  | 0  | }  | 
523  |  |  | 
524  |  |  | 
/*
  Upper limit, in bytes, for the size of a character set definition file.
  The expansion is parenthesized so that the macro can be used safely
  inside any expression.
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the character set index file inside the charsets directory */
#define MY_CHARSET_INDEX "Index.xml"

/* When not NULL, get_charsets_dir() uses this directory */
const char *charsets_dir= NULL;
529  |  |  | 
530  |  |  | 
531  |  | static my_bool  | 
532  |  | my_read_charset_file(MY_CHARSET_LOADER *loader,  | 
533  |  |                      const char *filename,  | 
534  |  |                      myf myflags)  | 
535  | 0  | { | 
536  | 0  |   uchar *buf;  | 
537  | 0  |   int  fd;  | 
538  | 0  |   size_t len, tmp_len;  | 
539  | 0  |   MY_STAT stat_info;  | 
540  |  |     | 
541  | 0  |   if (!my_stat(filename, &stat_info, MYF(myflags)) ||  | 
542  | 0  |        ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||  | 
543  | 0  |        !(buf= (uchar*) my_malloc(key_memory_charset_loader,len,myflags)))  | 
544  | 0  |     return TRUE;  | 
545  |  |     | 
546  | 0  |   if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0)  | 
547  | 0  |     goto error;  | 
548  | 0  |   tmp_len= mysql_file_read(fd, buf, len, myflags);  | 
549  | 0  |   mysql_file_close(fd, myflags);  | 
550  | 0  |   if (tmp_len != len)  | 
551  | 0  |     goto error;  | 
552  |  |     | 
553  | 0  |   if (my_parse_charset_xml(loader, (char *) buf, len))  | 
554  | 0  |   { | 
555  | 0  |     my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",  | 
556  | 0  |                     MYF(0), filename, loader->error);  | 
557  | 0  |     goto error;  | 
558  | 0  |   }  | 
559  |  |     | 
560  | 0  |   my_free(buf);  | 
561  | 0  |   return FALSE;  | 
562  |  |  | 
563  | 0  | error:  | 
564  | 0  |   my_free(buf);  | 
565  | 0  |   return TRUE;  | 
566  | 0  | }  | 
567  |  |  | 
568  |  |  | 
569  |  | char *get_charsets_dir(char *buf)  | 
570  | 0  | { | 
571  | 0  |   const char *sharedir= SHAREDIR;  | 
572  | 0  |   char *res;  | 
573  | 0  |   DBUG_ENTER("get_charsets_dir"); | 
574  |  | 
  | 
575  | 0  |   if (charsets_dir != NULL)  | 
576  | 0  |     strmake(buf, charsets_dir, FN_REFLEN-1);  | 
577  | 0  |   else  | 
578  | 0  |   { | 
579  | 0  |     if (test_if_hard_path(sharedir) ||  | 
580  | 0  |   is_prefix(sharedir, DEFAULT_CHARSET_HOME))  | 
581  | 0  |       strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);  | 
582  | 0  |     else  | 
583  | 0  |       strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,  | 
584  | 0  |         NullS);  | 
585  | 0  |   }  | 
586  | 0  |   res= convert_dirname(buf,buf,NullS);  | 
587  | 0  |   DBUG_PRINT("info",("charsets dir: '%s'", buf)); | 
588  | 0  |   DBUG_RETURN(res);  | 
589  | 0  | }  | 
590  |  |  | 
591  |  | CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL}; | 
592  |  | CHARSET_INFO *default_charset_info = &my_charset_latin1;  | 
593  |  |  | 
594  |  |  | 
595  |  | /*  | 
596  |  |   Add standard character set compiled into the application  | 
597  |  |   All related character sets should share same cname  | 
598  |  | */  | 
599  |  |  | 
600  |  | void add_compiled_collation(struct charset_info_st *cs)  | 
601  | 0  | { | 
602  | 0  |   DBUG_ASSERT(cs->number < array_elements(all_charsets));  | 
603  | 0  |   all_charsets[cs->number]= cs;  | 
604  | 0  |   cs->state|= MY_CS_AVAILABLE;  | 
605  | 0  |   if ((my_hash_insert(&charset_name_hash, (uchar*) cs)))  | 
606  | 0  |   { | 
607  |  | #ifndef DBUG_OFF  | 
608  |  |     CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash,  | 
609  |  |                                                       (uchar*) cs->cs_name.str,  | 
610  |  |                                                       cs->cs_name.length);  | 
611  |  |     DBUG_ASSERT(org);  | 
612  |  |     DBUG_ASSERT(org->cs_name.str == cs->cs_name.str);  | 
613  |  |     DBUG_ASSERT(org->cs_name.length == strlen(cs->cs_name.str));  | 
614  |  | #endif  | 
615  | 0  |   }  | 
616  | 0  | }  | 
617  |  |  | 
618  |  |  | 
619  |  | /*  | 
620  |  |   Add optional characters sets from ctype-extra.c  | 
621  |  |  | 
622  |  |   If cname is already in use, replace csname in new object with a pointer to  | 
623  |  |   the already used csname to ensure that all csname's points to the same string  | 
624  |  |   for the same character set.  | 
625  |  | */  | 
626  |  |  | 
627  |  |  | 
628  |  | void add_compiled_extra_collation(struct charset_info_st *cs)  | 
629  | 0  | { | 
630  | 0  |   DBUG_ASSERT(cs->number < array_elements(all_charsets));  | 
631  | 0  |   all_charsets[cs->number]= cs;  | 
632  | 0  |   cs->state|= MY_CS_AVAILABLE;  | 
633  | 0  |   if ((my_hash_insert(&charset_name_hash, (uchar*) cs)))  | 
634  | 0  |   { | 
635  | 0  |     CHARSET_INFO *org= (CHARSET_INFO*) my_hash_search(&charset_name_hash,  | 
636  | 0  |                                                       (uchar*) cs->cs_name.str,  | 
637  | 0  |                                                       cs->cs_name.length);  | 
638  | 0  |     cs->cs_name= org->cs_name;  | 
639  | 0  |   }  | 
640  | 0  | }  | 
641  |  |  | 
642  |  |  | 
643  |  |  | 
644  |  | static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;  | 
645  |  | static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;  | 
646  |  |  | 
647  |  | typedef struct  | 
648  |  | { | 
649  |  |   ulonglong use_count;  | 
650  |  | } MY_COLLATION_STATISTICS;  | 
651  |  |  | 
652  |  |  | 
653  |  | static MY_COLLATION_STATISTICS my_collation_statistics[MY_ALL_CHARSETS_SIZE];  | 
654  |  |  | 
655  |  |  | 
656  |  | my_bool my_collation_is_known_id(uint id)  | 
657  | 0  | { | 
658  | 0  |   return id > 0 && id < array_elements(all_charsets) && all_charsets[id] ?  | 
659  | 0  |          TRUE : FALSE;  | 
660  | 0  | }  | 
661  |  |  | 
662  |  |  | 
663  |  | /*  | 
664  |  |   Collation use statistics functions do not lock  | 
665  |  |   counters to avoid mutex contention. This can lose  | 
666  |  |   some counter increments with high thread concurrency.  | 
667  |  |   But this should be Ok, as we don't need exact numbers.  | 
668  |  | */  | 
669  |  | static inline void my_collation_statistics_inc_use_count(uint id)  | 
670  | 0  | { | 
671  | 0  |   DBUG_ASSERT(my_collation_is_known_id(id));  | 
672  | 0  |   my_collation_statistics[id].use_count++;  | 
673  | 0  | }  | 
674  |  |  | 
675  |  |  | 
676  |  | ulonglong my_collation_statistics_get_use_count(uint id)  | 
677  | 0  | { | 
678  | 0  |   DBUG_ASSERT(my_collation_is_known_id(id));  | 
679  | 0  |   return my_collation_statistics[id].use_count;  | 
680  | 0  | }  | 
681  |  |  | 
682  |  |  | 
683  |  | const char *my_collation_get_tailoring(uint id)  | 
684  | 0  | { | 
685  |  |   /* all_charsets[id]->tailoring is never changed after server startup. */  | 
686  | 0  |   DBUG_ASSERT(my_collation_is_known_id(id));  | 
687  | 0  |   return all_charsets[id]->tailoring;  | 
688  | 0  | }  | 
689  |  |  | 
690  |  |  | 
691  |  | HASH charset_name_hash;  | 
692  |  |  | 
693  |  | static uchar *get_charset_key(const uchar *object,  | 
694  |  |                               size_t *size,  | 
695  |  |                               my_bool not_used __attribute__((unused)))  | 
696  | 0  | { | 
697  | 0  |   CHARSET_INFO *cs= (CHARSET_INFO*) object;  | 
698  | 0  |   *size= cs->cs_name.length;  | 
699  | 0  |   return (uchar*) cs->cs_name.str;  | 
700  | 0  | }  | 
701  |  |  | 
702  |  | static void init_available_charsets(void)  | 
703  | 0  | { | 
704  | 0  |   char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];  | 
705  | 0  |   struct charset_info_st **cs;  | 
706  | 0  |   MY_CHARSET_LOADER loader;  | 
707  | 0  |   DBUG_ENTER("init_available_charsets"); | 
708  |  | 
  | 
709  | 0  |   bzero((char*) &all_charsets,sizeof(all_charsets));  | 
710  | 0  |   bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics));  | 
711  |  | 
  | 
712  | 0  |   my_hash_init2(key_memory_charsets, &charset_name_hash, 16,  | 
713  | 0  |                 &my_charset_latin1, 64, 0, 0, get_charset_key,  | 
714  | 0  |                 0, 0, HASH_UNIQUE);  | 
715  |  | 
  | 
716  | 0  |   init_compiled_charsets(MYF(0));  | 
717  |  |  | 
718  |  |   /* Copy compiled charsets */  | 
719  | 0  |   for (cs= (struct charset_info_st**) all_charsets;  | 
720  | 0  |        cs < (struct charset_info_st**) all_charsets +  | 
721  | 0  |             array_elements(all_charsets)-1 ;  | 
722  | 0  |        cs++)  | 
723  | 0  |   { | 
724  | 0  |     if (*cs)  | 
725  | 0  |     { | 
726  | 0  |       DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN);  | 
727  | 0  |       if (cs[0]->m_ctype)  | 
728  | 0  |         if (init_state_maps(*cs))  | 
729  | 0  |           *cs= NULL;  | 
730  | 0  |     }  | 
731  | 0  |   }  | 
732  |  | 
  | 
733  | 0  |   my_charset_loader_init_mysys(&loader);  | 
734  | 0  |   strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);  | 
735  | 0  |   my_read_charset_file(&loader, fname, MYF(0));  | 
736  | 0  |   DBUG_VOID_RETURN;  | 
737  | 0  | }  | 
738  |  |  | 
739  |  |  | 
/*
  Release character set resources.

  Resets the once-control variable to its template value and frees
  charset_name_hash.
  NOTE(review): resetting charsets_initialized presumably allows
  init_available_charsets() to run again on the next lookup - confirm.
*/
void free_charsets(void)
{
  charsets_initialized= charsets_template;
  my_hash_free(&charset_name_hash);
}
745  |  |  | 
746  |  |  | 
747  |  | static const char*  | 
748  |  | get_collation_name_alias(const char *name, char *buf, size_t bufsize, myf flags)  | 
749  | 0  | { | 
750  | 0  |   if (!strncasecmp(name, "utf8_", 5))  | 
751  | 0  |   { | 
752  | 0  |     my_snprintf(buf, bufsize, "utf8mb%c_%s",  | 
753  | 0  |        flags & MY_UTF8_IS_UTF8MB3 ? '3' : '4', name + 5);  | 
754  | 0  |     return buf;  | 
755  | 0  |   }  | 
756  | 0  |   return NULL;  | 
757  | 0  | }  | 
758  |  |  | 
759  |  |  | 
760  |  | uint get_collation_number(const char *name, myf flags)  | 
761  | 0  | { | 
762  | 0  |   uint id;  | 
763  | 0  |   char alias[64];  | 
764  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
765  | 0  |   if ((id= get_collation_number_internal(name)))  | 
766  | 0  |     return id;  | 
767  | 0  |   if ((name= get_collation_name_alias(name, alias, sizeof(alias),flags)))  | 
768  | 0  |     return get_collation_number_internal(name);  | 
769  | 0  |   return 0;  | 
770  | 0  | }  | 
771  |  |  | 
772  |  |  | 
773  |  | static uint  | 
774  |  | get_charset_number_internal(const char *charset_name, uint cs_flags)  | 
775  | 0  | { | 
776  | 0  |   CHARSET_INFO **cs;  | 
777  |  |     | 
778  | 0  |   for (cs= all_charsets;  | 
779  | 0  |        cs < all_charsets + array_elements(all_charsets);  | 
780  | 0  |        cs++)  | 
781  | 0  |   { | 
782  | 0  |     if ( cs[0] && cs[0]->cs_name.str && (cs[0]->state & cs_flags) &&  | 
783  | 0  |          !my_strcasecmp_latin1(cs[0]->cs_name.str, charset_name))  | 
784  | 0  |       return cs[0]->number;  | 
785  | 0  |   }    | 
786  | 0  |   return 0;  | 
787  | 0  | }  | 
788  |  |  | 
789  |  |  | 
790  |  | uint get_charset_number(const char *charset_name, uint cs_flags, myf flags)  | 
791  | 0  | { | 
792  | 0  |   uint id;  | 
793  | 0  |   const char *new_charset_name= flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" :  | 
794  | 0  |                                                              "utf8mb4";  | 
795  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
796  | 0  |   if ((id= get_charset_number_internal(charset_name, cs_flags)))  | 
797  | 0  |     return id;  | 
798  | 0  |   if ((charset_name= !my_strcasecmp_latin1(charset_name, "utf8") ?  | 
799  | 0  |                       new_charset_name : NULL))  | 
800  | 0  |     return get_charset_number_internal(charset_name, cs_flags);  | 
801  | 0  |   return 0;  | 
802  | 0  | }  | 
803  |  |                     | 
804  |  |  | 
805  |  | const char *get_charset_name(uint charset_number)  | 
806  | 0  | { | 
807  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
808  |  | 
  | 
809  | 0  |   if (charset_number < array_elements(all_charsets))  | 
810  | 0  |   { | 
811  | 0  |     CHARSET_INFO *cs= all_charsets[charset_number];  | 
812  |  | 
  | 
813  | 0  |     if (cs && (cs->number == charset_number) && cs->coll_name.str)  | 
814  | 0  |       return cs->coll_name.str;  | 
815  | 0  |   }  | 
816  |  |     | 
817  | 0  |   return "?";   /* this mimics find_type() */  | 
818  | 0  | }  | 
819  |  |  | 
820  |  |  | 
821  |  | static CHARSET_INFO *inheritance_source_by_id(CHARSET_INFO *cs, uint refid)  | 
822  | 0  | { | 
823  | 0  |   CHARSET_INFO *refcs;  | 
824  | 0  |   return refid && refid != cs->number &&  | 
825  | 0  |          (refcs= all_charsets[refid]) &&  | 
826  | 0  |          (refcs->state & MY_CS_AVAILABLE) ? refcs : NULL;  | 
827  | 0  | }  | 
828  |  |  | 
829  |  |  | 
830  |  | static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs, myf flags)  | 
831  | 0  | { | 
832  | 0  |   const char *beg, *end;  | 
833  | 0  |   if (cs->tailoring &&  | 
834  | 0  |       !strncmp(cs->tailoring, "[import ", 8) &&  | 
835  | 0  |       (end= strchr(cs->tailoring + 8, ']')) &&  | 
836  | 0  |       (beg= cs->tailoring + 8) + MY_CS_COLLATION_NAME_SIZE > end)  | 
837  | 0  |   { | 
838  | 0  |     char name[MY_CS_COLLATION_NAME_SIZE + 1];  | 
839  | 0  |     memcpy(name, beg, end - beg);  | 
840  | 0  |     name[end - beg]= '\0';  | 
841  | 0  |     return inheritance_source_by_id(cs, get_collation_number(name,MYF(flags)));  | 
842  | 0  |   }  | 
843  | 0  |   return NULL;  | 
844  | 0  | }  | 
845  |  |  | 
846  |  |  | 
847  |  | static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs)  | 
848  | 0  | { | 
849  | 0  |   uint refid= get_charset_number_internal(cs->cs_name.str, MY_CS_PRIMARY);  | 
850  | 0  |   return inheritance_source_by_id(cs, refid);  | 
851  | 0  | }  | 
852  |  |  | 
853  |  |  | 
854  |  | static CHARSET_INFO *  | 
855  |  | get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)  | 
856  | 0  | { | 
857  | 0  |   char  buf[FN_REFLEN];  | 
858  | 0  |   struct charset_info_st *cs;  | 
859  |  | 
  | 
860  | 0  |   DBUG_ASSERT(cs_number < array_elements(all_charsets));  | 
861  |  | 
  | 
862  | 0  |   if ((cs= (struct charset_info_st*) all_charsets[cs_number]))  | 
863  | 0  |   { | 
864  | 0  |     if (cs->state & MY_CS_READY)  /* if CS is already initialized */  | 
865  | 0  |     { | 
866  | 0  |       my_collation_statistics_inc_use_count(cs_number);  | 
867  | 0  |       return cs;  | 
868  | 0  |     }  | 
869  |  |  | 
870  |  |     /*  | 
871  |  |       To make things thread safe we are not allowing other threads to interfere  | 
872  |  |       while we may changing the cs_info_table  | 
873  |  |     */  | 
874  | 0  |     mysql_mutex_lock(&THR_LOCK_charset);  | 
875  |  | 
  | 
876  | 0  |     if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */  | 
877  | 0  |     { | 
878  | 0  |       MY_CHARSET_LOADER loader;  | 
879  | 0  |       strxmov(get_charsets_dir(buf), cs->cs_name.str, ".xml", NullS);  | 
880  | 0  |       my_charset_loader_init_mysys(&loader);  | 
881  | 0  |       my_read_charset_file(&loader, buf, flags);  | 
882  | 0  |     }  | 
883  |  | 
  | 
884  | 0  |     if (cs->state & MY_CS_AVAILABLE)  | 
885  | 0  |     { | 
886  | 0  |       if (!(cs->state & MY_CS_READY))  | 
887  | 0  |       { | 
888  | 0  |         if (!simple_8bit_charset_data_is_full(cs))  | 
889  | 0  |         { | 
890  | 0  |           CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs);  | 
891  | 0  |           if (refcs)  | 
892  | 0  |             inherit_charset_data(cs, refcs);  | 
893  | 0  |         }  | 
894  | 0  |         if (!simple_8bit_collation_data_is_full(cs))  | 
895  | 0  |         { | 
896  | 0  |           CHARSET_INFO *refcl= find_collation_data_inheritance_source(cs, flags);  | 
897  | 0  |           if (refcl)  | 
898  | 0  |             inherit_collation_data(cs, refcl);  | 
899  | 0  |         }  | 
900  |  | 
  | 
901  | 0  |         if (my_ci_init_charset(cs, loader) ||  | 
902  | 0  |             my_ci_init_collation(cs, loader))  | 
903  | 0  |         { | 
904  | 0  |           cs= NULL;  | 
905  | 0  |         }  | 
906  | 0  |         else  | 
907  | 0  |           cs->state|= MY_CS_READY;  | 
908  | 0  |       }  | 
909  | 0  |       my_collation_statistics_inc_use_count(cs_number);  | 
910  | 0  |     }  | 
911  | 0  |     else  | 
912  | 0  |       cs= NULL;  | 
913  |  | 
  | 
914  | 0  |     mysql_mutex_unlock(&THR_LOCK_charset);  | 
915  | 0  |   }  | 
916  | 0  |   return cs;  | 
917  | 0  | }  | 
918  |  |  | 
919  |  |  | 
920  |  | CHARSET_INFO *get_charset(uint cs_number, myf flags)  | 
921  | 0  | { | 
922  | 0  |   CHARSET_INFO *cs= NULL;  | 
923  |  | 
  | 
924  | 0  |   if (cs_number == default_charset_info->number)  | 
925  | 0  |     return default_charset_info;  | 
926  |  |  | 
927  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
928  |  | 
  | 
929  | 0  |   if (cs_number < array_elements(all_charsets))  | 
930  | 0  |   { | 
931  | 0  |     MY_CHARSET_LOADER loader;  | 
932  | 0  |     my_charset_loader_init_mysys(&loader);  | 
933  | 0  |     cs= get_internal_charset(&loader, cs_number, flags);  | 
934  | 0  |   }  | 
935  |  | 
  | 
936  | 0  |   if (!cs && (flags & MY_WME))  | 
937  | 0  |   { | 
938  | 0  |     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];  | 
939  | 0  |     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);  | 
940  | 0  |     cs_string[0]='#';  | 
941  | 0  |     int10_to_str(cs_number, cs_string+1, 10);  | 
942  | 0  |     my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);  | 
943  | 0  |   }  | 
944  | 0  |   return cs;  | 
945  | 0  | }  | 
946  |  |  | 
947  |  |  | 
948  |  | /**  | 
949  |  |   Find collation by name: extended version of get_charset_by_name()  | 
950  |  |   to return error messages to the caller.  | 
951  |  |   @param   loader  Character set loader  | 
952  |  |   @param   name    Collation name  | 
953  |  |   @param   flags   Flags  | 
954  |  |   @return          NULL on error, pointer to collation on success  | 
955  |  | */  | 
956  |  |  | 
957  |  | CHARSET_INFO *  | 
958  |  | my_collation_get_by_name(MY_CHARSET_LOADER *loader,  | 
959  |  |                          const char *name, myf flags)  | 
960  | 0  | { | 
961  | 0  |   uint cs_number;  | 
962  | 0  |   CHARSET_INFO *cs;  | 
963  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
964  |  | 
  | 
965  | 0  |   cs_number= get_collation_number(name,flags);  | 
966  | 0  |   my_charset_loader_init_mysys(loader);  | 
967  | 0  |   cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;  | 
968  |  | 
  | 
969  | 0  |   if (!cs && (flags & MY_WME))  | 
970  | 0  |   { | 
971  | 0  |     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];  | 
972  | 0  |     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);  | 
973  | 0  |     my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file);  | 
974  | 0  |   }  | 
975  | 0  |   return cs;  | 
976  | 0  | }  | 
977  |  |  | 
978  |  |  | 
979  |  | CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)  | 
980  | 0  | { | 
981  | 0  |   MY_CHARSET_LOADER loader;  | 
982  | 0  |   my_charset_loader_init_mysys(&loader);  | 
983  | 0  |   return my_collation_get_by_name(&loader, cs_name, flags);  | 
984  | 0  | }  | 
985  |  |  | 
986  |  |  | 
987  |  | /**  | 
988  |  |   Find character set by name: extended version of get_charset_by_csname()  | 
989  |  |   to return error messages to the caller.  | 
990  |  |   @param   loader   Character set loader  | 
991  |  |   @param   name     Collation name  | 
992  |  |   @param   cs_flags Character set flags (e.g. default or binary collation)  | 
993  |  |   @param   flags    Flags  | 
994  |  |   @return           NULL on error, pointer to collation on success  | 
995  |  | */  | 
996  |  | CHARSET_INFO *  | 
997  |  | my_charset_get_by_name(MY_CHARSET_LOADER *loader,  | 
998  |  |                        const char *cs_name, uint cs_flags, myf flags)  | 
999  | 0  | { | 
1000  | 0  |   uint cs_number;  | 
1001  | 0  |   CHARSET_INFO *cs;  | 
1002  | 0  |   DBUG_ENTER("get_charset_by_csname"); | 
1003  | 0  |   DBUG_PRINT("enter",("name: '%s'", cs_name)); | 
1004  |  | 
  | 
1005  | 0  |   my_pthread_once(&charsets_initialized, init_available_charsets);  | 
1006  |  | 
  | 
1007  | 0  |   cs_number= get_charset_number(cs_name, cs_flags, flags);  | 
1008  | 0  |   cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;  | 
1009  |  | 
  | 
1010  | 0  |   if (!cs && (flags & MY_WME))  | 
1011  | 0  |   { | 
1012  | 0  |     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];  | 
1013  | 0  |     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);  | 
1014  | 0  |     my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);  | 
1015  | 0  |   }  | 
1016  |  | 
  | 
1017  | 0  |   DBUG_RETURN(cs);  | 
1018  | 0  | }  | 
1019  |  |  | 
1020  |  |  | 
1021  |  | CHARSET_INFO *  | 
1022  |  | get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)  | 
1023  | 0  | { | 
1024  | 0  |   MY_CHARSET_LOADER loader;  | 
1025  | 0  |   my_charset_loader_init_mysys(&loader);  | 
1026  | 0  |   return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);  | 
1027  | 0  | }  | 
1028  |  |  | 
1029  |  |  | 
1030  |  | /**  | 
1031  |  |   Resolve character set by the character set name (utf8, latin1, ...).  | 
1032  |  |  | 
1033  |  |   The function tries to resolve character set by the specified name. If  | 
1034  |  |   there is character set with the given name, it is assigned to the "cs"  | 
1035  |  |   parameter and FALSE is returned. If there is no such character set,  | 
1036  |  |   "default_cs" is assigned to the "cs" and TRUE is returned.  | 
1037  |  |  | 
1038  |  |   @param[in] cs_name    Character set name.  | 
1039  |  |   @param[in] default_cs Default character set.  | 
1040  |  |   @param[out] cs        Variable to store character set.  | 
1041  |  |  | 
1042  |  |   @return FALSE if character set was resolved successfully; TRUE if there  | 
1043  |  |   is no character set with given name.  | 
1044  |  | */  | 
1045  |  |  | 
1046  |  | my_bool resolve_charset(const char *cs_name,  | 
1047  |  |                         CHARSET_INFO *default_cs,  | 
1048  |  |                         CHARSET_INFO **cs,  | 
1049  |  |                         myf flags)  | 
1050  | 0  | { | 
1051  | 0  |   *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, flags);  | 
1052  |  | 
  | 
1053  | 0  |   if (*cs == NULL)  | 
1054  | 0  |   { | 
1055  | 0  |     *cs= default_cs;  | 
1056  | 0  |     return TRUE;  | 
1057  | 0  |   }  | 
1058  |  |  | 
1059  | 0  |   return FALSE;  | 
1060  | 0  | }  | 
1061  |  |  | 
1062  |  |  | 
1063  |  | /**  | 
1064  |  |   Resolve collation by the collation name (utf8_general_ci, ...).  | 
1065  |  |  | 
1066  |  |   The function tries to resolve collation by the specified name. If there  | 
1067  |  |   is collation with the given name, it is assigned to the "cl" parameter  | 
1068  |  |   and FALSE is returned. If there is no such collation, "default_cl" is  | 
1069  |  |   assigned to the "cl" and TRUE is returned.  | 
1070  |  |  | 
1071  |  |   @param[out] cl        Variable to store collation.  | 
1072  |  |   @param[in] cl_name    Collation name.  | 
1073  |  |   @param[in] default_cl Default collation.  | 
1074  |  |  | 
1075  |  |   @return FALSE if collation was resolved successfully; TRUE if there is no  | 
1076  |  |   collation with given name.  | 
1077  |  | */  | 
1078  |  |  | 
1079  |  | my_bool resolve_collation(const char *cl_name,  | 
1080  |  |                           CHARSET_INFO *default_cl,  | 
1081  |  |                           CHARSET_INFO **cl,  | 
1082  |  |                           myf my_flags)  | 
1083  | 0  | { | 
1084  | 0  |   *cl= get_charset_by_name(cl_name, my_flags);  | 
1085  |  | 
  | 
1086  | 0  |   if (*cl == NULL)  | 
1087  | 0  |   { | 
1088  | 0  |     *cl= default_cl;  | 
1089  | 0  |     return TRUE;  | 
1090  | 0  |   }  | 
1091  |  |  | 
1092  | 0  |   return FALSE;  | 
1093  | 0  | }  | 
1094  |  |  | 
1095  |  |  | 
/*
  Escape string with backslashes (\)

  SYNOPSIS
    escape_string_for_mysql()
    charset_info        Charset of the strings
    to                  Buffer for escaped string
    to_length           Length of destination buffer, or 0
    from                The string to escape
    length              The length of the string to escape
    overflow            Set to 1 if the escaped string did not fit in
                        the to buffer, to 0 otherwise

  DESCRIPTION
    This escapes the contents of a string by adding backslashes before special
    characters, and turning others into specific escape sequences, such as
    turning newlines into \n and null bytes into \0.

    When the output does not fit, processing stops at the first character
    that cannot be stored completely; the output written so far is still
    NUL-terminated.

  NOTE
    To maintain compatibility with the old C API, to_length may be 0 to mean
    "big enough". In that case the buffer is treated as having room for
    2*length bytes plus the terminating NUL.

  RETURN VALUES
    #           The length of the escaped string (without the terminating NUL)
*/

size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
                               char *to, size_t to_length,
                               const char *from, size_t length,
                               my_bool *overflow)
{
  const char *to_start= to;
  /*
    to_end is the last position where the terminating NUL may be stored:
    one byte of an explicitly sized buffer is reserved for it.
  */
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
  *overflow= FALSE;
  for (end= from + length; from < end; from++)
  {
    /* Non-zero means: emit a backslash followed by this character */
    char escape= 0;
#ifdef USE_MB
    int tmp_length= my_ci_charlen(charset_info, (const uchar *) from, (const uchar *) end);
    if (tmp_length > 1)
    {
      /* A multi-byte character: copy all of its bytes unescaped */
      if (to + tmp_length > to_end)
      {
        *overflow= TRUE;
        break;
      }
      while (tmp_length--)
        *to++= *from++;
      /* Compensate for the from++ done by the for loop */
      from--;
      continue;
    }
    /*
     If the next character appears to begin a multi-byte character, we
     escape that first byte of that apparent multi-byte character. (The
     character just looks like a multi-byte character -- if it were actually
     a multi-byte character, it would have been passed through in the test
     above.)

     Without this check, we can create a problem by converting an invalid
     multi-byte character into a valid one. For example, 0xbf27 is not
     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
    */
    if (tmp_length < 1) /* Bad byte sequence */
      escape= *from;
    else
#endif
    /*
      Note: with USE_MB defined this switch is the "else" branch of the
      bad byte sequence test above.
    */
    switch (*from) {
    case 0:       /* Must be escaped for 'mysql' */
      escape= '0';
      break;
    case '\n':        /* Must be escaped for logs */
      escape= 'n';
      break;
    case '\r':
      escape= 'r';
      break;
    case '\\':
      escape= '\\';
      break;
    case '\'':
      escape= '\'';
      break;
    case '"':       /* Better safe than sorry */
      escape= '"';
      break;
    case '\032':      /* This gives problems on Win32 */
      escape= 'Z';
      break;
    }
    if (escape)
    {
      /* An escape sequence needs two bytes of output */
      if (to + 2 > to_end)
      {
        *overflow= TRUE;
        break;
      }
      *to++= '\\';
      *to++= escape;
    }
    else
    {
      /* An ordinary character is copied as is */
      if (to + 1 > to_end)
      {
        *overflow= TRUE;
        break;
      }
      *to++= *from;
    }
  }
  /* Always terminate the output, also after an overflow */
  *to= 0;
  return (size_t) (to - to_start);
}
1208  |  |  | 
1209  |  |  | 
#ifdef BACKSLASH_MBTAIL
/*
  Return the character set used for file system names.

  The answer is computed on the first call and cached: cp932 when the
  active code page is 932 (and cp932 support is compiled in),
  my_charset_bin otherwise.
*/
CHARSET_INFO *fs_character_set()
{
  static CHARSET_INFO *fs_cset_cache;

  if (!fs_cset_cache)
  {
#ifdef HAVE_CHARSET_cp932
    if (GetACP() == 932)
      fs_cset_cache= &my_charset_cp932_japanese_ci;
    else
#endif
      fs_cset_cache= &my_charset_bin;
  }
  return fs_cset_cache;
}
#endif
1224  |  |  | 
1225  |  | /*  | 
1226  |  |   Escape apostrophes by doubling them up  | 
1227  |  |  | 
1228  |  |   SYNOPSIS  | 
1229  |  |     escape_quotes_for_mysql()  | 
1230  |  |     charset_info        Charset of the strings  | 
1231  |  |     to                  Buffer for escaped string  | 
1232  |  |     to_length           Length of destination buffer, or 0  | 
1233  |  |     from                The string to escape  | 
1234  |  |     length              The length of the string to escape  | 
1235  |  |     overflow            Set to 1 if the buffer overflows  | 
1236  |  |  | 
1237  |  |   DESCRIPTION  | 
1238  |  |     This escapes the contents of a string by doubling up any apostrophes that  | 
1239  |  |     it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in  | 
1240  |  |     effect on the server.  | 
1241  |  |  | 
1242  |  |   NOTE  | 
1243  |  |     To be consistent with escape_string_for_mysql(), to_length may be 0 to  | 
1244  |  |     mean "big enough"  | 
1245  |  |  | 
1246  |  |   RETURN VALUES  | 
1247  |  |      The length of the escaped string  | 
1248  |  | */  | 
1249  |  |  | 
1250  |  | size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,  | 
1251  |  |                                char *to, size_t to_length,  | 
1252  |  |                                const char *from, size_t length,  | 
1253  |  |                                my_bool *overflow)  | 
1254  | 0  | { | 
1255  | 0  |   const char *to_start= to;  | 
1256  | 0  |   const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);  | 
1257  | 0  | #ifdef USE_MB  | 
1258  | 0  |   my_bool use_mb_flag= my_ci_use_mb(charset_info);  | 
1259  | 0  | #endif  | 
1260  | 0  |   *overflow= FALSE;  | 
1261  | 0  |   for (end= from + length; from < end; from++)  | 
1262  | 0  |   { | 
1263  | 0  | #ifdef USE_MB  | 
1264  | 0  |     int tmp_length;  | 
1265  | 0  |     if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))  | 
1266  | 0  |     { | 
1267  | 0  |       if (to + tmp_length > to_end)  | 
1268  | 0  |       { | 
1269  | 0  |         *overflow= TRUE;  | 
1270  | 0  |         break;  | 
1271  | 0  |       }  | 
1272  | 0  |       while (tmp_length--)  | 
1273  | 0  |   *to++= *from++;  | 
1274  | 0  |       from--;  | 
1275  | 0  |       continue;  | 
1276  | 0  |     }  | 
1277  |  |     /*  | 
1278  |  |       We don't have the same issue here with a non-multi-byte character being  | 
1279  |  |       turned into a multi-byte character by the addition of an escaping  | 
1280  |  |       character, because we are only escaping the ' character with itself.  | 
1281  |  |      */  | 
1282  | 0  | #endif  | 
1283  | 0  |     if (*from == '\'')  | 
1284  | 0  |     { | 
1285  | 0  |       if (to + 2 > to_end)  | 
1286  | 0  |       { | 
1287  | 0  |         *overflow= TRUE;  | 
1288  | 0  |         break;  | 
1289  | 0  |       }  | 
1290  | 0  |       *to++= '\'';  | 
1291  | 0  |       *to++= '\'';  | 
1292  | 0  |     }  | 
1293  | 0  |     else  | 
1294  | 0  |     { | 
1295  | 0  |       if (to + 1 > to_end)  | 
1296  | 0  |       { | 
1297  | 0  |         *overflow= TRUE;  | 
1298  | 0  |         break;  | 
1299  | 0  |       }  | 
1300  | 0  |       *to++= *from;  | 
1301  | 0  |     }  | 
1302  | 0  |   }  | 
1303  | 0  |   *to= 0;  | 
1304  | 0  |   return (size_t) (to - to_start);  | 
1305  | 0  | }  | 
1306  |  |  | 
1307  |  |  | 
/*
  How well a MySQL character set corresponds to an OS character set.
  Used as the "param" member of MY_CSET_OS_NAME.
*/
typedef enum my_cs_match_type_enum
{
  /* MySQL and OS charsets are fully compatible */
  my_cs_exact,
  /* MySQL charset is very close to OS charset  */
  my_cs_approx,
  /*
    MySQL knows this charset, but it is not supported as client character set.
  */
  my_cs_unsupp
} my_cs_match_type;
1319  |  |  | 
1320  |  |  | 
/*
  One entry of the OS character set name to MySQL character set name
  mapping table.
*/
typedef struct str2str_st
{
  const char* os_name;      /* Character set name as known to the OS */
  const char* my_name;      /* Corresponding MySQL character set name */
  my_cs_match_type param;   /* How closely the two character sets match */
} MY_CSET_OS_NAME;
1327  |  |  | 
1328  |  | static const MY_CSET_OS_NAME charsets[] =  | 
1329  |  | { | 
1330  |  | #ifdef _WIN32  | 
1331  |  |   {"cp437",          "cp850",    my_cs_approx}, | 
1332  |  |   {"cp850",          "cp850",    my_cs_exact}, | 
1333  |  |   {"cp852",          "cp852",    my_cs_exact}, | 
1334  |  |   {"cp858",          "cp850",    my_cs_approx}, | 
1335  |  |   {"cp866",          "cp866",    my_cs_exact}, | 
1336  |  |   {"cp874",          "tis620",   my_cs_approx}, | 
1337  |  |   {"cp932",          "cp932",    my_cs_exact}, | 
1338  |  |   {"cp936",          "gbk",      my_cs_approx}, | 
1339  |  |   {"cp949",          "euckr",    my_cs_approx}, | 
1340  |  |   {"cp950",          "big5",     my_cs_exact}, | 
1341  |  |   {"cp1200",         "utf16le",  my_cs_unsupp}, | 
1342  |  |   {"cp1201",         "utf16",    my_cs_unsupp}, | 
1343  |  |   {"cp1250",         "cp1250",   my_cs_exact}, | 
1344  |  |   {"cp1251",         "cp1251",   my_cs_exact}, | 
1345  |  |   {"cp1252",         "latin1",   my_cs_exact}, | 
1346  |  |   {"cp1253",         "greek",    my_cs_exact}, | 
1347  |  |   {"cp1254",         "latin5",   my_cs_exact}, | 
1348  |  |   {"cp1255",         "hebrew",   my_cs_approx}, | 
1349  |  |   {"cp1256",         "cp1256",   my_cs_exact}, | 
1350  |  |   {"cp1257",         "cp1257",   my_cs_exact}, | 
1351  |  |   {"cp10000",        "macroman", my_cs_exact}, | 
1352  |  |   {"cp10001",        "sjis",     my_cs_approx}, | 
1353  |  |   {"cp10002",        "big5",     my_cs_approx}, | 
1354  |  |   {"cp10008",        "gb2312",   my_cs_approx}, | 
1355  |  |   {"cp10021",        "tis620",   my_cs_approx}, | 
1356  |  |   {"cp10029",        "macce",    my_cs_exact}, | 
1357  |  |   {"cp12001",        "utf32",    my_cs_unsupp}, | 
1358  |  |   {"cp20107",        "swe7",     my_cs_exact}, | 
1359  |  |   {"cp20127",        "latin1",   my_cs_approx}, | 
1360  |  |   {"cp20866",        "koi8r",    my_cs_exact}, | 
1361  |  |   {"cp20932",        "ujis",     my_cs_exact}, | 
1362  |  |   {"cp20936",        "gb2312",   my_cs_approx}, | 
1363  |  |   {"cp20949",        "euckr",    my_cs_approx}, | 
1364  |  |   {"cp21866",        "koi8u",    my_cs_exact}, | 
1365  |  |   {"cp28591",        "latin1",   my_cs_approx}, | 
1366  |  |   {"cp28592",        "latin2",   my_cs_exact}, | 
1367  |  |   {"cp28597",        "greek",    my_cs_exact}, | 
1368  |  |   {"cp28598",        "hebrew",   my_cs_exact}, | 
1369  |  |   {"cp28599",        "latin5",   my_cs_exact}, | 
1370  |  |   {"cp28603",        "latin7",   my_cs_exact}, | 
1371  |  | #ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE  | 
1372  |  |   {"cp28605",        "latin9",   my_cs_exact}, | 
1373  |  | #endif  | 
1374  |  |   {"cp38598",        "hebrew",   my_cs_exact}, | 
1375  |  |   {"cp51932",        "ujis",     my_cs_exact}, | 
1376  |  |   {"cp51936",        "gb2312",   my_cs_exact}, | 
1377  |  |   {"cp51949",        "euckr",    my_cs_exact}, | 
1378  |  |   {"cp51950",        "big5",     my_cs_exact}, | 
1379  |  | #ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE  | 
1380  |  |   {"cp54936",        "gb18030",  my_cs_exact}, | 
1381  |  | #endif  | 
1382  |  |   {"cp65001",        "utf8mb4",  my_cs_exact}, | 
1383  |  |   {"cp65001",        "utf8mb3",  my_cs_approx}, | 
1384  |  | #else /* not Windows */  | 
1385  |  |  | 
1386  |  |   {"646",            "latin1",   my_cs_approx}, /* Default on Solaris */ | 
1387  |  |   {"ANSI_X3.4-1968", "latin1",   my_cs_approx}, | 
1388  |  |   {"ansi1251",       "cp1251",   my_cs_exact}, | 
1389  |  |   {"armscii8",       "armscii8", my_cs_exact}, | 
1390  |  |   {"armscii-8",      "armscii8", my_cs_exact}, | 
1391  |  |   {"ASCII",          "latin1",   my_cs_approx}, | 
1392  |  |   {"Big5",           "big5",     my_cs_exact}, | 
1393  |  |   {"cp1251",         "cp1251",   my_cs_exact}, | 
1394  |  |   {"cp1255",         "hebrew",   my_cs_approx}, | 
1395  |  |   {"CP866",          "cp866",    my_cs_exact}, | 
1396  |  |   {"eucCN",          "gb2312",   my_cs_exact}, | 
1397  |  |   {"euc-CN",         "gb2312",   my_cs_exact}, | 
1398  |  |   {"eucJP",          "ujis",     my_cs_exact}, | 
1399  |  |   {"euc-JP",         "ujis",     my_cs_exact}, | 
1400  |  |   {"eucKR",          "euckr",    my_cs_exact}, | 
1401  |  |   {"euc-KR",         "euckr",    my_cs_exact}, | 
1402  |  | #ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE  | 
1403  |  |   {"gb18030",        "gb18030",  my_cs_exact}, | 
1404  |  | #endif  | 
1405  |  |   {"gb2312",         "gb2312",   my_cs_exact}, | 
1406  |  |   {"gbk",            "gbk",      my_cs_exact}, | 
1407  |  |   {"georgianps",     "geostd8",  my_cs_exact}, | 
1408  |  |   {"georgian-ps",    "geostd8",  my_cs_exact}, | 
1409  |  |   {"IBM-1252",       "cp1252",   my_cs_exact}, | 
1410  |  |  | 
1411  |  |   {"iso88591",       "latin1",   my_cs_approx}, | 
1412  |  |   {"ISO_8859-1",     "latin1",   my_cs_approx}, | 
1413  |  |   {"ISO8859-1",      "latin1",   my_cs_approx}, | 
1414  |  |   {"ISO-8859-1",     "latin1",   my_cs_approx}, | 
1415  |  |  | 
1416  |  |   {"iso885913",      "latin7",   my_cs_exact}, | 
1417  |  |   {"ISO_8859-13",    "latin7",   my_cs_exact}, | 
1418  |  |   {"ISO8859-13",     "latin7",   my_cs_exact}, | 
1419  |  |   {"ISO-8859-13",    "latin7",   my_cs_exact}, | 
1420  |  |  | 
1421  |  | #ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE  | 
1422  |  |   {"iso885915",      "latin9",   my_cs_exact}, | 
1423  |  |   {"ISO_8859-15",    "latin9",   my_cs_exact}, | 
1424  |  |   {"ISO8859-15",     "latin9",   my_cs_exact}, | 
1425  |  |   {"ISO-8859-15",    "latin9",   my_cs_exact}, | 
1426  |  | #endif  | 
1427  |  |  | 
1428  |  |   {"iso88592",       "latin2",   my_cs_exact}, | 
1429  |  |   {"ISO_8859-2",     "latin2",   my_cs_exact}, | 
1430  |  |   {"ISO8859-2",      "latin2",   my_cs_exact}, | 
1431  |  |   {"ISO-8859-2",     "latin2",   my_cs_exact}, | 
1432  |  |  | 
1433  |  |   {"iso88597",       "greek",    my_cs_exact}, | 
1434  |  |   {"ISO_8859-7",     "greek",    my_cs_exact}, | 
1435  |  |   {"ISO8859-7",      "greek",    my_cs_exact}, | 
1436  |  |   {"ISO-8859-7",     "greek",    my_cs_exact}, | 
1437  |  |  | 
1438  |  |   {"iso88598",       "hebrew",   my_cs_exact}, | 
1439  |  |   {"ISO_8859-8",     "hebrew",   my_cs_exact}, | 
1440  |  |   {"ISO8859-8",      "hebrew",   my_cs_exact}, | 
1441  |  |   {"ISO-8859-8",     "hebrew",   my_cs_exact}, | 
1442  |  |  | 
1443  |  |   {"iso88599",       "latin5",   my_cs_exact}, | 
1444  |  |   {"ISO_8859-9",     "latin5",   my_cs_exact}, | 
1445  |  |   {"ISO8859-9",      "latin5",   my_cs_exact}, | 
1446  |  |   {"ISO-8859-9",     "latin5",   my_cs_exact}, | 
1447  |  |  | 
1448  |  |   {"koi8r",          "koi8r",    my_cs_exact}, | 
1449  |  |   {"KOI8-R",         "koi8r",    my_cs_exact}, | 
1450  |  |   {"koi8u",          "koi8u",    my_cs_exact}, | 
1451  |  |   {"KOI8-U",         "koi8u",    my_cs_exact}, | 
1452  |  |  | 
1453  |  |   {"roman8",         "hp8",      my_cs_exact}, /* Default on HP UX */ | 
1454  |  |  | 
1455  |  |   {"Shift_JIS",      "sjis",     my_cs_exact}, | 
1456  |  |   {"SJIS",           "sjis",     my_cs_exact}, | 
1457  |  |   {"shiftjisx0213",  "sjis",     my_cs_exact}, | 
1458  |  |  | 
1459  |  |   {"tis620",         "tis620",   my_cs_exact}, | 
1460  |  |   {"tis-620",        "tis620",   my_cs_exact}, | 
1461  |  |  | 
1462  |  |   {"ujis",           "ujis",     my_cs_exact}, | 
1463  |  |  | 
1464  |  |   {"US-ASCII",       "latin1",   my_cs_approx}, | 
1465  |  |  | 
1466  |  |   {"utf8",           "utf8",     my_cs_exact}, | 
1467  |  |   {"utf-8",          "utf8",     my_cs_exact}, | 
1468  |  | #endif  | 
1469  |  |   {NULL,             NULL,       0} | 
1470  |  | };  | 
1471  |  |  | 
1472  |  |  | 
1473  |  | static const char*  | 
1474  |  | my_os_charset_to_mysql_charset(const char* csname)  | 
1475  | 0  | { | 
1476  | 0  |   const MY_CSET_OS_NAME* csp;  | 
1477  | 0  |   for (csp = charsets; csp->os_name; csp++)  | 
1478  | 0  |   { | 
1479  | 0  |     if (!strcasecmp(csp->os_name, csname))  | 
1480  | 0  |     { | 
1481  | 0  |       switch (csp->param)  | 
1482  | 0  |       { | 
1483  | 0  |       case my_cs_exact:  | 
1484  | 0  |         return csp->my_name;  | 
1485  |  |  | 
1486  | 0  |       case my_cs_approx:  | 
1487  |  |         /*  | 
1488  |  |           Maybe we should print a warning eventually:  | 
1489  |  |           character set correspondence is not exact.  | 
1490  |  |         */  | 
1491  | 0  |         return csp->my_name;  | 
1492  |  |  | 
1493  | 0  |       default:  | 
1494  | 0  |         return NULL;  | 
1495  | 0  |       }  | 
1496  | 0  |     }  | 
1497  | 0  |   }  | 
1498  | 0  |   return NULL;  | 
1499  | 0  | }  | 
1500  |  |  | 
1501  |  | const char* my_default_csname()  | 
1502  | 0  | { | 
1503  | 0  |   const char* csname = NULL;  | 
1504  |  | #ifdef _WIN32  | 
1505  |  |   char cpbuf[64];  | 
1506  |  |   UINT cp;  | 
1507  |  |   if (GetACP() == CP_UTF8)  | 
1508  |  |     cp= CP_UTF8;  | 
1509  |  |   else  | 
1510  |  |   { | 
1511  |  |     cp= GetConsoleCP();  | 
1512  |  |     if (cp == 0)  | 
1513  |  |       cp= GetACP();  | 
1514  |  |   }  | 
1515  |  |   snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);  | 
1516  |  |   csname = my_os_charset_to_mysql_charset(cpbuf);  | 
1517  |  | #elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)  | 
1518  | 0  |   if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET)))  | 
1519  | 0  |     csname = my_os_charset_to_mysql_charset(csname);  | 
1520  | 0  | #endif  | 
1521  | 0  |   return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;  | 
1522  | 0  | }  | 
1523  |  |  | 
1524  |  |  | 
1525  |  | #ifdef _WIN32  | 
1526  |  | /**  | 
1527  |  |   Extract codepage number from "cpNNNN" string,  | 
1528  |  |   and check that this codepage is supported.  | 
1529  |  |  | 
1530  |  |   @return 0 - invalid codepage(or unsupported)  | 
1531  |  |           > 0 - valid codepage number.  | 
1532  |  | */  | 
1533  |  | static UINT get_codepage(const char *s)  | 
1534  |  | { | 
1535  |  |   UINT cp;  | 
1536  |  |   if (s[0] != 'c' || s[1] != 'p')  | 
1537  |  |   { | 
1538  |  |     DBUG_ASSERT(0);  | 
1539  |  |     return 0;  | 
1540  |  |   }  | 
1541  |  |   cp= strtoul(s + 2, NULL, 10);  | 
1542  |  |   if (!IsValidCodePage(cp))  | 
1543  |  |   { | 
1544  |  |     /*  | 
1545  |  |      Can happen also with documented CP, i.e 51936  | 
1546  |  |      Perhaps differs from one machine to another.  | 
1547  |  |     */  | 
1548  |  |     return 0;  | 
1549  |  |   }  | 
1550  |  |   return cp;  | 
1551  |  | }  | 
1552  |  |  | 
1553  |  | static UINT mysql_charset_to_codepage(const char *my_cs_name)  | 
1554  |  | { | 
1555  |  |   const MY_CSET_OS_NAME *csp;  | 
1556  |  |   UINT cp=0,tmp;  | 
1557  |  |   for (csp= charsets; csp->os_name; csp++)  | 
1558  |  |   { | 
1559  |  |     if (!strcasecmp(csp->my_name, my_cs_name))  | 
1560  |  |     { | 
1561  |  |       switch (csp->param)  | 
1562  |  |       { | 
1563  |  |       case my_cs_exact:  | 
1564  |  |         tmp= get_codepage(csp->os_name);  | 
1565  |  |         if (tmp)  | 
1566  |  |           return tmp;  | 
1567  |  |         break;  | 
1568  |  |       case my_cs_approx:  | 
1569  |  |         /*  | 
1570  |  |           don't return just yet, perhaps there is a better  | 
1571  |  |           (exact) match later.  | 
1572  |  |         */  | 
1573  |  |         if (!cp)  | 
1574  |  |           cp= get_codepage(csp->os_name);  | 
1575  |  |         continue;  | 
1576  |  |  | 
1577  |  |       default:  | 
1578  |  |         return 0;  | 
1579  |  |       }  | 
1580  |  |     }  | 
1581  |  |   }  | 
1582  |  |   return cp;  | 
1583  |  | }  | 
1584  |  |  | 
1585  |  | /** Set console codepage for MariaDB's charset name */  | 
1586  |  | int my_set_console_cp(const char *csname)  | 
1587  |  | { | 
1588  |  |   UINT cp;  | 
1589  |  |   if (fileno(stdout) < 0 || !isatty(fileno(stdout)))  | 
1590  |  |     return 0;  | 
1591  |  |   cp= mysql_charset_to_codepage(csname);  | 
1592  |  |   if (!cp)  | 
1593  |  |   { | 
1594  |  |     /* No compatible os charset.*/  | 
1595  |  |     return -1;  | 
1596  |  |   }  | 
1597  |  |  | 
1598  |  |   if (GetConsoleOutputCP() != cp && !SetConsoleOutputCP(cp))  | 
1599  |  |   { | 
1600  |  |     return -1;  | 
1601  |  |   }  | 
1602  |  |  | 
1603  |  |   if (GetConsoleCP() != cp && !SetConsoleCP(cp))  | 
1604  |  |   { | 
1605  |  |     return -1;  | 
1606  |  |   }  | 
1607  |  |   return 0;  | 
1608  |  | }  | 
1609  |  | #endif  |