/src/mysql-server/strings/sql_chars.cc
Line | Count | Source |
1 | | /* |
2 | | Copyright (c) 2015, 2025, Oracle and/or its affiliates. |
3 | | |
4 | | This program is free software; you can redistribute it and/or modify |
5 | | it under the terms of the GNU General Public License, version 2.0, |
6 | | as published by the Free Software Foundation. |
7 | | |
8 | | This program is designed to work with certain software (including |
9 | | but not limited to OpenSSL) that is licensed under separate terms, |
10 | | as designated in a particular file or component or in included license |
11 | | documentation. The authors of MySQL hereby grant you an additional |
12 | | permission to link the program and your derivative works with the |
13 | | separately licensed software that they have either included with |
14 | | the program or referenced in the documentation. |
15 | | |
16 | | Without limiting anything contained in the foregoing, this file, |
17 | | which is part of C Driver for MySQL (Connector/C), is also subject to the |
18 | | Universal FOSS Exception, version 1.0, a copy of which can be found at |
19 | | http://oss.oracle.com/licenses/universal-foss-exception. |
20 | | |
21 | | This program is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 | | GNU General Public License, version 2.0, for more details. |
25 | | |
26 | | You should have received a copy of the GNU General Public License |
27 | | along with this program; if not, write to the Free Software |
28 | | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
29 | | |
30 | | #include <cassert> |
31 | | #include <cstddef> |
32 | | #include <cstdint> |
33 | | |
34 | | #include "mysql/strings/m_ctype.h" |
35 | | #include "sql_chars.h" |
36 | | |
37 | | static void hint_lex_init_maps(CHARSET_INFO *cs, |
38 | 410 | enum hint_lex_char_classes *hint_map) { |
39 | 410 | size_t i; |
40 | 105k | for (i = 0; i < 256; i++) { |
41 | 104k | if (my_ismb1st(cs, i)) |
42 | 15.9k | hint_map[i] = HINT_CHR_MB; |
43 | 89.0k | else if (my_isalpha(cs, i)) |
44 | 47.4k | hint_map[i] = HINT_CHR_IDENT; |
45 | 41.6k | else if (my_isdigit(cs, i)) |
46 | 4.10k | hint_map[i] = HINT_CHR_DIGIT; |
47 | 37.5k | else if (my_isspace(cs, i)) { |
48 | 2.55k | assert(!my_ismb1st(cs, i)); |
49 | 2.55k | hint_map[i] = HINT_CHR_SPACE; |
50 | 2.55k | } else |
51 | 34.9k | hint_map[i] = HINT_CHR_CHAR; |
52 | 104k | } |
53 | 410 | hint_map[u'*'] = HINT_CHR_ASTERISK; |
54 | 410 | hint_map[u'@'] = HINT_CHR_AT; |
55 | 410 | hint_map[u'`'] = HINT_CHR_BACKQUOTE; |
56 | 410 | hint_map[u'.'] = HINT_CHR_DOT; |
57 | 410 | hint_map[u'"'] = HINT_CHR_DOUBLEQUOTE; |
58 | 410 | hint_map[u'$'] = HINT_CHR_IDENT; |
59 | 410 | hint_map[u'_'] = HINT_CHR_IDENT; |
60 | 410 | hint_map[u'\n'] = HINT_CHR_NL; |
61 | 410 | hint_map[u'\''] = HINT_CHR_QUOTE; |
62 | 410 | hint_map[u'/'] = HINT_CHR_SLASH; |
63 | 410 | } |
64 | | |
65 | 410 | bool init_state_maps(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs) { |
66 | 410 | uint8_t *ident_map = nullptr; |
67 | 410 | enum my_lex_states *state_map = nullptr; |
68 | | |
69 | | // This character set has already been initialized. |
70 | 410 | if (cs->state_maps != nullptr && cs->ident_map != nullptr) return false; |
71 | | |
72 | 410 | auto *lex_state_maps = static_cast<lex_state_maps_st *>( |
73 | 410 | loader->once_alloc(sizeof(lex_state_maps_st))); |
74 | | |
75 | 410 | if (lex_state_maps == nullptr) return true; // OOM |
76 | | |
77 | 410 | cs->state_maps = lex_state_maps; |
78 | 410 | state_map = lex_state_maps->main_map; |
79 | | |
80 | 410 | if (!(cs->ident_map = ident_map = |
81 | 410 | static_cast<uint8_t *>(loader->once_alloc(256)))) |
82 | 0 | return true; // OOM |
83 | | |
84 | 410 | hint_lex_init_maps(cs, lex_state_maps->hint_map); |
85 | | |
86 | | /* Fill state_map with states to get a faster parser */ |
87 | 105k | for (unsigned i = 0; i < 256; i++) { |
88 | 104k | if (my_isalpha(cs, i)) |
89 | 61.4k | state_map[i] = MY_LEX_IDENT; |
90 | 43.5k | else if (my_isdigit(cs, i)) |
91 | 4.10k | state_map[i] = MY_LEX_NUMBER_IDENT; |
92 | 39.4k | else if (my_ismb1st(cs, i)) |
93 | | /* To get whether it's a possible leading byte for a charset. */ |
94 | 1.88k | state_map[i] = MY_LEX_IDENT; |
95 | 37.5k | else if (my_isspace(cs, i)) |
96 | 2.55k | state_map[i] = MY_LEX_SKIP; |
97 | 34.9k | else |
98 | 34.9k | state_map[i] = MY_LEX_CHAR; |
99 | 104k | } |
100 | 410 | state_map[u'_'] = state_map[u'$'] = MY_LEX_IDENT; |
101 | 410 | state_map[u'\''] = MY_LEX_STRING; |
102 | 410 | state_map[u'.'] = MY_LEX_REAL_OR_POINT; |
103 | 410 | state_map[u'>'] = state_map[u'='] = state_map[u'!'] = MY_LEX_CMP_OP; |
104 | 410 | state_map[u'<'] = MY_LEX_LONG_CMP_OP; |
105 | 410 | state_map[u'&'] = state_map[u'|'] = MY_LEX_BOOL; |
106 | 410 | state_map[u'#'] = MY_LEX_COMMENT; |
107 | 410 | state_map[u';'] = MY_LEX_SEMICOLON; |
108 | 410 | state_map[u':'] = MY_LEX_SET_VAR; |
109 | 410 | state_map[0] = MY_LEX_EOL; |
110 | 410 | state_map[u'/'] = MY_LEX_LONG_COMMENT; |
111 | 410 | state_map[u'*'] = MY_LEX_END_LONG_COMMENT; |
112 | 410 | state_map[u'@'] = MY_LEX_USER_END; |
113 | 410 | state_map[u'`'] = MY_LEX_USER_VARIABLE_DELIMITER; |
114 | 410 | state_map[u'"'] = MY_LEX_STRING_OR_DELIMITER; |
115 | | |
116 | | /* |
117 | | Create a second map to make it faster to find identifiers |
118 | | */ |
119 | 105k | for (unsigned i = 0; i < 256; i++) { |
120 | 104k | ident_map[i] = static_cast<uint8_t>(state_map[i] == MY_LEX_IDENT || |
121 | 40.8k | state_map[i] == MY_LEX_NUMBER_IDENT); |
122 | 104k | } |
123 | | |
124 | | /* Special handling of hex and binary strings */ |
125 | 410 | state_map[u'x'] = state_map[u'X'] = MY_LEX_IDENT_OR_HEX; |
126 | 410 | state_map[u'b'] = state_map[u'B'] = MY_LEX_IDENT_OR_BIN; |
127 | 410 | state_map[u'n'] = state_map[u'N'] = MY_LEX_IDENT_OR_NCHAR; |
128 | | |
129 | | /* Special handling of '$' for dollar quoted strings */ |
130 | 410 | state_map[u'$'] = MY_LEX_IDENT_OR_DOLLAR_QUOTED_TEXT; |
131 | | |
132 | 410 | return false; |
133 | 410 | } |