Coverage Report

Created: 2025-09-27 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mysql-server/strings/sql_chars.cc
Line
Count
Source
1
/*
2
   Copyright (c) 2015, 2025, Oracle and/or its affiliates.
3
4
   This program is free software; you can redistribute it and/or modify
5
   it under the terms of the GNU General Public License, version 2.0,
6
   as published by the Free Software Foundation.
7
8
   This program is designed to work with certain software (including
9
   but not limited to OpenSSL) that is licensed under separate terms,
10
   as designated in a particular file or component or in included license
11
   documentation.  The authors of MySQL hereby grant you an additional
12
   permission to link the program and your derivative works with the
13
   separately licensed software that they have either included with
14
   the program or referenced in the documentation.
15
16
   Without limiting anything contained in the foregoing, this file,
17
   which is part of C Driver for MySQL (Connector/C), is also subject to the
18
   Universal FOSS Exception, version 1.0, a copy of which can be found at
19
   http://oss.oracle.com/licenses/universal-foss-exception.
20
21
   This program is distributed in the hope that it will be useful,
22
   but WITHOUT ANY WARRANTY; without even the implied warranty of
23
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
   GNU General Public License, version 2.0, for more details.
25
26
   You should have received a copy of the GNU General Public License
27
   along with this program; if not, write to the Free Software
28
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
29
30
#include <cassert>
31
#include <cstddef>
32
#include <cstdint>
33
34
#include "mysql/strings/m_ctype.h"
35
#include "sql_chars.h"
36
37
/**
  Fill the per-byte character class table in `hint_map` for a character set.

  Every byte value 0..255 first receives a generic class derived from the
  character set's ctype predicates; a fixed set of punctuation characters,
  newline, '$' and '_' is then overridden with dedicated classes.

  @param cs        Character set whose ctype predicates classify each byte.
  @param hint_map  Table to fill; indices 0..255 are written, so it must
                   hold at least 256 entries.
*/
static void hint_lex_init_maps(CHARSET_INFO *cs,
38
410
                               enum hint_lex_char_classes *hint_map) {
39
410
  size_t i;
40
105k
  // Generic pass. The tests are ordered: the multi-byte lead-byte test comes
  // first and therefore wins over the alpha/digit/space tests.
  for (i = 0; i < 256; i++) {
41
104k
    if (my_ismb1st(cs, i))
42
15.9k
      hint_map[i] = HINT_CHR_MB;
43
89.0k
    else if (my_isalpha(cs, i))
44
47.4k
      hint_map[i] = HINT_CHR_IDENT;
45
41.6k
    else if (my_isdigit(cs, i))
46
4.10k
      hint_map[i] = HINT_CHR_DIGIT;
47
37.5k
    else if (my_isspace(cs, i)) {
48
2.55k
      // Sanity check: this branch is only reached when the lead-byte test
      // above was false for the same byte.
      assert(!my_ismb1st(cs, i));
49
2.55k
      hint_map[i] = HINT_CHR_SPACE;
50
2.55k
    } else
51
34.9k
      hint_map[i] = HINT_CHR_CHAR;
52
104k
  }
53
410
  // Override pass: these entries replace whatever the generic pass assigned.
  hint_map[u'*'] = HINT_CHR_ASTERISK;
54
410
  hint_map[u'@'] = HINT_CHR_AT;
55
410
  hint_map[u'`'] = HINT_CHR_BACKQUOTE;
56
410
  hint_map[u'.'] = HINT_CHR_DOT;
57
410
  hint_map[u'"'] = HINT_CHR_DOUBLEQUOTE;
58
410
  // '$' and '_' get the same class as alphabetic characters.
  hint_map[u'$'] = HINT_CHR_IDENT;
59
410
  hint_map[u'_'] = HINT_CHR_IDENT;
60
410
  // '\n' gets its own class here, separate from HINT_CHR_SPACE.
  hint_map[u'\n'] = HINT_CHR_NL;
61
410
  hint_map[u'\''] = HINT_CHR_QUOTE;
62
410
  hint_map[u'/'] = HINT_CHR_SLASH;
63
410
}
64
65
410
/**
  Allocate and fill the lexer lookup tables attached to a character set.

  Three 256-entry tables are produced: the main state map
  (`cs->state_maps->main_map`), the hint map (`cs->state_maps->hint_map`,
  filled by hint_lex_init_maps()) and the identifier map (`cs->ident_map`).
  If both `cs->state_maps` and `cs->ident_map` are already set, nothing is
  done.

  @param loader  Supplies `once_alloc`, used for both allocations.
  @param cs      Character set to initialize; `state_maps` and `ident_map`
                 are assigned here.

  @retval false  Success, or the character set was already initialized.
  @retval true   An allocation returned nullptr (out of memory).
*/
bool init_state_maps(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs) {
66
410
  uint8_t *ident_map = nullptr;
67
410
  enum my_lex_states *state_map = nullptr;
68
69
  // This character set has already been initialized.
70
410
  if (cs->state_maps != nullptr && cs->ident_map != nullptr) return false;
71
72
410
  auto *lex_state_maps = static_cast<lex_state_maps_st *>(
73
410
      loader->once_alloc(sizeof(lex_state_maps_st)));
74
75
410
  if (lex_state_maps == nullptr) return true;  // OOM
76
77
410
  cs->state_maps = lex_state_maps;
78
410
  state_map = lex_state_maps->main_map;
79
80
410
  // NOTE(review): if this allocation fails, cs->state_maps is already set but
  // cs->ident_map is null, so a later call will not take the early return
  // above and will allocate both tables again.
  if (!(cs->ident_map = ident_map =
81
410
            static_cast<uint8_t *>(loader->once_alloc(256))))
82
0
    return true;  // OOM
83
84
410
  hint_lex_init_maps(cs, lex_state_maps->hint_map);
85
86
  /* Fill state_map with states to get a faster parser */
87
105k
  // Unlike hint_lex_init_maps(), the alpha and digit tests run before the
  // multi-byte lead-byte test here.
  for (unsigned i = 0; i < 256; i++) {
88
104k
    if (my_isalpha(cs, i))
89
61.4k
      state_map[i] = MY_LEX_IDENT;
90
43.5k
    else if (my_isdigit(cs, i))
91
4.10k
      state_map[i] = MY_LEX_NUMBER_IDENT;
92
39.4k
    else if (my_ismb1st(cs, i))
93
      /* To get whether it's a possible leading byte for a charset. */
94
1.88k
      state_map[i] = MY_LEX_IDENT;
95
37.5k
    else if (my_isspace(cs, i))
96
2.55k
      state_map[i] = MY_LEX_SKIP;
97
34.9k
    else
98
34.9k
      state_map[i] = MY_LEX_CHAR;
99
104k
  }
100
410
  // Fixed overrides for individual characters; these replace the generic
  // classification assigned by the loop above.
  state_map[u'_'] = state_map[u'$'] = MY_LEX_IDENT;
101
410
  state_map[u'\''] = MY_LEX_STRING;
102
410
  state_map[u'.'] = MY_LEX_REAL_OR_POINT;
103
410
  state_map[u'>'] = state_map[u'='] = state_map[u'!'] = MY_LEX_CMP_OP;
104
410
  state_map[u'<'] = MY_LEX_LONG_CMP_OP;
105
410
  state_map[u'&'] = state_map[u'|'] = MY_LEX_BOOL;
106
410
  state_map[u'#'] = MY_LEX_COMMENT;
107
410
  state_map[u';'] = MY_LEX_SEMICOLON;
108
410
  state_map[u':'] = MY_LEX_SET_VAR;
109
410
  // Byte value 0 is mapped to the end-of-line state.
  state_map[0] = MY_LEX_EOL;
110
410
  state_map[u'/'] = MY_LEX_LONG_COMMENT;
111
410
  state_map[u'*'] = MY_LEX_END_LONG_COMMENT;
112
410
  state_map[u'@'] = MY_LEX_USER_END;
113
410
  state_map[u'`'] = MY_LEX_USER_VARIABLE_DELIMITER;
114
410
  state_map[u'"'] = MY_LEX_STRING_OR_DELIMITER;
115
116
  /*
117
    Create a second map to make it faster to find identifiers
118
  */
119
105k
  // ident_map[i] becomes 1 when state_map[i] is MY_LEX_IDENT or
  // MY_LEX_NUMBER_IDENT at this point, and 0 otherwise.
  for (unsigned i = 0; i < 256; i++) {
120
104k
    ident_map[i] = static_cast<uint8_t>(state_map[i] == MY_LEX_IDENT ||
121
40.8k
                                        state_map[i] == MY_LEX_NUMBER_IDENT);
122
104k
  }
123
124
  /* Special handling of hex and binary strings */
125
410
  // These overrides come after ident_map was built, so ident_map keeps the
  // values computed from the earlier states of these entries.
  state_map[u'x'] = state_map[u'X'] = MY_LEX_IDENT_OR_HEX;
126
410
  state_map[u'b'] = state_map[u'B'] = MY_LEX_IDENT_OR_BIN;
127
410
  state_map[u'n'] = state_map[u'N'] = MY_LEX_IDENT_OR_NCHAR;
128
129
  /* Special handling of '$' for dollar quoted strings */
130
410
  // '$' was MY_LEX_IDENT when ident_map was built, so ident_map['$'] stays 1.
  state_map[u'$'] = MY_LEX_IDENT_OR_DOLLAR_QUOTED_TEXT;
131
132
410
  return false;
133
410
}