1# dialects/mysql/reflection.py
2# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of SQLAlchemy and is released under
6# the MIT License: https://www.opensource.org/licenses/mit-license.php
7
8import re
9
10from .enumerated import ENUM
11from .enumerated import SET
12from .types import DATETIME
13from .types import TIME
14from .types import TIMESTAMP
15from ... import log
16from ... import types as sqltypes
17from ... import util
18
19
20class ReflectedState(object):
21 """Stores raw information about a SHOW CREATE TABLE statement."""
22
23 def __init__(self):
24 self.columns = []
25 self.table_options = {}
26 self.table_name = None
27 self.keys = []
28 self.fk_constraints = []
29 self.ck_constraints = []
30
31
32@log.class_logger
33class MySQLTableDefinitionParser(object):
34 """Parses the results of a SHOW CREATE TABLE statement."""
35
36 def __init__(self, dialect, preparer):
37 self.dialect = dialect
38 self.preparer = preparer
39 self._prep_regexes()
40
41 def parse(self, show_create, charset):
42 state = ReflectedState()
43 state.charset = charset
44 for line in re.split(r"\r?\n", show_create):
45 if line.startswith(" " + self.preparer.initial_quote):
46 self._parse_column(line, state)
47 # a regular table options line
48 elif line.startswith(") "):
49 self._parse_table_options(line, state)
50 # an ANSI-mode table options line
51 elif line == ")":
52 pass
53 elif line.startswith("CREATE "):
54 self._parse_table_name(line, state)
55 # Not present in real reflection, but may be if
56 # loading from a file.
57 elif not line:
58 pass
59 else:
60 type_, spec = self._parse_constraints(line)
61 if type_ is None:
62 util.warn("Unknown schema content: %r" % line)
63 elif type_ == "key":
64 state.keys.append(spec)
65 elif type_ == "fk_constraint":
66 state.fk_constraints.append(spec)
67 elif type_ == "ck_constraint":
68 state.ck_constraints.append(spec)
69 else:
70 pass
71 return state
72
73 def _check_view(self, sql):
74 return bool(self._re_is_view.match(sql))
75
76 def _parse_constraints(self, line):
77 """Parse a KEY or CONSTRAINT line.
78
79 :param line: A line of SHOW CREATE TABLE output
80 """
81
82 # KEY
83 m = self._re_key.match(line)
84 if m:
85 spec = m.groupdict()
86 # convert columns into name, length pairs
87 # NOTE: we may want to consider SHOW INDEX as the
88 # format of indexes in MySQL becomes more complex
89 spec["columns"] = self._parse_keyexprs(spec["columns"])
90 if spec["version_sql"]:
91 m2 = self._re_key_version_sql.match(spec["version_sql"])
92 if m2 and m2.groupdict()["parser"]:
93 spec["parser"] = m2.groupdict()["parser"]
94 if spec["parser"]:
95 spec["parser"] = self.preparer.unformat_identifiers(
96 spec["parser"]
97 )[0]
98 return "key", spec
99
100 # FOREIGN KEY CONSTRAINT
101 m = self._re_fk_constraint.match(line)
102 if m:
103 spec = m.groupdict()
104 spec["table"] = self.preparer.unformat_identifiers(spec["table"])
105 spec["local"] = [c[0] for c in self._parse_keyexprs(spec["local"])]
106 spec["foreign"] = [
107 c[0] for c in self._parse_keyexprs(spec["foreign"])
108 ]
109 return "fk_constraint", spec
110
111 # CHECK constraint
112 m = self._re_ck_constraint.match(line)
113 if m:
114 spec = m.groupdict()
115 return "ck_constraint", spec
116
117 # PARTITION and SUBPARTITION
118 m = self._re_partition.match(line)
119 if m:
120 # Punt!
121 return "partition", line
122
123 # No match.
124 return (None, line)
125
126 def _parse_table_name(self, line, state):
127 """Extract the table name.
128
129 :param line: The first line of SHOW CREATE TABLE
130 """
131
132 regex, cleanup = self._pr_name
133 m = regex.match(line)
134 if m:
135 state.table_name = cleanup(m.group("name"))
136
137 def _parse_table_options(self, line, state):
138 """Build a dictionary of all reflected table-level options.
139
140 :param line: The final line of SHOW CREATE TABLE output.
141 """
142
143 options = {}
144
145 if not line or line == ")":
146 pass
147
148 else:
149 rest_of_line = line[:]
150 for regex, cleanup in self._pr_options:
151 m = regex.search(rest_of_line)
152 if not m:
153 continue
154 directive, value = m.group("directive"), m.group("val")
155 if cleanup:
156 value = cleanup(value)
157 options[directive.lower()] = value
158 rest_of_line = regex.sub("", rest_of_line)
159
160 for nope in ("auto_increment", "data directory", "index directory"):
161 options.pop(nope, None)
162
163 for opt, val in options.items():
164 state.table_options["%s_%s" % (self.dialect.name, opt)] = val
165
166 def _parse_column(self, line, state):
167 """Extract column details.
168
169 Falls back to a 'minimal support' variant if full parse fails.
170
171 :param line: Any column-bearing line from SHOW CREATE TABLE
172 """
173
174 spec = None
175 m = self._re_column.match(line)
176 if m:
177 spec = m.groupdict()
178 spec["full"] = True
179 else:
180 m = self._re_column_loose.match(line)
181 if m:
182 spec = m.groupdict()
183 spec["full"] = False
184 if not spec:
185 util.warn("Unknown column definition %r" % line)
186 return
187 if not spec["full"]:
188 util.warn("Incomplete reflection of column definition %r" % line)
189
190 name, type_, args = spec["name"], spec["coltype"], spec["arg"]
191
192 try:
193 col_type = self.dialect.ischema_names[type_]
194 except KeyError:
195 util.warn(
196 "Did not recognize type '%s' of column '%s'" % (type_, name)
197 )
198 col_type = sqltypes.NullType
199
200 # Column type positional arguments eg. varchar(32)
201 if args is None or args == "":
202 type_args = []
203 elif args[0] == "'" and args[-1] == "'":
204 type_args = self._re_csv_str.findall(args)
205 else:
206 type_args = [int(v) for v in self._re_csv_int.findall(args)]
207
208 # Column type keyword options
209 type_kw = {}
210
211 if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
212 if type_args:
213 type_kw["fsp"] = type_args.pop(0)
214
215 for kw in ("unsigned", "zerofill"):
216 if spec.get(kw, False):
217 type_kw[kw] = True
218 for kw in ("charset", "collate"):
219 if spec.get(kw, False):
220 type_kw[kw] = spec[kw]
221 if issubclass(col_type, (ENUM, SET)):
222 type_args = _strip_values(type_args)
223
224 if issubclass(col_type, SET) and "" in type_args:
225 type_kw["retrieve_as_bitwise"] = True
226
227 type_instance = col_type(*type_args, **type_kw)
228
229 col_kw = {}
230
231 # NOT NULL
232 col_kw["nullable"] = True
233 # this can be "NULL" in the case of TIMESTAMP
234 if spec.get("notnull", False) == "NOT NULL":
235 col_kw["nullable"] = False
236
237 # AUTO_INCREMENT
238 if spec.get("autoincr", False):
239 col_kw["autoincrement"] = True
240 elif issubclass(col_type, sqltypes.Integer):
241 col_kw["autoincrement"] = False
242
243 # DEFAULT
244 default = spec.get("default", None)
245
246 if default == "NULL":
247 # eliminates the need to deal with this later.
248 default = None
249
250 comment = spec.get("comment", None)
251
252 if comment is not None:
253 comment = comment.replace("\\\\", "\\").replace("''", "'")
254
255 sqltext = spec.get("generated")
256 if sqltext is not None:
257 computed = dict(sqltext=sqltext)
258 persisted = spec.get("persistence")
259 if persisted is not None:
260 computed["persisted"] = persisted == "STORED"
261 col_kw["computed"] = computed
262
263 col_d = dict(
264 name=name, type=type_instance, default=default, comment=comment
265 )
266 col_d.update(col_kw)
267 state.columns.append(col_d)
268
269 def _describe_to_create(self, table_name, columns):
270 """Re-format DESCRIBE output as a SHOW CREATE TABLE string.
271
272 DESCRIBE is a much simpler reflection and is sufficient for
273 reflecting views for runtime use. This method formats DDL
274 for columns only- keys are omitted.
275
276 :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
277 SHOW FULL COLUMNS FROM rows must be rearranged for use with
278 this function.
279 """
280
281 buffer = []
282 for row in columns:
283 (name, col_type, nullable, default, extra) = [
284 row[i] for i in (0, 1, 2, 4, 5)
285 ]
286
287 line = [" "]
288 line.append(self.preparer.quote_identifier(name))
289 line.append(col_type)
290 if not nullable:
291 line.append("NOT NULL")
292 if default:
293 if "auto_increment" in default:
294 pass
295 elif col_type.startswith("timestamp") and default.startswith(
296 "C"
297 ):
298 line.append("DEFAULT")
299 line.append(default)
300 elif default == "NULL":
301 line.append("DEFAULT")
302 line.append(default)
303 else:
304 line.append("DEFAULT")
305 line.append("'%s'" % default.replace("'", "''"))
306 if extra:
307 line.append(extra)
308
309 buffer.append(" ".join(line))
310
311 return "".join(
312 [
313 (
314 "CREATE TABLE %s (\n"
315 % self.preparer.quote_identifier(table_name)
316 ),
317 ",\n".join(buffer),
318 "\n) ",
319 ]
320 )
321
322 def _parse_keyexprs(self, identifiers):
323 """Unpack '"col"(2),"col" ASC'-ish strings into components."""
324
325 return [
326 (colname, int(length) if length else None, modifiers)
327 for colname, length, modifiers in self._re_keyexprs.findall(
328 identifiers
329 )
330 ]
331
332 def _prep_regexes(self):
333 """Pre-compile regular expressions."""
334
335 self._re_columns = []
336 self._pr_options = []
337
338 _final = self.preparer.final_quote
339
340 quotes = dict(
341 zip(
342 ("iq", "fq", "esc_fq"),
343 [
344 re.escape(s)
345 for s in (
346 self.preparer.initial_quote,
347 _final,
348 self.preparer._escape_identifier(_final),
349 )
350 ],
351 )
352 )
353
354 self._pr_name = _pr_compile(
355 r"^CREATE (?:\w+ +)?TABLE +"
356 r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
357 self.preparer._unescape_identifier,
358 )
359
360 self._re_is_view = _re_compile(r"^CREATE(?! TABLE)(\s.*)?\sVIEW")
361
362 # `col`,`col2`(32),`col3`(15) DESC
363 #
364 self._re_keyexprs = _re_compile(
365 r"(?:"
366 r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
367 r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes
368 )
369
370 # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
371 self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")
372
373 # 123 or 123,456
374 self._re_csv_int = _re_compile(r"\d+")
375
376 # `colname` <type> [type opts]
377 # (NOT NULL | NULL)
378 # DEFAULT ('value' | CURRENT_TIMESTAMP...)
379 # COMMENT 'comment'
380 # COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
381 # STORAGE (DISK|MEMORY)
382 self._re_column = _re_compile(
383 r" "
384 r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
385 r"(?P<coltype>\w+)"
386 r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
387 r"(?:'(?:''|[^'])*',?)+))\))?"
388 r"(?: +(?P<unsigned>UNSIGNED))?"
389 r"(?: +(?P<zerofill>ZEROFILL))?"
390 r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
391 r"(?: +COLLATE +(?P<collate>[\w_]+))?"
392 r"(?: +(?P<notnull>(?:NOT )?NULL))?"
393 r"(?: +DEFAULT +(?P<default>"
394 r"(?:NULL|'(?:''|[^'])*'|[\-\w\.\(\)]+"
395 r"(?: +ON UPDATE [\-\w\.\(\)]+)?)"
396 r"))?"
397 r"(?: +(?:GENERATED ALWAYS)? ?AS +(?P<generated>\("
398 r".*\))? ?(?P<persistence>VIRTUAL|STORED)?)?"
399 r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
400 r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
401 r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
402 r"(?: +STORAGE +(?P<storage>\w+))?"
403 r"(?: +(?P<extra>.*))?"
404 r",?$" % quotes
405 )
406
407 # Fallback, try to parse as little as possible
408 self._re_column_loose = _re_compile(
409 r" "
410 r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
411 r"(?P<coltype>\w+)"
412 r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
413 r".*?(?P<notnull>(?:NOT )NULL)?" % quotes
414 )
415
416 # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
417 # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
418 # KEY_BLOCK_SIZE size | WITH PARSER name /*!50100 WITH PARSER name */
419 self._re_key = _re_compile(
420 r" "
421 r"(?:(?P<type>\S+) )?KEY"
422 r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
423 r"(?: +USING +(?P<using_pre>\S+))?"
424 r" +\((?P<columns>.+?)\)"
425 r"(?: +USING +(?P<using_post>\S+))?"
426 r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
427 r"(?: +WITH PARSER +(?P<parser>\S+))?"
428 r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
429 r"(?: +/\*(?P<version_sql>.+)\*/ *)?"
430 r",?$" % quotes
431 )
432
433 # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
434 # It means if the MySQL version >= \d+, execute what's in the comment
435 self._re_key_version_sql = _re_compile(
436 r"\!\d+ " r"(?: *WITH PARSER +(?P<parser>\S+) *)?"
437 )
438
439 # CONSTRAINT `name` FOREIGN KEY (`local_col`)
440 # REFERENCES `remote` (`remote_col`)
441 # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
442 # ON DELETE CASCADE ON UPDATE RESTRICT
443 #
444 # unique constraints come back as KEYs
445 kw = quotes.copy()
446 kw["on"] = "RESTRICT|CASCADE|SET NULL|NO ACTION"
447 self._re_fk_constraint = _re_compile(
448 r" "
449 r"CONSTRAINT +"
450 r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
451 r"FOREIGN KEY +"
452 r"\((?P<local>[^\)]+?)\) REFERENCES +"
453 r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
454 r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
455 r"\((?P<foreign>[^\)]+?)\)"
456 r"(?: +(?P<match>MATCH \w+))?"
457 r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
458 r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw
459 )
460
461 # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)'
462 # testing on MariaDB 10.2 shows that the CHECK constraint
463 # is returned on a line by itself, so to match without worrying
464 # about parenthesis in the expression we go to the end of the line
465 self._re_ck_constraint = _re_compile(
466 r" "
467 r"CONSTRAINT +"
468 r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
469 r"CHECK +"
470 r"\((?P<sqltext>.+)\),?" % kw
471 )
472
473 # PARTITION
474 #
475 # punt!
476 self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")
477
478 # Table-level options (COLLATE, ENGINE, etc.)
479 # Do the string options first, since they have quoted
480 # strings we need to get rid of.
481 for option in _options_of_type_string:
482 self._add_option_string(option)
483
484 for option in (
485 "ENGINE",
486 "TYPE",
487 "AUTO_INCREMENT",
488 "AVG_ROW_LENGTH",
489 "CHARACTER SET",
490 "DEFAULT CHARSET",
491 "CHECKSUM",
492 "COLLATE",
493 "DELAY_KEY_WRITE",
494 "INSERT_METHOD",
495 "MAX_ROWS",
496 "MIN_ROWS",
497 "PACK_KEYS",
498 "ROW_FORMAT",
499 "KEY_BLOCK_SIZE",
500 ):
501 self._add_option_word(option)
502
503 self._add_option_regex("UNION", r"\([^\)]+\)")
504 self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
505 self._add_option_regex(
506 "RAID_TYPE",
507 r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
508 )
509
510 _optional_equals = r"(?:\s*(?:=\s*)|\s+)"
511
512 def _add_option_string(self, directive):
513 regex = r"(?P<directive>%s)%s" r"'(?P<val>(?:[^']|'')*?)'(?!')" % (
514 re.escape(directive),
515 self._optional_equals,
516 )
517 self._pr_options.append(
518 _pr_compile(
519 regex, lambda v: v.replace("\\\\", "\\").replace("''", "'")
520 )
521 )
522
523 def _add_option_word(self, directive):
524 regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
525 re.escape(directive),
526 self._optional_equals,
527 )
528 self._pr_options.append(_pr_compile(regex))
529
530 def _add_option_regex(self, directive, regex):
531 regex = r"(?P<directive>%s)%s" r"(?P<val>%s)" % (
532 re.escape(directive),
533 self._optional_equals,
534 regex,
535 )
536 self._pr_options.append(_pr_compile(regex))
537
538
539_options_of_type_string = (
540 "COMMENT",
541 "DATA DIRECTORY",
542 "INDEX DIRECTORY",
543 "PASSWORD",
544 "CONNECTION",
545)
546
547
548def _pr_compile(regex, cleanup=None):
549 """Prepare a 2-tuple of compiled regex and callable."""
550
551 return (_re_compile(regex), cleanup)
552
553
554def _re_compile(regex):
555 """Compile a string to regex, I and UNICODE."""
556
557 return re.compile(regex, re.I | re.UNICODE)
558
559
560def _strip_values(values):
561 "Strip reflected values quotes"
562 strip_values = []
563 for a in values:
564 if a[0:1] == '"' or a[0:1] == "'":
565 # strip enclosing quotes and unquote interior
566 a = a[1:-1].replace(a[0] * 2, a[0])
567 strip_values.append(a)
568 return strip_values