Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/sqlparse/lexer.py: 86%

#
# Copyright (C) 2009-2020 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""
import re
from threading import Lock

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from io import TextIOBase

from sqlparse import tokens, keywords
from sqlparse.utils import consume


class Lexer:
    """The Lexer supports configurable syntax.
    To add support for additional keywords, use the `add_keywords` method."""

    _default_instance = None
    _lock = Lock()

    # Development notes:
    # - This class is prepared to support additional SQL dialects in the
    #   future by adding further functions that take the place of
    #   default_initialization().
    # - The lexer class uses explicit singleton behavior with the
    #   instance-getter method get_default_instance(). This mechanism has
    #   the advantage that the call signatures of the entry points to the
    #   sqlparse library are not affected, and usage of sqlparse in
    #   third-party code does not need to be adapted. On the other hand,
    #   the current implementation does not easily allow multiple SQL
    #   dialects to be parsed in the same process.
    #   Such behavior can be supported in the future by passing a suitably
    #   initialized lexer object as an additional parameter to the
    #   entry-point functions (such as `parse`). Code will need to be
    #   written to pass down and use such an object. The current
    #   implementation is prepared to support this thread-safe approach
    #   without the default_instance part needing to change its interface.

    @classmethod
    def get_default_instance(cls):
        """Returns the lexer instance used internally
        by the sqlparse core functions."""
        with cls._lock:
            if cls._default_instance is None:
                cls._default_instance = cls()
                cls._default_instance.default_initialization()
        return cls._default_instance
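
    # Editor's illustration (hedged sketch, not part of the module): repeated
    # calls return the same lazily initialized object, so configuration made
    # through it is process-wide.
    #
    #     lexer_a = Lexer.get_default_instance()
    #     lexer_b = Lexer.get_default_instance()
    #     assert lexer_a is lexer_b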

    def default_initialization(self):
        """Initialize the lexer with default dictionaries.
        Useful if you need to revert custom syntax settings."""
        self.clear()
        self.set_SQL_REGEX(keywords.SQL_REGEX)
        self.add_keywords(keywords.KEYWORDS_COMMON)
        self.add_keywords(keywords.KEYWORDS_ORACLE)
        self.add_keywords(keywords.KEYWORDS_MYSQL)
        self.add_keywords(keywords.KEYWORDS_PLPGSQL)
        self.add_keywords(keywords.KEYWORDS_HQL)
        self.add_keywords(keywords.KEYWORDS_MSACCESS)
        self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
        self.add_keywords(keywords.KEYWORDS_BIGQUERY)
        self.add_keywords(keywords.KEYWORDS)
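
    # Editor's illustration (hedged sketch, not part of the module): the
    # dictionaries above are consulted in insertion order, so an entry in
    # KEYWORDS_COMMON shadows the same word in the catch-all KEYWORDS table.
    # Re-running default_initialization() also discards custom syntax;
    # 'FOO' below is a hypothetical keyword used only for illustration.
    #
    #     lexer = Lexer.get_default_instance()
    #     lexer.add_keywords({'FOO': tokens.Keyword})
    #     lexer.default_initialization()   # 'FOO' lexes as tokens.Name again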

    def clear(self):
        """Clear all syntax configurations.
        Useful if you want to load a reduced set of syntax configurations.
        After this call, regexps and keyword dictionaries need to be loaded
        to make the lexer functional again."""
        self._SQL_REGEX = []
        self._keywords = []

    def set_SQL_REGEX(self, SQL_REGEX):
        """Set the list of regexes that will parse the SQL."""
        FLAGS = re.IGNORECASE | re.UNICODE
        self._SQL_REGEX = [
            (re.compile(rx, FLAGS).match, tt)
            for rx, tt in SQL_REGEX
        ]
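
    # Editor's illustration (hedged sketch, not part of the module): loading
    # the reduced configuration that clear() describes -- the default regexes
    # plus only the common keyword table.
    #
    #     lexer = Lexer.get_default_instance()
    #     lexer.clear()
    #     lexer.set_SQL_REGEX(keywords.SQL_REGEX)
    #     lexer.add_keywords(keywords.KEYWORDS_COMMON)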

    def add_keywords(self, keywords):
        """Add a keyword dictionary. Keywords are looked up in the same order
        that dictionaries were added."""
        self._keywords.append(keywords)
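
    # Editor's illustration (hedged sketch, not part of the module): a
    # dialect is extended by appending a dict that maps uppercase words to
    # token types; 'MYSPECIALKW' is a hypothetical keyword.
    #
    #     lexer = Lexer.get_default_instance()
    #     lexer.add_keywords({'MYSPECIALKW': tokens.Keyword})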

    def is_keyword(self, value):
        """Checks for a keyword.

        If the given value is in one of the KEYWORDS_* dictionaries
        it's considered a keyword. Otherwise, tokens.Name is returned.
        """
        val = value.upper()
        for kwdict in self._keywords:
            if val in kwdict:
                return kwdict[val], value
        else:
            return tokens.Name, value
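
    # Editor's illustration (hedged sketch, not part of the module): lookups
    # are case-insensitive and unknown words fall through to tokens.Name.
    # The expected results assume the stock KEYWORDS_COMMON table is loaded.
    #
    #     lexer = Lexer.get_default_instance()
    #     lexer.is_keyword('select')   # (tokens.DML, 'select'), assuming
    #                                  # KEYWORDS_COMMON maps SELECT to DML
    #     lexer.is_keyword('frobnicate')   # (tokens.Name, 'frobnicate')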

    def get_tokens(self, text, encoding=None):
        """Return an iterable of (tokentype, value) pairs generated from
        `text`.

        ``text`` may be a ``str``, ``bytes``, or a file-like object. Bytes
        are decoded using ``encoding`` when given, otherwise as UTF-8 with
        a 'unicode-escape' fallback on decode errors.
        """
        if isinstance(text, TextIOBase):
            text = text.read()

        if isinstance(text, str):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError("Expected text or file-like object, got {!r}".
                            format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            for rexmatch, action in self._SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif action is keywords.PROCESS_AS_KEYWORD:
                    yield self.is_keyword(m.group())

                # Advance the enumerate iterator past the matched span so
                # the outer loop resumes at m.end().
                consume(iterable, m.end() - pos - 1)
                break
            else:
                yield tokens.Error, char
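
    # Editor's illustration (hedged sketch, not part of the module): tokens
    # are produced lazily from a string or a file-like object. The token
    # types shown assume the default configuration.
    #
    #     import io
    #     lexer = Lexer.get_default_instance()
    #     for ttype, value in lexer.get_tokens(io.StringIO('select 1')):
    #         print(ttype, repr(value))
    #     # Token.Keyword.DML 'select'
    #     # Token.Text.Whitespace ' '
    #     # Token.Literal.Number.Integer '1'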


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer.get_default_instance().get_tokens(sql, encoding)
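
# Editor's illustration (hedged sketch, not part of the module): tokenize()
# is the module-level entry point; it accepts str, bytes, or file-like input
# and streams (token type, value) pairs.
#
#     pairs = list(tokenize(b'select 1', encoding='utf-8'))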