Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/sqlparse/lexer.py: 86%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

71 statements  

1# 

2# Copyright (C) 2009-2020 the sqlparse authors and contributors 

3# <see AUTHORS file> 

4# 

5# This module is part of python-sqlparse and is released under 

6# the BSD License: https://opensource.org/licenses/BSD-3-Clause 

7 

8"""SQL Lexer""" 

9import re 

10 

11# This code is based on the SqlLexer in pygments. 

12# http://pygments.org/ 

13# It's separated from the rest of pygments to increase performance 

14# and to allow some customizations. 

15from io import TextIOBase 

16from threading import Lock 

17 

18from sqlparse import keywords, tokens 

19from sqlparse.utils import consume 

20 

21 

class Lexer:
    """The Lexer supports configurable syntax.

    To add support for additional keywords, use the `add_keywords` method.
    """

    # Process-wide lexer shared by the sqlparse entry points; created lazily
    # by get_default_instance() while holding _lock.
    _default_instance = None
    _lock = Lock()

    # Development notes:
    # - This class is prepared to be able to support additional SQL dialects
    #   in the future by adding additional functions that take the place of
    #   the function default_initialization().
    # - The lexer class uses an explicit singleton behavior with the
    #   instance-getter method get_default_instance(). This mechanism has
    #   the advantage that the call signature of the entry-points to the
    #   sqlparse library are not affected. Also, usage of sqlparse in third
    #   party code does not need to be adapted. On the other hand, the current
    #   implementation does not easily allow for multiple SQL dialects to be
    #   parsed in the same process.
    #   Such behavior can be supported in the future by passing a
    #   suitably initialized lexer object as an additional parameter to the
    #   entry-point functions (such as `parse`). Code will need to be written
    #   to pass down and utilize such an object. The current implementation
    #   is prepared to support this thread safe approach without the
    #   default_instance part needing to change interface.

    def __init__(self):
        """Create a lexer with an empty syntax configuration.

        Regexes and keyword dictionaries still have to be loaded (for
        example via `default_initialization`) before the lexer can
        recognise anything, but every method is safe to call right away.
        """
        self.clear()

    @classmethod
    def get_default_instance(cls):
        """Returns the lexer instance used internally
        by the sqlparse core functions."""
        with cls._lock:
            if cls._default_instance is None:
                # Publish the instance only once it is fully configured, so
                # a failure during initialization is retried on the next
                # call instead of leaving a half-initialized singleton.
                instance = cls()
                instance.default_initialization()
                cls._default_instance = instance
        return cls._default_instance

    def default_initialization(self):
        """Initialize the lexer with default dictionaries.
        Useful if you need to revert custom syntax settings."""
        self.clear()
        self.set_SQL_REGEX(keywords.SQL_REGEX)
        # Lookup order in is_keyword() follows the order of these calls.
        self.add_keywords(keywords.KEYWORDS_COMMON)
        self.add_keywords(keywords.KEYWORDS_ORACLE)
        self.add_keywords(keywords.KEYWORDS_MYSQL)
        self.add_keywords(keywords.KEYWORDS_PLPGSQL)
        self.add_keywords(keywords.KEYWORDS_HQL)
        self.add_keywords(keywords.KEYWORDS_MSACCESS)
        self.add_keywords(keywords.KEYWORDS_SNOWFLAKE)
        self.add_keywords(keywords.KEYWORDS_BIGQUERY)
        self.add_keywords(keywords.KEYWORDS)

    def clear(self):
        """Clear all syntax configurations.
        Useful if you want to load a reduced set of syntax configurations.
        After this call, regexps and keyword dictionaries need to be loaded
        to make the lexer functional again."""
        self._SQL_REGEX = []
        self._keywords = []

    def set_SQL_REGEX(self, SQL_REGEX):
        """Set the list of regex that will parse the SQL.

        `SQL_REGEX` is an iterable of ``(pattern, action)`` pairs. Each
        pattern is compiled case-insensitively and stored as its bound
        ``match`` method next to the unchanged action.
        """
        FLAGS = re.IGNORECASE | re.UNICODE
        self._SQL_REGEX = [
            (re.compile(rx, FLAGS).match, tt)
            for rx, tt in SQL_REGEX
        ]

    def add_keywords(self, keywords):
        """Add keyword dictionaries. Keywords are looked up in the same order
        that dictionaries were added."""
        self._keywords.append(keywords)

    def is_keyword(self, value):
        """Checks for a keyword.

        The upper-cased `value` is looked up in the registered keyword
        dictionaries in the order they were added. The first hit decides
        the token type; without a hit, ``tokens.Name`` is used.

        Returns a ``(tokentype, value)`` pair carrying the original,
        unmodified `value`.
        """
        val = value.upper()
        for kwdict in self._keywords:
            if val in kwdict:
                return kwdict[val], value
        return tokens.Name, value

    def get_tokens(self, text, encoding=None):
        """Yield ``(tokentype, value)`` pairs lexed from `text`.

        `text` may be a ``str``, a ``bytes`` object or a text stream
        (``TextIOBase``), which is read completely first. Bytes are decoded
        with `encoding` if given; otherwise UTF-8 is tried and
        ``unicode-escape`` is used as a fallback.

        At every position the configured regexes are tried in order and
        the first match wins. A character that no regex matches is yielded
        as ``tokens.Error``.

        Because this is a generator, the ``TypeError`` for unsupported
        input types is raised when iteration starts, not at call time.
        """
        if isinstance(text, TextIOBase):
            text = text.read()

        if isinstance(text, str):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError(
                f"Expected text or file-like object, got {type(text)!r}")

        iterable = enumerate(text)
        for pos, char in iterable:
            for rexmatch, action in self._SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif action is keywords.PROCESS_AS_KEYWORD:
                    yield self.is_keyword(m.group())

                # The outer loop already took the first matched character;
                # consume() is expected to advance the iterator past the
                # remaining ones so lexing resumes right after the match.
                consume(iterable, m.end() - pos - 1)
                break
            else:
                # No regex matched at this position.
                yield tokens.Error, char

151 

152 

def tokenize(sql, encoding=None):
    """Lex *sql* with the shared default :class:`Lexer`.

    Returns the stream of ``(token type, value)`` 2-tuples produced by
    :meth:`Lexer.get_tokens`; *encoding* is passed through unchanged.
    """
    lexer = Lexer.get_default_instance()
    return lexer.get_tokens(sql, encoding)