1#
2# Copyright (C) 2009-2020 the sqlparse authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of python-sqlparse and is released under
6# the BSD License: https://opensource.org/licenses/BSD-3-Clause
7
8from sqlparse import sql
9from sqlparse import tokens as T
10
11
class StatementSplitter:
    """Filter that splits a token stream into individual statements.

    The filter consumes ``(ttype, value)`` pairs and yields one
    ``sql.Statement`` per SQL statement.  A statement normally ends at a
    semicolon, but semicolons inside parentheses or inside a
    ``BEGIN ... END`` block do not split.  A ``GO`` keyword always splits.

    State kept between tokens (all reset by :meth:`_reset`):

    * ``_block_stack`` -- open block kinds, innermost last.  Entries are
      ``'DECLARE'``, ``'BEGIN'``, ``'FOR'``, ``'WHILE'``, ``'LOOP'``,
      ``'IF'`` and ``'CASE'``.
    * ``_parenthesis_level`` -- count of currently open parentheses.  It is
      maintained here but not consulted by the splitting logic of this
      class.
    * ``_unconfirmed_start`` -- ``'FOR'``/``'WHILE'`` seen but not yet
      confirmed as a loop by a following ``LOOP``/``DO``.
    * ``_is_create`` -- a ``CREATE ...`` DDL keyword was seen.
    * ``_seen_begin`` -- the last significant token was ``BEGIN``.
    * ``consume_ws`` -- a statement end was reached; trailing whitespace
      and single-line comments are still attached to it.
    * ``tokens`` -- tokens collected for the current statement.
    * ``level`` -- current nesting depth; a semicolon only splits when
      this is ``<= 0``.
    """

    # Words that, when they directly follow BEGIN, turn it into a
    # transaction statement instead of a block opener (Issue826).
    _TRANSACTION_WORDS = frozenset((
        'TRANSACTION', 'WORK', 'TRAN',
        'DISTRIBUTED', 'DEFERRED',
        'IMMEDIATE', 'EXCLUSIVE',
    ))

    # Compound closing keyword -> block kinds it is allowed to close.
    # A bare END is handled separately because it closes any open block.
    _BLOCK_CLOSERS = {
        'END IF': ('IF',),
        'END FOR': ('FOR',),
        'END WHILE': ('WHILE',),
        'END LOOP': ('LOOP', 'FOR', 'WHILE'),
        'END CASE': ('CASE',),
    }

    def __init__(self):
        self._reset()

    def _reset(self):
        """Set the filter attributes to their default values."""
        self._block_stack = []
        self._parenthesis_level = 0
        self._unconfirmed_start = None
        self._is_create = False
        self._seen_begin = False

        self.consume_ws = False
        self.tokens = []
        self.level = 0

    @staticmethod
    def _is_go(value):
        """Return True if *value* is a ``GO`` batch separator.

        Accepts ``GO`` optionally followed by more text (e.g. ``GO 2``).
        The comparison is case-insensitive, like every other keyword test
        in this class, and a blank value is simply not a match.
        """
        words = value.split()
        return bool(words) and words[0].upper() == 'GO'

    def _handle_nested_block(self, unified):
        """Check for nested loop or control structures inside a block.

        :param unified: upper-cased keyword text.
        :returns: ``1`` if a nested block was opened, ``0`` if the keyword
            was consumed without changing the level (a ``FOR``/``WHILE``
            that still awaits its ``LOOP``/``DO``), or ``None`` if the
            keyword is not handled here.
        """
        if unified in ('FOR', 'WHILE'):
            # Only a loop once LOOP/DO follows; remember it until then.
            self._unconfirmed_start = unified
            return 0
        if unified in ('LOOP', 'DO'):
            if self._unconfirmed_start in ('FOR', 'WHILE'):
                self._block_stack.append(self._unconfirmed_start)
                self._unconfirmed_start = None
                return 1
            if unified == 'LOOP':
                # Plain LOOP without a FOR/WHILE header.
                self._block_stack.append('LOOP')
                return 1
            # A DO without a pending FOR/WHILE is not a block opener.
            return None
        if unified in ('IF', 'CASE'):
            self._block_stack.append(unified)
            return 1
        return None

    def _handle_closing_keyword(self, unified):
        """Handle closing keywords for blocks.

        :param unified: upper-cased keyword text.  Runs of whitespace
            inside a compound keyword (``END   IF``, ``END\\nLOOP``) are
            collapsed to a single space before matching, so the spelling
            of the gap does not matter.
        :returns: ``-1`` if the innermost block was closed, otherwise ``0``.
        """
        if not self._block_stack:
            return 0

        unified = ' '.join(unified.split())
        if unified == 'END':
            # A bare END closes whatever block is innermost.
            self._block_stack.pop()
            return -1

        closable = self._BLOCK_CLOSERS.get(unified)
        if closable is not None and self._block_stack[-1] in closable:
            self._block_stack.pop()
            return -1
        return 0

    def _change_splitlevel(self, ttype, value):
        """Get the new split level (increase, decrease or remain equal).

        :param ttype: token type of the current token.
        :param value: raw text of the current token.
        :returns: ``1``, ``-1`` or ``0``; the caller adds it to ``level``.
        """
        if ttype is T.Punctuation:
            if value == ';':
                # Semicolon resets unconfirmed loop starters and handles
                # a standalone ``BEGIN;`` (a transaction, not a block).
                self._unconfirmed_start = None
                if self._seen_begin:
                    self._seen_begin = False
                    if self._block_stack and self._block_stack[-1] == 'BEGIN':
                        self._block_stack.pop()
                        return -1
                return 0
            # parenthesis increase/decrease a level
            if value == '(':
                self._parenthesis_level += 1
                return 1
            if value == ')':
                self._parenthesis_level = max(0, self._parenthesis_level - 1)
                return -1

        # Issue826: If we see a transaction keyword after BEGIN,
        # it's a transaction statement, not a block.  This must run before
        # the keyword filter below, otherwise a modifier that arrives as a
        # plain name (T.Name) could never be recognised.
        if self._seen_begin and (ttype is T.Keyword or ttype is T.Name) \
                and value.upper() in self._TRANSACTION_WORDS:
            self._seen_begin = False
            if self._block_stack and self._block_stack[-1] == 'BEGIN':
                self._block_stack.pop()
                return -1
            return 0

        if ttype not in T.Keyword:  # if normal token return
            return 0

        # Everything after here is ttype = T.Keyword
        unified = value.upper()

        # DDL Create though can contain more words such as "or replace"
        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
            self._is_create = True
            return 0

        # Handle DECLARE block start (only for CREATE statements)
        if unified == 'DECLARE' and self._is_create and not self._block_stack:
            self._block_stack.append('DECLARE')
            return 1

        # Handle BEGIN block start
        if unified == 'BEGIN':
            self._seen_begin = True
            if self._block_stack and self._block_stack[-1] == 'DECLARE':
                # DECLARE already raised the level; BEGIN just takes over
                # its slot on the stack.
                self._block_stack.pop()
                self._block_stack.append('BEGIN')
                return 0
            self._block_stack.append('BEGIN')
            return 1

        # Inside a block, check for nested loop or control structures
        if 'BEGIN' in self._block_stack:
            res = self._handle_nested_block(unified)
            if res is not None:
                return res

        # Handle closing keywords
        return self._handle_closing_keyword(unified)

    def process(self, stream):
        """Process the stream.

        :param stream: iterable of ``(ttype, value)`` pairs.
        :returns: generator yielding one ``sql.Statement`` per statement.
        """
        EOS_TTYPE = T.Whitespace, T.Comment.Single

        # Run over all stream tokens
        for ttype, value in stream:
            # Yield token if we finished a statement and there's no
            # whitespaces.  It will count newline token as a non
            # whitespace.  In this context whitespace ignores newlines.
            # NOTE(review): multi line comments are not treated as
            # trailing whitespace here either.
            if self.consume_ws and ttype not in EOS_TTYPE:
                yield sql.Statement(self.tokens)

                # Reset filter and prepare to process next statement
                self._reset()

            # Change current split level (increase, decrease or remain equal)
            self.level += self._change_splitlevel(ttype, value)

            # Append the token to the current statement
            self.tokens.append(sql.Token(ttype, value))

            # Check if we get the end of a statement
            # Issue762: Allow GO (or "GO 2") as statement splitter.
            # When implementing a language toggle, it's not only to add
            # keywords it's also to change some rules, like this splitting
            # rule.
            # Issue809: Ignore semicolons inside BEGIN...END blocks, but
            # handle standalone BEGIN; as a transaction statement
            if ttype is T.Punctuation and value == ';':
                self._seen_begin = False
                # Split on semicolon if not inside a BEGIN...END block
                if self.level <= 0 and 'BEGIN' not in self._block_stack:
                    self.consume_ws = True
            elif ttype is T.Keyword and self._is_go(value):
                self.consume_ws = True
            elif (ttype not in (T.Whitespace, T.Newline, T.Comment.Single,
                                T.Comment.Multiline)
                    and not (ttype is T.Keyword
                             and value.upper() == 'BEGIN')):
                # Reset _seen_begin if we see a non-whitespace, non-comment
                # token but not for BEGIN itself (which just set the flag)
                self._seen_begin = False

        # Yield pending statement (if any)
        if self.tokens and not all(t.is_whitespace for t in self.tokens):
            yield sql.Statement(self.tokens)