1#
2# Copyright (C) 2009-2020 the sqlparse authors and contributors
3# <see AUTHORS file>
4#
5# This module is part of python-sqlparse and is released under
6# the BSD License: https://opensource.org/licenses/BSD-3-Clause
7
8from sqlparse import sql, tokens as T
9
10
class StatementSplitter:
    """Filter that splits a token stream into individual statements.

    The filter consumes ``(ttype, value)`` pairs and groups them into
    ``sql.Statement`` objects.  A statement ends at a top-level ``;`` or
    at a ``GO`` keyword; semicolons inside parentheses or inside
    ``BEGIN ... END`` blocks do not end a statement.
    """

    # Words that, when they directly follow BEGIN, turn it into a
    # transaction statement ("BEGIN TRANSACTION", "BEGIN WORK", ...)
    # instead of the opening of a BEGIN ... END block.
    _TRANSACTION_WORDS = frozenset((
        'TRANSACTION', 'WORK', 'TRAN',
        'DISTRIBUTED', 'DEFERRED',
        'IMMEDIATE', 'EXCLUSIVE',
    ))

    def __init__(self):
        self._reset()

    def _reset(self):
        """Set the filter attributes to their default values."""
        # True once a DECLARE was seen inside a CREATE statement.
        self._in_declare = False
        # True while inside a CASE that was opened within a BEGIN block;
        # the next END closes the CASE rather than the block.
        self._in_case = False
        # True once a DDL CREATE keyword was seen in this statement.
        self._is_create = False
        # Number of currently open BEGIN ... END blocks.
        self._begin_depth = 0
        # True right after BEGIN, until the next significant token tells
        # whether it opened a block or started a transaction.
        self._seen_begin = False

        # True once the statement is complete; trailing whitespace and
        # single-line comments are still attached to it.
        self.consume_ws = False
        # Tokens collected for the statement currently being built.
        self.tokens = []
        # Current nesting level; a ';' only splits when this is <= 0.
        self.level = 0

    def _change_splitlevel(self, ttype, value):
        """Return the change of the split level caused by one token.

        :param ttype: token type of the current token.
        :param value: text of the current token.
        :returns: ``1`` to increase, ``-1`` to decrease or ``0`` to keep
            the current split level.

        Besides computing the delta this method updates the internal
        CREATE / DECLARE / BEGIN / CASE tracking state.
        """
        # parenthesis increase/decrease a level
        if ttype is T.Punctuation and value == '(':
            return 1
        if ttype is T.Punctuation and value == ')':
            return -1

        # Issue826: If we see a transaction keyword after BEGIN,
        # it's a transaction statement, not a block.
        # This has to run *before* the non-keyword early return below:
        # the check explicitly accepts T.Name tokens, which would never
        # get here otherwise, leaving the BEGIN depth stuck above zero
        # and the terminating semicolon ignored.
        if (self._seen_begin
                and (ttype is T.Keyword or ttype is T.Name)
                and value.upper() in self._TRANSACTION_WORDS):
            self._begin_depth = max(0, self._begin_depth - 1)
            self._seen_begin = False
            return 0

        if ttype not in T.Keyword:  # if normal token return
            return 0

        # Everything after here is ttype = T.Keyword
        # Also to note, once entered an If statement you are done and
        # basically returning
        unified = value.upper()

        # three keywords begin with CREATE, but only one of them is DDL
        # DDL Create though can contain more words such as "or replace"
        if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
            self._is_create = True
            return 0

        # can have nested declare inside of begin...
        if (unified == 'DECLARE' and self._is_create
                and self._begin_depth == 0):
            self._in_declare = True
            return 1

        if unified == 'BEGIN':
            self._begin_depth += 1
            self._seen_begin = True
            # Only a BEGIN inside a CREATE statement raises the split
            # level; elsewhere the BEGIN depth alone guards semicolons.
            if self._is_create:
                return 1
            return 0

        # BEGIN and CASE/WHEN both end with END
        if unified == 'END':
            if not self._in_case:
                self._begin_depth = max(0, self._begin_depth - 1)
            else:
                self._in_case = False
            return -1

        if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
                and self._is_create and self._begin_depth > 0):
            if unified == 'CASE':
                self._in_case = True
            return 1

        if unified in ('END IF', 'END FOR', 'END WHILE'):
            return -1

        # Default
        return 0

    def process(self, stream):
        """Split the token stream into statements.

        :param stream: iterable of ``(ttype, value)`` pairs.
        :returns: generator yielding one ``sql.Statement`` per detected
            statement.  A trailing statement is only yielded if it
            contains at least one non-whitespace token.
        """
        EOS_TTYPE = T.Whitespace, T.Comment.Single

        # Run over all stream tokens
        for ttype, value in stream:
            # Yield token if we finished a statement and there's no
            # whitespaces.  It will count newline token as a non
            # whitespace. In this context whitespace ignores newlines.
            # NOTE(review): multi-line comments are not treated as
            # trailing tokens here either; unclear whether intentional.
            if self.consume_ws and ttype not in EOS_TTYPE:
                yield sql.Statement(self.tokens)

                # Reset filter and prepare to process next statement
                self._reset()

            # Change current split level (increase, decrease or remain
            # equal)
            self.level += self._change_splitlevel(ttype, value)

            # Append the token to the current statement
            self.tokens.append(sql.Token(ttype, value))

            # Check if we get the end of a statement
            # Issue762: Allow GO (or "GO 2") as statement splitter.
            # When implementing a language toggle, it's not only to add
            # keywords it's also to change some rules, like this
            # splitting rule.
            # Issue809: Ignore semicolons inside BEGIN...END blocks, but
            # handle standalone BEGIN; as a transaction statement
            if ttype is T.Punctuation and value == ';':
                # If we just saw BEGIN; then this is a transaction BEGIN,
                # not a BEGIN...END block, so decrement depth
                if self._seen_begin:
                    self._begin_depth = max(0, self._begin_depth - 1)
                    self._seen_begin = False
                # Split on semicolon if not inside a BEGIN...END block
                if self.level <= 0 and self._begin_depth == 0:
                    self.consume_ws = True
            elif ttype is T.Keyword and value.split()[0].upper() == 'GO':
                # Compare case-insensitively, like every other keyword
                # check in this class, so that "go" splits like "GO".
                self.consume_ws = True
            elif (ttype not in (T.Whitespace, T.Newline, T.Comment.Single,
                                T.Comment.Multiline)
                    and not (ttype is T.Keyword
                             and value.upper() == 'BEGIN')):
                # Reset _seen_begin if we see a non-whitespace,
                # non-comment token but not for BEGIN itself (which just
                # set the flag)
                self._seen_begin = False

        # Yield pending statement (if any)
        if self.tokens and not all(t.is_whitespace for t in self.tokens):
            yield sql.Statement(self.tokens)