1# 
    2# Copyright (C) 2009-2020 the sqlparse authors and contributors 
    3# <see AUTHORS file> 
    4# 
    5# This module is part of python-sqlparse and is released under 
    6# the BSD License: https://opensource.org/licenses/BSD-3-Clause 
    7 
    8import re 
    9 
    10from sqlparse import sql, tokens as T 
    11from sqlparse.utils import split_unquoted_newlines 
    12 
    13 
    14class StripCommentsFilter: 
    15 
    16    @staticmethod 
    17    def _process(tlist): 
    18        def get_next_comment(idx=-1): 
    19            # TODO(andi) Comment types should be unified, see related issue38 
    20            return tlist.token_next_by(i=sql.Comment, t=T.Comment, idx=idx) 
    21 
    22        def _get_insert_token(token): 
    23            """Returns either a whitespace or the line breaks from token.""" 
    24            # See issue484 why line breaks should be preserved. 
    25            # Note: The actual value for a line break is replaced by \n 
    26            # in SerializerUnicode which will be executed in the 
    27            # postprocessing state. 
    28            m = re.search(r'([\r\n]+) *$', token.value) 
    29            if m is not None: 
    30                return sql.Token(T.Whitespace.Newline, m.groups()[0]) 
    31            else: 
    32                return sql.Token(T.Whitespace, ' ') 
    33 
    34        sql_hints = (T.Comment.Multiline.Hint, T.Comment.Single.Hint) 
    35        tidx, token = get_next_comment() 
    36        while token: 
    37            # skipping token remove if token is a SQL-Hint. issue262 
    38            is_sql_hint = False 
    39            if token.ttype in sql_hints: 
    40                is_sql_hint = True 
    41            elif isinstance(token, sql.Comment): 
    42                comment_tokens = token.tokens 
    43                if len(comment_tokens) > 0: 
    44                    if comment_tokens[0].ttype in sql_hints: 
    45                        is_sql_hint = True 
    46 
    47            if is_sql_hint: 
    48                # using current index as start index to search next token for 
    49                # preventing infinite loop in cases when token type is a 
    50                # "SQL-Hint" and has to be skipped 
    51                tidx, token = get_next_comment(idx=tidx) 
    52                continue 
    53 
    54            pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) 
    55            nidx, next_ = tlist.token_next(tidx, skip_ws=False) 
    56            # Replace by whitespace if prev and next exist and if they're not 
    57            # whitespaces. This doesn't apply if prev or next is a parenthesis. 
    58            if ( 
    59                prev_ is None or next_ is None 
    60                or prev_.is_whitespace or prev_.match(T.Punctuation, '(') 
    61                or next_.is_whitespace or next_.match(T.Punctuation, ')') 
    62            ): 
    63                # Insert a whitespace to ensure the following SQL produces 
    64                # a valid SQL (see #425). 
    65                if prev_ is not None and not prev_.match(T.Punctuation, '('): 
    66                    tlist.tokens.insert(tidx, _get_insert_token(token)) 
    67                tlist.tokens.remove(token) 
    68            else: 
    69                tlist.tokens[tidx] = _get_insert_token(token) 
    70 
    71            # using current index as start index to search next token for 
    72            # preventing infinite loop in cases when token type is a 
    73            # "SQL-Hint" and has to be skipped 
    74            tidx, token = get_next_comment(idx=tidx) 
    75 
    76    def process(self, stmt): 
    77        [self.process(sgroup) for sgroup in stmt.get_sublists()] 
    78        StripCommentsFilter._process(stmt) 
    79        return stmt 
    80 
    81 
    82class StripWhitespaceFilter: 
    83    def _stripws(self, tlist): 
    84        func_name = f'_stripws_{type(tlist).__name__}' 
    85        func = getattr(self, func_name.lower(), self._stripws_default) 
    86        func(tlist) 
    87 
    88    @staticmethod 
    89    def _stripws_default(tlist): 
    90        last_was_ws = False 
    91        is_first_char = True 
    92        for token in tlist.tokens: 
    93            if token.is_whitespace: 
    94                token.value = '' if last_was_ws or is_first_char else ' ' 
    95            last_was_ws = token.is_whitespace 
    96            is_first_char = False 
    97 
    98    def _stripws_identifierlist(self, tlist): 
    99        # Removes newlines before commas, see issue140 
    100        last_nl = None 
    101        for token in list(tlist.tokens): 
    102            if last_nl and token.ttype is T.Punctuation and token.value == ',': 
    103                tlist.tokens.remove(last_nl) 
    104            last_nl = token if token.is_whitespace else None 
    105 
    106            # next_ = tlist.token_next(token, skip_ws=False) 
    107            # if (next_ and not next_.is_whitespace and 
    108            #             token.ttype is T.Punctuation and token.value == ','): 
    109            #     tlist.insert_after(token, sql.Token(T.Whitespace, ' ')) 
    110        return self._stripws_default(tlist) 
    111 
    112    def _stripws_parenthesis(self, tlist): 
    113        while tlist.tokens[1].is_whitespace: 
    114            tlist.tokens.pop(1) 
    115        while tlist.tokens[-2].is_whitespace: 
    116            tlist.tokens.pop(-2) 
    117        if tlist.tokens[-2].is_group: 
    118            # save to remove the last whitespace 
    119            while tlist.tokens[-2].tokens[-1].is_whitespace: 
    120                tlist.tokens[-2].tokens.pop(-1) 
    121        self._stripws_default(tlist) 
    122 
    123    def process(self, stmt, depth=0): 
    124        [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()] 
    125        self._stripws(stmt) 
    126        if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace: 
    127            stmt.tokens.pop(-1) 
    128        return stmt 
    129 
    130 
    131class SpacesAroundOperatorsFilter: 
    132    @staticmethod 
    133    def _process(tlist): 
    134 
    135        ttypes = (T.Operator, T.Comparison) 
    136        tidx, token = tlist.token_next_by(t=ttypes) 
    137        while token: 
    138            nidx, next_ = tlist.token_next(tidx, skip_ws=False) 
    139            if next_ and next_.ttype != T.Whitespace: 
    140                tlist.insert_after(tidx, sql.Token(T.Whitespace, ' ')) 
    141 
    142            pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) 
    143            if prev_ and prev_.ttype != T.Whitespace: 
    144                tlist.insert_before(tidx, sql.Token(T.Whitespace, ' ')) 
    145                tidx += 1  # has to shift since token inserted before it 
    146 
    147            # assert tlist.token_index(token) == tidx 
    148            tidx, token = tlist.token_next_by(t=ttypes, idx=tidx) 
    149 
    150    def process(self, stmt): 
    151        [self.process(sgroup) for sgroup in stmt.get_sublists()] 
    152        SpacesAroundOperatorsFilter._process(stmt) 
    153        return stmt 
    154 
    155 
    156class StripTrailingSemicolonFilter: 
    157 
    158    def process(self, stmt): 
    159        while stmt.tokens and (stmt.tokens[-1].is_whitespace 
    160                               or stmt.tokens[-1].value == ';'): 
    161            stmt.tokens.pop() 
    162        return stmt 
    163 
    164 
    165# --------------------------- 
    166# postprocess 
    167 
    168class SerializerUnicode: 
    169    @staticmethod 
    170    def process(stmt): 
    171        lines = split_unquoted_newlines(stmt) 
    172        return '\n'.join(line.rstrip() for line in lines)