1# -*- coding: utf-8 -*- 
    2# 
    3# Copyright (C) 2007-2009 Edgewall Software 
    4# All rights reserved. 
    5# 
    6# This software is licensed as described in the file COPYING, which 
    7# you should have received as part of this distribution. The terms 
    8# are also available at http://genshi.edgewall.org/wiki/License. 
    9# 
    10# This software consists of voluntary contributions made by many 
    11# individuals. For the exact contribution history, see the revision 
    12# history and logs, available at http://genshi.edgewall.org/log/. 
    13 
"""String interpolation routines, i.e. the splitting up of a given text into
parts that are literal strings and parts that are Python expressions.
"""
    17 
    18from itertools import chain 
    19import re 
    20from tokenize import PseudoToken 
    21 
    22from genshi.core import TEXT 
    23from genshi.template.base import TemplateSyntaxError, EXPR 
    24from genshi.template.eval import Expression 
    25 
    26__all__ = ['interpolate'] 
    27__docformat__ = 'restructuredtext en' 
    28 
    29NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' 
    30NAMECHARS = NAMESTART + '.0123456789' 
    31PREFIX = '$' 
    32 
    33token_re = re.compile('(?s)%s|%s' % ( 
    34    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1', 
    35    PseudoToken 
    36)) 
    37 
    38 
    39def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'): 
    40    """Parse the given string and extract expressions. 
    41     
    42    This function is a generator that yields `TEXT` events for literal strings, 
    43    and `EXPR` events for expressions, depending on the results of parsing the 
    44    string. 
    45     
    46    >>> for kind, data, pos in interpolate("hey ${foo}bar"): 
    47    ...     print('%s %r' % (kind, data)) 
    48    TEXT 'hey ' 
    49    EXPR Expression('foo') 
    50    TEXT 'bar' 
    51     
    52    :param text: the text to parse 
    53    :param filepath: absolute path to the file in which the text was found 
    54                     (optional) 
    55    :param lineno: the line number at which the text was found (optional) 
    56    :param offset: the column number at which the text starts in the source 
    57                   (optional) 
    58    :param lookup: the variable lookup mechanism; either "lenient" (the 
    59                   default), "strict", or a custom lookup class 
    60    :return: a list of `TEXT` and `EXPR` events 
    61    :raise TemplateSyntaxError: when a syntax error in an expression is 
    62                                encountered 
    63    """ 
    64    pos = [filepath, lineno, offset] 
    65 
    66    textbuf = [] 
    67    textpos = None 
    68    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]): 
    69        if is_expr: 
    70            if textbuf: 
    71                yield TEXT, ''.join(textbuf), textpos 
    72                del textbuf[:] 
    73                textpos = None 
    74            if chunk: 
    75                try: 
    76                    expr = Expression(chunk.strip(), pos[0], pos[1], 
    77                                      lookup=lookup) 
    78                    yield EXPR, expr, tuple(pos) 
    79                except SyntaxError as err: 
    80                    raise TemplateSyntaxError(err, filepath, pos[1], 
    81                                              pos[2] + (err.offset or 0)) 
    82        else: 
    83            textbuf.append(chunk) 
    84            if textpos is None: 
    85                textpos = tuple(pos) 
    86 
    87        if '\n' in chunk: 
    88            lines = chunk.splitlines() 
    89            pos[1] += len(lines) - 1 
    90            pos[2] += len(lines[-1]) 
    91        else: 
    92            pos[2] += len(chunk) 
    93 
    94 
    95def lex(text, textpos, filepath): 
    96    offset = pos = 0 
    97    end = len(text) 
    98    escaped = False 
    99 
    100    while 1: 
    101        if escaped: 
    102            offset = text.find(PREFIX, offset + 2) 
    103            escaped = False 
    104        else: 
    105            offset = text.find(PREFIX, pos) 
    106        if offset < 0 or offset == end - 1: 
    107            break 
    108        next = text[offset + 1] 
    109 
    110        if next == '{': 
    111            if offset > pos: 
    112                yield False, text[pos:offset] 
    113            pos = offset + 2 
    114            level = 1 
    115            while level: 
    116                match = token_re.match(text, pos) 
    117                if match is None or not match.group(): 
    118                    # if there isn't a match or the match is the empty 
    119                    # string, we're not going to match up braces ever 
    120                    raise TemplateSyntaxError('invalid syntax',  filepath, 
    121                                              *textpos[1:]) 
    122                pos = match.end() 
    123                tstart, tend = match.regs[3] 
    124                token = text[tstart:tend] 
    125                if token == '{': 
    126                    level += 1 
    127                elif token == '}': 
    128                    level -= 1 
    129            yield True, text[offset + 2:pos - 1] 
    130 
    131        elif next in NAMESTART: 
    132            if offset > pos: 
    133                yield False, text[pos:offset] 
    134                pos = offset 
    135            pos += 1 
    136            while pos < end: 
    137                char = text[pos] 
    138                if char not in NAMECHARS: 
    139                    break 
    140                pos += 1 
    141            yield True, text[offset + 1:pos].strip() 
    142 
    143        elif not escaped and next == PREFIX: 
    144            if offset > pos: 
    145                yield False, text[pos:offset] 
    146            escaped = True 
    147            pos = offset + 1 
    148 
    149        else: 
    150            yield False, text[pos:offset + 1] 
    151            pos = offset + 1 
    152 
    153    if pos < end: 
    154        yield False, text[pos:]