1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007-2009 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
"""String interpolation routines, i.e. the splitting up of a given text into
parts that are literal strings and parts that are Python expressions.
"""
17
18from itertools import chain
19import re
20from tokenize import PseudoToken
21
22from genshi.core import TEXT
23from genshi.template.base import TemplateSyntaxError, EXPR
24from genshi.template.eval import Expression
25
26__all__ = ['interpolate']
27__docformat__ = 'restructuredtext en'
28
# Characters that may start a short-form ``$name`` expression.
NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
# Characters that may continue a short-form expression: the start characters
# plus the dot and the decimal digits.
NAMECHARS = ''.join((NAMESTART, '.0123456789'))
# The character that introduces an expression in the text.
PREFIX = '$'

# Matches one token at a time: either a complete triple-quoted string
# (groups 1 and 2) or whatever the standard tokenizer's ``PseudoToken``
# pattern accepts (its outermost group ends up as group 3).  ``(?s)`` lets
# ``.`` span line breaks inside triple-quoted strings.
token_re = re.compile('(?s)' + '|'.join((
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken,
)))
37
38
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print('%s %r' % (kind, data))
    TEXT 'hey '
    EXPR Expression('foo')
    TEXT 'bar'

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :return: a generator of ``(kind, data, pos)`` tuples, where `kind` is
             `TEXT` or `EXPR` and `pos` is a ``(filepath, lineno, offset)``
             tuple
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Mutable on purpose: the same list is handed to `lex()`, which reads the
    # current position from it when it has to report a syntax error.
    pos = [filepath, lineno, offset]

    textbuf = []
    textpos = None
    # The trailing ``(True, '')`` sentinel acts like an empty expression, so
    # that any literal text still buffered at the end gets flushed.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                # Adjacent literal chunks are merged into a single event
                # positioned at the first of them.
                yield TEXT, ''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                # Only the compilation is guarded, so that an exception
                # raised at the ``yield`` is never mistaken for a syntax
                # error in the expression.
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                except SyntaxError as err:
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
                yield EXPR, expr, tuple(pos)
        else:
            textbuf.append(chunk)
            if textpos is None:
                textpos = tuple(pos)

        # Advance the position past the chunk.  Newlines are counted directly
        # because `str.splitlines()` drops a trailing line break, which would
        # leave the line number unchanged for a chunk ending in '\n'.
        newlines = chunk.count('\n')
        if newlines:
            pos[1] += newlines
            # The column restarts at the beginning of the last line.
            pos[2] = len(chunk) - chunk.rfind('\n') - 1
        else:
            pos[2] += len(chunk)
93
94
def lex(text, textpos, filepath):
    """Split the given text into literal and expression chunks.

    This generator scans `text` for the ``$`` prefix and yields
    ``(is_expr, chunk)`` tuples: `is_expr` is ``False`` for literal text and
    ``True`` for the source of an expression, written either in the
    ``${...}`` form or as a short ``$name`` reference.  A doubled prefix
    (``$$``) is an escape that results in a single literal ``$``.

    :param text: the text to split
    :param textpos: a sequence whose items after the first are passed on as
                    the position of a reported syntax error
    :param filepath: the file path passed on with a reported syntax error
    :return: a generator of ``(is_expr, chunk)`` tuples
    :raise TemplateSyntaxError: when the braces of a ``${...}`` expression
                                cannot be matched up
    """
    length = len(text)
    cursor = 0           # start of the text that has not been yielded yet
    dollar = 0           # index of the prefix currently being examined
    after_escape = False

    while True:
        # Right after a ``$$`` escape the search resumes behind both prefix
        # characters, so that the second one stays literal text.
        search_from = dollar + 2 if after_escape else cursor
        after_escape = False
        dollar = text.find(PREFIX, search_from)
        if dollar < 0 or dollar == length - 1:
            # No prefix left, or a lone prefix as the very last character.
            break
        follower = text[dollar + 1]

        if follower == '{':
            if cursor < dollar:
                yield False, text[cursor:dollar]
            cursor = dollar + 2
            depth = 1
            # Consume token by token until the opening brace is balanced;
            # braces inside string literals are part of a string token and
            # therefore do not count.
            while depth:
                found = token_re.match(text, cursor)
                if found is None or not found.group():
                    # without a match, or with an empty one, the braces
                    # can never be matched up
                    raise TemplateSyntaxError('invalid syntax', filepath,
                                              *textpos[1:])
                cursor = found.end()
                token = found.group(3)
                if token == '{':
                    depth += 1
                elif token == '}':
                    depth -= 1
            # Strip the leading ``${`` and the closing ``}``.
            yield True, text[dollar + 2:cursor - 1]

        elif follower in NAMESTART:
            if cursor < dollar:
                yield False, text[cursor:dollar]
            cursor = dollar + 1
            while cursor < length and text[cursor] in NAMECHARS:
                cursor += 1
            yield True, text[dollar + 1:cursor].strip()

        elif follower == PREFIX:
            if cursor < dollar:
                yield False, text[cursor:dollar]
            after_escape = True
            # Drop the first prefix character; the second one becomes part
            # of the following literal text.
            cursor = dollar + 1

        else:
            # A prefix followed by anything else is plain text.
            yield False, text[cursor:dollar + 1]
            cursor = dollar + 1

    if cursor < length:
        yield False, text[cursor:]