Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mako/lexer.py: 89%
252 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:02 +0000
1# mako/lexer.py
2# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file>
3#
4# This module is part of Mako and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
7"""provides the Lexer class for parsing template strings into parse trees."""
9import codecs
10import re
12from mako import exceptions
13from mako import parsetree
14from mako.pygen import adjust_whitespace
# Compiled-pattern cache shared by every Lexer instance.  Keys are the
# ``(pattern string, flags)`` pairs exactly as passed to ``Lexer.match()``.
_regexp_cache = {}


class Lexer:
    """Scan template text and build a tree of ``parsetree`` nodes.

    A lexer is created with the template source, then ``parse()`` is
    called once; it returns the populated ``parsetree.TemplateNode``.
    The ``match_*`` methods each try to consume one construct at the
    current ``match_position`` and report whether they did.
    """

    def __init__(
        self, text, filename=None, input_encoding=None, preprocessor=None
    ):
        """Set up scanner state for ``text``.

        :param text: template source, ``str`` or ``bytes``; bytes are
         decoded by ``parse()`` via ``decode_raw_stream()``.
        :param filename: passed to the root node, to every node created
         by ``append_node()`` and to raised exceptions.
        :param input_encoding: fallback encoding used when the source has
         no magic encoding comment.
        :param preprocessor: ``None``, a single callable, or an iterable
         of callables; each is applied to the decoded text, in order,
         before lexing starts.
        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)

        # position of the most recent successful match
        self.matched_lineno = 1
        self.matched_charpos = 0

        # current scan position
        self.lineno = 1
        self.match_position = 0

        # stacks of currently open constructs
        self.tag = []
        self.control_line = []
        self.ternary_stack = []

        self.encoding = input_encoding

        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            # a single callable: normalise to a one-element list
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments locating the last match, for exceptions."""
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """Compile ``regexp`` (cached) and delegate to ``match_reg()``.

        :param regexp: pattern string.
        :param flags: optional ``re`` flags; a falsy value compiles the
         pattern without flags.
        :return: the match object, or ``None``.
        """
        key = (regexp, flags)
        try:
            reg = _regexp_cache[key]
        except KeyError:
            reg = re.compile(regexp, flags) if flags else re.compile(regexp)
            _regexp_cache[key] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """Match a compiled pattern at the current text position.

        On success ``match_position`` moves to the end of the match (a
        zero-width match still advances it by one), ``matched_lineno``
        and ``matched_charpos`` record where the match started, and
        ``lineno`` is advanced by the newlines that were consumed.

        :param reg: compiled regular expression object.
        :return: the match object, or ``None`` when nothing matched (in
         which case no state is modified).
        """
        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            start, end = match.span()
            self.match_position = end + 1 if end == start else end
            self.matched_lineno = self.lineno
            cp = mp - 1
            # Bound against the live text length instead of
            # ``self.textlength``: that attribute is only assigned inside
            # parse(), so relying on it made this method raise
            # AttributeError when used on a lexer that had not been
            # parsed yet.  Once parse() has run both values are equal.
            if 0 <= cp < len(self.text):
                # index of the last newline before the match start
                cp = self.text[: cp + 1].rfind("\n")
            self.matched_charpos = mp - cp
            self.lineno += self.text[mp : self.match_position].count("\n")
        return match

    def parse_until_text(self, watch_nesting, *text):
        """Consume text up to one of the terminator patterns in ``text``.

        ``#`` comments running to end of line and quoted strings are
        skipped whole, so a terminator inside them is not recognised.

        :param watch_nesting: when true, a terminator is only accepted
         while the running counts of ``{}``, ``()`` and ``[]`` are all
         not positive.
        :param text: one or more regular expression fragments, joined
         with ``|``.
        :return: ``(consumed_text, terminator)`` where ``consumed_text``
         excludes the terminator.
        :raises exceptions.SyntaxException: if no terminator is found.
        """
        startpos = self.match_position
        text_re = r"|".join(text)
        # loop-invariant patterns, built once
        terminator_re = r"(%s)" % text_re
        filler_re = r"(.*?)(?=\"|\'|#|%s)" % text_re
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # skip a comment through the end of its line
            match = self.match(r"#.*\n")
            if match:
                continue
            # skip a complete single- or triple-quoted string
            match = self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            )
            if match:
                continue
            match = self.match(terminator_re)
            if match and not (
                watch_nesting
                and (brace_level > 0 or paren_level > 0 or bracket_level > 0)
            ):
                return (
                    self.text[
                        startpos : self.match_position - len(match.group(1))
                    ],
                    match.group(1),
                )
            elif not match:
                # plain text up to the next quote, comment or terminator
                match = self.match(filler_re, re.S)
            if match:
                # Reached for filler text and also for a terminator that
                # was rejected because of nesting; either way its
                # brackets count toward the nesting levels.
                chunk = match.group(1)
                brace_level += chunk.count("{")
                brace_level -= chunk.count("}")
                paren_level += chunk.count("(")
                paren_level -= chunk.count(")")
                bracket_level += chunk.count("[")
                bracket_level -= chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """Create a ``nodecls`` node and attach it to the parse tree.

        ``source``, ``lineno`` and ``pos`` default to the position of the
        last match unless given in ``kwargs``; ``filename`` is always
        set from this lexer.  The node is appended to the innermost open
        tag, or to the template root when no tag is open, and the tag /
        control-line stacks are updated according to the node type.

        :raises exceptions.SyntaxException: for a control line that is
         neither primary, an end, nor a legal ternary keyword of the
         innermost open control line.
        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)
        if self.tag:
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted)
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            if (
                not (
                    isinstance(node, parsetree.ControlLine)
                    and control_frame.is_ternary(node.keyword)
                )
                and self.ternary_stack
                and self.ternary_stack[-1]
            ):
                self.ternary_stack[-1][-1].nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if self.tag:
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line and self.control_line[-1].is_ternary(
                node.keyword
            ):
                self.ternary_stack[-1].append(node)
            elif self.control_line and not self.control_line[-1].is_ternary(
                node.keyword
            ):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'"
                    % (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs,
                )

    # magic encoding comment, e.g. ``# -*- coding: utf-8 -*-``
    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """Determine the encoding of ``text`` and optionally decode it.

        The encoding is taken from a magic encoding comment at the very
        start of the text, else ``known_encoding``, else ``"utf-8"``.  A
        leading UTF-8 BOM is stripped from bytes input and forces
        ``"utf-8"``.

        :param text: ``str`` (returned unchanged) or ``bytes``.
        :param decode_raw: when true, bytes input is decoded to ``str``;
         otherwise the bytes (minus any BOM) are returned.
        :param known_encoding: fallback encoding name, may be ``None``.
        :param filename: used only when constructing exceptions.
        :return: ``(encoding, text)``.
        :raises exceptions.CompileException: if a UTF-8 BOM is combined
         with a magic comment naming anything other than ``utf-8``, or
         if decoding fails (including an unknown encoding name).
        """
        if isinstance(text, str):
            m = self._coding_re.match(text)
            declared = m.group(1) if m else None
            return declared or known_encoding or "utf-8", text

        if text.startswith(codecs.BOM_UTF8):
            text = text[len(codecs.BOM_UTF8) :]
            parsed_encoding = "utf-8"
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            if m is not None and m.group(1) != "utf-8":
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        else:
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            parsed_encoding = m.group(1) if m else known_encoding or "utf-8"
        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except (UnicodeDecodeError, LookupError) as err:
                # LookupError: the named codec does not exist.  Report it
                # like any other decode failure so the caller gets the
                # filename instead of a bare codec error.
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                ) from err

        return parsed_encoding, text

    def parse(self):
        """Lex the whole template and return the root template node.

        Decodes the source, runs the preprocessors, skips a leading
        magic encoding comment, then tries each ``match_*`` method in a
        fixed order until the end of the text is reached.

        :raises exceptions.SyntaxException: if a tag or control keyword
         is still open at the end of the text.
        :raises exceptions.MakoException: if no matcher consumed
         anything at a position inside the text.
        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, True, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        while True:
            if self.match_position > self.textlength:
                break

            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            if self.match_position > self.textlength:
                break
            # TODO: no coverage here
            raise exceptions.MakoException("assertion failed")

        if self.tag:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs,
            )
        if self.control_line:
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'"
                % self.control_line[-1].keyword,
                self.text,
                self.control_line[-1].lineno,
                self.control_line[-1].pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """Match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag`` node built from the keyword and the
        quoted attributes.  A self-closing tag (``/>``) is popped from
        the tag stack immediately.  For ``<%text>`` the body up to
        ``</%text>`` is appended as a single ``parsetree.Text`` node and
        the closing tag is consumed as well.

        :return: ``False`` if no tag starts here; otherwise ``True``, or
         for ``<%text>`` the result of ``match_tag_end()``.
        :raises exceptions.SyntaxException: if ``<%text>`` is not closed.
        """
        reg = r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                               #        sign, string expression
                                               # comma is for backwards compat
                                               # identified in #366

            \s*     # more whitespace

            (/)?>   # closing

        """

        match = self.match(
            reg,
            re.I | re.S | re.X,
        )

        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            for att in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            ):
                # exactly one of val1 / val2 is filled, depending on
                # which quote style the attribute used
                key, val1, val2 = att
                text = val1 or val2
                text = text.replace("\r\n", "\n")
                attributes[key] = text
        self.append_node(parsetree.Tag, keyword, attributes)
        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs,
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: ``True`` if a closing tag was consumed, else ``False``.
        :raises exceptions.SyntaxException: if no tag is open, or the
         innermost open tag has a different keyword.
        """
        match = self.match(r"\</%[\t ]*([^\t ]+?)[\t ]*>")
        if not match:
            return False

        if not self.tag:
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>"
                % match.group(1),
                **self.exception_kwargs,
            )
        elif self.tag[-1].keyword != match.group(1):
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (match.group(1), self.tag[-1].keyword),
                **self.exception_kwargs,
            )
        self.tag.pop()
        return True

    def match_end(self):
        """Return ``True`` if the current position is the end of the text.

        ``\\Z`` is a zero-width assertion, so the matched string is always
        empty; the former branch returning that string could never run
        and has been removed.
        """
        return self.match(r"\Z", re.S) is not None

    def match_text(self):
        """Match plain template text up to the next construct.

        Appends a ``parsetree.Text`` node when the consumed text is not
        empty.

        :return: ``True`` if the pattern matched, else ``False``.
        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )

        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """Match a ``<% ... %>`` or ``<%! ... %>`` Python block.

        Appends a ``parsetree.Code`` node positioned at the opening
        delimiter; its second argument is true for the ``<%!`` form.

        :return: ``True`` if a block was consumed, else ``False``.
        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """Match a ``${expression | escapes}`` substitution.

        Appends a ``parsetree.Expression`` node positioned at the
        opening ``${``.  The text after a top-level ``|`` is passed,
        stripped, as the escapes argument.

        :return: ``True`` if an expression was consumed, else ``False``.
        """
        match = self.match(r"\${")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        if end == "|":
            escapes, end = self.parse_until_text(True, r"}")
        else:
            escapes = ""
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """Match a ``%`` control line or a ``##`` comment line.

        A ``%`` line is appended as a ``parsetree.ControlLine``; a ``##``
        line is appended as a ``parsetree.Comment``.

        :return: ``True`` if a line was consumed, else ``False``.
        :raises exceptions.SyntaxException: if a ``%`` line has no
         keyword, or an ``end<keyword>`` line does not match the
         innermost open control keyword.
        """
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator = match.group(1)
        text = match.group(2)
        if operator == "%":
            m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" % text,
                    **self.exception_kwargs,
                )
            isend, keyword = m2.group(1, 2)
            isend = isend is not None

            if isend:
                if not self.control_line:
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'" % (keyword, text),
                        **self.exception_kwargs,
                    )
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'"
                        % (text, self.control_line[-1].keyword),
                        **self.exception_kwargs,
                    )
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True

    def match_comment(self):
        """Match a multiline ``<%doc>...</%doc>`` comment.

        :return: ``True`` if a comment was consumed (and appended as a
         ``parsetree.Comment``), else ``False``.
        """
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False

        self.append_node(parsetree.Comment, match.group(1))
        return True