Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/jinja2/lexer.py: 34%

"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
is used to do some preprocessing. It filters out invalid operators like
the bitshift operators we don't allow in templates. It separates
template code and python code in expressions.
"""

import re
import typing as t
from ast import literal_eval
from collections import deque
from sys import intern

from ._identifier import pattern as name_re
from .exceptions import TemplateSyntaxError
from .utils import LRUCache

if t.TYPE_CHECKING:
    import typing_extensions as te

    from .environment import Environment

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50)  # type: ignore

# static regular expressions
whitespace_re = re.compile(r"\s+")
newline_re = re.compile(r"(\r\n|\r|\n)")
string_re = re.compile(
    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
)
integer_re = re.compile(
    r"""
    (
        0b(_?[0-1])+  # binary
    |
        0o(_?[0-7])+  # octal
    |
        0x(_?[\da-f])+  # hex
    |
        [1-9](_?\d)*  # decimal
    |
        0(_?0)*  # decimal zero
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
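
# Illustrative sketch (not part of the upstream module): roughly how the
# numeric regexes above behave, kept as comments so runtime behavior is
# unchanged.
#
#     integer_re.fullmatch("0b1010")        # binary literal -> match
#     integer_re.fullmatch("0o755")         # octal literal -> match
#     integer_re.fullmatch("0xDEAD_beef")   # hex, "_" separators allowed -> match
#     integer_re.fullmatch("1_000_000")     # decimal with separators -> match
#     float_re.fullmatch("42.5e-3")         # fraction and exponent -> match
#     float_re.fullmatch("1e5")             # exponent only -> match
#     float_re.fullmatch("42")              # plain integer -> None (not a float)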

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)
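
# Illustrative note (not from the upstream source): sorting the operator
# alternation longest-first keeps multi-character operators from being split
# into their single-character prefixes.
#
#     operator_re.match("**").group()              # -> "**" (pow), not "*" twice
#     operators[operator_re.match("//").group()]   # -> TOKEN_FLOORDIV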

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type: str) -> str:
    if token_type in reverse_operators:
        return reverse_operators[token_type]

    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value

    return _describe_token_type(token.type)


def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)

        if type == TOKEN_NAME:
            return value
    else:
        type = expr

    return _describe_token_type(type)
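
# Illustrative examples (not from the upstream source) of how token
# expressions are described:
#
#     describe_token_expr("name:for")    # -> "for"
#     describe_token_expr("block_end")   # -> "end of statement block"
#     describe_token_expr("eq")          # -> "=="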


def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string. This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
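
# Rough sketch (not from the upstream source): the rules are sorted by start
# string length, descending, so a longer custom delimiter such as "<%=" wins
# over a shorter one such as "<%". With a default jinja2 Environment (all
# start strings have length 2) the tie is broken by token name:
#
#     [name for name, _ in compile_rules(Environment())]
#     # -> ["variable_begin", "comment_begin", "block_begin"]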


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.message = message
        self.error_class = cls

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True

        if ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)


class TokenStreamIterator:
    """The iterator for tokenstreams. Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        token = self.stream.current

        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)
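
# Illustrative sketch (not from the upstream source) of the stream protocol,
# using tokens built by hand rather than by the lexer:
#
#     stream = TokenStream(
#         [Token(1, TOKEN_NAME, "foo"), Token(1, TOKEN_ASSIGN, "=")], None, None
#     )
#     stream.current            # Token(1, "name", "foo")
#     stream.look()             # peeks Token(1, "assign", "=") without consuming it
#     stream.expect("name")     # returns the "foo" token and advances
#     stream.skip_if("assign")  # True, the "=" token is consumed
#     stream.eos                # True, only the eof token remains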


def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)

    if lexer is None:
        _lexer_cache[key] = lexer = Lexer(environment)

    return lexer
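
# Illustrative note (not from the upstream source): the cache key is built
# only from delimiter and whitespace settings, so environments that agree on
# those settings share one lexer instance (as long as the LRU cache keeps it).
#
#     get_lexer(Environment()) is get_lexer(Environment())   # True on a cache hit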


class OptionalLStrip(tuple):  # type: ignore[type-arg]
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)


class _Rule(t.NamedTuple):
    pattern: t.Pattern[str]
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    command: t.Optional[str]


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        self.lstrip_blocks = environment.lstrip_blocks

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(rf"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        rf"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls :meth:`tokeniter` and :meth:`wrap` and wraps the result
        in a token stream.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)
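
    # Illustrative usage (not from the upstream source); assumes a default
    # jinja2 Environment:
    #
    #     lexer = get_lexer(Environment())
    #     stream = lexer.tokenize("Hello {{ name }}!")
    #     [(tok.type, tok.value) for tok in stream]
    #     # -> [("data", "Hello "), ("variable_begin", "{{"), ("name", "name"),
    #     #     ("variable_end", "}}"), ("data", "!")]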

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)
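
    # Illustrative note (not from the upstream source): the numeric value
    # conversion above relies on the standard library, e.g.
    #
    #     int("0x1f".replace("_", ""), 0)   # -> 31      (TOKEN_INTEGER)
    #     literal_eval("1000.5")            # -> 1000.5  (TOKEN_FLOAT)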

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups: t.Sequence[str] = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and self.lstrip_blocks
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if whitespace_re.fullmatch(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # strings as tokens are just yielded as-is
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # publish the new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
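

# Illustrative end-to-end sketch (not part of the upstream module): roughly
# what tokeniter yields for a tiny template with a default jinja2 Environment;
# note that whitespace tokens are only filtered out later by wrap().
#
#     list(get_lexer(Environment()).tokeniter("{% if x %}", "demo"))
#     # -> [(1, "block_begin", "{%"), (1, "whitespace", " "), (1, "name", "if"),
#     #     (1, "whitespace", " "), (1, "name", "x"), (1, "whitespace", " "),
#     #     (1, "block_end", "%}")]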