Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/jinja2/lexer.py: 34%


1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class 

2is used to do some preprocessing. It filters out invalid operators like 

3the bitshift operators we don't allow in templates. It separates 

4template code and python code in expressions. 

5""" 

6 

7import re 

8import typing as t 

9from ast import literal_eval 

10from collections import deque 

11from sys import intern 

12 

13from ._identifier import pattern as name_re 

14from .exceptions import TemplateSyntaxError 

15from .utils import LRUCache 

16 

17if t.TYPE_CHECKING: 

18 import typing_extensions as te 

19 

20 from .environment import Environment 

21 

22# cache for the lexers. Exists in order to be able to have multiple 

23# environments with the same lexer 

24_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50) # type: ignore 

25 

26# static regular expressions 

27whitespace_re = re.compile(r"\s+") 

28newline_re = re.compile(r"(\r\n|\r|\n)") 

29string_re = re.compile( 

30 r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S 

31) 

32integer_re = re.compile( 

33 r""" 

34 ( 

35 0b(_?[0-1])+ # binary 

36 | 

37 0o(_?[0-7])+ # octal 

38 | 

39 0x(_?[\da-f])+ # hex 

40 | 

41 [1-9](_?\d)* # decimal 

42 | 

43 0(_?0)* # decimal zero 

44 ) 

45 """, 

46 re.IGNORECASE | re.VERBOSE, 

47) 

48float_re = re.compile( 

49 r""" 

50 (?<!\.) # doesn't start with a . 

51 (\d+_)*\d+ # digits, possibly _ separated 

52 ( 

53 (\.(\d+_)*\d+)? # optional fractional part 

54 e[+\-]?(\d+_)*\d+ # exponent part 

55 | 

56 \.(\d+_)*\d+ # required fractional part 

57 ) 

58 """, 

59 re.IGNORECASE | re.VERBOSE, 

60) 
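

# Illustrative sketch, not part of the original module: a hypothetical helper
# showing what the number regexes above accept. The sample literals are
# assumptions chosen only for demonstration.
def _demo_number_regexes() -> None:
    # Integers allow "_" separators and binary/octal/hex prefixes.
    assert integer_re.fullmatch("1_000") is not None
    assert integer_re.fullmatch("0x_dead_beef") is not None
    # Floats need a fractional part or an exponent, and the negative
    # lookbehind rejects a literal that starts with a bare ".".
    assert float_re.fullmatch("2.5e-3") is not None
    assert float_re.search(".5") is None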


# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type: str) -> str:
    if token_type in reverse_operators:
        return reverse_operators[token_type]

    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value

    return _describe_token_type(token.type)


def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)

        if type == TOKEN_NAME:
            return value
    else:
        type = expr

    return _describe_token_type(type)
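

# Illustrative sketch, not part of the original module: how the describe
# helpers above render token expressions for error messages.
def _demo_describe() -> None:
    assert describe_token_expr("name:endfor") == "endfor"
    assert describe_token_expr("block_end") == "end of statement block"
    assert describe_token_expr("comma") == ","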


def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string. This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles the start-delimiter rules from the environment into a
    sorted list of (token type, regex) pairs.
    """
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
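

# Illustrative sketch, not part of the original module: compile_rules only
# reads a handful of delimiter attributes, so a SimpleNamespace standing in
# for a real Environment (an assumption for demonstration) is enough to show
# the resulting (token type, regex) ordering.
def _demo_compile_rules() -> None:
    from types import SimpleNamespace

    fake_env = SimpleNamespace(
        comment_start_string="{#",
        block_start_string="{%",
        variable_start_string="{{",
        line_statement_prefix=None,
        line_comment_prefix=None,
    )
    rules = compile_rules(fake_env)  # type: ignore[arg-type]
    # All three default delimiters have length 2, so the reverse sort falls
    # back to the token name for ordering.
    assert [name for name, _ in rules] == [
        TOKEN_VARIABLE_BEGIN,
        TOKEN_COMMENT_BEGIN,
        TOKEN_BLOCK_BEGIN,
    ]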


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.message = message
        self.error_class = cls

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True

        if ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)
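

# Illustrative sketch, not part of the original module: token expressions
# accept either a bare token type or "type:value".
def _demo_token_test() -> None:
    tok = Token(1, TOKEN_NAME, "endfor")
    assert tok.test("name")
    assert tok.test("name:endfor")
    assert not tok.test("name:endif")
    assert tok.test_any("integer", "name:endfor")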


class TokenStreamIterator:
    """The iterator for token streams. Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        token = self.stream.current

        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)
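

# Illustrative sketch, not part of the original module: driving a TokenStream
# built from a hand-made token list (an assumption for demonstration; real
# streams come from Lexer.tokenize).
def _demo_token_stream() -> None:
    tokens = [
        Token(1, TOKEN_NAME, "foo"),
        Token(1, TOKEN_DOT, "."),
        Token(1, TOKEN_NAME, "bar"),
    ]
    stream = TokenStream(iter(tokens), "<demo>", None)
    assert stream.current.value == "foo"
    assert stream.look().value == "."  # peek at the token after the current one
    assert stream.expect("name").value == "foo"  # consume and advance
    assert stream.skip_if("dot")
    assert stream.expect("name:bar").value == "bar"
    assert stream.eos  # only the synthesized eof token remains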


def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)

    if lexer is None:
        _lexer_cache[key] = lexer = Lexer(environment)

    return lexer
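

# Illustrative sketch, not part of the original module: environments whose
# delimiter and whitespace settings match share one cached Lexer instance.
def _demo_lexer_cache() -> None:
    from jinja2 import Environment

    assert get_lexer(Environment()) is get_lexer(Environment())
    # A differing option produces (and caches) a separate lexer.
    assert get_lexer(Environment()) is not get_lexer(Environment(trim_blocks=True))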


class OptionalLStrip(tuple):  # type: ignore[type-arg]
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)


class _Rule(t.NamedTuple):
    pattern: t.Pattern[str]
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    command: t.Optional[str]


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants ASP-like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        self.lstrip_blocks = environment.lstrip_blocks

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(rf"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        rf"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls tokeniter and wrap, and returns the result as a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups: t.Sequence[str] = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and self.lstrip_blocks
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if whitespace_re.fullmatch(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # strings as tokens are just yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # publish the new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
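

# Illustrative sketch, not part of the original module: end-to-end use of the
# Lexer on a tiny template. The expected token types and values follow from
# the rules assembled above; the template string is an arbitrary example.
def _demo_tokenize() -> None:
    from jinja2 import Environment

    lexer = get_lexer(Environment())
    stream = lexer.tokenize("Hello {{ name }}!")
    assert [(tok.type, tok.value) for tok in stream] == [
        ("data", "Hello "),
        ("variable_begin", "{{"),
        ("name", "name"),
        ("variable_end", "}}"),
        ("data", "!"),
    ]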