Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/jinja2/lexer.py: 34%


1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class 

2is used to do some preprocessing. It filters out invalid operators like 

3the bitshift operators we don't allow in templates. It separates 

4template code and python code in expressions. 

5""" 

6 

7import re 

8import typing as t 

9from ast import literal_eval 

10from collections import deque 

11from sys import intern 

12 

13from ._identifier import pattern as name_re 

14from .exceptions import TemplateSyntaxError 

15from .utils import LRUCache 

16 

17if t.TYPE_CHECKING: 

18 import typing_extensions as te 

19 

20 from .environment import Environment 

21 

22# cache for the lexers. Exists in order to be able to have multiple 

23# environments with the same lexer 

24_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50) # type: ignore 

25 

26# static regular expressions 

27whitespace_re = re.compile(r"\s+") 

28newline_re = re.compile(r"(\r\n|\r|\n)") 

29string_re = re.compile( 

30 r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S 

31) 

32integer_re = re.compile( 

33 r""" 

34 ( 

35 0b(_?[0-1])+ # binary 

36 | 

37 0o(_?[0-7])+ # octal 

38 | 

39 0x(_?[\da-f])+ # hex 

40 | 

41 [1-9](_?\d)* # decimal 

42 | 

43 0(_?0)* # decimal zero 

44 ) 

45 """, 

46 re.IGNORECASE | re.VERBOSE, 

47) 

48float_re = re.compile( 

49 r""" 

50 (?<!\.) # doesn't start with a . 

51 (\d+_)*\d+ # digits, possibly _ separated 

52 ( 

53 (\.(\d+_)*\d+)? # optional fractional part 

54 e[+\-]?(\d+_)*\d+ # exponent part 

55 | 

56 \.(\d+_)*\d+ # required fractional part 

57 ) 

58 """, 

59 re.IGNORECASE | re.VERBOSE, 

60) 
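

# Illustrative sketch, not part of the original module: a hypothetical helper
# showing what the number regexes above accept. The sample literals are
# assumptions chosen only for demonstration.
def _demo_number_regexes() -> None:
    # Integers allow "_" separators and binary/octal/hex prefixes.
    assert integer_re.fullmatch("1_000") is not None
    assert integer_re.fullmatch("0x_dead_beef") is not None
    # Floats need a fractional part or an exponent, and the negative
    # lookbehind rejects a literal that starts with a bare ".".
    assert float_re.fullmatch("2.5e-3") is not None
    assert float_re.search(".5") is None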


# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type: str) -> str:
    if token_type in reverse_operators:
        return reverse_operators[token_type]

    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value

    return _describe_token_type(token.type)


def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)

        if type == TOKEN_NAME:
            return value
    else:
        type = expr

    return _describe_token_type(type)
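

# Illustrative sketch, not part of the original module: how the describe
# helpers above render token expressions for error messages.
def _demo_describe() -> None:
    assert describe_token_expr("name:endfor") == "endfor"
    assert describe_token_expr("block_end") == "end of statement block"
    assert describe_token_expr("comma") == ","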


def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string. This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles the start-delimiter rules from the environment into a
    sorted list of (token type, regex) pairs.
    """
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
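

# Illustrative sketch, not part of the original module: compile_rules only
# reads a handful of delimiter attributes, so a SimpleNamespace standing in
# for a real Environment (an assumption for demonstration) is enough to show
# the resulting (token type, regex) ordering.
def _demo_compile_rules() -> None:
    from types import SimpleNamespace

    fake_env = SimpleNamespace(
        comment_start_string="{#",
        block_start_string="{%",
        variable_start_string="{{",
        line_statement_prefix=None,
        line_comment_prefix=None,
    )
    rules = compile_rules(fake_env)  # type: ignore[arg-type]
    # All three default delimiters have length 2, so the reverse sort falls
    # back to the token name for ordering.
    assert [name for name, _ in rules] == [
        TOKEN_VARIABLE_BEGIN,
        TOKEN_COMMENT_BEGIN,
        TOKEN_BLOCK_BEGIN,
    ]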


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.message = message
        self.error_class = cls

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True

        if ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)
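

# Illustrative sketch, not part of the original module: token expressions
# accept either a bare token type or "type:value".
def _demo_token_test() -> None:
    tok = Token(1, TOKEN_NAME, "endfor")
    assert tok.test("name")
    assert tok.test("name:endfor")
    assert not tok.test("name:endif")
    assert tok.test_any("integer", "name:endfor")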


class TokenStreamIterator:
    """The iterator for token streams. Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        token = self.stream.current

        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)
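

# Illustrative sketch, not part of the original module: driving a TokenStream
# built from a hand-made token list (an assumption for demonstration; real
# streams come from Lexer.tokenize).
def _demo_token_stream() -> None:
    tokens = [
        Token(1, TOKEN_NAME, "foo"),
        Token(1, TOKEN_DOT, "."),
        Token(1, TOKEN_NAME, "bar"),
    ]
    stream = TokenStream(iter(tokens), "<demo>", None)
    assert stream.current.value == "foo"
    assert stream.look().value == "."  # peek at the token after the current one
    assert stream.expect("name").value == "foo"  # consume and advance
    assert stream.skip_if("dot")
    assert stream.expect("name:bar").value == "bar"
    assert stream.eos  # only the synthesized eof token remains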


def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)

    if lexer is None:
        _lexer_cache[key] = lexer = Lexer(environment)

    return lexer
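

# Illustrative sketch, not part of the original module: environments whose
# delimiter and whitespace settings match share one cached Lexer instance.
def _demo_lexer_cache() -> None:
    from jinja2 import Environment

    assert get_lexer(Environment()) is get_lexer(Environment())
    # A differing option produces (and caches) a separate lexer.
    assert get_lexer(Environment()) is not get_lexer(Environment(trim_blocks=True))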


class OptionalLStrip(tuple):  # type: ignore[type-arg]
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)


class _Rule(t.NamedTuple):
    pattern: t.Pattern[str]
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    command: t.Optional[str]


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants ASP-like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        self.lstrip_blocks = environment.lstrip_blocks

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(rf"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        rf"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls tokeniter and wrap, and returns the result as a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups: t.Sequence[str] = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and self.lstrip_blocks
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if whitespace_re.fullmatch(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # strings as tokens are just yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # publish the new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
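

# Illustrative sketch, not part of the original module: end-to-end use of the
# Lexer on a tiny template. The expected token types and values follow from
# the rules assembled above; the template string is an arbitrary example.
def _demo_tokenize() -> None:
    from jinja2 import Environment

    lexer = get_lexer(Environment())
    stream = lexer.tokenize("Hello {{ name }}!")
    assert [(tok.type, tok.value) for tok in stream] == [
        ("data", "Hello "),
        ("variable_begin", "{{"),
        ("name", "name"),
        ("variable_end", "}}"),
        ("data", "!"),
    ]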