Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mako/lexer.py: 89%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

263 statements  

1# mako/lexer.py 

2# Copyright 2006-2025 the Mako authors and contributors <see AUTHORS file> 

3# 

4# This module is part of Mako and is released under 

5# the MIT License: http://www.opensource.org/licenses/mit-license.php 

6 

7"""provides the Lexer class for parsing template strings into parse trees.""" 

8 

9import codecs 

10import re 

11 

12from mako import exceptions 

13from mako import parsetree 

14from mako.pygen import adjust_whitespace 

15 

# Process-wide cache of compiled regular expressions, shared by every
# Lexer instance.  Keys are ``(pattern_source, flags)`` tuples and values
# are the compiled pattern objects; entries are added lazily by
# ``Lexer.match``.
_regexp_cache = dict()

17 

18 

class Lexer:
    """Turn Mako template source into a ``parsetree.TemplateNode`` tree.

    The lexer keeps a cursor (``match_position``) into ``self.text`` and
    repeatedly tries a fixed sequence of ``match_*`` methods at that cursor.
    Each successful matcher advances the cursor and appends one or more
    nodes via :meth:`append_node`.

    :param text: template source, as ``str`` or ``bytes``.
    :param filename: name stored on every node and used in error reports.
    :param input_encoding: encoding to fall back on when the source carries
        no magic encoding comment.
    :param preprocessor: a callable, or an iterable of callables, each
        applied to the decoded text (in order) before lexing starts.
    """

    # magic "coding" comment, e.g. ``# -*- coding: utf-8 -*-``
    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    def __init__(
        self, text, filename=None, input_encoding=None, preprocessor=None
    ):
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        # line / column at which the most recent successful match started
        self.matched_lineno = 1
        self.matched_charpos = 0
        # line number and offset of the cursor itself
        self.lineno = 1
        self.match_position = 0
        # stacks of currently open tags / control lines / ternary branches
        self.tag = []
        self.control_line = []
        self.ternary_stack = []
        self.encoding = input_encoding

        # normalise ``preprocessor`` to an iterable of callables
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments locating the last match, for exceptions.

        The dictionary is meant to be splatted into the constructors of
        the ``exceptions`` classes raised by this lexer.
        """
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """Compile ``regexp`` (memoised), then delegate to :meth:`match_reg`.

        :param regexp: pattern source string.
        :param flags: optional ``re`` flags; ``None`` or ``0`` means none.
        :return: the match object, or ``None`` when nothing matched.
        """
        cache_key = (regexp, flags)
        try:
            reg = _regexp_cache[cache_key]
        except KeyError:
            reg = re.compile(regexp, flags) if flags else re.compile(regexp)
            _regexp_cache[cache_key] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """Match the compiled pattern ``reg`` at the current cursor.

        On success the cursor, the current line number and the recorded
        start line / column of the match are updated.  A zero-width match
        still moves the cursor forward by one character so that the lexer
        always makes progress.

        :param reg: a compiled regular expression object.
        :return: the match object, or ``None`` when nothing matched.
        """
        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            start, end = match.span()
            self.match_position = end + 1 if end == start else end
            self.matched_lineno = self.lineno
            # Column = distance from the newline preceding the match start.
            # The bound is taken from the text itself rather than from
            # ``self.textlength``, which only exists once parse() has run.
            cp = mp - 1
            if 0 <= cp < len(self.text):
                # bounded rfind/count avoid copying a slice of the template
                # on every single match
                cp = self.text.rfind("\n", 0, cp + 1)
            self.matched_charpos = mp - cp
            self.lineno += self.text.count("\n", mp, self.match_position)
        return match

    def parse_until_text(self, watch_nesting, *text):
        """Consume input up to the first acceptable terminator.

        ``#`` comments running to end of line and complete quoted strings
        are skipped, so terminators inside them are ignored.

        :param watch_nesting: when true, a terminator is only accepted
            while the running counts of ``{}``, ``()`` and ``[]`` seen so
            far are all balanced.
        :param text: one or more regular-expression fragments, any of
            which terminates the scan.
        :return: ``(consumed_text, terminator)`` where ``consumed_text``
            excludes the terminator.
        :raises exceptions.SyntaxException: if no terminator can be found.
        """
        startpos = self.match_position
        text_re = r"|".join(text)
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # comment to end of line
            if self.match(r"#.*\n"):
                continue
            # a complete single-, double- or triple-quoted string
            if self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            ):
                continue
            match = self.match(r"(%s)" % text_re)
            if match:
                nested = (
                    brace_level > 0 or paren_level > 0 or bracket_level > 0
                )
                if not (watch_nesting and nested):
                    terminator = match.group(1)
                    stop = self.match_position - len(terminator)
                    return self.text[startpos:stop], terminator
                # otherwise fall through: the terminator itself is counted
                # as ordinary text below (e.g. a "}" closing a nested "{")
            else:
                # plain text up to the next quote, comment or terminator
                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
            if match:
                chunk = match.group(1)
                brace_level += chunk.count("{") - chunk.count("}")
                paren_level += chunk.count("(") - chunk.count(")")
                bracket_level += chunk.count("[") - chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """Instantiate ``nodecls`` and attach the node to the tree.

        ``source``, ``lineno`` and ``pos`` default to the position of the
        last match unless supplied by the caller; ``filename`` is always
        overwritten with the lexer's filename.  The node is appended to the
        innermost open tag, or to the template root when no tag is open,
        and the tag / control-line / ternary stacks are updated.

        :raises exceptions.SyntaxException: for a non-primary control
            keyword that is not a legal ternary of the open control line.
        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)

        parent = self.tag[-1] if self.tag else self.template
        parent.nodes.append(node)

        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted)
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            starts_ternary = isinstance(
                node, parsetree.ControlLine
            ) and control_frame.is_ternary(node.keyword)
            if (
                not starts_ternary
                and self.ternary_stack
                and self.ternary_stack[-1]
            ):
                self.ternary_stack[-1][-1].nodes.append(node)

        if isinstance(node, parsetree.Tag):
            if self.tag:
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line:
                if self.control_line[-1].is_ternary(node.keyword):
                    self.ternary_stack[-1].append(node)
                else:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' not a legal ternary for keyword '%s'"
                        % (node.keyword, self.control_line[-1].keyword),
                        **self.exception_kwargs,
                    )

    @staticmethod
    def _is_utf8_codec(name):
        """Return True when ``name`` is any registered alias of UTF-8."""
        try:
            return codecs.lookup(name).name == "utf-8"
        except LookupError:
            # unknown encoding names certainly do not denote UTF-8
            return False

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """Determine the encoding of ``text`` and optionally decode it.

        The encoding comes, in order of preference, from a UTF-8 BOM, a
        magic encoding comment at the start of the text, the
        ``known_encoding`` argument, and finally ``"utf-8"``.

        :param text: template source as ``str`` or ``bytes``.
        :param decode_raw: when true, ``bytes`` input is decoded to ``str``;
            otherwise it is returned undecoded (minus any BOM).
        :param known_encoding: fallback encoding name, may be ``None``.
        :param filename: file name used in error reports.
        :return: ``(encoding, text)``.
        :raises exceptions.CompileException: if a UTF-8 BOM is combined
            with a magic comment naming a different codec, or if decoding
            fails with ``UnicodeDecodeError``.
        """
        if isinstance(text, str):
            m = self._coding_re.match(text)
            encoding = m.group(1) if m else known_encoding or "utf-8"
            return encoding, text

        if text.startswith(codecs.BOM_UTF8):
            text = text[len(codecs.BOM_UTF8) :]
            parsed_encoding = "utf-8"
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            # Compare codecs, not spellings: "UTF-8" and "utf8" name the
            # same codec as the BOM and are therefore not a conflict.
            if m is not None and not self._is_utf8_codec(m.group(1)):
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        else:
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            parsed_encoding = m.group(1) if m else known_encoding or "utf-8"

        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except UnicodeDecodeError as err:
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                ) from err

        return parsed_encoding, text

    def parse(self):
        """Lex the whole template and return the populated template node.

        :raises exceptions.SyntaxException: if a tag or control keyword is
            still open when the end of input is reached (individual
            matchers raise it for other syntax errors).
        :raises exceptions.MakoException: if no matcher makes progress.
        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, True, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        # matchers are tried in this exact order at every cursor position
        matchers = (
            self.match_expression,
            self.match_control_line,
            self.match_comment,
            self.match_tag_start,
            self.match_tag_end,
            self.match_python_block,
            self.match_percent,
            self.match_text,
        )

        while self.match_position <= self.textlength:
            if self.match_end():
                break
            if any(matcher() for matcher in matchers):
                continue

            if self.match_position > self.textlength:
                break
            # TODO: no coverage here
            raise exceptions.MakoException("assertion failed")

        if self.tag:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs,
            )
        if self.control_line:
            unterminated = self.control_line[-1]
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'" % unterminated.keyword,
                self.text,
                unterminated.lineno,
                unterminated.pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """Match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag`` node.  A self-closing tag (``/>``) is
        popped from the tag stack straight away.  For ``<%text>`` the body
        up to ``</%text>`` is taken verbatim as a single ``Text`` node and
        the closing tag is consumed as well.

        :return: ``False`` when no tag starts here; otherwise ``True``, or
            the result of :meth:`match_tag_end` for ``<%text>``.
        :raises exceptions.SyntaxException: if ``<%text>`` is never closed.
        """
        reg = r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            # attrname, = sign, string expression
            # (comma is for backwards compat identified in #366)
            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)

            \s*     # more whitespace

            (/)?>   # closing

        """

        match = self.match(reg, re.I | re.S | re.X)
        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            pairs = re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            )
            for key, single_quoted, double_quoted in pairs:
                value = single_quoted or double_quoted
                attributes[key] = value.replace("\r\n", "\n")
        self.append_node(parsetree.Tag, keyword, attributes)

        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs,
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: ``True`` if a closing tag was consumed, else ``False``.
        :raises exceptions.SyntaxException: if no tag is open, or the
            closing keyword differs from the innermost open tag.
        """
        match = self.match(r"\</%[\t ]*([^\t ]+?)[\t ]*>")
        if not match:
            return False

        closing = match.group(1)
        if not self.tag:
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>" % closing,
                **self.exception_kwargs,
            )
        if self.tag[-1].keyword != closing:
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (closing, self.tag[-1].keyword),
                **self.exception_kwargs,
            )
        self.tag.pop()
        return True

    def match_end(self):
        """Return ``True`` when the cursor sits at the end of the text."""
        # ``\Z`` can only ever match the empty string, so the result is a
        # plain yes/no answer.
        return self.match(r"\Z", re.S) is not None

    def match_percent(self):
        """Match a line starting with an escaped ``%%``.

        The doubled percent sign is emitted as a ``Text`` node holding the
        leading whitespace, one literal ``%`` and any further ``%`` signs.

        :return: ``True`` if matched, else ``False``.
        """
        match = self.match(r"(?<=^)(\s*)%%(%*)", re.M)
        if not match:
            return False
        self.append_node(
            parsetree.Text, match.group(1) + "%" + match.group(2)
        )
        return True

    def match_text(self):
        """Match plain text up to the next construct the lexer recognises.

        A ``Text`` node is appended only when the captured text is
        non-empty.

        :return: ``True`` if the pattern matched, else ``False``.
        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment, preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?%)     # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )
        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """Match a ``<% ... %>`` or ``<%! ... %>`` Python block.

        Appends a ``parsetree.Code`` node positioned at the opening
        delimiter; its second argument is true for the ``<%!`` form.

        :return: ``True`` if a block was consumed, else ``False``.
        :raises exceptions.SyntaxException: if ``%>`` is never found.
        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """Match a ``${expression}`` or ``${expression | escapes}``.

        Appends a ``parsetree.Expression`` node positioned at the ``${``.

        :return: ``True`` if an expression was consumed, else ``False``.
        :raises exceptions.SyntaxException: if the closing ``}`` is missing.
        """
        if not self.match(r"\${"):
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        escapes = ""
        if end == "|":
            # everything between the pipe and the brace is the filter list
            escapes, end = self.parse_until_text(True, r"}")
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """Match a ``% keyword ...`` control line or a ``##`` comment line.

        A ``%`` line becomes a ``parsetree.ControlLine`` node; a ``##``
        line becomes a ``parsetree.Comment`` node.

        :return: ``True`` if a line was consumed, else ``False``.
        :raises exceptions.SyntaxException: for a ``%`` line without a
            keyword, or an ``end<keyword>`` that does not close the
            innermost open control line.
        """
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator, text = match.group(1), match.group(2)
        if operator != "%":
            self.append_node(parsetree.Comment, text)
            return True

        m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
        if not m2:
            raise exceptions.SyntaxException(
                "Invalid control line: '%s'" % text,
                **self.exception_kwargs,
            )
        end_prefix, keyword = m2.group(1, 2)
        isend = end_prefix is not None

        if isend:
            if not self.control_line:
                raise exceptions.SyntaxException(
                    "No starting keyword '%s' for '%s'" % (keyword, text),
                    **self.exception_kwargs,
                )
            if self.control_line[-1].keyword != keyword:
                raise exceptions.SyntaxException(
                    "Keyword '%s' doesn't match keyword '%s'"
                    % (text, self.control_line[-1].keyword),
                    **self.exception_kwargs,
                )
        self.append_node(parsetree.ControlLine, keyword, isend, text)
        return True

    def match_comment(self):
        """matches the multiline version of a comment

        :return: ``True`` if a ``<%doc>...</%doc>`` block was consumed and
            stored as a ``Comment`` node, else ``False``.
        """
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False
        self.append_node(parsetree.Comment, match.group(1))
        return True