Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mako/lexer.py: 89%

252 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:02 +0000

1# mako/lexer.py 

2# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file> 

3# 

4# This module is part of Mako and is released under 

5# the MIT License: http://www.opensource.org/licenses/mit-license.php 

6 

7"""provides the Lexer class for parsing template strings into parse trees.""" 

8 

9import codecs 

10import re 

11 

12from mako import exceptions 

13from mako import parsetree 

14from mako.pygen import adjust_whitespace 

15 

# Process-wide cache of compiled regular expressions, keyed by the
# ``(regexp, flags)`` pair exactly as passed to ``Lexer.match()``; entries
# are added lazily on first use and never evicted.
_regexp_cache = {}

17 

18 

class Lexer:
    """Scan template source text and build a tree of ``parsetree`` nodes.

    The lexer keeps a cursor (``match_position``) into ``text`` and
    repeatedly tries a fixed sequence of ``match_*`` methods at the cursor;
    each successful match advances the cursor and usually appends a node to
    the tree rooted at ``self.template``.

    State kept on the instance:

    * ``text`` / ``filename`` / ``encoding`` -- the source, its name and the
      encoding (``encoding`` is replaced by the detected one in ``parse()``).
    * ``match_position`` / ``lineno`` -- cursor offset and the 1-based line
      number of the cursor.
    * ``matched_lineno`` / ``matched_charpos`` -- line and column recorded
      for the most recent successful match (used for node positions and
      error reporting).
    * ``tag`` -- stack of currently open ``parsetree.Tag`` nodes.
    * ``control_line`` / ``ternary_stack`` -- parallel stacks tracking open
      control-line blocks and the ternary control lines seen inside each.
    * ``preprocessor`` -- list of callables applied to the text by
      ``parse()`` before lexing.
    """

    def __init__(
        self, text, filename=None, input_encoding=None, preprocessor=None
    ):
        """Set up lexer state for ``text``.

        :param text: template source, ``str`` or ``bytes``.
        :param filename: name attached to nodes and error reports.
        :param input_encoding: fallback encoding used when the source
         carries no magic encoding comment.
        :param preprocessor: ``None``, a single callable, or an iterable of
         callables; each receives the text and returns the new text.
        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        self.matched_lineno = 1
        self.matched_charpos = 0
        self.lineno = 1
        self.match_position = 0
        self.tag = []
        self.control_line = []
        self.ternary_stack = []
        self.encoding = input_encoding

        # normalize ``preprocessor`` to something iterable
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """Keyword arguments locating the last match, for exceptions."""
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """compile the given regexp, cache the reg, and call match_reg().

        The compiled pattern is stored in the module-level
        ``_regexp_cache`` under ``(regexp, flags)``.  Returns whatever
        ``match_reg()`` returns.
        """

        try:
            reg = _regexp_cache[(regexp, flags)]
        except KeyError:
            reg = re.compile(regexp, flags) if flags else re.compile(regexp)
            _regexp_cache[(regexp, flags)] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """match the given regular expression object to the current text
        position.

        if a match occurs, update the current text and line position.

        Returns the match object, or ``None`` when nothing matched (in
        which case no state is modified).
        """

        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            (start, end) = match.span()
            # a zero-width match still advances by one so the caller's
            # loop cannot spin forever at the same position
            self.match_position = end + 1 if end == start else end
            self.matched_lineno = self.lineno
            # column = distance from the last newline before the match
            # start.  The length is taken from the text itself rather than
            # ``self.textlength`` so this also works before parse() has
            # run (``textlength`` is only assigned there).
            cp = mp - 1
            if 0 <= cp < len(self.text):
                cp = self.text[: cp + 1].rfind("\n")
            self.matched_charpos = mp - cp
            self.lineno += self.text[mp : self.match_position].count("\n")
        return match

    def parse_until_text(self, watch_nesting, *text):
        """Consume text from the cursor up to one of the ``text`` patterns.

        Comments starting with ``#`` and quoted string literals are skipped
        as opaque units so a terminator inside them is not recognised.

        :param watch_nesting: when true, a terminator is ignored while the
         running count of ``{``, ``(`` or ``[`` exceeds the count of the
         corresponding closers.
        :param text: one or more regular-expression fragments, any of which
         terminates the scan.
        :return: ``(consumed_text, terminator)`` where ``consumed_text``
         excludes the terminator.
        :raises exceptions.SyntaxException: if no terminator can be found.
        """
        startpos = self.match_position
        text_re = r"|".join(text)
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # skip a "#" comment through end of line
            match = self.match(r"#.*\n")
            if match:
                continue
            # skip a complete single- or triple-quoted string literal
            match = self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            )
            if match:
                continue
            match = self.match(r"(%s)" % text_re)
            if match and not (
                watch_nesting
                and (brace_level > 0 or paren_level > 0 or bracket_level > 0)
            ):
                return (
                    self.text[
                        startpos : self.match_position - len(match.group(1))
                    ],
                    match.group(1),
                )
            elif not match:
                # plain run of text up to the next quote, comment or
                # terminator
                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
            if match:
                # either a plain run, or a terminator that was rejected
                # because we are nested; update nesting counts from it
                chunk = match.group(1)
                brace_level += chunk.count("{")
                brace_level -= chunk.count("}")
                paren_level += chunk.count("(")
                paren_level -= chunk.count(")")
                bracket_level += chunk.count("[")
                bracket_level -= chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """Instantiate ``nodecls`` and attach it to the tree.

        ``source``, ``lineno`` and ``pos`` default to the position of the
        last match unless given in ``kwargs``; ``filename`` is always set
        from the lexer.  The node is appended to the innermost open tag
        (or to the template root), and the tag / control-line stacks are
        updated according to the node's type.

        :raises exceptions.SyntaxException: for a non-primary, non-end
         control line that the enclosing control line does not accept as a
         ternary keyword.
        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)
        if self.tag:
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted)
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            if (
                not (
                    isinstance(node, parsetree.ControlLine)
                    and control_frame.is_ternary(node.keyword)
                )
                and self.ternary_stack
                and self.ternary_stack[-1]
            ):
                self.ternary_stack[-1][-1].nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if self.tag:
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # closing keyword: leave the current control block
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                # opening keyword: enter a new control block
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line and self.control_line[-1].is_ternary(
                node.keyword
            ):
                self.ternary_stack[-1].append(node)
            elif self.control_line and not self.control_line[-1].is_ternary(
                node.keyword
            ):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'"
                    % (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs,
                )

    # magic encoding comment on the first line, e.g. "# -*- coding: x -*-"
    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    @staticmethod
    def _is_utf8_name(name):
        """Return True if ``name`` is a spelling of the utf-8 codec."""
        if name == "utf-8":
            return True
        try:
            # canonical codec name; "UTF-8", "utf8", "utf_8" all map here
            return codecs.lookup(name).name == "utf-8"
        except LookupError:
            return False

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """given string/unicode or bytes/string, determine encoding
        from magic encoding comment, return body as unicode
        or raw if decode_raw=False

        The encoding is taken from the magic comment if present, else
        ``known_encoding``, else ``"utf-8"``.  For ``bytes`` input a
        leading UTF-8 BOM is stripped and forces ``"utf-8"``.

        :return: ``(encoding, text)``.
        :raises exceptions.CompileException: if a UTF-8 BOM is combined
         with a magic comment naming a different codec, or if decoding
         fails with ``UnicodeDecodeError``.
        """
        if isinstance(text, str):
            m = self._coding_re.match(text)
            encoding = m.group(1) if m else (known_encoding or "utf-8")
            return encoding, text

        if text.startswith(codecs.BOM_UTF8):
            text = text[len(codecs.BOM_UTF8) :]
            parsed_encoding = "utf-8"
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            # compare codec identity, not spelling: "UTF-8" or "utf8" in
            # the magic comment do not conflict with a UTF-8 BOM
            if m is not None and not self._is_utf8_name(m.group(1)):
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        else:
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            parsed_encoding = m.group(1) if m else known_encoding or "utf-8"
        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except UnicodeDecodeError:
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )

        return parsed_encoding, text

    def parse(self):
        """Lex the whole template and return the root template node.

        Decodes the source, runs the preprocessors, skips a leading magic
        encoding comment, then applies the ``match_*`` methods in a fixed
        order until the end of the text is reached.

        :raises exceptions.SyntaxException: if a tag or control keyword is
         still open at the end of the text.
        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, True, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        while True:
            if self.match_position > self.textlength:
                break

            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            if self.match_position > self.textlength:
                break
            # TODO: no coverage here
            raise exceptions.MakoException("assertion failed")

        if self.tag:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs,
            )
        if self.control_line:
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'"
                % self.control_line[-1].keyword,
                self.text,
                self.control_line[-1].lineno,
                self.control_line[-1].pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """Match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag`` node built from the keyword and its
        quoted attributes.  A self-closing tag is popped from the tag
        stack immediately.  For a ``text`` tag, everything up to the
        closing text tag is appended as one ``parsetree.Text`` node and
        the closing tag is consumed as well.

        :return: True if a tag was matched, else False.
        :raises exceptions.SyntaxException: if a ``text`` tag is never
         closed.
        """
        reg = r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                               #        sign, string expression
                                               # comma is for backwards compat
                                               # identified in #366

            \s*     # more whitespace

            (/)?>   # closing

        """

        match = self.match(
            reg,
            re.I | re.S | re.X,
        )

        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            for att in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            ):
                key, val1, val2 = att
                # exactly one of the two quote styles captured the value
                text = val1 or val2
                text = text.replace("\r\n", "\n")
                attributes[key] = text
        self.append_node(parsetree.Tag, keyword, attributes)
        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs,
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """Match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: True if a closing tag was matched, else False.
        :raises exceptions.SyntaxException: if no tag is open, or the
         keyword differs from the innermost open tag.
        """
        match = self.match(r"\</%[\t ]*([^\t ]+?)[\t ]*>")
        if not match:
            return False

        if not self.tag:
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>"
                % match.group(1),
                **self.exception_kwargs,
            )
        elif self.tag[-1].keyword != match.group(1):
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (match.group(1), self.tag[-1].keyword),
                **self.exception_kwargs,
            )
        self.tag.pop()
        return True

    def match_end(self):
        """Return True if the cursor is at the end of the text."""
        # the end-of-string anchor is zero-width, so the matched string is
        # always empty; only the fact that it matched is meaningful
        return bool(self.match(r"\Z", re.S))

    def match_text(self):
        """Match plain template text up to the next construct.

        Appends a ``parsetree.Text`` node when the matched run is
        non-empty.

        :return: True if the pattern matched, else False.
        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )

        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """Match a ``<% ... %>`` or ``<%! ... %>`` code block.

        Appends a ``parsetree.Code`` node whose second argument is True
        for the ``<%!`` form.

        :return: True if a block was matched, else False.
        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        # remember where the block opened; parse_until_text() moves on
        line, pos = self.matched_lineno, self.matched_charpos
        text, _ = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """Match a ``${expr}`` or ``${expr | escapes}`` expression.

        Appends a ``parsetree.Expression`` node with the expression text
        and the stripped escapes string (empty when no ``|`` is present).

        :return: True if an expression was matched, else False.
        """
        match = self.match(r"\${")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        if end == "|":
            escapes, end = self.parse_until_text(True, r"}")
        else:
            escapes = ""
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """Match a ``%`` control line or a ``##`` comment line.

        A ``%`` line is split into an optional ``end`` prefix and a
        keyword and appended as a ``parsetree.ControlLine``; a ``##`` line
        is appended as a ``parsetree.Comment``.

        :return: True if a line was matched, else False.
        :raises exceptions.SyntaxException: if the ``%`` line has no
         keyword, or an ``end`` keyword has no matching open control line.
        """
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator = match.group(1)
        text = match.group(2)
        if operator == "%":
            m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" % text,
                    **self.exception_kwargs,
                )
            isend, keyword = m2.group(1, 2)
            isend = isend is not None

            if isend:
                if not self.control_line:
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'" % (keyword, text),
                        **self.exception_kwargs,
                    )
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'"
                        % (text, self.control_line[-1].keyword),
                        **self.exception_kwargs,
                    )
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True

    def match_comment(self):
        """matches the multiline version of a comment"""
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False

        self.append_node(parsetree.Comment, match.group(1))
        return True