Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mako/lexer.py: 89%

1# mako/lexer.py

4# This module is part of Mako and is released under

5# the MIT License: http://www.opensource.org/licenses/mit-license.php

7"""provides the Lexer class for parsing template strings into parse trees."""

9import codecs

10import re

12from mako import exceptions

13from mako import parsetree

14from mako.pygen import adjust_whitespace

# module-wide cache of compiled regular expressions, keyed by the
# (regexp, flags) tuple; filled lazily by Lexer.match()
_regexp_cache = {}

19class Lexer:

def __init__(
    self, text, filename=None, input_encoding=None, preprocessor=None
):
    """set up lexer state for the given template source.

    ``preprocessor`` may be None, a single non-iterable object, or an
    iterable; None becomes an empty list, a non-iterable is wrapped in
    a one-element list, and an iterable is stored as given.
    """
    self.text = text
    self.filename = filename
    self.template = parsetree.TemplateNode(self.filename)

    # position bookkeeping
    self.lineno = 1
    self.matched_lineno = 1
    self.matched_charpos = 0
    self.match_position = 0

    # stacks of currently open constructs
    self.tag = []
    self.control_line = []
    self.ternary_stack = []

    self.encoding = input_encoding

    if preprocessor is None:
        preprocessor = []
    elif not hasattr(preprocessor, "__iter__"):
        preprocessor = [preprocessor]
    self.preprocessor = preprocessor

@property
def exception_kwargs(self):
    """keyword arguments describing the location of the last match,
    suitable for passing to the exception constructors used here."""
    return dict(
        source=self.text,
        lineno=self.matched_lineno,
        pos=self.matched_charpos,
        filename=self.filename,
    )

def match(self, regexp, flags=None):
    """compile the given regexp (reusing a cached compilation when one
    exists for the same regexp and flags) and call match_reg()."""

    cache_key = (regexp, flags)
    compiled = _regexp_cache.get(cache_key)
    if compiled is None:
        if flags:
            compiled = re.compile(regexp, flags)
        else:
            compiled = re.compile(regexp)
        _regexp_cache[cache_key] = compiled

    return self.match_reg(compiled)

def match_reg(self, reg):
    """match the given regular expression object to the current text
    position.

    if a match occurs, update the current text and line position.

    :param reg: a compiled regular expression object.
    :return: the match object, or None if ``reg`` does not match at
     ``self.match_position``, in which case no state is changed.
    """

    mp = self.match_position

    match = reg.match(self.text, mp)
    if match:
        (start, end) = match.span()
        # a zero-width match still moves the position forward by one
        self.match_position = end + 1 if end == start else end
        self.matched_lineno = self.lineno

        # column of the match start, measured from the newline that
        # precedes it.  rfind()/count() are given bounds rather than
        # slices so the text is not copied on every match.
        cp = mp - 1
        if 0 <= cp < self.textlength:
            cp = self.text.rfind("\n", 0, mp)
        self.matched_charpos = mp - cp
        self.lineno += self.text.count("\n", mp, self.match_position)
    return match

def parse_until_text(self, watch_nesting, *text):
    """consume text up to the first of the given terminator patterns.

    ``#`` comments running to end of line and quoted string literals
    are skipped whole, so a terminator inside them is not seen.  when
    ``watch_nesting`` is true, a terminator is also passed over while
    any of ``{``, ``(`` or ``[`` has been opened more times than closed.

    returns a tuple of (the text consumed before the terminator, the
    terminator text as matched).  raises SyntaxException when nothing
    further can be matched.
    """
    startpos = self.match_position
    text_re = r"|".join(text)
    # open-minus-close count for each bracket pair
    depth = {"{}": 0, "()": 0, "[]": 0}
    while True:
        # comment through end of line
        if self.match(r"#.*\n"):
            continue
        # single- or triple-quoted string literal
        if self.match(
            r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
        ):
            continue
        match = self.match(r"(%s)" % text_re)
        if match:
            nested = watch_nesting and any(
                level > 0 for level in depth.values()
            )
            if not nested:
                terminator = match.group(1)
                stop = self.match_position - len(terminator)
                return self.text[startpos:stop], terminator
            # a terminator seen while nested falls through and is
            # counted like ordinary text below
        else:
            match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
        if match:
            chunk = match.group(1)
            for pair in depth:
                depth[pair] += chunk.count(pair[0]) - chunk.count(pair[1])
            continue
        raise exceptions.SyntaxException(
            "Expected: %s" % ",".join(text), **self.exception_kwargs
        )

123

def append_node(self, nodecls, *args, **kwargs):
    """construct a ``nodecls`` node and attach it to the parse tree.

    ``source``, ``lineno`` and ``pos`` default to the lexer's text and
    last matched location unless already present in ``kwargs``;
    ``filename`` is always set to the lexer's filename.  Tag and
    ControlLine nodes additionally update the lexer's stacks of open
    tags / control lines.  raises SyntaxException for a control
    keyword that is not a legal ternary of the open control line.
    """
    defaults = (
        ("source", self.text),
        ("lineno", self.matched_lineno),
        ("pos", self.matched_charpos),
    )
    for name, value in defaults:
        kwargs.setdefault(name, value)
    kwargs["filename"] = self.filename
    node = nodecls(*args, **kwargs)

    # the node is a child of the innermost open tag, else the template
    parent = self.tag[-1] if self.tag else self.template
    parent.nodes.append(node)

    # collect child nodes on the open control line (used for loop
    # variable detection), and on its latest ternary control line
    # (used for deciding whether a pass needs to be auto-inserted)
    if self.control_line:
        control_frame = self.control_line[-1]
        control_frame.nodes.append(node)
        is_ternary_line = isinstance(
            node, parsetree.ControlLine
        ) and control_frame.is_ternary(node.keyword)
        if (
            not is_ternary_line
            and self.ternary_stack
            and self.ternary_stack[-1]
        ):
            self.ternary_stack[-1][-1].nodes.append(node)

    if isinstance(node, parsetree.Tag):
        if self.tag:
            node.parent = self.tag[-1]
        self.tag.append(node)
        return

    if not isinstance(node, parsetree.ControlLine):
        return

    if node.isend:
        self.control_line.pop()
        self.ternary_stack.pop()
    elif node.is_primary:
        self.control_line.append(node)
        self.ternary_stack.append([])
    elif self.control_line and self.control_line[-1].is_ternary(
        node.keyword
    ):
        self.ternary_stack[-1].append(node)
    elif self.control_line and not self.control_line[-1].is_ternary(
        node.keyword
    ):
        raise exceptions.SyntaxException(
            "Keyword '%s' not a legal ternary for keyword '%s'"
            % (node.keyword, self.control_line[-1].keyword),
            **self.exception_kwargs,
        )

173

# matches a "#" comment line containing "coding:" or "coding=" followed
# by an encoding name, through the end of that line; group 1 is the
# encoding name
_coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

175

def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
    """given string/unicode or bytes/string, determine encoding
    from magic encoding comment, return body as unicode
    or raw if decode_raw=False

    :param text: template source, as ``str`` or ``bytes``.
    :param decode_raw: when true, ``bytes`` input is decoded using the
     detected encoding; otherwise it is returned as bytes (minus any
     UTF-8 BOM).
    :param known_encoding: encoding used when no magic comment is
     present; falls back to ``"utf-8"`` when this is also empty.
    :param filename: passed through to CompileException for reporting.
    :return: tuple of ``(encoding, text)``.
    :raises CompileException: if a UTF-8 BOM is combined with a magic
     comment naming a different encoding, or if decoding fails.
    """
    if isinstance(text, str):
        m = self._coding_re.match(text)
        encoding = m.group(1) if m else known_encoding or "utf-8"
        return encoding, text

    if text.startswith(codecs.BOM_UTF8):
        text = text[len(codecs.BOM_UTF8) :]
        parsed_encoding = "utf-8"
        lenient = text.decode("utf-8", "ignore")
        m = self._coding_re.match(lenient)
        if m is not None:
            declared = m.group(1)
            # compare codec identities rather than spellings, so that
            # "UTF-8", "utf8" etc. are not reported as a conflict;
            # an unknown name still counts as conflicting
            try:
                conflicting = codecs.lookup(declared).name != "utf-8"
            except LookupError:
                conflicting = True
            if conflicting:
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % declared,
                    lenient,
                    0,
                    0,
                    filename,
                )
    else:
        m = self._coding_re.match(text.decode("utf-8", "ignore"))
        parsed_encoding = m.group(1) if m else known_encoding or "utf-8"
    if decode_raw:
        try:
            text = text.decode(parsed_encoding)
        except UnicodeDecodeError:
            raise exceptions.CompileException(
                "Unicode decode operation of encoding '%s' failed"
                % parsed_encoding,
                text.decode("utf-8", "ignore"),
                0,
                0,
                filename,
            )

    return parsed_encoding, text

217

def parse(self):
    """lex ``self.text`` and return the populated template node.

    the text is decoded, run through each preprocessor in order, and
    then consumed by trying the ``match_*`` methods in a fixed order
    at each position.  raises SyntaxException if a tag or control
    line is still open once the text is exhausted.
    """
    self.encoding, self.text = self.decode_raw_stream(
        self.text, True, self.encoding, self.filename
    )

    for preproc in self.preprocessor:
        self.text = preproc(self.text)

    # push the match marker past the
    # encoding comment.
    self.match_reg(self._coding_re)

    self.textlength = len(self.text)

    # tried in this order at every position; the first one that
    # matches restarts the loop
    matchers = (
        self.match_expression,
        self.match_control_line,
        self.match_comment,
        self.match_tag_start,
        self.match_tag_end,
        self.match_python_block,
        self.match_percent,
        self.match_text,
    )

    while self.match_position <= self.textlength:
        if self.match_end():
            break
        if any(matcher() for matcher in matchers):
            continue

        if self.match_position > self.textlength:
            break
        # TODO: no coverage here
        raise exceptions.MakoException("assertion failed")

    if self.tag:
        raise exceptions.SyntaxException(
            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
            **self.exception_kwargs,
        )
    if self.control_line:
        unterminated = self.control_line[-1]
        raise exceptions.SyntaxException(
            "Unterminated control keyword: '%s'" % unterminated.keyword,
            self.text,
            unterminated.lineno,
            unterminated.pos,
            self.filename,
        )
    return self.template

275

def match_tag_start(self):
    """match an opening ``<%keyword ...>`` tag, optionally self-closed
    with ``/>``, and append a Tag node for it.

    attributes written as ``name="value"`` or ``name='value'`` are
    collected into a dict, with ``\\r\\n`` in values normalized to
    ``\\n``.  the body of a ``<%text>`` tag is taken verbatim up to its
    closing tag.  returns True on a match, else False.
    """
    reg = r"""
        \<%     # opening tag

        ([\w\.\:]+)   # keyword

        ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                           #        sign, string expression
                                           # comma is for backwards compat
                                           # identified in #366

        \s*     # more whitespace

        (/)?>   # closing

    """

    match = self.match(reg, re.I | re.S | re.X)
    if not match:
        return False

    keyword, attr, isend = match.groups()
    self.keyword = keyword
    attributes = {}
    if attr:
        pairs = re.findall(
            r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
        )
        for key, single_quoted, double_quoted in pairs:
            value = single_quoted or double_quoted
            attributes[key] = value.replace("\r\n", "\n")
    self.append_node(parsetree.Tag, keyword, attributes)

    if isend:
        # self-closing: the tag just pushed is closed immediately
        self.tag.pop()
        return True
    if keyword != "text":
        return True

    # <%text>: everything up to the closing tag is literal text
    match = self.match(r"(.*?)(?=\</%text>)", re.S)
    if not match:
        raise exceptions.SyntaxException(
            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
            **self.exception_kwargs,
        )
    self.append_node(parsetree.Text, match.group(1))
    return self.match_tag_end()

325

def match_tag_end(self):
    """match a closing ``</%keyword>`` tag and pop the open tag it
    closes.

    returns True on a match, else False.  raises SyntaxException if no
    tag is open or the innermost open tag has a different keyword.
    """
    match = self.match(r"\</%[\t ]*([^\t ]+?)[\t ]*>")
    if not match:
        return False

    closing = match.group(1)
    if not self.tag:
        raise exceptions.SyntaxException(
            "Closing tag without opening tag: </%%%s>" % closing,
            **self.exception_kwargs,
        )
    if self.tag[-1].keyword != closing:
        raise exceptions.SyntaxException(
            "Closing tag </%%%s> does not match tag: <%%%s>"
            % (closing, self.tag[-1].keyword),
            **self.exception_kwargs,
        )
    self.tag.pop()
    return True

345

def match_end(self):
    """return True if the current position is at the end of the text,
    else False.

    ``\\Z`` is zero-width, so the matched string is always empty and
    only whether it matched is meaningful; the result is therefore a
    plain boolean.
    """
    return bool(self.match(r"\Z", re.S))

356

def match_percent(self):
    """match a line-leading ``%%`` (after optional whitespace) and
    append it as Text with one ``%`` removed.

    returns True on a match, else False.
    """
    match = self.match(r"(?<=^)(\s*)%%(%*)", re.M)
    if not match:
        return False

    leading, extra = match.group(1), match.group(2)
    self.append_node(parsetree.Text, leading + "%" + extra)
    return True

366

def match_text(self):
    """match plain template text up to the next construct and append
    it as a Text node when it is non-empty.

    returns True on a match (even when the matched text is empty),
    else False.
    """
    match = self.match(
        r"""
            (.*?)         # anything, followed by:
            (
             (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                         # comment, preceded by a
                                         # consumed newline and whitespace
             |
             (?=\${)      # an expression
             |
             (?=</?%)      # a substitution or block or call start or end
                           # - don't consume
             |
             (\\\r?\n)    # an escaped newline  - throw away
             |
             \Z           # end of string
            )""",
        re.X | re.S,
    )
    if not match:
        return False

    literal = match.group(1)
    if literal:
        self.append_node(parsetree.Text, literal)
    return True

395

def match_python_block(self):
    """match a ``<% ... %>`` or ``<%! ... %>`` block and append it as
    a Code node; the second node argument is True for the ``<%!``
    form.

    returns True on a match, else False.
    """
    match = self.match(r"<%(!)?")
    if not match:
        return False

    # remember where the block opened before consuming its body
    line, pos = self.matched_lineno, self.matched_charpos
    source, _terminator = self.parse_until_text(False, r"%>")
    # the trailing newline helps
    # compiler.parse() not complain about indentation
    source = adjust_whitespace(source) + "\n"
    has_bang = match.group(1) == "!"
    self.append_node(
        parsetree.Code, source, has_bang, lineno=line, pos=pos
    )
    return True

414

def match_expression(self):
    """match a ``${expression}`` or ``${expression | filters}``
    substitution and append it as an Expression node.

    returns True on a match, else False.
    """
    if not self.match(r"\${"):
        return False

    # remember where the expression opened before consuming its body
    line, pos = self.matched_lineno, self.matched_charpos
    text, end = self.parse_until_text(True, r"\|", r"}")
    escapes = ""
    if end == "|":
        # everything after the pipe up to the closing brace
        escapes, end = self.parse_until_text(True, r"}")
    text = text.replace("\r\n", "\n")
    self.append_node(
        parsetree.Expression, text, escapes.strip(), lineno=line, pos=pos
    )
    return True

435

def match_control_line(self):
    """match a line beginning with ``%`` (a control line) or ``##``
    (a comment) and append the corresponding node.

    returns True on a match, else False.  raises SyntaxException for
    a control line with no keyword, or an ``end<keyword>`` line that
    does not correspond to the innermost open control line.
    """
    match = self.match(
        r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
        r"(?:\r?\n|\Z)",
        re.M,
    )
    if not match:
        return False

    operator, text = match.group(1), match.group(2)
    if operator != "%":
        self.append_node(parsetree.Comment, text)
        return True

    m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
    if not m2:
        raise exceptions.SyntaxException(
            "Invalid control line: '%s'" % text,
            **self.exception_kwargs,
        )
    end_prefix, keyword = m2.group(1, 2)
    isend = end_prefix is not None

    if isend:
        if not self.control_line:
            raise exceptions.SyntaxException(
                "No starting keyword '%s' for '%s'" % (keyword, text),
                **self.exception_kwargs,
            )
        if self.control_line[-1].keyword != keyword:
            raise exceptions.SyntaxException(
                "Keyword '%s' doesn't match keyword '%s'"
                % (text, self.control_line[-1].keyword),
                **self.exception_kwargs,
            )
    self.append_node(parsetree.ControlLine, keyword, isend, text)
    return True

473

def match_comment(self):
    """matches the multiline ``<%doc>...</%doc>`` version of a comment
    and appends its contents as a Comment node.

    returns True on a match, else False.
    """
    match = self.match(r"<%doc>(.*?)</%doc>", re.S)
    if not match:
        return False
    self.append_node(parsetree.Comment, match.group(1))
    return True