Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mako/lexer.py: 89%

1# mako/lexer.py

4# This module is part of Mako and is released under

5# the MIT License: http://www.opensource.org/licenses/mit-license.php

7"""provides the Lexer class for parsing template strings into parse trees."""

9import codecs

10import re

12from mako import exceptions

13from mako import parsetree

14from mako.pygen import adjust_whitespace

# Module-wide cache of compiled patterns; Lexer.match() fills it and looks
# entries up under the key ``(regexp, flags)``.
_regexp_cache = {}

19class Lexer:

def __init__(
    self, text, filename=None, input_encoding=None, preprocessor=None
):
    """Create a lexer over ``text``.

    :param text: the template source to be lexed.
    :param filename: stored on the lexer and passed to the root
     ``TemplateNode``.
    :param input_encoding: stored as ``self.encoding``.
    :param preprocessor: ``None``, a single object, or an iterable; it
     is always stored as an iterable in ``self.preprocessor``.
    """
    # normalize the preprocessor argument: anything lacking __iter__ is
    # treated as one preprocessor and wrapped in a list
    if preprocessor is None:
        preprocessor = []
    elif not hasattr(preprocessor, "__iter__"):
        preprocessor = [preprocessor]

    self.text = text
    self.filename = filename
    self.encoding = input_encoding
    self.preprocessor = preprocessor
    self.template = parsetree.TemplateNode(filename)

    # position bookkeeping: lines count from 1, offsets from 0
    self.lineno = self.matched_lineno = 1
    self.match_position = self.matched_charpos = 0

    # stacks of currently open tags / control lines / ternary branches
    self.tag = []
    self.control_line = []
    self.ternary_stack = []

@property
def exception_kwargs(self):
    """Keyword arguments locating the most recent match.

    Returns a dict with the keys ``source``, ``lineno``, ``pos`` and
    ``filename``; this module unpacks it into the exceptions it raises.
    """
    return dict(
        source=self.text,
        lineno=self.matched_lineno,
        pos=self.matched_charpos,
        filename=self.filename,
    )

def match(self, regexp, flags=None):
    """Match the pattern string ``regexp`` at the current position.

    The compiled pattern is fetched from, or compiled and stored into,
    the module-level ``_regexp_cache`` under the key ``(regexp, flags)``.
    The actual matching is delegated to ``match_reg()``, whose result is
    returned unchanged.
    """
    cache_key = (regexp, flags)
    compiled = _regexp_cache.get(cache_key)
    if compiled is None:
        # falsy flags (None or 0) mean "compile without flags"
        if flags:
            compiled = re.compile(regexp, flags)
        else:
            compiled = re.compile(regexp)
        _regexp_cache[cache_key] = compiled
    return self.match_reg(compiled)

def match_reg(self, reg):
    """Match the compiled pattern ``reg`` at the current text position.

    On success the lexer state is updated:

    * ``match_position`` moves to the end of the match (one character
      further when the match is zero-width),
    * ``matched_lineno`` becomes the line the match started on,
    * ``matched_charpos`` becomes the distance from the newline
      preceding the match start (1 for the first character of a line),
    * ``lineno`` grows by the number of newlines that were consumed.

    On failure nothing is changed.

    :param reg: a compiled regular expression object.
    :return: the match object, or ``None`` when ``reg`` does not match.
    """
    mp = self.match_position

    match = reg.match(self.text, mp)
    if match:
        start, end = match.span()
        # a zero-width match still moves the position forward by one
        self.match_position = end + 1 if end == start else end
        self.matched_lineno = self.lineno

        # Use len(self.text) rather than self.textlength: the latter is
        # only assigned by parse(), so relying on it fails on a lexer
        # that has not been parsed yet.
        if 0 < mp <= len(self.text):
            # index of the last newline before the match start, or -1;
            # a bounded rfind avoids copying the whole text prefix
            line_start = self.text.rfind("\n", 0, mp)
        else:
            line_start = mp - 1
        self.matched_charpos = mp - line_start

        self.lineno += self.text.count("\n", mp, self.match_position)
    return match

def parse_until_text(self, watch_nesting, *text):
    """Consume input up to the first of the terminators in ``text``.

    :param watch_nesting: when true, a terminator is ignored while more
     ``{``, ``(`` or ``[`` have been seen than their closing partners.
    :param text: regular expression fragments; they are joined with
     ``|`` to form the terminator pattern.
    :return: ``(consumed, terminator)`` - the text between the starting
     position and the terminator, and the terminator as matched.
    :raises exceptions.SyntaxException: when no terminator can be found.
    """
    begin = self.match_position
    terminators = r"|".join(text)
    # net number of still-open brackets, keyed by "<open><close>"
    depth = {"{}": 0, "()": 0, "[]": 0}

    while True:
        # "#" comments and quoted strings are skipped as a whole, so
        # their content is never examined for brackets or terminators
        if self.match(r"#.*\n"):
            continue
        if self.match(
            r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
        ):
            continue

        found = self.match(r"(%s)" % terminators)
        if found:
            nested = watch_nesting and any(
                level > 0 for level in depth.values()
            )
            if not nested:
                token = found.group(1)
                return (
                    self.text[begin : self.match_position - len(token)],
                    token,
                )
        else:
            # no terminator here: take everything up to the next quote,
            # "#" or terminator
            found = self.match(
                r"(.*?)(?=\"|\'|#|%s)" % terminators, re.S
            )

        if found:
            chunk = found.group(1)
            for pair in depth:
                depth[pair] += chunk.count(pair[0]) - chunk.count(pair[1])
            continue

        raise exceptions.SyntaxException(
            "Expected: %s" % ",".join(text), **self.exception_kwargs
        )

123

def append_node(self, nodecls, *args, **kwargs):
    """Build a node with ``nodecls`` and attach it to the parse tree.

    ``source``, ``lineno`` and ``pos`` default to the lexer's current
    match information unless given in ``kwargs``; ``filename`` is always
    taken from the lexer.  The new node is appended to the innermost
    open tag, or to the template when no tag is open, and the tag /
    control-line / ternary stacks are updated according to its type.

    :raises exceptions.SyntaxException: for a control line that is
     neither an end nor a primary keyword and is not a legal ternary of
     the currently open control line.
    """
    for key, default in (
        ("source", self.text),
        ("lineno", self.matched_lineno),
        ("pos", self.matched_charpos),
    ):
        kwargs.setdefault(key, default)
    kwargs["filename"] = self.filename
    node = nodecls(*args, **kwargs)

    container = self.tag[-1] if self.tag else self.template
    container.nodes.append(node)

    # build a set of child nodes for the control line
    # (used for loop variable detection)
    # also build a set of child nodes on ternary control lines
    # (used for determining if a pass needs to be auto-inserted)
    if self.control_line:
        control_frame = self.control_line[-1]
        control_frame.nodes.append(node)
        is_ternary_line = isinstance(
            node, parsetree.ControlLine
        ) and control_frame.is_ternary(node.keyword)
        if (
            not is_ternary_line
            and self.ternary_stack
            and self.ternary_stack[-1]
        ):
            self.ternary_stack[-1][-1].nodes.append(node)

    if isinstance(node, parsetree.Tag):
        if self.tag:
            node.parent = self.tag[-1]
        self.tag.append(node)
        return

    if not isinstance(node, parsetree.ControlLine):
        return

    if node.isend:
        self.control_line.pop()
        self.ternary_stack.pop()
    elif node.is_primary:
        self.control_line.append(node)
        self.ternary_stack.append([])
    elif self.control_line:
        if self.control_line[-1].is_ternary(node.keyword):
            self.ternary_stack[-1].append(node)
        else:
            raise exceptions.SyntaxException(
                "Keyword '%s' not a legal ternary for keyword '%s'"
                % (node.keyword, self.control_line[-1].keyword),
                **self.exception_kwargs,
            )

173

# Matches one "#" comment line containing "coding:" or "coding=" and
# captures the encoding name that follows; the line's newline is consumed.
_coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

175

def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
    """Determine the encoding of ``text`` and optionally decode it.

    The encoding is taken from a magic encoding comment at the start of
    the text if there is one, else from ``known_encoding``, else it is
    ``"utf-8"``.

    :param text: ``str`` (returned unchanged) or bytes.
    :param decode_raw: when true, bytes input is decoded with the
     detected encoding; when false the bytes are returned as-is (minus
     a leading UTF-8 BOM).
    :param known_encoding: fallback encoding name, may be ``None``.
    :param filename: only used when building exceptions.
    :return: ``(encoding, text)``.
    :raises exceptions.CompileException: when a UTF-8 BOM is followed
     by a magic comment naming a different encoding, or when decoding
     fails with ``UnicodeDecodeError``.
    """
    if isinstance(text, str):
        m = self._coding_re.match(text)
        if m:
            encoding = m.group(1)
        else:
            encoding = known_encoding or "utf-8"
        return encoding, text

    if text.startswith(codecs.BOM_UTF8):
        text = text[len(codecs.BOM_UTF8) :]
        parsed_encoding = "utf-8"
        m = self._coding_re.match(text.decode("utf-8", "ignore"))
        if m is not None:
            # Compare canonical codec names so that spellings such as
            # "UTF-8" or "utf8" are not reported as conflicting with
            # the BOM.  Unknown names are compared verbatim.
            try:
                declared = codecs.lookup(m.group(1)).name
            except LookupError:
                declared = m.group(1)
            if declared != "utf-8":
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
    else:
        m = self._coding_re.match(text.decode("utf-8", "ignore"))
        parsed_encoding = m.group(1) if m else known_encoding or "utf-8"

    if decode_raw:
        try:
            text = text.decode(parsed_encoding)
        except UnicodeDecodeError as err:
            raise exceptions.CompileException(
                "Unicode decode operation of encoding '%s' failed"
                % parsed_encoding,
                text.decode("utf-8", "ignore"),
                0,
                0,
                filename,
            ) from err

    return parsed_encoding, text

217

def parse(self):
    """Lex ``self.text`` and return the populated template node.

    The text is first passed through ``decode_raw_stream()`` and then
    through every preprocessor in order.  After skipping a leading
    encoding comment, the matchers are tried in a fixed order at each
    position until the end of the text is reached.

    :return: ``self.template``.
    :raises exceptions.SyntaxException: if a tag or control line is
     still open once the input is exhausted.
    :raises exceptions.MakoException: if no matcher accepts the input
     at some position.
    """
    self.encoding, self.text = self.decode_raw_stream(
        self.text, True, self.encoding, self.filename
    )

    for preproc in self.preprocessor:
        self.text = preproc(self.text)

    # push the match marker past the
    # encoding comment.
    self.match_reg(self._coding_re)

    self.textlength = len(self.text)

    while self.match_position <= self.textlength:
        if self.match_end():
            break

        # first matcher that succeeds wins; then start over
        if (
            self.match_expression()
            or self.match_control_line()
            or self.match_comment()
            or self.match_tag_start()
            or self.match_tag_end()
            or self.match_python_block()
            or self.match_text()
        ):
            continue

        if self.match_position > self.textlength:
            break
        # TODO: no coverage here
        raise exceptions.MakoException("assertion failed")

    if self.tag:
        raise exceptions.SyntaxException(
            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
            **self.exception_kwargs,
        )
    if self.control_line:
        unterminated = self.control_line[-1]
        raise exceptions.SyntaxException(
            "Unterminated control keyword: '%s'" % unterminated.keyword,
            self.text,
            unterminated.lineno,
            unterminated.pos,
            self.filename,
        )
    return self.template

273

def match_tag_start(self):
    """Try to match an opening ``<%keyword ...>`` tag.

    On a match a ``Tag`` node carrying the parsed attributes is
    appended.  A self-closing tag (``/>``) is popped from the tag stack
    again straight away.  For ``<%text>`` everything up to ``</%text>``
    is appended as one ``Text`` node and the closing tag is consumed.

    :return: ``False`` when no tag starts here, otherwise ``True`` (for
     ``<%text>``, the result of ``match_tag_end()``).
    :raises exceptions.SyntaxException: if ``<%text>`` is never closed.
    """
    tag_pattern = r"""
        \<%     # opening tag

        ([\w\.\:]+)   # keyword

        ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                           #        sign, string expression
                                           # comma is for backwards compat
                                           # identified in #366

        \s*     # more whitespace

        (/)?>   # closing

    """

    found = self.match(tag_pattern, re.I | re.S | re.X)
    if not found:
        return False

    keyword, attr, isend = found.groups()
    self.keyword = keyword

    attributes = {}
    if attr:
        pairs = re.findall(
            r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
        )
        for key, single_quoted, double_quoted in pairs:
            value = single_quoted or double_quoted
            attributes[key] = value.replace("\r\n", "\n")

    self.append_node(parsetree.Tag, keyword, attributes)

    if isend:
        self.tag.pop()
        return True
    if keyword != "text":
        return True

    # <%text>: take the body verbatim up to the closing tag
    body = self.match(r"(.*?)(?=\</%text>)", re.S)
    if not body:
        raise exceptions.SyntaxException(
            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
            **self.exception_kwargs,
        )
    self.append_node(parsetree.Text, body.group(1))
    return self.match_tag_end()

323

def match_tag_end(self):
    """Try to match a closing ``</%keyword>`` tag.

    :return: ``True`` after popping the matching open tag, ``False``
     when no closing tag starts at the current position.
    :raises exceptions.SyntaxException: when no tag is open, or when the
     keyword differs from the innermost open tag's keyword.
    """
    closing = self.match(r"\</%[\t ]*([^\t ]+?)[\t ]*>")
    if not closing:
        return False

    keyword = closing.group(1)
    if not self.tag:
        raise exceptions.SyntaxException(
            "Closing tag without opening tag: </%%%s>" % keyword,
            **self.exception_kwargs,
        )
    if self.tag[-1].keyword != keyword:
        raise exceptions.SyntaxException(
            "Closing tag </%%%s> does not match tag: <%%%s>"
            % (keyword, self.tag[-1].keyword),
            **self.exception_kwargs,
        )
    self.tag.pop()
    return True

343

def match_end(self):
    """Report whether the lexer has reached the end of the text.

    ``\\Z`` is a zero-width assertion, so a successful match never
    contains any text; the result is therefore a plain boolean.

    :return: ``True`` at end of input, ``False`` otherwise.
    """
    return bool(self.match(r"\Z", re.S))

354

def match_text(self):
    """Match plain template text up to the next construct.

    Text runs until a control or ``##`` comment line, a ``${``
    expression, a ``<%`` / ``</%`` / ``<&`` / ``</&`` marker, an escaped
    newline (which is consumed and dropped) or the end of the input.
    Non-empty text is appended as a ``Text`` node.

    :return: ``True`` if the pattern matched (even with empty text),
     ``False`` otherwise.
    """
    text_pattern = r"""
        (.*?)         # anything, followed by:
        (
         (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                     # comment preceded by a
                                     # consumed newline and whitespace
         |
         (?=\${)      # an expression
         |
         (?=</?[%&])  # a substitution or block or call start or end
                      # - don't consume
         |
         (\\\r?\n)    # an escaped newline  - throw away
         |
         \Z           # end of string
        )"""

    found = self.match(text_pattern, re.X | re.S)
    if not found:
        return False

    leading = found.group(1)
    if leading:
        self.append_node(parsetree.Text, leading)
    return True

383

def match_python_block(self):
    """Match a ``<% ... %>`` or ``<%! ... %>`` block.

    The block body is passed through ``adjust_whitespace()`` and
    appended as a ``Code`` node whose second argument tells whether the
    block was opened with ``<%!``.  The node is positioned at the
    opening marker.

    :return: ``True`` if a block was consumed, ``False`` otherwise.
    """
    opener = self.match(r"<%(!)?")
    if not opener:
        return False

    # remember where the block started before scanning moves on
    line, pos = self.matched_lineno, self.matched_charpos
    code, _ = self.parse_until_text(False, r"%>")
    # the trailing newline helps
    # compiler.parse() not complain about indentation
    code = adjust_whitespace(code) + "\n"
    has_bang = opener.group(1) == "!"
    self.append_node(
        parsetree.Code,
        code,
        has_bang,
        lineno=line,
        pos=pos,
    )
    return True

402

def match_expression(self):
    """Match a ``${expression}`` or ``${expression | escapes}``.

    Appends an ``Expression`` node holding the expression text (with
    ``\\r\\n`` normalized to ``\\n``) and the stripped escapes string,
    positioned at the opening ``${``.

    :return: ``True`` if an expression was consumed, ``False`` otherwise.
    """
    if not self.match(r"\${"):
        return False

    line, pos = self.matched_lineno, self.matched_charpos
    body, terminator = self.parse_until_text(True, r"\|", r"}")

    escapes = ""
    if terminator == "|":
        # everything between "|" and the closing brace is the escapes
        escapes, _ = self.parse_until_text(True, r"}")

    self.append_node(
        parsetree.Expression,
        body.replace("\r\n", "\n"),
        escapes.strip(),
        lineno=line,
        pos=pos,
    )
    return True

423

def match_control_line(self):
    """Match a ``%`` control line or a ``##`` comment line.

    A ``##`` line is appended as a ``Comment`` node.  A ``%`` line is
    split into an optional ``end`` prefix and a keyword and appended as
    a ``ControlLine`` node.

    :return: ``True`` if a line was consumed, ``False`` otherwise.
    :raises exceptions.SyntaxException: for a ``%`` line without a
     keyword, or an ``end...`` line that has no open control line or
     does not match the innermost one.
    """
    found = self.match(
        r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
        r"(?:\r?\n|\Z)",
        re.M,
    )
    if not found:
        return False

    operator, text = found.group(1, 2)
    if operator != "%":
        self.append_node(parsetree.Comment, text)
        return True

    parts = re.match(r"(end)?(\w+)\s*(.*)", text)
    if not parts:
        raise exceptions.SyntaxException(
            "Invalid control line: '%s'" % text,
            **self.exception_kwargs,
        )
    isend = parts.group(1) is not None
    keyword = parts.group(2)

    if isend:
        if not self.control_line:
            raise exceptions.SyntaxException(
                "No starting keyword '%s' for '%s'" % (keyword, text),
                **self.exception_kwargs,
            )
        if self.control_line[-1].keyword != keyword:
            raise exceptions.SyntaxException(
                "Keyword '%s' doesn't match keyword '%s'"
                % (text, self.control_line[-1].keyword),
                **self.exception_kwargs,
            )

    self.append_node(parsetree.ControlLine, keyword, isend, text)
    return True

461

def match_comment(self):
    """Match a multiline ``<%doc> ... </%doc>`` comment.

    The text between the markers is appended as a ``Comment`` node.

    :return: ``True`` if a comment was consumed, ``False`` otherwise.
    """
    found = self.match(r"<%doc>(.*?)</%doc>", re.S)
    if not found:
        return False
    self.append_node(parsetree.Comment, found.group(1))
    return True