Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/html.py: 89%

1"""

2 pygments.lexers.html

3 ~~~~~~~~~~~~~~~~~~~~

5 Lexers for HTML, XML and related markup.

8 :license: BSD, see LICENSE for details.

9"""

11import re

13from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \

14 default, using

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \

16 Punctuation, Whitespace

17from pygments.util import looks_like_xml, html_doctype_matches

19from pygments.lexers.javascript import JavascriptLexer

20from pygments.lexers.jvm import ScalaLexer

21from pygments.lexers.css import CssLexer, _indentation, _starts_block

22from pygments.lexers.ruby import RubyLexer

# Lexer classes exported by ``from <this module> import *``.
__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
           'ScamlLexer', 'PugLexer']

class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.

    The content of ``<script>`` elements is delegated to `JavascriptLexer`
    and the content of ``<style>`` elements to `CssLexer`.
    """

    name = 'HTML'
    url = 'https://html.spec.whatwg.org/'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    # IGNORECASE: tag names such as <SCRIPT> match the lowercase patterns.
    # DOTALL: '.' also matches newlines, so comments, CDATA sections and
    # processing instructions may span several lines.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            # HTML comments.  The pattern must consume the whole comment:
            # an empty pattern would match everywhere without advancing.
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script> / <style>: push the content state first and 'tag' on
            # top of it, so the attributes are lexed before the content.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        # Inside an opening tag: attributes until the closing '>' or '/>'.
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        # Attribute value: double-quoted, single-quoted or bare.
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return 0.5 when `html_doctype_matches` accepts *text*.

        Otherwise the function falls off the end without an explicit
        return value.
        """
        if html_doctype_matches(text):
            return 0.5

104

105

class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    Each declaration keyword (``<!ELEMENT``, ``<!ATTLIST``, ``<!ENTITY``,
    ``<!NOTATION``) enters its own state, which is left again at the
    closing ``>``.

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),

            (r'(<!ELEMENT)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
             bygroups(Keyword, Name.Entity, Text, Keyword)),

            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
             bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        # Rules shared by 'root' and every declaration state.
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        # Comment body; a lone '-' is consumed one character at a time so
        # that the terminating '-->' is still found.
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],

        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        'attlist': [
            include('common'),
            # Attribute type keywords.  Longer alternatives are listed before
            # their prefixes so that e.g. IDREFS is not cut short at ID.
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return 0.8 for text that `looks_like_xml` rejects but that contains
        an ``<!ELEMENT``, ``<!ATTLIST`` or ``<!ENTITY`` declaration.

        Otherwise the function falls off the end without an explicit
        return value.
        """
        if not looks_like_xml(text) and \
                ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8

192

193

class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    # DOTALL lets '.' match newlines, so CDATA sections, comments and
    # processing instructions may span several lines.
    flags = re.MULTILINE | re.DOTALL

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']

    tokens = {
        'root': [
            (r'[^<&\s]+', Text),
            (r'[^<&\S]+', Whitespace),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            # XML comments.  The pattern must consume the whole comment:
            # an empty pattern would match everywhere without advancing.
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        # Inside an opening tag: attributes until the closing '>' or '/>'.
        'tag': [
            (r'\s+', Whitespace),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        # Attribute value: double-quoted, single-quoted or bare.
        'attr': [
            (r'\s+', Whitespace),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return 0.45 when `looks_like_xml` accepts *text*.

        Otherwise the function falls off the end without an explicit
        return value.
        """
        if looks_like_xml(text):
            return 0.45  # less than HTML

236

237

class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.

    Runs the XML lexer and re-tags ``xsl:`` tags whose local name is listed
    in `EXTRA_KEYWORDS` as keywords.

    .. versionadded:: 0.10
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']

    # Local names of the xsl: elements that are promoted to Keyword.
    EXTRA_KEYWORDS = {
        'apply-imports', 'apply-templates',
        'attribute', 'attribute-set',
        'call-template', 'choose', 'comment', 'copy', 'copy-of',
        'decimal-format',
        'element',
        'fallback', 'for-each',
        'if', 'import', 'include',
        'key',
        'message',
        'namespace-alias', 'number',
        'otherwise', 'output',
        'param', 'preserve-space', 'processing-instruction',
        'sort', 'strip-space', 'stylesheet',
        'template', 'text', 'transform',
        'value-of', 'variable',
        'when', 'with-param',
    }

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` triples from the XML lexer, replacing
        the token type with `Keyword` for recognised ``xsl:`` tags.
        """
        xml_stream = XmlLexer.get_tokens_unprocessed(self, text)
        for index, token, value in xml_stream:
            kind = token
            # Only tag tokens can be promoted, so only they are inspected.
            if token is Name.Tag:
                found = re.match('</?xsl:([^>]*)/?>?', value)
                if found and found.group(1) in self.EXTRA_KEYWORDS:
                    kind = Keyword
            yield index, kind, value

    def analyse_text(text):
        """
        Return 0.8 when *text* passes `looks_like_xml` and contains ``<xsl``.
        """
        mentions_xsl = looks_like_xml(text) and '<xsl' in text
        if mentions_xsl:
            return 0.8

273

274

class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    Embedded code (after ``=``, ``~``, ``-``, inside ``{...}``, ``[...]``
    and ``#{...}``) is delegated to `RubyLexer`.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        # Start of every line: blank lines are plain text, otherwise the
        # leading whitespace is handed to the _indentation callback
        # (imported from pygments.lexers.css; presumably it selects the
        # next state -- verify against its definition).
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ".class" and "#id" shorthands; both continue in the 'tag' state.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # "==" introduces plain text; "=" or "~" (optionally prefixed with
        # "&" or "!") introduces Ruby up to the end of the line; anything
        # else is treated as plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        # What a line may start with after its indentation.
        'content': [
            include('css'),
            # "%name" element
            (r'%[\w:-]+', Name.Tag, 'tag'),
            # "!!!" line
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            # "/[...]" comment with a bracketed part
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            # "/" comment; _starts_block is given the block state name
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            # "-#" comment
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'haml-comment-block'), '#pop'),
            # "-" followed by Ruby code
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            # ":name" filter line
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        # After a tag name or a css shorthand.
        'tag': [
            include('css'),
            # "{...}" and "[...]" are lexed as Ruby
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            # "(...)" holds HTML-style attributes
            (r'\(', Text, 'html-attributes'),
            # trailing "/" ends the line and pops two states
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text with "#{...}" Ruby interpolation; an escaped "\#{" is
        # kept as text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside "(...)": attribute names, optionally followed by "=".
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value after "=": a bare word, "@word", "$word" or a quoted string.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        # Block states named in the _starts_block calls above: one line of
        # the block at a time, then back through 'root'.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }

384

385

class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    Embedded code is delegated to `ScalaLexer`.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        # Start of every line: blank lines are plain text, otherwise the
        # leading whitespace is handed to the _indentation callback
        # (imported from pygments.lexers.css; presumably it selects the
        # next state -- verify against its definition).
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ".class" and "#id" shorthands; both continue in the 'tag' state.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # "==" introduces plain text; "=" or "~" (optionally prefixed with
        # "&" or "!") introduces Scala up to the end of the line; anything
        # else is treated as plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        # What a line may start with after its indentation.
        'content': [
            include('css'),
            # "%name" element
            (r'%[\w:-]+', Name.Tag, 'tag'),
            # "!!!" line
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            # "/[...]" comment with a bracketed part
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            # "/" comment; _starts_block is given the block state name
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            # "-#" comment
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            # "-@" line, with an optional "import" keyword, then Scala
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            # "-" followed by Scala code
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            # ":name" filter line
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        # After a tag name or a css shorthand.
        'tag': [
            include('css'),
            # "{...}" and "[...]" are lexed as Scala
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            # "(...)" holds HTML-style attributes
            (r'\(', Text, 'html-attributes'),
            # trailing "/" ends the line and pops two states
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text with "#{...}" Scala interpolation; an escaped "\#{" is
        # kept as text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside "(...)": attribute names, optionally followed by "=".
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value after "=": a bare word, "@word", "$word" or a quoted string.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        # Block states named in the _starts_block calls above: one line of
        # the block at a time, then back through 'root'.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }

496

497

class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    Embedded code is delegated to `ScalaLexer`.

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    # Plain dot: no line-wrapping notation is handled here.
    _dot = r'.'

    tokens = {
        # Start of every line: blank lines are plain text, otherwise the
        # leading whitespace is handed to the _indentation callback
        # (imported from pygments.lexers.css; presumably it selects the
        # next state -- verify against its definition).
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ".class" and "#id" shorthands; both continue in the 'tag' state.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # "==" introduces plain text; "=" or "~" (optionally prefixed with
        # "&" or "!") introduces Scala up to the end of the line; anything
        # else is treated as plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        # What a line may start with after its indentation.
        'content': [
            include('css'),
            # "!!!" line
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            # "/[...]" comment with a bracketed part
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            # "/" comment; _starts_block is given the block state name
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            # "-#" comment
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            # "-@" line, with an optional "import" keyword, then Scala
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            # "-" followed by Scala code
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            # ":name" filter line
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            # A bare word is a tag name (no "%" prefix is required here).
            (r'[\w:-]+', Name.Tag, 'tag'),
            # "|" introduces text or an evaluated expression
            (r'\|', Text, 'eval-or-plain'),
        ],

        # After a tag name or a css shorthand.
        'tag': [
            include('css'),
            # "{...}" and "[...]" are lexed as Scala
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            # "(...)" holds HTML-style attributes
            (r'\(', Text, 'html-attributes'),
            # trailing "/" ends the line and pops two states
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text with "#{...}" Scala interpolation; an escaped "\#{" is
        # kept as text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside "(...)": attribute names, optionally followed by "=".
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value after "=": a bare word, "@word", "$word" or a quoted string.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        # Block states named in the _starts_block calls above: one line of
        # the block at a time, then back through 'root'.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }

# Alias binding the older name ``JadeLexer`` to the same class as PugLexer.
JadeLexer = PugLexer  # compat