# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/html.py: 89%
# 81 statements
# Report generated by coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
"""
    pygments.lexers.html
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for HTML, XML and related markup.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
    default, using
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Punctuation, Whitespace
from pygments.util import looks_like_xml, html_doctype_matches

# Lexers that the markup lexers in this module delegate embedded code to.
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.jvm import ScalaLexer
from pygments.lexers.css import CssLexer, _indentation, _starts_block
from pygments.lexers.ruby import RubyLexer

# Public lexer classes exported by this module.
__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
           'ScamlLexer', 'PugLexer']


class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    url = 'https://html.spec.whatwg.org/'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    # DOTALL lets the `.*?` patterns below (comments, CDATA, processing
    # instructions, quoted attribute values) span several lines.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script>/<style> push two states: 'tag' ends up on top so the
            # attributes are lexed first; once 'tag' pops at '>', the
            # matching '*-content' state takes over the element body.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute followed by '=': the value is lexed in 'attr'
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            # attribute without a value
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        'attr': [
            # double-quoted, single-quoted, then unquoted attribute values
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """Return 0.5 when ``html_doctype_matches(text)`` is true.

        Falls through (returning ``None``) otherwise.
        """
        if html_doctype_matches(text):
            return 0.5


class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),

            # Each declaration keyword enters its own state, which is left
            # again at the closing '>'.
            (r'(<!ELEMENT)(\s+)(\S+)',
                bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
                bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
                bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
                bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
                bygroups(Keyword, Name.Entity, Text, Keyword)),

            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
                bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        # Rules shared by 'root' and every declaration state.
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            # a lone '-' that does not start the '-->' terminator
            ('-', Comment),
        ],

        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        'attlist': [
            include('common'),
            # Longer alternatives come before their own prefixes
            # (IDREFS, IDREF, ID, ...) because regex alternation takes the
            # first alternative that matches.
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        """Return 0.8 for text containing DTD declarations.

        The score is given when ``looks_like_xml(text)`` is false and the
        text contains ``<!ELEMENT``, ``<!ATTLIST`` or ``<!ENTITY``;
        otherwise the method falls through and returns ``None``.
        """
        if not looks_like_xml(text) and \
                ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8


class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']

    tokens = {
        'root': [
            # character data is split into non-whitespace and whitespace runs
            (r'[^<&\s]+', Text),
            (r'[^<&\S]+', Whitespace),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            (r'<!--.*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # start tag: '<' and the name form one token, attributes follow
            # in the 'tag' state
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            # end tag: the whole '</name>' is a single token
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        'tag': [
            (r'\s+', Whitespace),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Whitespace),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """Return 0.45 when ``looks_like_xml(text)`` is true.

        Falls through (returning ``None``) otherwise.
        """
        if looks_like_xml(text):
            return 0.45  # less than HTML


class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.

    .. versionadded:: 0.10
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']

    # Local names that are re-tagged as Keyword when they appear with the
    # ``xsl:`` prefix.
    EXTRA_KEYWORDS = {
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    }

    # Captures what follows ``xsl:`` in a start-tag or end-tag token.
    # Compiled once instead of being looked up for every token.
    _xsl_tag_re = re.compile('</?xsl:([^>]*)/?>?')

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, token, value)`` triples for *text*.

        The stream produced by ``XmlLexer`` is passed through unchanged,
        except that ``Name.Tag`` tokens naming an ``xsl:`` element listed in
        ``EXTRA_KEYWORDS`` are emitted as ``Keyword`` instead.
        """
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            # Only tag tokens can be XSLT keywords, so skip the regex for
            # everything else.
            if token is Name.Tag:
                m = self._xsl_tag_re.match(value)
                # End-tag tokens may carry whitespace before '>' (for
                # example '</xsl:template >'); strip it so such tags are
                # recognised as well.
                if m and m.group(1).rstrip() in self.EXTRA_KEYWORDS:
                    yield index, Keyword, value
                    continue
            yield index, token, value

    def analyse_text(text):
        """Return 0.8 when the text looks like XML and contains ``<xsl``.

        Falls through (returning ``None``) otherwise.
        """
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8


class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # leading whitespace is handed to the `_indentation` callback
            # imported from pygments.lexers.css
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            # '=' / '~' (optionally prefixed by '&' or '!'): the rest of the
            # line is lexed as Ruby
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            # {...} and [...] after a tag are lexed as Ruby
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # #{...} interpolation: the body is lexed as Ruby
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }


class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # leading whitespace is handed to the `_indentation` callback
            # imported from pygments.lexers.css
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            # '=' / '~' (optionally prefixed by '&' or '!'): the rest of the
            # line is lexed as Scala
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            # '-@' lines, with an optional 'import' keyword
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            # {...} and [...] after a tag are lexed as Scala
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # #{...} interpolation: the body is lexed as Scala
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }


class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # leading whitespace is handed to the `_indentation` callback
            # imported from pygments.lexers.css
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            # '=' / '~' (optionally prefixed by '&' or '!'): the rest of the
            # line is delegated to ScalaLexer
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            # a bare word starts a tag (no '%' prefix is required here)
            (r'[\w:-]+', Name.Tag, 'tag'),
            # '|' introduces evaluated or plain text
            (r'\|', Text, 'eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
# Backward-compatible alias: the lexer also answers to the 'jade' alias and
# '*.jade' filenames.
JadeLexer = PugLexer  # compat