1"""
2 pygments.lexers.markup
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for non-HTML markup languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexers.html import XmlLexer
14from pygments.lexers.javascript import JavascriptLexer
15from pygments.lexers.css import CssLexer
16from pygments.lexers.lilypond import LilyPondLexer
17from pygments.lexers.data import JsonLexer
18
19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
20 using, this, do_insertions, default, words
21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
22 Number, Punctuation, Generic, Other, Whitespace, Literal
23from pygments.util import get_bool_opt, ClassNotFound
24
25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
26 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer',
29 'WikitextLexer']
30
31
32class BBCodeLexer(RegexLexer):
33 """
34 A lexer that highlights BBCode(-like) syntax.
35 """
36
37 name = 'BBCode'
38 aliases = ['bbcode']
39 mimetypes = ['text/x-bbcode']
40 url = 'https://www.bbcode.org/'
41 version_added = '0.6'
42
43 tokens = {
44 'root': [
45 (r'[^[]+', Text),
46 # tag/end tag begin
47 (r'\[/?\w+', Keyword, 'tag'),
48 # stray bracket
49 (r'\[', Text),
50 ],
51 'tag': [
52 (r'\s+', Text),
53 # attribute with value
54 (r'(\w+)(=)("?[^\s"\]]+"?)',
55 bygroups(Name.Attribute, Operator, String)),
56 # tag argument (a la [color=green])
57 (r'(=)("?[^\s"\]]+"?)',
58 bygroups(Operator, String)),
59 # tag end
60 (r'\]', Keyword, '#pop'),
61 ],
62 }
63
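# A minimal usage sketch (not itself part of the lexer API): like every lexer
# in this module, ``BBCodeLexer`` plugs into the standard ``pygments.highlight``
# pipeline. The sample markup below is hypothetical.
def _example_bbcode_usage():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    # '[b]' and '[color=green]' become Keyword/String tokens per the 'root'
    # and 'tag' states above; plain content stays Text.
    markup = '[b]bold[/b] and [color=green]green[/color]'
    return highlight(markup, BBCodeLexer(), HtmlFormatter())
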

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                    # has two lines
                p1 * 2 + 1 == p2 and       # they are the same length
                text[p1+1] in '-=' and     # the next line both starts and ends with
                text[p1+1] == text[p2-1]): # ...a sufficiently high header
            return 0.5

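# A small sketch of the ``handlecodeblocks`` option documented above: with the
# default (True), the body of a ``.. code:: language`` directive is delegated
# to that language's lexer; with ``handlecodeblocks=False`` it is emitted as a
# plain String block. The reST snippet is hypothetical.
def _example_rst_handlecodeblocks():
    from pygments import highlight
    from pygments.formatters import NullFormatter

    rst = '.. code:: python\n\n    print(1)\n\n'
    delegated = highlight(rst, RstLexer(), NullFormatter())
    plain = highlight(rst, RstLexer(handlecodeblocks=False), NullFormatter())
    return delegated, plain
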

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

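# ``analyse_text`` above is what lets ``pygments.lexers.guess_lexer`` pick this
# lexer for input starting with a recognized TeX command; a brief sketch:
def _example_tex_guessing():
    from pygments.lexers import guess_lexer

    # '\documentclass' is one of the prefixes analyse_text checks for, so
    # guessing should normally resolve to TexLexer.
    return isinstance(guess_lexer(r'\documentclass{article}'), TexLexer)
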

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)

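# The three delegating lexers above share one pattern, sketched here: the
# preprocessor lexer runs first, and whatever it leaves as ``Other`` tokens is
# re-lexed by the base-language lexer. The input line is hypothetical.
def _example_mozpreproc_delegation():
    # '#ifdef'/'#endif' lines go through MozPreprocHashLexer; the XML markup
    # in between is handled by XmlLexer.
    lexer = MozPreprocXulLexer()
    return list(lexer.get_tokens('#ifdef XP_WIN\n<window/>\n#endif\n'))
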

class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
                ^(?P<initial>\s*```)
                (?P<lang>[\w\-]+)
                (?P<afterlang>
                    (?P<whitespace>[^\S\n]+)
                    (?P<extra>.*))?
                (?P<newline>\n)
                (?P<code>(.|\n)*?)
                (?P<terminator>^\s*```$\n)
             ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

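# A sketch of the fenced-code handling in ``_handle_codeblock`` above: a known
# language name after the opening fence delegates the body to that language's
# lexer, while an unknown name falls back to a plain String block. The
# snippets are hypothetical.
def _example_markdown_fences():
    delegated = list(MarkdownLexer().get_tokens('```python\nprint(1)\n```\n'))
    plain = list(MarkdownLexer().get_tokens('```no-such-lang\nprint(1)\n```\n'))
    return delegated, plain

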
class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered list items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            # Deadline, Scheduled, CLOSED
            (r'(?i)^( *(?:DEADLINE|SCHEDULED): )(<.+?> *)$',
             bygroups(Generic.Error, Literal.Date)),
            (r'(?i)^( *CLOSED: )(\[.+?\] *)$',
             bygroups(Generic.Deleted, Literal.Date)),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String),  # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
    }


class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag, 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


class WikitextLexer(RegexLexer):
    """
    For MediaWiki Wikitext.

    Parsing Wikitext is tricky, and results vary between different MediaWiki
    installations, so we only highlight common syntaxes (built-in or from
    popular extensions), and also assume templates produce no unbalanced
    syntaxes.
    """
    name = 'Wikitext'
    url = 'https://www.mediawiki.org/wiki/Wikitext'
    aliases = ['wikitext', 'mediawiki']
    filenames = []
    mimetypes = ['text/x-wiki']
    version_added = '2.15'
    flags = re.MULTILINE

    def nowiki_tag_rules(tag_name):
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            include('entity'),
            include('text'),
        ]

    def plaintext_tag_rules(tag_name):
        return [
            (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text,
             Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
        ]

    def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)),
        ]

    def text_rules(token):
        return [
            (r'\w+', token),
            (r'[^\S\n]+', token),
            (r'(?s).', token),
        ]

    def handle_syntaxhighlight(self, match, ctx):
        from pygments.lexers import get_lexer_by_name

        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lexer = None
        content = attr_content[index+1:]
        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

        if len(lang_match) >= 1:
            # Pick the last match in case of multiple matches
            lang = lang_match[-1][1]
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        if lexer is None:
            yield match.start() + index + 1, Text, content
        else:
            yield from lexer.get_tokens_unprocessed(content)

    def handle_score(self, match, ctx):
        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        content = attr_content[index+1:]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'

        if lang == 'lilypond':  # Case sensitive
            yield from LilyPondLexer().get_tokens_unprocessed(content)
        else:  # ABC
            # FIXME: Use ABC lexer in the future
            yield match.start() + index + 1, Text, content

    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }

    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
            """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
             link_address, link_char_class), Name.Label),
            # Magic links
            (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
            """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (rf'(?i)[{title_char}]+', Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],
        'inline': [
            # Signatures
            (r'~{3,5}', Keyword),
            # Entities
            include('entity'),
            # Bold & italic
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.EmphStrong), 'inline-italic-bold'),
            (r"'''(?!')", Generic.Strong, 'inline-bold'),
            (r"''(?!')", Generic.Emph, 'inline-italic'),
            # Comments & parameters & templates
            include('replaceable'),
            # Media links
            (
                r"""(?xi)
                (\[\[)
                    (File|Image) (:)
                    ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*)
                    (?: (\#) ([{}]*?) )?
                """.format(title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label),
                'medialink-inner'
            ),
            # Wikilinks
            (
                r"""(?xi)
                (\[\[)(?!{}) # Should not contain URLs
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
                (\]\])
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
            ),
            (
                r"""(?xi)
                (\[\[)(?!{})
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
                (\|)
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
                'wikilink-inner'
            ),
            # External links
            (
                r"""(?xi)
                (\[)
                ((?:{}) {} {}*)
                (\s*)
                """.format('|'.join(protocols), link_address, link_char_class),
                bygroups(Punctuation, Name.Label, Whitespace),
                'extlink-inner'
            ),
            # Tables
            (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
             Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
            # HTML tags
            (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # <nowiki>
            (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
             Name.Tag), ('tag-nowiki', 'tag-inner')),
            # <pre>
            (r'(?i)(<)(pre)\b', bygroups(Punctuation,
             Name.Tag), ('tag-pre', 'tag-inner')),
            # <categorytree>
            (r'(?i)(<)(categorytree)\b', bygroups(
                Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
            # <hiero>
            (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
             Name.Tag), ('tag-hiero', 'tag-inner')),
            # <math>
            (r'(?i)(<)(math)\b', bygroups(Punctuation,
             Name.Tag), ('tag-math', 'tag-inner')),
            # <chem>
            (r'(?i)(<)(chem)\b', bygroups(Punctuation,
             Name.Tag), ('tag-chem', 'tag-inner')),
            # <ce>
            (r'(?i)(<)(ce)\b', bygroups(Punctuation,
             Name.Tag), ('tag-ce', 'tag-inner')),
            # <charinsert>
            (r'(?i)(<)(charinsert)\b', bygroups(
                Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
            # <templatedata>
            (r'(?i)(<)(templatedata)\b', bygroups(
                Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
            # <gallery>
            (r'(?i)(<)(gallery)\b', bygroups(
                Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
# <graph>
            (r'(?i)(<)(graph)\b', bygroups(
                Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
            # <dynamicpagelist>
            (r'(?i)(<)(dynamicpagelist)\b', bygroups(
                Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
            # <inputbox>
            (r'(?i)(<)(inputbox)\b', bygroups(
                Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
            # <rss>
            (r'(?i)(<)(rss)\b', bygroups(
                Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
            # <imagemap>
            (r'(?i)(<)(imagemap)\b', bygroups(
                Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
            # <syntaxhighlight>
            (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <syntaxhighlight>: Fallback case for self-closing tags
            (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <source>
            (r'(?i)(</)(source)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <source>: Fallback case for self-closing tags
            (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <score>
            (r'(?i)(</)(score)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_score)),
            # <score>: Fallback case for self-closing tags
            (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # Other parser tags
            (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # LanguageConverter markups
            (
                r"""(?xi)
                (-\{{) # Use {{ to escape format()
                ([^|]) (\|)
                (?:
                    (?: ([^;]*?) (=>))?
                    (\s* (?:{variants}) \s*) (:)
                )?
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Keyword, Punctuation,
                         using(this, state=['root', 'lc-raw']),
                         Operator, Name.Label, Punctuation),
                'lc-inner'
            ),
            # LanguageConverter markups: composite conversion grammar
            (
                r"""(?xi)
                (-\{)
                ([a-z\s;-]*?) (\|)
                """,
                bygroups(Punctuation,
                         using(this, state=['root', 'lc-flag']),
                         Punctuation),
                'lc-raw'
            ),
            # LanguageConverter markups: fallbacks
            (
                r"""(?xi)
                (-\{{) (?!\{{) # Use {{ to escape format()
                (?: (\s* (?:{variants}) \s*) (:))?
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Name.Label, Punctuation),
                'lc-inner'
            ),
        ],
        'wikilink-name': [
            include('replaceable'),
            (r'[^{<]+', Name.Tag),
            (r'(?s).', Name.Tag),
        ],
        'wikilink-inner': [
            # Quit in case of another wikilink
            (r'(?=\[\[)', Punctuation, '#pop'),
            (r'\]\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'medialink-inner': [
            (r'\]\]', Punctuation, '#pop'),
            (r'(\|)([^\n=|]*)(=)',
             bygroups(Punctuation, Name.Attribute, Operator)),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'quote-common': [
            # Quit in case of link/template endings
            (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        'inline-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), ('#pop', 'inline-bold')),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
            (r"''(?!')", Generic.Emph, '#pop'),
            include('inline'),
            include('text-italic'),
        ],
        'inline-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
            (r"'''(?!')", Generic.Strong, '#pop'),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
            include('inline'),
            include('text-bold'),
        ],
        'inline-bold-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
             Generic.Strong), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'inline-italic-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.EmphStrong, Generic.Emph), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'lc-flag': [
            (r'\s+', Whitespace),
            (r';', Punctuation),
            *text_rules(Keyword),
        ],
        'lc-inner': [
            (
                r"""(?xi)
                (;)
                (?: ([^;]*?) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
                         Operator, Name.Label, Punctuation)
            ),
            (r';?\s*?\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'lc-raw': [
            (r'\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'replaceable': [
            # Comments
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            # Parameters
            (
                r"""(?x)
                (\{{3})
                    ([^|]*?)
                (?=\}{3}|\|)
                """,
                bygroups(Punctuation, Name.Variable),
                'parameter-inner',
            ),
            # Magic variables
            (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            # Parser functions & templates
            (r'\{\{', Punctuation, 'template-begin-space'),
            # <tvar> legacy syntax
            (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
             Name.Tag, Punctuation, String, Punctuation)),
            (r'</>', Punctuation, '#pop'),
            # <tvar>
            (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)(tvar)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        ],
        'parameter-inner': [
            (r'\}{3}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'template-begin-space': [
            # Templates allow line breaks at the beginning, and due to how MediaWiki handles
            # comments, an extra state is required to handle things like {{\n<!---->\n name}}
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            (r'\s+', Whitespace),
            # Parser functions
            (
                r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
                                               '|'.join(parser_functions_i)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            (
                r'({})(:)'.format('|'.join(parser_functions)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            # Templates
            (
                rf'(?i)([{title_char}]*?)(:)',
                bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
            ),
            default(('#pop', 'template-name'),),
        ],
        'template-name': [
            (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
            (r'\}\}', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            include('replaceable'),
            *text_rules(Name.Tag),
        ],
        'template-inner': [
            (r'\}\}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            (
                r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
                """,
                bygroups(Name.Label, Operator)
            ),
            include('inline'),
            include('text'),
        ],
        'table': [
            # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
            # Endings
            (r'^([ \t\n\r\0\x0B]*?)(\|\})',
             bygroups(Whitespace, Punctuation), '#pop'),
            # Table rows
            (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
             using(this, state=['root', 'attr']))),
            # Captions
            (
                r"""(?x)
                ^([ \t\n\r\0\x0B]*?)(\|\+)
                # Exclude links, template and tags
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
                (.*?)$
                """,
                bygroups(Whitespace, Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation, Generic.Heading),
            ),
            # Table data
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
            ),
            # Table headers
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)! )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
                'table-header',
            ),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'table-header': [
            # Requires another state for || handling inside headers
            (r'\n', Text, '#pop'),
            (
                r"""(?x)
                (!!|\|\|)
                (?:
                    ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                    (\|)(?!\|)
                )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation)
            ),
            *text_rules(Generic.Subheading),
        ],
        'entity': [
            (r'&\S*?;', Name.Entity),
        ],
        'dt': [
            (r'\n', Text, '#pop'),
            include('inline'),
            (r':', Keyword, '#pop'),
            include('text'),
        ],
        'extlink-inner': [
            (r'\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'nowiki-ish': [
            include('entity'),
            include('text'),
        ],
        'attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            include('replaceable'),
            *text_rules(String),
        ],
        'attr-val-1': [
            (r"'", String.Single, '#pop'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'attr-val-2': [
            (r'"', String.Double, '#pop'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-inner-ordinary': [
            (r'/?\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        'tag-inner': [
            # Return to root state for self-closing tags
            (r'/\s*>', Punctuation, '#pop:2'),
            (r'\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
# The states below are just like their non-tag variants; the key difference
        # is that they forcibly quit when encountering tag closing markup
        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
        'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
        'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text-bold-italic': text_rules(Generic.EmphStrong),
        'text': text_rules(Text),
    }
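
# A closing usage sketch (hypothetical input): callers normally reach this
# lexer through its registered aliases rather than by instantiating the class,
# and a <syntaxhighlight lang="..."> body is delegated via
# ``handle_syntaxhighlight`` above to the named language's lexer.
def _example_wikitext_usage():
    from pygments.lexers import get_lexer_by_name

    lexer = get_lexer_by_name('wikitext')
    text = ("'''bold''' text\n"
            '<syntaxhighlight lang="python">print(1)</syntaxhighlight>\n')
    return list(lexer.get_tokens(text))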