1"""
2 pygments.lexers.markup
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for non-HTML markup languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexers.html import XmlLexer
14from pygments.lexers.javascript import JavascriptLexer
15from pygments.lexers.css import CssLexer
16from pygments.lexers.lilypond import LilyPondLexer
17from pygments.lexers.data import JsonLexer
18
19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
20 using, this, do_insertions, default, words
21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
22 Number, Punctuation, Generic, Other, Whitespace, Literal
23from pygments.util import get_bool_opt, ClassNotFound
24
25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
26 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer',
29 'WikitextLexer']
30
31
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']
    url = 'https://www.bbcode.org/'
    version_added = '0.6'

    tokens = {
        'root': [
            # plain text, up to the next '[' (a potential tag start)
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
63
64
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # full-line '#' processing instructions / comments
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword), # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)), # Link
            (r'^----+$', Keyword), # Horizontal rules
            # plain text: stop at any character that could begin markup
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc), # slurp boring text
            (r'.', Comment.Preproc), # allow loose { or }
        ],
    }
109
110
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Callback for code-block directives: emit the directive header,
        then delegate the indented body to a lexer for the named language
        (when `handlecodeblocks` is set and such a lexer exists).

        Match groups: 1-7 directive header pieces, 6 is the language name,
        8 the indentation, 9-11 the code body.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        # Strip the common indentation and re-insert it afterwards via
        # do_insertions so token offsets stay correct.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1   .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1   .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text), # escape
            (r'``', String, 'literal'), # code
            (r'(`.+?)(<.+?>)(`__?)', # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String), # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)), # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)), # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
            (r'\*.+?\*', Generic.Emph), # Emphasis
            (r'\[.*?\]_', String), # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            # inside ``...``: everything is literal until the closing
            # backticks followed by a valid end-of-string context
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of code-block directive
        # bodies to language-specific lexers (see _handle_sourcecode).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        """Heuristic: directives at the start, or a first line underlined
        by a same-length run of '-'/'=' characters, suggest reST."""
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
274
275
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        'general': [
            # rules shared by text and math modes
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # math delimiters: \[...\], \(...\), $$...$$ and $...$
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # a control sequence: multi-letter name or a single character
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            # optional bracketed argument and star variant; anything else
            # falls back to the surrounding state
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        # Recognize common (La)TeX preamble commands at the very start.
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
334
335
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            # a request line: '.' followed by a macro/request name
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),        # comment escape
            (r'\\[fn]\w', String.Escape),   # font / number-register escapes
            (r'\\\(.{2}', String.Escape),   # two-character special glyph
            (r'\\.\[.*\]', String.Escape),  # bracketed long-name escape
            (r'\\.', String.Escape),        # any other single-char escape
            (r'\\\n', Text, 'request'),     # line continuation
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is (g)roff source.

        Returns True for an initial comment (``.\\"``) or a manpage
        header (``.TH``), 0.9 for any other plausible request line, and
        False when the text does not start with a control character.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Use slicing (text[3:4]) instead of indexing (text[3]) so that
        # inputs of three characters or fewer -- e.g. '.ab' -- return
        # None instead of raising IndexError.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
390
391
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            # a directive line: push 'expr', then 'exprstart' on top of it,
            # so the directive keyword is lexed before the expression
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' consumes the rest of the line; pop both states
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),   # built-in __VARIABLES__
            (r'@\w+?@', Name.Class),        # @substitution@ markers
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
431
432
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        # only 'root' is overridden (marker '%' instead of '#'); the
        # 'exprstart' and 'expr' states come from MozPreprocHashLexer
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
452
453
class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer: XmlLexer; language lexer: MozPreprocHashLexer
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
468
469
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer: JavascriptLexer; language lexer: MozPreprocHashLexer
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
484
485
class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Unlike the XUL/JS variants, CSS uses the '%' marker flavour
        # (MozPreprocPercentLexer), since '#' is common in CSS selectors.
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
500
501
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """Callback for fenced code blocks with a language tag: emit the
        fence and language, then delegate the body to a lexer for that
        language when `handlecodeblocks` is set and such a lexer exists.
        """
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            # extra text after the language name (some tools allow this)
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
              (?P<whitespace>[^\S\n]+)
              (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of fenced code blocks to
        # language-specific lexers (see _handle_codeblock).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
620
class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        """Build a regex for inline markup delimited by *start*/*end*,
        not adjacent to word characters and spanning at most one newline."""
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            # plain headings, with optional trailing :tag: list
            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered lists items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            # planning lines; timestamps continue in the 'dateline' state
            (r'^\s*CLOSED:\s+', Generic.Deleted, 'dateline'),
            (r'^\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error, 'dateline'),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String), # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
        'dateline': [
            # additional planning keywords on the same line
            (r'\s*CLOSED:\s+', Generic.Deleted),
            (r'\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error),
            (r'\[.+?\]', Literal.Date),     # inactive timestamp
            (r'<[^>]+?>', Literal.Date),    # active timestamp
            (r'(\s*)$', Text, '#pop'),      # end of line: leave the state
            (r'.', Text),
        ],
    }
734
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

        Delegates the code body to a lexer for the named language when
        `handlecodeblocks` is set and such a lexer exists.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag

        Delegates the block body to the CSS lexer when `handlecodeblocks`
        is set.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            # pragma/metadata keywords at the start of a line
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of fenced code / style
        # blocks to language-specific lexers (see the handlers above).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
891
892
893class WikitextLexer(RegexLexer):
894 """
895 For MediaWiki Wikitext.
896
897 Parsing Wikitext is tricky, and results vary between different MediaWiki
898 installations, so we only highlight common syntaxes (built-in or from
899 popular extensions), and also assume templates produce no unbalanced
900 syntaxes.
901 """
902 name = 'Wikitext'
903 url = 'https://www.mediawiki.org/wiki/Wikitext'
904 aliases = ['wikitext', 'mediawiki']
905 filenames = []
906 mimetypes = ['text/x-wiki']
907 version_added = '2.15'
908 flags = re.MULTILINE
909
910 def nowiki_tag_rules(tag_name):
911 return [
912 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
913 Name.Tag, Whitespace, Punctuation), '#pop'),
914 include('entity'),
915 include('text'),
916 ]
917
918 def plaintext_tag_rules(tag_name):
919 return [
920 (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text,
921 Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
922 ]
923
924 def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
925 return [
926 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
927 Name.Tag, Whitespace, Punctuation), '#pop'),
928 (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)),
929 ]
930
931 def text_rules(token):
932 return [
933 (r'\w+', token),
934 (r'[^\S\n]+', token),
935 (r'(?s).', token),
936 ]
937
938 def handle_syntaxhighlight(self, match, ctx):
939 from pygments.lexers import get_lexer_by_name
940
941 attr_content = match.group()
942 start = 0
943 index = 0
944 while True:
945 index = attr_content.find('>', start)
946 # Exclude comment end (-->)
947 if attr_content[index-2:index] != '--':
948 break
949 start = index + 1
950
951 if index == -1:
952 # No tag end
953 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
954 return
955 attr = attr_content[:index]
956 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
957 yield match.start(3) + index, Punctuation, '>'
958
959 lexer = None
960 content = attr_content[index+1:]
961 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
962
963 if len(lang_match) >= 1:
964 # Pick the last match in case of multiple matches
965 lang = lang_match[-1][1]
966 try:
967 lexer = get_lexer_by_name(lang)
968 except ClassNotFound:
969 pass
970
971 if lexer is None:
972 yield match.start() + index + 1, Text, content
973 else:
974 yield from lexer.get_tokens_unprocessed(content)
975
976 def handle_score(self, match, ctx):
977 attr_content = match.group()
978 start = 0
979 index = 0
980 while True:
981 index = attr_content.find('>', start)
982 # Exclude comment end (-->)
983 if attr_content[index-2:index] != '--':
984 break
985 start = index + 1
986
987 if index == -1:
988 # No tag end
989 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
990 return
991 attr = attr_content[:index]
992 content = attr_content[index+1:]
993 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
994 yield match.start(3) + index, Punctuation, '>'
995
996 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
997 # Pick the last match in case of multiple matches
998 lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'
999
1000 if lang == 'lilypond': # Case sensitive
1001 yield from LilyPondLexer().get_tokens_unprocessed(content)
1002 else: # ABC
1003 # FIXME: Use ABC lexer in the future
1004 yield match.start() + index + 1, Text, content
1005
    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    # Forms of (non-breaking) space accepted inside magic links (RFC/PMID/ISBN):
    # tab, plain space, numeric &#160;/&#xA0; entities, and Unicode space chars.
    nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    # First character of a link target / characters allowed inside one.
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    # __WORD__ behavior switches; the *_i set is matched case-insensitively.
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    # URL schemes recognized for raw/external links; bare '//' allows
    # protocol-relative links.
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    # Tag names handled by the generic HTML-tag rules in the 'inline' state.
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    # Extension/parser tag names; those without a dedicated rule fall through
    # to the generic parser-tag rules in the 'inline' state.
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    # Language-variant codes accepted in LanguageConverter markup (-{ ... }-).
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    # Variables substituted inside {{...}}; the *_i set is case-insensitive.
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    # Parser-function names matched between '{{' and ':'; *_i is case-insensitive.
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }
1110
1111 tokens = {
1112 'root': [
1113 # Redirects
1114 (r"""(?xi)
1115 (\A\s*?)(\#REDIRECT:?) # may contain a colon
1116 (\s+)(\[\[) (?=[^\]\n]* \]\]$)
1117 """,
1118 bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
1119 # Subheadings
1120 (r'^(={2,6})(.+?)(\1)(\s*$\n)',
1121 bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
1122 # Headings
1123 (r'^(=.+?=)(\s*$\n)',
1124 bygroups(Generic.Heading, Whitespace)),
1125 # Double-slashed magic words
1126 (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
1127 (words(double_slashes), Name.Function.Magic),
1128 # Raw URLs
1129 (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
1130 link_address, link_char_class), Name.Label),
1131 # Magic links
1132 (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
1133 Name.Function.Magic),
1134 (r"""(?x)
1135 \bISBN {nbsp_char}
1136 (?: 97[89] {nbsp_dash}? )?
1137 (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
1138 [0-9Xx]\b
1139 """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
1140 include('list'),
1141 include('inline'),
1142 include('text'),
1143 ],
1144 'redirect-inner': [
1145 (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
1146 (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
1147 (rf'(?i)[{title_char}]+', Name.Tag),
1148 ],
1149 'list': [
1150 # Description lists
1151 (r'^;', Keyword, 'dt'),
1152 # Ordered lists, unordered lists and indents
1153 (r'^[#:*]+', Keyword),
1154 # Horizontal rules
1155 (r'^-{4,}', Keyword),
1156 ],
1157 'inline': [
1158 # Signatures
1159 (r'~{3,5}', Keyword),
1160 # Entities
1161 include('entity'),
1162 # Bold & italic
1163 (r"('')(''')(?!')", bygroups(Generic.Emph,
1164 Generic.EmphStrong), 'inline-italic-bold'),
1165 (r"'''(?!')", Generic.Strong, 'inline-bold'),
1166 (r"''(?!')", Generic.Emph, 'inline-italic'),
1167 # Comments & parameters & templates
1168 include('replaceable'),
1169 # Media links
1170 (
1171 r"""(?xi)
1172 (\[\[)
1173 (File|Image) (:)
1174 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*)
1175 (?: (\#) ([{}]*?) )?
1176 """.format(title_char, f'{title_char}#'),
1177 bygroups(Punctuation, Name.Namespace, Punctuation,
1178 using(this, state=['wikilink-name']), Punctuation, Name.Label),
1179 'medialink-inner'
1180 ),
1181 # Wikilinks
1182 (
1183 r"""(?xi)
1184 (\[\[)(?!{}) # Should not contain URLs
1185 (?: ([{}]*) (:))?
1186 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
1187 (?: (\#) ([{}]*?) )?
1188 (\]\])
1189 """.format('|'.join(protocols), title_char.replace('/', ''),
1190 title_char, f'{title_char}#'),
1191 bygroups(Punctuation, Name.Namespace, Punctuation,
1192 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
1193 ),
1194 (
1195 r"""(?xi)
1196 (\[\[)(?!{})
1197 (?: ([{}]*) (:))?
1198 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
1199 (?: (\#) ([{}]*?) )?
1200 (\|)
1201 """.format('|'.join(protocols), title_char.replace('/', ''),
1202 title_char, f'{title_char}#'),
1203 bygroups(Punctuation, Name.Namespace, Punctuation,
1204 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
1205 'wikilink-inner'
1206 ),
1207 # External links
1208 (
1209 r"""(?xi)
1210 (\[)
1211 ((?:{}) {} {}*)
1212 (\s*)
1213 """.format('|'.join(protocols), link_address, link_char_class),
1214 bygroups(Punctuation, Name.Label, Whitespace),
1215 'extlink-inner'
1216 ),
1217 # Tables
1218 (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
1219 Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
1220 # HTML tags
1221 (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
1222 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1223 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
1224 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1225 # <nowiki>
1226 (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
1227 Name.Tag), ('tag-nowiki', 'tag-inner')),
1228 # <pre>
1229 (r'(?i)(<)(pre)\b', bygroups(Punctuation,
1230 Name.Tag), ('tag-pre', 'tag-inner')),
1231 # <categorytree>
1232 (r'(?i)(<)(categorytree)\b', bygroups(
1233 Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
1234 # <hiero>
1235 (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
1236 Name.Tag), ('tag-hiero', 'tag-inner')),
1237 # <math>
1238 (r'(?i)(<)(math)\b', bygroups(Punctuation,
1239 Name.Tag), ('tag-math', 'tag-inner')),
1240 # <chem>
1241 (r'(?i)(<)(chem)\b', bygroups(Punctuation,
1242 Name.Tag), ('tag-chem', 'tag-inner')),
1243 # <ce>
1244 (r'(?i)(<)(ce)\b', bygroups(Punctuation,
1245 Name.Tag), ('tag-ce', 'tag-inner')),
1246 # <charinsert>
1247 (r'(?i)(<)(charinsert)\b', bygroups(
1248 Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
1249 # <templatedata>
1250 (r'(?i)(<)(templatedata)\b', bygroups(
1251 Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
1252 # <gallery>
1253 (r'(?i)(<)(gallery)\b', bygroups(
1254 Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
1255 # <graph>
1256 (r'(?i)(<)(gallery)\b', bygroups(
1257 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
1258 # <dynamicpagelist>
1259 (r'(?i)(<)(dynamicpagelist)\b', bygroups(
1260 Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
1261 # <inputbox>
1262 (r'(?i)(<)(inputbox)\b', bygroups(
1263 Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
1264 # <rss>
1265 (r'(?i)(<)(rss)\b', bygroups(
1266 Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
1267 # <imagemap>
1268 (r'(?i)(<)(imagemap)\b', bygroups(
1269 Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
1270 # <syntaxhighlight>
1271 (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
1272 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1273 (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1274 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
1275 # <syntaxhighlight>: Fallback case for self-closing tags
1276 (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1277 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1278 # <source>
1279 (r'(?i)(</)(source)\b(\s*)(>)',
1280 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1281 (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1282 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
1283 # <source>: Fallback case for self-closing tags
1284 (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1285 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1286 # <score>
1287 (r'(?i)(</)(score)\b(\s*)(>)',
1288 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1289 (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1290 bygroups(Punctuation, Name.Tag, handle_score)),
1291 # <score>: Fallback case for self-closing tags
1292 (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1293 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1294 # Other parser tags
1295 (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
1296 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1297 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
1298 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1299 # LanguageConverter markups
1300 (
1301 r"""(?xi)
1302 (-\{{) # Use {{ to escape format()
1303 ([^|]) (\|)
1304 (?:
1305 (?: ([^;]*?) (=>))?
1306 (\s* (?:{variants}) \s*) (:)
1307 )?
1308 """.format(variants='|'.join(variant_langs)),
1309 bygroups(Punctuation, Keyword, Punctuation,
1310 using(this, state=['root', 'lc-raw']),
1311 Operator, Name.Label, Punctuation),
1312 'lc-inner'
1313 ),
1314 # LanguageConverter markups: composite conversion grammar
1315 (
1316 r"""(?xi)
1317 (-\{)
1318 ([a-z\s;-]*?) (\|)
1319 """,
1320 bygroups(Punctuation,
1321 using(this, state=['root', 'lc-flag']),
1322 Punctuation),
1323 'lc-raw'
1324 ),
1325 # LanguageConverter markups: fallbacks
1326 (
1327 r"""(?xi)
1328 (-\{{) (?!\{{) # Use {{ to escape format()
1329 (?: (\s* (?:{variants}) \s*) (:))?
1330 """.format(variants='|'.join(variant_langs)),
1331 bygroups(Punctuation, Name.Label, Punctuation),
1332 'lc-inner'
1333 ),
1334 ],
1335 'wikilink-name': [
1336 include('replaceable'),
1337 (r'[^{<]+', Name.Tag),
1338 (r'(?s).', Name.Tag),
1339 ],
1340 'wikilink-inner': [
1341 # Quit in case of another wikilink
1342 (r'(?=\[\[)', Punctuation, '#pop'),
1343 (r'\]\]', Punctuation, '#pop'),
1344 include('inline'),
1345 include('text'),
1346 ],
1347 'medialink-inner': [
1348 (r'\]\]', Punctuation, '#pop'),
1349 (r'(\|)([^\n=|]*)(=)',
1350 bygroups(Punctuation, Name.Attribute, Operator)),
1351 (r'\|', Punctuation),
1352 include('inline'),
1353 include('text'),
1354 ],
1355 'quote-common': [
1356 # Quit in case of link/template endings
1357 (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
1358 (r'\n', Text, '#pop'),
1359 ],
1360 'inline-italic': [
1361 include('quote-common'),
1362 (r"('')(''')(?!')", bygroups(Generic.Emph,
1363 Generic.Strong), ('#pop', 'inline-bold')),
1364 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
1365 (r"''(?!')", Generic.Emph, '#pop'),
1366 include('inline'),
1367 include('text-italic'),
1368 ],
1369 'inline-bold': [
1370 include('quote-common'),
1371 (r"(''')('')(?!')", bygroups(
1372 Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
1373 (r"'''(?!')", Generic.Strong, '#pop'),
1374 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
1375 include('inline'),
1376 include('text-bold'),
1377 ],
1378 'inline-bold-italic': [
1379 include('quote-common'),
1380 (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
1381 Generic.Strong), '#pop'),
1382 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
1383 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
1384 include('inline'),
1385 include('text-bold-italic'),
1386 ],
1387 'inline-italic-bold': [
1388 include('quote-common'),
1389 (r"(''')('')(?!')", bygroups(
1390 Generic.EmphStrong, Generic.Emph), '#pop'),
1391 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
1392 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
1393 include('inline'),
1394 include('text-bold-italic'),
1395 ],
1396 'lc-flag': [
1397 (r'\s+', Whitespace),
1398 (r';', Punctuation),
1399 *text_rules(Keyword),
1400 ],
1401 'lc-inner': [
1402 (
1403 r"""(?xi)
1404 (;)
1405 (?: ([^;]*?) (=>))?
1406 (\s* (?:{variants}) \s*) (:)
1407 """.format(variants='|'.join(variant_langs)),
1408 bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
1409 Operator, Name.Label, Punctuation)
1410 ),
1411 (r';?\s*?\}-', Punctuation, '#pop'),
1412 include('inline'),
1413 include('text'),
1414 ],
1415 'lc-raw': [
1416 (r'\}-', Punctuation, '#pop'),
1417 include('inline'),
1418 include('text'),
1419 ],
1420 'replaceable': [
1421 # Comments
1422 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
1423 # Parameters
1424 (
1425 r"""(?x)
1426 (\{{3})
1427 ([^|]*?)
1428 (?=\}{3}|\|)
1429 """,
1430 bygroups(Punctuation, Name.Variable),
1431 'parameter-inner',
1432 ),
1433 # Magic variables
1434 (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
1435 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
1436 (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
1437 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
1438 # Parser functions & templates
1439 (r'\{\{', Punctuation, 'template-begin-space'),
1440 # <tvar> legacy syntax
1441 (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
1442 Name.Tag, Punctuation, String, Punctuation)),
1443 (r'</>', Punctuation, '#pop'),
1444 # <tvar>
1445 (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1446 (r'(?i)(</)(tvar)\b(\s*)(>)',
1447 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1448 ],
1449 'parameter-inner': [
1450 (r'\}{3}', Punctuation, '#pop'),
1451 (r'\|', Punctuation),
1452 include('inline'),
1453 include('text'),
1454 ],
1455 'template-begin-space': [
1456 # Templates allow line breaks at the beginning, and due to how MediaWiki handles
1457 # comments, an extra state is required to handle things like {{\n<!---->\n name}}
1458 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
1459 (r'\s+', Whitespace),
1460 # Parser functions
1461 (
1462 r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
1463 '|'.join(parser_functions_i)),
1464 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
1465 ),
1466 (
1467 r'({})(:)'.format('|'.join(parser_functions)),
1468 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
1469 ),
1470 # Templates
1471 (
1472 rf'(?i)([{title_char}]*?)(:)',
1473 bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
1474 ),
1475 default(('#pop', 'template-name'),),
1476 ],
1477 'template-name': [
1478 (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
1479 (r'\}\}', Punctuation, '#pop'),
1480 (r'\n', Text, '#pop'),
1481 include('replaceable'),
1482 *text_rules(Name.Tag),
1483 ],
1484 'template-inner': [
1485 (r'\}\}', Punctuation, '#pop'),
1486 (r'\|', Punctuation),
1487 (
1488 r"""(?x)
1489 (?<=\|)
1490 ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
1491 (=)
1492 """,
1493 bygroups(Name.Label, Operator)
1494 ),
1495 include('inline'),
1496 include('text'),
1497 ],
1498 'table': [
1499 # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
1500 # Endings
1501 (r'^([ \t\n\r\0\x0B]*?)(\|\})',
1502 bygroups(Whitespace, Punctuation), '#pop'),
1503 # Table rows
1504 (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
1505 using(this, state=['root', 'attr']))),
1506 # Captions
1507 (
1508 r"""(?x)
1509 ^([ \t\n\r\0\x0B]*?)(\|\+)
1510 # Exclude links, template and tags
1511 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
1512 (.*?)$
1513 """,
1514 bygroups(Whitespace, Punctuation, using(this, state=[
1515 'root', 'attr']), Punctuation, Generic.Heading),
1516 ),
1517 # Table data
1518 (
1519 r"""(?x)
1520 ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
1521 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
1522 """,
1523 bygroups(Punctuation, using(this, state=[
1524 'root', 'attr']), Punctuation),
1525 ),
1526 # Table headers
1527 (
1528 r"""(?x)
1529 ( ^(?:[ \t\n\r\0\x0B]*?)! )
1530 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
1531 """,
1532 bygroups(Punctuation, using(this, state=[
1533 'root', 'attr']), Punctuation),
1534 'table-header',
1535 ),
1536 include('list'),
1537 include('inline'),
1538 include('text'),
1539 ],
1540 'table-header': [
1541 # Requires another state for || handling inside headers
1542 (r'\n', Text, '#pop'),
1543 (
1544 r"""(?x)
1545 (!!|\|\|)
1546 (?:
1547 ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
1548 (\|)(?!\|)
1549 )?
1550 """,
1551 bygroups(Punctuation, using(this, state=[
1552 'root', 'attr']), Punctuation)
1553 ),
1554 *text_rules(Generic.Subheading),
1555 ],
1556 'entity': [
1557 (r'&\S*?;', Name.Entity),
1558 ],
1559 'dt': [
1560 (r'\n', Text, '#pop'),
1561 include('inline'),
1562 (r':', Keyword, '#pop'),
1563 include('text'),
1564 ],
1565 'extlink-inner': [
1566 (r'\]', Punctuation, '#pop'),
1567 include('inline'),
1568 include('text'),
1569 ],
1570 'nowiki-ish': [
1571 include('entity'),
1572 include('text'),
1573 ],
1574 'attr': [
1575 include('replaceable'),
1576 (r'\s+', Whitespace),
1577 (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
1578 (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
1579 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
1580 (r'[\w:-]+', Name.Attribute),
1581
1582 ],
1583 'attr-val-0': [
1584 (r'\s', Whitespace, '#pop'),
1585 include('replaceable'),
1586 *text_rules(String),
1587 ],
1588 'attr-val-1': [
1589 (r"'", String.Single, '#pop'),
1590 include('replaceable'),
1591 *text_rules(String.Single),
1592 ],
1593 'attr-val-2': [
1594 (r'"', String.Double, '#pop'),
1595 include('replaceable'),
1596 *text_rules(String.Double),
1597 ],
1598 'tag-inner-ordinary': [
1599 (r'/?\s*>', Punctuation, '#pop'),
1600 include('tag-attr'),
1601 ],
1602 'tag-inner': [
1603 # Return to root state for self-closing tags
1604 (r'/\s*>', Punctuation, '#pop:2'),
1605 (r'\s*>', Punctuation, '#pop'),
1606 include('tag-attr'),
1607 ],
1608 # There states below are just like their non-tag variants, the key difference is
1609 # they forcibly quit when encountering tag closing markup
1610 'tag-attr': [
1611 include('replaceable'),
1612 (r'\s+', Whitespace),
1613 (r'(=)(\s*)(")', bygroups(Operator,
1614 Whitespace, String.Double), 'tag-attr-val-2'),
1615 (r"(=)(\s*)(')", bygroups(Operator,
1616 Whitespace, String.Single), 'tag-attr-val-1'),
1617 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
1618 (r'[\w:-]+', Name.Attribute),
1619
1620 ],
1621 'tag-attr-val-0': [
1622 (r'\s', Whitespace, '#pop'),
1623 (r'/?>', Punctuation, '#pop:2'),
1624 include('replaceable'),
1625 *text_rules(String),
1626 ],
1627 'tag-attr-val-1': [
1628 (r"'", String.Single, '#pop'),
1629 (r'/?>', Punctuation, '#pop:2'),
1630 include('replaceable'),
1631 *text_rules(String.Single),
1632 ],
1633 'tag-attr-val-2': [
1634 (r'"', String.Double, '#pop'),
1635 (r'/?>', Punctuation, '#pop:2'),
1636 include('replaceable'),
1637 *text_rules(String.Double),
1638 ],
1639 'tag-nowiki': nowiki_tag_rules('nowiki'),
1640 'tag-pre': nowiki_tag_rules('pre'),
1641 'tag-categorytree': plaintext_tag_rules('categorytree'),
1642 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
1643 'tag-hiero': plaintext_tag_rules('hiero'),
1644 'tag-inputbox': plaintext_tag_rules('inputbox'),
1645 'tag-imagemap': plaintext_tag_rules('imagemap'),
1646 'tag-charinsert': plaintext_tag_rules('charinsert'),
1647 'tag-timeline': plaintext_tag_rules('timeline'),
1648 'tag-gallery': plaintext_tag_rules('gallery'),
1649 'tag-graph': plaintext_tag_rules('graph'),
1650 'tag-rss': plaintext_tag_rules('rss'),
1651 'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
1652 'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
1653 'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
1654 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
1655 'text-italic': text_rules(Generic.Emph),
1656 'text-bold': text_rules(Generic.Strong),
1657 'text-bold-italic': text_rules(Generic.EmphStrong),
1658 'text': text_rules(Text),
1659 }