Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/markup.py: 49%
280 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.markup
3 ~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for non-HTML markup languages.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexers.html import XmlLexer
14from pygments.lexers.javascript import JavascriptLexer
15from pygments.lexers.css import CssLexer
16from pygments.lexers.lilypond import LilyPondLexer
17from pygments.lexers.data import JsonLexer
19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
20 using, this, do_insertions, default, words
21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
22 Number, Punctuation, Generic, Other, Whitespace
23from pygments.util import get_bool_opt, ClassNotFound
# Public API of this module: every lexer class defined below.
__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer']
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            # Everything up to the next '[' is plain text.
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        # Inside a tag: attributes, the tag argument, then ']' pops back.
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # Processing-instruction / comment lines start with '#'.
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            # Fallbacks: slurp runs of ordinary characters, then anything else.
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Callback for sourcecode/code/code-block directives.

        Emits tokens for the directive header (groups 1-7), then delegates
        the indented body (groups 8-11) to a lexer looked up by the language
        name in group 6, stripping the common indentation from each line and
        re-inserting it as plain ``Text`` via ``do_insertions``.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                # Strip the indent before lexing; re-insert it as Text later.
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    # Lookahead that must follow a closing ``````: end of line or a
    # delimiter/closer character (see docutils inline markup rules).
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        # Whether code-block directive bodies get sub-lexed (see class doc).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        # Rules shared by text and math modes.
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # Math-mode delimiters push the matching math state.
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # Control sequences: a letter run or a single escaped character.
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        # Common math rules, included by both inline and display math states.
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        # After a control sequence: optional [...] argument and star form.
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            # A request line: '.' followed by the request name.
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # Line continuation switches to request parsing.
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is (g)roff source.

        Returns ``False`` unless the text starts with '.', ``True`` for a
        comment (``.\\"``) or ``.TH`` start, and ``0.9`` for a plausible
        two-letter request followed by whitespace.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Use a slice (text[3:4]) rather than an index so inputs shorter
        # than four characters (e.g. ".ab") cannot raise IndexError; an
        # empty slice is simply not whitespace.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # A directive line: '#' then the directive name, then its expression.
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            # Any other line is passed through untouched.
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' consumes the rest of the line verbatim (pops both states).
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        # The directive's argument/expression, up to end of line.
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        # Only the marker differs from the parent; 'expr'/'exprstart' are
        # inherited from MozPreprocHashLexer.
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        # Preprocessor directives win; everything else is lexed as XML.
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        # Preprocessor directives win; everything else is lexed as JavaScript.
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        # Preprocessed CSS uses '%' as the directive marker, hence the
        # percent variant of the preprocessor lexer (not the hash one).
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    .. versionadded:: 2.2
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

        Delegates the fenced code body (group 4) to a lexer looked up by the
        language name in group 2, falling back to plain String tokens when no
        such lexer exists or ``handlecodeblocks`` is off.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String.Backtick, match.group(1)
        yield match.start(2), String.Backtick, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String.Backtick, match.group(5)

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$\n)', _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # Whether fenced code blocks with a language get sub-lexed.
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    .. versionadded:: 2.7
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

        Delegates the fenced code body (group 4) to a lexer looked up by the
        language name in group 2; falls back to String tokens when no lexer
        is found or ``handlecodeblocks`` is off.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag

        Delegates the <style> body (group 3) to the CSS lexer when available.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            # Tiddler pragmas and metadata field names at line start.
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        # Whether fenced code / <style> blocks get sub-lexed.
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
770class WikitextLexer(RegexLexer):
771 """
772 For MediaWiki Wikitext.
774 Parsing Wikitext is tricky, and results vary between different MediaWiki
775 installations, so we only highlight common syntaxes (built-in or from
776 popular extensions), and also assume templates produce no unbalanced
777 syntaxes.
779 .. versionadded:: 2.15
780 """
781 name = 'Wikitext'
782 url = 'https://www.mediawiki.org/wiki/Wikitext'
783 aliases = ['wikitext', 'mediawiki']
784 filenames = []
785 mimetypes = ['text/x-wiki']
786 flags = re.MULTILINE
788 def nowiki_tag_rules(tag_name):
789 return [
790 (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
791 Name.Tag, Whitespace, Punctuation), '#pop'),
792 include('entity'),
793 include('text'),
794 ]
796 def plaintext_tag_rules(tag_name):
797 return [
798 (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name), bygroups(Text,
799 Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
800 ]
802 def delegate_tag_rules(tag_name, lexer):
803 return [
804 (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation,
805 Name.Tag, Whitespace, Punctuation), '#pop'),
806 (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)),
807 ]
809 def text_rules(token):
810 return [
811 (r'\w+', token),
812 (r'[^\S\n]+', token),
813 (r'(?s).', token),
814 ]
816 def handle_syntaxhighlight(self, match, ctx):
817 from pygments.lexers import get_lexer_by_name
819 attr_content = match.group()
820 start = 0
821 index = 0
822 while True:
823 index = attr_content.find('>', start)
824 # Exclude comment end (-->)
825 if attr_content[index-2:index] != '--':
826 break
827 start = index + 1
829 if index == -1:
830 # No tag end
831 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
832 return
833 attr = attr_content[:index]
834 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
835 yield match.start(3) + index, Punctuation, '>'
837 lexer = None
838 content = attr_content[index+1:]
839 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
841 if len(lang_match) >= 1:
842 # Pick the last match in case of multiple matches
843 lang = lang_match[-1][1]
844 try:
845 lexer = get_lexer_by_name(lang)
846 except ClassNotFound:
847 pass
849 if lexer is None:
850 yield match.start() + index + 1, Text, content
851 else:
852 yield from lexer.get_tokens_unprocessed(content)
854 def handle_score(self, match, ctx):
855 attr_content = match.group()
856 start = 0
857 index = 0
858 while True:
859 index = attr_content.find('>', start)
860 # Exclude comment end (-->)
861 if attr_content[index-2:index] != '--':
862 break
863 start = index + 1
865 if index == -1:
866 # No tag end
867 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
868 return
869 attr = attr_content[:index]
870 content = attr_content[index+1:]
871 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
872 yield match.start(3) + index, Punctuation, '>'
874 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
875 # Pick the last match in case of multiple matches
876 lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'
878 if lang == 'lilypond': # Case sensitive
879 yield from LilyPondLexer().get_tokens_unprocessed(content)
880 else: # ABC
881 # FIXME: Use ABC lexer in the future
882 yield match.start() + index + 1, Text, content
884 # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
885 title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
886 nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
887 link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
888 link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
889 double_slashes_i = {
890 '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
891 '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
892 }
893 double_slashes = {
894 '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
895 '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
896 '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
897 }
898 protocols = {
899 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
900 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
901 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
902 'worldwind://', 'xmpp:', '//',
903 }
904 non_relative_protocols = protocols - {'//'}
905 html_tags = {
906 'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
907 'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
908 'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
909 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
910 'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
911 }
912 parser_tags = {
913 'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
914 'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
915 'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
916 'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
917 'maplink', 'ce', 'references',
918 }
919 variant_langs = {
920 # ZhConverter.php
921 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
922 # UnConverter.php
923 'uz', 'uz-latn', 'uz-cyrl',
924 # TlyConverter.php
925 'tly', 'tly-cyrl',
926 # TgConverter.php
927 'tg', 'tg-latn',
928 # SrConverter.php
929 'sr', 'sr-ec', 'sr-el',
930 # ShiConverter.php
931 'shi', 'shi-tfng', 'shi-latn',
932 # ShConverter.php
933 'sh-latn', 'sh-cyrl',
934 # KuConverter.php
935 'ku', 'ku-arab', 'ku-latn',
936 # KkConverter.php
937 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn',
938 # IuConverter.php
939 'iu', 'ike-cans', 'ike-latn',
940 # GanConverter.php
941 'gan', 'gan-hans', 'gan-hant',
942 # EnConverter.php
943 'en', 'en-x-piglatin',
944 # CrhConverter.php
945 'crh', 'crh-cyrl', 'crh-latn',
946 # BanConverter.php
947 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
948 }
949 magic_vars_i = {
950 'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
951 }
952 magic_vars = {
953 '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
954 'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
955 'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
956 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
957 'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
958 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
959 'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
960 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
961 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
962 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
963 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
964 'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
965 'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
966 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
967 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
968 }
969 parser_functions_i = {
970 'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
971 'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
972 'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
973 'URLENCODE',
974 }
    # Case-sensitive parser functions; matched verbatim via a '|'.join'ed
    # alternation in 'template-begin-space'.
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }
    # State table; regexes are assembled from class-level helpers
    # (title_char, protocols, link_address, nbsp_char, ...) defined above.
    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
                """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
                                         link_address, link_char_class), Name.Label),
            # Magic links
            (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
                """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        # Inside a #REDIRECT [[...]] target: page title, optional #section.
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (r'(?i)[{}]+'.format(title_char), Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],
        # Inline markup; this state is also include()'d from most other states.
        'inline': [
            # Signatures
            (r'~{3,5}', Keyword),
            # Entities
            include('entity'),
            # Bold & italic
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), 'inline-italic-bold'),
            (r"'''(?!')", Generic.Strong, 'inline-bold'),
            (r"''(?!')", Generic.Emph, 'inline-italic'),
            # Comments & parameters & templates
            include('replaceable'),
            # Media links
            (
                r"""(?xi)
                (\[\[)
                    (File|Image) (:)
                    ([{}]*)
                    (?: (\#) ([{}]*?) )?
                """.format(title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label),
                'medialink-inner'
            ),
            # Wikilinks
            (
                r"""(?xi)
                (\[\[)(?!{}) # Should not contain URLs
                    (?: ([{}]*) (:))?
                    ([{}]*?)
                    (?: (\#) ([{}]*?) )?
                (\]\])
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label, Punctuation)
            ),
            (
                r"""(?xi)
                (\[\[)(?!{})
                    (?: ([{}]*) (:))?
                    ([{}]*?)
                    (?: (\#) ([{}]*?) )?
                (\|)
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label, Punctuation),
                'wikilink-inner'
            ),
            # External links
            (
                r"""(?xi)
                (\[)
                ((?:{}) {} {}*)
                (\s*)
                """.format('|'.join(protocols), link_address, link_char_class),
                bygroups(Punctuation, Name.Label, Whitespace),
                'extlink-inner'
            ),
            # Tables
            (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
             Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
            # HTML tags
            (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # <nowiki>
            (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
             Name.Tag), ('tag-nowiki', 'tag-inner')),
            # <pre>
            (r'(?i)(<)(pre)\b', bygroups(Punctuation,
             Name.Tag), ('tag-pre', 'tag-inner')),
            # <categorytree>
            (r'(?i)(<)(categorytree)\b', bygroups(
                Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
            # <hiero>
            (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
             Name.Tag), ('tag-hiero', 'tag-inner')),
            # <math>
            (r'(?i)(<)(math)\b', bygroups(Punctuation,
             Name.Tag), ('tag-math', 'tag-inner')),
            # <chem>
            (r'(?i)(<)(chem)\b', bygroups(Punctuation,
             Name.Tag), ('tag-chem', 'tag-inner')),
            # <ce>
            (r'(?i)(<)(ce)\b', bygroups(Punctuation,
             Name.Tag), ('tag-ce', 'tag-inner')),
            # <charinsert>
            (r'(?i)(<)(charinsert)\b', bygroups(
                Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
            # <templatedata>
            (r'(?i)(<)(templatedata)\b', bygroups(
                Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
1130 # <gallery>
1131 (r'(?i)(<)(gallery)\b', bygroups(
1132 Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
1133 # <graph>
1134 (r'(?i)(<)(gallery)\b', bygroups(
1135 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
            # <dynamicpagelist>
            (r'(?i)(<)(dynamicpagelist)\b', bygroups(
                Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
            # <inputbox>
            (r'(?i)(<)(inputbox)\b', bygroups(
                Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
            # <rss>
            (r'(?i)(<)(rss)\b', bygroups(
                Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
            # <imagemap>
            (r'(?i)(<)(imagemap)\b', bygroups(
                Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
            # <syntaxhighlight>: tag content is dispatched to the
            # handle_syntaxhighlight callback defined above.
            (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <syntaxhighlight>: Fallback case for self-closing tags
            (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <source>: legacy alias of <syntaxhighlight>
            (r'(?i)(</)(source)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <source>: Fallback case for self-closing tags
            (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <score>: content goes through the handle_score callback.
            (r'(?i)(</)(score)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_score)),
            # <score>: Fallback case for self-closing tags
            (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # Other parser tags
            (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # LanguageConverter markups
            (
                r"""(?xi)
                (-\{{) # Escape format()
                (?: ([^|]) (\|))?
                (?: (\s* (?:{variants}) \s*) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Keyword, Punctuation,
                         Name.Label, Operator, Name.Label, Punctuation),
                'lc-inner'
            ),
            (r'-\{', Punctuation, 'lc-raw'),
        ],
        'wikilink-inner': [
            # Quit in case of another wikilink
            (r'(?=\[\[)', Punctuation, '#pop'),
            (r'\]\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'medialink-inner': [
            (r'\]\]', Punctuation, '#pop'),
            # name=value media parameters (e.g. thumb, alt=...)
            (r'(\|)([^\n=|]*)(=)',
             bygroups(Punctuation, Name.Attribute, Operator)),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'quote-common': [
            # Quit in case of link/template endings
            (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        # The four states below track ''italic'' / '''bold''' nesting order.
        'inline-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), ('#pop', 'inline-bold')),
            (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic-bold')),
            (r"''(?!')", Generic.Emph, '#pop'),
            include('inline'),
            include('text-italic'),
        ],
        'inline-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
            (r"'''(?!')", Generic.Strong, '#pop'),
            (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold-italic')),
            include('inline'),
            include('text-bold'),
        ],
        'inline-bold-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), '#pop'),
            (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-italic'),
        ],
        'inline-italic-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), '#pop'),
            (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')),
            # NOTE(review): unlike its three sibling quote states, this state
            # has no include('inline') before the text rules — confirm whether
            # that omission is intentional.
            include('text-bold'),
        ],
        # Inside a -{ ... }- language-variant conversion block with
        # variant:text rules.
        'lc-inner': [
            (
                r"""(?xi)
                (;)
                (?: (\s* (?:{variants}) \s*) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Name.Label,
                         Operator, Name.Label, Punctuation)
            ),
            (r';?\s*?\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        # A -{ ... }- block without conversion rules.
        'lc-raw': [
            (r'\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        # Constructs that are substituted before parsing: comments,
        # {{{parameters}}}, {{templates}} and parser functions.
        'replaceable': [
            # Comments
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            # Parameters
            (
                r"""(?x)
                (\{{3})
                ([^|]*?)
                (?=\}{3}|\|)
                """,
                bygroups(Punctuation, Name.Variable),
                'parameter-inner',
            ),
            # Magic variables
            (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            # Parser functions & templates
            (r'\{\{', Punctuation, 'template-begin-space'),
            # <tvar> legacy syntax
            (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
             Name.Tag, Punctuation, String, Punctuation)),
            (r'</>', Punctuation, '#pop'),
            # <tvar>
            (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)(tvar)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        ],
        'parameter-inner': [
            (r'\}{3}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'template-begin-space': [
            # Templates allow line breaks at the beginning, and due to how MediaWiki handles
            # comments, an extra state is required to handle things like {{\n<!---->\n name}}
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            (r'\s+', Whitespace),
            # Parser functions
            (
                r'(?i)(\#[%s]*?|%s)(:)' % (title_char,
                                           '|'.join(parser_functions_i)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            (
                r'(%s)(:)' % ('|'.join(parser_functions)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            # Templates
            (
                r'(?i)([%s]*?)(:)' % title_char,
                bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
            ),
            default(('#pop', 'template-name'),),
        ],
        # The template title, up to the first | or }}.
        'template-name': [
            (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
            (r'\}\}', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            include('replaceable'),
            *text_rules(Name.Tag),
        ],
        'template-inner': [
            (r'\}\}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            # |name= named arguments
            (
                r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
                """,
                bygroups(Name.Label, Operator)
            ),
            include('inline'),
            include('text'),
        ],
        'table': [
            # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
            # Endings
            (r'^([ \t\n\r\0\x0B]*?)(\|\})',
             bygroups(Whitespace, Punctuation), '#pop'),
            # Table rows
            (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
             using(this, state=['root', 'attr']))),
            # Captions
            (
                r"""(?x)
                ^([ \t\n\r\0\x0B]*?)(\|\+)
                # Exclude links, template and tags
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
                (.*?)$
                """,
                bygroups(Whitespace, Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation, Generic.Heading),
            ),
            # Table data
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
            ),
            # Table headers
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)! )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
                'table-header',
            ),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'table-header': [
            # Requires another state for || handling inside headers
            (r'\n', Text, '#pop'),
            (
                r"""(?x)
                (!!|\|\|)
                (?:
                    ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                    (\|)(?!\|)
                )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation)
            ),
            *text_rules(Generic.Subheading),
        ],
        'entity': [
            # HTML character references, e.g. &amp;
            (r'&\S*?;', Name.Entity),
        ],
        # Description-list term, up to the optional ':' definition.
        'dt': [
            (r'\n', Text, '#pop'),
            include('inline'),
            (r':', Keyword, '#pop'),
            include('text'),
        ],
        'extlink-inner': [
            (r'\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'nowiki-ish': [
            include('entity'),
            include('text'),
        ],
        # HTML-style attribute lists; values may be unquoted (attr-val-0),
        # single-quoted (attr-val-1) or double-quoted (attr-val-2).
        'attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            include('replaceable'),
            *text_rules(String),
        ],
        'attr-val-1': [
            (r"'", String.Single, '#pop'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'attr-val-2': [
            (r'"', String.Double, '#pop'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-inner-ordinary': [
            (r'/?\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        'tag-inner': [
            # Return to root state for self-closing tags
            (r'/\s*>', Punctuation, '#pop:2'),
            (r'\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        # The states below are just like their non-tag variants, the key difference is
        # they forcibly quit when encountering tag closing markup
        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        # Per-tag content states, built by the factory helpers defined above:
        # nowiki_tag_rules / plaintext_tag_rules / delegate_tag_rules.
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer),
        'tag-chem': delegate_tag_rules('chem', TexLexer),
        'tag-ce': delegate_tag_rules('ce', TexLexer),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text': text_rules(Text),
    }