Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/markup.py: 56%
200 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:45 +0000
"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other, Whitespace
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            # anything up to the next opening bracket is plain text
            (r'[^[]+', Text),
            # opening of a tag or end tag, e.g. "[b" or "[/b"
            (r'\[/?\w+', Keyword, 'tag'),
            # a bracket that did not start a tag
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with a value, e.g. [url=http://...]
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # bare tag argument, e.g. [color=green]
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # closing bracket finishes the tag
            (r'\]', Keyword, '#pop'),
        ],
    }
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # processing-instruction / comment line
            (r'^#.*$', Comment),
            # "!word" suppresses wiki interpretation of the next token
            (r'(!)(\S+)', bygroups(Keyword, Text)),
            # section titles: = Title =, optionally with a trailing anchor
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # literal code blocks, with optional shebang line
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            # inline formatting markers (bold, italics, monospace, ...)
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),
            # bulleted and lettered/numbered lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # macro invocation, e.g. [[TableOfContents]]
            (r'\[\[\w+.*?\]\]', Keyword),
            # link with optional label
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),
            # horizontal rule
            (r'^----+$', Keyword),
            # plain text, then fallbacks for newline and any stray char
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # nested blocks are allowed in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            # slurp uninteresting text; allow loose { or }
            (r'[^{}]+', Comment.Preproc),
            (r'.', Comment.Preproc),
        ],
    }
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Tokenize a code/sourcecode/code-block directive, delegating the
        code body to the lexer named by the directive argument when one
        exists (and ``handlecodeblocks`` is enabled)."""
        from pygments.lexers import get_lexer_by_name

        # directive header: ".. code-block:: language"
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # look up a lexer for the named language, if wanted and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indent = match.group(8)
        indent_size = len(indent)
        code = indent + match.group(9) + match.group(10) + match.group(11)

        # no lexer for this language: emit the body as an ordinary code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # strip the common indentation, remembering it as insertions so the
        # delegated lexer sees only the code itself
        insertions = []
        stripped = ''
        for line in code.splitlines(True):
            if len(line) > indent_size:
                insertions.append((len(stripped),
                                   [(0, Text, line[:indent_size])]))
                stripped += line[indent_size:]
            else:
                stripped += line
        yield from do_insertions(insertions,
                                 lexer.get_tokens_unprocessed(stripped))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        # whether to delegate code-block bodies to the named language's lexer
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        # a leading ".." (but not "...") strongly suggests an rst directive
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        first_nl = text.find("\n")
        second_nl = text.find("\n", first_nl + 1)
        if (second_nl > -1 and                          # has two lines
                first_nl * 2 + 1 == second_nl and       # they are the same length
                text[first_nl + 1] in '-=' and          # the next line both starts and ends with
                text[first_nl + 1] == text[second_nl - 1]):  # ...a sufficiently high header
            return 0.5
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            # comment to end of line, grouping braces, special characters
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # math environments: \[ \] and $$ for display, \( \) and $ inline
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # control sequences: \word or a single escaped character
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            # optional argument and star form after a control sequence
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        # common (La)TeX preamble commands at the very start of the file
        if text.startswith(("\\documentclass", "\\input",
                            "\\documentstyle", "\\relax")):
            return True
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            # a request line: "." followed by a request name
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # regular characters: slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # line continuation inside a request
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is (g)roff source.

        Returns True for a leading troff comment or .TH macro, 0.9 for a
        plausible two-letter request, and False/None otherwise.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Bug fix: the original indexed text[3] directly, which raised
        # IndexError on short inputs such as ".ab"; a slice is safe and
        # yields '' (not whitespace) when the text is too short.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # a preprocessor line starts with '#': parse directive, then expr
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            # "literal" swallows the rest of the line verbatim
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            # __MAGIC__ variables and @substitutions@
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # identical to the hash variant, but '%' introduces directives
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
class MozPreprocXulLexer(DelegatingLexer):
    """
    Combines the `MozPreprocHashLexer` with the `XmlLexer`, so that any
    data the preprocessor leaves untouched is highlighted as XUL/XML.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Combines the `MozPreprocHashLexer` with the `JavascriptLexer`, so that
    any data the preprocessor leaves untouched is highlighted as Javascript.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
class MozPreprocCssLexer(DelegatingLexer):
    """
    Combines the `MozPreprocPercentLexer` with the `CssLexer`, so that any
    data the preprocessor leaves untouched is highlighted as CSS.

    .. versionadded:: 2.0
    """
    # Doc fix: the original docstring claimed this used MozPreprocHashLexer,
    # but the delegate below is the '%'-marker MozPreprocPercentLexer
    # (CSS uses '%' because '#' is significant in CSS syntax).
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    .. versionadded:: 2.2
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # opening fence and language name
        yield match.start(1), String.Backtick, match.group(1)
        yield match.start(2), String.Backtick, match.group(2)
        yield match.start(3), Text, match.group(3)

        # look up a lexer for the named language, if wanted and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        if lexer is None:
            # no lexer for this language: treat the body as a plain code block
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        # closing fence
        yield match.start(5), String.Backtick, match.group(5)

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$\n)', _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # whether to delegate fenced code blocks to the named language's lexer
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    .. versionadded:: 2.7
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # opening fence and language name
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # look up a lexer for the named language, if wanted and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        if lexer is None:
            # no lexer for this language: treat the body as a plain code block
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        # closing fence
        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # opening <style> tag and newline
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        if lexer is None:
            # CSS lexer unavailable: treat the body as a plain code block
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        # closing </style> tag
        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        # whether to delegate fenced code blocks to the named language's lexer
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)