
1""" 

2 pygments.lexers.markup 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for non-HTML markup languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers.html import XmlLexer 

14from pygments.lexers.javascript import JavascriptLexer 

15from pygments.lexers.css import CssLexer 

16from pygments.lexers.lilypond import LilyPondLexer 

17from pygments.lexers.data import JsonLexer 

18 

19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ 

20 using, this, do_insertions, default, words 

21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

22 Number, Punctuation, Generic, Other, Whitespace 

23from pygments.util import get_bool_opt, ClassNotFound 

24 

25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', 

26 'MozPreprocHashLexer', 'MozPreprocPercentLexer', 

27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', 

28 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer'] 

29 

30 

class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }


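# A minimal usage sketch (not part of the original module), using only the
# public Pygments API: feed BBCode markup through this lexer and a formatter.
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     from pygments.lexers.markup import BBCodeLexer
#
#     print(highlight('[b]bold[/b] and [color=green]green[/color]',
#                     BBCodeLexer(), HtmlFormatter()))
#
# The 'tag' state is pushed at '[b' and popped at ']'; a bare '=value' after
# the tag name (as in [color=green]) is tokenized as Operator/String.
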

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }



class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
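
    # How the directive body is re-lexed (explanatory note, not part of the
    # original source): the common indentation is stripped so the delegated
    # lexer sees plain code, and do_insertions() splices the stripped prefixes
    # back in as Text tokens at the offsets recorded in `ins`. Roughly, for a
    # two-space indent:
    #
    #     "  print(1)\n"  ->  the delegated lexer sees "print(1)\n", while
    #                         ins holds (0, [(0, Text, "  ")])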


    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                     # has two lines
                p1 * 2 + 1 == p2 and        # they are the same length
                text[p1+1] in '-=' and      # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


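# Usage sketch (not part of the original module): the `handlecodeblocks`
# option documented above is passed as a lexer keyword argument.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.markup import RstLexer
#
#     rst = ".. code-block:: python\n\n    print('hi')\n"
#     # Delegate the directive body to the Python lexer (the default) ...
#     print(highlight(rst, RstLexer(), TerminalFormatter()))
#     # ... or keep the body as a plain string-colored block:
#     print(highlight(rst, RstLexer(handlecodeblocks=False), TerminalFormatter()))
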

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


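# A small demonstration (not part of the original module) of how the
# analyse_text() hook above is consumed: pygments.lexers.guess_lexer() calls
# it on the registered lexers and picks the best-scoring one.
#
#     from pygments.lexers import guess_lexer
#
#     lx = guess_lexer("\\documentclass{article}\n\\begin{document}\n")
#     print(lx.name)  # expected: 'TeX', since analyse_text() returned True
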

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9



class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }



class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)


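# How these delegating lexers fit together (explanatory note, not part of the
# original source): DelegatingLexer(root, language) runs the `language` lexer
# first; every span it emits as the Other token is collected and re-lexed by
# the `root` lexer. So for *.css.in files, MozPreprocPercentLexer claims the
# %-directives and CssLexer highlights everything in between, e.g.:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.markup import MozPreprocCssLexer
#
#     src = "%ifdef FOO\nbody { color: red; }\n%endif\n"
#     print(highlight(src, MozPreprocCssLexer(), TerminalFormatter()))
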

class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    .. versionadded:: 2.2
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String.Backtick, match.group(1)
        yield match.start(2), String.Backtick, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String.Backtick, match.group(5)

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$\n)', _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            # [an example][id]
            # [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


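# Usage sketch (not part of the original module): a fenced block with a
# language tag is routed through _handle_codeblock(), so the Python inside
# the fence is highlighted by the Python lexer rather than as one big
# String.Backtick token.
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     from pygments.lexers.markup import MarkdownLexer
#
#     md = "# Title\n\n```python\nprint('hi')\n```\n"
#     print(highlight(md, MarkdownLexer(), HtmlFormatter()))
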

class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    .. versionadded:: 2.7
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


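# Usage sketch (not part of the original module): lexing a tiddler whose
# metadata header and fenced code block both get special treatment.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.markup import TiddlyWiki5Lexer
#
#     tid = "title: Demo\n\n! Heading\n\n```python\nprint('hi')\n```\n"
#     print(highlight(tid, TiddlyWiki5Lexer(), TerminalFormatter()))
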

class WikitextLexer(RegexLexer):
    """
    For MediaWiki Wikitext.

    Parsing Wikitext is tricky, and results vary between different MediaWiki
    installations, so we only highlight common syntaxes (built-in or from
    popular extensions), and also assume templates produce no unbalanced
    syntaxes.

    .. versionadded:: 2.15
    """
    name = 'Wikitext'
    url = 'https://www.mediawiki.org/wiki/Wikitext'
    aliases = ['wikitext', 'mediawiki']
    filenames = []
    mimetypes = ['text/x-wiki']
    flags = re.MULTILINE

    def nowiki_tag_rules(tag_name):
        return [
            (r'(?i)(</)({})(\s*)(>)'.format(tag_name),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
            include('entity'),
            include('text'),
        ]

    def plaintext_tag_rules(tag_name):
        return [
            (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name),
             bygroups(Text, Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
        ]

    def delegate_tag_rules(tag_name, lexer):
        return [
            (r'(?i)(</)({})(\s*)(>)'.format(tag_name),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
            (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)),
        ]

    def text_rules(token):
        return [
            (r'\w+', token),
            (r'[^\S\n]+', token),
            (r'(?s).', token),
        ]

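    # Note (not part of the original source): the four helpers above are rule
    # factories, called while the class body is being evaluated. Each per-tag
    # state near the bottom of `tokens` is produced by a call such as
    # nowiki_tag_rules('pre'), so dozens of near-identical states share one
    # definition instead of being written out by hand.
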

    def handle_syntaxhighlight(self, match, ctx):
        from pygments.lexers import get_lexer_by_name

        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lexer = None
        content = attr_content[index+1:]
        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

        if len(lang_match) >= 1:
            # Pick the last match in case of multiple matches
            lang = lang_match[-1][1]
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        if lexer is None:
            yield match.start() + index + 1, Text, content
        else:
            yield from lexer.get_tokens_unprocessed(content)

    def handle_score(self, match, ctx):
        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        content = attr_content[index+1:]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'

        if lang == 'lilypond':  # Case sensitive
            yield from LilyPondLexer().get_tokens_unprocessed(content)
        else:  # ABC
            # FIXME: Use ABC lexer in the future
            yield match.start() + index + 1, Text, content

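    # Note (not part of the original source): both handlers follow the
    # RegexLexer callback protocol. They are referenced from bygroups() in the
    # rules below, receive a pseudo-match covering just the attribute/content
    # group of the <syntaxhighlight ...> or <score ...> tag, and must yield
    # (absolute_offset, token_type, text) triples; hence the offsets are
    # rebuilt from match.start() plus the position of '>'.
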

    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # UnConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # KkConverter.php
        'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }


    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
             """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
                                         link_address, link_char_class), Name.Label),
            # Magic links
            (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
             """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (r'(?i)[{}]+'.format(title_char), Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],
        'inline': [
            # Signatures
            (r'~{3,5}', Keyword),
            # Entities
            include('entity'),
            # Bold & italic
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), 'inline-italic-bold'),
            (r"'''(?!')", Generic.Strong, 'inline-bold'),
            (r"''(?!')", Generic.Emph, 'inline-italic'),
            # Comments & parameters & templates
            include('replaceable'),
            # Media links
            (
                r"""(?xi)
                (\[\[)
                    (File|Image) (:)
                    ([{}]*)
                    (?: (\#) ([{}]*?) )?
                """.format(title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label),
                'medialink-inner'
            ),
            # Wikilinks
            (
                r"""(?xi)
                (\[\[)(?!{}) # Should not contain URLs
                    (?: ([{}]*) (:))?
                    ([{}]*?)
                    (?: (\#) ([{}]*?) )?
                (\]\])
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label, Punctuation)
            ),
            (
                r"""(?xi)
                (\[\[)(?!{})
                    (?: ([{}]*) (:))?
                    ([{}]*?)
                    (?: (\#) ([{}]*?) )?
                (\|)
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         Name.Tag, Punctuation, Name.Label, Punctuation),
                'wikilink-inner'
            ),
            # External links
            (
                r"""(?xi)
                (\[)
                    ((?:{}) {} {}*)
                    (\s*)
                """.format('|'.join(protocols), link_address, link_char_class),
                bygroups(Punctuation, Name.Label, Whitespace),
                'extlink-inner'
            ),
            # Tables
            (r'^(:*)(\s*?)(\{\|)([^\n]*)$',
             bygroups(Keyword, Whitespace, Punctuation,
                      using(this, state=['root', 'attr'])), 'table'),
            # HTML tags
            (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # <nowiki>
            (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
             Name.Tag), ('tag-nowiki', 'tag-inner')),
            # <pre>
            (r'(?i)(<)(pre)\b', bygroups(Punctuation,
             Name.Tag), ('tag-pre', 'tag-inner')),
            # <categorytree>
            (r'(?i)(<)(categorytree)\b', bygroups(
                Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
            # <hiero>
            (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
             Name.Tag), ('tag-hiero', 'tag-inner')),
            # <math>
            (r'(?i)(<)(math)\b', bygroups(Punctuation,
             Name.Tag), ('tag-math', 'tag-inner')),
            # <chem>
            (r'(?i)(<)(chem)\b', bygroups(Punctuation,
             Name.Tag), ('tag-chem', 'tag-inner')),
            # <ce>
            (r'(?i)(<)(ce)\b', bygroups(Punctuation,
             Name.Tag), ('tag-ce', 'tag-inner')),
            # <charinsert>
            (r'(?i)(<)(charinsert)\b', bygroups(
                Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
            # <templatedata>
            (r'(?i)(<)(templatedata)\b', bygroups(
                Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
            # <gallery>
            (r'(?i)(<)(gallery)\b', bygroups(
                Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),

1133 # <graph> 

1134 (r'(?i)(<)(gallery)\b', bygroups( 

1135 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')), 

1136 # <dynamicpagelist> 

1137 (r'(?i)(<)(dynamicpagelist)\b', bygroups( 

1138 Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')), 

1139 # <inputbox> 

1140 (r'(?i)(<)(inputbox)\b', bygroups( 

1141 Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')), 

1142 # <rss> 

1143 (r'(?i)(<)(rss)\b', bygroups( 

1144 Punctuation, Name.Tag), ('tag-rss', 'tag-inner')), 

1145 # <imagemap> 

1146 (r'(?i)(<)(imagemap)\b', bygroups( 

1147 Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')), 

1148 # <syntaxhighlight> 

1149 (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)', 

1150 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1151 (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1152 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1153 # <syntaxhighlight>: Fallback case for self-closing tags 

1154 (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1155 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1156 # <source> 

1157 (r'(?i)(</)(source)\b(\s*)(>)', 

1158 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1159 (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1160 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1161 # <source>: Fallback case for self-closing tags 

1162 (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1163 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1164 # <score> 

1165 (r'(?i)(</)(score)\b(\s*)(>)', 

1166 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1167 (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1168 bygroups(Punctuation, Name.Tag, handle_score)), 

1169 # <score>: Fallback case for self-closing tags 

1170 (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1171 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1172 # Other parser tags 

1173 (r'(?i)(<)({})\b'.format('|'.join(parser_tags)), 

1174 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1175 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)), 

1176 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1177 # LanguageConverter markups 

1178 ( 

1179 r"""(?xi) 

1180 (-\{{) # Escape format() 

1181 (?: ([^|]) (\|))? 

1182 (?: (\s* (?:{variants}) \s*) (=>))? 

1183 (\s* (?:{variants}) \s*) (:) 

1184 """.format(variants='|'.join(variant_langs)), 

1185 bygroups(Punctuation, Keyword, Punctuation, 

1186 Name.Label, Operator, Name.Label, Punctuation), 

1187 'lc-inner' 

1188 ), 

1189 (r'-\{', Punctuation, 'lc-raw'), 

1190 ], 

1191 'wikilink-inner': [ 

1192 # Quit in case of another wikilink 

1193 (r'(?=\[\[)', Punctuation, '#pop'), 

1194 (r'\]\]', Punctuation, '#pop'), 

1195 include('inline'), 

1196 include('text'), 

1197 ], 

1198 'medialink-inner': [ 

1199 (r'\]\]', Punctuation, '#pop'), 

1200 (r'(\|)([^\n=|]*)(=)', 

1201 bygroups(Punctuation, Name.Attribute, Operator)), 

1202 (r'\|', Punctuation), 

1203 include('inline'), 

1204 include('text'), 

1205 ], 

1206 'quote-common': [ 

1207 # Quit in case of link/template endings 

1208 (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'), 

1209 (r'\n', Text, '#pop'), 

1210 ], 

1211 'inline-italic': [ 

1212 include('quote-common'), 

1213 (r"('')(''')(?!')", bygroups(Generic.Emph, 

1214 Generic.Strong), ('#pop', 'inline-bold')), 

1215 (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic-bold')), 

1216 (r"''(?!')", Generic.Emph, '#pop'), 

1217 include('inline'), 

1218 include('text-italic'), 

1219 ], 

1220 'inline-bold': [ 

1221 include('quote-common'), 

1222 (r"(''')('')(?!')", bygroups( 

1223 Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')), 

1224 (r"'''(?!')", Generic.Strong, '#pop'), 

1225 (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold-italic')), 

1226 include('inline'), 

1227 include('text-bold'), 

1228 ], 

1229 'inline-bold-italic': [ 

1230 include('quote-common'), 

1231 (r"('')(''')(?!')", bygroups(Generic.Emph, 

1232 Generic.Strong), '#pop'), 

1233 (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')), 

1234 (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')), 

1235 include('inline'), 

1236 include('text-italic'), 

1237 ], 

1238 'inline-italic-bold': [ 

1239 include('quote-common'), 

1240 (r"(''')('')(?!')", bygroups( 

1241 Generic.Strong, Generic.Emph), '#pop'), 

1242 (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')), 

1243 (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')), 

1244 include('text-bold'), 

1245 ], 

1246 'lc-inner': [ 

1247 ( 

1248 r"""(?xi) 

1249 (;) 

1250 (?: (\s* (?:{variants}) \s*) (=>))? 

1251 (\s* (?:{variants}) \s*) (:) 

1252 """.format(variants='|'.join(variant_langs)), 

1253 bygroups(Punctuation, Name.Label, 

1254 Operator, Name.Label, Punctuation) 

1255 ), 

1256 (r';?\s*?\}-', Punctuation, '#pop'), 

1257 include('inline'), 

1258 include('text'), 

1259 ], 

1260 'lc-raw': [ 

1261 (r'\}-', Punctuation, '#pop'), 

1262 include('inline'), 

1263 include('text'), 

1264 ], 

1265 'replaceable': [ 

1266 # Comments 

1267 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), 

1268 # Parameters 

1269 ( 

1270 r"""(?x) 

1271 (\{{3}) 

1272 ([^|]*?) 

1273 (?=\}{3}|\|) 

1274 """, 

1275 bygroups(Punctuation, Name.Variable), 

1276 'parameter-inner', 

1277 ), 

1278 # Magic variables 

1279 (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i), 

1280 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), 

1281 (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars), 

1282 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), 

1283 # Parser functions & templates 

1284 (r'\{\{', Punctuation, 'template-begin-space'), 

1285 # <tvar> legacy syntax 

1286 (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation, 

1287 Name.Tag, Punctuation, String, Punctuation)), 

1288 (r'</>', Punctuation, '#pop'), 

1289 # <tvar> 

1290 (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1291 (r'(?i)(</)(tvar)\b(\s*)(>)', 

1292 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1293 ], 

1294 'parameter-inner': [ 

1295 (r'\}{3}', Punctuation, '#pop'), 

1296 (r'\|', Punctuation), 

1297 include('inline'), 

1298 include('text'), 

1299 ], 

1300 'template-begin-space': [ 

1301 # Templates allow line breaks at the beginning, and due to how MediaWiki handles 

1302 # comments, an extra state is required to handle things like {{\n<!---->\n name}} 

1303 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), 

1304 (r'\s+', Whitespace), 

1305 # Parser functions 

1306 ( 

1307 r'(?i)(\#[%s]*?|%s)(:)' % (title_char, 

1308 '|'.join(parser_functions_i)), 

1309 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') 

1310 ), 

1311 ( 

1312 r'(%s)(:)' % ('|'.join(parser_functions)), 

1313 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') 

1314 ), 

1315 # Templates 

1316 ( 

1317 r'(?i)([%s]*?)(:)' % title_char, 

1318 bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name') 

1319 ), 

1320 default(('#pop', 'template-name'),), 

1321 ], 

1322 'template-name': [ 

1323 (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')), 

1324 (r'\}\}', Punctuation, '#pop'), 

1325 (r'\n', Text, '#pop'), 

1326 include('replaceable'), 

1327 *text_rules(Name.Tag), 

1328 ], 

1329 'template-inner': [ 

1330 (r'\}\}', Punctuation, '#pop'), 

1331 (r'\|', Punctuation), 

1332 ( 

1333 r"""(?x) 

1334 (?<=\|) 

1335 ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags 

1336 (=) 

1337 """, 

1338 bygroups(Name.Label, Operator) 

1339 ), 

1340 include('inline'), 

1341 include('text'), 

1342 ], 

1343 'table': [ 

1344 # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior 

1345 # Endings 

1346 (r'^([ \t\n\r\0\x0B]*?)(\|\})', 

1347 bygroups(Whitespace, Punctuation), '#pop'), 

1348 # Table rows 

1349 (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation, 

1350 using(this, state=['root', 'attr']))), 

1351 # Captions 

1352 ( 

1353 r"""(?x) 

1354 ^([ \t\n\r\0\x0B]*?)(\|\+) 

1355 # Exclude links, template and tags 

1356 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )? 

1357 (.*?)$ 

1358 """, 

1359 bygroups(Whitespace, Punctuation, using(this, state=[ 

1360 'root', 'attr']), Punctuation, Generic.Heading), 

1361 ), 

1362 # Table data 

1363 ( 

1364 r"""(?x) 

1365 ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| ) 

1366 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? 

1367 """, 

1368 bygroups(Punctuation, using(this, state=[ 

1369 'root', 'attr']), Punctuation), 

1370 ), 

1371 # Table headers 

1372 ( 

1373 r"""(?x) 

1374 ( ^(?:[ \t\n\r\0\x0B]*?)! ) 

1375 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? 

1376 """, 

1377 bygroups(Punctuation, using(this, state=[ 

1378 'root', 'attr']), Punctuation), 

1379 'table-header', 

1380 ), 

1381 include('list'), 

1382 include('inline'), 

1383 include('text'), 

1384 ], 

1385 'table-header': [ 

1386 # Requires another state for || handling inside headers 

1387 (r'\n', Text, '#pop'), 

1388 ( 

1389 r"""(?x) 

1390 (!!|\|\|) 

1391 (?: 

1392 ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? ) 

1393 (\|)(?!\|) 

1394 )? 

1395 """, 

1396 bygroups(Punctuation, using(this, state=[ 

1397 'root', 'attr']), Punctuation) 

1398 ), 

1399 *text_rules(Generic.Subheading), 

1400 ], 

1401 'entity': [ 

1402 (r'&\S*?;', Name.Entity), 

1403 ], 

1404 'dt': [ 

1405 (r'\n', Text, '#pop'), 

1406 include('inline'), 

1407 (r':', Keyword, '#pop'), 

1408 include('text'), 

1409 ], 

1410 'extlink-inner': [ 

1411 (r'\]', Punctuation, '#pop'), 

1412 include('inline'), 

1413 include('text'), 

1414 ], 

1415 'nowiki-ish': [ 

1416 include('entity'), 

1417 include('text'), 

1418 ], 

1419 'attr': [ 

1420 include('replaceable'), 

1421 (r'\s+', Whitespace), 

1422 (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'), 

1423 (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'), 

1424 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'), 

1425 (r'[\w:-]+', Name.Attribute), 

1426 

1427 ], 

1428 'attr-val-0': [ 

1429 (r'\s', Whitespace, '#pop'), 

1430 include('replaceable'), 

1431 *text_rules(String), 

1432 ], 

1433 'attr-val-1': [ 

1434 (r"'", String.Single, '#pop'), 

1435 include('replaceable'), 

1436 *text_rules(String.Single), 

1437 ], 

1438 'attr-val-2': [ 

1439 (r'"', String.Double, '#pop'), 

1440 include('replaceable'), 

1441 *text_rules(String.Double), 

1442 ], 

1443 'tag-inner-ordinary': [ 

1444 (r'/?\s*>', Punctuation, '#pop'), 

1445 include('tag-attr'), 

1446 ], 

1447 'tag-inner': [ 

1448 # Return to root state for self-closing tags 

1449 (r'/\s*>', Punctuation, '#pop:2'), 

1450 (r'\s*>', Punctuation, '#pop'), 

1451 include('tag-attr'), 

1452 ], 

        # The states below are just like their non-tag variants; the key
        # difference is that they forcibly quit when encountering tag-closing
        # markup

        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer),
        'tag-chem': delegate_tag_rules('chem', TexLexer),
        'tag-ce': delegate_tag_rules('ce', TexLexer),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text': text_rules(Text),
    }
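

# Usage sketch (not part of the original module): WikitextLexer in turn
# delegates embedded <syntaxhighlight lang="..."> bodies through the
# handle_syntaxhighlight() callback defined above.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.markup import WikitextLexer
#
#     wiki = ("== Heading ==\n"
#             "* item with '''bold''' text\n"
#             '<syntaxhighlight lang="python">print(1)</syntaxhighlight>\n')
#     print(highlight(wiki, WikitextLexer(), TerminalFormatter()))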