Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/markup.py: 56%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

318 statements  

1""" 

2 pygments.lexers.markup 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for non-HTML markup languages. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers.html import XmlLexer 

14from pygments.lexers.javascript import JavascriptLexer 

15from pygments.lexers.css import CssLexer 

16from pygments.lexers.lilypond import LilyPondLexer 

17from pygments.lexers.data import JsonLexer 

18 

19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ 

20 using, this, do_insertions, default, words 

21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

22 Number, Punctuation, Generic, Other, Whitespace, Literal 

23from pygments.util import get_bool_opt, ClassNotFound 

24 

25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', 

26 'MozPreprocHashLexer', 'MozPreprocPercentLexer', 

27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', 

28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer', 

29 'WikitextLexer'] 

30 

31 

class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']
    url = 'https://www.bbcode.org/'
    version_added = '0.6'

    tokens = {
        'root': [
            # everything up to the next '[' is plain text
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end pops back to 'root'
            (r'\]', Keyword, '#pop'),
        ],
    }

63 

64 

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    # IGNORECASE so e.g. list markers like 'a.' also match uppercase letters
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }

109 

110 

class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Callback for code/sourcecode/code-block directives.

        Emits tokens for the directive header, then delegates the indented
        body to the lexer named in the directive argument (group 6) when
        `handlecodeblocks` is on and the lexer exists; otherwise the body is
        emitted as a String blob.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer, re-inserting the stripped
        # indentation as Text tokens via do_insertions
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments (must come after the directive-like rules above)
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks (literal blocks introduced by '::')
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            # closing `` must be followed by an end-of-string delimiter
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and  # has two lines
                p1 * 2 + 1 == p2 and  # they are the same length
                text[p1+1] in '-=' and  # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5

274 

275 

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        # rules shared by text and math modes
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # control sequence: multi-letter name or single character
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        # optional arguments and star variants following a command
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

334 

335 

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            # a request: '.' followed by a request name
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # line continuation inside a text line re-enters request mode
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is (g)roff source.

        Returns True for comment ('.\\"') or manpage ('.TH ') openings,
        0.9 for any '.xx ' style request, and False otherwise.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Use a slice rather than text[3] so that inputs shorter than four
        # characters (e.g. '.ab') return False instead of raising IndexError.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9

390 

391 

class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            # a directive line: first read the instruction, then its expression
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' swallows the rest of the line verbatim
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            # NOTE(review): '!' precedes '!=' in this alternation, so '!=' is
            # lexed as Operator('!') + Punctuation('=') — confirm intended.
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }

431 

432 

class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        # only the marker differs from the parent; 'expr'/'exprstart' are inherited
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }

452 

453 

class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer handles the Other tokens left by the preprocessor lexer
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)

468 

469 

class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer handles the Other tokens left by the preprocessor lexer
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)

484 

485 

class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # CSS files use the '%' marker variant of the preprocessor
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)

500 

501 

class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """Callback for fenced code blocks with a language tag.

        Emits the fence and language name as String.Backtick, then delegates
        the block body to the named lexer when `handlecodeblocks` is on and
        the lexer exists; otherwise the body is emitted as a String blob.
        """
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
              (?P<whitespace>[^\S\n]+)
              (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

620 

class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        # Build a regex for inline markup delimited by start/end that must
        # not touch word characters on the outside and spans at most a
        # single blank-line-free region.
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            # plain headings, with optional trailing :tag: list
            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered lists items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            # Deadline, Scheduled, CLOSED
            (r'(?i)^( *(?:DEADLINE|SCHEDULED): )(<.+?> *)$',
             bygroups(Generic.Error, Literal.Date)),
            (r'(?i)^( *CLOSED: )(\[.+?\] *)$',
             bygroups(Generic.Deleted, Literal.Date)),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String),  # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
    }

729 

class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            # tiddler metadata fields and pragma keywords at line start
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

886 

887 

888class WikitextLexer(RegexLexer): 

889 """ 

890 For MediaWiki Wikitext. 

891 

892 Parsing Wikitext is tricky, and results vary between different MediaWiki 

893 installations, so we only highlight common syntaxes (built-in or from 

894 popular extensions), and also assume templates produce no unbalanced 

895 syntaxes. 

896 """ 

897 name = 'Wikitext' 

898 url = 'https://www.mediawiki.org/wiki/Wikitext' 

899 aliases = ['wikitext', 'mediawiki'] 

900 filenames = [] 

901 mimetypes = ['text/x-wiki'] 

902 version_added = '2.15' 

903 flags = re.MULTILINE 

904 

    def nowiki_tag_rules(tag_name):
        # State-rule factory: inside a <nowiki>-style tag only entities and
        # plain text are recognized until the closing tag pops the state.
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            include('entity'),
            include('text'),
        ]

912 

    def plaintext_tag_rules(tag_name):
        # State-rule factory: consume everything up to and including the
        # closing tag as plain Text, then pop the state.
        return [
            (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text,
             Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
        ]

918 

    def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
        # State-rule factory: hand the tag's content to another lexer,
        # stopping (via lookahead) right before the closing tag.
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)),
        ]

925 

    def text_rules(token):
        # State-rule factory: tokenize arbitrary text as the given token,
        # in word / intra-line whitespace / single-char chunks.
        return [
            (r'\w+', token),
            (r'[^\S\n]+', token),
            (r'(?s).', token),
        ]

932 

933 def handle_syntaxhighlight(self, match, ctx): 

934 from pygments.lexers import get_lexer_by_name 

935 

936 attr_content = match.group() 

937 start = 0 

938 index = 0 

939 while True: 

940 index = attr_content.find('>', start) 

941 # Exclude comment end (-->) 

942 if attr_content[index-2:index] != '--': 

943 break 

944 start = index + 1 

945 

946 if index == -1: 

947 # No tag end 

948 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) 

949 return 

950 attr = attr_content[:index] 

951 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) 

952 yield match.start(3) + index, Punctuation, '>' 

953 

954 lexer = None 

955 content = attr_content[index+1:] 

956 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) 

957 

958 if len(lang_match) >= 1: 

959 # Pick the last match in case of multiple matches 

960 lang = lang_match[-1][1] 

961 try: 

962 lexer = get_lexer_by_name(lang) 

963 except ClassNotFound: 

964 pass 

965 

966 if lexer is None: 

967 yield match.start() + index + 1, Text, content 

968 else: 

969 yield from lexer.get_tokens_unprocessed(content) 

970 

971 def handle_score(self, match, ctx): 

972 attr_content = match.group() 

973 start = 0 

974 index = 0 

975 while True: 

976 index = attr_content.find('>', start) 

977 # Exclude comment end (-->) 

978 if attr_content[index-2:index] != '--': 

979 break 

980 start = index + 1 

981 

982 if index == -1: 

983 # No tag end 

984 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) 

985 return 

986 attr = attr_content[:index] 

987 content = attr_content[index+1:] 

988 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) 

989 yield match.start(3) + index, Punctuation, '>' 

990 

991 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) 

992 # Pick the last match in case of multiple matches 

993 lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond' 

994 

995 if lang == 'lilypond': # Case sensitive 

996 yield from LilyPondLexer().get_tokens_unprocessed(content) 

997 else: # ABC 

998 # FIXME: Use ABC lexer in the future 

999 yield match.start() + index + 1, Text, content 

1000 

    # Characters allowed in page titles.
    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    # Tab, entity forms of &nbsp;, and Unicode space characters treated as a
    # non-breaking space in magic links (RFC/PMID/ISBN).
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    # First character of an external-link target: numeric IP, bracketed
    # IPv6, or any non-control, non-reserved character.
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    # Characters allowed in the remainder of a link target.
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    # Double-underscore behavior switches matched case-insensitively
    # (combined with a (?i) prefix in the 'root' state).
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    # Double-underscore behavior switches matched case-sensitively.
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    # URL schemes recognized for links; '//' is the protocol-relative form.
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    # HTML tags highlighted as ordinary tags in the 'inline' state.
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    # Tags registered by the MediaWiki parser or popular extensions.
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    # Language variants accepted by LanguageConverter markup (-{...}-),
    # grouped by the converter that defines them.
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    # Magic variables ({{...}} with no arguments), case-insensitive set.
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    # Magic variables, case-sensitive set.
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    # Parser functions ({{NAME:...}}), case-insensitive set.
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    # Parser functions, case-sensitive set.
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }

1105 

    # Lexer state machine. 'root' dispatches to everything else; most inline
    # syntax is shared through the 'list', 'inline' and 'text' includes.
    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
             """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
             link_address, link_char_class), Name.Label),
            # Magic links
            (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
            """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        # Inside the [[...]] of a #REDIRECT line: title, then optional
        # #fragment.
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (rf'(?i)[{title_char}]+', Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],

1152 'inline': [ 

1153 # Signatures 

1154 (r'~{3,5}', Keyword), 

1155 # Entities 

1156 include('entity'), 

1157 # Bold & italic 

1158 (r"('')(''')(?!')", bygroups(Generic.Emph, 

1159 Generic.EmphStrong), 'inline-italic-bold'), 

1160 (r"'''(?!')", Generic.Strong, 'inline-bold'), 

1161 (r"''(?!')", Generic.Emph, 'inline-italic'), 

1162 # Comments & parameters & templates 

1163 include('replaceable'), 

1164 # Media links 

1165 ( 

1166 r"""(?xi) 

1167 (\[\[) 

1168 (File|Image) (:) 

1169 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*) 

1170 (?: (\#) ([{}]*?) )? 

1171 """.format(title_char, f'{title_char}#'), 

1172 bygroups(Punctuation, Name.Namespace, Punctuation, 

1173 using(this, state=['wikilink-name']), Punctuation, Name.Label), 

1174 'medialink-inner' 

1175 ), 

1176 # Wikilinks 

1177 ( 

1178 r"""(?xi) 

1179 (\[\[)(?!{}) # Should not contain URLs 

1180 (?: ([{}]*) (:))? 

1181 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?) 

1182 (?: (\#) ([{}]*?) )? 

1183 (\]\]) 

1184 """.format('|'.join(protocols), title_char.replace('/', ''), 

1185 title_char, f'{title_char}#'), 

1186 bygroups(Punctuation, Name.Namespace, Punctuation, 

1187 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation) 

1188 ), 

1189 ( 

1190 r"""(?xi) 

1191 (\[\[)(?!{}) 

1192 (?: ([{}]*) (:))? 

1193 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?) 

1194 (?: (\#) ([{}]*?) )? 

1195 (\|) 

1196 """.format('|'.join(protocols), title_char.replace('/', ''), 

1197 title_char, f'{title_char}#'), 

1198 bygroups(Punctuation, Name.Namespace, Punctuation, 

1199 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation), 

1200 'wikilink-inner' 

1201 ), 

1202 # External links 

1203 ( 

1204 r"""(?xi) 

1205 (\[) 

1206 ((?:{}) {} {}*) 

1207 (\s*) 

1208 """.format('|'.join(protocols), link_address, link_char_class), 

1209 bygroups(Punctuation, Name.Label, Whitespace), 

1210 'extlink-inner' 

1211 ), 

1212 # Tables 

1213 (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword, 

1214 Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'), 

1215 # HTML tags 

1216 (r'(?i)(<)({})\b'.format('|'.join(html_tags)), 

1217 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1218 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)), 

1219 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1220 # <nowiki> 

1221 (r'(?i)(<)(nowiki)\b', bygroups(Punctuation, 

1222 Name.Tag), ('tag-nowiki', 'tag-inner')), 

1223 # <pre> 

1224 (r'(?i)(<)(pre)\b', bygroups(Punctuation, 

1225 Name.Tag), ('tag-pre', 'tag-inner')), 

1226 # <categorytree> 

1227 (r'(?i)(<)(categorytree)\b', bygroups( 

1228 Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')), 

1229 # <hiero> 

1230 (r'(?i)(<)(hiero)\b', bygroups(Punctuation, 

1231 Name.Tag), ('tag-hiero', 'tag-inner')), 

1232 # <math> 

1233 (r'(?i)(<)(math)\b', bygroups(Punctuation, 

1234 Name.Tag), ('tag-math', 'tag-inner')), 

1235 # <chem> 

1236 (r'(?i)(<)(chem)\b', bygroups(Punctuation, 

1237 Name.Tag), ('tag-chem', 'tag-inner')), 

1238 # <ce> 

1239 (r'(?i)(<)(ce)\b', bygroups(Punctuation, 

1240 Name.Tag), ('tag-ce', 'tag-inner')), 

1241 # <charinsert> 

1242 (r'(?i)(<)(charinsert)\b', bygroups( 

1243 Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')), 

1244 # <templatedata> 

1245 (r'(?i)(<)(templatedata)\b', bygroups( 

1246 Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')), 

1247 # <gallery> 

1248 (r'(?i)(<)(gallery)\b', bygroups( 

1249 Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')), 

1250 # <graph> 

1251 (r'(?i)(<)(gallery)\b', bygroups( 

1252 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')), 

1253 # <dynamicpagelist> 

1254 (r'(?i)(<)(dynamicpagelist)\b', bygroups( 

1255 Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')), 

1256 # <inputbox> 

1257 (r'(?i)(<)(inputbox)\b', bygroups( 

1258 Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')), 

1259 # <rss> 

1260 (r'(?i)(<)(rss)\b', bygroups( 

1261 Punctuation, Name.Tag), ('tag-rss', 'tag-inner')), 

1262 # <imagemap> 

1263 (r'(?i)(<)(imagemap)\b', bygroups( 

1264 Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')), 

1265 # <syntaxhighlight> 

1266 (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)', 

1267 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1268 (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1269 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1270 # <syntaxhighlight>: Fallback case for self-closing tags 

1271 (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1272 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1273 # <source> 

1274 (r'(?i)(</)(source)\b(\s*)(>)', 

1275 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1276 (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1277 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1278 # <source>: Fallback case for self-closing tags 

1279 (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1280 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1281 # <score> 

1282 (r'(?i)(</)(score)\b(\s*)(>)', 

1283 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1284 (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1285 bygroups(Punctuation, Name.Tag, handle_score)), 

1286 # <score>: Fallback case for self-closing tags 

1287 (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1288 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1289 # Other parser tags 

1290 (r'(?i)(<)({})\b'.format('|'.join(parser_tags)), 

1291 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1292 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)), 

1293 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1294 # LanguageConverter markups 

1295 ( 

1296 r"""(?xi) 

1297 (-\{{) # Use {{ to escape format() 

1298 ([^|]) (\|) 

1299 (?: 

1300 (?: ([^;]*?) (=>))? 

1301 (\s* (?:{variants}) \s*) (:) 

1302 )? 

1303 """.format(variants='|'.join(variant_langs)), 

1304 bygroups(Punctuation, Keyword, Punctuation, 

1305 using(this, state=['root', 'lc-raw']), 

1306 Operator, Name.Label, Punctuation), 

1307 'lc-inner' 

1308 ), 

1309 # LanguageConverter markups: composite conversion grammar 

1310 ( 

1311 r"""(?xi) 

1312 (-\{) 

1313 ([a-z\s;-]*?) (\|) 

1314 """, 

1315 bygroups(Punctuation, 

1316 using(this, state=['root', 'lc-flag']), 

1317 Punctuation), 

1318 'lc-raw' 

1319 ), 

1320 # LanguageConverter markups: fallbacks 

1321 ( 

1322 r"""(?xi) 

1323 (-\{{) (?!\{{) # Use {{ to escape format() 

1324 (?: (\s* (?:{variants}) \s*) (:))? 

1325 """.format(variants='|'.join(variant_langs)), 

1326 bygroups(Punctuation, Name.Label, Punctuation), 

1327 'lc-inner' 

1328 ), 

1329 ], 

        # Title part of a wikilink/media link (templates and comments may
        # appear inside it).
        'wikilink-name': [
            include('replaceable'),
            (r'[^{<]+', Name.Tag),
            (r'(?s).', Name.Tag),
        ],
        # Display text of [[target|text]].
        'wikilink-inner': [
            # Quit in case of another wikilink
            (r'(?=\[\[)', Punctuation, '#pop'),
            (r'\]\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        # Parameters of [[File:...|...]] links; name=value pairs are
        # highlighted as attributes.
        'medialink-inner': [
            (r'\]\]', Punctuation, '#pop'),
            (r'(\|)([^\n=|]*)(=)',
             bygroups(Punctuation, Name.Attribute, Operator)),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        # Shared exits for the quote ('' / ''') states below.
        'quote-common': [
            # Quit in case of link/template endings
            (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        'inline-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), ('#pop', 'inline-bold')),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
            (r"''(?!')", Generic.Emph, '#pop'),
            include('inline'),
            include('text-italic'),
        ],
        'inline-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
            (r"'''(?!')", Generic.Strong, '#pop'),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
            include('inline'),
            include('text-bold'),
        ],
        # Italic nested inside bold ('''...''...).
        'inline-bold-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
             Generic.Strong), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        # Bold nested inside italic (''...'''...).
        'inline-italic-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.EmphStrong, Generic.Emph), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        # Conversion flags before the '|' in -{flags|...}- markup.
        'lc-flag': [
            (r'\s+', Whitespace),
            (r';', Punctuation),
            *text_rules(Keyword),
        ],
        # Body of a -{...}- conversion with per-variant rules.
        'lc-inner': [
            (
                r"""(?xi)
                (;)
                (?: ([^;]*?) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
                         Operator, Name.Label, Punctuation)
            ),
            (r';?\s*?\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        # Body of a -{...}- conversion without variant labels.
        'lc-raw': [
            (r'\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],

        # Syntax that is substituted before rendering: comments, {{{params}}},
        # magic variables, {{templates}} and <tvar> markers.
        'replaceable': [
            # Comments
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            # Parameters
            (
                r"""(?x)
                (\{{3})
                    ([^|]*?)
                (?=\}{3}|\|)
                """,
                bygroups(Punctuation, Name.Variable),
                'parameter-inner',
            ),
            # Magic variables
            (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            # Parser functions & templates
            (r'\{\{', Punctuation, 'template-begin-space'),
            # <tvar> legacy syntax
            (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
             Name.Tag, Punctuation, String, Punctuation)),
            # Closing marker of the legacy <tvar|...> form.
            (r'</>', Punctuation, '#pop'),
            # <tvar>
            (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)(tvar)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        ],
        # Default value / pipe part of a {{{parameter}}}.
        'parameter-inner': [
            (r'\}{3}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'template-begin-space': [
            # Templates allow line breaks at the beginning, and due to how MediaWiki handles
            # comments, an extra state is required to handle things like {{\n<!---->\n name}}
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            (r'\s+', Whitespace),
            # Parser functions
            (
                r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
                                               '|'.join(parser_functions_i)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            (
                r'({})(:)'.format('|'.join(parser_functions)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            # Templates
            (
                rf'(?i)([{title_char}]*?)(:)',
                bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
            ),
            default(('#pop', 'template-name'),),
        ],
        # Template title, up to the first '|' or the closing '}}'.
        'template-name': [
            (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
            (r'\}\}', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            include('replaceable'),
            *text_rules(Name.Tag),
        ],
        # Template arguments; `name=` parts are highlighted as labels.
        'template-inner': [
            (r'\}\}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            (
                r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
                """,
                bygroups(Name.Label, Operator)
            ),
            include('inline'),
            include('text'),
        ],

        # Inside {| ... |} table markup.
        'table': [
            # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
            # Endings
            (r'^([ \t\n\r\0\x0B]*?)(\|\})',
             bygroups(Whitespace, Punctuation), '#pop'),
            # Table rows
            (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
             using(this, state=['root', 'attr']))),
            # Captions
            (
                r"""(?x)
                ^([ \t\n\r\0\x0B]*?)(\|\+)
                # Exclude links, template and tags
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
                (.*?)$
                """,
                bygroups(Whitespace, Punctuation, using(this, state=[
                         'root', 'attr']), Punctuation, Generic.Heading),
            ),
            # Table data
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                         'root', 'attr']), Punctuation),
            ),
            # Table headers
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)! )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                         'root', 'attr']), Punctuation),
                'table-header',
            ),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'table-header': [
            # Requires another state for || handling inside headers
            (r'\n', Text, '#pop'),
            (
                r"""(?x)
                (!!|\|\|)
                (?:
                    ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                    (\|)(?!\|)
                )?
                """,
                bygroups(Punctuation, using(this, state=[
                         'root', 'attr']), Punctuation)
            ),
            *text_rules(Generic.Subheading),
        ],
        # HTML character references (&amp;, &#160;, ...).
        'entity': [
            (r'&\S*?;', Name.Entity),
        ],
        # Term part of a description list (after a leading ';').
        'dt': [
            (r'\n', Text, '#pop'),
            include('inline'),
            (r':', Keyword, '#pop'),
            include('text'),
        ],
        # Link text of a bracketed external link.
        'extlink-inner': [
            (r'\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        # Plain text plus entities; no wiki markup.
        'nowiki-ish': [
            include('entity'),
            include('text'),
        ],

        # Attribute lists of wikitext constructs (table rows/cells, ...).
        'attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
            (r'[\w:-]+', Name.Attribute),

        ],
        # Unquoted attribute value, ended by whitespace.
        'attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            include('replaceable'),
            *text_rules(String),
        ],
        # Single-quoted attribute value.
        'attr-val-1': [
            (r"'", String.Single, '#pop'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        # Double-quoted attribute value.
        'attr-val-2': [
            (r'"', String.Double, '#pop'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-inner-ordinary': [
            (r'/?\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        'tag-inner': [
            # Return to root state for self-closing tags
            (r'/\s*>', Punctuation, '#pop:2'),
            (r'\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        # The states below are just like their non-tag variants, the key difference is
        # they forcibly quit when encountering tag closing markup
        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),

        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        # Per-tag body states, generated by the helper factories above.
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
        'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
        'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        # Catch-all text states parameterized by token type.
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text-bold-italic': text_rules(Generic.EmphStrong),
        'text': text_rules(Text),
    }