Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/markup.py: 56%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

318 statements  

1""" 

2 pygments.lexers.markup 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for non-HTML markup languages. 

6 

7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers.html import XmlLexer 

14from pygments.lexers.javascript import JavascriptLexer 

15from pygments.lexers.css import CssLexer 

16from pygments.lexers.lilypond import LilyPondLexer 

17from pygments.lexers.data import JsonLexer 

18 

19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ 

20 using, this, do_insertions, default, words 

21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

22 Number, Punctuation, Generic, Other, Whitespace, Literal 

23from pygments.util import get_bool_opt, ClassNotFound 

24 

25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', 

26 'MozPreprocHashLexer', 'MozPreprocPercentLexer', 

27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', 

28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer', 

29 'WikitextLexer'] 

30 

31 

class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']
    url = 'https://www.bbcode.org/'
    version_added = '0.6'

    tokens = {
        'root': [
            # plain text up to the next bracket
            (r'[^[]+', Text),
            # opening or closing tag start, e.g. "[b" or "[/b"
            (r'\[/?\w+', Keyword, 'tag'),
            # a bracket that starts no tag
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # key=value attribute inside a tag
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # bare tag argument, as in [color=green]
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # closing bracket ends the tag state
            (r'\]', Keyword, '#pop'),
        ],
    }

63 

64 

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # "ignore next" marker
            # Section titles, optionally followed by an anchor comment
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang line
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # inline formatting
            # List items (bulleted and lettered)
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Macros, e.g. [[TableOfContents]]
            (r'\[\[\w+.*?\]\]', Keyword),
            # Links with optional label
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),
            (r'^----+$', Keyword),  # horizontal rule
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # nested blocks are legal in Trac (not MoinMoin) -- push anyway
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),       # allow loose { or }
        ],
    }

109 

110 

class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Emit tokens for a code/sourcecode directive, delegating the
        body to the lexer named in the directive when possible."""
        from pygments.lexers import get_lexer_by_name

        # directive header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # look up a lexer for the declared language, if enabled
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # unknown language: fall back to a plain literal block
        if lexer is None:
            yield match.start(8), String, code
            return

        # strip the common indentation, lex the remainder, and re-insert
        # the indentation as plain Text tokens
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading (underline only)
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks introduced by a bare "::"
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # strong emphasis
            (r'\*.+?\*', Generic.Emph),  # emphasis
            (r'\[.*?\]_', String),  # footnote or citation
            (r'<.+?>', Name.Tag),  # hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5

274 

275 

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # math environments, display and inline
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # control sequences
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            # optional argument(s) and star form after a control sequence
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

334 

335 

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is a (g)roff document.

        Returns True for an initial comment or .TH header, 0.9 for any
        other two-letter request followed by whitespace, else False/None.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Use a slice instead of text[3] so a short input such as ".BR"
        # cannot raise IndexError (an empty slice is simply not a space).
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9

390 

391 

class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            # a preprocessor line starts with '#'; everything else is Other
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' swallows the rest of the line untouched
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }

431 

432 

class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        # identical to the parent except the marker character
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }

452 

453 

class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # XML for the document body, hash-preprocessor for the directives
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)

468 

469 

class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # JavaScript for the document body, hash-preprocessor for directives
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)

484 

485 

class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # CSS for the document body; note CSS files use the '%' marker
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)

500 

501 

class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """Tokenize a fenced code block, delegating its body to the lexer
        named after the opening fence when one exists."""
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # look up a lexer for the fence's language, if enabled
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        if lexer is None:
            # unknown language: emit the body as one literal string
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
              (?P<whitespace>[^\S\n]+)
              (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

620 

class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        # inline markup must not touch word characters on either side,
        # and may span single (but not blank) line breaks
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered lists items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            (r'^\s*CLOSED:\s+', Generic.Deleted, 'dateline'),
            (r'^\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error, 'dateline'),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String),  # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
        'dateline': [
            (r'\s*CLOSED:\s+', Generic.Deleted),
            (r'\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error),
            (r'\[.+?\]', Literal.Date),
            (r'<[^>]+?>', Literal.Date),
            (r'(\s*)$', Text, '#pop'),
            (r'.', Text),
        ],
    }

734 

class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # opening fence and language name
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # look up a lexer for the declared language, if enabled
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # unknown language: emit the body as one literal string
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # opening <style> tag
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no CSS lexer available: fall back to a literal block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

891 

892 

893class WikitextLexer(RegexLexer): 

894 """ 

895 For MediaWiki Wikitext. 

896 

897 Parsing Wikitext is tricky, and results vary between different MediaWiki 

898 installations, so we only highlight common syntaxes (built-in or from 

899 popular extensions), and also assume templates produce no unbalanced 

900 syntaxes. 

901 """ 

902 name = 'Wikitext' 

903 url = 'https://www.mediawiki.org/wiki/Wikitext' 

904 aliases = ['wikitext', 'mediawiki'] 

905 filenames = [] 

906 mimetypes = ['text/x-wiki'] 

907 version_added = '2.15' 

908 flags = re.MULTILINE 

909 

910 def nowiki_tag_rules(tag_name): 

911 return [ 

912 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation, 

913 Name.Tag, Whitespace, Punctuation), '#pop'), 

914 include('entity'), 

915 include('text'), 

916 ] 

917 

918 def plaintext_tag_rules(tag_name): 

919 return [ 

920 (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text, 

921 Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'), 

922 ] 

923 

924 def delegate_tag_rules(tag_name, lexer, **lexer_kwargs): 

925 return [ 

926 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation, 

927 Name.Tag, Whitespace, Punctuation), '#pop'), 

928 (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)), 

929 ] 

930 

931 def text_rules(token): 

932 return [ 

933 (r'\w+', token), 

934 (r'[^\S\n]+', token), 

935 (r'(?s).', token), 

936 ] 

937 

938 def handle_syntaxhighlight(self, match, ctx): 

939 from pygments.lexers import get_lexer_by_name 

940 

941 attr_content = match.group() 

942 start = 0 

943 index = 0 

944 while True: 

945 index = attr_content.find('>', start) 

946 # Exclude comment end (-->) 

947 if attr_content[index-2:index] != '--': 

948 break 

949 start = index + 1 

950 

951 if index == -1: 

952 # No tag end 

953 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) 

954 return 

955 attr = attr_content[:index] 

956 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) 

957 yield match.start(3) + index, Punctuation, '>' 

958 

959 lexer = None 

960 content = attr_content[index+1:] 

961 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) 

962 

963 if len(lang_match) >= 1: 

964 # Pick the last match in case of multiple matches 

965 lang = lang_match[-1][1] 

966 try: 

967 lexer = get_lexer_by_name(lang) 

968 except ClassNotFound: 

969 pass 

970 

971 if lexer is None: 

972 yield match.start() + index + 1, Text, content 

973 else: 

974 yield from lexer.get_tokens_unprocessed(content) 

975 

976 def handle_score(self, match, ctx): 

977 attr_content = match.group() 

978 start = 0 

979 index = 0 

980 while True: 

981 index = attr_content.find('>', start) 

982 # Exclude comment end (-->) 

983 if attr_content[index-2:index] != '--': 

984 break 

985 start = index + 1 

986 

987 if index == -1: 

988 # No tag end 

989 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) 

990 return 

991 attr = attr_content[:index] 

992 content = attr_content[index+1:] 

993 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) 

994 yield match.start(3) + index, Punctuation, '>' 

995 

996 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) 

997 # Pick the last match in case of multiple matches 

998 lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond' 

999 

1000 if lang == 'lilypond': # Case sensitive 

1001 yield from LilyPondLexer().get_tokens_unprocessed(content) 

1002 else: # ABC 

1003 # FIXME: Use ABC lexer in the future 

1004 yield match.start() + index + 1, Text, content 

1005 

    # --- Character classes and keyword sets used to build the rules below ---

    # Characters legal in page titles.
    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    # Non-breaking separators accepted inside magic links (RFC/PMID/ISBN),
    # including their HTML-entity spellings.
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    # First character / remaining characters of an external-link target.
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    # Double-underscore behavior switches (case-insensitive set first).
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    # URL schemes recognized in links ('//' is the protocol-relative form).
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    # HTML tags passed through by the MediaWiki sanitizer.
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    # Tags registered by the parser core or popular extensions.
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    # Language-variant codes accepted by LanguageConverter (-{...}- markup),
    # grouped by the converter class that defines them.
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    # Magic variables ({{...}} without arguments); case-insensitive set first.
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    # Parser functions ({{NAME:...}}); case-insensitive set first.
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }

1111 tokens = { 

1112 'root': [ 

1113 # Redirects 

1114 (r"""(?xi) 

1115 (\A\s*?)(\#REDIRECT:?) # may contain a colon 

1116 (\s+)(\[\[) (?=[^\]\n]* \]\]$) 

1117 """, 

1118 bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'), 

1119 # Subheadings 

1120 (r'^(={2,6})(.+?)(\1)(\s*$\n)', 

1121 bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)), 

1122 # Headings 

1123 (r'^(=.+?=)(\s*$\n)', 

1124 bygroups(Generic.Heading, Whitespace)), 

1125 # Double-slashed magic words 

1126 (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic), 

1127 (words(double_slashes), Name.Function.Magic), 

1128 # Raw URLs 

1129 (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols), 

1130 link_address, link_char_class), Name.Label), 

1131 # Magic links 

1132 (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b', 

1133 Name.Function.Magic), 

1134 (r"""(?x) 

1135 \bISBN {nbsp_char} 

1136 (?: 97[89] {nbsp_dash}? )? 

1137 (?: [0-9] {nbsp_dash}? ){{9}} # escape format() 

1138 [0-9Xx]\b 

1139 """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic), 

1140 include('list'), 

1141 include('inline'), 

1142 include('text'), 

1143 ], 

1144 'redirect-inner': [ 

1145 (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'), 

1146 (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)), 

1147 (rf'(?i)[{title_char}]+', Name.Tag), 

1148 ], 

1149 'list': [ 

1150 # Description lists 

1151 (r'^;', Keyword, 'dt'), 

1152 # Ordered lists, unordered lists and indents 

1153 (r'^[#:*]+', Keyword), 

1154 # Horizontal rules 

1155 (r'^-{4,}', Keyword), 

1156 ], 

1157 'inline': [ 

1158 # Signatures 

1159 (r'~{3,5}', Keyword), 

1160 # Entities 

1161 include('entity'), 

1162 # Bold & italic 

1163 (r"('')(''')(?!')", bygroups(Generic.Emph, 

1164 Generic.EmphStrong), 'inline-italic-bold'), 

1165 (r"'''(?!')", Generic.Strong, 'inline-bold'), 

1166 (r"''(?!')", Generic.Emph, 'inline-italic'), 

1167 # Comments & parameters & templates 

1168 include('replaceable'), 

1169 # Media links 

1170 ( 

1171 r"""(?xi) 

1172 (\[\[) 

1173 (File|Image) (:) 

1174 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*) 

1175 (?: (\#) ([{}]*?) )? 

1176 """.format(title_char, f'{title_char}#'), 

1177 bygroups(Punctuation, Name.Namespace, Punctuation, 

1178 using(this, state=['wikilink-name']), Punctuation, Name.Label), 

1179 'medialink-inner' 

1180 ), 

1181 # Wikilinks 

1182 ( 

1183 r"""(?xi) 

1184 (\[\[)(?!{}) # Should not contain URLs 

1185 (?: ([{}]*) (:))? 

1186 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?) 

1187 (?: (\#) ([{}]*?) )? 

1188 (\]\]) 

1189 """.format('|'.join(protocols), title_char.replace('/', ''), 

1190 title_char, f'{title_char}#'), 

1191 bygroups(Punctuation, Name.Namespace, Punctuation, 

1192 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation) 

1193 ), 

1194 ( 

1195 r"""(?xi) 

1196 (\[\[)(?!{}) 

1197 (?: ([{}]*) (:))? 

1198 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?) 

1199 (?: (\#) ([{}]*?) )? 

1200 (\|) 

1201 """.format('|'.join(protocols), title_char.replace('/', ''), 

1202 title_char, f'{title_char}#'), 

1203 bygroups(Punctuation, Name.Namespace, Punctuation, 

1204 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation), 

1205 'wikilink-inner' 

1206 ), 

1207 # External links 

1208 ( 

1209 r"""(?xi) 

1210 (\[) 

1211 ((?:{}) {} {}*) 

1212 (\s*) 

1213 """.format('|'.join(protocols), link_address, link_char_class), 

1214 bygroups(Punctuation, Name.Label, Whitespace), 

1215 'extlink-inner' 

1216 ), 

1217 # Tables 

1218 (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword, 

1219 Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'), 

1220 # HTML tags 

1221 (r'(?i)(<)({})\b'.format('|'.join(html_tags)), 

1222 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1223 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)), 

1224 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1225 # <nowiki> 

1226 (r'(?i)(<)(nowiki)\b', bygroups(Punctuation, 

1227 Name.Tag), ('tag-nowiki', 'tag-inner')), 

1228 # <pre> 

1229 (r'(?i)(<)(pre)\b', bygroups(Punctuation, 

1230 Name.Tag), ('tag-pre', 'tag-inner')), 

1231 # <categorytree> 

1232 (r'(?i)(<)(categorytree)\b', bygroups( 

1233 Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')), 

1234 # <hiero> 

1235 (r'(?i)(<)(hiero)\b', bygroups(Punctuation, 

1236 Name.Tag), ('tag-hiero', 'tag-inner')), 

1237 # <math> 

1238 (r'(?i)(<)(math)\b', bygroups(Punctuation, 

1239 Name.Tag), ('tag-math', 'tag-inner')), 

1240 # <chem> 

1241 (r'(?i)(<)(chem)\b', bygroups(Punctuation, 

1242 Name.Tag), ('tag-chem', 'tag-inner')), 

1243 # <ce> 

1244 (r'(?i)(<)(ce)\b', bygroups(Punctuation, 

1245 Name.Tag), ('tag-ce', 'tag-inner')), 

1246 # <charinsert> 

1247 (r'(?i)(<)(charinsert)\b', bygroups( 

1248 Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')), 

1249 # <templatedata> 

1250 (r'(?i)(<)(templatedata)\b', bygroups( 

1251 Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')), 

1252 # <gallery> 

1253 (r'(?i)(<)(gallery)\b', bygroups( 

1254 Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')), 

1255 # <graph> 

1256 (r'(?i)(<)(gallery)\b', bygroups( 

1257 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')), 

1258 # <dynamicpagelist> 

1259 (r'(?i)(<)(dynamicpagelist)\b', bygroups( 

1260 Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')), 

1261 # <inputbox> 

1262 (r'(?i)(<)(inputbox)\b', bygroups( 

1263 Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')), 

1264 # <rss> 

1265 (r'(?i)(<)(rss)\b', bygroups( 

1266 Punctuation, Name.Tag), ('tag-rss', 'tag-inner')), 

1267 # <imagemap> 

1268 (r'(?i)(<)(imagemap)\b', bygroups( 

1269 Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')), 

1270 # <syntaxhighlight> 

1271 (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)', 

1272 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1273 (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1274 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1275 # <syntaxhighlight>: Fallback case for self-closing tags 

1276 (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1277 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1278 # <source> 

1279 (r'(?i)(</)(source)\b(\s*)(>)', 

1280 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1281 (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1282 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), 

1283 # <source>: Fallback case for self-closing tags 

1284 (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1285 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1286 # <score> 

1287 (r'(?i)(</)(score)\b(\s*)(>)', 

1288 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1289 (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', 

1290 bygroups(Punctuation, Name.Tag, handle_score)), 

1291 # <score>: Fallback case for self-closing tags 

1292 (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( 

1293 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), 

1294 # Other parser tags 

1295 (r'(?i)(<)({})\b'.format('|'.join(parser_tags)), 

1296 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1297 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)), 

1298 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1299 # LanguageConverter markups 

1300 ( 

1301 r"""(?xi) 

1302 (-\{{) # Use {{ to escape format() 

1303 ([^|]) (\|) 

1304 (?: 

1305 (?: ([^;]*?) (=>))? 

1306 (\s* (?:{variants}) \s*) (:) 

1307 )? 

1308 """.format(variants='|'.join(variant_langs)), 

1309 bygroups(Punctuation, Keyword, Punctuation, 

1310 using(this, state=['root', 'lc-raw']), 

1311 Operator, Name.Label, Punctuation), 

1312 'lc-inner' 

1313 ), 

1314 # LanguageConverter markups: composite conversion grammar 

1315 ( 

1316 r"""(?xi) 

1317 (-\{) 

1318 ([a-z\s;-]*?) (\|) 

1319 """, 

1320 bygroups(Punctuation, 

1321 using(this, state=['root', 'lc-flag']), 

1322 Punctuation), 

1323 'lc-raw' 

1324 ), 

1325 # LanguageConverter markups: fallbacks 

1326 ( 

1327 r"""(?xi) 

1328 (-\{{) (?!\{{) # Use {{ to escape format() 

1329 (?: (\s* (?:{variants}) \s*) (:))? 

1330 """.format(variants='|'.join(variant_langs)), 

1331 bygroups(Punctuation, Name.Label, Punctuation), 

1332 'lc-inner' 

1333 ), 

1334 ], 

1335 'wikilink-name': [ 

1336 include('replaceable'), 

1337 (r'[^{<]+', Name.Tag), 

1338 (r'(?s).', Name.Tag), 

1339 ], 

1340 'wikilink-inner': [ 

1341 # Quit in case of another wikilink 

1342 (r'(?=\[\[)', Punctuation, '#pop'), 

1343 (r'\]\]', Punctuation, '#pop'), 

1344 include('inline'), 

1345 include('text'), 

1346 ], 

1347 'medialink-inner': [ 

1348 (r'\]\]', Punctuation, '#pop'), 

1349 (r'(\|)([^\n=|]*)(=)', 

1350 bygroups(Punctuation, Name.Attribute, Operator)), 

1351 (r'\|', Punctuation), 

1352 include('inline'), 

1353 include('text'), 

1354 ], 

1355 'quote-common': [ 

1356 # Quit in case of link/template endings 

1357 (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'), 

1358 (r'\n', Text, '#pop'), 

1359 ], 

1360 'inline-italic': [ 

1361 include('quote-common'), 

1362 (r"('')(''')(?!')", bygroups(Generic.Emph, 

1363 Generic.Strong), ('#pop', 'inline-bold')), 

1364 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')), 

1365 (r"''(?!')", Generic.Emph, '#pop'), 

1366 include('inline'), 

1367 include('text-italic'), 

1368 ], 

1369 'inline-bold': [ 

1370 include('quote-common'), 

1371 (r"(''')('')(?!')", bygroups( 

1372 Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')), 

1373 (r"'''(?!')", Generic.Strong, '#pop'), 

1374 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')), 

1375 include('inline'), 

1376 include('text-bold'), 

1377 ], 

1378 'inline-bold-italic': [ 

1379 include('quote-common'), 

1380 (r"('')(''')(?!')", bygroups(Generic.EmphStrong, 

1381 Generic.Strong), '#pop'), 

1382 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), 

1383 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), 

1384 include('inline'), 

1385 include('text-bold-italic'), 

1386 ], 

1387 'inline-italic-bold': [ 

1388 include('quote-common'), 

1389 (r"(''')('')(?!')", bygroups( 

1390 Generic.EmphStrong, Generic.Emph), '#pop'), 

1391 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), 

1392 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), 

1393 include('inline'), 

1394 include('text-bold-italic'), 

1395 ], 

1396 'lc-flag': [ 

1397 (r'\s+', Whitespace), 

1398 (r';', Punctuation), 

1399 *text_rules(Keyword), 

1400 ], 

1401 'lc-inner': [ 

1402 ( 

1403 r"""(?xi) 

1404 (;) 

1405 (?: ([^;]*?) (=>))? 

1406 (\s* (?:{variants}) \s*) (:) 

1407 """.format(variants='|'.join(variant_langs)), 

1408 bygroups(Punctuation, using(this, state=['root', 'lc-raw']), 

1409 Operator, Name.Label, Punctuation) 

1410 ), 

1411 (r';?\s*?\}-', Punctuation, '#pop'), 

1412 include('inline'), 

1413 include('text'), 

1414 ], 

1415 'lc-raw': [ 

1416 (r'\}-', Punctuation, '#pop'), 

1417 include('inline'), 

1418 include('text'), 

1419 ], 

1420 'replaceable': [ 

1421 # Comments 

1422 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), 

1423 # Parameters 

1424 ( 

1425 r"""(?x) 

1426 (\{{3}) 

1427 ([^|]*?) 

1428 (?=\}{3}|\|) 

1429 """, 

1430 bygroups(Punctuation, Name.Variable), 

1431 'parameter-inner', 

1432 ), 

1433 # Magic variables 

1434 (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)), 

1435 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), 

1436 (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)), 

1437 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), 

1438 # Parser functions & templates 

1439 (r'\{\{', Punctuation, 'template-begin-space'), 

1440 # <tvar> legacy syntax 

1441 (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation, 

1442 Name.Tag, Punctuation, String, Punctuation)), 

1443 (r'</>', Punctuation, '#pop'), 

1444 # <tvar> 

1445 (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), 

1446 (r'(?i)(</)(tvar)\b(\s*)(>)', 

1447 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), 

1448 ], 

1449 'parameter-inner': [ 

1450 (r'\}{3}', Punctuation, '#pop'), 

1451 (r'\|', Punctuation), 

1452 include('inline'), 

1453 include('text'), 

1454 ], 

1455 'template-begin-space': [ 

1456 # Templates allow line breaks at the beginning, and due to how MediaWiki handles 

1457 # comments, an extra state is required to handle things like {{\n<!---->\n name}} 

1458 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), 

1459 (r'\s+', Whitespace), 

1460 # Parser functions 

1461 ( 

1462 r'(?i)(\#[{}]*?|{})(:)'.format(title_char, 

1463 '|'.join(parser_functions_i)), 

1464 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') 

1465 ), 

1466 ( 

1467 r'({})(:)'.format('|'.join(parser_functions)), 

1468 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') 

1469 ), 

1470 # Templates 

1471 ( 

1472 rf'(?i)([{title_char}]*?)(:)', 

1473 bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name') 

1474 ), 

1475 default(('#pop', 'template-name'),), 

1476 ], 

1477 'template-name': [ 

1478 (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')), 

1479 (r'\}\}', Punctuation, '#pop'), 

1480 (r'\n', Text, '#pop'), 

1481 include('replaceable'), 

1482 *text_rules(Name.Tag), 

1483 ], 

1484 'template-inner': [ 

1485 (r'\}\}', Punctuation, '#pop'), 

1486 (r'\|', Punctuation), 

1487 ( 

1488 r"""(?x) 

1489 (?<=\|) 

1490 ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags 

1491 (=) 

1492 """, 

1493 bygroups(Name.Label, Operator) 

1494 ), 

1495 include('inline'), 

1496 include('text'), 

1497 ], 

1498 'table': [ 

1499 # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior 

1500 # Endings 

1501 (r'^([ \t\n\r\0\x0B]*?)(\|\})', 

1502 bygroups(Whitespace, Punctuation), '#pop'), 

1503 # Table rows 

1504 (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation, 

1505 using(this, state=['root', 'attr']))), 

1506 # Captions 

1507 ( 

1508 r"""(?x) 

1509 ^([ \t\n\r\0\x0B]*?)(\|\+) 

1510 # Exclude links, template and tags 

1511 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )? 

1512 (.*?)$ 

1513 """, 

1514 bygroups(Whitespace, Punctuation, using(this, state=[ 

1515 'root', 'attr']), Punctuation, Generic.Heading), 

1516 ), 

1517 # Table data 

1518 ( 

1519 r"""(?x) 

1520 ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| ) 

1521 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? 

1522 """, 

1523 bygroups(Punctuation, using(this, state=[ 

1524 'root', 'attr']), Punctuation), 

1525 ), 

1526 # Table headers 

1527 ( 

1528 r"""(?x) 

1529 ( ^(?:[ \t\n\r\0\x0B]*?)! ) 

1530 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? 

1531 """, 

1532 bygroups(Punctuation, using(this, state=[ 

1533 'root', 'attr']), Punctuation), 

1534 'table-header', 

1535 ), 

1536 include('list'), 

1537 include('inline'), 

1538 include('text'), 

1539 ], 

1540 'table-header': [ 

1541 # Requires another state for || handling inside headers 

1542 (r'\n', Text, '#pop'), 

1543 ( 

1544 r"""(?x) 

1545 (!!|\|\|) 

1546 (?: 

1547 ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? ) 

1548 (\|)(?!\|) 

1549 )? 

1550 """, 

1551 bygroups(Punctuation, using(this, state=[ 

1552 'root', 'attr']), Punctuation) 

1553 ), 

1554 *text_rules(Generic.Subheading), 

1555 ], 

1556 'entity': [ 

1557 (r'&\S*?;', Name.Entity), 

1558 ], 

1559 'dt': [ 

1560 (r'\n', Text, '#pop'), 

1561 include('inline'), 

1562 (r':', Keyword, '#pop'), 

1563 include('text'), 

1564 ], 

1565 'extlink-inner': [ 

1566 (r'\]', Punctuation, '#pop'), 

1567 include('inline'), 

1568 include('text'), 

1569 ], 

1570 'nowiki-ish': [ 

1571 include('entity'), 

1572 include('text'), 

1573 ], 

1574 'attr': [ 

1575 include('replaceable'), 

1576 (r'\s+', Whitespace), 

1577 (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'), 

1578 (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'), 

1579 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'), 

1580 (r'[\w:-]+', Name.Attribute), 

1581 

1582 ], 

1583 'attr-val-0': [ 

1584 (r'\s', Whitespace, '#pop'), 

1585 include('replaceable'), 

1586 *text_rules(String), 

1587 ], 

1588 'attr-val-1': [ 

1589 (r"'", String.Single, '#pop'), 

1590 include('replaceable'), 

1591 *text_rules(String.Single), 

1592 ], 

1593 'attr-val-2': [ 

1594 (r'"', String.Double, '#pop'), 

1595 include('replaceable'), 

1596 *text_rules(String.Double), 

1597 ], 

1598 'tag-inner-ordinary': [ 

1599 (r'/?\s*>', Punctuation, '#pop'), 

1600 include('tag-attr'), 

1601 ], 

1602 'tag-inner': [ 

1603 # Return to root state for self-closing tags 

1604 (r'/\s*>', Punctuation, '#pop:2'), 

1605 (r'\s*>', Punctuation, '#pop'), 

1606 include('tag-attr'), 

1607 ], 

1608 # There states below are just like their non-tag variants, the key difference is 

1609 # they forcibly quit when encountering tag closing markup 

1610 'tag-attr': [ 

1611 include('replaceable'), 

1612 (r'\s+', Whitespace), 

1613 (r'(=)(\s*)(")', bygroups(Operator, 

1614 Whitespace, String.Double), 'tag-attr-val-2'), 

1615 (r"(=)(\s*)(')", bygroups(Operator, 

1616 Whitespace, String.Single), 'tag-attr-val-1'), 

1617 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'), 

1618 (r'[\w:-]+', Name.Attribute), 

1619 

1620 ], 

1621 'tag-attr-val-0': [ 

1622 (r'\s', Whitespace, '#pop'), 

1623 (r'/?>', Punctuation, '#pop:2'), 

1624 include('replaceable'), 

1625 *text_rules(String), 

1626 ], 

1627 'tag-attr-val-1': [ 

1628 (r"'", String.Single, '#pop'), 

1629 (r'/?>', Punctuation, '#pop:2'), 

1630 include('replaceable'), 

1631 *text_rules(String.Single), 

1632 ], 

1633 'tag-attr-val-2': [ 

1634 (r'"', String.Double, '#pop'), 

1635 (r'/?>', Punctuation, '#pop:2'), 

1636 include('replaceable'), 

1637 *text_rules(String.Double), 

1638 ], 

1639 'tag-nowiki': nowiki_tag_rules('nowiki'), 

1640 'tag-pre': nowiki_tag_rules('pre'), 

1641 'tag-categorytree': plaintext_tag_rules('categorytree'), 

1642 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'), 

1643 'tag-hiero': plaintext_tag_rules('hiero'), 

1644 'tag-inputbox': plaintext_tag_rules('inputbox'), 

1645 'tag-imagemap': plaintext_tag_rules('imagemap'), 

1646 'tag-charinsert': plaintext_tag_rules('charinsert'), 

1647 'tag-timeline': plaintext_tag_rules('timeline'), 

1648 'tag-gallery': plaintext_tag_rules('gallery'), 

1649 'tag-graph': plaintext_tag_rules('graph'), 

1650 'tag-rss': plaintext_tag_rules('rss'), 

1651 'tag-math': delegate_tag_rules('math', TexLexer, state='math'), 

1652 'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'), 

1653 'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'), 

1654 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer), 

1655 'text-italic': text_rules(Generic.Emph), 

1656 'text-bold': text_rules(Generic.Strong), 

1657 'text-bold-italic': text_rules(Generic.EmphStrong), 

1658 'text': text_rules(Text), 

1659 }