1"""
2 pygments.lexers.markup
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for non-HTML markup languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexers.html import XmlLexer
14from pygments.lexers.javascript import JavascriptLexer
15from pygments.lexers.css import CssLexer
16from pygments.lexers.lilypond import LilyPondLexer
17from pygments.lexers.data import JsonLexer
18
19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
20 using, this, do_insertions, default, words
21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
22 Number, Punctuation, Generic, Other, Whitespace, Literal
23from pygments.util import get_bool_opt, ClassNotFound
24
25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
26 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer',
29 'WikitextLexer']
30
31
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']
    url = 'https://www.bbcode.org/'
    version_added = '0.6'

    tokens = {
        'root': [
            # plain text, up to the next '[' (a potential tag start)
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
63
64
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # full-line '#' processing instructions / comments
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword), # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)), # Link
            (r'^----+$', Keyword), # Horizontal rules
            # plain text: stop at any character that could begin markup
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc), # slurp boring text
            (r'.', Comment.Preproc), # allow loose { or }
        ],
    }
109
110
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Callback for code-block directives: emit the directive header,
        then delegate the indented body to a lexer for the named language
        (when `handlecodeblocks` is set and such a lexer exists).

        Match groups: 1-7 directive header pieces, 6 is the language name,
        8 the indentation, 9-11 the code body.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        # Strip the common indentation and re-insert it afterwards via
        # do_insertions so token offsets stay correct.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1   .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1   .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text), # escape
            (r'``', String, 'literal'), # code
            (r'(`.+?)(<.+?>)(`__?)', # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String), # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)), # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)), # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
            (r'\*.+?\*', Generic.Emph), # Emphasis
            (r'\[.*?\]_', String), # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            # inside ``...``: everything is literal until the closing
            # backticks followed by a valid end-of-string context
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of code-block directive
        # bodies to language-specific lexers (see _handle_sourcecode).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        """Heuristic: directives at the start, or a first line underlined
        by a same-length run of '-'/'=' characters, suggest reST."""
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
274
275
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        'general': [
            # rules shared by text and math modes
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # math delimiters: \[...\], \(...\), $$...$$ and $...$
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # a control sequence: multi-letter name or a single character
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            # optional bracketed argument and star variant; anything else
            # falls back to the surrounding state
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        # Recognize common (La)TeX preamble commands at the very start.
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
334
335
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            # a request line: '.' followed by a macro/request name
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),        # comment escape
            (r'\\[fn]\w', String.Escape),   # font / number-register escapes
            (r'\\\(.{2}', String.Escape),   # two-character special glyph
            (r'\\.\[.*\]', String.Escape),  # bracketed long-name escape
            (r'\\.', String.Escape),        # any other single-char escape
            (r'\\\n', Text, 'request'),     # line continuation
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """Guess whether *text* is (g)roff source.

        Returns True for an initial comment (``.\\"``) or a manpage
        header (``.TH``), 0.9 for any other plausible request line, and
        False when the text does not start with a control character.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Use slicing (text[3:4]) instead of indexing (text[3]) so that
        # inputs of three characters or fewer -- e.g. '.ab' -- return
        # None instead of raising IndexError.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
390
391
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            # a directive line: push 'expr', then 'exprstart' on top of it,
            # so the directive keyword is lexed before the expression
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            # 'literal' consumes the rest of the line; pop both states
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),   # built-in __VARIABLES__
            (r'@\w+?@', Name.Class),        # @substitution@ markers
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
431
432
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        # only 'root' is overridden (marker '%' instead of '#'); the
        # 'exprstart' and 'expr' states come from MozPreprocHashLexer
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
452
453
class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer: XmlLexer; language lexer: MozPreprocHashLexer
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
468
469
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # root lexer: JavascriptLexer; language lexer: MozPreprocHashLexer
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
484
485
class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Unlike the XUL/JS variants, CSS uses the '%' marker flavour
        # (MozPreprocPercentLexer), since '#' is common in CSS selectors.
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
500
501
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """Callback for fenced code blocks with a language tag: emit the
        fence and language, then delegate the body to a lexer for that
        language when `handlecodeblocks` is set and such a lexer exists.
        """
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            # extra text after the language name (some tools allow this)
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
              (?P<whitespace>[^\S\n]+)
              (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of fenced code blocks to
        # language-specific lexers (see _handle_codeblock).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
620
class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        """Build a regex for inline markup delimited by *start*/*end*,
        not adjacent to word characters and spanning at most one newline."""
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            # plain headings, with optional trailing :tag: list
            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered lists items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            # planning lines; timestamps continue in the 'dateline' state
            (r'^\s*CLOSED:\s+', Generic.Deleted, 'dateline'),
            (r'^\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error, 'dateline'),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String), # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
        'dateline': [
            # additional planning keywords on the same line
            (r'\s*CLOSED:\s+', Generic.Deleted),
            (r'\s*(?:DEADLINE:|SCHEDULED:)\s+', Generic.Error),
            (r'\[.+?\]', Literal.Date),     # inactive timestamp
            (r'<[^>]+?>', Literal.Date),    # active timestamp
            (r'(\s*)$', Text, '#pop'),      # end of line: leave the state
            (r'.', Text),
        ],
    }
734
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

        Delegates the code body to a lexer for the named language when
        `handlecodeblocks` is set and such a lexer exists.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag

        Delegates the block body to the CSS lexer when `handlecodeblocks`
        is set.
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            # pragma/metadata keywords at the start of a line
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        # `handlecodeblocks` controls delegation of fenced code / style
        # blocks to language-specific lexers (see the handlers above).
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
891
892
893class WikitextLexer(RegexLexer):
894 """
895 For MediaWiki Wikitext.
896
897 Parsing Wikitext is tricky, and results vary between different MediaWiki
898 installations, so we only highlight common syntaxes (built-in or from
899 popular extensions), and also assume templates produce no unbalanced
900 syntaxes.
901 """
902 name = 'Wikitext'
903 url = 'https://www.mediawiki.org/wiki/Wikitext'
904 aliases = ['wikitext', 'mediawiki']
905 filenames = []
906 mimetypes = ['text/x-wiki']
907 version_added = '2.15'
908 flags = re.MULTILINE
909
910 def nowiki_tag_rules(tag_name):
911 return [
912 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
913 Name.Tag, Whitespace, Punctuation), '#pop'),
914 include('entity'),
915 include('text'),
916 ]
917
918 def plaintext_tag_rules(tag_name):
919 return [
920 (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text,
921 Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
922 ]
923
924 def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
925 return [
926 (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
927 Name.Tag, Whitespace, Punctuation), '#pop'),
928 (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)),
929 ]
930
931 def text_rules(token):
932 return [
933 (r'\w+', token),
934 (r'[^\S\n]+', token),
935 (r'(?s).', token),
936 ]
937
938 def handle_syntaxhighlight(self, match, ctx):
939 from pygments.lexers import get_lexer_by_name
940
941 attr_content = match.group()
942 start = 0
943 index = 0
944 while True:
945 index = attr_content.find('>', start)
946 # Exclude comment end (-->)
947 if attr_content[index-2:index] != '--':
948 break
949 start = index + 1
950
951 if index == -1:
952 # No tag end
953 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
954 return
955 attr = attr_content[:index]
956 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
957 yield match.start(3) + index, Punctuation, '>'
958
959 lexer = None
960 content = attr_content[index+1:]
961 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
962
963 if len(lang_match) >= 1:
964 # Pick the last match in case of multiple matches
965 lang = lang_match[-1][1]
966 try:
967 lexer = get_lexer_by_name(lang)
968 except ClassNotFound:
969 pass
970
971 if lexer is None:
972 yield match.start() + index + 1, Text, content
973 else:
974 yield from lexer.get_tokens_unprocessed(content)
975
976 def handle_score(self, match, ctx):
977 attr_content = match.group()
978 start = 0
979 index = 0
980 while True:
981 index = attr_content.find('>', start)
982 # Exclude comment end (-->)
983 if attr_content[index-2:index] != '--':
984 break
985 start = index + 1
986
987 if index == -1:
988 # No tag end
989 yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
990 return
991 attr = attr_content[:index]
992 content = attr_content[index+1:]
993 yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
994 yield match.start(3) + index, Punctuation, '>'
995
996 lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
997 # Pick the last match in case of multiple matches
998 lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'
999
1000 if lang == 'lilypond': # Case sensitive
1001 yield from LilyPondLexer().get_tokens_unprocessed(content)
1002 else: # ABC
1003 # FIXME: Use ABC lexer in the future
1004 yield match.start() + index + 1, Text, content
1005
    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    # Forms of (non-breaking) space accepted inside magic links (RFC/PMID/ISBN):
    # tab, plain space, numeric &#160;/&#xA0; entities, and Unicode space chars.
    nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    # First character of a link target / characters allowed inside one.
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    # __WORD__ behavior switches; the *_i set is matched case-insensitively.
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    # URL schemes recognized for raw/external links; bare '//' allows
    # protocol-relative links.
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    # Tag names handled by the generic HTML-tag rules in the 'inline' state.
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    # Extension/parser tag names; those without a dedicated rule fall through
    # to the generic parser-tag rules in the 'inline' state.
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    # Language-variant codes accepted in LanguageConverter markup (-{ ... }-).
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    # Variables substituted inside {{...}}; the *_i set is case-insensitive.
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    # Parser-function names matched between '{{' and ':'; *_i is case-insensitive.
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }
1110
1111 tokens = {
1112 'root': [
1113 # Redirects
1114 (r"""(?xi)
1115 (\A\s*?)(\#REDIRECT:?) # may contain a colon
1116 (\s+)(\[\[) (?=[^\]\n]* \]\]$)
1117 """,
1118 bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
1119 # Subheadings
1120 (r'^(={2,6})(.+?)(\1)(\s*$\n)',
1121 bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
1122 # Headings
1123 (r'^(=.+?=)(\s*$\n)',
1124 bygroups(Generic.Heading, Whitespace)),
1125 # Double-slashed magic words
1126 (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
1127 (words(double_slashes), Name.Function.Magic),
1128 # Raw URLs
1129 (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
1130 link_address, link_char_class), Name.Label),
1131 # Magic links
1132 (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
1133 Name.Function.Magic),
1134 (r"""(?x)
1135 \bISBN {nbsp_char}
1136 (?: 97[89] {nbsp_dash}? )?
1137 (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
1138 [0-9Xx]\b
1139 """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
1140 include('list'),
1141 include('inline'),
1142 include('text'),
1143 ],
1144 'redirect-inner': [
1145 (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
1146 (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
1147 (rf'(?i)[{title_char}]+', Name.Tag),
1148 ],
1149 'list': [
1150 # Description lists
1151 (r'^;', Keyword, 'dt'),
1152 # Ordered lists, unordered lists and indents
1153 (r'^[#:*]+', Keyword),
1154 # Horizontal rules
1155 (r'^-{4,}', Keyword),
1156 ],
1157 'inline': [
1158 # Signatures
1159 (r'~{3,5}', Keyword),
1160 # Entities
1161 include('entity'),
1162 # Bold & italic
1163 (r"('')(''')(?!')", bygroups(Generic.Emph,
1164 Generic.EmphStrong), 'inline-italic-bold'),
1165 (r"'''(?!')", Generic.Strong, 'inline-bold'),
1166 (r"''(?!')", Generic.Emph, 'inline-italic'),
1167 # Comments & parameters & templates
1168 include('replaceable'),
1169 # Media links
1170 (
1171 r"""(?xi)
1172 (\[\[)
1173 (File|Image) (:)
1174 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*)
1175 (?: (\#) ([{}]*?) )?
1176 """.format(title_char, f'{title_char}#'),
1177 bygroups(Punctuation, Name.Namespace, Punctuation,
1178 using(this, state=['wikilink-name']), Punctuation, Name.Label),
1179 'medialink-inner'
1180 ),
1181 # Wikilinks
1182 (
1183 r"""(?xi)
1184 (\[\[)(?!{}) # Should not contain URLs
1185 (?: ([{}]*) (:))?
1186 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
1187 (?: (\#) ([{}]*?) )?
1188 (\]\])
1189 """.format('|'.join(protocols), title_char.replace('/', ''),
1190 title_char, f'{title_char}#'),
1191 bygroups(Punctuation, Name.Namespace, Punctuation,
1192 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
1193 ),
1194 (
1195 r"""(?xi)
1196 (\[\[)(?!{})
1197 (?: ([{}]*) (:))?
1198 ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
1199 (?: (\#) ([{}]*?) )?
1200 (\|)
1201 """.format('|'.join(protocols), title_char.replace('/', ''),
1202 title_char, f'{title_char}#'),
1203 bygroups(Punctuation, Name.Namespace, Punctuation,
1204 using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
1205 'wikilink-inner'
1206 ),
1207 # External links
1208 (
1209 r"""(?xi)
1210 (\[)
1211 ((?:{}) {} {}*)
1212 (\s*)
1213 """.format('|'.join(protocols), link_address, link_char_class),
1214 bygroups(Punctuation, Name.Label, Whitespace),
1215 'extlink-inner'
1216 ),
1217 # Tables
1218 (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
1219 Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
1220 # HTML tags
1221 (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
1222 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1223 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
1224 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1225 # <nowiki>
1226 (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
1227 Name.Tag), ('tag-nowiki', 'tag-inner')),
1228 # <pre>
1229 (r'(?i)(<)(pre)\b', bygroups(Punctuation,
1230 Name.Tag), ('tag-pre', 'tag-inner')),
1231 # <categorytree>
1232 (r'(?i)(<)(categorytree)\b', bygroups(
1233 Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
1234 # <hiero>
1235 (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
1236 Name.Tag), ('tag-hiero', 'tag-inner')),
1237 # <math>
1238 (r'(?i)(<)(math)\b', bygroups(Punctuation,
1239 Name.Tag), ('tag-math', 'tag-inner')),
1240 # <chem>
1241 (r'(?i)(<)(chem)\b', bygroups(Punctuation,
1242 Name.Tag), ('tag-chem', 'tag-inner')),
1243 # <ce>
1244 (r'(?i)(<)(ce)\b', bygroups(Punctuation,
1245 Name.Tag), ('tag-ce', 'tag-inner')),
1246 # <charinsert>
1247 (r'(?i)(<)(charinsert)\b', bygroups(
1248 Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
1249 # <templatedata>
1250 (r'(?i)(<)(templatedata)\b', bygroups(
1251 Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
1252 # <gallery>
1253 (r'(?i)(<)(gallery)\b', bygroups(
1254 Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
1255 # <graph>
1256 (r'(?i)(<)(gallery)\b', bygroups(
1257 Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
1258 # <dynamicpagelist>
1259 (r'(?i)(<)(dynamicpagelist)\b', bygroups(
1260 Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
1261 # <inputbox>
1262 (r'(?i)(<)(inputbox)\b', bygroups(
1263 Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
1264 # <rss>
1265 (r'(?i)(<)(rss)\b', bygroups(
1266 Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
1267 # <imagemap>
1268 (r'(?i)(<)(imagemap)\b', bygroups(
1269 Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
1270 # <syntaxhighlight>
1271 (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
1272 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1273 (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1274 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
1275 # <syntaxhighlight>: Fallback case for self-closing tags
1276 (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1277 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1278 # <source>
1279 (r'(?i)(</)(source)\b(\s*)(>)',
1280 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1281 (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1282 bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
1283 # <source>: Fallback case for self-closing tags
1284 (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1285 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1286 # <score>
1287 (r'(?i)(</)(score)\b(\s*)(>)',
1288 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1289 (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
1290 bygroups(Punctuation, Name.Tag, handle_score)),
1291 # <score>: Fallback case for self-closing tags
1292 (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
1293 Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
1294 # Other parser tags
1295 (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
1296 bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1297 (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
1298 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1299 # LanguageConverter markups
1300 (
1301 r"""(?xi)
1302 (-\{{) # Use {{ to escape format()
1303 ([^|]) (\|)
1304 (?:
1305 (?: ([^;]*?) (=>))?
1306 (\s* (?:{variants}) \s*) (:)
1307 )?
1308 """.format(variants='|'.join(variant_langs)),
1309 bygroups(Punctuation, Keyword, Punctuation,
1310 using(this, state=['root', 'lc-raw']),
1311 Operator, Name.Label, Punctuation),
1312 'lc-inner'
1313 ),
1314 # LanguageConverter markups: composite conversion grammar
1315 (
1316 r"""(?xi)
1317 (-\{)
1318 ([a-z\s;-]*?) (\|)
1319 """,
1320 bygroups(Punctuation,
1321 using(this, state=['root', 'lc-flag']),
1322 Punctuation),
1323 'lc-raw'
1324 ),
1325 # LanguageConverter markups: fallbacks
1326 (
1327 r"""(?xi)
1328 (-\{{) (?!\{{) # Use {{ to escape format()
1329 (?: (\s* (?:{variants}) \s*) (:))?
1330 """.format(variants='|'.join(variant_langs)),
1331 bygroups(Punctuation, Name.Label, Punctuation),
1332 'lc-inner'
1333 ),
1334 ],
1335 'wikilink-name': [
1336 include('replaceable'),
1337 (r'[^{<]+', Name.Tag),
1338 (r'(?s).', Name.Tag),
1339 ],
1340 'wikilink-inner': [
1341 # Quit in case of another wikilink
1342 (r'(?=\[\[)', Punctuation, '#pop'),
1343 (r'\]\]', Punctuation, '#pop'),
1344 include('inline'),
1345 include('text'),
1346 ],
1347 'medialink-inner': [
1348 (r'\]\]', Punctuation, '#pop'),
1349 (r'(\|)([^\n=|]*)(=)',
1350 bygroups(Punctuation, Name.Attribute, Operator)),
1351 (r'\|', Punctuation),
1352 include('inline'),
1353 include('text'),
1354 ],
1355 'quote-common': [
1356 # Quit in case of link/template endings
1357 (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
1358 (r'\n', Text, '#pop'),
1359 ],
1360 'inline-italic': [
1361 include('quote-common'),
1362 (r"('')(''')(?!')", bygroups(Generic.Emph,
1363 Generic.Strong), ('#pop', 'inline-bold')),
1364 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
1365 (r"''(?!')", Generic.Emph, '#pop'),
1366 include('inline'),
1367 include('text-italic'),
1368 ],
1369 'inline-bold': [
1370 include('quote-common'),
1371 (r"(''')('')(?!')", bygroups(
1372 Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
1373 (r"'''(?!')", Generic.Strong, '#pop'),
1374 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
1375 include('inline'),
1376 include('text-bold'),
1377 ],
1378 'inline-bold-italic': [
1379 include('quote-common'),
1380 (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
1381 Generic.Strong), '#pop'),
1382 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
1383 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
1384 include('inline'),
1385 include('text-bold-italic'),
1386 ],
1387 'inline-italic-bold': [
1388 include('quote-common'),
1389 (r"(''')('')(?!')", bygroups(
1390 Generic.EmphStrong, Generic.Emph), '#pop'),
1391 (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
1392 (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
1393 include('inline'),
1394 include('text-bold-italic'),
1395 ],
1396 'lc-flag': [
1397 (r'\s+', Whitespace),
1398 (r';', Punctuation),
1399 *text_rules(Keyword),
1400 ],
1401 'lc-inner': [
1402 (
1403 r"""(?xi)
1404 (;)
1405 (?: ([^;]*?) (=>))?
1406 (\s* (?:{variants}) \s*) (:)
1407 """.format(variants='|'.join(variant_langs)),
1408 bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
1409 Operator, Name.Label, Punctuation)
1410 ),
1411 (r';?\s*?\}-', Punctuation, '#pop'),
1412 include('inline'),
1413 include('text'),
1414 ],
1415 'lc-raw': [
1416 (r'\}-', Punctuation, '#pop'),
1417 include('inline'),
1418 include('text'),
1419 ],
1420 'replaceable': [
1421 # Comments
1422 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
1423 # Parameters
1424 (
1425 r"""(?x)
1426 (\{{3})
1427 ([^|]*?)
1428 (?=\}{3}|\|)
1429 """,
1430 bygroups(Punctuation, Name.Variable),
1431 'parameter-inner',
1432 ),
1433 # Magic variables
1434 (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
1435 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
1436 (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
1437 bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
1438 # Parser functions & templates
1439 (r'\{\{', Punctuation, 'template-begin-space'),
1440 # <tvar> legacy syntax
1441 (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
1442 Name.Tag, Punctuation, String, Punctuation)),
1443 (r'</>', Punctuation, '#pop'),
1444 # <tvar>
1445 (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
1446 (r'(?i)(</)(tvar)\b(\s*)(>)',
1447 bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
1448 ],
1449 'parameter-inner': [
1450 (r'\}{3}', Punctuation, '#pop'),
1451 (r'\|', Punctuation),
1452 include('inline'),
1453 include('text'),
1454 ],
1455 'template-begin-space': [
1456 # Templates allow line breaks at the beginning, and due to how MediaWiki handles
1457 # comments, an extra state is required to handle things like {{\n<!---->\n name}}
1458 (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
1459 (r'\s+', Whitespace),
1460 # Parser functions
1461 (
1462 r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
1463 '|'.join(parser_functions_i)),
1464 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
1465 ),
1466 (
1467 r'({})(:)'.format('|'.join(parser_functions)),
1468 bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
1469 ),
1470 # Templates
1471 (
1472 rf'(?i)([{title_char}]*?)(:)',
1473 bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
1474 ),
1475 default(('#pop', 'template-name'),),
1476 ],
1477 'template-name': [
1478 (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
1479 (r'\}\}', Punctuation, '#pop'),
1480 (r'\n', Text, '#pop'),
1481 include('replaceable'),
1482 *text_rules(Name.Tag),
1483 ],
1484 'template-inner': [
1485 (r'\}\}', Punctuation, '#pop'),
1486 (r'\|', Punctuation),
1487 (
1488 r"""(?x)
1489 (?<=\|)
1490 ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
1491 (=)
1492 """,
1493 bygroups(Name.Label, Operator)
1494 ),
1495 include('inline'),
1496 include('text'),
1497 ],
1498 'table': [
1499 # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
1500 # Endings
1501 (r'^([ \t\n\r\0\x0B]*?)(\|\})',
1502 bygroups(Whitespace, Punctuation), '#pop'),
1503 # Table rows
1504 (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
1505 using(this, state=['root', 'attr']))),
1506 # Captions
1507 (
1508 r"""(?x)
1509 ^([ \t\n\r\0\x0B]*?)(\|\+)
1510 # Exclude links, template and tags
1511 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
1512 (.*?)$
1513 """,
1514 bygroups(Whitespace, Punctuation, using(this, state=[
1515 'root', 'attr']), Punctuation, Generic.Heading),
1516 ),
1517 # Table data
1518 (
1519 r"""(?x)
1520 ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
1521 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
1522 """,
1523 bygroups(Punctuation, using(this, state=[
1524 'root', 'attr']), Punctuation),
1525 ),
1526 # Table headers
1527 (
1528 r"""(?x)
1529 ( ^(?:[ \t\n\r\0\x0B]*?)! )
1530 (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
1531 """,
1532 bygroups(Punctuation, using(this, state=[
1533 'root', 'attr']), Punctuation),
1534 'table-header',
1535 ),
1536 include('list'),
1537 include('inline'),
1538 include('text'),
1539 ],
1540 'table-header': [
1541 # Requires another state for || handling inside headers
1542 (r'\n', Text, '#pop'),
1543 (
1544 r"""(?x)
1545 (!!|\|\|)
1546 (?:
1547 ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
1548 (\|)(?!\|)
1549 )?
1550 """,
1551 bygroups(Punctuation, using(this, state=[
1552 'root', 'attr']), Punctuation)
1553 ),
1554 *text_rules(Generic.Subheading),
1555 ],
1556 'entity': [
1557 (r'&\S*?;', Name.Entity),
1558 ],
1559 'dt': [
1560 (r'\n', Text, '#pop'),
1561 include('inline'),
1562 (r':', Keyword, '#pop'),
1563 include('text'),
1564 ],
1565 'extlink-inner': [
1566 (r'\]', Punctuation, '#pop'),
1567 include('inline'),
1568 include('text'),
1569 ],
1570 'nowiki-ish': [
1571 include('entity'),
1572 include('text'),
1573 ],
1574 'attr': [
1575 include('replaceable'),
1576 (r'\s+', Whitespace),
1577 (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
1578 (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
1579 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
1580 (r'[\w:-]+', Name.Attribute),
1581
1582 ],
1583 'attr-val-0': [
1584 (r'\s', Whitespace, '#pop'),
1585 include('replaceable'),
1586 *text_rules(String),
1587 ],
1588 'attr-val-1': [
1589 (r"'", String.Single, '#pop'),
1590 include('replaceable'),
1591 *text_rules(String.Single),
1592 ],
1593 'attr-val-2': [
1594 (r'"', String.Double, '#pop'),
1595 include('replaceable'),
1596 *text_rules(String.Double),
1597 ],
1598 'tag-inner-ordinary': [
1599 (r'/?\s*>', Punctuation, '#pop'),
1600 include('tag-attr'),
1601 ],
1602 'tag-inner': [
1603 # Return to root state for self-closing tags
1604 (r'/\s*>', Punctuation, '#pop:2'),
1605 (r'\s*>', Punctuation, '#pop'),
1606 include('tag-attr'),
1607 ],
1608 # There states below are just like their non-tag variants, the key difference is
1609 # they forcibly quit when encountering tag closing markup
1610 'tag-attr': [
1611 include('replaceable'),
1612 (r'\s+', Whitespace),
1613 (r'(=)(\s*)(")', bygroups(Operator,
1614 Whitespace, String.Double), 'tag-attr-val-2'),
1615 (r"(=)(\s*)(')", bygroups(Operator,
1616 Whitespace, String.Single), 'tag-attr-val-1'),
1617 (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
1618 (r'[\w:-]+', Name.Attribute),
1619
1620 ],
1621 'tag-attr-val-0': [
1622 (r'\s', Whitespace, '#pop'),
1623 (r'/?>', Punctuation, '#pop:2'),
1624 include('replaceable'),
1625 *text_rules(String),
1626 ],
1627 'tag-attr-val-1': [
1628 (r"'", String.Single, '#pop'),
1629 (r'/?>', Punctuation, '#pop:2'),
1630 include('replaceable'),
1631 *text_rules(String.Single),
1632 ],
1633 'tag-attr-val-2': [
1634 (r'"', String.Double, '#pop'),
1635 (r'/?>', Punctuation, '#pop:2'),
1636 include('replaceable'),
1637 *text_rules(String.Double),
1638 ],
1639 'tag-nowiki': nowiki_tag_rules('nowiki'),
1640 'tag-pre': nowiki_tag_rules('pre'),
1641 'tag-categorytree': plaintext_tag_rules('categorytree'),
1642 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
1643 'tag-hiero': plaintext_tag_rules('hiero'),
1644 'tag-inputbox': plaintext_tag_rules('inputbox'),
1645 'tag-imagemap': plaintext_tag_rules('imagemap'),
1646 'tag-charinsert': plaintext_tag_rules('charinsert'),
1647 'tag-timeline': plaintext_tag_rules('timeline'),
1648 'tag-gallery': plaintext_tag_rules('gallery'),
1649 'tag-graph': plaintext_tag_rules('graph'),
1650 'tag-rss': plaintext_tag_rules('rss'),
1651 'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
1652 'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
1653 'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
1654 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
1655 'text-italic': text_rules(Generic.Emph),
1656 'text-bold': text_rules(Generic.Strong),
1657 'text-bold-italic': text_rules(Generic.EmphStrong),
1658 'text': text_rules(Text),
1659 }