1"""
2 pygments.lexers.markup
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for non-HTML markup languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexers.html import XmlLexer
14from pygments.lexers.javascript import JavascriptLexer
15from pygments.lexers.css import CssLexer
16from pygments.lexers.lilypond import LilyPondLexer
17from pygments.lexers.data import JsonLexer
18
19from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
20 using, this, do_insertions, default, words
21from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
22 Number, Punctuation, Generic, Other, Whitespace, Literal
23from pygments.util import get_bool_opt, ClassNotFound
24
25__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
26 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
27 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
28 'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer',
29 'WikitextLexer']
30
31
32class BBCodeLexer(RegexLexer):
33 """
34 A lexer that highlights BBCode(-like) syntax.
35 """
36
37 name = 'BBCode'
38 aliases = ['bbcode']
39 mimetypes = ['text/x-bbcode']
40 url = 'https://www.bbcode.org/'
41 version_added = '0.6'
42
43 tokens = {
44 'root': [
45 (r'[^[]+', Text),
46 # tag/end tag begin
47 (r'\[/?\w+', Keyword, 'tag'),
48 # stray bracket
49 (r'\[', Text),
50 ],
51 'tag': [
52 (r'\s+', Text),
53 # attribute with value
54 (r'(\w+)(=)("?[^\s"\]]+"?)',
55 bygroups(Name.Attribute, Operator, String)),
56 # tag argument (a la [color=green])
57 (r'(=)("?[^\s"\]]+"?)',
58 bygroups(Operator, String)),
59 # tag end
60 (r'\]', Keyword, '#pop'),
61 ],
62 }
63
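# A minimal usage sketch (not itself part of the lexer API): like every lexer
# in this module, ``BBCodeLexer`` plugs into the standard ``pygments.highlight``
# pipeline. The sample markup below is hypothetical.
def _example_bbcode_usage():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    # '[b]' and '[color=green]' become Keyword/String tokens per the 'root'
    # and 'tag' states above; plain content stays Text.
    markup = '[b]bold[/b] and [color=green]green[/color]'
    return highlight(markup, BBCodeLexer(), HtmlFormatter())
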

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                    # has two lines
                p1 * 2 + 1 == p2 and       # they are the same length
                text[p1+1] in '-=' and     # the next line both starts and ends with
                text[p1+1] == text[p2-1]): # ...a sufficiently high header
            return 0.5

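# A small sketch of the ``handlecodeblocks`` option documented above: with the
# default (True), the body of a ``.. code:: language`` directive is delegated
# to that language's lexer; with ``handlecodeblocks=False`` it is emitted as a
# plain String block. The reST snippet is hypothetical.
def _example_rst_handlecodeblocks():
    from pygments import highlight
    from pygments.formatters import NullFormatter

    rst = '.. code:: python\n\n    print(1)\n\n'
    delegated = highlight(rst, RstLexer(), NullFormatter())
    plain = highlight(rst, RstLexer(handlecodeblocks=False), NullFormatter())
    return delegated, plain
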

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

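# ``analyse_text`` above is what lets ``pygments.lexers.guess_lexer`` pick this
# lexer for input starting with a recognized TeX command; a brief sketch:
def _example_tex_guessing():
    from pygments.lexers import guess_lexer

    # '\documentclass' is one of the prefixes analyse_text checks for, so
    # guessing should normally resolve to TexLexer.
    return isinstance(guess_lexer(r'\documentclass{article}'), TexLexer)
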

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)

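# The three delegating lexers above share one pattern, sketched here: the
# preprocessor lexer runs first, and whatever it leaves as ``Other`` tokens is
# re-lexed by the base-language lexer. The input line is hypothetical.
def _example_mozpreproc_delegation():
    # '#ifdef'/'#endif' lines go through MozPreprocHashLexer; the XML markup
    # in between is handled by XmlLexer.
    lexer = MozPreprocXulLexer()
    return list(lexer.get_tokens('#ifdef XP_WIN\n<window/>\n#endif\n'))
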

class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start('code'), String, code
        else:
            # FIXME: aren't the offsets wrong?
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
                ^(?P<initial>\s*```)
                (?P<lang>[\w\-]+)
                (?P<afterlang>
                    (?P<whitespace>[^\S\n]+)
                    (?P<extra>.*))?
                (?P<newline>\n)
                (?P<code>(.|\n)*?)
                (?P<terminator>^\s*```$\n)
             ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

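# A sketch of the fenced-code handling in ``_handle_codeblock`` above: a known
# language name after the opening fence delegates the body to that language's
# lexer, while an unknown name falls back to a plain String block. The
# snippets are hypothetical.
def _example_markdown_fences():
    delegated = list(MarkdownLexer().get_tokens('```python\nprint(1)\n```\n'))
    plain = list(MarkdownLexer().get_tokens('```no-such-lang\nprint(1)\n```\n'))
    return delegated, plain

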
class OrgLexer(RegexLexer):
    """
    For Org Mode markup.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            (r'^# .*', Comment.Single),

            # Headings
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$', bygroups(Generic.Subheading, Generic.Emph)),

            # Unordered list items, including TODO items and description items
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword, Generic.Prompt, Name.Label)),

            # Ordered list items
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?', bygroups(Whitespace, Keyword, Generic.Emph)),

            # Dynamic blocks
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # Comment blocks
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # Source code blocks
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Other blocks
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # Keywords
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # Properties and drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # Line break operator
            (r'\\\\$', Operator),

            # Deadline, Scheduled, CLOSED
            (r'(?i)^( *(?:DEADLINE|SCHEDULED): )(<.+?> *)$',
             bygroups(Generic.Error, Literal.Date)),
            (r'(?i)^( *CLOSED: )(\[.+?\] *)$',
             bygroups(Generic.Deleted, Literal.Date)),

            # Bold
            (_inline(r'\*', r'\*+'), Generic.Strong),
            # Italic
            (_inline(r'/', r'/'), Generic.Emph),
            # Verbatim
            (_inline(r'=', r'='), String),  # TODO token
            # Code
            (_inline(r'~', r'~'), String),
            # Strikethrough
            (_inline(r'\+', r'\+'), Generic.Deleted),
            # Underline
            (_inline(r'_', r'_+'), Generic.EmphStrong),

            # Dates
            (r'<.+?>', Literal.Date),
            # Macros
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # Footnotes
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # Links
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag, Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])', bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)', bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Tables
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Any other text
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
    }


class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag, 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(3), String, code
            return

        yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)


class WikitextLexer(RegexLexer):
    """
    For MediaWiki Wikitext.

    Parsing Wikitext is tricky, and results vary between different MediaWiki
    installations, so we only highlight common syntaxes (built-in or from
    popular extensions), and also assume templates produce no unbalanced
    syntaxes.
    """
    name = 'Wikitext'
    url = 'https://www.mediawiki.org/wiki/Wikitext'
    aliases = ['wikitext', 'mediawiki']
    filenames = []
    mimetypes = ['text/x-wiki']
    version_added = '2.15'
    flags = re.MULTILINE

    def nowiki_tag_rules(tag_name):
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            include('entity'),
            include('text'),
        ]

    def plaintext_tag_rules(tag_name):
        return [
            (rf'(?si)(.*?)(</)({tag_name})(\s*)(>)', bygroups(Text,
             Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'),
        ]

    def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
        return [
            (rf'(?i)(</)({tag_name})(\s*)(>)', bygroups(Punctuation,
             Name.Tag, Whitespace, Punctuation), '#pop'),
            (rf'(?si).+?(?=</{tag_name}\s*>)', using(lexer, **lexer_kwargs)),
        ]

    def text_rules(token):
        return [
            (r'\w+', token),
            (r'[^\S\n]+', token),
            (r'(?s).', token),
        ]

    def handle_syntaxhighlight(self, match, ctx):
        from pygments.lexers import get_lexer_by_name

        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lexer = None
        content = attr_content[index+1:]
        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

        if len(lang_match) >= 1:
            # Pick the last match in case of multiple matches
            lang = lang_match[-1][1]
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        if lexer is None:
            yield match.start() + index + 1, Text, content
        else:
            yield from lexer.get_tokens_unprocessed(content)

    def handle_score(self, match, ctx):
        attr_content = match.group()
        start = 0
        index = 0
        while True:
            index = attr_content.find('>', start)
            # Exclude comment end (-->)
            if attr_content[index-2:index] != '--':
                break
            start = index + 1

        if index == -1:
            # No tag end
            yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr'])
            return
        attr = attr_content[:index]
        content = attr_content[index+1:]
        yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr'])
        yield match.start(3) + index, Punctuation, '>'

        lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond'

        if lang == 'lilypond':  # Case sensitive
            yield from LilyPondLexer().get_tokens_unprocessed(content)
        else:  # ABC
            # FIXME: Use ABC lexer in the future
            yield match.start() + index + 1, Text, content

    # a-z removed to prevent linter from complaining, REMEMBER to use (?i)
    title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
    nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
    link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
    link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
    double_slashes_i = {
        '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
        '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
    }
    double_slashes = {
        '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
        '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
        '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
    }
    protocols = {
        'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
        'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
        'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
        'worldwind://', 'xmpp:', '//',
    }
    non_relative_protocols = protocols - {'//'}
    html_tags = {
        'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
        'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
        'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
        'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
        'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
    }
    parser_tags = {
        'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
        'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
        'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
        'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
        'maplink', 'ce', 'references',
    }
    variant_langs = {
        # ZhConverter.php
        'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
        # WuuConverter.php
        'wuu', 'wuu-hans', 'wuu-hant',
        # UzConverter.php
        'uz', 'uz-latn', 'uz-cyrl',
        # TlyConverter.php
        'tly', 'tly-cyrl',
        # TgConverter.php
        'tg', 'tg-latn',
        # SrConverter.php
        'sr', 'sr-ec', 'sr-el',
        # ShiConverter.php
        'shi', 'shi-tfng', 'shi-latn',
        # ShConverter.php
        'sh-latn', 'sh-cyrl',
        # KuConverter.php
        'ku', 'ku-arab', 'ku-latn',
        # IuConverter.php
        'iu', 'ike-cans', 'ike-latn',
        # GanConverter.php
        'gan', 'gan-hans', 'gan-hant',
        # EnConverter.php
        'en', 'en-x-piglatin',
        # CrhConverter.php
        'crh', 'crh-cyrl', 'crh-latn',
        # BanConverter.php
        'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
    }
    magic_vars_i = {
        'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
    }
    magic_vars = {
        '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
        'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
        'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
        'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
        'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
        'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
        'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
        'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
        'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
        'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
        'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
        'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    }
    parser_functions_i = {
        'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
        'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
        'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
        'URLENCODE',
    }
    parser_functions = {
        'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
        'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
        'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
        'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
        'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
        'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
        'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
        'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
        'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
        'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
        'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
    }

    tokens = {
        'root': [
            # Redirects
            (r"""(?xi)
                (\A\s*?)(\#REDIRECT:?) # may contain a colon
                (\s+)(\[\[) (?=[^\]\n]* \]\]$)
            """,
             bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
            # Subheadings
            (r'^(={2,6})(.+?)(\1)(\s*$\n)',
             bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
            # Headings
            (r'^(=.+?=)(\s*$\n)',
             bygroups(Generic.Heading, Whitespace)),
            # Double-slashed magic words
            (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
            (words(double_slashes), Name.Function.Magic),
            # Raw URLs
            (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
             link_address, link_char_class), Name.Label),
            # Magic links
            (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
             Name.Function.Magic),
            (r"""(?x)
                \bISBN {nbsp_char}
                (?: 97[89] {nbsp_dash}? )?
                (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
                [0-9Xx]\b
            """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'redirect-inner': [
            (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
            (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
            (rf'(?i)[{title_char}]+', Name.Tag),
        ],
        'list': [
            # Description lists
            (r'^;', Keyword, 'dt'),
            # Ordered lists, unordered lists and indents
            (r'^[#:*]+', Keyword),
            # Horizontal rules
            (r'^-{4,}', Keyword),
        ],
        'inline': [
            # Signatures
            (r'~{3,5}', Keyword),
            # Entities
            include('entity'),
            # Bold & italic
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.EmphStrong), 'inline-italic-bold'),
            (r"'''(?!')", Generic.Strong, 'inline-bold'),
            (r"''(?!')", Generic.Emph, 'inline-italic'),
            # Comments & parameters & templates
            include('replaceable'),
            # Media links
            (
                r"""(?xi)
                (\[\[)
                    (File|Image) (:)
                    ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*)
                    (?: (\#) ([{}]*?) )?
                """.format(title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label),
                'medialink-inner'
            ),
            # Wikilinks
            (
                r"""(?xi)
                (\[\[)(?!{}) # Should not contain URLs
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
                (\]\])
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
            ),
            (
                r"""(?xi)
                (\[\[)(?!{})
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
                (\|)
                """.format('|'.join(protocols), title_char.replace('/', ''),
                           title_char, f'{title_char}#'),
                bygroups(Punctuation, Name.Namespace, Punctuation,
                         using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
                'wikilink-inner'
            ),
            # External links
            (
                r"""(?xi)
                (\[)
                ((?:{}) {} {}*)
                (\s*)
                """.format('|'.join(protocols), link_address, link_char_class),
                bygroups(Punctuation, Name.Label, Whitespace),
                'extlink-inner'
            ),
            # Tables
            (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
             Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
            # HTML tags
            (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # <nowiki>
            (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
             Name.Tag), ('tag-nowiki', 'tag-inner')),
            # <pre>
            (r'(?i)(<)(pre)\b', bygroups(Punctuation,
             Name.Tag), ('tag-pre', 'tag-inner')),
            # <categorytree>
            (r'(?i)(<)(categorytree)\b', bygroups(
                Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
            # <hiero>
            (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
             Name.Tag), ('tag-hiero', 'tag-inner')),
            # <math>
            (r'(?i)(<)(math)\b', bygroups(Punctuation,
             Name.Tag), ('tag-math', 'tag-inner')),
            # <chem>
            (r'(?i)(<)(chem)\b', bygroups(Punctuation,
             Name.Tag), ('tag-chem', 'tag-inner')),
            # <ce>
            (r'(?i)(<)(ce)\b', bygroups(Punctuation,
             Name.Tag), ('tag-ce', 'tag-inner')),
            # <charinsert>
            (r'(?i)(<)(charinsert)\b', bygroups(
                Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
            # <templatedata>
            (r'(?i)(<)(templatedata)\b', bygroups(
                Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
            # <gallery>
            (r'(?i)(<)(gallery)\b', bygroups(
                Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
# <graph>
            (r'(?i)(<)(graph)\b', bygroups(
                Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
            # <dynamicpagelist>
            (r'(?i)(<)(dynamicpagelist)\b', bygroups(
                Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
            # <inputbox>
            (r'(?i)(<)(inputbox)\b', bygroups(
                Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
            # <rss>
            (r'(?i)(<)(rss)\b', bygroups(
                Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
            # <imagemap>
            (r'(?i)(<)(imagemap)\b', bygroups(
                Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
            # <syntaxhighlight>
            (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <syntaxhighlight>: Fallback case for self-closing tags
            (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <source>
            (r'(?i)(</)(source)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
            # <source>: Fallback case for self-closing tags
            (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # <score>
            (r'(?i)(</)(score)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
             bygroups(Punctuation, Name.Tag, handle_score)),
            # <score>: Fallback case for self-closing tags
            (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
                Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
            # Other parser tags
            (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
            # LanguageConverter markups
            (
                r"""(?xi)
                (-\{{) # Use {{ to escape format()
                ([^|]) (\|)
                (?:
                    (?: ([^;]*?) (=>))?
                    (\s* (?:{variants}) \s*) (:)
                )?
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Keyword, Punctuation,
                         using(this, state=['root', 'lc-raw']),
                         Operator, Name.Label, Punctuation),
                'lc-inner'
            ),
            # LanguageConverter markups: composite conversion grammar
            (
                r"""(?xi)
                (-\{)
                ([a-z\s;-]*?) (\|)
                """,
                bygroups(Punctuation,
                         using(this, state=['root', 'lc-flag']),
                         Punctuation),
                'lc-raw'
            ),
            # LanguageConverter markups: fallbacks
            (
                r"""(?xi)
                (-\{{) (?!\{{) # Use {{ to escape format()
                (?: (\s* (?:{variants}) \s*) (:))?
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, Name.Label, Punctuation),
                'lc-inner'
            ),
        ],
        'wikilink-name': [
            include('replaceable'),
            (r'[^{<]+', Name.Tag),
            (r'(?s).', Name.Tag),
        ],
        'wikilink-inner': [
            # Quit in case of another wikilink
            (r'(?=\[\[)', Punctuation, '#pop'),
            (r'\]\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'medialink-inner': [
            (r'\]\]', Punctuation, '#pop'),
            (r'(\|)([^\n=|]*)(=)',
             bygroups(Punctuation, Name.Attribute, Operator)),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'quote-common': [
            # Quit in case of link/template endings
            (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
        ],
        'inline-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.Emph,
             Generic.Strong), ('#pop', 'inline-bold')),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
            (r"''(?!')", Generic.Emph, '#pop'),
            include('inline'),
            include('text-italic'),
        ],
        'inline-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
            (r"'''(?!')", Generic.Strong, '#pop'),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
            include('inline'),
            include('text-bold'),
        ],
        'inline-bold-italic': [
            include('quote-common'),
            (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
             Generic.Strong), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'inline-italic-bold': [
            include('quote-common'),
            (r"(''')('')(?!')", bygroups(
                Generic.EmphStrong, Generic.Emph), '#pop'),
            (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
            (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
            include('inline'),
            include('text-bold-italic'),
        ],
        'lc-flag': [
            (r'\s+', Whitespace),
            (r';', Punctuation),
            *text_rules(Keyword),
        ],
        'lc-inner': [
            (
                r"""(?xi)
                (;)
                (?: ([^;]*?) (=>))?
                (\s* (?:{variants}) \s*) (:)
                """.format(variants='|'.join(variant_langs)),
                bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
                         Operator, Name.Label, Punctuation)
            ),
            (r';?\s*?\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'lc-raw': [
            (r'\}-', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'replaceable': [
            # Comments
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            # Parameters
            (
                r"""(?x)
                (\{{3})
                    ([^|]*?)
                (?=\}{3}|\|)
                """,
                bygroups(Punctuation, Name.Variable),
                'parameter-inner',
            ),
            # Magic variables
            (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
             bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
            # Parser functions & templates
            (r'\{\{', Punctuation, 'template-begin-space'),
            # <tvar> legacy syntax
            (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
             Name.Tag, Punctuation, String, Punctuation)),
            (r'</>', Punctuation, '#pop'),
            # <tvar>
            (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
            (r'(?i)(</)(tvar)\b(\s*)(>)',
             bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        ],
        'parameter-inner': [
            (r'\}{3}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            include('inline'),
            include('text'),
        ],
        'template-begin-space': [
            # Templates allow line breaks at the beginning, and due to how MediaWiki handles
            # comments, an extra state is required to handle things like {{\n<!---->\n name}}
            (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
            (r'\s+', Whitespace),
            # Parser functions
            (
                r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
                                               '|'.join(parser_functions_i)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            (
                r'({})(:)'.format('|'.join(parser_functions)),
                bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
            ),
            # Templates
            (
                rf'(?i)([{title_char}]*?)(:)',
                bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
            ),
            default(('#pop', 'template-name'),),
        ],
        'template-name': [
            (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
            (r'\}\}', Punctuation, '#pop'),
            (r'\n', Text, '#pop'),
            include('replaceable'),
            *text_rules(Name.Tag),
        ],
        'template-inner': [
            (r'\}\}', Punctuation, '#pop'),
            (r'\|', Punctuation),
            (
                r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
                """,
                bygroups(Name.Label, Operator)
            ),
            include('inline'),
            include('text'),
        ],
        'table': [
            # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
            # Endings
            (r'^([ \t\n\r\0\x0B]*?)(\|\})',
             bygroups(Whitespace, Punctuation), '#pop'),
            # Table rows
            (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
             using(this, state=['root', 'attr']))),
            # Captions
            (
                r"""(?x)
                ^([ \t\n\r\0\x0B]*?)(\|\+)
                # Exclude links, template and tags
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
                (.*?)$
                """,
                bygroups(Whitespace, Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation, Generic.Heading),
            ),
            # Table data
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
            ),
            # Table headers
            (
                r"""(?x)
                ( ^(?:[ \t\n\r\0\x0B]*?)! )
                (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation),
                'table-header',
            ),
            include('list'),
            include('inline'),
            include('text'),
        ],
        'table-header': [
            # Requires another state for || handling inside headers
            (r'\n', Text, '#pop'),
            (
                r"""(?x)
                (!!|\|\|)
                (?:
                    ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                    (\|)(?!\|)
                )?
                """,
                bygroups(Punctuation, using(this, state=[
                    'root', 'attr']), Punctuation)
            ),
            *text_rules(Generic.Subheading),
        ],
        'entity': [
            (r'&\S*?;', Name.Entity),
        ],
        'dt': [
            (r'\n', Text, '#pop'),
            include('inline'),
            (r':', Keyword, '#pop'),
            include('text'),
        ],
        'extlink-inner': [
            (r'\]', Punctuation, '#pop'),
            include('inline'),
            include('text'),
        ],
        'nowiki-ish': [
            include('entity'),
            include('text'),
        ],
        'attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            include('replaceable'),
            *text_rules(String),
        ],
        'attr-val-1': [
            (r"'", String.Single, '#pop'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'attr-val-2': [
            (r'"', String.Double, '#pop'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-inner-ordinary': [
            (r'/?\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
        'tag-inner': [
            # Return to root state for self-closing tags
            (r'/\s*>', Punctuation, '#pop:2'),
            (r'\s*>', Punctuation, '#pop'),
            include('tag-attr'),
        ],
# The states below are just like their non-tag variants; the key difference
        # is that they forcibly quit when encountering tag closing markup
        'tag-attr': [
            include('replaceable'),
            (r'\s+', Whitespace),
            (r'(=)(\s*)(")', bygroups(Operator,
             Whitespace, String.Double), 'tag-attr-val-2'),
            (r"(=)(\s*)(')", bygroups(Operator,
             Whitespace, String.Single), 'tag-attr-val-1'),
            (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
            (r'[\w:-]+', Name.Attribute),
        ],
        'tag-attr-val-0': [
            (r'\s', Whitespace, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String),
        ],
        'tag-attr-val-1': [
            (r"'", String.Single, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Single),
        ],
        'tag-attr-val-2': [
            (r'"', String.Double, '#pop'),
            (r'/?>', Punctuation, '#pop:2'),
            include('replaceable'),
            *text_rules(String.Double),
        ],
        'tag-nowiki': nowiki_tag_rules('nowiki'),
        'tag-pre': nowiki_tag_rules('pre'),
        'tag-categorytree': plaintext_tag_rules('categorytree'),
        'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
        'tag-hiero': plaintext_tag_rules('hiero'),
        'tag-inputbox': plaintext_tag_rules('inputbox'),
        'tag-imagemap': plaintext_tag_rules('imagemap'),
        'tag-charinsert': plaintext_tag_rules('charinsert'),
        'tag-timeline': plaintext_tag_rules('timeline'),
        'tag-gallery': plaintext_tag_rules('gallery'),
        'tag-graph': plaintext_tag_rules('graph'),
        'tag-rss': plaintext_tag_rules('rss'),
        'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
        'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
        'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
        'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
        'text-italic': text_rules(Generic.Emph),
        'text-bold': text_rules(Generic.Strong),
        'text-bold-italic': text_rules(Generic.EmphStrong),
        'text': text_rules(Text),
    }
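
# A closing usage sketch (hypothetical input): callers normally reach this
# lexer through its registered aliases rather than by instantiating the class,
# and a <syntaxhighlight lang="..."> body is delegated via
# ``handle_syntaxhighlight`` above to the named language's lexer.
def _example_wikitext_usage():
    from pygments.lexers import get_lexer_by_name

    lexer = get_lexer_by_name('wikitext')
    text = ("'''bold''' text\n"
            '<syntaxhighlight lang="python">print(1)</syntaxhighlight>\n')
    return list(lexer.get_tokens(text))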