1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Haskell and related languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
18
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
22
23
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    The rules in ``tokens`` are order-dependent: earlier patterns take
    priority, so e.g. character literals must be matched before the
    single-quote rules for promoted (DataKinds-style) names.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics, e.g.
    # '[SE]TX' matches both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: "--" only counts as a comment when not followed
            # by an operator symbol character ("-->" is an operator).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # character literal
            (r"'\\.'", String.Char),  # escape character literal (e.g. '\n')
            # A lowercase identifier at the start of a line is taken to be
            # a function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores are GHC NumericUnderscores separators)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # After the "import" keyword: module names, "qualified", "as",
        # "hiding" and the optional export/import list.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # After the "module" keyword: module name plus optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import/export lists; see the note on '(' below for
        # how nesting is handled.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments ({- -} comments nest, hence #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash-whitespace-backslash continues a string.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
155
156
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    # Highlight the string argument of Hspec's `it`, `describe` and
    # `context` combinators as documentation; everything else falls
    # through to the inherited Haskell rules.
    tokens = {
        'root': [
            *[(rf'({combinator})(\s*)("[^"]*")',
               bygroups(Text, Whitespace, String.Doc))
              for combinator in ('it', 'describe', 'context')],
            inherit,
        ],
    }
176
177
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    Rule order in ``tokens`` matters: comments and declarations are tried
    before the generic identifier/operator rules.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words, including tactic-script keywords; 'infix[lr]?' is a
    # regex alternative covering infix/infixl/infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives written as "%name" pragmas.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # "|||" introduces an Idris documentation comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # After "import"/"module": module name plus optional import list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import lists; '(' pushes two states so that the
        # matching ')' rule can '#pop:2' (see HACK note below).
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments ({- -} comments nest, hence #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash-whitespace-backslash continues a string.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
287
288
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    The 'comment', 'character', 'string' and 'escape' states are reused
    directly from :class:`HaskellLexer` (see the assignments at the bottom
    of ``tokens``).
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    # Agda reserved words.
    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments ("--" only when not followed by an operator char)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes: interactive goal markers written {! ... !}
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop universes, optionally with subscript-digit levels
            # (U+2080..U+2089), e.g. Set₁.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # λ (U+03BB), ∀ (U+2200), → (U+2192) and their ASCII forms.
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Goal holes {! ... !} may nest, hence #push/#pop.
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # States shared with the Haskell lexer (see NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
360
361
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Identifiers matching :data:`EXTRA_KEYWORDS` are re-tagged as builtins
    in :meth:`get_tokens_unprocessed`.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    # Cryptol reserved words.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            # A lowercase identifier at the start of a line is taken to be
            # a function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # After "import": module name, "qualified", "as", "hiding" and the
        # optional import list.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import lists; '(' pushes two states so the matching
        # ')' rule can '#pop:2' (see HACK note below).
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # /* ... */ comments nest, hence #push/#pop.
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin function names that should be highlighted as builtins even
    # though the regexes above tag them as plain Name.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Post-process the token stream, promoting names listed in
        ``EXTRA_KEYWORDS`` to ``Name.Builtin``."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
497
498
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # Lexer used to highlight the extracted code fragments.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split *text* into code and prose, lex the code with the base
        lexer, and weave the prose tokens back in via ``do_insertions``."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX documents normally begin with a "\" command
            # or a "%" comment; anything else is treated as Bird-style.
            # (Replaces the old, error-prone ``cond and 'latex' or 'bird'``
            # idiom; note an empty source also selects 'latex' because
            # '' in '%\\' is True, which is preserved here.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: ">"-prefixed lines are code, all other lines are
            # prose inserted verbatim as Text.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything outside is lexed as TeX.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated TeX prose before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing TeX prose after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
561
562
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate the code portions of the document to a Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
584
585
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate the code portions of the document to an Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
607
608
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Literate Agda is always LaTeX-based, so the style is forced here.
        LiterateLexer.__init__(self, AgdaLexer(**options),
                               litstyle='latex', **options)
630
631
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate the code portions of the document to a Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
653
654
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    The 'type' family of states highlights type expressions entered after
    "::"/":" or after a type-introducing keyword; 'type-content' pops back
    to the previous state on anything it does not recognize.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    # Reserved words.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions; the name may also be a
            # parenthesized operator like (+) or (/).
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified by "module/" prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (@"..." with "" as the quote escape)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # pop before a non-empty parameter list so 'root' handles it
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: leave type mode on anything unrecognized
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        # /* ... */ comments nest, hence #push/#pop.
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." raw strings: a doubled quote is an escaped quote.
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }