1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Haskell and related languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
18
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
22
23
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    Tokenizes comments, reserved words, identifiers (functions, names,
    types, including GHC promoted-type syntax), operators, numeric
    literals (with underscore digit separators), and character/string
    literals with Haskell escape sequences.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words.  Each entry is a regex alternative, so
    # 'infix[lr]?' covers 'infix', 'infixl' and 'infixr' in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-code mnemonics accepted in escape sequences.  Some
    # entries are character classes covering several mnemonics at once
    # (e.g. '[SE]TX' matches both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # A line comment only counts if '--' is not the prefix of a
            # longer operator (e.g. '-->' is an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            # Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # Identifiers at the start of a line are function definitions.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            # Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            # Numbers ('_' is allowed as a digit separator throughout,
            # per GHC's NumericUnderscores extension)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            # Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # Everything after the 'import' keyword: optional 'qualified',
        # module name, optional 'as'/'hiding' clauses and import lists.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # Module header: module name and optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push a pair of
        # 'funclist' states so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested {- -} push/pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string:
        # named escapes, control codes, octal/hex/decimal char codes, or a
        # string gap (whitespace between backslashes).
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
154
155
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    Extends :class:`HaskellLexer` by highlighting the string argument of
    the Hspec combinators ``it``, ``describe`` and ``context`` as
    documentation strings; all other input falls through to the inherited
    Haskell rules.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    tokens = {
        'root': [
            # Hspec spec descriptions: it/describe/context "..."
            (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            # Fall back to the regular Haskell rules.
            inherit,
        ],
    }
175
176
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.  Handles Idris compiler
    directives (``%lib``, ``%link``, ...), ``|||`` doc comments, top-level
    type declarations, and the usual comments, keywords, numbers and
    string/character literals.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern',  'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-code mnemonics accepted in escape sequences; some
    # entries are character classes matching several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives recognized after a leading '%'.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            # '--' only starts a comment if not part of a longer operator.
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' documentation comments.
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration (name : type)
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            # Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            # Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Module name after 'import'/'module', with optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push two states
        # so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested {- -} push/pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
286
287
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    Handles Agda keywords, goal holes (``{! ... !}``), ``Set``/``Prop``
    universe levels with subscripts, and Unicode operators; the comment,
    character, string and escape states are reused from
    :class:`HaskellLexer`.
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    # Agda reserved words.
    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration (name : type)
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments ('--' only if not part of a longer operator)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, possibly nested)
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop with optional Unicode subscript level digits.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Includes the Unicode lambda, forall and arrow symbols.
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Goal holes may nest, hence the push/pop pair.
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        # Module name after 'import'/'module'.
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
359
360
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Cryptol uses C-style comments (``//`` and ``/* ... */``) but otherwise
    follows a Haskell-like lexical structure.  A small set of builtin
    function names (``EXTRA_KEYWORDS``) is re-tagged as ``Name.Builtin``
    in :meth:`get_tokens_unprocessed`.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    # Reserved words of Cryptol.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-code mnemonics accepted in escape sequences; some
    # entries are character classes matching several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            # Identifiers at the start of a line are function definitions.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # Everything after the 'import' keyword: optional 'qualified',
        # module name, optional 'as'/'hiding' clauses and import lists.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # Module header: module name and optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push two states
        # so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # C-style nested block comments.
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin Cryptol function names; re-tagged as Name.Builtin below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Run the regex lexer, then promote any plain Name token whose
        value is one of EXTRA_KEYWORDS to Name.Builtin."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
496
497
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker (plus trailing blanks) in
    # group 1, the actual code in group 2.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` is the lexer used for the extracted code fragments.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Separate code from markup, lex both, and interleave the token
        streams with `do_insertions` so positions stay consistent."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '%' (comment) or '\'
            # (a command); everything else is treated as Bird-style.
            # (A conditional expression replaces the fragile pre-PEP 308
            # ``cond and a or b`` idiom used previously.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: ">"-prefixed lines are code, everything else prose
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # the surrounding prose is handed to a TeX lexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any LaTeX remaining after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
560
561
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
583
584
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to an Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
606
607
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, LaTeX
        style is assumed (unlike the other literate lexers, which
        autodetect).
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        # Default to LaTeX style for .lagda files.  Previously this passed
        # litstyle='latex' alongside **options, which raised
        # "TypeError: got multiple values for keyword argument 'litstyle'"
        # whenever the caller supplied an explicit litstyle option;
        # setdefault keeps the default while honoring a caller override.
        options.setdefault('litstyle', 'latex')
        LiterateLexer.__init__(self, agdalexer, **options)
629
630
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to a Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
652
653
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    Uses a separate set of states for type expressions (entered after
    ``:``/``::`` and the type-declaration keywords) so type names can be
    highlighted differently from value-level identifiers.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    # Value-level keywords.
    # NOTE(review): 'private' appears twice in this list; harmless in the
    # regex alternation but the duplicate could be removed.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions: single names for the token types used in
    # type expressions, so they can be changed in one place.
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions; the name may be an identifier or a
            # parenthesized operator like (+).
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified by a module path like mod/name)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (@"..." with "" as the escaped quote)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            # A value-level keyword ends the type; the lookahead matches
            # without consuming so 'root' can re-handle the keyword.
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything else leaves type mode
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            # line-leading '#' is a preprocessor-style directive
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        # nested block comments
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." raw strings: '""' is the only escape (a literal quote)
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }