1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Haskell and related languages.
6
7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
18
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
22
23
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    The rules in ``tokens`` are order-dependent: earlier patterns take
    priority, so e.g. character literals must be matched before the
    single-quote rules for promoted (DataKinds-style) names.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics, e.g.
    # '[SE]TX' matches both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: "--" only counts as a comment when not followed
            # by an operator symbol character ("-->" is an operator).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # character literal
            (r"'\\.'", String.Char),  # escape character literal (e.g. '\n')
            # A lowercase identifier at the start of a line is taken to be
            # a function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores are GHC NumericUnderscores separators)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # After the "import" keyword: module names, "qualified", "as",
        # "hiding" and the optional export/import list.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # After the "module" keyword: module name plus optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import/export lists; see the note on '(' below for
        # how nesting is handled.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments ({- -} comments nest, hence #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash-whitespace-backslash continues a string.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
155
156
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    # Highlight the string argument of Hspec's `it`, `describe` and
    # `context` combinators as documentation; everything else falls
    # through to the inherited Haskell rules.
    tokens = {
        'root': [
            *[(rf'({combinator})(\s*)("[^"]*")',
               bygroups(Text, Whitespace, String.Doc))
              for combinator in ('it', 'describe', 'context')],
            inherit,
        ],
    }
176
177
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    Rule order in ``tokens`` matters: comments and declarations are tried
    before the generic identifier/operator rules.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words, including tactic-script keywords; 'infix[lr]?' is a
    # regex alternative covering infix/infixl/infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives written as "%name" pragmas.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # "|||" introduces an Idris documentation comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # After "import"/"module": module name plus optional import list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import lists; '(' pushes two states so that the
        # matching ')' rule can '#pop:2' (see HACK note below).
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments ({- -} comments nest, hence #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash-whitespace-backslash continues a string.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
287
288
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    The 'comment', 'character', 'string' and 'escape' states are reused
    directly from :class:`HaskellLexer` (see the assignments at the bottom
    of ``tokens``).
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    # Agda reserved words.
    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments ("--" only when not followed by an operator char)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes: interactive goal markers written {! ... !}
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop universes, optionally with subscript-digit levels
            # (U+2080..U+2089), e.g. Set₁.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # λ (U+03BB), ∀ (U+2200), → (U+2192) and their ASCII forms.
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Goal holes {! ... !} may nest, hence #push/#pop.
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # States shared with the Haskell lexer (see NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
360
361
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Identifiers matching :data:`EXTRA_KEYWORDS` are re-tagged as builtins
    in :meth:`get_tokens_unprocessed`.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    # Cryptol reserved words.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Mnemonics of ASCII control characters allowed in escape sequences
    # (some entries are regex patterns covering several mnemonics).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            # A lowercase identifier at the start of a line is taken to be
            # a function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # After "import": module name, "qualified", "as", "hiding" and the
        # optional import list.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized import lists; '(' pushes two states so the matching
        # ')' rule can '#pop:2' (see HACK note below).
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # /* ... */ comments nest, hence #push/#pop.
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # Escape sequences inside character/string literals; entered after
        # a backslash has already been consumed.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin function names that should be highlighted as builtins even
    # though the regexes above tag them as plain Name.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Post-process the token stream, promoting names listed in
        ``EXTRA_KEYWORDS`` to ``Name.Builtin``."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
497
498
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # Lexer used to highlight the extracted code fragments.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split *text* into code and prose, lex the code with the base
        lexer, and weave the prose tokens back in via ``do_insertions``."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX documents normally begin with a "\" command
            # or a "%" comment; anything else is treated as Bird-style.
            # (Replaces the old, error-prone ``cond and 'latex' or 'bird'``
            # idiom; note an empty source also selects 'latex' because
            # '' in '%\\' is True, which is preserved here.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: ">"-prefixed lines are code, all other lines are
            # prose inserted verbatim as Text.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything outside is lexed as TeX.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated TeX prose before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing TeX prose after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
561
562
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate the code portions of the document to a Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
584
585
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate the code portions of the document to an Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
607
608
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Literate Agda is always LaTeX-based, so the style is forced here.
        LiterateLexer.__init__(self, AgdaLexer(**options),
                               litstyle='latex', **options)
630
631
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate the code portions of the document to a Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
653
654
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    The 'type' family of states highlights type expressions entered after
    "::"/":" or after a type-introducing keyword; 'type-content' pops back
    to the previous state on anything it does not recognize.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    # Reserved words.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions; the name may also be a
            # parenthesized operator like (+) or (/).
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified by "module/" prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (@"..." with "" as the quote escape)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # pop before a non-empty parameter list so 'root' handles it
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: leave type mode on anything unrecognized
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        # /* ... */ comments nest, hence #push/#pop.
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." raw strings: a doubled quote is an escaped quote.
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }