Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/haskell.py: 81%
140 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for Haskell and related languages.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # Reserved words.  Entries are regex fragments, so 'infix[lr]?' covers
    # infix, infixl and infixr in a single entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-character mnemonics usable in escapes (e.g. '\NUL').
    # Some entries are character classes covering several mnemonics,
    # e.g. '[SE]TX' matches both STX and ETX.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. would be mis-lexed as a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # An identifier at the start of a line is (heuristically) a
            # function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (GHC NumericUnderscores: '_' allowed between digits)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested via '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            # Single-character escapes:
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes, e.g. \^X:
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            # ASCII mnemonic escapes, e.g. \NUL:
            ('|'.join(ascii), String.Escape, '#pop'),
            # Numeric escapes: octal, hexadecimal, decimal:
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []

    # Highlight the string argument of Hspec's spec-description combinators
    # as documentation; everything else falls through to the Haskell rules.
    tokens = {
        'root': [
            (r'(%s)(\s*)("[^"]*")' % combinator,
             bygroups(Text, Whitespace, String.Doc))
            for combinator in ('it', 'describe', 'context')
        ] + [inherit],
    }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex fragment covering
    # infix/infixl/infixr in a single entry.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-character mnemonics usable in escapes (see the
    # 'escape' state); some entries cover several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives written as '%name', e.g. %lib, %flag.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' doc comments
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested via '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, written '{! ... !}')
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Universe names with optional subscript digits, e.g. Set₁.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes (nested via '#push'/'#pop')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # Reuse the comment/character/string/escape states of the
        # Haskell lexer (see the NOTE in HaskellLexer.tokens).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-character mnemonics usable in escapes (see the
    # 'escape' state); some entries cover several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # An identifier at the start of a line is (heuristically) a
            # function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (C-style, nested via '#push'/'#pop')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin function names that the token table lexes as plain Name;
    # re-tagged to Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Wrap the base lexer, promoting EXTRA_KEYWORDS names to builtins."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the '>' marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # baselexer tokenizes the extracted code portions.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '\' or '%'.
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        # 'code' accumulates only the code lines; 'insertions' records
        # (offset-into-code, tokens) pairs for the non-code text, to be
        # re-interleaved by do_insertions() below.
        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    # '>' prefix becomes a special comment; the rest is code.
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: everything between \begin{code} and \end{code}
            # is code; the surrounding text is lexed with TexLexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX at the current code offset.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Delegate the code portions to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Delegate the code portions to a plain Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so force litstyle here;
        # the code portions go to a plain Agda lexer.
        LiterateLexer.__init__(self, AgdaLexer(**options), litstyle='latex',
                               **options)
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Delegate the code portions to a plain Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' appears twice in this list; harmless in a
    # regex alternation, but could be deduplicated upstream.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions at the start of a line
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified with 'pkg/mod/' prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # C-style block comments, nested via '#push'/'#pop'.
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # @"..." raw strings; '""' is an escaped quote.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }