Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/ml.py: 82%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2 pygments.lexers.ml
3 ~~~~~~~~~~~~~~~~~~
5 Lexers for ML family languages.
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, include, bygroups, default, words
14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
15 Number, Punctuation, Error
17__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    This lexer tracks scope with the lexer state stack: delimiters and
    scoping keywords push/pop states so that the meaning of ``|`` (case arm
    vs. clausal function definition) can be resolved, mimicking MLton's
    behaviour rather than the Definition's.
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']
    url = 'https://en.wikipedia.org/wiki/Standard_ML'
    version_added = '1.5'

    # Reserved words that look like ordinary alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved symbolic tokens; stored as regex fragments (hence r'\|').
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that are neither alphanumeric nor symbolic identifiers.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    # NOTE: this is a class-body helper evaluated at class-creation time; it
    # builds the rule list shared by the 'char' and 'string' states below.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # A gap: backslash, whitespace, backslash — ignored in SML strings.
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        # Qualifier component of a long identifier (``Foo.`` in ``Foo.bar``);
        # a reserved word in qualifier position is an error.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        # Final component of a long identifier; reserved words are errors here.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        # Plain identifier: classify as reserved keyword, reserved symbol,
        # or ordinary name.
        str = match.group(1)  # NOTE: shadows the builtin `str` locally
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            # SML uses '~' for negation, including in exponents.
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (rf'#\s*({alphanumid_re})', Name.Label),
            (rf'#\s+({symbolicid_re})', Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (rf'({alphanumid_re})(\.)', long_id_callback, "dotted"),
            (rf'({alphanumid_re})', id_callback),
            (rf'({symbolicid_re})', id_callback),
        ],
        'dotted': [
            (rf'({alphanumid_re})(\.)', long_id_callback),
            (rf'({alphanumid_re})', end_id_callback, "#pop"),
            (rf'({symbolicid_re})', end_id_callback, "#pop"),
            # Whitespace inside a long identifier is not legal.
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Lookahead-only escape hatch: pop when the next token is any
        # alphanumeric reserved word, without consuming it.
        'breakout': [
            (r'(?=\b({})\b(?!\'))'.format('|'.join(alphanumid_reserved)), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (rf'({alphanumid_re})', Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})', Name.Function, '#pop'),
            (rf'({symbolicid_re})', Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({symbolicid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({alphanumid_re})', Name.Variable, '#pop'),
            (rf'({symbolicid_re})', Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (rf'=(?!{symbolicid_re})', Punctuation, ('#pop', 'typbind')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            # datatype replication: ``datatype t = datatype u``
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (rf'=(?!{symbolicid_re})', Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (rf'(\|)(\s*)({alphanumid_re})',
             bygroups(Punctuation, Text, Name.Class)),
            (rf'(\|)(\s+)({symbolicid_re})',
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (rf'(and\b)(\s+)({alphanumid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (rf'(and\b)(\s*)({symbolicid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (rf'({alphanumid_re})|({symbolicid_re})', Name.Class),

            default('#pop'),
        ],

        # First constructor name right after ``datatype t =``.
        'datcon': [
            include('whitespace'),
            (rf'({alphanumid_re})', Name.Class, '#pop'),
            (rf'({symbolicid_re})', Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']
    version_added = '0.7'

    keywords = (
        'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    # OCaml's word-level infix operators are asr/land/lor/lsl/lsr/lxor/mod;
    # 'lsr' was previously missing and therefore lexed as a plain Name
    # (the ReasonLexer below already lists the full set).
    word_operators = ('asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals require a fraction part and/or an exponent; the
            # decimal point used to be an unescaped '.' (matching any char,
            # so e.g. '1x2e3' lexed as a float) and the exponent was
            # mandatory (so '1.5' never lexed as a float). Requiring one of
            # fraction/exponent keeps '0xFF' and plain integers matching the
            # rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # Labelled/optional arguments: ~label: and ?label:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path like Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']
    url = 'http://opalang.org'
    version_added = '1.5'

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    #                           because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # NOTE(review): the '.' in the next two rules is unescaped, so it
            # matches ANY character where the decimal point belongs (e.g.
            # 'a1e2' can lex as a float) — kept as-is to preserve the
            # established token stream; confirm before tightening.
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is a much more strict that the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only area capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (rf'({op_re})+', Operator),
            (rf'({punc_re})+', Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        # include('comments-and-spaces'),
        # (r'\)', Keyword.Type, '#pop'),
        # default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        # include('comments-and-spaces'),
        # (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
        # (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        # include('comments-and-spaces'),
        # (r'\}', Keyword.Type, '#pop'),
        # (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            # '{' opens an embedded expression; lexed by re-entering 'root'.
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']
    version_added = '2.6'

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator.Word),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals require a fraction part and/or an exponent; the
            # decimal point used to be an unescaped '.' (matching any char,
            # so e.g. '1x2e3' lexed as a float) and the exponent was
            # mandatory (so '1.5' never lexed as a float). Requiring one of
            # fraction/exponent keeps '0xFF' and plain integers matching the
            # rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            # Labelled/optional arguments: ~label: and ?label:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            # Previously only '\*' was handled here, so a lone '/' inside a
            # block comment (e.g. '/* a/b */') fell through and was emitted
            # as an Error character; catch both leftover chars.
            (r'[*/]', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path like Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
class FStarLexer(RegexLexer):
    """
    For the F* language.
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']
    version_added = '2.7'

    keywords = (
        # The missing comma after 'and' used to concatenate it with 'begin'
        # into the bogus keyword 'andbegin', so neither 'and' nor 'begin'
        # was ever highlighted.
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            # '.*' instead of '.+': a bare '//' at end of line is still a
            # comment (previously it lexed as two '/' operators).
            (r'\/\/.*$', Comment),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            (r'\b({})\b'.format('|'.join(assume_keywords)), Name.Exception),
            (r'\b({})\b'.format('|'.join(decl_keywords)), Keyword.Declaration),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals require a fraction part and/or an exponent; the
            # decimal point used to be an unescaped '.' (matching any char,
            # so e.g. '1x2e3' lexed as a float) and the exponent was
            # mandatory (so '1.5' never lexed as a float). Requiring one of
            # fraction/exponent keeps '0xFF' and plain integers matching the
            # rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path like Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }