1"""
2 pygments.lexers.ml
3 ~~~~~~~~~~~~~~~~~~
4
5 Lexers for ML family languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, include, bygroups, default, words
14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
15 Number, Punctuation, Error
16
17__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
18
19
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    Scope-delimiting tokens (``let``/``in``/``end``, brackets, ...) push and
    pop lexer states so that ``|`` can be told apart: is it separating the
    arms of a case/handle expression, or clauses of a function definition?
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']
    url = 'https://en.wikipedia.org/wiki/Standard_ML'
    version_added = '1.5'

    # Reserved words that have the shape of alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved words made of symbolic characters.
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that can never be part of an identifier.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        """Build the rules of a string state emitting *whatkind* tokens."""
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        """Emit one qualifying module name plus the dot that follows it.

        A reserved word is not a legal structure name, so flag it as Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        """Emit the last component of a dotted (long) identifier.

        Reserved words cannot appear after a dot, so they become Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        """Classify a bare identifier: reserved word, punctuation, or name."""
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (rf'#\s*({alphanumid_re})', Name.Label),
            (rf'#\s+({symbolicid_re})', Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (rf'({alphanumid_re})(\.)', long_id_callback, "dotted"),
            (rf'({alphanumid_re})', id_callback),
            (rf'({symbolicid_re})', id_callback),
        ],
        'dotted': [
            (rf'({alphanumid_re})(\.)', long_id_callback),
            (rf'({alphanumid_re})', end_id_callback, "#pop"),
            (rf'({symbolicid_re})', end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],


        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Zero-width exit: pop as soon as the next reserved word is ahead.
        'breakout': [
            (r'(?=\b({})\b(?!\'))'.format('|'.join(alphanumid_reserved)), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (rf'({alphanumid_re})', Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})', Name.Function, '#pop'),
            (rf'({symbolicid_re})', Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({symbolicid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({alphanumid_re})', Name.Variable, '#pop'),
            (rf'({symbolicid_re})', Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (rf'=(?!{symbolicid_re})', Punctuation, ('#pop', 'typbind')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (rf'=(?!{symbolicid_re})', Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (rf'(\|)(\s*)({alphanumid_re})',
             bygroups(Punctuation, Text, Name.Class)),
            (rf'(\|)(\s+)({symbolicid_re})',
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (rf'(and\b)(\s+)({alphanumid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (rf'(and\b)(\s*)({symbolicid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (rf'({alphanumid_re})|({symbolicid_re})', Name.Class),

            default('#pop'),
        ],

        # The first constructor right after 'datatype t ='
        'datcon': [
            include('whitespace'),
            (rf'({alphanumid_re})', Name.Class, '#pop'),
            (rf'({symbolicid_re})', Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
353
354
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']
    version_added = '0.7'

    keywords = (
        'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
445
446
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']
    url = 'http://opalang.org'
    version_added = '1.5'

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # the dots are escaped so that e.g. '(12e5' or 'x.y' are not
            # mistaken for floats, and the exponents are optional so that
            # '.5' and '3.14' are recognized
            (r'-?\.\d+([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is a much more strict that the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only area capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (rf'({op_re})+', Operator),
            (rf'({punc_re})+', Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
768
769
class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']
    version_added = '2.6'

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator.Word),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nestable /* ... */ comments.
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
860
861
class FStarLexer(RegexLexer):
    """
    For the F* language.
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']
    version_added = '2.7'

    # NOTE: the comma after 'and' is required; without it Python's implicit
    # string concatenation produced the bogus keyword 'andbegin' and
    # silently dropped both 'and' and 'begin' from the keyword set.
    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            (r'\b({})\b'.format('|'.join(assume_keywords)), Name.Exception),
            (r'\b({})\b'.format('|'.join(decl_keywords)), Keyword.Declaration),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }