"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]
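
    # For example, #"a" is scanned by the 'char' state built from this helper
    # and comes out entirely as String.Char tokens, while "ab\n" goes through
    # the 'string' state and yields String.Double pieces plus a String.Escape
    # for the \n.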

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str
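
    # So id_callback tags 'val' as Keyword.Reserved, '=>' as Punctuation and
    # 'foo' as Name, while long_id_callback flags reserved words used as
    # structure names (as in 'val.x') as Error.
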
    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),
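
        # A zero-width lookahead that pops back to the including state as soon
        # as the next word is reserved, without consuming it; the declaration
        # states below ('sname', 'tname', 'dname', ...) include it so they
        # terminate cleanly.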
        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),

            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
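        # Qualified names such as List.map: each 'Mod.' prefix is emitted as
        # Name.Namespace, the final component as Name.Class (capitalized) or
        # plain Name (lowercase).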
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # these rules are factored out because they are included in many states
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to be as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, I don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml,
            # the standard library seems to follow the convention that only
            # modules are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse type definitions
            # specially, so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring the '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the lookahead here allows parsing f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy-pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }


class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class FStarLexer(RegexLexer):
    """
    For the F* language.

    .. versionadded:: 2.7
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
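

# A minimal usage sketch, not part of the Pygments module itself: it relies
# only on the public get_tokens() API and prints the token stream SMLLexer
# produces for a tiny Standard ML snippet.
if __name__ == '__main__':
    sml_code = "fun add (x, y) = x + y\nval three = add (1, 2)\n"
    for token_type, value in SMLLexer().get_tokens(sml_code):
        # get_tokens() yields (token type, text) pairs, e.g.
        # (Token.Keyword.Reserved, 'fun') or (Token.Name.Function, 'add').
        print(token_type, repr(value))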