1"""
2 pygments.lexers.ml
3 ~~~~~~~~~~~~~~~~~~
4
5 Lexers for ML family languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, include, bygroups, default, words
14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
15 Number, Punctuation, Error
16
17__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
18
19
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    Scope-delimiting tokens (``let``/``in``/``end``, brackets, ...) push and
    pop lexer states so that ``|`` can be told apart: is it separating the
    arms of a case/handle expression, or clauses of a function definition?
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']
    url = 'https://en.wikipedia.org/wiki/Standard_ML'
    version_added = '1.5'

    # Reserved words that have the shape of alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved words made of symbolic characters.
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that can never be part of an identifier.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        """Build the rules of a string state emitting *whatkind* tokens."""
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        """Emit one qualifying module name plus the dot that follows it.

        A reserved word is not a legal structure name, so flag it as Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        """Emit the last component of a dotted (long) identifier.

        Reserved words cannot appear after a dot, so they become Error.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        """Classify a bare identifier: reserved word, punctuation, or name."""
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (rf'#\s*({alphanumid_re})', Name.Label),
            (rf'#\s+({symbolicid_re})', Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (rf'({alphanumid_re})(\.)', long_id_callback, "dotted"),
            (rf'({alphanumid_re})', id_callback),
            (rf'({symbolicid_re})', id_callback),
        ],
        'dotted': [
            (rf'({alphanumid_re})(\.)', long_id_callback),
            (rf'({alphanumid_re})', end_id_callback, "#pop"),
            (rf'({symbolicid_re})', end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],


        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Zero-width exit: pop as soon as the next reserved word is ahead.
        'breakout': [
            (r'(?=\b({})\b(?!\'))'.format('|'.join(alphanumid_reserved)), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (rf'({alphanumid_re})', Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})', Name.Function, '#pop'),
            (rf'({symbolicid_re})', Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({symbolicid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({alphanumid_re})', Name.Variable, '#pop'),
            (rf'({symbolicid_re})', Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (rf'=(?!{symbolicid_re})', Punctuation, ('#pop', 'typbind')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (rf'=(?!{symbolicid_re})', Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (rf'(\|)(\s*)({alphanumid_re})',
             bygroups(Punctuation, Text, Name.Class)),
            (rf'(\|)(\s+)({symbolicid_re})',
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (rf'(and\b)(\s+)({alphanumid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (rf'(and\b)(\s*)({symbolicid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (rf'({alphanumid_re})|({symbolicid_re})', Name.Class),

            default('#pop'),
        ],

        # The first constructor right after 'datatype t ='
        'datcon': [
            include('whitespace'),
            (rf'({alphanumid_re})', Name.Class, '#pop'),
            (rf'({symbolicid_re})', Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # Nestable (* ... *) comments.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
353
354
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']
    version_added = '0.7'

    keywords = (
        'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
445
446
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']
    url = 'http://opalang.org'
    version_added = '1.5'

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # the dots are escaped so that e.g. '(12e5' or 'x.y' are not
            # mistaken for floats, and the exponents are optional so that
            # '.5' and '3.14' are recognized
            (r'-?\.\d+([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is a much more strict that the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only area capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (rf'({op_re})+', Operator),
            (rf'({punc_re})+', Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
768
769
class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']
    version_added = '2.6'

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator.Word),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nestable /* ... */ comments.
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
860
861
class FStarLexer(RegexLexer):
    """
    For the F* language.
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']
    version_added = '2.7'

    # NOTE: the comma after 'and' is required; without it Python's implicit
    # string concatenation produced the bogus keyword 'andbegin' and
    # silently dropped both 'and' and 'begin' from the keyword set.
    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    # Symbolic keywords, already regex-escaped where necessary.
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            (r'\b({})\b'.format('|'.join(assume_keywords)), Name.Exception),
            (r'\b({})\b'.format('|'.join(decl_keywords)), Keyword.Declaration),
            # Reversed so longer operators win over their own prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (escaped!) dot and/or an exponent;
            # plain integers fall through to the integer rules below.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            # Labelled (~x:) and optional (?x:) argument markers.
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # After a capitalized name followed by '.', walk the module path.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }