Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/ml.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

99 statements  

1""" 

2 pygments.lexers.ml 

3 ~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for ML family languages. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, include, bygroups, default, words 

14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

15 Number, Punctuation, Error 

16 

17__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer'] 

18 

19 

class SMLLexer(RegexLexer):
    """
    For the Standard ML language.
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']
    url = 'https://en.wikipedia.org/wiki/Standard_ML'
    version_added = '1.5'

    # Reserved words spelled as alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved words spelled as symbolic identifiers.
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved tokens that are not identifiers at all.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        """Emit a dotted-path component; reserved words are errors here."""
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        """Emit the final component of a dotted path; reserved words are errors."""
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        """Classify a bare identifier as keyword, punctuation, or name."""
        # Renamed from `str`, which shadowed the builtin.
        ident = match.group(1)
        if ident in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif ident in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, ident

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (rf'#\s*({alphanumid_re})', Name.Label),
            (rf'#\s+({symbolicid_re})', Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (rf'({alphanumid_re})(\.)', long_id_callback, "dotted"),
            (rf'({alphanumid_re})', id_callback),
            (rf'({symbolicid_re})', id_callback),
        ],
        'dotted': [
            (rf'({alphanumid_re})(\.)', long_id_callback),
            (rf'({alphanumid_re})', end_id_callback, "#pop"),
            (rf'({symbolicid_re})', end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b({})\b(?!\'))'.format('|'.join(alphanumid_reserved)), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (rf'({alphanumid_re})', Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})', Name.Function, '#pop'),
            (rf'({symbolicid_re})', Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (rf'({alphanumid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({symbolicid_re})(\s*)(=(?!{symbolicid_re}))',
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (rf'({alphanumid_re})', Name.Variable, '#pop'),
            (rf'({symbolicid_re})', Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (rf'=(?!{symbolicid_re})', Punctuation, ('#pop', 'typbind')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (rf'=(?!{symbolicid_re})', Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (rf'({alphanumid_re})', Keyword.Type),
            (rf'({symbolicid_re})', Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (rf'(\|)(\s*)({alphanumid_re})',
             bygroups(Punctuation, Text, Name.Class)),
            (rf'(\|)(\s+)({symbolicid_re})',
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (rf'(and\b)(\s+)({alphanumid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (rf'(and\b)(\s*)({symbolicid_re})',
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (rf'({alphanumid_re})|({symbolicid_re})', Name.Class),

            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (rf'({alphanumid_re})', Name.Class, '#pop'),
            (rf'({symbolicid_re})', Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }

353 

354 

class OcamlLexer(RegexLexer):
    """
    For the OCaml language.
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']
    version_added = '0.7'

    keywords = (
        'and', 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point must be escaped: a bare '.' matched any
            # character, so e.g. '1x2e3' was wrongly lexed as a float.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }

445 

446 

class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']
    url = 'http://opalang.org'
    version_added = '1.5'

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # The decimal points were previously unescaped '.' (any char);
            # they are meant literally, as in '.5e3' and '12.5e3'.
            (r'-?\.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is a much more strict that the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only area capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (rf'({op_re})+', Operator),
            (rf'({punc_re})+', Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }

768 

769 

class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']
    version_added = '2.6'

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator.Word),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(word_operators)), Operator.Word),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point must be escaped: a bare '.' matched any
            # character, so e.g. '1x2e3' was wrongly lexed as a float.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }

860 

861 

class FStarLexer(RegexLexer):
    """
    For the F* language.
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']
    version_added = '2.7'

    keywords = (
        # A missing comma after 'and' previously concatenated it with
        # 'begin' into the bogus keyword 'andbegin', so neither word
        # was ever highlighted.
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b({})\b'.format('|'.join(keywords)), Keyword),
            (r'\b({})\b'.format('|'.join(assume_keywords)), Name.Exception),
            (r'\b({})\b'.format('|'.join(decl_keywords)), Keyword.Declaration),
            # Reversed so that longer operators win over their prefixes.
            (r'({})'.format('|'.join(keyopts[::-1])), Operator),
            (rf'({infix_syms}|{prefix_syms})?{operators}', Operator),
            (r'\b({})\b'.format('|'.join(primitives)), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point must be escaped: a bare '.' matched any
            # character, so e.g. '1x2e3' was wrongly lexed as a float.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }