Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/ml.py: 80%

91 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.ml 

3 ~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for ML family languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, include, bygroups, default, words 

14from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

15 Number, Punctuation, Error 

16 

17__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer'] 

18 

19 

class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # Reserved words that have the shape of ordinary alphanumeric identifiers.
    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # Reserved symbolic tokens (stored regex-escaped where necessary, since
    # they are compared against the text captured by symbolicid_re).
    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    # Reserved punctuation that can never form part of an identifier.
    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        # NOTE: invoked at class-definition time (hence no ``self``); it
        # returns the rule list shared by the 'char' and 'string' states.
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Gap: backslash, whitespace, backslash — lets string literals
            # span lines.
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        # Qualifier part of a long identifier (the ``Foo.`` in ``Foo.bar``);
        # a reserved word is not legal in that position, so flag it.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        # Final component of a long identifier; reserved words (alphanumeric
        # or symbolic) are not legal here either.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        # Stand-alone identifier: classify as reserved keyword, reserved
        # symbol, or a plain name.
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            # Whitespace or anything else after the dot is malformed.
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        # Zero-width rule: pop back out as soon as a reserved word is ahead.
        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            # ``datatype t = datatype u`` (datatype replication)
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),

            default('#pop'),
        ],

        # First constructor after ``datatype ... =``.
        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # (* Nested comments are supported via #push/#pop. *)
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }

353 

354 

class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    # 'lsr' added for completeness/consistency with ReasonLexer below
    # (OCaml has three shift operators: lsl, lsr, asr).
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals: a decimal point and/or an exponent is required,
            # so plain integers fall through to the rules below. The dot is
            # escaped — the previous pattern used a bare '.' (any character)
            # and demanded an exponent, so '3.14' was not lexed as a float
            # while junk like '3x4e5' was.
            (r'-?\d[\d_]*\.[\d_]*([eE][+\-]?\d[\d_]*)?', Number.Float),
            (r'-?\d[\d_]*[eE][+\-]?\d[\d_]*', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # Labelled/optional arguments: ~name: and ?name:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path such as Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }

446 

447 

class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # NOTE: the dots are escaped and the exponents made optional;
            # the previous patterns used a bare '.' (any character) and
            # required an exponent, so '.5' and '3.14' were not floats while
            # sequences like 'a2e5' wrongly were.
            (r'-?\.\d+([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->"; stream >] -> type stream
        # | [< ""; stream >] ->
        # type_with_slash stream
        # type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }

769 

770 

class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            # Reversed so that longer operators win over their prefixes.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals: a decimal point and/or an exponent is required,
            # so plain integers fall through to the rules below. The dot is
            # escaped — the previous pattern used a bare '.' (any character)
            # and demanded an exponent, so '3.14' was not lexed as a float.
            (r'-?\d[\d_]*\.[\d_]*([eE][+\-]?\d[\d_]*)?', Number.Float),
            (r'-?\d[\d_]*[eE][+\-]?\d[\d_]*', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            # Labelled/optional arguments: ~name: and ?name:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            # A single '*' OR '/' that is not part of a delimiter. The
            # original rule only matched '*', so a lone '/' inside a
            # comment (e.g. '/* a / b */') produced an Error token.
            (r'[*/]', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path such as Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }

862 

863 

class FStarLexer(RegexLexer):
    """
    For the F* language.

    .. versionadded:: 2.7
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    # NOTE: a comma was missing after 'and', so implicit string
    # concatenation produced the bogus keyword 'andbegin' and neither
    # 'and' nor 'begin' was ever highlighted.
    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            # Reversed so that longer operators win over their prefixes.
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Float literals: a decimal point and/or an exponent is required,
            # so plain integers fall through to the rules below. The dot is
            # escaped — the previous pattern used a bare '.' (any character)
            # and demanded an exponent, so '3.14' was not lexed as a float
            # and hex literals like '0x1e5' matched this rule before Hex.
            (r'-?\d[\d_]*\.[\d_]*([eE][+\-]?\d[\d_]*)?', Number.Float),
            (r'-?\d[\d_]*[eE][+\-]?\d[\d_]*', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            # Labelled/optional arguments: ~name: and ?name:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Components of a dotted path such as Foo.Bar.baz.
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }