"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]
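
    # For example, #"a" is scanned by the 'char' state built from this helper
    # and comes out entirely as String.Char tokens, while "ab\n" goes through
    # the 'string' state and yields String.Double pieces plus a String.Escape
    # for the \n.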

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str
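
    # So id_callback tags 'val' as Keyword.Reserved, '=>' as Punctuation and
    # 'foo' as Name, while long_id_callback flags reserved words used as
    # structure names (as in 'val.x') as Error.
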
    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),
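
        # A zero-width lookahead that pops back to the including state as soon
        # as the next word is reserved, without consuming it; the declaration
        # states below ('sname', 'tname', 'dname', ...) include it so they
        # terminate cleanly.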
        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),

            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    url = 'https://ocaml.org/'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
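        # Qualified names such as List.map: each 'Mod.' prefix is emitted as
        # Name.Namespace, the final component as Name.Class (capitalized) or
        # plain Name (lowercase).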
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language.

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # these rules are factored out because they are included in many states
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to be as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, I don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml,
            # the standard library seems to follow the convention that only
            # modules are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse type definitions
            # specially, so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring the '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the lookahead here allows parsing f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy-pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }


class ReasonLexer(RegexLexer):
    """
    For the ReasonML language.

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    url = 'https://reasonml.github.io/'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class FStarLexer(RegexLexer):
    """
    For the F* language.

    .. versionadded:: 2.7
    """

    name = 'FStar'
    url = 'https://www.fstar-lang.org/'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
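

# A minimal usage sketch, not part of the Pygments module itself: it relies
# only on the public get_tokens() API and prints the token stream SMLLexer
# produces for a tiny Standard ML snippet.
if __name__ == '__main__':
    sml_code = "fun add (x, y) = x + y\nval three = add (1, 2)\n"
    for token_type, value in SMLLexer().get_tokens(sml_code):
        # get_tokens() yields (token type, text) pairs, e.g.
        # (Token.Keyword.Reserved, 'fun') or (Token.Name.Function, 'add').
        print(token_type, repr(value))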