Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/haskell.py: 81%
140 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for Haskell and related languages.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # Reserved words.  Entries are regex fragments, so 'infix[lr]?' covers
    # infix, infixl and infixr in a single entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-character mnemonics usable in escapes (e.g. '\NUL').
    # Some entries are character classes covering several mnemonics,
    # e.g. '[SE]TX' matches both STX and ETX.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. would be mis-lexed as a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # An identifier at the start of a line is (heuristically) a
            # function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (GHC NumericUnderscores: '_' allowed between digits)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested via '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            # Single-character escapes:
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes, e.g. \^X:
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            # ASCII mnemonic escapes, e.g. \NUL:
            ('|'.join(ascii), String.Escape, '#pop'),
            # Numeric escapes: octal, hexadecimal, decimal:
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []

    # Highlight the string argument of Hspec's spec-description combinators
    # as documentation; everything else falls through to the Haskell rules.
    tokens = {
        'root': [
            (r'(%s)(\s*)("[^"]*")' % combinator,
             bygroups(Text, Whitespace, String.Doc))
            for combinator in ('it', 'describe', 'context')
        ] + [inherit],
    }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex fragment covering
    # infix/infixl/infixr in a single entry.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-character mnemonics usable in escapes (see the
    # 'escape' state); some entries cover several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives written as '%name', e.g. %lib, %flag.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' doc comments
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested via '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, written '{! ... !}')
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Universe names with optional subscript digits, e.g. Set₁.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes (nested via '#push'/'#pop')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # Reuse the comment/character/string/escape states of the
        # Haskell lexer (see the NOTE in HaskellLexer.tokens).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-character mnemonics usable in escapes (see the
    # 'escape' state); some entries cover several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # An identifier at the start of a line is (heuristically) a
            # function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Contents of an import/export list.
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (C-style, nested via '#push'/'#pop')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered right after a backslash in a char/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A "string gap": backslash, whitespace, closing backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin function names that the token table lexes as plain Name;
    # re-tagged to Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Wrap the base lexer, promoting EXTRA_KEYWORDS names to builtins."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the '>' marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # baselexer tokenizes the extracted code portions.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '\' or '%'.
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        # 'code' accumulates only the code lines; 'insertions' records
        # (offset-into-code, tokens) pairs for the non-code text, to be
        # re-interleaved by do_insertions() below.
        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    # '>' prefix becomes a special comment; the rest is code.
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: everything between \begin{code} and \end{code}
            # is code; the surrounding text is lexed with TexLexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX at the current code offset.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Delegate the code portions to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Delegate the code portions to a plain Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so force litstyle here;
        # the code portions go to a plain Agda lexer.
        LiterateLexer.__init__(self, AgdaLexer(**options), litstyle='latex',
                               **options)
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Delegate the code portions to a plain Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' appears twice in this list; harmless in a
    # regex alternation, but could be deduplicated upstream.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions at the start of a line
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified with 'pkg/mod/' prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # C-style block comments, nested via '#push'/'#pop'.
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # @"..." raw strings; '""' is an escaped quote.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }