1"""
2 pygments.lexers.haskell
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Haskell and related languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
14 default, include, inherit, line_re
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Generic, Whitespace
17from pygments import unistring as uni
18
19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
21 'LiterateCryptolLexer', 'KokaLexer']
22
23
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    Tokenizes comments, reserved words, identifiers (functions, names,
    types, including GHC promoted-type syntax), operators, numeric
    literals (with underscore digit separators), and character/string
    literals with Haskell escape sequences.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words.  Each entry is a regex alternative, so
    # 'infix[lr]?' covers 'infix', 'infixl' and 'infixr' in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-code mnemonics accepted in escape sequences.  Some
    # entries are character classes covering several mnemonics at once
    # (e.g. '[SE]TX' matches both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # A line comment only counts if '--' is not the prefix of a
            # longer operator (e.g. '-->' is an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            # Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # Identifiers at the start of a line are function definitions.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            # Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            # Numbers ('_' is allowed as a digit separator throughout,
            # per GHC's NumericUnderscores extension)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            # Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # Everything after the 'import' keyword: optional 'qualified',
        # module name, optional 'as'/'hiding' clauses and import lists.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # Module header: module name and optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push a pair of
        # 'funclist' states so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested {- -} push/pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string:
        # named escapes, control codes, octal/hex/decimal char codes, or a
        # string gap (whitespace between backslashes).
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }
154
155
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    Extends :class:`HaskellLexer` by highlighting the string argument of
    the Hspec combinators ``it``, ``describe`` and ``context`` as
    documentation strings; all other input falls through to the inherited
    Haskell rules.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    tokens = {
        'root': [
            # Hspec spec descriptions: it/describe/context "..."
            (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            # Fall back to the regular Haskell rules.
            inherit,
        ],
    }
175
176
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.  Handles Idris compiler
    directives (``%lib``, ``%link``, ...), ``|||`` doc comments, top-level
    type declarations, and the usual comments, keywords, numbers and
    string/character literals.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern',  'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-code mnemonics accepted in escape sequences; some
    # entries are character classes matching several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives recognized after a leading '%'.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            # '--' only starts a comment if not part of a longer operator.
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' documentation comments.
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration (name : type)
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            # Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            # Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Module name after 'import'/'module', with optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push two states
        # so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested {- -} push/pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }
286
287
class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    Handles Agda keywords, goal holes (``{! ... !}``), ``Set``/``Prop``
    universe levels with subscripts, and Unicode operators; the comment,
    character, string and escape states are reused from
    :class:`HaskellLexer`.
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    # Agda reserved words.
    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration (name : type)
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments ('--' only if not part of a longer operator)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, possibly nested)
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop with optional Unicode subscript level digits.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Includes the Unicode lambda, forall and arrow symbols.
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        # Goal holes may nest, hence the push/pop pair.
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        # Module name after 'import'/'module'.
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
359
360
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    Cryptol uses C-style comments (``//`` and ``/* ... */``) but otherwise
    follows a Haskell-like lexical structure.  A small set of builtin
    function names (``EXTRA_KEYWORDS``) is re-tagged as ``Name.Builtin``
    in :meth:`get_tokens_unprocessed`.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    # Reserved words of Cryptol.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-code mnemonics accepted in escape sequences; some
    # entries are character classes matching several mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            # Identifiers at the start of a line are function definitions.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        # Everything after the 'import' keyword: optional 'qualified',
        # module name, optional 'as'/'hiding' clauses and import lists.
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        # Module header: module name and optional export list.
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        # Parenthesized export/import lists; nested parens push two states
        # so each ')' pops two levels.
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # C-style nested block comments.
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        # A single escape sequence after a backslash in a char/string.
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin Cryptol function names; re-tagged as Name.Builtin below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Run the regex lexer, then promote any plain Name token whose
        value is one of EXTRA_KEYWORDS to Name.Builtin."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
496
497
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker (plus trailing blanks) in
    # group 1, the actual code in group 2.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` is the lexer used for the extracted code fragments.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Separate code from markup, lex both, and interleave the token
        streams with `do_insertions` so positions stay consistent."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '%' (comment) or '\'
            # (a command); everything else is treated as Bird-style.
            # (A conditional expression replaces the fragile pre-PEP 308
            # ``cond and a or b`` idiom used previously.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: ">"-prefixed lines are code, everything else prose
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # the surrounding prose is handed to a TeX lexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any LaTeX remaining after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
560
561
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
583
584
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to an Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
606
607
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, LaTeX
        style is assumed (unlike the other literate lexers, which
        autodetect).
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        # Default to LaTeX style for .lagda files.  Previously this passed
        # litstyle='latex' alongside **options, which raised
        # "TypeError: got multiple values for keyword argument 'litstyle'"
        # whenever the caller supplied an explicit litstyle option;
        # setdefault keeps the default while honoring a caller override.
        options.setdefault('litstyle', 'latex')
        LiterateLexer.__init__(self, agdalexer, **options)
629
630
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate highlighting of the code fragments to a Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
652
653
class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    Uses a separate set of states for type expressions (entered after
    ``:``/``::`` and the type-declaration keywords) so type names can be
    highlighted differently from value-level identifiers.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    # Value-level keywords.
    # NOTE(review): 'private' appears twice in this list; harmless in the
    # regex alternation but the duplicate could be removed.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions: single names for the token types used in
    # type expressions, so they can be changed in one place.
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            # function/value definitions; the name may be an identifier or a
            # parenthesized operator like (+).
            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (optionally qualified by a module path like mod/name)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (@"..." with "" as the escaped quote)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            # A value-level keyword ends the type; the lookahead matches
            # without consuming so 'root' can re-handle the keyword.
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything else leaves type mode
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            # line-leading '#' is a preprocessor-style directive
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        # nested block comments
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." raw strings: '""' is the only escape (a literal quote)
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }