Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/haskell.py: 88%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

156 statements  

1""" 

2 pygments.lexers.haskell 

3 ~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Haskell and related languages. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ 

14 default, include, inherit, line_re 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Generic, Whitespace 

17from pygments import unistring as uni 

18 

19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer', 

20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer', 

21 'LiterateCryptolLexer', 'KokaLexer'] 

22 

23 

class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # Reserved words.  'infix[lr]?' is itself a regex alternative covering
    # infix/infixl/infixr; the tuple is joined with '|' below.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Mnemonic escapes for ASCII control characters (e.g. '\NUL'); some
    # entries are regex alternations ('[SE]TX' matches STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. would be an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores are allowed as digit separators)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

154 

155 

class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    tokens = {
        'root': [
            # Highlight the string argument of Hspec's `it`/`describe`/
            # `context` combinators as documentation; everything else is
            # inherited from the plain Haskell lexer.
            (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
            inherit,
        ],
    }

175 

176 

class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Mnemonic escapes for ASCII control characters, as in HaskellLexer.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%name' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' doc comments
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }

286 

287 

class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, written {! ... !})
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Universe levels: Set/Prop with optional subscript digits.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes; nested {! ... !} pairs are tracked via push/pop.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }

359 

360 

class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Mnemonic escapes for ASCII control characters, as in HaskellLexer.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin functions/operators; plain Name tokens matching one of these
    # are re-tagged as Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        # Post-process the regex lexer's stream: promote names that are
        # Cryptol builtins from Name to Name.Builtin.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value

496 

497 

class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: '>' prefix (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` lexes the extracted code; prose is handled here
        # (Bird) or by TexLexer (LaTeX).
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with '%' (a comment) or '\'
            # (a command such as \documentclass).  Note that empty input
            # yields '' here, and '' is "in" any string, so empty text is
            # treated as LaTeX -- harmless, since it produces no tokens.
            # (Conditional expression replaces the legacy `and/or` idiom.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: collect '>'-prefixed lines into `code`; emit the
            # '>' markers and prose lines as insertions at the right offsets.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is lexed as LaTeX and inserted around it.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0  # nonzero while inside a code environment
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))

560 

561 

class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate code highlighting to a plain Haskell lexer.
        super().__init__(HaskellLexer(**options), **options)

583 

584 

class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate code highlighting to a plain Idris lexer.
        super().__init__(IdrisLexer(**options), **options)

606 

607 

class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so force litstyle here.
        super().__init__(AgdaLexer(**options), litstyle='latex', **options)

629 

630 

class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate code highlighting to a plain Cryptol lexer.
        super().__init__(CryptolLexer(**options), **options)

652 

653 

class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # @"..." literal strings: '""' is an escaped quote.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }