Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/haskell.py: 70%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

156 statements  

1""" 

2 pygments.lexers.haskell 

3 ~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Haskell and related languages. 

6 

7 :copyright: Copyright 2006-present by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ 

14 default, include, inherit, line_re 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Generic, Whitespace 

17from pygments import unistring as uni 

18 

19__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer', 

20 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer', 

21 'LiterateCryptolLexer', 'KokaLexer'] 

22 

23 

class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']
    version_added = '0.8'

    # NOTE: entries are regex fragments, not plain strings (e.g.
    # 'infix[lr]?'); they are joined with '|' into one alternation below.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-character names valid in escape sequences (e.g. '\NUL');
    # also regex fragments ('[SE]TX' covers both STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # '--' starts a line comment only when not followed by an
            # operator character ('-->' etc. is an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # character literal
            (r"'\\.'", String.Char),  # escape character literal (e.g. '\n')
            # Identifier at column 0 is a (function) definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores are digit separators, e.g. 1_000_000)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested, hence the '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

155 

156 

class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []
    version_added = '2.4'

    tokens = {
        'root': [
            # Highlight the description strings of Hspec's spec combinators
            # as documentation, then fall through to plain Haskell lexing.
            (r'(it|describe|context)(\s*)("[^"]*")',
             bygroups(Text, Whitespace, String.Doc)),
            inherit,
        ],
    }

176 

177 

class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']
    version_added = '2.0'

    # NOTE: entries are regex fragments ('infix[lr]?'), joined with '|' below.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-character names usable in escape sequences; regex fragments.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler pragmas, recognised as '%name' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%({}))'.format('|'.join(directives)),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nested, hence the '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }

287 

288 

class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']
    version_added = '2.0'

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'opaque', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unfolding', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments ('--' only when not followed by an operator character)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, '{! ... !}')
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Set/Prop universes with optional Unicode subscript level digits.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols (incl. Unicode lambda, forall, arrow)
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes (nested, hence the '#push')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # Reuse the Haskell comment/literal states; see the NOTE in
        # HaskellLexer.tokens about keeping those states Agda-compatible.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }

360 

361 

class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-character names usable in escape sequences; regex fragments.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b({})(?!\')\b'.format('|'.join(reserved)), Keyword.Reserved),
            # Identifier at column 0 is a (function) definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nested, hence the '#push')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin Cryptol names re-tagged from plain Name to Name.Builtin below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        # Post-process the regex lexer's output: promote names listed in
        # EXTRA_KEYWORDS to Name.Builtin, pass everything else through.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value

497 

498 

class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the '>' marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` lexes the code extracted from the literate document.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX documents start with '\' or '%'.  Use a
            # conditional expression, not the fragile `cond and a or b`
            # idiom; slicing [0:1] is safe on empty input.
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: '>'-prefixed lines are code, everything else is
            # prose re-inserted at the current code offset.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # the surrounding text is lexed as TeX and re-inserted.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))

561 

562 

class LiterateHaskellLexer(LiterateLexer):
    """
    Lexer for Literate Haskell sources, in Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']
    url = 'https://wiki.haskell.org/Literate_programming'
    version_added = '0.9'

    def __init__(self, **options):
        # Delegate code highlighting to a plain HaskellLexer.
        super().__init__(HaskellLexer(**options), **options)

584 

585 

class LiterateIdrisLexer(LiterateLexer):
    """
    Lexer for Literate Idris sources, in Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']
    url = 'https://idris2.readthedocs.io/en/latest/reference/literate.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate code highlighting to a plain IdrisLexer.
        super().__init__(IdrisLexer(**options), **options)

607 

608 

class LiterateAgdaLexer(LiterateLexer):
    """
    Lexer for Literate Agda sources.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']
    url = 'https://agda.readthedocs.io/en/latest/tools/literate-programming.html'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate code highlighting to an AgdaLexer; literate Agda is
        # always LaTeX-style, so force litstyle here.
        super().__init__(AgdaLexer(**options), litstyle='latex', **options)

630 

631 

class LiterateCryptolLexer(LiterateLexer):
    """
    Lexer for Literate Cryptol sources, in Bird or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']
    url = 'https://www.cryptol.net'
    version_added = '2.0'

    def __init__(self, **options):
        # Delegate code highlighting to a plain CryptolLexer.
        super().__init__(CryptolLexer(**options), **options)

653 

654 

class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']
    version_added = '1.6'

    # Reserved words; joined with '|' into one alternation below.
    # ('private' was previously listed twice -- the duplicate was a
    # harmless but redundant alternative and has been removed.)
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'({})'.format('|'.join(typeStartKeywords))) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword.Type),
            (r'({})'.format('|'.join(keywords)) + boundary, Keyword),
            (r'({})'.format('|'.join(builtin)) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names (qualified with 'namespace/' prefixes)
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'({})'.format('|'.join(typekeywords)) + boundary, Keyword),
            (r'(?=(({})'.format('|'.join(keywords)) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # nested block comments, hence the '#push'
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # raw string: '""' is an escaped quote, a lone '"' terminates
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }