Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/haskell.py: 81%

140 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.haskell 

3 ~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Haskell and related languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ 

14 default, include, inherit, line_re 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Generic, Whitespace 

17from pygments import unistring as uni 

18 

# Public API of this module: the lexer classes re-exported by pygments.lexers.
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']

22 

23 

class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    url = 'https://www.haskell.org/'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Named ASCII control-character escapes allowed in char/string literals;
    # several entries are regex character classes covering multiple names.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state matters — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' must not be followed by an operator symbol,
            # otherwise '-->' etc. would be an operator, not a comment.
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase identifier at column 0 is (heuristically) a
            # top-level function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores are GHC NumericUnderscores separators)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments; '{-'/'-}' nest, hence the #push/#pop pair.
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

155 

156 

class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = ['*Spec.hs']
    mimetypes = []

    tokens = {
        'root': [
            # Highlight the description string of Hspec's BDD combinators
            # ("it"/"describe"/"context") as documentation; everything else
            # falls through to the inherited Haskell rules.
            (r'(it|describe|context)(\s*)("[^"]*")',
             bygroups(Text, Whitespace, String.Doc)),
            inherit,
        ],
    }

177 

178 

class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    url = 'https://www.idris-lang.org/'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Named ASCII control-character escapes (same table as HaskellLexer).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler pragmas recognised after '%' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    # NOTE: rule order within each state matters — earlier rules win.
    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Whitespace, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
            # '|||' introduces an Idris doc comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
            # Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments; '{-'/'-}' nest, hence the #push/#pop pair.
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
        ],
    }

289 

290 

class AgdaLexer(RegexLexer):
    """
    For the Agda dependently typed functional programming language and
    proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    url = 'http://wiki.portal.chalmers.se/agda/pmwiki.php'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = (
        'abstract', 'codata', 'coinductive', 'constructor', 'data', 'do',
        'eta-equality', 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
        'infixl', 'infixr', 'instance', 'interleaved', 'let', 'macro', 'mutual',
        'no-eta-equality', 'open', 'overlap', 'pattern', 'postulate', 'primitive',
        'private', 'quote', 'quoteTerm', 'record', 'renaming', 'rewrite',
        'syntax', 'tactic', 'unquote', 'unquoteDecl', 'unquoteDef', 'using',
        'variable', 'where', 'with',
    )

    # NOTE: rule order within each state matters — earlier rules win.
    tokens = {
        'root': [
            # Declaration: "name : type" at the start of a line.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Whitespace, Name.Function, Whitespace,
                      Operator.Word, Whitespace)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes: interactive goals written as "{! ... !}".
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace),
             'module'),
            # Universe levels: Set/Prop optionally followed by subscript digits.
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            # Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # \u03BB = λ, \u2200 = ∀, \u2192 = →
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Whitespace),  # Whitespace
        ],
        'hole': [
            # Holes nest, hence the #push/#pop pair.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.\']*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are identical to Haskell's (see the NOTE in
        # HaskellLexer), so they are shared directly.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }

363 

364 

class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Named ASCII control-character escapes (same table as HaskellLexer).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state matters — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Whitespace),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Whitespace),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Whitespace),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments; '/*'/'*/' nest, hence the #push/#pop pair.
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
        ],
    }

    # Builtin-like names that the token rules classify as plain Name;
    # they are promoted to Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Post-process the regex token stream, re-tagging EXTRA_KEYWORDS
        names as Name.Builtin. Other tokens pass through unchanged."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value

500 

501 

class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-track line: group 1 is the ">" marker plus trailing blanks,
    # group 2 is the actual code (including the newline).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        """``baselexer`` is the lexer used for the extracted code portions."""
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split *text* into code and surrounding prose, lex the code with
        ``self.baselexer``, and re-insert the prose via ``do_insertions``."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources start with "%" (comment) or "\"
            # (macro).  A conditional expression replaces the fragile
            # pre-ternary ``cond and a or b`` idiom used previously.
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: ">"-prefixed lines are code, everything else prose.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    # The ">" marker itself is shown as a special comment.
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is lexed as LaTeX.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX prose before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))

564 

565 

class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['literate-haskell', 'lhaskell', 'lhs']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Delegate the code portions to a Haskell lexer built with the
        # same options.
        super().__init__(HaskellLexer(**options), **options)

587 

588 

class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['literate-idris', 'lidris', 'lidr']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Delegate the code portions to an Idris lexer built with the
        # same options.
        super().__init__(IdrisLexer(**options), **options)

610 

611 

class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['literate-agda', 'lagda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-based, so the style is forced here
        # rather than autodetected.
        super().__init__(AgdaLexer(**options), litstyle='latex', **options)

633 

634 

class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['literate-cryptol', 'lcryptol', 'lcry']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Delegate the code portions to a Cryptol lexer built with the
        # same options.
        super().__init__(CryptolLexer(**options), **options)

656 

657 

class KokaLexer(RegexLexer):
    """
    Lexer for the Koka language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    url = 'https://koka-lang.github.io/koka/doc/index.html'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' is listed twice below — harmless (regex
    # alternation) but redundant.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
                      Keyword, Whitespace, Name.Namespace)),

            (r'^(public|private)?(\s+)?(function|fun|val)'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names: optional "mod1/mod2/" namespace prefix, then the name.
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (verbatim, @"..." with "" as escaped quote)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # Pop before a non-empty parameter list.
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Whitespace, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything else ends the type.
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
            (r'\s+', Whitespace),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # Block comments nest, hence the #push/#pop pair.
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }