1"""
2 pygments.lexers.esoteric
3 ~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for esoteric languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11from pygments.lexer import RegexLexer, include, words, bygroups
12from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
13 Punctuation, Error, Whitespace
14
15__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer',
16 'CapDLLexer', 'AheuiLexer']
17
18
class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric BrainFuck language.
    """

    name = 'Brainfuck'
    url = 'http://www.muppetlabs.com/~breadbox/bf/'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']
    version_added = ''

    tokens = {
        'common': [
            # use different colors for different instruction types
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # anything that is not an instruction is a comment
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        'root': [
            (r'\[', Keyword, 'loop'),
            # an unmatched ']' at the top level is an error
            (r'\]', Error),
            include('common'),
        ],
        'loop': [
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ]
    }

    def analyse_text(text):
        """It's safe to assume that a program which mostly consists of + -
        and < > is brainfuck."""
        plus_minus_count = 0
        greater_less_count = 0

        # Examine at most the first 256 characters.  The 25% thresholds
        # below must be relative to the number of characters actually
        # inspected: using max() here (the previous behavior) made the
        # threshold 0.25 * 256 even for short inputs, so short Brainfuck
        # programs could never be detected.
        range_to_check = min(256, len(text))

        for c in text[:range_to_check]:
            if c == '+' or c == '-':
                plus_minus_count += 1
            if c == '<' or c == '>':
                greater_less_count += 1

        # Mostly arithmetic or mostly pointer movement -> certainly BF.
        if plus_minus_count > (0.25 * range_to_check):
            return 1.0
        if greater_less_count > (0.25 * range_to_check):
            return 1.0

        # The idiomatic "clear cell" sequence is a weaker hint.
        result = 0
        if '[-]' in text:
            result += 0.5

        return result
75
76
class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.

    Covers the Befunge-93 instruction set plus several Befunge-98
    additions (stack-stack ops ``{}``, single-character strings ``'``,
    and fingerprint letters).
    """
    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']
    version_added = '0.7'

    tokens = {
        'root': [
            # Single hex digits push values onto the stack
            (r'[0-9a-f]', Number),
            (r'[+*/%!`-]', Operator), # Traditional math
            (r'[<>^v?\[\]rxjk]', Name.Variable), # Move, imperatives
            (r'[:\\$.,n]', Name.Builtin), # Stack ops, imperatives
            # Conditional direction changes (horizontal/vertical if, etc.)
            (r'[|_mw]', Keyword),
            (r'[{}]', Name.Tag), # Befunge-98 stack ops
            (r'".*?"', String.Double), # Strings don't appear to allow escapes
            (r'\'.', String.Single), # Single character
            (r'[#;]', Comment), # Trampoline... depends on direction hit
            (r'[pg&~=@iotsy]', Keyword), # Misc
            (r'[()A-Z]', Comment), # Fingerprints
            (r'\s+', Whitespace), # Whitespace doesn't matter
        ],
    }
104
105
class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the CAmkES component platform.
    """
    name = 'CAmkES'
    url = 'https://sel4.systems/CAmkES/'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']
    version_added = '2.1'

    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
                                          Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*$', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            # Structural keywords of the ADL/IDL
            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # Built-in types (note: 'int16_t' was previously misspelled
            # as 'int16_6')
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Recognised attributes
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
             Keyword.Reserved),

            # CAmkES-level include
            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # C-level include
            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # Literals.  The float rule must precede the integer rule:
            # with the integer rule first, '1.5' lexed as integer /
            # punctuation / integer and the float rule was unreachable.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
171
172
class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    The source of the primary tool that reads such specifications is available
    at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this
    lexer only supports a subset of the grammar. For example, identifiers can
    shadow type names, but these instances are currently incorrectly
    highlighted as types. Supporting this would need a stateful lexer that is
    considered unnecessarily complex for now.
    """
    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']
    version_added = '2.2'

    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)',
             bygroups(Whitespace, Comment.Preproc, Whitespace)),

            # Whitespace, comments (both C-style and Haskell-style '--')
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*$', Comment),

            (r'[<>\[(){},:;=\]]', Punctuation),
            # Range operator, e.g. in slot ranges
            (r'\.\.', Punctuation),

            # Section/structure keywords
            (words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
                    'objects'), suffix=r'\b'), Keyword),

            # Kernel object types
            (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
                    'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
                    'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),

            # Properties (object/cap attributes and access-rights strings)
            (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
                    'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
                    'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
                    'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
                    'ports', 'reply', 'uncached'), suffix=r'\b'),
             Keyword.Reserved),

            # Literals (numbers may carry 'k'/'M' size suffixes)
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+(\.\d+)?(k|M)?', Number),
            (words(('bits',), suffix=r'\b'), Number),
            # Symbolic slot names are highlighted like numbers
            (words(('cspace', 'vspace', 'reply_slot', 'caller_slot',
                    'ipc_buffer_slot'), suffix=r'\b'), Number),

            # Identifiers (may contain '-', '@' and '.')
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
230
231
class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.
    """
    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']
    url = 'https://en.wikipedia.org/wiki/Core_War'
    version_added = '0.8'

    # Instruction opcodes plus the ORG/EQU/END pseudo-opcodes
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    # Instruction modifiers (the '.X' suffixes of ICWS'94)
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            (r';.*$', Comment.Single),
            # Lexemes:
            # Identifiers
            (r'\b({})\b'.format('|'.join(opcodes)), Name.Function),
            (r'\b({})\b'.format('|'.join(modifiers)), Name.Decorator),
            # NOTE(review): '\w+' requires two or more characters here, so a
            # single-letter label never matches this rule -- confirm whether
            # that is intentional (single letters may clash with modifiers).
            (r'[A-Za-z_]\w+', Name),
            # Operators
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator), # mode
            (r'[.,]', Punctuation), # mode
            # Numbers
            (r'[-+]?\d+', Number.Integer),
        ],
    }
266
267
class AheuiLexer(RegexLexer):
    """
    Aheui is esoteric language based on Korean alphabets.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']
    version_added = ''

    tokens = {
        'root': [
            # A single character class enumerating ranges of precomposed
            # Hangul syllables.  Each line covers the syllables built from
            # one initial consonant (ㄴ, ㄷ, ㄸ, ㄹ, ㅁ, ㅂ, ㅃ, ㅅ, ㅆ, ㅈ,
            # ㅊ, ㅋ, ㅌ, ㅍ, ㅎ); such syllables are Aheui instructions.
            ('['
             '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
             '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
             '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
             '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
             '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
             '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
             '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
             '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
             '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
             '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
             '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
             '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
             '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
             '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
             '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
             ']', Operator),
            # Everything else is a no-op in Aheui
            ('.', Comment),
        ],
    }