1"""
2 pygments.lexers.mojo
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Mojo and related languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import keyword
12
13from pygments import unistring as uni
14from pygments.lexer import (
15 RegexLexer,
16 bygroups,
17 combined,
18 default,
19 include,
20 this,
21 using,
22 words,
23)
24from pygments.token import (
25 Comment,
26 # Error,
27 Keyword,
28 Name,
29 Number,
30 Operator,
31 Punctuation,
32 String,
33 Text,
34 Whitespace,
35)
36from pygments.util import shebang_matches
37
# Public names exported by this module: only the lexer class.
__all__ = ["MojoLexer"]
39
40
class MojoLexer(RegexLexer):
    """
    For Mojo source code (version 24.2.1).
    """

    # Lexer metadata: display name, documentation URL, lookup aliases,
    # filename globs and MIME types.
    name = "Mojo"
    url = "https://docs.modular.com/mojo/"
    aliases = ["mojo", "🔥"]
    filenames = [
        "*.mojo",
        "*.🔥",
    ]
    mimetypes = [
        "text/x-mojo",
        "application/x-mojo",
    ]
    version_added = "2.18"

    # Regex for an identifier: one XID_Start character followed by any number
    # of XID_Continue characters (character classes come from `unistring`).
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Build the rule list for the body of a plain (non-f) string.

        Called while the class body is being evaluated (it takes no ``self``)
        to populate the ``strings-single`` / ``strings-double`` states.

        :param ttype: token type emitted for ordinary string content.
        :return: list of ``(regex, token)`` rule tuples.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (
                r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?"
                "[hlL]?[E-GXc-giorsaux%]",
                String.Interpol,
            ),
            # the new style '{}'.format(...) string formatting
            (
                r"\{"
                r"((\w+)((\.\w+)|(\[[^\]]+\]))*)?"  # field name
                r"(\![sra])?"  # conversion
                r"(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?"
                r"\}",
                String.Interpol,
            ),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r"%|(\{{1,2})", ttype),
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Build the rule list for the body of an f-string.

        Called while the class body is being evaluated (it takes no ``self``)
        to populate the ``fstrings-single`` / ``fstrings-double`` states.

        :param ttype: token type emitted for ordinary string content.
        :return: list of rule tuples; an opening ``{`` pushes the
            ``expr-inside-fstring`` state.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r"\}", String.Interpol),
            (r"\{", String.Interpol, "expr-inside-fstring"),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State machine: maps each state name to its ordered list of rules.
    # Rules are tried in order, so earlier rules shadow later ones.
    tokens = {
        "root": [
            # NOTE(review): this rule is tried first and also consumes
            # indentation, so the `^`-anchored docstring rules below look
            # reachable only when no whitespace precedes the string -- confirm
            # whether that is intended before changing it.
            (r"\s+", Whitespace),
            (
                r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
                bygroups(Whitespace, String.Affix, String.Doc),
            ),
            (
                r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
                bygroups(Whitespace, String.Affix, String.Doc),
            ),
            (r"\A#!.+$", Comment.Hashbang),
            (r"#.*$", Comment.Single),
            (r"\\\n", Whitespace),
            (r"\\", Whitespace),
            include("keywords"),
            include("soft-keywords"),
            # In the original PR, all the below here used ((?:\s|\\\s)+) to
            # designate whitespace, but I can't find any example of this being
            # needed in the example file, so we're replacing it with `\s+`.
            (
                r"(alias)(\s+)",
                bygroups(Keyword, Whitespace),
                "varname",  # TODO varname the right fit?
            ),
            (r"(var)(\s+)", bygroups(Keyword, Whitespace), "varname"),
            (r"(def)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
            (r"(fn)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
            (
                r"(class)(\s+)",
                bygroups(Keyword, Whitespace),
                "classname",
            ),  # not implemented yet
            (r"(struct)(\s+)", bygroups(Keyword, Whitespace), "structname"),
            (r"(trait)(\s+)", bygroups(Keyword, Whitespace), "structname"),
            (r"(from)(\s+)", bygroups(Keyword.Namespace, Whitespace), "fromimport"),
            (r"(import)(\s+)", bygroups(Keyword.Namespace, Whitespace), "import"),
            include("expr"),
        ],
        "expr": [
            # raw f-strings
            (
                '(?i)(rf|fr)(""")',
                bygroups(String.Affix, String.Double),
                combined("rfstringescape", "tdqf"),
            ),
            (
                "(?i)(rf|fr)(''')",
                bygroups(String.Affix, String.Single),
                combined("rfstringescape", "tsqf"),
            ),
            (
                '(?i)(rf|fr)(")',
                bygroups(String.Affix, String.Double),
                combined("rfstringescape", "dqf"),
            ),
            (
                "(?i)(rf|fr)(')",
                bygroups(String.Affix, String.Single),
                combined("rfstringescape", "sqf"),
            ),
            # non-raw f-strings
            (
                '([fF])(""")',
                bygroups(String.Affix, String.Double),
                combined("fstringescape", "tdqf"),
            ),
            (
                "([fF])(''')",
                bygroups(String.Affix, String.Single),
                combined("fstringescape", "tsqf"),
            ),
            (
                '([fF])(")',
                bygroups(String.Affix, String.Double),
                combined("fstringescape", "dqf"),
            ),
            (
                "([fF])(')",
                bygroups(String.Affix, String.Single),
                combined("fstringescape", "sqf"),
            ),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")', bygroups(String.Affix, String.Double), "tdqs"),
            ("(?i)(rb|br|r)(''')", bygroups(String.Affix, String.Single), "tsqs"),
            ('(?i)(rb|br|r)(")', bygroups(String.Affix, String.Double), "dqs"),
            ("(?i)(rb|br|r)(')", bygroups(String.Affix, String.Single), "sqs"),
            # non-raw strings
            (
                '([uU]?)(""")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "tdqs"),
            ),
            (
                "([uU]?)(''')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "tsqs"),
            ),
            (
                '([uU]?)(")',
                bygroups(String.Affix, String.Double),
                combined("stringescape", "dqs"),
            ),
            (
                "([uU]?)(')",
                bygroups(String.Affix, String.Single),
                combined("stringescape", "sqs"),
            ),
            # non-raw bytes
            (
                '([bB])(""")',
                bygroups(String.Affix, String.Double),
                combined("bytesescape", "tdqs"),
            ),
            (
                "([bB])(''')",
                bygroups(String.Affix, String.Single),
                combined("bytesescape", "tsqs"),
            ),
            (
                '([bB])(")',
                bygroups(String.Affix, String.Double),
                combined("bytesescape", "dqs"),
            ),
            (
                "([bB])(')",
                bygroups(String.Affix, String.Single),
                combined("bytesescape", "sqs"),
            ),
            # horizontal whitespace only (anything except a newline)
            (r"[^\S\n]+", Text),
            include("numbers"),
            (r"!=|==|<<|>>|:=|[-~+/*%=<>&^|.]", Operator),
            (r"([]{}:\(\),;[])+", Punctuation),
            (r"(in|is|and|or|not)\b", Operator.Word),
            include("expr-keywords"),
            include("builtins"),
            include("magicfuncs"),
            include("magicvars"),
            include("name"),
        ],
        "expr-inside-fstring": [
            # an opening bracket starts a nested, bracket-balanced region
            (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
            # without format specifier
            (
                r"(=\s*)?"  # debug (https://bugs.python.org/issue36817)
                r"(\![sraf])?"  # conversion
                r"\}",
                String.Interpol,
                "#pop",
            ),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (
                r"(=\s*)?"  # debug (https://bugs.python.org/issue36817)
                r"(\![sraf])?"  # conversion
                r":",
                String.Interpol,
                "#pop",
            ),
            (r"\s+", Whitespace),  # allow new lines
            include("expr"),
        ],
        "expr-inside-fstring-inner": [
            # nested brackets push this state again; a closing bracket pops it
            (r"[{([]", Punctuation, "expr-inside-fstring-inner"),
            (r"[])}]", Punctuation, "#pop"),
            (r"\s+", Whitespace),  # allow new lines
            include("expr"),
        ],
        "expr-keywords": [
            # Based on https://docs.python.org/3/reference/expressions.html
            (
                words(
                    (
                        "async for",  # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
                        "async with",  # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with
                        "await",
                        "else",
                        "for",
                        "if",
                        "lambda",
                        "yield",
                        "yield from",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
        ],
        "keywords": [
            (
                words(
                    (
                        "assert",
                        "async",
                        "await",
                        "borrowed",
                        "break",
                        "continue",
                        "del",
                        "elif",
                        "else",
                        "except",
                        "finally",
                        "for",
                        "global",
                        "if",
                        "lambda",
                        "pass",
                        "raise",
                        "nonlocal",
                        "return",
                        "try",
                        "while",
                        "yield",
                        "yield from",
                        "as",
                        "with",
                    ),
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant),
        ],
        "soft-keywords": [
            # `match`, `case` and `_` soft keywords
            (
                r"(^[ \t]*)"  # at beginning of line + possible indentation
                r"(match|case)\b"  # a possible keyword
                r"(?![ \t]*(?:"  # not followed by...
                r"[:,;=^&|@~)\]}]|(?:" +  # characters and keywords that mean this isn't
                # pattern matching (but None/True/False is ok)
                r"|".join(k for k in keyword.kwlist if k[0].islower())
                + r")\b))",
                bygroups(Whitespace, Keyword),
                "soft-keywords-inner",
            ),
        ],
        "soft-keywords-inner": [
            # optional `_` keyword
            (r"(\s+)([^\n_]*)(_\b)", bygroups(Whitespace, using(this), Keyword)),
            default("#pop"),
        ],
        "builtins": [
            (
                words(
                    (
                        "__import__",
                        "abs",
                        "aiter",
                        "all",
                        "any",
                        "bin",
                        "bool",
                        "bytearray",
                        "breakpoint",
                        "bytes",
                        "callable",
                        "chr",
                        "classmethod",
                        "compile",
                        "complex",
                        "delattr",
                        "dict",
                        "dir",
                        "divmod",
                        "enumerate",
                        "eval",
                        "filter",
                        "float",
                        "format",
                        "frozenset",
                        "getattr",
                        "globals",
                        "hasattr",
                        "hash",
                        "hex",
                        "id",
                        "input",
                        "int",
                        "isinstance",
                        "issubclass",
                        "iter",
                        "len",
                        "list",
                        "locals",
                        "map",
                        "max",
                        "memoryview",
                        "min",
                        "next",
                        "object",
                        "oct",
                        "open",
                        "ord",
                        "pow",
                        "print",
                        "property",
                        "range",
                        "repr",
                        "reversed",
                        "round",
                        "set",
                        "setattr",
                        "slice",
                        "sorted",
                        "staticmethod",
                        "str",
                        "sum",
                        "super",
                        "tuple",
                        "type",
                        "vars",
                        "zip",
                        # Mojo builtin types: https://docs.modular.com/mojo/stdlib/builtin/
                        # "Error" is deliberately not listed here: this rule
                        # is tried first with the same prefix/suffix, so an
                        # entry here would make the Name.Exception rule at
                        # the end of this state unreachable.
                        "AnyType",
                        "Coroutine",
                        "DType",
                        "Int",
                        "List",
                        "ListLiteral",
                        "Scalar",
                        "Int8",
                        "UInt8",
                        "Int16",
                        "UInt16",
                        "Int32",
                        "UInt32",
                        "Int64",
                        "UInt64",
                        "BFloat16",
                        "Float16",
                        "Float32",
                        "Float64",
                        "SIMD",
                        "String",
                        "Tensor",
                        "Tuple",
                        "Movable",
                        "Copyable",
                        "CollectionElement",
                    ),
                    prefix=r"(?<!\.)",  # not an attribute access
                    suffix=r"\b",
                ),
                Name.Builtin,
            ),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|cls)\b", Name.Builtin.Pseudo),
            (
                words(
                    ("Error",),
                    prefix=r"(?<!\.)",
                    suffix=r"\b",
                ),
                Name.Exception,
            ),
        ],
        "magicfuncs": [
            (
                words(
                    (
                        "__abs__",
                        "__add__",
                        "__aenter__",
                        "__aexit__",
                        "__aiter__",
                        "__and__",
                        "__anext__",
                        "__await__",
                        "__bool__",
                        "__bytes__",
                        "__call__",
                        "__complex__",
                        "__contains__",
                        "__del__",
                        "__delattr__",
                        "__delete__",
                        "__delitem__",
                        "__dir__",
                        "__divmod__",
                        "__enter__",
                        "__eq__",
                        "__exit__",
                        "__float__",
                        "__floordiv__",
                        "__format__",
                        "__ge__",
                        "__get__",
                        "__getattr__",
                        "__getattribute__",
                        "__getitem__",
                        "__gt__",
                        "__hash__",
                        "__iadd__",
                        "__iand__",
                        "__ifloordiv__",
                        "__ilshift__",
                        "__imatmul__",
                        "__imod__",
                        "__imul__",
                        "__index__",
                        "__init__",
                        "__instancecheck__",
                        "__int__",
                        "__invert__",
                        "__ior__",
                        "__ipow__",
                        "__irshift__",
                        "__isub__",
                        "__iter__",
                        "__itruediv__",
                        "__ixor__",
                        "__le__",
                        "__len__",
                        "__length_hint__",
                        "__lshift__",
                        "__lt__",
                        "__matmul__",
                        "__missing__",
                        "__mod__",
                        "__mul__",
                        "__ne__",
                        "__neg__",
                        "__new__",
                        "__next__",
                        "__or__",
                        "__pos__",
                        "__pow__",
                        "__prepare__",
                        "__radd__",
                        "__rand__",
                        "__rdivmod__",
                        "__repr__",
                        "__reversed__",
                        "__rfloordiv__",
                        "__rlshift__",
                        "__rmatmul__",
                        "__rmod__",
                        "__rmul__",
                        "__ror__",
                        "__round__",
                        "__rpow__",
                        "__rrshift__",
                        "__rshift__",
                        "__rsub__",
                        "__rtruediv__",
                        "__rxor__",
                        "__set__",
                        "__setattr__",
                        "__setitem__",
                        "__str__",
                        "__sub__",
                        "__subclasscheck__",
                        "__truediv__",
                        "__xor__",
                    ),
                    suffix=r"\b",
                ),
                Name.Function.Magic,
            ),
        ],
        "magicvars": [
            (
                words(
                    (
                        "__annotations__",
                        "__bases__",
                        "__class__",
                        "__closure__",
                        "__code__",
                        "__defaults__",
                        "__dict__",
                        "__doc__",
                        "__file__",
                        "__func__",
                        "__globals__",
                        "__kwdefaults__",
                        "__module__",
                        "__mro__",
                        "__name__",
                        "__objclass__",
                        "__qualname__",
                        "__self__",
                        "__slots__",
                        "__weakref__",
                    ),
                    suffix=r"\b",
                ),
                Name.Variable.Magic,
            ),
        ],
        "numbers": [
            # floats with a decimal point and optional exponent
            (
                r"(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)"
                r"([eE][+-]?\d(?:_?\d)*)?",
                Number.Float,
            ),
            # floats with exponent only (optional trailing `j`)
            (r"\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?", Number.Float),
            (r"0[oO](?:_?[0-7])+", Number.Oct),
            (r"0[bB](?:_?[01])+", Number.Bin),
            (r"0[xX](?:_?[a-fA-F0-9])+", Number.Hex),
            (r"\d(?:_?\d)*", Number.Integer),
        ],
        "name": [
            (r"@" + uni_name, Name.Decorator),
            (r"@", Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        # The states below are entered right after a declaration keyword and
        # pop as soon as the declared identifier has been consumed.
        "varname": [
            (uni_name, Name.Variable, "#pop"),
        ],
        "funcname": [
            include("magicfuncs"),
            (uni_name, Name.Function, "#pop"),
            default("#pop"),
        ],
        "classname": [
            (uni_name, Name.Class, "#pop"),
        ],
        "structname": [
            (uni_name, Name.Struct, "#pop"),
        ],
        "import": [
            (r"(\s+)(as)(\s+)", bygroups(Whitespace, Keyword, Whitespace)),
            (r"\.", Name.Namespace),
            (uni_name, Name.Namespace),
            (r"(\s*)(,)(\s*)", bygroups(Whitespace, Operator, Whitespace)),
            default("#pop"),  # all else: go back
        ],
        "fromimport": [
            (r"(\s+)(import)\b", bygroups(Whitespace, Keyword.Namespace), "#pop"),
            (r"\.", Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r"None\b", Keyword.Constant, "#pop"),
            (uni_name, Name.Namespace),
            default("#pop"),
        ],
        "rfstringescape": [
            # doubled braces are literal braces inside an f-string
            (r"\{\{", String.Escape),
            (r"\}\}", String.Escape),
        ],
        "fstringescape": [
            include("rfstringescape"),
            include("stringescape"),
        ],
        "bytesescape": [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        "stringescape": [
            (r"\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})", String.Escape),
            include("bytesescape"),
        ],
        # String-body rule lists, generated once per quote style.
        "fstrings-single": fstring_rules(String.Single),
        "fstrings-double": fstring_rules(String.Double),
        "strings-single": innerstring_rules(String.Single),
        "strings-double": innerstring_rules(String.Double),
        # Single-line string states: the closing quote pops the state.
        "dqf": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("fstrings-double"),
        ],
        "sqf": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("fstrings-single"),
        ],
        "dqs": [
            (r'"', String.Double, "#pop"),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include("strings-double"),
        ],
        "sqs": [
            (r"'", String.Single, "#pop"),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include("strings-single"),
        ],
        # Triple-quoted string states: newlines are part of the string.
        "tdqf": [
            (r'"""', String.Double, "#pop"),
            include("fstrings-double"),
            (r"\n", String.Double),
        ],
        "tsqf": [
            (r"'''", String.Single, "#pop"),
            include("fstrings-single"),
            (r"\n", String.Single),
        ],
        "tdqs": [
            (r'"""', String.Double, "#pop"),
            include("strings-double"),
            (r"\n", String.Double),
        ],
        "tsqs": [
            (r"'''", String.Single, "#pop"),
            include("strings-single"),
            (r"\n", String.Single),
        ],
    }

    def analyse_text(text):
        """Score how likely *text* is to be Mojo source.

        :param text: the source text to inspect.
        :return: ``1.0`` when the shebang line names a ``mojo`` interpreter,
            ``0.9`` when ``"import "`` occurs within the first 1000
            characters, otherwise ``0``.
        """
        # TODO supported?
        # The pattern is exactly "mojo": the previous "mojo?" made the final
        # "o" optional and therefore also accepted an interpreter named "moj".
        if shebang_matches(text, r"mojo"):
            return 1.0
        if "import " in text[:1000]:
            return 0.9
        return 0