1"""
2 pygments.lexers.typst
3 ~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Typst language.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11from pygments.lexer import RegexLexer, words, bygroups, include
12from pygments.token import Comment, Keyword, Name, String, Punctuation, \
13 Whitespace, Generic, Operator, Number, Text
14from pygments.util import get_choice_opt
15
16__all__ = ['TypstLexer']
17
18
class TypstLexer(RegexLexer):
    """
    For Typst code.

    The lexer switches between three families of states: ``markup`` (the
    default, also used for ``root``), ``math`` (between ``$`` delimiters) and
    ``code`` / ``inline_code`` (entered through the ``#`` rules collected in
    ``into_code``).

    Additional options accepted:

    `start`
        Specifies the starting state of the lexer (one of 'markup', 'math',
        'code'). The default is 'markup'.
    """

    name = 'Typst'
    aliases = ['typst']
    filenames = ['*.typ']
    mimetypes = ['text/x-typst']
    url = 'https://typst.app'
    version_added = '2.18'

    # Symbol shorthands highlighted as operators inside math mode.
    MATH_SHORTHANDS = (
        '[|', '|]', '||', '*', ':=', '::=', '...', '\'', '-', '=:', '!=', '>>',
        '>=', '>>>', '<<', '<=', '<<<', '->', '|->', '=>', '|=>', '==>',
        '-->', '~~>', '~>', '>->', '->>', '<-', '<==', '<--', '<~~', '<~',
        '<-<', '<<-', '<->', '<=>', '<==>', '<-->', '>', '<', '~', ':', '|'
    )

    # NOTE: rules inside a state are tried in order and the first match wins,
    # so more specific patterns (escapes, multi-character operators) must be
    # listed before the shorter patterns that would otherwise shadow them.
    tokens = {
        'root': [
            include('markup'),
        ],
        # common cases going from math/markup into code mode
        'into_code': [
            (words(('#let', '#set', '#show'), suffix=r'\b'), Keyword.Declaration, 'inline_code'),
            (words(('#import', '#include'), suffix=r'\b'), Keyword.Namespace, 'inline_code'),
            (words(('#if', '#for', '#while', '#export'), suffix=r'\b'), Keyword.Reserved, 'inline_code'),
            (r'#\{', Punctuation, 'code'),
            (r'#\(', Punctuation, 'code'),
            (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\[)', bygroups(Name.Function, Punctuation), 'markup'),
            (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
            (words(('#true', '#false', '#none', '#auto'), suffix=r'\b'), Keyword.Constant),
            # Hyphens are accepted here as well, matching every other
            # identifier pattern of this lexer (e.g. ``#my-var``).
            (r'#[a-zA-Z_][a-zA-Z0-9_-]*', Name.Variable),
            (r'#0x[0-9a-fA-F]+', Number.Hex),
            (r'#0b[01]+', Number.Bin),
            (r'#0o[0-7]+', Number.Oct),
            (r'#[0-9]+[\.e][0-9]+', Number.Float),
            (r'#[0-9]+', Number.Integer),
        ],
        'markup': [
            include('comment'),
            (r'^\s*=+.*$', Generic.Heading),
            (r'[*][^*]*[*]', Generic.Strong),
            (r'_[^_]*_', Generic.Emph),
            (r'\$', Punctuation, 'math'),
            (r'`[^`]*`', String.Backtick),  # inline code
            (r'^(\s*)(-)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)),  # unnumbered list
            (r'^(\s*)(\+)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)),  # numbered list
            (r'^(\s*)([0-9]+\.)', bygroups(Whitespace, Punctuation)),  # numbered list variant
            (r'^(\s*)(/)(\s+)([^:]+)(:)', bygroups(Whitespace, Punctuation, Whitespace, Name.Variable, Punctuation)),  # definitions
            (r'<[a-zA-Z_][a-zA-Z0-9_-]*>', Name.Label),  # label
            (r'@[a-zA-Z_][a-zA-Z0-9_-]*', Name.Label),  # reference
            (r'\\#', Text),  # escaped
            include('into_code'),
            (r'```(?:.|\n)*?```', String.Backtick),  # code block
            (r'https?://[0-9a-zA-Z~/%#&=\',;.+?]*', Generic.Emph),  # links
            # Escaped brackets have to come before the shorthand rule below:
            # its lone backslash alternative would otherwise consume the
            # ``\`` and let the bracket push/pop a state.
            (r'\\\[', Punctuation),  # escaped
            (r'\\\]', Punctuation),  # escaped
            (words(('---', '\\', '~', '--', '...'), suffix=r'\B'), Punctuation),  # special chars shorthand
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'[ \t]+\n?|\n', Whitespace),
            (r'((?![*_$`<@\\#\] ]|https?://).)+', Text),
        ],
        'math': [
            include('comment'),
            (words(('\\_', '\\^', '\\&')), Text),  # escapes
            (words(('_', '^', '&', ';')), Punctuation),
            (words(('+', '/', '=') + MATH_SHORTHANDS), Operator),
            # The escaped dollar must be tried before the bare backslash;
            # otherwise ``\`` is consumed alone and the following ``$``
            # terminates math mode.
            (r'\\\$', Punctuation),  # escaped
            (r'\\', Punctuation),  # line break
            (r'\$', Punctuation, '#pop'),  # end of math mode
            include('into_code'),
            (r'([a-zA-Z][a-zA-Z0-9-]*)(\s*)(\()', bygroups(Name.Function, Whitespace, Punctuation)),
            (r'([a-zA-Z][a-zA-Z0-9-]*)(:)', bygroups(Name.Variable, Punctuation)),  # named arguments in math functions
            (r'([a-zA-Z][a-zA-Z0-9-]*)', Name.Variable),  # both variables and symbols (_ isn't supported for variables)
            (r'[0-9]+(\.[0-9]+)?', Number),
            (r'\.{1,3}|\(|\)|,|\{|\}', Punctuation),
            (r'"[^"]*"', String.Double),
            (r'[ \t\n]+', Whitespace),
        ],
        'comment': [
            (r'//.*$', Comment.Single),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        'code': [
            include('comment'),
            (r'\[', Punctuation, 'markup'),
            (r'\(|\{', Punctuation, 'code'),
            (r'\)|\}', Punctuation, '#pop'),
            (r'"[^"]*"', String.Double),
            (r',|\.{1,2}', Punctuation),
            (words(('and', 'or', 'not'), suffix=r'\b'), Operator.Word),
            # Multi-character operators are listed before their one-character
            # prefixes, and before the plain ``=`` rule, so that ``=>``,
            # ``==`` or ``>=`` come out as a single token. A lone ``/`` is
            # safe here because comments were already tried above.
            (r'=>|<=|>=|==|!=|>|<|-=|\+=|\*=|/=|\+|-|\\|\*|/', Operator),  # comparisons and arithmetic
            (r'=', Operator),
            (r'([a-zA-Z_][a-zA-Z0-9_-]*)(:)', bygroups(Name.Variable, Punctuation)),
            (r'([a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
            (words(('as', 'break', 'export', 'continue', 'else', 'for', 'if',
                    'in', 'return', 'while'), suffix=r'\b'),
             Keyword.Reserved),
            (words(('import', 'include'), suffix=r'\b'), Keyword.Namespace),
            (words(('auto', 'none', 'true', 'false'), suffix=r'\b'), Keyword.Constant),
            (r'([0-9.]+)(mm|pt|cm|in|em|fr|%)', bygroups(Number, Keyword.Reserved)),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'[0-9]+[\.e][0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (words(('let', 'set', 'show'), suffix=r'\b'), Keyword.Declaration),
            # FIXME: make this work
            ## (r'(import|include)( *)(")([^"])(")',
            ##  bygroups(Keyword.Reserved, Text, Punctuation, String.Double, Punctuation)),
            (r'([a-zA-Z_][a-zA-Z0-9_-]*)', Name.Variable),
            (r'[ \t\n]+', Whitespace),
            (r':', Punctuation),  # from imports like "import a: b" or "show: text.with(..)"
        ],
        # Code introduced by a ``#keyword``: it lasts until the end of the
        # line or an explicit semicolon, then control returns to the state
        # that was active before.
        'inline_code': [
            # No ``\b`` after the semicolon: it has to end the statement no
            # matter which character follows (``#let x = 1; text``).
            (r';', Punctuation, '#pop'),
            (r'\n', Whitespace, '#pop'),
            include('code'),
        ],
    }

    def __init__(self, **options):
        """
        Read the ``start`` option and initialise the underlying RegexLexer.

        ``start`` must be one of 'markup', 'code' or 'math' and defaults to
        'markup'. Validation is delegated to ``get_choice_opt``; the trailing
        ``True`` is passed as that helper's final positional argument
        (presumably its case-normalisation flag -- confirm against
        ``pygments.util``).
        """
        self.start_state = get_choice_opt(
            options, 'start', ['markup', 'code', 'math'], 'markup', True)

        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Lex `text`, beginning in the state selected by the ``start`` option.

        The initial state stack always has 'root' at the bottom; when the
        requested start state is not 'markup', that state is pushed on top of
        it. The tokens produced by ``RegexLexer.get_tokens_unprocessed`` for
        that stack are yielded unchanged.
        """
        stack = ['root']
        if self.start_state != 'markup':  # markup is equivalent to root
            stack.append(self.start_state)

        yield from RegexLexer.get_tokens_unprocessed(self, text, stack)