1"""
2 pygments.lexers.stata
3 ~~~~~~~~~~~~~~~~~~~~~
4
5 Lexer for Stata
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12from pygments.lexer import RegexLexer, default, include, words
13from pygments.token import Comment, Keyword, Name, Number, \
14 String, Text, Operator
15
16from pygments.lexers._stata_builtins import builtins_base, builtins_functions
17
18__all__ = ['StataLexer']
19
20
class StataLexer(RegexLexer):
    """
    For Stata do files.

    The token table covers (nested) comments, regular and compound
    strings, local and global macros, numbers, built-in commands and
    functions, operators and display formats. Any character that no
    rule recognises is emitted on its own as ``Text``.
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    url = 'http://www.stata.com/'
    version_added = '2.2'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # MULTILINE makes ``^`` anchor at every line start; DOTALL makes the
    # catch-all ``.`` rules consume newlines too (e.g. in block comments).
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            # Fallback: consume a single character as plain text.
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            # `//` at line start or after whitespace, but not `///`.
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            # `*` as the first non-blank character of a line.
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # `///` at line start or after whitespace.
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
        ],
        'comments-block': [
            # A nested opener adds another level.
            (r'/\*', Comment.Multiline, '#push'),
            # this ends and restarts a comment block. but need to catch this so
            # that it doesn't start _another_ level of comment blocks
            (r'\*/\*', Comment.Multiline),
            # Either `*/`, whitespace and a `*` not followed by `/` plus
            # the rest of that line, or a plain `*/`; both close one level.
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            # `///` up to and including the newline hands over to the
            # triple-slash state.
            (r'///.*?\n', Comment.Single,
             ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
             ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # The last character before a newline ends the star comment.
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            # The terminating newline itself is plain text.
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            # A `"` preceded by a backtick is a compound opener, not this.
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            # Escapes: \\, \", \$, \` and backslash-newline.
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        'string-regular': [
            # Closed by a `"` that is not followed by `'`, or (without
            # consuming anything) right before a newline.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            # Escapes: \\, \", \$, \` and backslash-newline.
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        # A local is usually
        #     `\w{0,31}'
        #     `:extended macro'
        #     `=expression'
        #     `[rsen](results)'
        #     `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            # `${`, or a `$` directly followed by another `$` or a backtick.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            # Anything else ends the global without consuming input.
            default('#pop'),
        ],
        'macro-global-name': [
            # Each transition replaces this state instead of stacking on
            # top of it. A rule is (regex, token, new_state), so the pop
            # must be part of the new-state tuple; a fourth tuple element
            # would be ignored.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
             ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global, ('#pop', 'macro-global-name')),
            (r'`', Name.Variable, ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
            # A `$` with no name after it (e.g. the end-of-line anchor in
            # a regex string) must not trap the lexer in this state, where
            # every following character would be reported as an error.
            default('#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Function names count only when directly followed by `(`.
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must start a line (after optional blanks) or follow
            # whitespace; that leading whitespace is part of the token.
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            # Two-character operators come first so that `~=` and `!=`
            # are not split into `~` / `!` plus a stray `=`.
            (r'==|!=|~=|<=|>=|<|>|&|-|\*|\+|\^|/|!|~', Operator),
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            # Numeric formats such as %9.2f or %-10.0gc.
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            # %t* formats with an optional trailing specification.
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            # String formats such as %20s, %-20s or %~20s.
            (r'%[-~]?\d{1,4}s', Name.Other),
        ]
    }