Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/stata.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

15 statements  

1""" 

2 pygments.lexers.stata 

3 ~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexer for Stata 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from pygments.lexer import RegexLexer, default, include, words 

13from pygments.token import Comment, Keyword, Name, Number, \ 

14 String, Text, Operator 

15 

16from pygments.lexers._stata_builtins import builtins_base, builtins_functions 

17 

18__all__ = ['StataLexer'] 

19 

20 

class StataLexer(RegexLexer):
    """
    For Stata do files.

    The lexer is a plain state machine: ``root`` tries comments, strings,
    macros, numbers, keywords, operators and display formats in that order
    and falls back to emitting a single character as ``Text``. Comments,
    compound strings and macros can nest, so each of them has its own
    state(s) that push and pop on the lexer's state stack.
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    url = 'http://www.stata.com/'
    version_added = '2.2'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # MULTILINE lets ``^`` anchor at every line start (line comments);
    # DOTALL lets the single-character ``.`` fallbacks consume newlines too.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            # Anything not recognised above is emitted one character at a time.
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            # ``//`` at line start or after whitespace, but not ``///``.
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            # ``*`` as the first non-blank character of a line.
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # ``///`` at line start or after whitespace.
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
        ],
        'comments-block': [
            # A nested opener adds another level to the state stack.
            (r'/\*', Comment.Multiline, '#push'),
            # this ends and restarts a comment block. but need to catch this so
            # that it doesn't start _another_ level of comment blocks
            (r'\*/\*', Comment.Multiline),
            # Closer; the first alternative also swallows a following
            # ``* ...`` up to the end of that line as part of the comment.
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            # ``///`` through the newline: leave this state and carry on in
            # 'comments-triple-slash'.
            (r'///.*?\n', Comment.Single,
                ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
                ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # Last character before the newline ends the star comment.
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            # A plain quote, unless it is the second half of a `" opener.
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        'string-regular': [
            # Closed by a quote that is not part of a "' closer, or (without
            # consuming anything) when the end of the line is reached.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        # A local is usually
        #     `\w{0,31}'
        #     `:extended macro'
        #     `=expression'
        #     `[rsen](results)'
        #     `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            # ``${`` or a ``$`` directly followed by another macro sigil.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            # Any non-word character ends the nested global without being
            # consumed.
            default('#pop'),
        ],
        'macro-global-name': [
            # Every rule here finishes the pending ``$name``, so each one
            # leaves this state. The transition has to be a single
            # ``('#pop', state)`` tuple, the form 'comments-star' uses: a
            # rule carries only one new-state element, so a separate trailing
            # '#pop' after the state name is not applied and this state would
            # stay on the stack.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
                ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global,
                ('#pop', 'macro-global-name')),
            (r'`', Name.Variable,
                ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Function names only count when directly followed by ``(``.
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must start a line or follow whitespace; the leading
            # whitespace is part of the match.
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            (r'-|==|<=|>=|<|>|&|!=', Operator),
            # ``~=`` is listed before ``!`` and ``~``: alternation takes the
            # first alternative that matches, so the two-character operator
            # has to precede its one-character prefix to be matched whole.
            # (``==`` and ``!=`` are already handled by the rule above.)
            (r'\*|\+|\^|/|~=|!|~', Operator)
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            # numeric formats such as %9.2f, %-12.0gc
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            # fixed-width hexadecimal / binary formats
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            # date and time formats: %tc, %td..., plus optional detail
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            # string formats such as %20s, %-20s, %~20s
            (r'%[-~]?\d{1,4}s', Name.Other),
        ]
    }