Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/stata.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.stata 

3 ~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexer for Stata 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12from pygments.lexer import RegexLexer, default, include, words 

13from pygments.token import Comment, Keyword, Name, Number, \ 

14 String, Text, Operator 

15 

16from pygments.lexers._stata_builtins import builtins_base, builtins_functions 

17 

18__all__ = ['StataLexer'] 

19 

20 

class StataLexer(RegexLexer):
    """
    For Stata do files.

    The lexer is table driven: ``tokens`` maps each state name to an ordered
    list of rules, and the first rule whose regex matches at the current
    position wins.  Rule order inside a state is therefore significant.

    .. versionadded:: 2.2
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    url = 'http://www.stata.com/'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # DOTALL lets the single-character fallback rules (r'.') consume
    # newlines too; MULTILINE makes '^' match at the start of every line.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            # Anything not recognised above is emitted one character at a
            # time as plain text.
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            # '//' at line start or after whitespace, but not '///'
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            # '*' as the first non-blank character of a line
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
        ],
        'comments-block': [
            # A nested '/*' opens another level of block comment.
            (r'/\*', Comment.Multiline, '#push'),
            # this ends and restarts a comment block. but need to catch this so
            # that it doesn't start _another_ level of comment blocks
            (r'\*/\*', Comment.Multiline),
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            # '///' inside a star comment: leave this state and continue in
            # the triple-slash state.
            (r'///.*?\n', Comment.Single,
             ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
             ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # The last character before the newline closes the comment.
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        'string-regular': [
            # Closed by a '"' that is not followed by "'", or (without
            # consuming anything) right before a newline.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        # A local is usually
        #     `\w{0,31}'
        #     `:extended macro'
        #     `=expression'
        #     `[rsen](results)'
        #     `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            # '${', or a '$' directly followed by another '$' or a backtick
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            # Any non-word character ends the nested global.
            default('#pop'),
        ],
        'macro-global-name': [
            # A rule is (regex, token[, new_state]); only one new-state
            # element is read, so "leave this state and enter another" has
            # to be a single ('#pop', state) tuple, as in 'comments-star'.
            # Written as a separate fourth element, the '#pop' was dropped
            # and this state was never left by these rules.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
             ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global,
             ('#pop', 'macro-global-name')),
            (r'`', Name.Variable,
             ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Function names only count when directly followed by '('.
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must start a line or follow whitespace.
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            (r'-|==|<=|>=|<|>|&|!=', Operator),
            # '~=' has to be tried before '~'; with '~' listed first the
            # two-character operator could never match as a unit.  '!=' and
            # '==' are already handled by the rule above.
            (r'\*|\+|\^|/|!|~=|~', Operator)
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            (r'%[-~]?\d{1,4}s', Name.Other),
        ]
    }