Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/grammar.py: 72%

127 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

1import hashlib 

2import os 

3from typing import Generic, TypeVar, Union, Dict, Optional, Any 

4from pathlib import Path 

5 

6from parso._compatibility import is_pypy 

7from parso.pgen2 import generate_grammar 

8from parso.utils import split_lines, python_bytes_to_unicode, \ 

9 PythonVersionInfo, parse_version_string 

10from parso.python.diff import DiffParser 

11from parso.python.tokenize import tokenize_lines, tokenize 

12from parso.python.token import PythonTokenTypes 

13from parso.cache import parser_cache, load_module, try_to_save_module 

14from parso.parser import BaseParser 

15from parso.python.parser import Parser as PythonParser 

16from parso.python.errors import ErrorFinderConfig 

17from parso.python import pep8 

18from parso.file_io import FileIO, KnownContentFileIO 

19from parso.normalizer import RefactoringNormalizer, NormalizerConfig 

20 

# Module-level cache of grammars, keyed by the absolute path of the grammar
# file, so repeated load_grammar() calls reuse the already-built grammar.
_loaded_grammars: Dict[str, 'Grammar'] = {}

# Type of the root node produced by Grammar.parse (e.g. a Python module node).
_NodeT = TypeVar("_NodeT")

24 

25 

class Grammar(Generic[_NodeT]):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom non-Python grammars by calling this is not supported, yet.

    :param text: A BNF representation of your grammar.
    """
    # The grammar rule (nonterminal) that parsing starts from by default;
    # must be set by subclasses.
    _start_nonterminal: str
    # Configuration used by iter_errors(); ``None`` means this grammar
    # cannot report syntax/indentation issues.
    _error_normalizer_config: Optional[ErrorFinderConfig] = None
    # Token-type namespace; subclasses must provide one (checked in
    # _get_token_namespace).
    _token_namespace: Any = None
    # Normalizer config used when _normalize()/_get_normalizer_issues()
    # receive no explicit config.
    _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig()

    def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None):
        self._pgen_grammar = generate_grammar(
            text,
            token_namespace=self._get_token_namespace()
        )
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        # Hash of the BNF text; used as part of the cache key so cached
        # trees are invalidated whenever the grammar itself changes.
        self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()

    def parse(self,
              code: Optional[Union[str, bytes]] = None,
              *,
              error_recovery=True,
              path: Optional[Union[os.PathLike, str]] = None,
              start_symbol: Optional[str] = None,
              cache=False,
              diff_cache=False,
              cache_path: Optional[Union[os.PathLike, str]] = None,
              file_io: Optional[FileIO] = None) -> _NodeT:
        """
        If you want to parse a Python file you want to start here, most likely.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, returns a
            :py:class:`UnicodeDecodeError`.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed. Note that this stores pickle files
            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param str cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.

        :raises TypeError: If neither ``code``, ``path`` nor ``file_io`` is
            given, or ``diff_cache`` is used without a diff parser.
        :raises NotImplementedError: If ``error_recovery`` is used with a
            ``start_symbol`` other than ``'file_input'``.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        # Normalize plain string paths to pathlib.Path early on.
        if isinstance(path, str):
            path = Path(path)
        if isinstance(cache_path, str):
            cache_path = Path(cache_path)

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        # Wrap the input in a FileIO so the cache machinery below has a
        # uniform interface regardless of how the code was supplied.
        if file_io is None:
            if code is None:
                file_io = FileIO(path)  # type: ignore[arg-type]
            else:
                file_io = KnownContentFileIO(path, code)

        # On-disk cache lookup; only possible when the file's path is known.
        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node  # type: ignore[no-any-return]

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                # In-memory cache lookup, keyed by grammar hash and path.
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                # Nothing cached yet; fall through to a full parse below.
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    # Source unchanged: reuse the cached tree as-is.
                    return module_node  # type: ignore[no-any-return]

                # Reparse only the parts of the module that changed.
                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                try_to_save_module(self._hashed, file_io, new_node, lines,
                                   # Never pickle in pypy, it's slow as hell.
                                   pickling=cache and not is_pypy,
                                   cache_path=cache_path)
                return new_node  # type: ignore[no-any-return]

        tokens = self._tokenizer(lines)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(self._hashed, file_io, root_node, lines,
                               # Never pickle in pypy, it's slow as hell.
                               pickling=cache and not is_pypy,
                               cache_path=cache_path)
        return root_node  # type: ignore[no-any-return]

    def _get_token_namespace(self):
        # Fail early (during __init__) if a subclass forgot to define
        # _token_namespace.
        ns = self._token_namespace
        if ns is None:
            raise ValueError("The token namespace should be set.")
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
        :py:class:`parso.normalizer.Issue` objects. For Python this is
        a list of syntax/indentation errors.

        :raises ValueError: If this grammar has no error normalizer config.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")

        return self._get_normalizer_issues(node, self._error_normalizer_config)

    def refactor(self, base_node, node_to_str_map):
        # Walk the tree, replacing the nodes listed in node_to_str_map with
        # their mapped string representations.
        return RefactoringNormalizer(node_to_str_map).walk(base_node)

    def _get_normalizer(self, normalizer_config):
        # Fall back to the class-level default config when none is given.
        if normalizer_config is None:
            normalizer_config = self._default_normalizer_config
            if normalizer_config is None:
                raise ValueError("You need to specify a normalizer, because "
                                 "there's no default normalizer for this tree.")
        return normalizer_config.create_normalizer(self)

    def _normalize(self, node, normalizer_config=None):
        """
        TODO this is not public, yet.
        The returned code will be normalized, e.g. PEP8 for Python.
        """
        normalizer = self._get_normalizer(normalizer_config)
        return normalizer.walk(node)

    def _get_normalizer_issues(self, node, normalizer_config=None):
        # walk() is called for its side effects; the normalizer accumulates
        # the issues it finds.
        normalizer = self._get_normalizer(normalizer_config)
        normalizer.walk(node)
        return normalizer.issues

    def __repr__(self):
        # Show the first few nonterminals so the grammar is identifiable.
        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)

210 

211 

class PythonGrammar(Grammar):
    """A :class:`Grammar` specialized for parsing Python source code."""

    _error_normalizer_config = ErrorFinderConfig()
    _token_namespace = PythonTokenTypes
    _start_nonterminal = 'file_input'

    def __init__(self, version_info: PythonVersionInfo, bnf_text: str):
        # Store the target Python version first; the tokenizer bound method
        # handed to the base class reads it lazily on each call.
        self.version_info = version_info
        super().__init__(
            bnf_text,
            tokenizer=self._tokenize_lines,
            parser=PythonParser,
            diff_parser=DiffParser,
        )

    def _tokenize_lines(self, lines, **kwargs):
        # Tokenizer entry point used by Grammar.parse; forwards the
        # configured Python version to the line tokenizer.
        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

    def _tokenize(self, code):
        # Used by Jedi.
        return tokenize(code, version_info=self.version_info)

232 

233 

def load_grammar(*, version: Optional[str] = None, path: Optional[str] = None):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    :param str version: A python version string, e.g. ``version='3.8'``.
    :param str path: A path to a grammar file
    :raises NotImplementedError: If no grammar file exists for the requested
        Python version.
    """
    version_info = parse_version_string(version)

    file = path or os.path.join(
        'python',
        'grammar%s%s.txt' % (version_info.major, version_info.minor)
    )

    # Grammar files ship inside the parso package: a relative ``file`` is
    # resolved against the package directory, while an absolute ``path``
    # passes through os.path.join unchanged.
    path = os.path.join(os.path.dirname(__file__), file)
    try:
        # Reuse an already-loaded grammar for this file, if any.
        return _loaded_grammars[path]
    except KeyError:
        try:
            with open(path) as f:
                bnf_text = f.read()

            grammar = PythonGrammar(version_info, bnf_text)
            # setdefault keeps the first grammar stored if two callers race
            # on the same path.
            return _loaded_grammars.setdefault(path, grammar)
        except FileNotFoundError:
            message = "Python version %s.%s is currently not supported." % (
                version_info.major, version_info.minor
            )
            raise NotImplementedError(message)