Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parso/grammar.py: 72%

127 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 07:36 +0000

1import hashlib 

2import os 

3from typing import Generic, TypeVar, Union, Dict, Optional, Any 

4from pathlib import Path 

5 

6from parso._compatibility import is_pypy 

7from parso.pgen2 import generate_grammar 

8from parso.utils import split_lines, python_bytes_to_unicode, \ 

9 PythonVersionInfo, parse_version_string 

10from parso.python.diff import DiffParser 

11from parso.python.tokenize import tokenize_lines, tokenize 

12from parso.python.token import PythonTokenTypes 

13from parso.cache import parser_cache, load_module, try_to_save_module 

14from parso.parser import BaseParser 

15from parso.python.parser import Parser as PythonParser 

16from parso.python.errors import ErrorFinderConfig 

17from parso.python import pep8 

18from parso.file_io import FileIO, KnownContentFileIO 

19from parso.normalizer import RefactoringNormalizer, NormalizerConfig 

20 

# Module-level cache of grammars, keyed by the absolute path of the grammar
# file, so repeated load_grammar() calls reuse the already-built grammar.
_loaded_grammars: Dict[str, 'Grammar'] = {}

# Type of the root node produced by Grammar.parse (e.g. a Python module node).
_NodeT = TypeVar("_NodeT")

24 

25 

class Grammar(Generic[_NodeT]):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom non-Python grammars by calling this is not supported, yet.

    :param text: A BNF representation of your grammar.
    """
    # The grammar rule (nonterminal) that parsing starts from by default;
    # must be set by subclasses.
    _start_nonterminal: str
    # Configuration used by iter_errors(); ``None`` means this grammar
    # cannot report syntax/indentation issues.
    _error_normalizer_config: Optional[ErrorFinderConfig] = None
    # Token-type namespace; subclasses must provide one (checked in
    # _get_token_namespace).
    _token_namespace: Any = None
    # Normalizer config used when _normalize()/_get_normalizer_issues()
    # receive no explicit config.
    _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig()

    def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None):
        self._pgen_grammar = generate_grammar(
            text,
            token_namespace=self._get_token_namespace()
        )
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        # Hash of the BNF text; used as part of the cache key so cached
        # trees are invalidated whenever the grammar itself changes.
        self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()

    def parse(self,
              code: Optional[Union[str, bytes]] = None,
              *,
              error_recovery=True,
              path: Optional[Union[os.PathLike, str]] = None,
              start_symbol: Optional[str] = None,
              cache=False,
              diff_cache=False,
              cache_path: Optional[Union[os.PathLike, str]] = None,
              file_io: Optional[FileIO] = None) -> _NodeT:
        """
        If you want to parse a Python file you want to start here, most likely.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, returns a
            :py:class:`UnicodeDecodeError`.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed. Note that this stores pickle files
            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param str cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.

        :raises TypeError: If neither ``code``, ``path`` nor ``file_io`` is
            given, or ``diff_cache`` is used without a diff parser.
        :raises NotImplementedError: If ``error_recovery`` is used with a
            ``start_symbol`` other than ``'file_input'``.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        # Normalize plain string paths to pathlib.Path early on.
        if isinstance(path, str):
            path = Path(path)
        if isinstance(cache_path, str):
            cache_path = Path(cache_path)

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        # Wrap the input in a FileIO so the cache machinery below has a
        # uniform interface regardless of how the code was supplied.
        if file_io is None:
            if code is None:
                file_io = FileIO(path)  # type: ignore[arg-type]
            else:
                file_io = KnownContentFileIO(path, code)

        # On-disk cache lookup; only possible when the file's path is known.
        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node  # type: ignore[no-any-return]

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                # In-memory cache lookup, keyed by grammar hash and path.
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                # Nothing cached yet; fall through to a full parse below.
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    # Source unchanged: reuse the cached tree as-is.
                    return module_node  # type: ignore[no-any-return]

                # Reparse only the parts of the module that changed.
                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                try_to_save_module(self._hashed, file_io, new_node, lines,
                                   # Never pickle in pypy, it's slow as hell.
                                   pickling=cache and not is_pypy,
                                   cache_path=cache_path)
                return new_node  # type: ignore[no-any-return]

        tokens = self._tokenizer(lines)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(self._hashed, file_io, root_node, lines,
                               # Never pickle in pypy, it's slow as hell.
                               pickling=cache and not is_pypy,
                               cache_path=cache_path)
        return root_node  # type: ignore[no-any-return]

    def _get_token_namespace(self):
        # Fail early (during __init__) if a subclass forgot to define
        # _token_namespace.
        ns = self._token_namespace
        if ns is None:
            raise ValueError("The token namespace should be set.")
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
        :py:class:`parso.normalizer.Issue` objects. For Python this is
        a list of syntax/indentation errors.

        :raises ValueError: If this grammar has no error normalizer config.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")

        return self._get_normalizer_issues(node, self._error_normalizer_config)

    def refactor(self, base_node, node_to_str_map):
        # Walk the tree, replacing the nodes listed in node_to_str_map with
        # their mapped string representations.
        return RefactoringNormalizer(node_to_str_map).walk(base_node)

    def _get_normalizer(self, normalizer_config):
        # Fall back to the class-level default config when none is given.
        if normalizer_config is None:
            normalizer_config = self._default_normalizer_config
            if normalizer_config is None:
                raise ValueError("You need to specify a normalizer, because "
                                 "there's no default normalizer for this tree.")
        return normalizer_config.create_normalizer(self)

    def _normalize(self, node, normalizer_config=None):
        """
        TODO this is not public, yet.
        The returned code will be normalized, e.g. PEP8 for Python.
        """
        normalizer = self._get_normalizer(normalizer_config)
        return normalizer.walk(node)

    def _get_normalizer_issues(self, node, normalizer_config=None):
        # walk() is called for its side effects; the normalizer accumulates
        # the issues it finds.
        normalizer = self._get_normalizer(normalizer_config)
        normalizer.walk(node)
        return normalizer.issues

    def __repr__(self):
        # Show the first few nonterminals so the grammar is identifiable.
        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)

210 

211 

class PythonGrammar(Grammar):
    """A :class:`Grammar` specialized for parsing Python source code."""

    _error_normalizer_config = ErrorFinderConfig()
    _token_namespace = PythonTokenTypes
    _start_nonterminal = 'file_input'

    def __init__(self, version_info: PythonVersionInfo, bnf_text: str):
        # Store the target Python version first; the tokenizer bound method
        # handed to the base class reads it lazily on each call.
        self.version_info = version_info
        super().__init__(
            bnf_text,
            tokenizer=self._tokenize_lines,
            parser=PythonParser,
            diff_parser=DiffParser,
        )

    def _tokenize_lines(self, lines, **kwargs):
        # Tokenizer entry point used by Grammar.parse; forwards the
        # configured Python version to the line tokenizer.
        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

    def _tokenize(self, code):
        # Used by Jedi.
        return tokenize(code, version_info=self.version_info)

232 

233 

def load_grammar(*, version: Optional[str] = None, path: Optional[str] = None):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    :param str version: A python version string, e.g. ``version='3.8'``.
    :param str path: A path to a grammar file
    :raises NotImplementedError: If no grammar file exists for the requested
        Python version.
    """
    version_info = parse_version_string(version)

    file = path or os.path.join(
        'python',
        'grammar%s%s.txt' % (version_info.major, version_info.minor)
    )

    # Grammar files ship inside the parso package: a relative ``file`` is
    # resolved against the package directory, while an absolute ``path``
    # passes through os.path.join unchanged.
    path = os.path.join(os.path.dirname(__file__), file)
    try:
        # Reuse an already-loaded grammar for this file, if any.
        return _loaded_grammars[path]
    except KeyError:
        try:
            with open(path) as f:
                bnf_text = f.read()

            grammar = PythonGrammar(version_info, bnf_text)
            # setdefault keeps the first grammar stored if two callers race
            # on the same path.
            return _loaded_grammars.setdefault(path, grammar)
        except FileNotFoundError:
            message = "Python version %s.%s is currently not supported." % (
                version_info.major, version_info.minor
            )
            raise NotImplementedError(message)