# Coverage listing for jedi/inference/references.py (193 statements, 15% covered).

import os
import re

from parso import python_bytes_to_unicode

from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO, FolderIO
from jedi.inference.names import SubModuleName
from jedi.inference.imports import load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names

_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')

_OPENED_FILE_LIMIT = 2000
"""
Stats from a 2016 Lenovo notebook running Linux:
With os.walk, it takes about 10s to scan 11'000 files (without filesystem
caching). Once cached it only takes 5s. So it is expected that reading all
those files might take a few seconds, but not a lot more.
"""
_PARSED_FILE_LIMIT = 30
"""
For now we keep the number of parsed files very low, since parsing can easily
take 100ms for bigger files.
"""
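# A quick illustration (added; not part of the original module): both limits
# are divided by `limit_reduction` in search_in_file_ios() below, so with the
# defaults above a caller passing limit_reduction=2 gets:
#
#     parse_limit = _PARSED_FILE_LIMIT / 2   # 15.0
#     open_limit = _OPENED_FILE_LIMIT / 2    # 1000.0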

def _resolve_names(definition_names, avoid_names=()):
    for name in definition_names:
        if name in avoid_names:
            # Avoiding recursions here, because goto on a module name lands
            # on the same module.
            continue

        if not isinstance(name, SubModuleName):
            # SubModuleNames are not actually existing names but created
            # names when importing something like `import foo.bar.baz`.
            yield name

        if name.api_type == 'module':
            yield from _resolve_names(name.goto(), definition_names)
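# Note (added for clarity): for a statement like `import foo.bar.baz`, the
# intermediate names in the dotted path are SubModuleName objects with no real
# definition site. The loop above therefore never yields them as results, but
# it still follows `.goto()` on module names so the actual module definitions
# are reached.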

def _dictionarize(names):
    return dict(
        (n if n.tree_name is None else n.tree_name, n)
        for n in names
    )
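# Sketch (added; not in the original source): keying by the syntax-tree leaf
# means two Name objects found via different routes but pointing at the same
# source position collapse into one entry, roughly:
#
#     _dictionarize(ns)  # ~= {n.tree_name or n: n for n in ns}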

def _find_defining_names(module_context, tree_name):
    found_names = _find_names(module_context, tree_name)

    for name in list(found_names):
        # Convert from/to stubs, because those might also be usages.
        found_names |= set(convert_names(
            [name],
            only_stubs=not name.get_root_context().is_stub(),
            prefer_stub_to_compiled=False
        ))

    found_names |= set(_find_global_variables(found_names, tree_name.value))
    for name in list(found_names):
        if name.api_type == 'param' or name.tree_name is None \
                or name.tree_name.parent.type == 'trailer':
            continue
        found_names |= set(_add_names_in_same_context(name.parent_context, name.string_name))
    return set(_resolve_names(found_names))

def _find_names(module_context, tree_name):
    name = module_context.create_name(tree_name)
    found_names = set(name.goto())
    found_names.add(name)

    return set(_resolve_names(found_names))

def _add_names_in_same_context(context, string_name):
    if context.tree_node is None:
        return

    until_position = None
    while True:
        filter_ = ParserTreeFilter(
            parent_context=context,
            until_position=until_position,
        )
        names = set(filter_.get(string_name))
        if not names:
            break
        yield from names
        ordered = sorted(names, key=lambda x: x.start_pos)
        until_position = ordered[0].start_pos
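# Note (added for clarity): each pass of the loop above collects the
# definitions visible before `until_position`, then moves `until_position`
# back to the earliest one found. That way shadowed definitions of the same
# name (e.g. a variable that is assigned twice) are all yielded before the
# loop terminates.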

def _find_global_variables(names, search_name):
    for name in names:
        if name.tree_name is None:
            continue
        module_context = name.get_root_context()
        try:
            method = module_context.get_global_filter
        except AttributeError:
            continue
        else:
            for global_name in method().get(search_name):
                yield global_name
                c = module_context.create_context(global_name.tree_name)
                yield from _add_names_in_same_context(c, global_name.string_name)
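# Note (added for clarity): the try/except AttributeError above duck-types
# for contexts that expose a global filter; contexts without one are simply
# skipped rather than raising.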

def find_references(module_context, tree_name, only_in_module=False):
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have ifs that are only true in
    # certain cases, we want both sides.
    try:
        inf.flow_analysis_enabled = False
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        inf.flow_analysis_enabled = True

    found_names_dct = _dictionarize(found_names)

    module_contexts = [module_context]
    if not only_in_module:
        for m in set(d.get_root_context() for d in found_names):
            if m != module_context and m.tree_node is not None \
                    and inf.project.path in m.py__file__().parents:
                module_contexts.append(m)
    # For params, no search in other modules is necessary.
    if only_in_module or any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    non_matching_reference_maps = {}
    for module_context in potential_modules:
        for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_context, name_leaf))
            if any(tree_name in found_names_dct for tree_name in new):
                found_names_dct.update(new)
                for tree_name in new:
                    for dct in non_matching_reference_maps.get(tree_name, []):
                        # A reference that was previously collected without a
                        # matching definition now matches a found name. Merge.
                        found_names_dct.update(dct)
                    try:
                        del non_matching_reference_maps[tree_name]
                    except KeyError:
                        pass
            else:
                for name in new:
                    non_matching_reference_maps.setdefault(name, []).append(new)
    result = found_names_dct.values()
    if only_in_module:
        return [n for n in result if n.get_root_context() == module_context]
    return result
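# Note (added for clarity): references whose definitions are not yet known to
# match anything are parked in non_matching_reference_maps. As soon as one of
# their tree names later resolves to a known definition, the parked maps are
# merged into found_names_dct, so transitively connected references end up in
# a single result set.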

def _check_fs(inference_state, file_io, regex):
    try:
        code = file_io.read()
    except FileNotFoundError:
        return None
    code = python_bytes_to_unicode(code, errors='replace')
    if not regex.search(code):
        return None
    new_file_io = KnownContentFileIO(file_io.path, code)
    m = load_module_from_path(inference_state, new_file_io)
    if m.is_compiled():
        return None
    return m.as_context()
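# Note (added for clarity): _check_fs() is the cheap pre-filter behind
# _OPENED_FILE_LIMIT and _PARSED_FILE_LIMIT; a file is only parsed (the
# expensive step) when the compiled word regex already matches its raw text.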

def gitignored_paths(folder_io, file_io):
    ignored_paths_abs = set()
    ignored_paths_rel = set()

    for l in file_io.read().splitlines():
        if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
            continue

        p = l.decode('utf-8', 'ignore').rstrip('/')
        if '/' in p:
            name = p.lstrip('/')
            ignored_paths_abs.add(os.path.join(folder_io.path, name))
        else:
            name = p
            ignored_paths_rel.add((folder_io.path, name))

    return ignored_paths_abs, ignored_paths_rel
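# Sketch (added; assumes a .gitignore located in /repo): this is deliberately
# a rough .gitignore subset; comments, negations (`!pattern`) and wildcard
# lines are skipped entirely. For example:
#
#     build/    -> ('/repo', 'build') in ignored_paths_rel
#                  (ignored in /repo and everything below it)
#     /dist     -> '/repo/dist' in ignored_paths_abs
#     *.pyc     -> skipped, never ignored here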

def expand_relative_ignore_paths(folder_io, relative_paths):
    curr_path = folder_io.path
    return {os.path.join(curr_path, p[1]) for p in relative_paths
            if curr_path.startswith(p[0])}

def recurse_find_python_folders_and_files(folder_io, except_paths=()):
    except_paths = set(except_paths)
    except_paths_relative = set()

    for root_folder_io, folder_ios, file_ios in folder_io.walk():
        for file_io in file_ios:
            path = file_io.path
            if path.suffix in ('.py', '.pyi'):
                if path not in except_paths:
                    yield None, file_io

            if path.name == '.gitignore':
                ignored_paths_abs, ignored_paths_rel = gitignored_paths(
                    root_folder_io, file_io
                )
                except_paths |= ignored_paths_abs
                except_paths_relative |= ignored_paths_rel

        except_paths_relative_expanded = expand_relative_ignore_paths(
            root_folder_io, except_paths_relative
        )

        # Prune folders that we don't want to iterate over (mutating the
        # list in place so the walk skips them).
        folder_ios[:] = [
            folder_io
            for folder_io in folder_ios
            if folder_io.path not in except_paths
            and folder_io.path not in except_paths_relative_expanded
            and folder_io.get_base_name() not in _IGNORE_FOLDERS
        ]
        for folder_io in folder_ios:
            yield folder_io, None
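# Note (added; assumes FolderIO.walk() mirrors os.walk()): the slice
# assignment keeps the same list object alive, which is why the pruning above
# takes effect inside the ongoing walk instead of being a no-op.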

def recurse_find_python_files(folder_io, except_paths=()):
    for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
        if file_io is not None:
            yield file_io

def _find_python_files_in_sys_path(inference_state, module_contexts):
    sys_path = inference_state.get_sys_path()
    except_paths = set()
    yielded_paths = [m.py__file__() for m in module_contexts]
    for module_context in module_contexts:
        file_io = module_context.get_value().file_io
        if file_io is None:
            continue

        folder_io = file_io.get_parent_folder()
        while True:
            path = folder_io.path
            if not any(path.startswith(p) for p in sys_path) or path in except_paths:
                break
            for file_io in recurse_find_python_files(folder_io, except_paths):
                if file_io.path not in yielded_paths:
                    yield file_io
            except_paths.add(path)
            folder_io = folder_io.get_parent_folder()
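# Note (added for clarity): this helper is currently unreferenced; it is kept
# for a potential `scope=sys.path` search (see the commented-out call in
# get_module_contexts_containing_name() below).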

def _find_project_modules(inference_state, module_contexts):
    except_ = [m.py__file__() for m in module_contexts]
    yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)

def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search for a name in the directories of the given modules.

    :param limit_reduction: Divides the limits on opening/parsing files by
        this factor.
    """
    # Skip non-Python modules.
    for module_context in module_contexts:
        if module_context.is_compiled():
            continue
        yield module_context

    # Very short names are not searched in other modules for now to avoid lots
    # of file lookups.
    if len(name) <= 2:
        return

    # Currently not used, because there is only `scope=project` and
    # `scope=file`; at the moment there is no such thing as `scope=sys.path`.
    # file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
    file_io_iterator = _find_project_modules(inference_state, module_contexts)
    yield from search_in_file_ios(inference_state, file_io_iterator, name,
                                  limit_reduction=limit_reduction)
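# Hypothetical usage sketch (added; the variable names are invented): callers
# such as find_references() drain this generator lazily, so the open/parse
# limits apply as files are visited:
#
#     for module_context in get_module_contexts_containing_name(
#             inference_state, [start_module_context], 'my_function'):
#         ...  # inspect usages of 'my_function' in module_context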

def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1, complete=False):
    parse_limit = _PARSED_FILE_LIMIT / limit_reduction
    open_limit = _OPENED_FILE_LIMIT / limit_reduction
    file_io_count = 0
    parsed_file_count = 0
    regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
    for file_io in file_io_iterator:
        file_io_count += 1
        m = _check_fs(inference_state, file_io, regex)
        if m is not None:
            parsed_file_count += 1
            yield m
            if parsed_file_count >= parse_limit:
                dbg('Hit limit of parsed files: %s', parse_limit)
                break

        if file_io_count >= open_limit:
            dbg('Hit limit of opened files: %s', open_limit)
            break
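# Examples (added; not part of the original source) of the regex built above:
# `name` must match as a whole word unless complete=True, in which case
# prefixes also count (useful for completion searches):
#
#     re.compile(r'\bfoo\b').search('x = foobar')   # None  -> file skipped
#     re.compile(r'\bfoo').search('x = foobar')     # match -> file parsed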