Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/source_utils.py: 17% of 132 statements
(coverage.py v7.4.0, created at 2024-01-03 07:57 +0000)

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes and functions that help to inspect Python source w.r.t. TF graphs."""

import collections
import os
import re
import zipfile

import absl
import numpy as np

from tensorflow.python.debug.lib import profiling


_TENSORFLOW_BASEDIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.normpath(os.path.abspath(__file__))))))

_ABSL_BASEDIR = os.path.dirname(absl.__file__)


# A one-element tuple (note the trailing comma): with a plain string, the
# membership tests below would do substring matching and wrongly accept
# partial suffixes such as ".p".
UNCOMPILED_SOURCE_SUFFIXES = (".py",)
COMPILED_SOURCE_SUFFIXES = (".pyc", ".pyo")


def _norm_abs_path(file_path):
  return os.path.normpath(os.path.abspath(file_path))


def is_extension_uncompiled_python_source(file_path):
  _, extension = os.path.splitext(file_path)
  return extension.lower() in UNCOMPILED_SOURCE_SUFFIXES


def is_extension_compiled_python_source(file_path):
  _, extension = os.path.splitext(file_path)
  return extension.lower() in COMPILED_SOURCE_SUFFIXES
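

# Usage sketch (illustrative file names): both checks are purely
# extension-based and do not touch the file system.
def _example_extension_checks():  # pragma: no cover
  assert is_extension_uncompiled_python_source("model.py")
  assert is_extension_compiled_python_source("model.pyc")
  assert not is_extension_uncompiled_python_source("model.json")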


def _convert_watch_key_to_tensor_name(watch_key):
  return watch_key[:watch_key.rfind(":")]
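

# Usage sketch (illustrative): a debug watch key has the form
# "<node_name>:<output_slot>:<debug_op>" (see the watch_key.split(":") in
# list_source_files_against_dump below); dropping the final field yields the
# tensor name "<node_name>:<output_slot>".
def _example_convert_watch_key():  # pragma: no cover
  assert _convert_watch_key_to_tensor_name(
      "hidden/MatMul:0:DebugIdentity") == "hidden/MatMul:0"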


def guess_is_tensorflow_py_library(py_file_path):
  """Guess whether a Python source file is a part of the tensorflow library.

  Special cases:
    1) Returns False for unit-test files in the library (*_test.py),
    2) Returns False for files under python/debug/examples.

  Args:
    py_file_path: full path of the Python source file in question.

  Returns:
    (`bool`) Whether the file is inferred to be a part of the tensorflow
    library.
  """
  if (not is_extension_uncompiled_python_source(py_file_path) and
      not is_extension_compiled_python_source(py_file_path)):
    return False
  py_file_path = _norm_abs_path(py_file_path)
  return ((py_file_path.startswith(_TENSORFLOW_BASEDIR) or
           py_file_path.startswith(_ABSL_BASEDIR)) and
          not py_file_path.endswith("_test.py") and
          (os.path.normpath("tensorflow/python/debug/examples") not in
           os.path.normpath(py_file_path)))
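

# Usage sketch (illustrative paths; results depend on the local install
# layout): this module itself lies under the TensorFlow base directory and is
# not a unit test, so it should count as library code, while a user script
# outside the tensorflow/absl trees should not.
def _example_guess_is_tensorflow_py_library():  # pragma: no cover
  assert guess_is_tensorflow_py_library(__file__)
  assert not guess_is_tensorflow_py_library("/home/user/my_train_script.py")
  assert not guess_is_tensorflow_py_library(
      os.path.join(_TENSORFLOW_BASEDIR, "python", "framework", "ops_test.py"))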


def load_source(source_file_path):
  """Load the content of a Python source code file.

  This function covers the following cases:
  1. source_file_path points to an existing Python (.py) file on the
     file system.
  2. source_file_path is a path within a .par file (i.e., a zip-compressed,
     self-contained Python executable).

  Args:
    source_file_path: Path to the Python source file to read.

  Returns:
    A length-2 tuple:
      - Lines of the source file, as a `list` of `str`s.
      - The width of the string needed to show the line number in the file.
        This is calculated based on the number of lines in the source file.

  Raises:
    IOError: if loading is unsuccessful.
  """
  if os.path.isfile(source_file_path):
    with open(source_file_path, "rb") as f:
      source_text = f.read().decode("utf-8")
    source_lines = source_text.split("\n")
  else:
    # One possible reason why the file doesn't exist is that it's a path
    # inside a .par file. Try that possibility.
    source_lines = _try_load_par_source(source_file_path)
    if source_lines is None:
      raise IOError(
          "Source path neither exists nor can be loaded as a .par file: %s" %
          source_file_path)
  line_num_width = int(np.ceil(np.log10(len(source_lines)))) + 3
  return source_lines, line_num_width
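

# Usage sketch (illustrative): load this module's own source. The width is
# int(ceil(log10(num_lines))) + 3; e.g., a 500-line file needs 3 digits, so
# the line-number field is 6 characters wide.
def _example_load_source():  # pragma: no cover
  source_lines, line_num_width = load_source(__file__)
  print("%d lines; line-number field width: %d"
        % (len(source_lines), line_num_width))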


def _try_load_par_source(source_file_path):
  """Try loading the source code inside a .par file.

  A .par file is a zip-compressed, self-contained Python executable.
  It contains the content of individual Python source files that can
  be read only through extracting from the zip file.

  Args:
    source_file_path: The full path to the file inside the .par file. This
      path should include the path to the .par file itself, followed by the
      intra-par path, e.g.,
      "/tmp/my_executable.par/org-tensorflow/tensorflow/python/foo/bar.py".

  Returns:
    If successful, lines of the source file as a `list` of `str`s.
    Else, `None`.
  """
  prefix_path = source_file_path
  while True:
    prefix_path, basename = os.path.split(prefix_path)
    if not basename:
      break
    suffix_path = os.path.normpath(
        os.path.relpath(source_file_path, start=prefix_path))
    if prefix_path.endswith(".par") and os.path.isfile(prefix_path):
      with zipfile.ZipFile(prefix_path) as z:
        norm_names = [os.path.normpath(name) for name in z.namelist()]
        if suffix_path in norm_names:
          with z.open(z.namelist()[norm_names.index(suffix_path)]) as zf:
            source_text = zf.read().decode("utf-8")
            return source_text.split("\n")
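

# Usage sketch (hypothetical .par path): the loop above strips path components
# from the right until it finds an existing "*.par" zip archive, then reads
# the remaining intra-par path from inside that archive.
def _example_try_load_par_source():  # pragma: no cover
  source_lines = _try_load_par_source(
      "/tmp/my_executable.par/org-tensorflow/tensorflow/python/foo/bar.py")
  if source_lines is None:
    print("Not a readable .par member.")
  else:
    print("Loaded %d lines from inside the .par file." % len(source_lines))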


def annotate_source(dump,
                    source_file_path,
                    do_dumped_tensors=False,
                    file_stack_top=False,
                    min_line=None,
                    max_line=None):
  """Annotate a Python source file with a list of ops created at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    source_file_path: (`str`) Path to the source file being annotated.
    do_dumped_tensors: (`bool`) Whether dumped Tensors, instead of ops, are
      to be used to annotate the source file.
    file_stack_top: (`bool`) Whether only the top stack trace in the
      specified source file is to be annotated.
    min_line: (`None` or `int`) The 1-based line number to start annotating
      the source file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to a list of op name(s) created at
      that line, or tensor names if `do_dumped_tensors` is True.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """

  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot perform source annotation due to a lack of set "
                     "Python graph in the dump object")

  source_file_path = _norm_abs_path(source_file_path)

  line_to_op_names = {}
  for op in py_graph.get_operations():
    for file_path, line_number, _, _ in reversed(dump.node_traceback(op.name)):
      if (min_line is not None and line_number < min_line or
          max_line is not None and line_number >= max_line):
        continue

      if _norm_abs_path(file_path) != source_file_path:
        continue

      if do_dumped_tensors:
        watch_keys = dump.debug_watch_keys(op.name)
        # Convert watch keys to unique Tensor names.
        items_to_append = list(
            set(map(_convert_watch_key_to_tensor_name, watch_keys)))
      else:
        items_to_append = [op.name]

      if line_number in line_to_op_names:
        line_to_op_names[line_number].extend(items_to_append)
      else:
        line_to_op_names[line_number] = items_to_append

      if file_stack_top:
        break

  return line_to_op_names
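

# Usage sketch (illustrative): `dump` is assumed to be a DebugDumpDir whose
# Python graph has been set (e.g., via dump.set_python_graph(sess.graph));
# "my_model.py" is a hypothetical source file.
def _example_annotate_source(dump):  # pragma: no cover
  line_to_op_names = annotate_source(dump, "my_model.py")
  for line_number in sorted(line_to_op_names):
    print("L%d: %s" % (line_number, ", ".join(line_to_op_names[line_number])))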


def list_source_files_against_dump(dump,
                                   path_regex_allowlist=None,
                                   node_name_regex_allowlist=None):
  """Generate a list of source files with information regarding ops and tensors.

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    path_regex_allowlist: A regular-expression filter for source file path.
    node_name_regex_allowlist: A regular-expression filter for node names.

  Returns:
    A list of tuples regarding the Python source files involved in constructing
    the ops and tensors contained in `dump`. Each tuple is:
      (source_file_path, is_tf_library, num_nodes, num_tensors, num_dumps,
       first_line)

      is_tf_library: (`bool`) A guess of whether the file belongs to the
        TensorFlow Python library.
      num_nodes: How many nodes were created by lines of this source file.
        These include nodes with dumps and those without.
      num_tensors: How many Tensors were created by lines of this source file.
        These include Tensors with dumps and those without.
      num_dumps: How many debug Tensor dumps were from nodes (and Tensors)
        that were created by this source file.
      first_line: The first line number (1-based) that created any nodes or
        Tensors in this source file.

    The list is sorted by ascending order of source_file_path.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """

  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot generate source list due to a lack of set "
                     "Python graph in the dump object")

  path_to_node_names = collections.defaultdict(set)
  path_to_tensor_names = collections.defaultdict(set)
  path_to_first_line = {}
  tensor_name_to_num_dumps = {}

  path_regex = (
      re.compile(path_regex_allowlist) if path_regex_allowlist else None)
  node_name_regex = (
      re.compile(node_name_regex_allowlist)
      if node_name_regex_allowlist else None)

  to_skip_file_paths = set()
  for op in py_graph.get_operations():
    if node_name_regex and not node_name_regex.match(op.name):
      continue

    for file_path, line_number, _, _ in dump.node_traceback(op.name):
      file_path = _norm_abs_path(file_path)
      if (file_path in to_skip_file_paths or
          path_regex and not path_regex.match(file_path) or
          not os.path.isfile(file_path)):
        to_skip_file_paths.add(file_path)
        continue

      path_to_node_names[file_path].add(op.name)
      if file_path in path_to_first_line:
        if path_to_first_line[file_path] > line_number:
          path_to_first_line[file_path] = line_number
      else:
        path_to_first_line[file_path] = line_number

      for output_tensor in op.outputs:
        tensor_name = output_tensor.name
        path_to_tensor_names[file_path].add(tensor_name)

      watch_keys = dump.debug_watch_keys(op.name)
      for watch_key in watch_keys:
        node_name, output_slot, debug_op = watch_key.split(":")
        tensor_name = "%s:%s" % (node_name, output_slot)
        if tensor_name not in tensor_name_to_num_dumps:
          tensor_name_to_num_dumps[tensor_name] = len(
              dump.get_tensors(node_name, int(output_slot), debug_op))

  path_to_num_dumps = {}
  for path in path_to_tensor_names:
    path_to_num_dumps[path] = sum(
        tensor_name_to_num_dumps.get(tensor_name, 0)
        for tensor_name in path_to_tensor_names[path])

  output = []
  for file_path in path_to_node_names:
    output.append((
        file_path,
        guess_is_tensorflow_py_library(file_path),
        len(path_to_node_names.get(file_path, {})),
        len(path_to_tensor_names.get(file_path, {})),
        path_to_num_dumps.get(file_path, 0),
        path_to_first_line[file_path]))

  return sorted(output, key=lambda x: x[0])
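

# Usage sketch (illustrative): report the non-library source files that
# created ops captured in `dump` (a DebugDumpDir with its Python graph set).
def _example_list_source_files(dump):  # pragma: no cover
  for (file_path, is_tf_library, num_nodes, num_tensors, num_dumps,
       first_line) in list_source_files_against_dump(dump):
    if is_tf_library:
      continue
    print("%s (first line %d): %d node(s), %d tensor(s), %d dump(s)"
          % (file_path, first_line, num_nodes, num_tensors, num_dumps))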


def annotate_source_against_profile(profile_data,
                                    source_file_path,
                                    node_name_filter=None,
                                    op_type_filter=None,
                                    min_line=None,
                                    max_line=None):
  """Annotate a Python source file with profiling information at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    profile_data: (`list` of `ProfileDatum`) A list of `ProfileDatum`.
    source_file_path: (`str`) Path to the source file being annotated.
    node_name_filter: Regular expression to filter by node name.
    op_type_filter: Regular expression to filter by op type.
    min_line: (`None` or `int`) The 1-based line number to start annotating
      the source file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to a `profiling.AggregateProfile`
      object that aggregates the profile data attributed to that line.
  """

  source_file_path = _norm_abs_path(source_file_path)

  node_name_regex = re.compile(node_name_filter) if node_name_filter else None
  op_type_regex = re.compile(op_type_filter) if op_type_filter else None

  line_to_profile_summary = {}
  for profile_datum in profile_data:
    if not profile_datum.file_path:
      continue

    if _norm_abs_path(profile_datum.file_path) != source_file_path:
      continue

    if (min_line is not None and profile_datum.line_number < min_line or
        max_line is not None and profile_datum.line_number >= max_line):
      continue

    if (node_name_regex and
        not node_name_regex.match(profile_datum.node_exec_stats.node_name)):
      continue

    if op_type_regex and not op_type_regex.match(profile_datum.op_type):
      continue

    if profile_datum.line_number not in line_to_profile_summary:
      line_to_profile_summary[profile_datum.line_number] = (
          profiling.AggregateProfile(profile_datum))
    else:
      line_to_profile_summary[profile_datum.line_number].add(profile_datum)

  return line_to_profile_summary
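

# Usage sketch (illustrative): `profile_data` is assumed to be a list of
# profiling.ProfileDatum objects collected elsewhere; "my_model.py" is a
# hypothetical source file. Each returned value is a
# profiling.AggregateProfile accumulating the matching ProfileDatum objects.
def _example_annotate_against_profile(profile_data):  # pragma: no cover
  summaries = annotate_source_against_profile(
      profile_data, "my_model.py", op_type_filter="MatMul")
  for line_number in sorted(summaries):
    print("L%d: %r" % (line_number, summaries[line_number]))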