Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/source_utils.py: 17%
132 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Classes and functions that help to inspect Python source w.r.t. TF graphs."""
17import collections
18import os
19import re
20import zipfile
22import absl
23import numpy as np
25from tensorflow.python.debug.lib import profiling
# Directory of the `tensorflow` package: four `dirname` hops up from this
# file (.../tensorflow/python/debug/lib/source_utils.py -> .../tensorflow).
_TENSORFLOW_BASEDIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.normpath(os.path.abspath(__file__))))))

# Directory of the installed `absl` package; files under it are treated as
# part of the TensorFlow library by guess_is_tensorflow_py_library().
_ABSL_BASEDIR = os.path.dirname(absl.__file__)
# File extensions of Python source files, used by the
# is_extension_*_python_source() membership checks below.
# NOTE: the trailing comma in the 1-element tuple is essential. Without it,
# (".py") is just the string ".py", and `extension in UNCOMPILED_SOURCE_SUFFIXES`
# becomes a substring test that wrongly accepts e.g. ".p".
UNCOMPILED_SOURCE_SUFFIXES = (".py",)
COMPILED_SOURCE_SUFFIXES = (".pyc", ".pyo")
39def _norm_abs_path(file_path):
40 return os.path.normpath(os.path.abspath(file_path))
def is_extension_uncompiled_python_source(file_path):
  """Check whether `file_path` has an uncompiled Python source extension."""
  extension = os.path.splitext(file_path)[1]
  return extension.lower() in UNCOMPILED_SOURCE_SUFFIXES
def is_extension_compiled_python_source(file_path):
  """Check whether `file_path` has a compiled Python source extension."""
  extension = os.path.splitext(file_path)[1]
  return extension.lower() in COMPILED_SOURCE_SUFFIXES
53def _convert_watch_key_to_tensor_name(watch_key):
54 return watch_key[:watch_key.rfind(":")]
def guess_is_tensorflow_py_library(py_file_path):
  """Guess whether a Python source file is a part of the tensorflow library.

  Special cases:
    1) Returns False for unit-test files in the library (*_test.py),
    2) Returns False for files under python/debug/examples.

  Args:
    py_file_path: full path of the Python source file in question.

  Returns:
    (`bool`) Whether the file is inferred to be a part of the tensorflow
    library.
  """
  looks_like_python = (
      is_extension_uncompiled_python_source(py_file_path) or
      is_extension_compiled_python_source(py_file_path))
  if not looks_like_python:
    return False

  normalized_path = _norm_abs_path(py_file_path)
  in_library_dirs = (normalized_path.startswith(_TENSORFLOW_BASEDIR) or
                     normalized_path.startswith(_ABSL_BASEDIR))
  is_test_file = normalized_path.endswith("_test.py")
  examples_fragment = os.path.normpath("tensorflow/python/debug/examples")
  in_examples = examples_fragment in os.path.normpath(normalized_path)
  return in_library_dirs and not is_test_file and not in_examples
def load_source(source_file_path):
  """Load the content of a Python source code file.

  This function covers the following case:
  1. source_file_path points to an existing Python (.py) file on the
     file system.
  2. source_file_path is a path within a .par file (i.e., a zip-compressed,
     self-contained Python executable).

  Args:
    source_file_path: Path to the Python source file to read.

  Returns:
    A length-2 tuple:
      - Lines of the source file, as a `list` of `str`s.
      - The width of the string needed to show the line number in the file.
        This is calculated based on the number of lines in the source file.

  Raises:
    IOError: if loading is unsuccessful.
  """
  if not os.path.isfile(source_file_path):
    # The path may point inside a .par (zip) archive rather than a plain
    # file; attempt to extract it from there before giving up.
    lines = _try_load_par_source(source_file_path)
    if lines is None:
      raise IOError(
          "Source path neither exists nor can be loaded as a .par file: %s" %
          source_file_path)
  else:
    with open(source_file_path, "rb") as source_file:
      lines = source_file.read().decode("utf-8").split("\n")

  # Width of the line-number gutter: digits needed for the line count,
  # plus padding.
  line_num_width = int(np.ceil(np.log10(len(lines)))) + 3
  return lines, line_num_width
119def _try_load_par_source(source_file_path):
120 """Try loading the source code inside a .par file.
122 A .par file is a zip-compressed, self-contained Python executable.
123 It contains the content of individual Python source files that can
124 be read only through extracting from the zip file.
126 Args:
127 source_file_path: The full path to the file inside the .par file. This
128 path should include the path to the .par file itself, followed by the
129 intra-par path, e.g.,
130 "/tmp/my_executable.par/org-tensorflow/tensorflow/python/foo/bar.py".
132 Returns:
133 If successful, lines of the source file as a `list` of `str`s.
134 Else, `None`.
135 """
136 prefix_path = source_file_path
137 while True:
138 prefix_path, basename = os.path.split(prefix_path)
139 if not basename:
140 break
141 suffix_path = os.path.normpath(
142 os.path.relpath(source_file_path, start=prefix_path))
143 if prefix_path.endswith(".par") and os.path.isfile(prefix_path):
144 with zipfile.ZipFile(prefix_path) as z:
145 norm_names = [os.path.normpath(name) for name in z.namelist()]
146 if suffix_path in norm_names:
147 with z.open(z.namelist()[norm_names.index(suffix_path)]) as zf:
148 source_text = zf.read().decode("utf-8")
149 return source_text.split("\n")
def annotate_source(dump,
                    source_file_path,
                    do_dumped_tensors=False,
                    file_stack_top=False,
                    min_line=None,
                    max_line=None):
  """Annotate a Python source file with a list of ops created at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    source_file_path: (`str`) Path to the source file being annotated.
    do_dumped_tensors: (`str`) Whether dumped Tensors, instead of ops are to be
      used to annotate the source file.
    file_stack_top: (`bool`) Whether only the top stack trace in the
      specified source file is to be annotated.
    min_line: (`None` or `int`) The 1-based line to start annotate the source
      file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to a list of op name(s) created at
      that line, or tensor names if `do_dumped_tensors` is True.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """
  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot perform source annotation due to a lack of set "
                     "Python graph in the dump object")

  source_file_path = _norm_abs_path(source_file_path)

  line_to_op_names = {}
  for op in py_graph.get_operations():
    # Traverse the traceback from the innermost frame outward.
    for file_path, line_number, _, _ in reversed(dump.node_traceback(op.name)):
      if min_line is not None and line_number < min_line:
        continue
      if max_line is not None and line_number >= max_line:
        continue
      if _norm_abs_path(file_path) != source_file_path:
        continue

      if do_dumped_tensors:
        # De-duplicate tensor names derived from the op's watch keys.
        unique_tensor_names = set(
            _convert_watch_key_to_tensor_name(key)
            for key in dump.debug_watch_keys(op.name))
        new_items = list(unique_tensor_names)
      else:
        new_items = [op.name]

      line_to_op_names.setdefault(line_number, []).extend(new_items)

      if file_stack_top:
        break

  return line_to_op_names
def list_source_files_against_dump(dump,
                                   path_regex_allowlist=None,
                                   node_name_regex_allowlist=None):
  """Generate a list of source files with information regarding ops and tensors.

  Args:
    dump: (`DebugDumpDir`) A `DebugDumpDir` object of which the Python graph
      has been loaded.
    path_regex_allowlist: A regular-expression filter for source file path.
    node_name_regex_allowlist: A regular-expression filter for node names.

  Returns:
    A list of tuples regarding the Python source files involved in constructing
    the ops and tensors contained in `dump`. Each tuple is:
      (source_file_path, is_tf_library, num_nodes, num_tensors, num_dumps,
       first_line)

      is_tf_library: (`bool`) A guess of whether the file belongs to the
        TensorFlow Python library.
      num_nodes: How many nodes were created by lines of this source file.
        These include nodes with dumps and those without.
      num_tensors: How many Tensors were created by lines of this source file.
        These include Tensors with dumps and those without.
      num_dumps: How many debug Tensor dumps were from nodes (and Tensors)
        that were created by this source file.
      first_line: The first line number (1-based) that created any nodes or
        Tensors in this source file.

    The list is sorted by ascending order of source_file_path.

  Raises:
    ValueError: If the dump object does not have a Python graph set.
  """
  py_graph = dump.python_graph
  if not py_graph:
    raise ValueError("Cannot generate source list due to a lack of set "
                     "Python graph in the dump object")

  # Per-file accumulators, keyed by normalized absolute file path.
  path_to_node_names = collections.defaultdict(set)
  path_to_tensor_names = collections.defaultdict(set)
  path_to_first_line = {}
  tensor_name_to_num_dumps = {}

  path_regex = (
      re.compile(path_regex_allowlist) if path_regex_allowlist else None)
  node_name_regex = (
      re.compile(node_name_regex_allowlist)
      if node_name_regex_allowlist else None)

  to_skip_file_paths = set()
  for op in py_graph.get_operations():
    if node_name_regex and not node_name_regex.match(op.name):
      continue

    for file_path, line_number, _, _ in dump.node_traceback(op.name):
      file_path = _norm_abs_path(file_path)
      # Cache rejected paths in to_skip_file_paths, so the regex match and
      # the file-system check run at most once per distinct path.
      if (file_path in to_skip_file_paths or
          path_regex and not path_regex.match(file_path) or
          not os.path.isfile(file_path)):
        to_skip_file_paths.add(file_path)
        continue

      path_to_node_names[file_path].add(op.name)
      # Keep the smallest line number seen so far for this file.
      if file_path in path_to_first_line:
        if path_to_first_line[file_path] > line_number:
          path_to_first_line[file_path] = line_number
      else:
        path_to_first_line[file_path] = line_number

      for output_tensor in op.outputs:
        tensor_name = output_tensor.name
        path_to_tensor_names[file_path].add(tensor_name)

      # Cache the dump count per tensor so dump.get_tensors() is called at
      # most once per distinct tensor name.
      watch_keys = dump.debug_watch_keys(op.name)
      for watch_key in watch_keys:
        node_name, output_slot, debug_op = watch_key.split(":")
        tensor_name = "%s:%s" % (node_name, output_slot)
        if tensor_name not in tensor_name_to_num_dumps:
          tensor_name_to_num_dumps[tensor_name] = len(
              dump.get_tensors(node_name, int(output_slot), debug_op))

  # Aggregate the per-tensor dump counts into per-file totals.
  path_to_num_dumps = {}
  for path in path_to_tensor_names:
    path_to_num_dumps[path] = sum(
        tensor_name_to_num_dumps.get(tensor_name, 0)
        for tensor_name in path_to_tensor_names[path])

  output = []
  for file_path in path_to_node_names:
    output.append((
        file_path,
        guess_is_tensorflow_py_library(file_path),
        len(path_to_node_names.get(file_path, {})),
        len(path_to_tensor_names.get(file_path, {})),
        path_to_num_dumps.get(file_path, 0),
        path_to_first_line[file_path]))
  # Sort by source_file_path (the first element of each tuple).
  return sorted(output, key=lambda x: x[0])
def annotate_source_against_profile(profile_data,
                                    source_file_path,
                                    node_name_filter=None,
                                    op_type_filter=None,
                                    min_line=None,
                                    max_line=None):
  """Annotate a Python source file with profiling information at each line.

  (The annotation doesn't change the source file itself.)

  Args:
    profile_data: (`list` of `ProfileDatum`) A list of `ProfileDatum`.
    source_file_path: (`str`) Path to the source file being annotated.
    node_name_filter: Regular expression to filter by node name.
    op_type_filter: Regular expression to filter by op type.
    min_line: (`None` or `int`) The 1-based line to start annotate the source
      file from (inclusive).
    max_line: (`None` or `int`) The 1-based line number to end the annotation
      at (exclusive).

  Returns:
    A `dict` mapping 1-based line number to a the namedtuple
      `profiling.LineOrFuncProfileSummary`.
  """
  source_file_path = _norm_abs_path(source_file_path)

  node_name_regex = re.compile(node_name_filter) if node_name_filter else None
  op_type_regex = re.compile(op_type_filter) if op_type_filter else None

  line_to_profile_summary = {}
  for datum in profile_data:
    # Keep only data attributed to the requested source file.
    if not datum.file_path:
      continue
    if _norm_abs_path(datum.file_path) != source_file_path:
      continue

    line_number = datum.line_number
    outside_range = (
        (min_line is not None and line_number < min_line) or
        (max_line is not None and line_number >= max_line))
    if outside_range:
      continue

    if (node_name_regex and
        not node_name_regex.match(datum.node_exec_stats.node_name)):
      continue
    if op_type_regex and not op_type_regex.match(datum.op_type):
      continue

    # Aggregate all matching data at the same line into one summary.
    existing_summary = line_to_profile_summary.get(line_number)
    if existing_summary is None:
      line_to_profile_summary[line_number] = profiling.AggregateProfile(datum)
    else:
      existing_summary.add(datum)

  return line_to_profile_summary