Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/dumping_callback.py: 21%
312 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Dumping op callbacks: Enables dump-based features in tfdbg v2."""
17import atexit
18import os
19import re
20import socket
21import threading
22import uuid
24from tensorflow.core.framework import graph_debug_info_pb2
25from tensorflow.core.framework import tensor_pb2
26from tensorflow.core.protobuf import debug_event_pb2
27from tensorflow.python.debug.lib import debug_events_writer
28from tensorflow.python.debug.lib import op_callbacks_common
29from tensorflow.python.debug.lib import source_utils
30from tensorflow.python.eager import function as function_lib
31from tensorflow.python.framework import constant_op
32from tensorflow.python.framework import dtypes
33from tensorflow.python.framework import op_callbacks
34from tensorflow.python.framework import ops
35from tensorflow.python.framework import tensor_util
36from tensorflow.python.ops import array_ops
37from tensorflow.python.ops import gen_debug_ops
38from tensorflow.python.platform import tf_logging as logging
39from tensorflow.python.util import compat
40from tensorflow.python.util import object_identity
41from tensorflow.python.util import tf_stack
42from tensorflow.python.util.tf_export import tf_export
# Thread-local state. Holds the currently-enabled dumping callback (if any)
# for the current thread.
_state = threading.local()

# Default value of the `tensor_debug_mode` argument of
# `enable_dump_debug_info()`.
DEFAULT_TENSOR_DEBUG_MODE = "NO_TENSOR"

# pylint:disable=protected-access
# Byte-string name prefixes that identify graphs compiled from Python
# functions (forward, backward and inference FuncGraphs).
_FUNCTION_PREFIXES = (
    compat.as_bytes(function_lib._FORWARD_PREFIX),
    compat.as_bytes(function_lib._BACKWARD_PREFIX),
    compat.as_bytes(function_lib._INFERENCE_PREFIX))
# pylint:enable=protected-access
def is_op_type_function(op_type):
  """Check whether an op type name denotes a compiled (Func)Graph.

  Args:
    op_type: Op type, as a string or byte string.

  Returns:
    `True` if `op_type` starts with one of the function-name prefixes
    (forward / backward / inference), `False` otherwise.
  """
  op_type_bytes = compat.as_bytes(op_type)
  return op_type_bytes.startswith(_FUNCTION_PREFIXES)
@ops.RegisterGradient("DebugIdentityV2")
def _debug_identity_v2_grad(op, dy):
  """Gradient function for the DebugIdentityV2 op.

  DebugIdentityV2 behaves as an identity with respect to its input, so the
  incoming gradient is passed through unchanged.

  Args:
    op: The DebugIdentityV2 op (unused).
    dy: The incoming gradient tensor.

  Returns:
    `dy`, unchanged.
  """
  del op  # Unused
  return dy
66def _get_tfdbg_run_id():
67 return str(uuid.uuid4())[:8]
70def _get_id():
71 """Get a short unique ID."""
72 return str(uuid.uuid4())
def _concrete_tensor_to_proto(tensor):
  """Convert a concrete (eager) tensor to a TensorProto.

  Args:
    tensor: An EagerTensor whose value is materialized via `.numpy()`.

  Returns:
    A `TensorProto` holding the tensor's value.
  """
  numpy_value = tensor.numpy()
  return tensor_util.make_tensor_proto(numpy_value)
class _DumpingCallback(object):
  """An object holding the states surrounding the dumping callback."""

  def __init__(self,
               dump_root,
               tensor_debug_mode,
               circular_buffer_size,
               op_regex,
               tensor_dtypes):
    """Constructor of _DumpingCallback.

    Args:
      dump_root: Directory path to which debug events are written.
      tensor_debug_mode: A tfdbg TensorDebugMode enum value determining how
        much information about tensor values is traced.
      circular_buffer_size: Size of the circular buffers used by the
        debug-events writer for execution events.
      op_regex: Optional regex (string or compiled pattern) limiting which op
        types have their tensors dumped (see `_should_dump_tensor()`).
      tensor_dtypes: Optional list/tuple of DTypes, or a callable taking a
        DType and returning a bool, limiting which dtypes are dumped.
    """
    self._dump_root = dump_root
    self._tfdbg_run_id = _get_tfdbg_run_id()
    self._tensor_debug_mode = tensor_debug_mode
    self._circular_buffer_size = circular_buffer_size
    self._op_regex = op_regex
    self._tensor_dtypes = tensor_dtypes

    self._hostname = socket.gethostname()
    # A list of source-file paths.
    self._source_file_paths = []
    # A map from stack frame (FileLineCol) to unique ID.
    self._stack_frame_to_id = dict()
    # Mapping op context to unique ID.
    self._context_to_id = dict()
    # Maps AtomicFunction objects to the ID of their FuncGraph.
    self._function_to_graph_id = dict()
    # Caches FuncGraph IDs keyed by op-type (function-name) byte strings.
    self._op_type_to_context_id = dict()
    # Keeps track of counter for symbolic tensors output by in-graph ops.
    # It is used to make unique names for debugger-generated tensors.
    self._symbolic_tensor_counter = 0
    # A map from the names of debugger-generated Identity and DebugIdentityV2
    # tensors to the names of the original instrumented graph tensors. This is
    # applicable to v1 graph mode only.
    self._tensor_aliases = dict()
    self._source_file_paths_lock = threading.Lock()
    self._stack_frame_to_id_lock = threading.Lock()
    self._context_lock = threading.Lock()
    self._symbolic_tensor_counter_lock = threading.Lock()
    # A dict mapping Placeholder tensors to their instrumenting debug tensors.
    # Used only under V1 graph mode, where we can't rely on auto control
    # dependency to execute the debug tensors and hence need to attach the debug
    # tensors as control dependencies of the ops that consume the Placeholder.
    self._placeholder_to_debug_tensor = (
        object_identity.ObjectIdentityDictionary())
    # DebugEventsWriter instance, created lazily by `get_writer()`.
    self._writer = None

  def function_callback(self, function):
    """A callback to be called on creation of Functions.

    Records the ID of the function's FuncGraph so that later eager executions
    of the function can be associated with the graph (see
    `_func_graph_id_from_func_name()`).

    Args:
      function: The function (AtomicFunction) just created.

    Returns:
      `function`, unchanged.
    """
    graph_id = self._get_context_id(function.graph)
    with self._context_lock:
      # NOTE(cais): We currently store the function (AtomicFunction)
      # as keys of this dict, because weakrefs to them sometimes become
      # unreferenceable by the time the op callback is called. This approach
      # may cause memory leaks due to the holding of the functions. If that's
      # the case, calling `tf.debugging.disable_dump_debug_info()` should
      # cause GC of this object and this dict.
      self._function_to_graph_id[function] = graph_id
    return function

  @property
  def dump_root(self):
    """Path of the directory to which debug events are written."""
    return self._dump_root

  @dump_root.setter
  def dump_root(self, dump_root):
    # Changing the dump root invalidates the current writer; a new one is
    # created lazily by `get_writer()`.
    if self._dump_root != dump_root:
      self._dump_root = dump_root
      self._writer = None

  @property
  def tfdbg_run_id(self):
    """Unique (short random) ID of this tfdbg run."""
    return self._tfdbg_run_id

  @property
  def tensor_debug_mode(self):
    """The configured tfdbg TensorDebugMode enum value."""
    return self._tensor_debug_mode

  @property
  def circular_buffer_size(self):
    """Size of the circular buffers for execution debug events."""
    return self._circular_buffer_size

  def get_writer(self):
    """Get the debug events writer for the currently configured dump root."""
    if not self._writer:
      self._writer = debug_events_writer.DebugEventsWriter(
          self._dump_root,
          self._tfdbg_run_id,
          circular_buffer_size=self._circular_buffer_size)
    return self._writer

  def _get_context_id(self, context):
    """Get a unique ID for an op-construction context (e.g., a graph).

    If the graph has been encountered before, reuse the same unique ID.
    When encountering a new context (graph), this method writes a DebugEvent
    proto with the debugged_graph field to the proper DebugEvent file.

    Args:
      context: A context to get the unique ID for. Must be hashable. E.g., a
        Graph object.

    Returns:
      A unique ID for the context.
    """
    # Use the double-checked lock pattern to optimize the common case.
    if context in self._context_to_id:  # 1st check, without lock.
      return self._context_to_id[context]
    graph_is_new = False
    with self._context_lock:
      if context not in self._context_to_id:  # 2nd check, with lock.
        graph_is_new = True
        context_id = _get_id()
        self._context_to_id[context] = context_id
    # The proto write happens outside the lock to avoid holding it across
    # the recursive `_get_outer_context_id()` call and the writer call.
    if graph_is_new:
      self.get_writer().WriteDebuggedGraph(debug_event_pb2.DebuggedGraph(
          graph_id=context_id,
          graph_name=getattr(context, "name", None),
          outer_context_id=self._get_outer_context_id(context)))
    return self._context_to_id[context]

  def _get_outer_context_id(self, graph):
    """Get the ID of the immediate outer context of the input graph.

    Args:
      graph: The graph (context) in question.

    Returns:
      If an outer context exists, the immediate outer context name as a string.
      If such an outer context does not exist (i.e., `graph` is itself
      outermost), `None`.
    """
    if hasattr(graph, "outer_graph") and graph.outer_graph:
      return self._get_context_id(graph.outer_graph)
    else:
      return None

  def _write_source_file_content(self, file_path):
    """Send the content of a source file via debug-events writer.

    Args:
      file_path: Path to the source file.

    Returns:
      An int index for the file.
    """
    if file_path in self._source_file_paths:
      return self._source_file_paths.index(file_path)
    with self._source_file_paths_lock:
      if file_path not in self._source_file_paths:
        lines = None
        if source_utils.is_extension_uncompiled_python_source(file_path):
          try:
            lines, _ = source_utils.load_source(file_path)
          except IOError as e:
            # Best effort: the SourceFile proto is still written, without
            # the file's lines.
            logging.warn(
                "Failed to read source code from path: %s. Reason: %s",
                file_path, e)
        writer = self.get_writer()
        writer.WriteSourceFile(debug_event_pb2.SourceFile(
            file_path=file_path, host_name=self._hostname, lines=lines))
        self._source_file_paths.append(file_path)
      return self._source_file_paths.index(file_path)

  def _process_stack_frames(self):
    """Process stack frames.

    Send the content of source-files, on a best-effort basis.

    Returns:
      A CodeLocation proto containing the hostname and the IDs of the
      stack frames.
    """
    stack_frames = tf_stack.extract_stack()
    stack_frame_ids = []
    writer = None
    for file_path, lineno, func, _ in stack_frames:
      abs_path = os.path.abspath(file_path)
      if (abs_path, lineno, func) in self._stack_frame_to_id:
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])
        continue
      with self._stack_frame_to_id_lock:
        if (abs_path, lineno, func) not in self._stack_frame_to_id:
          stack_frame_id = _get_id()
          self._stack_frame_to_id[(abs_path, lineno, func)] = stack_frame_id
          file_index = self._write_source_file_content(abs_path)
          file_line_col = graph_debug_info_pb2.GraphDebugInfo.FileLineCol(
              file_index=file_index, line=lineno, func=func)
          stack_frame_with_id = debug_event_pb2.StackFrameWithId(
              id=stack_frame_id, file_line_col=file_line_col)
          writer = self.get_writer()
          writer.WriteStackFrameWithId(stack_frame_with_id)
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])

    code_location = debug_event_pb2.CodeLocation(
        host_name=self._hostname, stack_frame_ids=stack_frame_ids)
    return code_location

  def _process_v1_graph_mode_tensor(self,
                                    op_type,
                                    tensor,
                                    debug_tensor,
                                    tensor_debug_mode):
    """For V1 graph mode, determine what tensor to output from callback.

    Args:
      op_type: Type of the op that outputs the original symbolic tensor.
      tensor: The original output symbolic tensor.
      debug_tensor: The debugger-instrumented tensor.
      tensor_debug_mode: Debug mode used, a tfdbg TensorDebugMode enum.

    Returns:
      A symbolic tensor to be returned by the dumping op_callback.
    """
    # Placeholders need special treatment under V1 graph mode. The
    # callback can't simply override the Placeholder tensor to a debug tensor,
    # as that would cause the Placeholder op to lack a value.
    if op_type in ("Placeholder", "PlaceholderWithDefault"):
      self._placeholder_to_debug_tensor[tensor] = debug_tensor
      return tensor
    else:
      # TODO(cais): Evaluate performance optimization options. For the
      # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
      # control dependency of `tensor.op` without an additional identity op.
      if (tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR and
          op_type != "Const"):
        # NOTE(b/153716279): Under v1 graph mode, overriding the output tensor
        # of Const ops can lead to downstream errors related to shapes. We opt
        # to use an identity op to avoid this issue at the cost of slightly
        # larger graph size.
        self._tensor_aliases[debug_tensor.name] = tensor.name
        return debug_tensor
      else:
        with self._symbolic_tensor_counter_lock:
          identity_name = "tfdbg_identity_%d" % self._symbolic_tensor_counter
        identity = array_ops.identity(tensor, name=identity_name)
        identity.op._add_control_input(  # pylint: disable=protected-access
            debug_tensor.op)
        self._tensor_aliases[identity.name] = tensor.name
        return identity

  def _instrument_symbolic_tensors(self,
                                   tensors,
                                   op_type,
                                   op_name,
                                   tfdbg_context_id,
                                   tensor_ids):
    """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

    The detailed fashion in which the tensors are instrumented is determined
    by the tensor_debug_mode configured for the currently enabled dumping
    callback.

    Args:
      tensors: A tuple of Tensors to instrument. It is assumed that their
        ordering corresponds to the ordering of output tensors of an original
        op. Output slot indices (0-based) will be generated based on the
        ordering.
      op_type: Type name of the op that emits the Tensors (e.g., "MatMul").
      op_name: Name of the op that emits the Tensors (e.g., "dense_1/MatMul").
      tfdbg_context_id: A unique ID for the context that the op belongs to
        (e.g., a graph).
      tensor_ids: A list of unique ID numbers for the tensors, for tfdbg's
        internal use.

    Returns:
      Non-eager Tensors that override the `tensors` as the output of the op
      that originally generated `tensors`. In some cases (e.g., non-V1 graph
      mode), this may be `None`, as the instrumentation can simply rely on
      automatic control dependencies (see `auto_control_deps.py`) instead of
      tensor overriding.
    """
    tensor_debug_mode = self._tensor_debug_mode
    debug_urls = ["file://%s" % self._dump_root]
    is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
    instrumented_tensors = [] if is_v1_graph_mode else None
    for output_slot, tensor in enumerate(tensors):
      # Generate a unique name for the debug op under the counter lock.
      with self._symbolic_tensor_counter_lock:
        debug_identity_name = ("DebugIdentityV2_%d" %
                               self._symbolic_tensor_counter)
      debug_identity_op_kwargs = {
          "tfdbg_context_id": tfdbg_context_id,
          "op_name": op_name,
          "output_slot": output_slot,
          "tensor_debug_mode": self._tensor_debug_mode,
          "debug_urls": debug_urls,
          "name": debug_identity_name,
          "circular_buffer_size": self._circular_buffer_size,
          "tfdbg_run_id": self._tfdbg_run_id,
      }
      if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        if is_v1_graph_mode and not tensor.dtype.is_numpy_compatible:
          # Avoid instrumenting Placeholder under is_v1_graph_mode. Doing that
          # would cause runtime complaint about Placeholders not being fed.
          instrumented_tensors.append(tensor)
          continue
        # Except in V1 graph mode + control flow, debug_identity_v2 triggers
        # auto control dependency because it's a stateful op.
        debug_tensor = gen_debug_ops.debug_identity_v2(
            # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
            # as a low-overhead placeholder, since no actual tensor value is
            # traced.
            constant_op.constant([], dtype=dtypes.float32),
            **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                                 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                                 debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                                 debug_event_pb2.TensorDebugMode.SHAPE):
        dtype = tensor.dtype
        # Health modes apply only to floating dtypes; SHAPE also covers
        # integer and boolean dtypes.
        dtype_is_dumpable = (
            tensor_debug_mode in (
                debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                debug_event_pb2.TensorDebugMode.FULL_HEALTH) and
            dtype.is_floating or
            tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE and
            (dtype.is_floating or dtype.is_integer or dtype.is_bool))
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not dtype_is_dumpable):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            gen_debug_ops.debug_numeric_summary_v2(
                tensor,
                tensor_id=tensor_ids[output_slot],
                tensor_debug_mode=self._tensor_debug_mode,
                output_dtype=dtypes.float64), **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under
          # V1 graph mode is known to have issues. TODO(cais): Investigate.
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            tensor, **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      else:
        raise NotImplementedError(
            "Symbolic tensor instrumentation is not implemented for debug mode "
            "%s" % self._tensor_debug_mode)
    return instrumented_tensors

  def _dump_eager_tensors(self,
                          tensors,
                          op_type,
                          input_tensor_ids,
                          output_tensor_device_ids,
                          graph_id=None):
    """Dump the value of eager tensors.

    The destination of the dumping is determined by the dump_root of the
    currently enabled dumping callback. The tensors may be transformed prior to
    dumping (e.g., reduced as summary statistics such as minimum, maximum and
    arithmetic mean). The details of this transformation (if any) depends on
    the tensor_debug_mode of the currently enabled dumping callback.

    Args:
      tensors: The EagerTensors whose values are to be dumped, with or without
        value transform.
      op_type: Type of the op that generates the tensors, as a string.
      input_tensor_ids: IDs of the input EagerTensors to the op.
      output_tensor_device_ids: Debugged-generated IDs for the devices on which
        the output tensors are allocated, as a `list` of `int`s. Must match
        `tensors` in length.
      graph_id: ID of the executed graph, applicable only to eager execution of
        a FuncGraph.

    Returns:
      A tfdbg Execution protocol buffer.
    """
    tensor_debug_mode = self._tensor_debug_mode
    output_tensor_ids = [
        t._id for t in tensors]  # pylint:disable=protected-access
    assert len(tensors) == len(output_tensor_device_ids)
    if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
      return debug_event_pb2.Execution(
          op_type=op_type,
          graph_id=graph_id,
          num_outputs=len(tensors),
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
    elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                               debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                               debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                               debug_event_pb2.TensorDebugMode.SHAPE,
                               debug_event_pb2.TensorDebugMode.FULL_TENSOR):
      execution_proto = debug_event_pb2.Execution(
          op_type=op_type,
          num_outputs=len(tensors),
          graph_id=graph_id,
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
      for tensor in tensors:
        if (self._should_dump_tensor(op_type, tensor.dtype) and
            tensor.dtype.is_numpy_compatible):
          if tensor_debug_mode in (
              debug_event_pb2.TensorDebugMode.CURT_HEALTH,
              debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
              debug_event_pb2.TensorDebugMode.FULL_HEALTH):
            if tensor.dtype.is_floating:
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE:
            if (tensor.dtype.is_floating or tensor.dtype.is_integer or
                tensor.dtype.is_bool):
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
            tensor_proto = _concrete_tensor_to_proto(tensor)
          if tensor_proto:
            execution_proto.tensor_protos.append(tensor_proto)
      return execution_proto
    else:
      raise NotImplementedError(
          "Tensor instrumentation is not implemented for debug mode %s yet " %
          self._tensor_debug_mode)

  def callback(self,
               op_type,
               inputs,
               attrs,
               outputs,
               op_name=None,
               graph=None):
    """Op callback for tracing (dumping) a TF program's execution."""
    del attrs  # Unused

    writer = self.get_writer()
    if graph:
      # Symbolic (graph-construction) branch.
      is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
      context_id = self._get_context_id(graph)  # Innermost context ID.
      output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
      if op_type in ("Const", "Placeholder", "PlaceholderWithDefault"):
        # In some cases, the op name of a Const or Placeholder op in a graph
        # can be duplicate (e.g., `None` or "resource").
        # When this happens, we use the output tensor name to infer
        # the non-duplicated tensor name.
        op_name = outputs[0].name.split(":")[0]
      if is_v1_graph_mode:
        # Attach previously-recorded Placeholder debug tensors as control
        # dependencies of the consuming op (see
        # `_process_v1_graph_mode_tensor()`).
        for input_tensor in inputs:
          if input_tensor in self._placeholder_to_debug_tensor and outputs:
            outputs[0].op._add_control_input(  # pylint: disable=protected-access
                self._placeholder_to_debug_tensor[input_tensor].op)
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type=op_type,
          op_name=op_name,
          graph_name=graph.name if hasattr(graph, "name") else None,
          graph_id=context_id,
          input_names=[
              self._lookup_tensor_name(input_tensor) for input_tensor in inputs
          ],
          num_outputs=len(outputs),
          output_tensor_ids=output_tensor_ids,
          code_location=self._process_stack_frames())
      writer.WriteGraphOpCreation(graph_op_creation)
      if outputs and compat.as_bytes(
          op_type) not in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return self._instrument_symbolic_tensors(
            outputs, op_type, op_name, context_id, output_tensor_ids)
    else:
      # Eager-execution branch.
      op_type_bytes = compat.as_bytes(op_type)
      if op_type_bytes == b"DebugNumericSummaryV2":
        # TODO(b/140334369): Remove this special casing logic once op_callback.
        # automatically prevents infinite recursion in eager mode.
        return None
      if op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return None
      context_id = self._func_graph_id_from_func_name(op_type)
      input_ids = [t._id for t in inputs]  # pylint:disable=protected-access
      output_tensor_device_ids = [writer.RegisterDeviceAndGetId(output.device)
                                  for output in outputs] if outputs else []
      writer.WriteExecution(self._dump_eager_tensors(
          outputs, op_type, input_ids, output_tensor_device_ids,
          graph_id=context_id))

  def _lookup_tensor_name(self, tensor):
    """Look up the name of a graph tensor.

    This method maps the name of a debugger-generated Identity or
    DebugIdentityV2 tensor to the name of the original instrumented tensor,
    if `tensor` is such a debugger-created tensor.
    Otherwise, it returns the name of `tensor` as is.

    Args:
      tensor: The graph tensor to look up the name for.

    Returns:
      Name of the original instrumented tensor as known to the debugger.
    """
    return self._tensor_aliases.get(tensor.name, tensor.name)

  def _func_graph_id_from_func_name(self, op_type):
    """Attempt to get the ID of a FuncGraph based on an op type name.

    Also caches the ID for faster access later.

    Args:
      op_type: Op type string, which may be the name of a function.

    Returns:
      If the op_type name does not fit the pattern of a function name (e.g.,
      one that starts with "__inference_"), `None` is returned immediately.
      Else, if the FuncGraph is found, ID of the underlying FuncGraph is
      returned as a string.
      Else, `None` is returned.
    """
    op_type = compat.as_bytes(op_type)
    if is_op_type_function(op_type):
      # op_type for eagerly-executed FuncGraphs have the prefixed and suffixed
      # form such as "__inference_my_function_13579", wherein the middle part
      # "my_function" is the name of the Python function from which the
      # FuncGraph is compiled. Due to the suffix, the op_type is unique for
      # - duplicate Python function names
      # - multiple compilation of the same Python function
      if op_type in self._op_type_to_context_id:
        return self._op_type_to_context_id[op_type]
      with self._context_lock:
        for function in self._function_to_graph_id:
          if function.name == op_type:
            graph_id = self._function_to_graph_id[function]
            self._op_type_to_context_id[op_type] = graph_id
            return graph_id
      return None
    else:
      return None

  def _get_symbolic_tensor_ids(self, num_tensors):
    """Assign `num_tensors` fresh unique IDs from the symbolic-tensor counter.

    Args:
      num_tensors: Number of IDs to generate (may be 0).

    Returns:
      A list of `num_tensors` int IDs, monotonically increasing across calls.
    """
    tensor_ids = []
    if num_tensors:
      with self._symbolic_tensor_counter_lock:
        for _ in range(num_tensors):
          self._symbolic_tensor_counter += 1
          tensor_ids.append(self._symbolic_tensor_counter)
    return tensor_ids

  def _should_dump_tensor(self, op_type, dtype):
    """Determine if the given tensor's value will be dumped.

    The determination is made given the configurations such as `op_regex`,
    `tensor_dtypes`.

    Args:
      op_type: Name of the op's type, as a string (e.g., "MatMul").
      dtype: The dtype of the tensor, as a `dtypes.DType` object.

    Returns:
      A bool indicating whether the tensor's value will be dumped.
    """
    should_dump = True
    if self._op_regex:
      should_dump = (should_dump and
                     re.match(self._op_regex, op_type))
    if self._tensor_dtypes:
      if isinstance(self._tensor_dtypes, (list, tuple)):
        should_dump = (should_dump and
                       any(dtype == dtype_item for dtype_item
                           in self._tensor_dtypes))
      else:  # A callable that takes a DType argument and return a boolean.
        should_dump = should_dump and self._tensor_dtypes(dtype)
    return should_dump
673@tf_export("debugging.experimental.enable_dump_debug_info")
674def enable_dump_debug_info(dump_root,
675 tensor_debug_mode=DEFAULT_TENSOR_DEBUG_MODE,
676 circular_buffer_size=1000,
677 op_regex=None,
678 tensor_dtypes=None):
679 """Enable dumping debugging information from a TensorFlow program.
681 The debugging information is dumped to a directory on the file system
682 specified as `dump_root`.
684 The dumped debugging information can be ingested by debugger UIs.
686 The files in the dump directory contain the following information:
687 - TensorFlow Function construction (e.g., compilation of Python functions
688 decorated with @tf.function), the op types, names (if available), context,
689 the input and output tensors, and the associated stack traces.
690 - Execution of TensorFlow operations (ops) and Functions and their stack
691 traces, op types, names (if available) and contexts. In addition,
692 depending on the value of the `tensor_debug_mode` argument (see Args
693 section below), the value(s) of the output tensors or more concise
694 summaries of the tensor values will be dumped.
695 - A snapshot of Python source files involved in the execution of the
696 TensorFlow program.
698 Once enabled, the dumping can be disabled with the corresponding
699 `disable_dump_debug_info()` method under the same Python namespace.
700 Calling this method more than once with the same `dump_root` is idempotent.
701 Calling this method more than once with different `tensor_debug_mode`s
702 leads to a `ValueError`.
703 Calling this method more than once with different `circular_buffer_size`s
704 leads to a `ValueError`.
705 Calling this method with a different `dump_root` abolishes the
706 previously-enabled `dump_root`.
708 Usage example:
710 ```py
711 tf.debugging.experimental.enable_dump_debug_info('/tmp/my-tfdbg-dumps')
713 # Code to build, train and run your TensorFlow model...
714 ```
716 NOTE: If your code is running on TPUs, be sure to call
717 `tf.config.set_soft_device_placement(True)` before calling
718 `tf.debugging.experimental.enable_dump_debug_info()` as this API uses
719 automatic outside compilation on TPUs. For example:
721 ```py
722 tf.config.set_soft_device_placement(True)
723 tf.debugging.experimental.enable_dump_debug_info(
724 logdir, tensor_debug_mode="FULL_HEALTH")
726 resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
727 strategy = tf.distribute.TPUStrategy(resolver)
728 with strategy.scope():
729 # ...
730 ```
732 Args:
733 dump_root: The directory path where the dumping information will be written.
734 tensor_debug_mode: Debug mode for tensor values, as a string.
735 The currently supported options are:
736 - "NO_TENSOR": (Default) Only traces the output tensors of all executed
737 ops (including those executed eagerly at the Python level or as a part
738 of a TensorFlow graph) and functions, while not extracting any
739 information from the values of the tensors.
740 - "CURT_HEALTH": For each floating-dtype tensor (e.g., tensors of dtypes
741 such as `float32`, `float64` and `bfloat16`), extracts a binary bit
742 indicating whether it contains any -infinity, +infinity or NaN.
743 - "CONCISE_HEALTH": For each floating-dtype tensor, extract total
744 element count, and counts of -infinity, +infinity and NaN elements.
745 - "FULL_HEALTH": For each floating-dtype tensor, extracts the dtype,
746 rank (number of dimensions), total element count, and counts of
747 -infinity, +infinity and NaN elements.
748 - "SHAPE": For each tensor (regardless of dtype), extracts its dtype,
749 rank, total element count and shape.
750 circular_buffer_size: Size of the circular buffers for execution events.
751 These circular buffers are designed to reduce the overhead of debugging
752 dumping. They hold the most recent debug events concerning eager execution
753 of ops and `tf.function`s and traces of tensor values computed inside
754 `tf.function`s. They are written to the file system only when the proper
755 flushing method is called (see description of return values below).
756 Expected to be an integer. If <= 0, the circular-buffer behavior will be
757 disabled, i.e., the execution debug events will be written to the file
758 writers in the same way as non-execution events such as op creations and
759 source-file snapshots.
760 op_regex: Dump data from only the tensors from op types that matches to the
761 regular expression (through Python's `re.match()`).
762 "Op type" refers to the names of the TensorFlow operations (e.g.,
763 "MatMul", "LogSoftmax"), which may repeat in a TensorFlow
764 function. It does *not* refer to the names of nodes (e.g.,
765 "dense/MatMul", "dense_1/MatMul_1") which are unique within a function.
766 - Example 1: Dump tensor data from only MatMul and Relu ops
767 `op_regex="^(MatMul|Relu)$"`.
768 - Example 2: Dump tensors from all ops *except* Relu:
769 `op_regex="(?!^Relu$)"`.
770 This filter operates in a logical AND relation with `tensor_dtypes`.
771 tensor_dtypes: Dump data from only the tensors of which the specified
772 dtypes. This optional argument can be in any of the following format:
773 - a list or tuple of `DType` objects or strings that can be converted
774 to `DType` objects via `tf.as_dtype()`. Examples:
775 - `tensor_dtype=[tf.float32, tf.float64]`,
776 - `tensor_dtype=["float32", "float64"]`,
777 - `tensor_dtypes=(tf.int32, tf.bool)`,
778 - `tensor_dtypes=("int32", "bool")`
779 - a callable that takes a single `DType` argument and returns a Python
780 `boolean` indicating whether the dtype is to be included in the data
781 dumping. Examples:
782 - `tensor_dtype=lambda dtype: dtype.is_integer`.
783 This filter operates in a logical AND relation with `op_regex`.
784 Returns:
785 A DebugEventsWriter instance used by the dumping callback. The caller
786 may use its flushing methods, including `FlushNonExecutionFiles()` and
787 `FlushExecutionFiles()`.
788 """
789 # TODO(cais): Revise the "UIs (currently under construction)" part of the doc
790 # string above.
791 # TODO(cais): Add Python code example to the doc string above.
792 global _state
794 tensor_debug_mode_keys = debug_event_pb2.TensorDebugMode.keys()
795 if tensor_debug_mode not in tensor_debug_mode_keys:
796 raise ValueError(
797 "Invalid value in tensor_debug_mode ('%s'). Valid options are: %s" %
798 (tensor_debug_mode, tensor_debug_mode_keys))
800 tensor_debug_mode = debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode)
801 if tensor_debug_mode not in (debug_event_pb2.TensorDebugMode.NO_TENSOR,
802 debug_event_pb2.TensorDebugMode.CURT_HEALTH,
803 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
804 debug_event_pb2.TensorDebugMode.FULL_HEALTH,
805 debug_event_pb2.TensorDebugMode.SHAPE,
806 debug_event_pb2.TensorDebugMode.FULL_TENSOR):
807 raise NotImplementedError(
808 "tfdbg dumping: support for tensor debug mode %s is not "
809 "implemented yet" %
810 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
812 # Validate the types of tensor_dtypes.
813 if tensor_dtypes is not None:
814 if (not isinstance(tensor_dtypes, (list, tuple)) and
815 not callable(tensor_dtypes)):
816 raise ValueError(
817 "If specified, tensor_dtypes is expected to be a list, a tuple, or "
818 "a callable that takes a DType argument and returns a boolean, "
819 "but received %s" % (tensor_dtypes,))
820 if isinstance(tensor_dtypes, (list, tuple)):
821 tensor_dtypes = [
822 dtypes.as_dtype(dtype_item) for dtype_item in tensor_dtypes]
824 if hasattr(_state, "dumping_callback"):
825 if _state.dumping_callback.circular_buffer_size != circular_buffer_size:
826 raise ValueError(
827 "There is already a dumping callback configured with a different "
828 "circular-buffer size (%d). Therefore the newly request "
829 "circular-buffer size (%d) will not be honored." %
830 (_state.dumping_callback.circular_buffer_size, circular_buffer_size))
831 if _state.dumping_callback.tensor_debug_mode != tensor_debug_mode:
832 raise ValueError(
833 "There is already a dumping callback configured for dump root "
834 "%s with a different "
835 "tensor-debug mode (%s). Therefore the newly request "
836 "tensor-debug mode (%s) size will not be honored." %
837 (_state.dumping_callback.dump_root,
838 tensor_debug_mode_keys[_state.dumping_callback.tensor_debug_mode],
839 tensor_debug_mode_keys[tensor_debug_mode]))
840 else:
841 _state.dumping_callback = _DumpingCallback(dump_root,
842 tensor_debug_mode,
843 circular_buffer_size,
844 op_regex,
845 tensor_dtypes)
846 op_callbacks.add_op_callback(_state.dumping_callback.callback)
847 function_lib.add_function_callback(
848 _state.dumping_callback.function_callback)
850 if _state.dumping_callback.dump_root != dump_root:
851 _state.dumping_callback.dump_root = dump_root
853 logging.info(
854 "Enabled dumping callback in thread %s "
855 "(dump root: %s, tensor debug mode: %s)",
856 threading.current_thread().name,
857 _state.dumping_callback.dump_root,
858 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
860 atexit.register(disable_dump_debug_info)
861 return _state.dumping_callback.get_writer()
@tf_export("debugging.experimental.disable_dump_debug_info")
def disable_dump_debug_info():
  """Disable the currently-enabled debugging dumping.

  If the `enable_dump_debug_info()` method under the same Python namespace
  has been invoked before, calling this method disables it. If no call to
  `enable_dump_debug_info()` has been made, calling this method is a no-op.
  Calling this method more than once is idempotent.
  """
  # Guard clause: if dumping was never enabled in this thread, do nothing.
  if not hasattr(_state, "dumping_callback"):
    return
  callback = _state.dumping_callback
  dump_root = callback.dump_root
  # Tear down the debug-events writer associated with this dump root.
  writer = debug_events_writer.DebugEventsWriter(dump_root,
                                                 callback.tfdbg_run_id)
  writer.Close()
  # Unhook the callback from both eager op execution and function tracing.
  op_callbacks.remove_op_callback(callback.callback)
  function_lib.remove_function_callback(callback.function_callback)
  # Remove the thread-local state so a later enable starts fresh.
  delattr(_state, "dumping_callback")
  logging.info("Disabled dumping callback in thread %s (dump root: %s)",
               threading.current_thread().name, dump_root)