Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/debug_gradients.py: 29%
116 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""TensorFlow Debugger: Tools for debugging gradients."""
17import re
18import uuid
20from tensorflow.python.debug.lib import debug_data
21from tensorflow.python.debug.lib import debug_graphs
22from tensorflow.python.framework import ops
23from tensorflow.python.ops import gen_array_ops
24from tensorflow.python.ops import variables
26_GRADIENT_DEBUG_TAG = "gradient_debug_"
28_gradient_debuggers = {}
31def _tensor_to_grad_debug_op_name(tensor, grad_debugger_uuid):
32 op_name, slot = debug_graphs.parse_node_or_tensor_name(tensor.name)
33 return "%s_%d/%s%s" % (op_name, slot, _GRADIENT_DEBUG_TAG, grad_debugger_uuid)
36def _parse_grad_debug_op_name(op_name):
37 """Parse the name of a debug gradient op.
39 Args:
40 op_name: the name of the debug gradient op.
42 Returns:
43 1) The UUID of the GradientsDebugger that created the debug gradient op.
44 2) Name of the original tensor whose gradient is debugged by the debug
45 gradient op.
46 """
47 name_items = op_name.split("/")
48 assert len(name_items) > 1
49 assert name_items[-1].startswith(_GRADIENT_DEBUG_TAG)
51 grad_debugger_uuid = name_items[-1][len(_GRADIENT_DEBUG_TAG):]
52 if "_" in grad_debugger_uuid:
53 grad_debugger_uuid = grad_debugger_uuid[:grad_debugger_uuid.index("_")]
54 orig_tensor_slot = int(name_items[-2][name_items[-2].rfind("_") + 1:])
55 orig_base_op_name = name_items[-2][:name_items[-2].rfind("_")]
56 orig_tensor_name = ("/".join(name_items[:-2] + [orig_base_op_name]) +
57 ":%d" % orig_tensor_slot)
59 return grad_debugger_uuid, orig_tensor_name
class GradientsDebugger:
  """Gradients Debugger.

  Allows retrieval of gradient tensors created by TensorFlow's automatic
  differentiation algorithm, i.e., `tf.gradients` and optimizer classes that
  use it.
  """
  # TODO(cais): Add examples code in the doc string?

  def __init__(self, y_tensor=None):
    """Constructor of GradientsDebugger.

    Args:
      y_tensor: optional: the `tf.Tensor` to be differentiated, i.e., the tensor
        on the numerator of the differentiation.
    """
    # Unique ID of this debugger. It is embedded in the names of the debug ops
    # this instance creates, which lets the registered gradient functions
    # route gradients back here through the global _gradient_debuggers
    # registry.
    self._uuid = uuid.uuid4().hex
    _gradient_debuggers[self._uuid] = self

    # A dict mapping x-tensor names to gradient tensor. x-tensor refers to the
    # independent tf.Tensor, i.e., the tensor on the denominator of the
    # differentiation.
    self._gradient_tensors = {}
    self._y_tensor = y_tensor

    # The graph being watched; lazily set from the first tensor seen if no
    # y_tensor is supplied (see _check_same_graph).
    self._graph = None
    if y_tensor:
      self._graph = y_tensor.graph

    # True while this debugger is active as a context manager; gradient
    # registration is gated on this flag (see register_gradient_tensor).
    self._is_active_context = False

  @property
  def y_tensor(self):
    return self._y_tensor

  @property
  def graph(self):
    # The `tf.Graph` being watched, or None if not yet determined.
    return self._graph

  def __enter__(self):
    self._is_active_context = True

  def __exit__(self, unused_type, unused_value, unused_traceback):
    self._is_active_context = False

  def identify_gradient(self, input_tensor):
    """Create a debug identity tensor that registers and forwards gradients.

    The side effect of this method is that when gradient tensor(s) are created
    with respect to any paths that include the `input_tensor`, the gradient
    tensor(s) with respect to `input_tensor` will be registered with this
    `GradientsDebugger` instance and can later be retrieved, with the
    methods `gradient_tensor` and `gradient_tensors`.

    Example:

    ```python
    x = tf.Variable(1.0)
    y = tf.add(x, x)

    grad_debugger = tf_debug.GradientsDebugger()
    debug_y = grad_debugger.identify_gradient(y)
    z = tf.square(debug_y)

    # Create a train op under the grad_debugger context.
    with grad_debugger:
      train_op = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to y.
    y_grad = grad_debugger.gradient_tensor(y)
    ```

    Args:
      input_tensor: the input `tf.Tensor` object whose related gradient tensors
        are to be registered with this `GradientsDebugger` instance when they
        are created, e.g., during `tf.gradients` calls or the construction
        of optimization (training) op that uses `tf.gradients`.

    Returns:
      A forwarded identity of `input_tensor`, as a `tf.Tensor`.

    Raises:
      ValueError: If an op with name that duplicates the gradient-debugging op
        already exists in the graph (highly unlikely).
    """
    # TODO(cais): Allow overriding gradient.
    # TODO(cais): Implement value_stack.
    grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid)
    # pylint: disable=protected-access
    # Ref-dtype tensors (e.g., reference-type variables) require the ref
    # variant of the debug-gradient identity op.
    identity_op = (
        gen_array_ops.debug_gradient_ref_identity
        if input_tensor.dtype._is_ref_dtype else
        gen_array_ops.debug_gradient_identity)
    # pylint: enable=protected-access
    debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name)
    assert debug_grad_identity.dtype == input_tensor.dtype
    # TensorFlow uniquifies duplicate op names by appending a suffix; a name
    # mismatch therefore means the requested name was already taken, and the
    # gradient functions would not be able to parse the UUID back out of it.
    if debug_grad_identity.op.name != grad_debug_op_name:
      raise ValueError(
          "The graph already contains an op named %s" % grad_debug_op_name)
    return debug_grad_identity

  def watch_gradients_by_tensors(self, graph, tensors):
    """Watch gradient tensors by x-tensor(s).

    The side effect of this method is that when gradient tensor(s) are created
    with respect to any paths that include the `x_tensor`s, the gradient
    tensor(s) with respect to the tensor will be registered with this
    `GradientsDebugger` instance and can later be retrieved, with the
    methods `gradient_tensor` and `gradient_tensors`.

    Unlike the method `identify_gradient`, this method is used to retrieve
    gradient tensors after the construction of the forward subgraph has
    completed (but before the construction of the backward subgraph).

    This method is the same as `watch_gradients_by_tensor_names` except that
    the tensors are specified by the Python `tf.Tensor` or `tf.Variable`
    objects, instead by name patterns.

    Example:

    ```python
    x = tf.Variable(1.0)
    y = tf.add(x, x, name="y")
    z = tf.square(y)

    # Create a train op under the grad_debugger context.
    grad_debugger = tf_debug.GradientsDebugger()
    with grad_debugger.watch_gradients_by_tensors(
        tf.compat.v1.get_default_graph(), y):
      train_op = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to y.
    y_grad = grad_debugger.gradient_tensor(y)
    # or
    y_grad = grad_debugger.gradient_tensor("y:0")
    ```

    Args:
      graph: the `tf.Graph` to watch the gradients on.
      tensors: a `tf.Tensor` or `tf.Variable` object, or a list of such objects.

    Returns:
      The GradientsDebugger instance itself.
    """
    if not isinstance(tensors, list):
      tensors = [tensors]

    # Delegate to the regex-based variant with an exact-match alternation of
    # the given tensor names.
    tensor_name_regex = []
    for tensor in tensors:
      tensor_name_regex.append(re.escape(tensor.name) + "$")
    tensor_name_regex = "(" + "|".join(tensor_name_regex) + ")"
    return self.watch_gradients_by_tensor_names(graph, tensor_name_regex)

  def watch_gradients_by_tensor_names(self, graph, tensor_name_regex):
    """Watch gradient tensors by name(s) of the x-tensor(s).

    The side effect of this method is that when gradient tensor(s) are created
    with respect to the x-tensors, the gradient tensor(s) will be registered
    with this `GradientsDebugger` instance and can later be retrieved.

    Unlike the `identify_gradient` method, this method is used after the
    construction of the forward graph has completed. Unlike the
    `watch_gradients_by_tensors` method, this method does not use handles to
    the tensors of interest; it uses their names.

    This method is the same as `watch_gradients_by_tensors` except that the
    x-tensors are specified by name patterns, instead of `tf.Tensor` or
    `tf.Variable` objects.

    Example:

    ```python
    x = tf.Variable(1.0, name="x")
    y = tf.add(x, x, name="y")
    z = tf.square(y)

    # Create a train op under the grad_debugger context.
    grad_debugger = tf_debug.GradientsDebugger()
    with grad_debugger.watch_gradients_by_tensor_names(
        tf.compat.v1.get_default_graph(), r"(x|y):0$"):
      train_op = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to x and y.
    x_grad = grad_debugger.gradient_tensor("x:0")
    y_grad = grad_debugger.gradient_tensor("y:0")
    ```

    Args:
      graph: the `tf.Graph` to watch the gradients on.
      tensor_name_regex: the regular-expression pattern of the name(s) of the
        x-tensor(s) to watch. x-tensor refers to the tensors on the denominator
        of the differentiation.

    Returns:
      The GradientsDebugger instance itself.
    """
    tensor_name_pattern = re.compile(tensor_name_regex)
    with graph.as_default():
      for op in graph.get_operations():
        for output in op.outputs:
          if tensor_name_pattern.match(output.name):
            # Insert a debug-gradient identity op after the matched output
            # and splice it into each existing consumer's inputs so that
            # gradients flow through (and get registered by) the debug op.
            debug_op = self.identify_gradient(output)

            # Make a copy of output.consumers() since we'll modify the consumers
            # TODO(skyewm): this is unnecessary once the C API is enabled
            for consumer in list(output.consumers()):
              if consumer == debug_op.op:
                continue

              # Locate the slot index of the original input.
              for i, consumer_input in enumerate(consumer.inputs):
                if consumer_input == output:
                  consumer._update_input(i, debug_op)  # pylint: disable=protected-access
    return self

  def _check_same_graph(self, tensor):
    # Lazily pin this debugger to the first graph seen; reject tensors from
    # any other graph afterwards.
    if self._graph is None:
      self._graph = tensor.graph
    elif self._graph != tensor.graph:
      raise ValueError(
          "The graph of the value (%s) is not the same as the graph %s" %
          (tensor.graph, self._graph))

  def register_gradient_tensor(self,
                               x_tensor_name,
                               gradient_tensor):
    """Register the gradient tensor for an x-tensor.

    Args:
      x_tensor_name: (`str`) the name of the independent `tf.Tensor`, i.e.,
        the tensor on the denominator of the differentiation.
      gradient_tensor: the gradient `tf.Tensor`.
    """
    # Only register when this is the sole debugger in existence or when this
    # debugger is the active context manager; otherwise the gradient belongs
    # to some other (active) debugger.
    if len(_gradient_debuggers) == 1 or self._is_active_context:
      self._check_same_graph(gradient_tensor)
      self._gradient_tensors[x_tensor_name] = gradient_tensor

  def gradient_tensor(self, x_tensor):
    """Get the gradient tensor of an x-tensor.

    Args:
      x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its
        name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor
        on the denominator of the differentiation.

    Returns:
      If found, the gradient tensor.

    Raises:
      TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`.
      LookupError: If the `x_tensor` has not been registered with a gradient
        tensor.
    """
    x_tensor_name = self._get_tensor_name(x_tensor)
    if x_tensor_name not in self._gradient_tensors:
      raise LookupError(
          "This GradientsDebugger has not received any gradient tensor for "
          "x-tensor %s" % x_tensor_name)
    return self._gradient_tensors[x_tensor_name]

  def gradient_tensors(self):
    """Get the gradient tensors that this object is aware of.

    Returns:
      A dict mapping x-tensor names to gradient tensor objects. x-tensor refers
      to the tensors on the denominator of the differentiation.
    """
    return self._gradient_tensors

  def _get_tensor_name(self, tensor):
    # Normalize a tensor specification (tensor-like object or name string) to
    # a tensor name string.
    if isinstance(tensor, (ops.Tensor, variables.Variable)):
      return tensor.name
    elif isinstance(tensor, str):
      return tensor
    else:
      raise TypeError(
          "x_tensor must be a str or tf.Tensor or tf.Variable, "
          "but instead has type %s" % type(tensor))
def clear_gradient_debuggers():
  """Clear all globally registered gradient debuggers.

  Empties the module-level UUID-to-debugger registry. It does not remove
  debug-gradient ops already inserted into any graph.
  """
  _gradient_debuggers.clear()
@ops.RegisterGradient("DebugGradientIdentity")
def _identify_gradient_grad(op, dy):
  """Gradient function for the DebugGradientIdentity op.

  Recovers the owning debugger's UUID from the op name, registers the
  incoming gradient with that debugger, and passes the gradient through
  unchanged.
  """
  # TODO(cais): Allow overriding gradient.
  debugger_uuid, x_tensor_name = _parse_grad_debug_op_name(op.name)
  debugger = _gradient_debuggers[debugger_uuid]
  debugger.register_gradient_tensor(x_tensor_name, dy)
  return dy
@ops.RegisterGradient("DebugGradientRefIdentity")
def _identify_gradient_grad_ref(op, dy):
  """Gradient function for the DebugGradientRefIdentity op."""
  # The ref-dtype variant shares the registration logic of the non-ref op.
  return _identify_gradient_grad(op, dy)
def gradient_values_from_dump(grad_debugger, x_tensor, dump):
  """Find gradient values from a `DebugDumpDir` object.

  Args:
    grad_debugger: the `tf_debug.GradientsDebugger` instance to be used.
    x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its
      name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor
      on the denominator of the differentiation.
    dump: A `tfdbg.DebugDumpDir` object.

  Returns:
    If this `GradientsDebugger` instance has the gradient tensor of `x_tensor`
      registered: a list of `numpy.ndarray` representing the value of the
      gradient tensor from `dump`. The list could be empty, if the gradient
      tensor is not executed in the `tf.Session.run()` call that generated
      the `dump`. The list could also contain multiple values of the gradient
      tensor, e.g., if gradient tensor is computed repeatedly in a
      `tf.while_loop` during the run that generated the `dump`.

  Raises:
    LookupError: If this `GradientsDebugger` instance does not have the
      gradient tensor of `x_tensor` registered.
    ValueError: If this `GradientsDebugger` has a `tf.Graph` object that
      does not match the `tf.Graph` object of the `dump`.
    TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`.
  """
  # TODO(cais): Use this method in LocalCLIDebugWrapperSession to present the
  # gradient tensors to the TFDBG CLI.

  # If possible, verify that the Python graph of the dump and that of this
  # GradientsDebugger match.
  debugger_graph = grad_debugger.graph
  dumped_graph = dump.python_graph
  if dumped_graph and debugger_graph and dumped_graph != debugger_graph:
    raise ValueError(
        "This GradientsDebugger instance has a graph (%s) that differs from "
        "the graph of the DebugDumpDir object (%s)." %
        (debugger_graph, dumped_graph))

  grad_tensor = grad_debugger.gradient_tensor(x_tensor)
  node_name, output_slot = debug_graphs.parse_node_or_tensor_name(
      grad_tensor.name)

  try:
    return dump.get_tensors(node_name, output_slot, "DebugIdentity")
  except debug_data.WatchKeyDoesNotExistInDebugDumpDirError:
    # The gradient tensor was never executed in the dumped run.
    return []