# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
16"""Gradient checker for any ops, graphs.
18The gradient checker verifies numerically that an op/graph properly
19computes the gradients
20"""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradients
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  """Returns t if t is an int, otherwise the product of the elements of t."""
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y


def _extra_feeds(extra_feed_dict, new_feeds):
  """Merges new_feeds into extra_feed_dict; new_feeds wins on conflicts."""
  if not extra_feed_dict:
    return new_feeds
  r = {}
  r.update(extra_feed_dict)
  r.update(new_feeds)
  return r


def _compute_theoretical_jacobian(x, x_shape, x_data, dy, dy_shape, dx,
                                  extra_feed_dict):
  """Computes the theoretical Jacobian for dy/dx.

  Computes the theoretical Jacobian using the ops generated by
  compute_gradient().

  Args:
    x: the tensor "x".
    x_shape: the dimensions of x as a tuple or an array of ints.
    x_data: a numpy array as the input data for x.
    dy: the tensor "dy".
    dy_shape: the dimensions of dy as a tuple or an array of ints.
    dx: Tensor or IndexedSlices representing dx.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows
    and "dy_size" columns where "x_size" is the number of elements in x and
    "dy_size" is the number of elements in dy.

  Raises:
    ValueError: If `dy` is empty but the gradient is nonzero.
  """
  # Complex vectors are treated as vectors of twice as many reals.
  if x.dtype.is_complex:
    x_shape = tuple(x_shape) + (2,)
  dy_factor = 2 if dy.dtype.is_complex else 1

  # To compute the Jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  dy_size = _product(dy_shape) * dy_factor

  # Allocate 2-D Jacobian, with x dimensions smashed into the first
  # dimension and y dimensions smashed into the second.
  jacobian = np.zeros((x_size, dy_size),
                      dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the backprop -- this gives us one column of the Jacobian matrix.
  dy_data = np.zeros(dy_shape, dtype=dy.dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(dy.dtype.real_dtype.as_numpy_dtype)
  sess = ops.get_default_session()
  for col in range(dy_size):
    dy_data_flat[col] = 1
    if isinstance(dx, indexed_slices.IndexedSlices):
      backprop_indices, backprop_values = sess.run(
          [dx.indices, dx.values],
          feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
      for i, v in zip(backprop_indices, backprop_values):
        r_begin = i * x_val_size
        r_end = r_begin + x_val_size
        jacobian[r_begin:r_end, col] += v.flat
    else:
      assert isinstance(dx, ops.Tensor), "dx = " + str(dx)
      backprop = sess.run(
          dx, feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
      jacobian[:, col] = backprop.ravel().view(jacobian.dtype)
    dy_data_flat[col] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if not dy_size:
    backprop = sess.run(
        dx, feed_dict=_extra_feeds(extra_feed_dict, {x: x_data, dy: dy_data}))
    if backprop.shape != x_data.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x_data.shape, backprop.shape))
    if np.any(backprop):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
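

# --- Illustrative sketch (not part of the original module) -------------------
# The loop above recovers the Jacobian one column at a time: feeding a one-hot
# dy and reading back dx is a vector-Jacobian product. A hypothetical pure-
# NumPy analogue for the linear map y = a @ x, whose true Jacobian in this
# module's (x_size, y_size) layout is a.T:
def _demo_theoretical_jacobian_idea():
  """Sketch: recover d(y)/d(x) for y = a @ x one column at a time."""
  a = np.array([[1., 2.], [3., 4.], [5., 6.]])  # y = a @ x
  x_size, y_size = a.shape[1], a.shape[0]
  jacobian = np.zeros((x_size, y_size))
  for col in range(y_size):
    dy = np.zeros(y_size)
    dy[col] = 1.0                # one-hot dy, as in the loop above
    jacobian[:, col] = a.T @ dy  # "backprop" of the one-hot dy gives dx
  assert np.allclose(jacobian, a.T)
  return jacobian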


def _compute_numeric_jacobian(x, x_shape, x_data, y, y_shape, delta,
                              extra_feed_dict):
  """Computes the numeric Jacobian for dy/dx.

  Computes the numeric Jacobian by slightly perturbing the inputs and
  measuring the differences on the output.

  Args:
    x: the tensor "x".
    x_shape: the dimensions of x as a tuple or an array of ints.
    x_data: a numpy array as the input data for x.
    y: the tensor "y".
    y_shape: the dimensions of y as a tuple or an array of ints.
    delta: the amount of perturbation we give to the input.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    A 2-d numpy array representing the Jacobian for dy/dx. It has "x_size" rows
    and "y_size" columns where "x_size" is the number of elements in x and
    "y_size" is the number of elements in y.
  """
  # bfloat16 doesn't have enough bits to represent high precision numbers such
  # as delta. Convert to float32 here. Since numeric_jacobian is expected to
  # be the groundtruth to compare against, it shouldn't lose any information.
  if x.dtype == dtypes.bfloat16:
    x = math_ops.cast(x, dtypes.float32)  # TODO(wangpeng): Now that the new x
    # is an output of the old x, isn't feeding to the new x a mistake?
  if y.dtype == dtypes.bfloat16:
    y = math_ops.cast(y, dtypes.float32)
  if x_data.dtype == dtypes.bfloat16.as_numpy_dtype:
    x_data = x_data.astype(np.float32)

  # To compute the Jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x.dtype.is_complex else 1)
  y_size = _product(y_shape) * (2 if y.dtype.is_complex else 1)
  x_dtype = x.dtype.real_dtype.as_numpy_dtype
  y_dtype = y.dtype.real_dtype.as_numpy_dtype

  # Make sure we have the right types.
  x_data = np.asarray(x_data, dtype=x.dtype.as_numpy_dtype)
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((x_size, y_size), dtype=x_dtype)
  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # row of the Jacobian matrix.
  for row in range(x_size):
    x_pos = x_data.copy()
    x_neg = x_data.copy()
    x_pos.ravel().view(x_dtype)[row] += delta
    y_pos = y.eval(feed_dict=_extra_feeds(extra_feed_dict, {x: x_pos}))
    x_neg.ravel().view(x_dtype)[row] -= delta
    y_neg = y.eval(feed_dict=_extra_feeds(extra_feed_dict, {x: x_neg}))
    diff = (y_pos - y_neg) / scale
    jacobian[row, :] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
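

# --- Illustrative sketch (not part of the original module) -------------------
# Row i of the Jacobian above is the central difference
# (f(x + delta * e_i) - f(x - delta * e_i)) / (2 * delta). A hypothetical
# standalone NumPy version for a function f: R^n -> R^m:
def _demo_numeric_jacobian(f, x_data, delta=1e-3):
  """Sketch: central-difference Jacobian with shape (x_size, y_size)."""
  x_data = np.asarray(x_data, dtype=np.float64)
  y_size = np.asarray(f(x_data)).size
  jacobian = np.zeros((x_data.size, y_size))
  for row in range(x_data.size):
    x_pos, x_neg = x_data.copy(), x_data.copy()
    x_pos.ravel()[row] += delta  # perturb one coordinate in each direction
    x_neg.ravel()[row] -= delta
    diff = np.asarray(f(x_pos)) - np.asarray(f(x_neg))
    jacobian[row, :] = diff.ravel() / (2 * delta)
  return jacobian

# E.g. _demo_numeric_jacobian(lambda v: v ** 2, np.array([1.0, 2.0])) is
# approximately np.diag([2.0, 4.0]).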


def _compute_dx_and_dy(x, y, y_shape):
  """Returns a node to compute gradient of y wrt x."""
  # We make up a dy so that we can compute the gradients. We don't really use
  # the value of dy -- we will always feed it. We need to add an identity node
  # so that we can always feed it properly. Otherwise, for the Add operation,
  # dx is the same as dy and we cannot fetch the tensor that we are feeding.
  with x.graph.as_default():
    dy_orig = constant_op.constant(1.0, shape=y_shape, dtype=y.dtype)
    dy = array_ops.identity(dy_orig)
    # We compute the gradients for y wrt. x
    grads = gradients.gradients(y, x, dy)
    assert len(grads) == 1
    return grads[0], dy_orig
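

# --- Illustrative sketch (not part of the original module) -------------------
# A hypothetical graph-mode demo of the returned handles: feeding a one-hot
# dy through dy_orig and fetching dx yields one Jacobian column. For
# y = 2 * x the gradient is dx = 2 * dy, so column 1 comes back as
# [0., 2., 0.]. `_demo_vjp_column` is illustrative only.
def _demo_vjp_column(col=1):
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g:
    x = array_ops.placeholder(dtypes.float32, shape=[3])
    y = math_ops.multiply(x, 2.0)
    dx, dy = _compute_dx_and_dy(x, y, y_shape=[3])
    dy_data = np.zeros([3], dtype=np.float32)
    dy_data[col] = 1.0  # one-hot dy selects one column of the Jacobian
    with session.Session(graph=g) as sess:
      return sess.run(dx, feed_dict={dy: dy_data})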


def _compute_gradient(x,
                      x_shape,
                      dx,
                      y,
                      y_shape,
                      dy,
                      x_init_value=None,
                      delta=1e-3,
                      extra_feed_dict=None):
  """Computes the theoretical and numerical Jacobian."""
  t = dtypes.as_dtype(x.dtype)
  allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32,
                   dtypes.float64, dtypes.complex64, dtypes.complex128]
  assert t.base_dtype in allowed_types, "Don't support type %s for x" % t.name
  t2 = dtypes.as_dtype(y.dtype)
  assert t2.base_dtype in allowed_types, "Don't support type %s for y" % t2.name

  if x_init_value is not None:
    i_shape = list(x_init_value.shape)
    assert list(x_shape) == i_shape, "x_shape = %s, init_data shape = %s" % (
        x_shape, i_shape)
    x_data = x_init_value
  else:
    x_data = np.random.random_sample(x_shape).astype(t.as_numpy_dtype)
    if t.is_complex:
      x_data.imag = np.random.random_sample(x_shape)

  jacob_t = _compute_theoretical_jacobian(
      x, x_shape, x_data, dy, y_shape, dx, extra_feed_dict=extra_feed_dict)
  jacob_n = _compute_numeric_jacobian(
      x, x_shape, x_data, y, y_shape, delta, extra_feed_dict=extra_feed_dict)
  return jacob_t, jacob_n


def _compute_gradient_list(x,
                           x_shape,
                           y,
                           y_shape,
                           x_init_value=None,
                           delta=1e-3,
                           init_targets=None,
                           extra_feed_dict=None):
  """Compute gradients for a list of x values."""
  assert isinstance(x, list)
  dx, dy = zip(*[_compute_dx_and_dy(xi, y, y_shape) for xi in x])

  if init_targets is not None:
    assert isinstance(init_targets, (list, tuple))
    for init in init_targets:
      init.run()
  if x_init_value is None:
    x_init_value = [None] * len(x)
  # pylint: disable=g-complex-comprehension
  ret = [_compute_gradient(xi, x_shapei, dxi, y, y_shape, dyi, x_init_valuei,
                           delta, extra_feed_dict=extra_feed_dict)
         for xi, x_shapei, dxi, dyi, x_init_valuei in zip(x, x_shape, dx, dy,
                                                          x_init_value)]
  return ret


@tf_export(v1=["test.compute_gradient"])
@deprecation.deprecated(
    date=None,
    instructions="Use tf.test.compute_gradient in 2.0, which has better "
    "support for functions. Note that the two versions have different usage, "
    "so code change is needed.")
def compute_gradient(x,
                     x_shape,
                     y,
                     y_shape,
                     x_init_value=None,
                     delta=1e-3,
                     init_targets=None,
                     extra_feed_dict=None):
  """Computes and returns the theoretical and numerical Jacobian.

  If `x` or `y` is complex, the Jacobian will still be real but the
  corresponding Jacobian dimension(s) will be twice as large. This is required
  even if both input and output are complex, since TensorFlow graphs are not
  necessarily holomorphic and may have gradients not expressible as complex
  numbers. For example, if `x` is complex with shape `[m]` and `y` is complex
  with shape `[n]`, each Jacobian `J` will have shape `[m * 2, n * 2]` with

    J[:m, :n] = d(Re y)/d(Re x)
    J[:m, n:] = d(Im y)/d(Re x)
    J[m:, :n] = d(Re y)/d(Im x)
    J[m:, n:] = d(Im y)/d(Im x)

  Args:
    x: a tensor or list of tensors.
    x_shape: the dimensions of x as a tuple or an array of ints. If x is a
      list, then this is the list of shapes.
    y: a tensor.
    y_shape: the dimensions of y as a tuple or an array of ints.
    x_init_value: (optional) a numpy array of the same shape as "x"
      representing the initial value of x. If x is a list, this should be a
      list of numpy arrays. If this is None, the function will pick a random
      tensor as the initial value.
    delta: (optional) the amount of perturbation.
    init_targets: list of targets to run to initialize model params.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    Two 2-d numpy arrays representing the theoretical and numerical
    Jacobian for dy/dx. Each has "x_size" rows and "y_size" columns
    where "x_size" is the number of elements in x and "y_size" is the
    number of elements in y. If x is a list, returns a list of such
    (theoretical, numerical) pairs, one per element of x.
  """
  # TODO(mrry): remove argument `init_targets`
  if extra_feed_dict is None:
    extra_feed_dict = {}

  if isinstance(x, list):
    return _compute_gradient_list(x, x_shape, y, y_shape, x_init_value, delta,
                                  init_targets, extra_feed_dict=extra_feed_dict)
  else:
    if init_targets is not None:
      assert isinstance(init_targets, (list, tuple))
      for init in init_targets:
        init.run()
    dx, dy = _compute_dx_and_dy(x, y, y_shape)
    ret = _compute_gradient(x, x_shape, dx, y, y_shape, dy, x_init_value, delta,
                            extra_feed_dict=extra_feed_dict)
    return ret
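

# --- Illustrative usage sketch (not part of the original module) -------------
# A hypothetical TF1 graph-mode example comparing the two Jacobians of
# y = x**2. compute_gradient() must run under an active default session,
# since both Jacobian computations call sess.run / Tensor.eval.
def _demo_compute_gradient():
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g, session.Session(graph=g):
    x = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
    y = math_ops.square(x)
    x_init = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
    jacob_t, jacob_n = compute_gradient(
        x, [2, 2], y, [2, 2], x_init_value=x_init)
    # Both are (4, 4) arrays, approximately diag([2., 4., 6., 8.]).
    return jacob_t, jacob_n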


def _compute_error(grad):
  """Returns the max elementwise |jacob_t - jacob_n| over the given pair(s)."""
  if isinstance(grad, tuple):
    grad = [grad]
  error = 0
  for j_t, j_n in grad:
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
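

# --- Illustrative sketch (not part of the original module) -------------------
# _compute_error reduces one (theoretical, numeric) Jacobian pair -- or a
# list of pairs -- to the largest elementwise absolute difference:
def _demo_compute_error():
  """Sketch: two 2x2 Jacobians differing by 1e-5 give an error of 1e-5."""
  j_t = np.eye(2)
  j_n = np.eye(2) + 1e-5
  return _compute_error((j_t, j_n))  # -> 1e-5 (up to float rounding)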


@tf_export(v1=["test.compute_gradient_error"])
@deprecation.deprecated(
    date=None,
    instructions="Use tf.test.compute_gradient in 2.0, which has better "
    "support for functions. Note that the two versions have different usage, "
    "so code change is needed.")
def compute_gradient_error(x,
                           x_shape,
                           y,
                           y_shape,
                           x_init_value=None,
                           delta=1e-3,
                           init_targets=None,
                           extra_feed_dict=None):
  """Computes the gradient error.

  Computes the maximum error for dy/dx between the computed Jacobian and the
  numerically estimated Jacobian.

  This function modifies the tensors passed in: it adds more operations to
  the graph and hence changes the consumers of the input tensors' operations.

  This function adds operations to the current graph. To compute the error
  using a particular device, such as a GPU, use the standard methods for
  setting a device (e.g. using with sess.graph.device() or setting a device
  function in the session constructor).

  Args:
    x: a tensor or list of tensors.
    x_shape: the dimensions of x as a tuple or an array of ints. If x is a
      list, then this is the list of shapes.
    y: a tensor.
    y_shape: the dimensions of y as a tuple or an array of ints.
    x_init_value: (optional) a numpy array of the same shape as "x"
      representing the initial value of x. If x is a list, this should be a
      list of numpy arrays. If this is None, the function will pick a random
      tensor as the initial value.
    delta: (optional) the amount of perturbation.
    init_targets: list of targets to run to initialize model params.
    extra_feed_dict: dict that allows fixing specified tensor values
      during the Jacobian calculation.

  Returns:
    The maximum error between the two Jacobians.
  """
  grad = compute_gradient(x, x_shape, y, y_shape, x_init_value, delta,
                          init_targets, extra_feed_dict=extra_feed_dict)
  return _compute_error(grad)
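

# --- Illustrative usage sketch (not part of the original module) -------------
# A hypothetical end-to-end gradient check, the typical way this module is
# used in TF1 op tests: assert that the worst Jacobian mismatch is below a
# tolerance. float64 keeps the central-difference error far below the bound.
def _demo_compute_gradient_error():
  from tensorflow.python.client import session  # local import for the sketch
  with ops.Graph().as_default() as g, session.Session(graph=g):
    x = constant_op.constant([0.5, 1.5, 2.5], dtype=dtypes.float64)
    y = math_ops.sigmoid(x)
    err = compute_gradient_error(x, [3], y, [3])
    assert err < 1e-6, "gradient mismatch: %g" % err
    return err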