Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/gradient_checker_v2.py: 15% of 136 statements (coverage.py v7.4.0, created at 2024-01-03 07:57 +0000)

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradient checker for functions.

The gradient checker verifies numerically that a function properly
computes its gradients.
"""
import numpy as np

from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import indexed_slices
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import tf_export


def _product(t):
  if isinstance(t, int):
    return t
  else:
    y = 1
    for x in t:
      y *= x
    return y
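

# Illustrative sketch, not part of the original module: `_product` collapses a
# shape into a flat element count and passes plain ints through unchanged. The
# helper name `_example_product_usage` is hypothetical.
def _example_product_usage():
  assert _product((2, 3, 4)) == 24  # a 2x3x4 tensor has 24 elements
  assert _product(7) == 7  # an int is returned as-is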


def _eval_indexed_slices(a):
  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.

  When eager execution is enabled, converts IndexedSlices
  to IndexedSlicesValue with numpy indices/values.

  Args:
    a: any value.

  Returns:
    If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
    fields. Otherwise returns a unchanged.
  """
  if (isinstance(a, indexed_slices.IndexedSlices) and
      context.executing_eagerly()):
    return indexed_slices.IndexedSlicesValue(
        indices=[x.numpy() for x in a.indices],
        values=[x.numpy() for x in a.values],
        dense_shape=a.dense_shape)
  return a


def _to_numpy(a):
  """Converts Tensors, EagerTensors, and IndexedSlicesValue to numpy arrays.

  Args:
    a: any value.

  Returns:
    If a is EagerTensor or Tensor, returns the evaluation of a by calling
    numpy() or run(). If a is IndexedSlicesValue, constructs the corresponding
    dense numpy array. Otherwise returns a unchanged.
  """
  if isinstance(a, ops.EagerTensor):
    return a.numpy()
  if isinstance(a, ops.Tensor):
    sess = ops.get_default_session()
    return sess.run(a)
  if isinstance(a, indexed_slices.IndexedSlicesValue):
    arr = np.zeros(a.dense_shape)
    assert len(a.values) == len(a.indices), (
        "IndexedSlicesValue has %s value slices but %s indices\n%s" %
        (a.values, a.indices, a))
    for values_slice, index in zip(a.values, a.indices):
      assert 0 <= index < len(arr), (
          "IndexedSlicesValue has invalid index %s\n%s" % (index, a))
      arr[index] += values_slice
    return arr
  return a
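

# Illustrative sketch, not part of the original module: how `_to_numpy`
# densifies an IndexedSlicesValue by scatter-adding each value slice at its row
# index. The helper name `_example_to_numpy_densify` is hypothetical.
def _example_to_numpy_densify():
  sparse = indexed_slices.IndexedSlicesValue(
      values=np.array([[1.0, 2.0], [3.0, 4.0]]),
      indices=np.array([0, 2]),
      dense_shape=np.array([3, 2]))
  dense = _to_numpy(sparse)
  # Rows 0 and 2 receive the two value slices; row 1 stays all-zero.
  assert dense.shape == (3, 2) and not dense[1].any()
  return dense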


def _prepare(f, xs_dtypes, xs_shapes):
  """Return a function that executes 'f'.

  In TF 2.x, this is the same as `f`.
  In TF 1.x, returns a Python function that executes the graph defined by `f`
  in a Session.

  Args:
    f: the function.
    xs_dtypes: dtypes of f's arguments.
    xs_shapes: shapes of f's arguments.

  Returns:
    A function that takes the same arguments as `f` (as numpy data in TF 1.x)
    and returns the value of `f`.
  """
  if context.executing_eagerly():

    def decorated_eager(*xs_data):
      return f(*map(ops.convert_to_tensor, xs_data))

    return decorated_eager
  xs = [
      array_ops.placeholder(x_dtype, shape=x_shape)
      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
  ]
  y = f(*xs)
  sess = ops.get_default_session()

  def decorated_graph(*xs_data):
    xs_data = [_to_numpy(a) for a in xs_data]
    return sess.run(y, feed_dict=dict(zip(xs, xs_data)))

  return decorated_graph
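

# Illustrative sketch, not part of the original module: under eager execution
# (TF 2.x) `_prepare` simply wraps `f` so that numpy arguments are converted to
# tensors before the call. The helper name `_example_prepare_eager` is
# hypothetical.
def _example_prepare_eager():
  def f(a, b):
    return a * b

  g = _prepare(f, [dtypes.float32, dtypes.float32], [(2,), (2,)])
  # g behaves like f on numpy inputs: returns a tensor holding [3., 8.].
  return g(np.array([1.0, 2.0], np.float32), np.array([3.0, 4.0], np.float32))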


def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
  """Computes the theoretical Jacobian for f regarding xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_shape: the shape of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
  """
  x = xs[param]
  # Complex vectors are treated as vectors of twice as many reals.
  x_shape = tuple(x.shape) + (2,) if x.dtype.is_complex else x.shape
  y_factor = 2 if y_dtype.is_complex else 1

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape)
  x_val_size = _product(x_shape[1:])  # This is used for sparse gradients
  y_size = _product(y_shape) * y_factor

  # Allocate 2-D Jacobian, with y dimensions smashed into the first
  # dimension and x dimensions smashed into the second.
  jacobian = np.zeros((y_size, x_size), dtype=x.dtype.real_dtype.as_numpy_dtype)

  # For each entry of dy, we set it to 1 and everything else to 0, then
  # compute the gradients -- this gives us one row of the Jacobian matrix.
  dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
  dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
  grad_fn_unprep = backprop.gradients_function(f, [param])
  grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
                     [y_dtype] + [z.dtype for z in xs],
                     [None] + [z.shape for z in xs])
  for row in range(y_size):
    dy_data_flat[row] = 1
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    grad = _eval_indexed_slices(grad)
    if isinstance(grad, indexed_slices.IndexedSlicesValue):
      for i, v in zip(grad.indices, grad.values):
        c_begin = i * x_val_size
        c_end = c_begin + x_val_size
        jacobian[row, c_begin:c_end] += v.flat
    elif grad is not None:
      jacobian[row, :] = grad.ravel().view(jacobian.dtype)
    # This reset of `dy_data_flat` needs to happen after `grad` is copied to
    # `jacobian` because `grad` and `dy_data_flat` may share memory.
    dy_data_flat[row] = 0

  # If the output is empty, run the gradients at least once and make sure
  # they produce zeros.
  if y_size == 0:  # don't use 'not y_size', because y_size may not be an int
    grad = _to_numpy(grad_fn(dy_data, *xs)[0])
    if grad.shape != x.shape:
      raise ValueError("Empty gradient has wrong shape: expected %s, got %s" %
                       (x.shape, grad.shape))
    if np.any(grad):
      raise ValueError("Empty tensor with nonzero gradients")

  logging.vlog(1, "Theoretical Jacobian =\n%s", jacobian)
  return jacobian
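

# Illustrative sketch, not part of the original module, assuming eager execution
# (TF 2.x): for an elementwise square the theoretical Jacobian is diagonal with
# entries 2*x. The helper name `_example_theoretical_jacobian` is hypothetical.
def _example_theoretical_jacobian():
  xs = [ops.convert_to_tensor(np.array([1.0, 2.0, 3.0], np.float32))]
  jacobian = _compute_theoretical_jacobian(
      lambda x: x * x, y_shape=(3,), y_dtype=dtypes.float32, xs=xs, param=0)
  return jacobian  # approximately np.diag([2., 4., 6.])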


def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
  """Computes the numeric Jacobian for f regarding xs[param].

  One can think of the relation among f, xs and y as y = f(xs).

  Args:
    f: the function.
    y_size: the number of elements of the result.
    y_dtype: the dtype of the result.
    xs: a list of tensors.
    param: the index of the target parameter.
    delta: the amount of perturbation we give to the input.

  Returns:
    A 2-d numpy array representing the Jacobian. It has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in xs[param]
    and "y_size" is the number of elements in the result.
  """
  x_shape = xs[param].shape
  x_dtype = xs[param].dtype

  # To compute the jacobian, we treat x and y as one-dimensional vectors.
  x_size = _product(x_shape) * (2 if x_dtype.is_complex else 1)
  y_size = y_size * (2 if y_dtype.is_complex else 1)
  x_dtype = x_dtype.real_dtype.as_numpy_dtype
  y_dtype = y_dtype.real_dtype.as_numpy_dtype

  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  # Converts xs to numpy arrays to do in-place perturbation.
  # Calls asarray() to avoid copying in ravel() later.
  xs = [np.asarray(_to_numpy(x)) for x in xs]
  x = xs[param]

  # Make sure we have the right types.
  scale = np.asarray(2 * delta, dtype=y_dtype)[()]

  jacobian = np.zeros((y_size, x_size), dtype=x_dtype)

  # For each entry of x, we slightly perturb it by adding and subtracting a
  # delta, then compute the difference between the outputs. This gives us one
  # column of the Jacobian matrix.
  f = _prepare(f, xs_dtypes, xs_shapes)
  for col in range(x_size):
    original = x.ravel().view(x_dtype)[col]
    x.ravel().view(x_dtype)[col] += delta
    y_pos = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    x.ravel().view(x_dtype)[col] -= delta
    y_neg = _to_numpy(f(*xs))
    x.ravel().view(x_dtype)[col] = original
    diff = (y_pos - y_neg) / scale
    jacobian[:, col] = diff.ravel().view(y_dtype)

  logging.vlog(1, "Numeric Jacobian =\n%s", jacobian)
  return jacobian
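

# Illustrative sketch, not part of the original module, assuming eager execution
# (TF 2.x): central differences (f(x + d) - f(x - d)) / (2 * d) recover the same
# diagonal Jacobian as the theoretical path, up to O(delta**2) error. The helper
# name `_example_numeric_jacobian` is hypothetical.
def _example_numeric_jacobian():
  xs = [ops.convert_to_tensor(np.array([1.0, 2.0, 3.0], np.float32))]
  return _compute_numeric_jacobian(
      lambda x: x * x, y_size=3, y_dtype=dtypes.float32, xs=xs, param=0,
      delta=1.0 / 1024)  # approximately np.diag([2., 4., 6.])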


def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
  """Computes the theoretical and numerical jacobian."""
  x = xs[param]
  t = x.dtype
  allowed_types = [
      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
      dtypes.complex64, dtypes.complex128
  ]
  assert t.base_dtype in allowed_types, ("Cannot compute gradient for "
                                         "unsupported type %s of argument %s" %
                                         (t.name, param))
  t2 = y_dtype
  assert t2.base_dtype in allowed_types, ("Cannot compute gradient for "
                                          "unsupported type %s of y" % t2.name)
  y_size = _product(y_shape)
  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
  return jacob_t, jacob_n


def _compute_gradient_list(f, xs, delta):
  """Compute gradients for a list of x values."""
  # Convert xs to tensors so that dtype and shape have uniform types.
  xs = [ops.convert_to_tensor(x) for x in xs]
  # Run the function to get info of the result.
  xs_dtypes = [x.dtype for x in xs]
  xs_shapes = [x.shape for x in xs]
  f_temp = _prepare(f, xs_dtypes, xs_shapes)
  y = f_temp(*xs)
  return tuple(
      zip(*[
          _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
          for i in range(len(xs))
      ]))


@tf_export("test.compute_gradient", v1=[])
def compute_gradient(f, x, delta=None):
  """Computes the theoretical and numeric Jacobian of `f`.

  With y = f(x), computes the theoretical and numeric Jacobian dy/dx.

  Args:
    f: the function.
    x: the arguments for the function as a list or tuple of values convertible
      to a Tensor.
    delta: (optional) perturbation used to compute numeric Jacobian.

  Returns:
    A pair of lists, where the first is a list of 2-d numpy arrays representing
    the theoretical Jacobians for each argument, and the second list is the
    numerical ones. Each 2-d array has "y_size" rows
    and "x_size" columns where "x_size" is the number of elements in the
    corresponding argument and "y_size" is the number of elements in f(x).

  Raises:
    ValueError: If result is empty but the gradient is nonzero.
    ValueError: If x is not a list or tuple.

  Example:

  >>> @tf.function
  ... def test_func(x):
  ...   return x*x
  ...
  >>>
  >>> class MyTest(tf.test.TestCase):
  ...
  ...   def test_gradient_of_test_func(self):
  ...     theoretical, numerical = tf.test.compute_gradient(test_func, [1.0])
  ...     # ((array([[2.]], dtype=float32),),
  ...     #  (array([[2.000004]], dtype=float32),))
  ...     self.assertAllClose(theoretical, numerical)

  """
  if not isinstance(x, (list, tuple)):
    raise ValueError(
        "`x` must be a list or tuple of values convertible to a Tensor "
        "(arguments to `f`), not a %s" % type(x))
  if delta is None:
    # By default, we use a step size for the central finite difference
    # approximation that is exactly representable as a binary floating
    # point number, since this reduces the amount of noise due to rounding
    # in the approximation of some functions.
    delta = 1.0 / 1024
  return _compute_gradient_list(f, x, delta)
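

# Illustrative sketch, not part of the original module, assuming eager execution
# (TF 2.x): with two arguments, compute_gradient returns one Jacobian per
# argument in each tuple. The helper name `_example_compute_gradient_two_args`
# is hypothetical.
def _example_compute_gradient_two_args():
  def f(a, b):
    return a * b

  theoretical, numerical = compute_gradient(
      f, [np.array([3.0], np.float32), np.array([5.0], np.float32)])
  # theoretical[0] is df/da (= b = 5), theoretical[1] is df/db (= a = 3).
  return theoretical, numerical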


def max_error(grad1, grad2):
  """Computes maximum elementwise gap.

  Computes the maximum elementwise gap between two lists of tensors of the same
  shape.

  Args:
    grad1: a list of tensors.
    grad2: a list of tensors with the same shape as grad1.

  Returns:
    The maximum elementwise gap between the two.
  """
  error = 0
  for j_t, j_n in zip(grad1, grad2):
    if j_t.size or j_n.size:  # Handle zero size tensors correctly
      error = np.maximum(error, np.fabs(j_t - j_n).max())
  return error
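

# Illustrative sketch, not part of the original module, assuming eager execution
# (TF 2.x): the typical checking pattern combines compute_gradient with
# max_error. The helper name `_example_gradient_check` is hypothetical.
def _example_gradient_check():
  theoretical, numerical = compute_gradient(
      lambda x: x * x, [np.array([1.0, 2.0], np.float32)])
  # For a correct gradient the gap is limited only by finite-difference error,
  # so this value should be close to zero.
  return max_error(theoretical, numerical)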