Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/sparse_grad.py: 30%
132 statements
coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in sparse_ops.py."""
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_sparse_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops

# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable("SparseAddGrad")
ops.NotDifferentiable("SparseConcat")


@ops.RegisterGradient("SparseReorder")
def _SparseReorderGrad(op, unused_output_indices_grad, output_values_grad):
  """Gradients for the SparseReorder op.

  Args:
    op: the SparseReorder op
    unused_output_indices_grad: the incoming gradients of the output indices
    output_values_grad: the incoming gradients of the output values

  Returns:
    Gradient for each of the 3 input tensors:
      (input_indices, input_values, input_shape)
    The gradients for input_indices and input_shape are None.
  """
  input_indices = op.inputs[0]
  input_shape = op.inputs[2]

  num_entries = array_ops.shape(input_indices)[0]
  entry_indices = math_ops.range(num_entries)
  sp_unordered = sparse_tensor.SparseTensor(input_indices, entry_indices,
                                            input_shape)
  sp_ordered = sparse_ops.sparse_reorder(sp_unordered)
  inverted_permutation = array_ops.invert_permutation(sp_ordered.values)

  return (None, array_ops.gather(output_values_grad,
                                 inverted_permutation), None)
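

# Illustrative usage sketch (not part of the original module): a minimal
# eager-mode example, assuming TensorFlow 2.x, of the SparseReorder gradient
# registered above. The helper name `_example_sparse_reorder_grad` is
# hypothetical and is never called by this module.
def _example_sparse_reorder_grad():
  import tensorflow as tf  # local import so the sketch is self-contained
  indices = tf.constant([[2, 0], [0, 1]], dtype=tf.int64)  # out of canonical order
  values = tf.constant([10.0, 20.0])
  with tf.GradientTape() as tape:
    tape.watch(values)
    sp = tf.sparse.SparseTensor(indices, values, dense_shape=[3, 2])
    reordered = tf.sparse.reorder(sp)
    loss = tf.reduce_sum(reordered.values * tf.constant([1.0, 2.0]))
  # The gradient routes each upstream value-gradient back through the inverse
  # permutation: values[0] ends up last after reordering, so it receives 2.0.
  return tape.gradient(loss, values)  # == [2.0, 1.0]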


@ops.RegisterGradient("SparseAdd")
def _SparseAddGrad(op, *grads):
  """The backward operator for the SparseAdd op.

  The SparseAdd op calculates A + B, where A, B, and the sum are all represented
  as `SparseTensor` objects. This op takes in the upstream gradient w.r.t.
  non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
  values of A and B.

  Args:
    op: the SparseAdd op
    *grads: the incoming gradients, one element per output of `op`

  Returns:
    Gradient for each of the 7 input tensors of SparseAdd:
      (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh)
    The gradients for the indices, shapes, and the threshold are None.
  """
  val_grad = grads[1]
  a_indices = op.inputs[0]
  b_indices = op.inputs[3]
  sum_indices = op.outputs[0]
  # NOTE: we do not need to take `thresh` into account, since it simply affects
  # the non-zero elements of the sum, and we will peek into `sum_indices` in the
  # gradient op.

  a_val_grad, b_val_grad = gen_sparse_ops.sparse_add_grad(
      val_grad, a_indices, b_indices, sum_indices)
  a_val_grad.set_shape(op.inputs[1].get_shape())
  b_val_grad.set_shape(op.inputs[4].get_shape())
  # (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh)
  return (None, a_val_grad, None, None, b_val_grad, None, None)
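

# Illustrative usage sketch (not part of the original module): how the
# SparseAdd gradient above surfaces through the public API, assuming
# TensorFlow 2.x eager execution. The helper name is hypothetical.
def _example_sparse_add_grad():
  import tensorflow as tf
  a_vals = tf.constant([1.0, 2.0])
  b_vals = tf.constant([3.0])
  with tf.GradientTape(persistent=True) as tape:
    tape.watch([a_vals, b_vals])
    a = tf.sparse.SparseTensor([[0, 0], [1, 1]], a_vals, [2, 2])
    b = tf.sparse.SparseTensor([[0, 0]], b_vals, [2, 2])
    total = tf.sparse.add(a, b)
    loss = tf.reduce_sum(total.values)
  # Only the non-empty values receive gradients; indices, shapes, and the
  # threshold get None internally. Here every value feeds one summed entry.
  return tape.gradient(loss, a_vals), tape.gradient(loss, b_vals)  # [1, 1], [1]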


@ops.RegisterGradient("SparseTensorDenseAdd")
def _SparseTensorDenseAddGrad(op, out_grad):
  sp_indices = op.inputs[0]
  # (sparse_indices, sparse_values, sparse_shape, dense)
  return (None, array_ops.gather_nd(out_grad, sp_indices), None, out_grad)
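

# Illustrative usage sketch (not part of the original module): the
# SparseTensorDenseAdd gradient above is exercised when adding a SparseTensor
# to a dense Tensor via tf.sparse.add, assuming TensorFlow 2.x eager execution.
def _example_sparse_tensor_dense_add_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0])
  dense = tf.constant([[10.0, 20.0], [30.0, 40.0]])
  with tf.GradientTape(persistent=True) as tape:
    tape.watch([vals, dense])
    sp = tf.sparse.SparseTensor([[0, 1], [1, 0]], vals, [2, 2])
    out = tf.sparse.add(sp, dense)  # dense result
    loss = tf.reduce_sum(out * tf.constant([[1.0, 2.0], [3.0, 4.0]]))
  # Sparse values pick up the upstream gradient gathered at their positions
  # ([2.0, 3.0]); the dense operand receives the full upstream gradient.
  return tape.gradient(loss, vals), tape.gradient(loss, dense)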


@ops.RegisterGradient("SparseReduceSum")
def _SparseReduceSumGrad(op, out_grad):
  """Similar to gradient for the Sum Op (i.e. tf.reduce_sum())."""
  sp_indices = op.inputs[0]
  sp_shape = op.inputs[2]
  output_shape_kept_dims = math_ops.reduced_shape(sp_shape, op.inputs[3])
  out_grad_reshaped = array_ops.reshape(out_grad, output_shape_kept_dims)
  scale = sp_shape // math_ops.cast(output_shape_kept_dims, dtypes.int64)
  # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
  return (None, array_ops.gather_nd(out_grad_reshaped,
                                    sp_indices // scale), None, None)
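

# Illustrative usage sketch (not part of the original module): the
# SparseReduceSum gradient above broadcasts the reduced gradient back to each
# non-empty value, assuming TensorFlow 2.x eager execution.
def _example_sparse_reduce_sum_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0, 3.0])
  with tf.GradientTape() as tape:
    tape.watch(vals)
    sp = tf.sparse.SparseTensor([[0, 0], [0, 2], [1, 1]], vals, [2, 3])
    row_sums = tf.sparse.reduce_sum(sp, axis=1)  # dense, shape [2]
    loss = tf.reduce_sum(row_sums * tf.constant([10.0, 100.0]))
  # Each value receives the gradient of the row it was reduced into.
  return tape.gradient(loss, vals)  # == [10.0, 10.0, 100.0]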


@ops.RegisterGradient("SparseSlice")
def _SparseSliceGrad(op, *grads):
  """The backward operator for the SparseSlice op.

  This op takes in the upstream gradient w.r.t. non-empty values of
  the sliced `SparseTensor`, and outputs the gradients w.r.t.
  the non-empty values of the input `SparseTensor`.

  Args:
    op: the SparseSlice op
    *grads: the incoming gradients, one element per output of `op`

  Returns:
    Gradient for each of the 5 input tensors of SparseSlice:
      (indices, values, shape, start, size)
    The gradients for indices, shape, start, and size are None.
  """
  backprop_val_grad = grads[1]
  input_indices = op.inputs[0]
  input_start = op.inputs[3]
  output_indices = op.outputs[0]

  val_grad = gen_sparse_ops.sparse_slice_grad(backprop_val_grad, input_indices,
                                              input_start, output_indices)
  val_grad.set_shape(op.inputs[1].get_shape())
  # (indices, values, shape, start, size)
  return (None, val_grad, None, None, None)
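

# Illustrative usage sketch (not part of the original module): the SparseSlice
# gradient above sends upstream gradients back only to the values that fall
# inside the slice, assuming TensorFlow 2.x eager execution.
def _example_sparse_slice_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0, 3.0])
  with tf.GradientTape() as tape:
    tape.watch(vals)
    sp = tf.sparse.SparseTensor([[0, 0], [0, 2], [1, 1]], vals, [2, 3])
    sliced = tf.sparse.slice(sp, start=[0, 0], size=[1, 3])  # keep row 0 only
    loss = tf.reduce_sum(sliced.values)
  # Values inside the slice get gradient 1.0; values outside get 0.0.
  return tape.gradient(loss, vals)  # == [1.0, 1.0, 0.0]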


@ops.RegisterGradient("SparseTensorDenseMatMul")
def _SparseTensorDenseMatMulGrad(op, grad):
  """Gradients for the sparse values and dense tensor of SparseTensorDenseMatMul.

  Args:
    op: the SparseTensorDenseMatMul op
    grad: the incoming gradient

  Returns:
    Gradient for each of the 4 input tensors:
      (sparse_indices, sparse_values, sparse_shape, dense_tensor)
    The gradients for indices and shape are None.

  Raises:
    TypeError: When the two operands don't have the same type.
  """
  a_indices, a_values, a_shape = op.inputs[:3]
  b = op.inputs[3]
  adj_a = op.get_attr("adjoint_a")
  adj_b = op.get_attr("adjoint_b")

  a_type = a_values.dtype.base_dtype
  b_type = b.dtype.base_dtype
  if a_type != b_type:
    raise TypeError(
        f"SparseTensorDenseMatMul op received operands with different types: "
        f"`{a_type}` and `{b_type}`.")

  # gradient w.r.t. dense
  b_grad = gen_sparse_ops.sparse_tensor_dense_mat_mul(
      a_indices, a_values, a_shape, grad, adjoint_a=not adj_a)
  if adj_b:
    b_grad = array_ops.matrix_transpose(b_grad, conjugate=True)

  # gradient w.r.t. sparse values

  # TODO(zongheng): these gather calls could potentially duplicate rows/cols in
  # memory. If there is a need, we should look into implementing this more
  # intelligently to avoid duplicating data.

  # With no adjoints, a_grad is matmul(grad, adjoint(b)). Since a is sparse, we
  # just want to compute that matmul at the rows/columns of non-zero values. The
  # (r, c) value is sum(grad[r, :] * adjoint(b)[:, c]), where the latter term is
  # more conveniently written as conj(b)[c, :]. That expression is more
  # efficient to calculate as a matmul, after expanding the two terms to be 2D
  # (i.e. a row vector and a column vector).
  #
  # If adj_b then we replace conj(b) by transpose(b); if adj_a we need to
  # adjoint the result, which is equivalent to swapping r and c and taking
  # conjugates.

  # Get grad[r, :] and b[c, :] (or with r and c swapped if adj_a, or with
  # transpose(b) if adj_b), as batches of vectors (with the batch dimension
  # corresponding to the non-zero indices of a).
  rows = a_indices[:, 0]
  cols = a_indices[:, 1]
  parts_a = array_ops.gather(grad, rows if not adj_a else cols)
  parts_b = array_ops.gather(
      b if not adj_b else array_ops.transpose(b), cols if not adj_a else rows)

  if not adj_a and not adj_b:
    # grad[r, :] * conj(b[c, :]) = row(grad[r, :]) @ adjoint(row(b[c, :]))
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -2),
        array_ops.expand_dims(parts_b, -2),
        adjoint_b=True)
  elif adj_a and not adj_b:
    # conj(grad[c, :] * conj(b[r, :])) = adjoint(col(grad[c, :])) @ col(b[r, :])
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -1),
        array_ops.expand_dims(parts_b, -1),
        adjoint_a=True)
  elif not adj_a and adj_b:
    # grad[r, :] * transpose(b)[c, :] =
    #     row(grad[r, :]) @ col(transpose(b)[c, :])
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -2), array_ops.expand_dims(parts_b, -1))
  elif adj_a and adj_b:
    # conj(grad[c, :] * transpose(b)[r, :]) =
    #     adjoint(col(grad[c, :])) @ adjoint(row(transpose(b)[r, :]))
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -1),
        array_ops.expand_dims(parts_b, -2),
        adjoint_a=True,
        adjoint_b=True)

  # gradients w.r.t. (a_indices, a_values, a_shape, b)
  return (None, array_ops.squeeze(a_values_grad, axis=[-2, -1]), None, b_grad)
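

# Illustrative usage sketch (not part of the original module): both branches of
# the SparseTensorDenseMatMul gradient above (w.r.t. the sparse values and the
# dense matrix), assuming TensorFlow 2.x eager execution and no adjoints.
def _example_sparse_tensor_dense_matmul_grad():
  import tensorflow as tf
  a_vals = tf.constant([1.0, 2.0])
  b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
  with tf.GradientTape(persistent=True) as tape:
    tape.watch([a_vals, b])
    a = tf.sparse.SparseTensor([[0, 1], [1, 2]], a_vals, [2, 3])
    product = tf.sparse.sparse_dense_matmul(a, b)  # dense, shape [2, 2]
    loss = tf.reduce_sum(product)
  # d(loss)/d(a_vals[k]) is the row of b that value k was multiplied against,
  # summed over the output columns: here [3 + 4, 5 + 6] == [7.0, 11.0].
  return tape.gradient(loss, a_vals), tape.gradient(loss, b)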


@ops.RegisterGradient("SparseDenseCwiseAdd")
def _SparseDenseCwiseAddGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseDenseCwiseAdd is not implemented.")


def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients."""
  x_indices = op.inputs[0]
  x_shape = op.inputs[2]
  y = op.inputs[3]

  y_shape = math_ops.cast(array_ops.shape(y), dtypes.int64)
  num_added_dims = array_ops.expand_dims(
      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
  augmented_y_shape = array_ops.concat(
      [array_ops.ones(num_added_dims, ops.dtypes.int64), y_shape], 0)

  scaling = x_shape // augmented_y_shape
  scaled_indices = x_indices // scaling
  scaled_indices = array_ops.slice(scaled_indices,
                                   array_ops.concat([[0], num_added_dims], 0),
                                   [-1, -1])
  dense_vals = array_ops.gather_nd(y, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * op.inputs[1]
  else:
    dx = grad / dense_vals
    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
  # indices can repeat after scaling, so we can't use sparse_to_dense().
  dy = sparse_ops.sparse_add(
      array_ops.zeros_like(y),
      sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)


@ops.RegisterGradient("SparseDenseCwiseMul")
def _SparseDenseCwiseMulGrad(op, grad):
  """Gradients for SparseDenseCwiseMul."""
  return _SparseDenseCwiseMulOrDivGrad(op, grad, True)


@ops.RegisterGradient("SparseDenseCwiseDiv")
def _SparseDenseCwiseDivGrad(op, grad):
  """Gradients for SparseDenseCwiseDiv."""
  return _SparseDenseCwiseMulOrDivGrad(op, grad, False)
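

# Illustrative numeric sketch (not part of the original module) of the
# index-scaling trick used by _SparseDenseCwiseMulOrDivGrad above: when the
# dense operand `y` has fewer (or size-1) leading dimensions, dividing the
# sparse indices by `x_shape // augmented_y_shape` maps each non-zero position
# of x onto the y entry it was broadcast against. Assumes TensorFlow 2.x eager
# execution; the helper name is hypothetical.
def _example_cwise_broadcast_index_scaling():
  import tensorflow as tf
  x_shape = tf.constant([2, 3], dtype=tf.int64)
  x_indices = tf.constant([[0, 0], [1, 2]], dtype=tf.int64)
  y_shape = tf.constant([3], dtype=tf.int64)  # y broadcasts over the rows of x
  num_added_dims = int(tf.size(x_shape) - tf.size(y_shape))  # == 1 here
  augmented_y_shape = tf.concat(
      [tf.ones([num_added_dims], tf.int64), y_shape], 0)  # == [1, 3]
  scaling = x_shape // augmented_y_shape  # == [2, 1]
  scaled = x_indices // scaling  # broadcast dimensions collapse to index 0
  # Drop the leading broadcast dimensions so the result indexes y directly:
  # the non-zeros at (0, 0) and (1, 2) map to y[0] and y[2].
  return scaled[:, num_added_dims:]  # == [[0], [2]]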


@ops.RegisterGradient("SparseSoftmax")
def _SparseSoftmaxGrad(op, grad):
  """Gradients for SparseSoftmax.

  The calculation is the same as SoftmaxGrad:

    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

  where we now only operate on the non-zero values present in the SparseTensors.

  Args:
    op: the SparseSoftmax op.
    grad: the upstream gradient w.r.t. the non-zero SparseSoftmax output values.

  Returns:
    Gradients w.r.t. the input (sp_indices, sp_values, sp_shape).
  """
  indices, shape = op.inputs[0], op.inputs[2]
  out_vals = op.outputs[0]
  sp_output = sparse_tensor.SparseTensor(indices, out_vals, shape)
  sp_grad = sparse_tensor.SparseTensor(indices, grad, shape)
  sp_product = sparse_tensor.SparseTensor(indices,
                                          sp_output.values * sp_grad.values,
                                          shape)

  # [..., B, 1], dense.
  sum_reduced = -sparse_ops.sparse_reduce_sum(sp_product, [-1], keepdims=True)
  # sparse [..., B, C] + dense [..., B, 1] with broadcast; outputs sparse.
  sp_sum = sparse_ops.sparse_dense_cwise_add(sp_grad, sum_reduced)

  grad_x = sp_sum.values * sp_output.values
  return [None, grad_x, None]
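

# Illustrative usage sketch (not part of the original module): the
# SparseSoftmax gradient above matches the dense softmax gradient restricted
# to the non-empty positions, assuming TensorFlow 2.x eager execution.
def _example_sparse_softmax_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0, 3.0])
  with tf.GradientTape() as tape:
    tape.watch(vals)
    sp = tf.sparse.SparseTensor([[0, 0], [0, 1], [1, 2]], vals, [2, 3])
    sm = tf.sparse.softmax(sp)  # softmax over the non-empty values of each row
    loss = tf.reduce_sum(sm.values * tf.constant([1.0, 0.0, 1.0]))
  # grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax,
  # evaluated only at the non-empty positions.
  return tape.gradient(loss, vals)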


@ops.RegisterGradient("SparseSparseMaximum")
def _SparseSparseMaximumGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseSparseMaximum is not implemented.")


@ops.RegisterGradient("SparseSparseMinimum")
def _SparseSparseMinimumGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseSparseMinimum is not implemented.")


@ops.RegisterGradient("SparseFillEmptyRows")
def _SparseFillEmptyRowsGrad(op, unused_grad_output_indices, output_grad_values,
                             unused_grad_empty_row_indicator,
                             unused_grad_reverse_index_map):
  """Gradients for SparseFillEmptyRows."""
  reverse_index_map = op.outputs[3]

  d_values, d_default_value = gen_sparse_ops.sparse_fill_empty_rows_grad(
      reverse_index_map=reverse_index_map, grad_values=output_grad_values)

  # d_indices, d_values, d_dense_shape, d_default_value.
  return [None, d_values, None, d_default_value]
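

# Illustrative usage sketch (not part of the original module): the
# SparseFillEmptyRows gradient above routes gradients back to the original
# values and sums the gradients of every inserted default entry, assuming
# TensorFlow 2.x eager execution.
def _example_sparse_fill_empty_rows_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0])
  default = tf.constant(7.0)
  with tf.GradientTape(persistent=True) as tape:
    tape.watch([vals, default])
    sp = tf.sparse.SparseTensor([[0, 0], [2, 1]], vals, [3, 2])  # row 1 is empty
    filled, _ = tf.sparse.fill_empty_rows(sp, default)
    loss = tf.reduce_sum(filled.values * tf.constant([1.0, 10.0, 100.0]))
  # vals get the gradients at their (row-ordered) slots; the default value
  # gets the gradient of the single entry inserted into row 1.
  return tape.gradient(loss, vals), tape.gradient(loss, default)  # [1, 100], 10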


@ops.RegisterGradient("SparseToDense")
def _SparseToDenseGrad(op, grad):
  sparse_indices, output_shape, _, _ = op.inputs

  sparse_values_grad = array_ops.gather_nd(grad, sparse_indices)
  default_value_grad = math_ops.reduce_sum(grad) - math_ops.reduce_sum(
      sparse_values_grad)
  return [
      array_ops.zeros_like(sparse_indices),
      array_ops.zeros_like(output_shape), sparse_values_grad, default_value_grad
  ]
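

# Illustrative usage sketch (not part of the original module): how the
# SparseToDense gradient above typically shows up via tf.sparse.to_dense
# (assuming it lowers to the SparseToDense op in this TensorFlow version):
# the upstream gradient is gathered at the sparse positions, and the default
# value receives whatever gradient mass remains.
def _example_sparse_to_dense_grad():
  import tensorflow as tf
  vals = tf.constant([1.0, 2.0])
  default = tf.constant(0.5)
  with tf.GradientTape(persistent=True) as tape:
    tape.watch([vals, default])
    sp = tf.sparse.SparseTensor([[0, 0], [1, 1]], vals, [2, 2])
    dense = tf.sparse.to_dense(sp, default_value=default)
    loss = tf.reduce_sum(dense)
  # Non-empty values each receive their own upstream gradient; the default
  # value receives the total gradient minus what the values already took.
  return tape.gradient(loss, vals), tape.gradient(loss, default)  # [1, 1], 2.0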