Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/sparse_grad.py: 30% (132 statements)


# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in sparse_ops.py."""

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_sparse_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops

# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable("SparseAddGrad")
ops.NotDifferentiable("SparseConcat")


@ops.RegisterGradient("SparseReorder")
def _SparseReorderGrad(op, unused_output_indices_grad, output_values_grad):
  """Gradients for the SparseReorder op.

  Args:
    op: the SparseReorder op
    unused_output_indices_grad: the incoming gradients of the output indices
    output_values_grad: the incoming gradients of the output values

  Returns:
    Gradient for each of the 3 input tensors:
      (input_indices, input_values, input_shape)
    The gradients for input_indices and input_shape are None.
  """
  input_indices = op.inputs[0]
  input_shape = op.inputs[2]

  num_entries = array_ops.shape(input_indices)[0]
  entry_indices = math_ops.range(num_entries)
  sp_unordered = sparse_tensor.SparseTensor(input_indices, entry_indices,
                                            input_shape)
  sp_ordered = sparse_ops.sparse_reorder(sp_unordered)
  inverted_permutation = array_ops.invert_permutation(sp_ordered.values)

  return (None, array_ops.gather(output_values_grad,
                                 inverted_permutation), None)
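
# Worked example for the permutation trick above (illustrative): for a 1-D
# SparseTensor with input_indices [[2], [0], [1]], tagging the entries with
# [0, 1, 2] and reordering yields sp_ordered.values == [1, 2, 0] (output
# position -> input entry), so inverted_permutation == [2, 0, 1] and input
# entry i picks up output_values_grad[[2, 0, 1][i]].
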

@ops.RegisterGradient("SparseAdd")
def _SparseAddGrad(op, *grads):
  """The backward operator for the SparseAdd op.

  The SparseAdd op calculates A + B, where A, B, and the sum are all represented
  as `SparseTensor` objects. This op takes in the upstream gradient w.r.t.
  non-empty values of the sum, and outputs the gradients w.r.t. the non-empty
  values of A and B.

  Args:
    op: the SparseAdd op
    *grads: the incoming gradients, one element per output of `op`

  Returns:
    Gradient for each of the 7 input tensors of SparseAdd:
      (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh)
    The gradients for the indices, shapes, and the threshold are None.
  """
  val_grad = grads[1]
  a_indices = op.inputs[0]
  b_indices = op.inputs[3]
  sum_indices = op.outputs[0]
  # NOTE: we do not need to take `thresh` into account, since it simply affects
  # the non-zero elements of the sum, and we will peek into `sum_indices` in the
  # gradient op.

  a_val_grad, b_val_grad = gen_sparse_ops.sparse_add_grad(
      val_grad, a_indices, b_indices, sum_indices)
  a_val_grad.set_shape(op.inputs[1].get_shape())
  b_val_grad.set_shape(op.inputs[4].get_shape())
  # (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh)
  return (None, a_val_grad, None, None, b_val_grad, None, None)
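
# Sketch of what sparse_add_grad computes (a reading of the index matching
# above, not a documented contract): each entry of `a_indices` that also occurs
# in `sum_indices` receives the matching element of `val_grad`, while entries
# whose sums were thresholded away receive zero; likewise for `b_indices`.
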

@ops.RegisterGradient("SparseTensorDenseAdd")
def _SparseTensorDenseAddGrad(op, out_grad):
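  """Gradients for SparseTensorDenseAdd.

  The forward op computes `dense + scatter(sparse)`, so the upstream gradient
  flows through to `dense` unchanged, and each sparse value receives the
  gradient at its own index via `gather_nd`.
  """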

  sp_indices = op.inputs[0]
  # (sparse_indices, sparse_values, sparse_shape, dense)
  return (None, array_ops.gather_nd(out_grad, sp_indices), None, out_grad)


@ops.RegisterGradient("SparseReduceSum")
def _SparseReduceSumGrad(op, out_grad):
  """Similar to gradient for the Sum Op (i.e. tf.reduce_sum())."""
  sp_indices = op.inputs[0]
  sp_shape = op.inputs[2]
  output_shape_kept_dims = math_ops.reduced_shape(sp_shape, op.inputs[3])
  out_grad_reshaped = array_ops.reshape(out_grad, output_shape_kept_dims)
  scale = sp_shape // math_ops.cast(output_shape_kept_dims, dtypes.int64)
  # (sparse_indices, sparse_values, sparse_shape, reduction_axes)
  return (None, array_ops.gather_nd(out_grad_reshaped,
                                    sp_indices // scale), None, None)
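
# Worked example (illustrative): with sp_shape == [2, 3] and reduction over
# axis 1, output_shape_kept_dims == [2, 1] and scale == [1, 3], so a non-zero
# entry at index [1, 2] maps to [1, 2] // [1, 3] == [1, 0] and picks up the
# reduced gradient out_grad_reshaped[1, 0].
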

@ops.RegisterGradient("SparseSlice")
def _SparseSliceGrad(op, *grads):
  """The backward operator for the SparseSlice op.

  This op takes in the upstream gradient w.r.t. non-empty values of
  the sliced `SparseTensor`, and outputs the gradients w.r.t.
  the non-empty values of the input `SparseTensor`.

  Args:
    op: the SparseSlice op
    *grads: the incoming gradients, one element per output of `op`

  Returns:
    Gradient for each of the 5 input tensors of SparseSlice:
      (indices, values, shape, start, size)
    The gradients for the indices, shape, start, and size are None.
  """
  backprop_val_grad = grads[1]
  input_indices = op.inputs[0]
  input_start = op.inputs[3]
  output_indices = op.outputs[0]

  val_grad = gen_sparse_ops.sparse_slice_grad(backprop_val_grad, input_indices,
                                              input_start, output_indices)
  val_grad.set_shape(op.inputs[1].get_shape())
  # (indices, values, shape, start, size)
  return (None, val_grad, None, None, None)
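
# Sketch of sparse_slice_grad as used above (not a documented contract): input
# entries that fall inside the slice window receive the upstream gradient of
# the matching output entry; entries outside the window receive zero.
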

@ops.RegisterGradient("SparseTensorDenseMatMul")
def _SparseTensorDenseMatMulGrad(op, grad):
  """Gradients for the SparseTensorDenseMatMul op.

  Args:
    op: the SparseTensorDenseMatMul op
    grad: the incoming gradient

  Returns:
    Gradient for each of the 4 input tensors:
      (sparse_indices, sparse_values, sparse_shape, dense_tensor)
    The gradients for indices and shape are None.

  Raises:
    TypeError: When the two operands don't have the same type.
  """
  a_indices, a_values, a_shape = op.inputs[:3]
  b = op.inputs[3]
  adj_a = op.get_attr("adjoint_a")
  adj_b = op.get_attr("adjoint_b")

  a_type = a_values.dtype.base_dtype
  b_type = b.dtype.base_dtype
  if a_type != b_type:
    raise TypeError(
        f"SparseTensorDenseMatMul op received operands with different types: "
        f"`{a_type}` and `{b_type}`.")

  # gradient w.r.t. dense
  b_grad = gen_sparse_ops.sparse_tensor_dense_mat_mul(
      a_indices, a_values, a_shape, grad, adjoint_a=not adj_a)
  if adj_b:
    b_grad = array_ops.matrix_transpose(b_grad, conjugate=True)

  # gradient w.r.t. sparse values

  # TODO(zongheng): these gather calls could potentially duplicate rows/cols in
  # memory. If there is a need, we should look into implementing this more
  # intelligently to avoid duplicating data.

  # With no adjoints, a_grad is matmul(grad, adjoint(b)). Since a is sparse, we
  # just want to compute that matmul at the rows/columns of non-zero values. The
  # (r, c) value is sum(grad[r, :] * adjoint(b)[:, c]), where the latter term is
  # more conveniently written as conj(b)[c, :]. That expression is more
  # efficient to calculate as a matmul, after expanding the two terms to be 2D
  # (i.e. a row vector and a column vector).
  #
  # If adj_b then we replace conj(b) by transpose(b); if adj_a we need to
  # adjoint the result, which is equivalent to swapping r and c and taking
  # conjugates.

  # Get grad[r, :] and b[c, :] (or with r and c swapped if adj_a, or with
  # transpose(b) if adj_b), as batches of vectors (with the batch dimension
  # corresponding to the non-zero indices of a).
  rows = a_indices[:, 0]
  cols = a_indices[:, 1]
  parts_a = array_ops.gather(grad, rows if not adj_a else cols)
  parts_b = array_ops.gather(
      b if not adj_b else array_ops.transpose(b), cols if not adj_a else rows)

  if not adj_a and not adj_b:
    # grad[r, :] * conj(b[c, :]) = row(grad[r, :]) @ adjoint(row(b[c, :]))
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -2),
        array_ops.expand_dims(parts_b, -2),
        adjoint_b=True)
  elif adj_a and not adj_b:
    # conj(grad[c, :] * conj(b[r, :])) = adjoint(col(grad[c, :])) @ col(b[r, :])
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -1),
        array_ops.expand_dims(parts_b, -1),
        adjoint_a=True)
  elif not adj_a and adj_b:
    # grad[r, :] * transpose(b)[c, :] =
    #   row(grad[r, :]) @ col(transpose(b)[c, :])
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -2), array_ops.expand_dims(parts_b, -1))
  elif adj_a and adj_b:
    # conj(grad[c, :] * transpose(b)[r, :]) =
    #   adjoint(col(grad[c, :])) @ adjoint(row(transpose(b)[r, :]))
    a_values_grad = math_ops.matmul(
        array_ops.expand_dims(parts_a, -1),
        array_ops.expand_dims(parts_b, -2),
        adjoint_a=True,
        adjoint_b=True)

  # gradients w.r.t. (a_indices, a_values, a_shape, b)
  return (None, array_ops.squeeze(a_values_grad, axis=[-2, -1]), None, b_grad)
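
# Shape sketch for the common case adj_a == adj_b == False (illustrative): with
# the sparse operand dense-shaped [m, n], b shaped [n, p], and grad shaped
# [m, p], b_grad = adjoint(a) @ grad is [n, p]; parts_a ([nnz, p] rows of grad)
# and parts_b ([nnz, p] rows of b) are expanded to [nnz, 1, p], and each
# [1, p] @ [p, 1] batched matmul yields the [1, 1] gradient of one non-zero
# value, squeezed back to a [nnz] vector.
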

@ops.RegisterGradient("SparseDenseCwiseAdd")
def _SparseDenseCwiseAddGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseDenseCwiseAdd is not implemented.")


def _SparseDenseCwiseMulOrDivGrad(op, grad, is_mul):
  """Common code for SparseDenseCwise{Mul,Div} gradients."""
  x_indices = op.inputs[0]
  x_shape = op.inputs[2]
  y = op.inputs[3]

  y_shape = math_ops.cast(array_ops.shape(y), dtypes.int64)
  num_added_dims = array_ops.expand_dims(
      array_ops.size(x_shape) - array_ops.size(y_shape), 0)
  augmented_y_shape = array_ops.concat(
      [array_ops.ones(num_added_dims, dtypes.int64), y_shape], 0)

  scaling = x_shape // augmented_y_shape
  scaled_indices = x_indices // scaling
  scaled_indices = array_ops.slice(scaled_indices,
                                   array_ops.concat([[0], num_added_dims], 0),
                                   [-1, -1])
  dense_vals = array_ops.gather_nd(y, scaled_indices)

  if is_mul:
    dx = grad * dense_vals
    dy_val = grad * op.inputs[1]
  else:
    dx = grad / dense_vals
    dy_val = grad * (-op.inputs[1] / math_ops.square(dense_vals))
  # indices can repeat after scaling, so we can't use sparse_to_dense().
  dy = sparse_ops.sparse_add(
      array_ops.zeros_like(y),
      sparse_tensor.SparseTensor(scaled_indices, dy_val, y_shape))

  # (sp_indices, sp_vals, sp_shape, dense)
  return (None, dx, None, dy)
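
# Broadcasting sketch (illustrative): if x_shape == [2, 3, 4] and y has shape
# [3, 4], then augmented_y_shape == [1, 3, 4] and scaling == [2, 1, 1], so a
# sparse index [i, j, k] scales to [0, j, k]; the slice then drops the added
# leading dimension, leaving [j, k] for the gather_nd into y. Because several
# x indices can map to the same [j, k], dy is accumulated with sparse_add
# rather than scattered.
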

@ops.RegisterGradient("SparseDenseCwiseMul")
def _SparseDenseCwiseMulGrad(op, grad):
  """Gradients for SparseDenseCwiseMul."""
  return _SparseDenseCwiseMulOrDivGrad(op, grad, True)


@ops.RegisterGradient("SparseDenseCwiseDiv")
def _SparseDenseCwiseDivGrad(op, grad):
  """Gradients for SparseDenseCwiseDiv."""
  return _SparseDenseCwiseMulOrDivGrad(op, grad, False)


@ops.RegisterGradient("SparseSoftmax")
def _SparseSoftmaxGrad(op, grad):
  """Gradients for SparseSoftmax.

  The calculation is the same as SoftmaxGrad:

    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

  where we now only operate on the non-zero values present in the SparseTensors.

  Args:
    op: the SparseSoftmax op.
    grad: the upstream gradient w.r.t. the non-zero SparseSoftmax output values.

  Returns:
    Gradients w.r.t. the input (sp_indices, sp_values, sp_shape).
  """
  indices, shape = op.inputs[0], op.inputs[2]
  out_vals = op.outputs[0]
  sp_output = sparse_tensor.SparseTensor(indices, out_vals, shape)
  sp_grad = sparse_tensor.SparseTensor(indices, grad, shape)
  sp_product = sparse_tensor.SparseTensor(indices,
                                          sp_output.values * sp_grad.values,
                                          shape)

  # [..., B, 1], dense.
  sum_reduced = -sparse_ops.sparse_reduce_sum(sp_product, [-1], keepdims=True)
  # sparse [..., B, C] + dense [..., B, 1] with broadcast; outputs sparse.
  sp_sum = sparse_ops.sparse_dense_cwise_add(sp_grad, sum_reduced)

  grad_x = sp_sum.values * sp_output.values
  return [None, grad_x, None]
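
# Mapping the code above onto the SoftmaxGrad formula (per non-zero entry):
# sum_reduced holds -sum(grad_softmax * softmax) along the last axis, so
# sp_sum.values == grad - sum(grad * softmax) and
# grad_x == (grad - sum(grad * softmax)) * softmax, as stated in the docstring.
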

@ops.RegisterGradient("SparseSparseMaximum")
def _SparseSparseMaximumGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseSparseMaximum is not implemented.")


@ops.RegisterGradient("SparseSparseMinimum")
def _SparseSparseMinimumGrad(unused_op, unused_grad):
  raise NotImplementedError(
      "Gradient for SparseSparseMinimum is not implemented.")


@ops.RegisterGradient("SparseFillEmptyRows")
def _SparseFillEmptyRowsGrad(op, unused_grad_output_indices, output_grad_values,
                             unused_grad_empty_row_indicator,
                             unused_grad_reverse_index_map):
  """Gradients for SparseFillEmptyRows."""
  reverse_index_map = op.outputs[3]

  d_values, d_default_value = gen_sparse_ops.sparse_fill_empty_rows_grad(
      reverse_index_map=reverse_index_map, grad_values=output_grad_values)

  # d_indices, d_values, d_dense_shape, d_default_value.
  return [None, d_values, None, d_default_value]
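
# Sketch of sparse_fill_empty_rows_grad as used here: reverse_index_map maps
# each original entry to its position in the filled output, so d_values is the
# upstream gradient gathered at those positions, and d_default_value collects
# the gradient of every entry that was inserted to fill an empty row.
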

@ops.RegisterGradient("SparseToDense")
def _SparseToDenseGrad(op, grad):
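  """Gradients for SparseToDense.

  The forward op scatters `sparse_values` onto a canvas filled with
  `default_value`, so each sparse value receives the gradient at its own index,
  while `default_value` receives the gradient summed over all remaining
  positions: the total sum minus the sum over the scattered positions
  (assuming the indices do not repeat).
  """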

  sparse_indices, output_shape, _, _ = op.inputs

  sparse_values_grad = array_ops.gather_nd(grad, sparse_indices)
  default_value_grad = math_ops.reduce_sum(grad) - math_ops.reduce_sum(
      sparse_values_grad)
  return [
      array_ops.zeros_like(sparse_indices),
      array_ops.zeros_like(output_shape), sparse_values_grad, default_value_grad
  ]