Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/random_grad.py: 24%

89 statements  


# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in random_ops.py."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_random_ops
from tensorflow.python.ops import math_ops


def add_leading_unit_dimensions(x, num_dimensions):  # pylint: disable=invalid-name
  """Reshapes `x` to have `num_dimensions` extra leading unit dimensions."""
  new_shape = array_ops.concat(
      [array_ops.ones([num_dimensions], dtype=dtypes.int32),
       array_ops.shape(x)], axis=0)
  return array_ops.reshape(x, new_shape)

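# A minimal sketch of what the helper above does, for illustration only (the
# `_example_*` name below is hypothetical and not part of this module): a
# [2, 3]-shaped alpha with two leading unit dimensions prepended becomes
# [1, 1, 2, 3], which broadcasts against samples of shape [s0, s1, 2, 3].
def _example_add_leading_unit_dimensions():
  alpha = array_ops.ones([2, 3])
  expanded = add_leading_unit_dimensions(alpha, 2)
  return expanded  # shape: [1, 1, 2, 3]
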

@ops.RegisterGradient("RandomGamma")
def _RandomGammaGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `RandomGamma` operation. We assume that the inputs to the operation
      are `shape` and `alpha` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[1]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Make the parameters alpha broadcastable with samples by prepending
    # unit dimensions.
    num_sample_dimensions = array_ops.shape(shape)[0]
    alpha_broadcastable = add_leading_unit_dimensions(
        alpha, num_sample_dimensions)
    partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

    # The first input is shape; the second input is alpha.
    return (None, math_ops.reduce_sum(
        grad * partial_a, axis=math_ops.range(num_sample_dimensions)))

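# Hedged usage sketch (assumptions: an `import tensorflow as tf`-style public
# API is available to the caller, `tf.random.gamma` dispatches to the
# `RandomGamma` op registered above, and the `_example_*` helper below is
# hypothetical). Implicit differentiation of the CDF F(z; alpha) = u gives
#   dz/dalpha = -(dF/dalpha) / p(z; alpha),
# which is the per-sample quantity computed by `random_gamma_grad`; the
# registered gradient then applies the chain rule and sums over sample dims.
def _example_gamma_alpha_gradient():
  import tensorflow as tf  # assumed available in the caller's environment
  alpha = tf.constant([2.0, 5.0])
  with tf.GradientTape() as tape:
    tape.watch(alpha)
    sample = tf.random.gamma([10000], alpha)  # shape [10000, 2]
    loss = tf.reduce_mean(sample)
  # Since E[z] = alpha for Gamma(alpha, 1), d(mean)/d(alpha_i) should be close
  # to 1 / len(alpha) = 0.5 for each entry.
  return tape.gradient(loss, alpha)
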

@ops.RegisterGradient("StatelessRandomGammaV2")
def _StatelessRandomGammaV2Grad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessRandomGammaV2` operation. We assume that the inputs to the
      operation are `shape`, `seed` and `alpha` tensors, and the output is the
      `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[2]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Only alpha is differentiable; `shape` and `seed` get None gradients.
    return (None, None, _StatelessGammaGradAlpha(shape, alpha, sample, grad))


@ops.RegisterGradient("StatelessRandomGammaV3")
def _StatelessRandomGammaV3Grad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessRandomGammaV3` operation. We assume that the inputs to the
      operation are `shape`, `key`, `counter`, `alg`, and `alpha` tensors, and
      the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[4]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Only alpha is differentiable; `shape`, `key`, `counter`, and `alg` get
    # None gradients.
    return (None, None, None, None,
            _StatelessGammaGradAlpha(shape, alpha, sample, grad))


def _StatelessGammaGradAlpha(shape, alpha, sample, grad):
  """Returns the gradient of a stateless gamma sampler w.r.t. alpha."""
  # Note that the shape handling differs from the stateful RandomGamma case:
  # here `shape` is the full output shape (including alpha's dimensions), so
  # the number of leading sample dimensions is its size minus the rank of
  # alpha.
  num_sample_dimensions = array_ops.shape(shape)[0] - array_ops.rank(alpha)
  # Make the parameters alpha broadcastable with samples by prepending
  # unit dimensions.
  alpha_broadcastable = add_leading_unit_dimensions(alpha,
                                                    num_sample_dimensions)
  partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

  # Apply the chain rule and sum out the sample dimensions; the callers fill
  # in `None` gradients for the remaining, non-differentiable op inputs.
  return math_ops.reduce_sum(
      grad * partial_a, axis=math_ops.range(num_sample_dimensions))

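# Hedged usage sketch for the stateless path (assumptions: the public
# `tf.random.stateless_gamma` wrapper is available and lowers to one of the
# stateless ops registered above; the `_example_*` name is hypothetical).
# Here `shape` is the full output shape, so `num_sample_dimensions` above is
# size(shape) - rank(alpha) = 2 - 1 = 1 and the reduction is over axis 0.
def _example_stateless_gamma_alpha_gradient():
  import tensorflow as tf  # assumed available in the caller's environment
  alpha = tf.constant([2.0, 5.0])
  with tf.GradientTape() as tape:
    tape.watch(alpha)
    sample = tf.random.stateless_gamma([10000, 2], seed=[1, 2], alpha=alpha)
    loss = tf.reduce_mean(sample)
  return tape.gradient(loss, alpha)  # roughly [0.5, 0.5], as in the stateful case
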

def _Ndtr(x):
  """Standard normal cumulative distribution function."""
  half_sqrt_2 = constant_op.constant(
      0.5 * np.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  w = x * half_sqrt_2
  z = math_ops.abs(w)
  # Use erf near zero and erfc in the tails to avoid cancellation error.
  y = array_ops.where(
      z < half_sqrt_2,
      1. + math_ops.erf(w),
      array_ops.where(
          w > 0., 2. - math_ops.erfc(z), math_ops.erfc(z)))
  return 0.5 * y

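# A minimal sanity check for `_Ndtr` (illustrative only; the `_example_*`
# name is hypothetical): the standard normal CDF satisfies Phi(0) = 0.5 and
# Phi(1.96) is roughly 0.975, so the erf/erfc branches above should reproduce
# those values.
def _example_ndtr_values():
  x = constant_op.constant([0.0, 1.96], dtype=dtypes.float32)
  return _Ndtr(x)  # roughly [0.5, 0.975]
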

@ops.RegisterGradient("StatelessParameterizedTruncatedNormal")
def _StatelessParameterizedTruncatedNormalGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a TruncatedNormal sample w.r.t. parameters.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessParameterizedTruncatedNormal` operation. We assume that the
      inputs to the operation are `shape`, `seed`, `mean`, `stddev`, `minval`,
      and `maxval` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A list of `Tensor` with derivatives with respect to each parameter.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  mean = op.inputs[2]
  stddev = op.inputs[3]
  minval = op.inputs[4]
  maxval = op.inputs[5]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    minval_std = (minval - mean) / stddev
    maxval_std = (maxval - mean) / stddev
    sample_std = (sample - mean) / stddev

    cdf_sample = (_Ndtr(sample_std) - _Ndtr(minval_std)) / (
        _Ndtr(maxval_std) - _Ndtr(minval_std))

    # Clip to avoid a zero argument in the log/log1p expressions below.
    tiny = np.finfo(mean.dtype.as_numpy_dtype).tiny
    eps = np.finfo(mean.dtype.as_numpy_dtype).eps
    cdf_sample = clip_ops.clip_by_value(cdf_sample, tiny, 1 - eps)

    dmaxval = math_ops.exp(0.5 * (sample_std ** 2 - maxval_std ** 2) +
                           math_ops.log(cdf_sample))
    dminval = math_ops.exp(0.5 * (sample_std ** 2 - minval_std ** 2) +
                           math_ops.log1p(-cdf_sample))
    dmean = array_ops.ones_like(sample_std)
    dstddev = sample_std

    # Reduce over extra dimensions caused by `shape`. We need to get the
    # difference in rank from shape vs. the broadcasted rank.

    mean_shape = array_ops.shape(mean)
    stddev_shape = array_ops.shape(stddev)
    minval_shape = array_ops.shape(minval)
    maxval_shape = array_ops.shape(maxval)

    broadcast_shape = array_ops.broadcast_dynamic_shape(
        mean_shape, stddev_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        minval_shape, broadcast_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        maxval_shape, broadcast_shape)
    extra_dims = math_ops.range(
        array_ops.size(shape) - array_ops.size(broadcast_shape))

    grad_mean = math_ops.reduce_sum(grad * dmean, axis=extra_dims)
    grad_stddev = math_ops.reduce_sum(grad * dstddev, axis=extra_dims)
    grad_minval = math_ops.reduce_sum(grad * dminval, axis=extra_dims)
    grad_maxval = math_ops.reduce_sum(grad * dmaxval, axis=extra_dims)

    _, rmean = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, mean_shape)
    _, rstddev = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, stddev_shape)
    _, rminval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, minval_shape)
    _, rmaxval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, maxval_shape)

    grad_mean = array_ops.reshape(
        math_ops.reduce_sum(grad_mean, axis=rmean, keepdims=True), mean_shape)

    grad_stddev = array_ops.reshape(
        math_ops.reduce_sum(grad_stddev, axis=rstddev, keepdims=True),
        stddev_shape)

    grad_minval = array_ops.reshape(
        math_ops.reduce_sum(grad_minval, axis=rminval, keepdims=True),
        minval_shape)

    grad_maxval = array_ops.reshape(
        math_ops.reduce_sum(grad_maxval, axis=rmaxval, keepdims=True),
        maxval_shape)

    # The first two inputs are `shape` and `seed`, which get no gradient.
    return (None, None, grad_mean, grad_stddev, grad_minval, grad_maxval)

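# Hedged usage sketch (assumptions: the public wrapper
# `tf.random.stateless_parameterized_truncated_normal` exists in this build
# and lowers to the op registered above; the `_example_*` name is
# hypothetical). All four distribution parameters receive gradients via the
# rule above, while `shape` and `seed` get None.
def _example_truncated_normal_parameter_gradients():
  import tensorflow as tf  # assumed available in the caller's environment
  mean = tf.constant(0.0)
  stddev = tf.constant(1.0)
  minval = tf.constant(-1.0)
  maxval = tf.constant(2.0)
  with tf.GradientTape() as tape:
    tape.watch([mean, stddev, minval, maxval])
    sample = tf.random.stateless_parameterized_truncated_normal(
        [1000], seed=[3, 4], means=mean, stddevs=stddev,
        minvals=minval, maxvals=maxval)
    loss = tf.reduce_mean(sample)
  # Returns dloss/dmean, dloss/dstddev, dloss/dminval, dloss/dmaxval.
  return tape.gradient(loss, [mean, stddev, minval, maxval])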