Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/random_grad.py: 24% (89 statements)
coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Gradients for operators defined in random_ops.py."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_random_ops
from tensorflow.python.ops import math_ops


def add_leading_unit_dimensions(x, num_dimensions):  # pylint: disable=invalid-name
  """Prepends `num_dimensions` unit dimensions to the shape of `x`."""
  new_shape = array_ops.concat(
      [array_ops.ones([num_dimensions], dtype=dtypes.int32),
       array_ops.shape(x)], axis=0)
  return array_ops.reshape(x, new_shape)
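
# Illustrative example (not part of the original module): with `x` of shape
# [3] and `num_dimensions=2`, the reshaped tensor has shape [1, 1, 3], so it
# broadcasts against samples of shape [s0, s1, 3].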


@ops.RegisterGradient("RandomGamma")
def _RandomGammaGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `RandomGamma` operation. We assume that the inputs to the operation
      are `shape` and `alpha` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[1]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    # Make the parameter alpha broadcastable with samples by prepending
    # unit dimensions.
    num_sample_dimensions = array_ops.shape(shape)[0]
    alpha_broadcastable = add_leading_unit_dimensions(
        alpha, num_sample_dimensions)
    partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

    # The first input is shape; the second input is alpha.
    return (None, math_ops.reduce_sum(
        grad * partial_a, axis=math_ops.range(num_sample_dimensions)))
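

# Illustrative sketch (not part of the original module): how the gradient
# registered above is exercised through the public API. Assumes a TensorFlow
# 2.x install with eager execution; `tf.random.gamma` lowers to the
# `RandomGamma` op, so differentiating a sample w.r.t. `alpha` invokes
# _RandomGammaGrad.
def _example_random_gamma_grad():  # pylint: disable=unused-variable
  import tensorflow as tf  # pylint: disable=g-import-not-at-top
  alpha = tf.constant([1.0, 2.0, 3.0])
  with tf.GradientTape() as tape:
    tape.watch(alpha)
    sample = tf.random.gamma(shape=[10], alpha=alpha)
    loss = tf.reduce_sum(sample)
  # dloss / dalpha, computed via the implicit reparameterization gradient.
  return tape.gradient(loss, alpha)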


@ops.RegisterGradient("StatelessRandomGammaV2")
def _StatelessRandomGammaV2Grad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessRandomGammaV2` operation. We assume that the inputs to the
      operation are `shape`, `seed` and `alpha` tensors, and the output is the
      `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[2]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    return (None, None, _StatelessGammaGradAlpha(shape, alpha, sample, grad))


@ops.RegisterGradient("StatelessRandomGammaV3")
def _StatelessRandomGammaV3Grad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a Gamma sample w.r.t. alpha.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessRandomGammaV3` operation. We assume that the inputs to the
      operation are `shape`, `key`, `counter`, `alg`, and `alpha` tensors, and
      the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A `Tensor` with derivatives `dloss / dalpha`.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  alpha = op.inputs[4]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    return (None, None, None, None,
            _StatelessGammaGradAlpha(shape, alpha, sample, grad))


def _StatelessGammaGradAlpha(shape, alpha, sample, grad):
  """Returns gradients of a gamma sampler wrt alpha."""
  # Note that the shape handling is slightly different for stateless_gamma:
  # in particular, num_sample_dimensions excludes alpha's own dimensions.
  num_sample_dimensions = array_ops.shape(shape)[0] - array_ops.rank(alpha)
  # Make the parameter alpha broadcastable with samples by prepending
  # unit dimensions.
  alpha_broadcastable = add_leading_unit_dimensions(alpha,
                                                    num_sample_dimensions)
  partial_a = gen_random_ops.random_gamma_grad(alpha_broadcastable, sample)

  # Sum over the leading sample dimensions so the result has the same shape
  # as `alpha`.
  return math_ops.reduce_sum(
      grad * partial_a, axis=math_ops.range(num_sample_dimensions))
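

# Illustrative sketch (not part of the original module): differentiating a
# stateless gamma sample w.r.t. `alpha`. Assumes a TensorFlow 2.x install with
# eager execution; `tf.random.stateless_gamma` lowers to one of the
# `StatelessRandomGamma*` ops handled above. Note that `shape` must include
# alpha's batch shape as its trailing dimensions.
def _example_stateless_gamma_grad():  # pylint: disable=unused-variable
  import tensorflow as tf  # pylint: disable=g-import-not-at-top
  alpha = tf.constant([1.0, 2.0, 3.0])
  with tf.GradientTape() as tape:
    tape.watch(alpha)
    sample = tf.random.stateless_gamma(shape=[10, 3], seed=[1, 2], alpha=alpha)
    loss = tf.reduce_sum(sample)
  # dloss / dalpha has the same shape as alpha ([3]).
  return tape.gradient(loss, alpha)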


def _Ndtr(x):
  """Standard normal cumulative distribution function."""
  half_sqrt_2 = constant_op.constant(
      0.5 * np.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  w = x * half_sqrt_2
  z = math_ops.abs(w)
  y = array_ops.where(
      z < half_sqrt_2,
      1. + math_ops.erf(w),
      array_ops.where(
          w > 0., 2. - math_ops.erfc(z), math_ops.erfc(z)))
  return 0.5 * y
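

# A minimal numerical sanity check (illustrative, not part of the module):
# _Ndtr should agree with the closed form Phi(x) = 0.5 * (1 + erf(x / sqrt(2)));
# the erf/erfc split above switches between the two forms for accuracy in the
# tails. Assumes eager execution so `.numpy()` is available.
def _example_ndtr_check():  # pylint: disable=unused-variable
  import math  # pylint: disable=g-import-not-at-top
  xs = [-3.0, -0.5, 0.0, 0.5, 3.0]
  expected = np.array([0.5 * (1.0 + math.erf(v / math.sqrt(2.0))) for v in xs])
  actual = _Ndtr(constant_op.constant(xs, dtype=dtypes.float32))
  np.testing.assert_allclose(actual.numpy(), expected, rtol=1e-6)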


@ops.RegisterGradient("StatelessParameterizedTruncatedNormal")
def _StatelessParameterizedTruncatedNormalGrad(op, grad):  # pylint: disable=invalid-name
  """Returns the gradient of a TruncatedNormal sample w.r.t. parameters.

  The gradient is computed using implicit differentiation
  (Figurnov et al., 2018).

  Args:
    op: A `StatelessParameterizedTruncatedNormal` operation. We assume that the
      inputs to the operation are `shape`, `seed`, `mean`, `stddev`, `minval`,
      and `maxval` tensors, and the output is the `sample` tensor.
    grad: The incoming gradient `dloss / dsample` of the same shape as
      `op.outputs[0]`.

  Returns:
    A list of `Tensor` with derivatives with respect to each parameter.

  References:
    Implicit Reparameterization Gradients:
      [Figurnov et al., 2018]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients)
      ([pdf]
      (http://papers.nips.cc/paper/7326-implicit-reparameterization-gradients.pdf))
  """
  shape = op.inputs[0]
  mean = op.inputs[2]
  stddev = op.inputs[3]
  minval = op.inputs[4]
  maxval = op.inputs[5]
  sample = op.outputs[0]

  with ops.control_dependencies([grad]):
    minval_std = (minval - mean) / stddev
    maxval_std = (maxval - mean) / stddev
    sample_std = (sample - mean) / stddev

    cdf_sample = (_Ndtr(sample_std) - _Ndtr(minval_std)) / (
        _Ndtr(maxval_std) - _Ndtr(minval_std))

    # Clip to avoid a zero argument in the log/log1p expressions below.
    tiny = np.finfo(mean.dtype.as_numpy_dtype).tiny
    eps = np.finfo(mean.dtype.as_numpy_dtype).eps
    cdf_sample = clip_ops.clip_by_value(cdf_sample, tiny, 1 - eps)

    dmaxval = math_ops.exp(0.5 * (sample_std ** 2 - maxval_std ** 2) +
                           math_ops.log(cdf_sample))
    dminval = math_ops.exp(0.5 * (sample_std ** 2 - minval_std ** 2) +
                           math_ops.log1p(-cdf_sample))
    dmean = array_ops.ones_like(sample_std)
    dstddev = sample_std

    # Reduce over extra dimensions caused by `shape`. We need to get the
    # difference in rank from shape vs. the broadcasted rank.

    mean_shape = array_ops.shape(mean)
    stddev_shape = array_ops.shape(stddev)
    minval_shape = array_ops.shape(minval)
    maxval_shape = array_ops.shape(maxval)

    broadcast_shape = array_ops.broadcast_dynamic_shape(
        mean_shape, stddev_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        minval_shape, broadcast_shape)
    broadcast_shape = array_ops.broadcast_dynamic_shape(
        maxval_shape, broadcast_shape)
    extra_dims = math_ops.range(
        array_ops.size(shape) - array_ops.size(broadcast_shape))

    grad_mean = math_ops.reduce_sum(grad * dmean, axis=extra_dims)
    grad_stddev = math_ops.reduce_sum(grad * dstddev, axis=extra_dims)
    grad_minval = math_ops.reduce_sum(grad * dminval, axis=extra_dims)
    grad_maxval = math_ops.reduce_sum(grad * dmaxval, axis=extra_dims)

    _, rmean = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, mean_shape)
    _, rstddev = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, stddev_shape)
    _, rminval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, minval_shape)
    _, rmaxval = gen_array_ops.broadcast_gradient_args(
        broadcast_shape, maxval_shape)

    grad_mean = array_ops.reshape(
        math_ops.reduce_sum(grad_mean, axis=rmean, keepdims=True), mean_shape)

    grad_stddev = array_ops.reshape(
        math_ops.reduce_sum(grad_stddev, axis=rstddev, keepdims=True),
        stddev_shape)

    grad_minval = array_ops.reshape(
        math_ops.reduce_sum(grad_minval, axis=rminval, keepdims=True),
        minval_shape)

    grad_maxval = array_ops.reshape(
        math_ops.reduce_sum(grad_maxval, axis=rmaxval, keepdims=True),
        maxval_shape)

    # The first two inputs are `shape` and `seed`, which have no gradient.
    return (None, None, grad_mean, grad_stddev, grad_minval, grad_maxval)
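

# Illustrative sketch (not part of the original module): differentiating a
# truncated normal sample w.r.t. all four distribution parameters. Assumes a
# TensorFlow 2.x install with eager execution;
# `tf.random.stateless_parameterized_truncated_normal` lowers to the
# `StatelessParameterizedTruncatedNormal` op handled above.
def _example_truncated_normal_grad():  # pylint: disable=unused-variable
  import tensorflow as tf  # pylint: disable=g-import-not-at-top
  mean = tf.constant(0.0)
  stddev = tf.constant(1.0)
  minval = tf.constant(-1.0)
  maxval = tf.constant(2.0)
  with tf.GradientTape() as tape:
    tape.watch([mean, stddev, minval, maxval])
    sample = tf.random.stateless_parameterized_truncated_normal(
        shape=[100], seed=[7, 3], means=mean, stddevs=stddev,
        minvals=minval, maxvals=maxval)
    loss = tf.reduce_sum(sample)
  # Gradients of the summed sample w.r.t. (mean, stddev, minval, maxval).
  return tape.gradient(loss, [mean, stddev, minval, maxval])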