Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/clip_ops.py: 31%
110 statements
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
16"""Operations for clipping (gradient, weight) tensors to min/max values."""
17from tensorflow.python.framework import constant_op
18from tensorflow.python.framework import dtypes
19from tensorflow.python.framework import indexed_slices
20from tensorflow.python.framework import ops
21from tensorflow.python.ops import array_ops
22from tensorflow.python.ops import array_ops_stack
23from tensorflow.python.ops import gen_array_ops
24from tensorflow.python.ops import gen_nn_ops
25from tensorflow.python.ops import math_ops
26from tensorflow.python.util import deprecation
27from tensorflow.python.util import dispatch
28from tensorflow.python.util.compat import collections_abc
29from tensorflow.python.util.tf_export import tf_export


@tf_export("clip_by_value")
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clips tensor values to a specified min and max.

  Given a tensor `t`, this operation returns a tensor of the same type and
  shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
  Any values less than `clip_value_min` are set to `clip_value_min`. Any values
  greater than `clip_value_max` are set to `clip_value_max`.

  Note: `clip_value_min` must be less than or equal to `clip_value_max` for
  correct results.

  For example:

  Basic usage passes a scalar as the min and max value.

  >>> t = tf.constant([[-10., -1., 0.], [0., 2., 10.]])
  >>> t2 = tf.clip_by_value(t, clip_value_min=-1, clip_value_max=1)
  >>> t2.numpy()
  array([[-1., -1.,  0.],
         [ 0.,  1.,  1.]], dtype=float32)

  The min and max can be the same size as `t`, or broadcastable to that size.

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[2],[1]]
  >>> t3 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  >>> t3.numpy()
  array([[ 2.,  2., 10.],
         [ 1.,  1., 10.]], dtype=float32)

  Broadcasting fails, intentionally, if you would expand the dimensions of `t`:

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[[2, 1]]]  # Has a third axis
  >>> t4 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  Traceback (most recent call last):
  ...
  InvalidArgumentError: Incompatible shapes: [2,3] vs. [1,1,2]

  It throws a `TypeError` if you try to clip an `int` tensor to a `float`
  value (`tf.cast` the input to `float` first).

  >>> t = tf.constant([[1, 2], [3, 4]], dtype=tf.int32)
  >>> t5 = tf.clip_by_value(t, clip_value_min=-3.1, clip_value_max=3.1)
  Traceback (most recent call last):
  ...
  TypeError: Cannot convert ...

  Args:
    t: A `Tensor` or `IndexedSlices`.
    clip_value_min: The minimum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    clip_value_max: The maximum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    `tf.errors.InvalidArgumentError`: If the clip tensors would trigger array
      broadcasting that would make the returned tensor larger than the input.
    TypeError: If the dtype of the input is `int32` and the dtype of
      `clip_value_min` or `clip_value_max` is `float32`.
  """
  with ops.name_scope(name, "clip_by_value",
                      [t, clip_value_min, clip_value_max]) as name:
    values = ops.convert_to_tensor(
        t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
        name="t")

    # Clip each value in the tensor: first apply the upper bound, then the
    # lower bound, so every element ends up in
    # [clip_value_min, clip_value_max].
    t_min = math_ops.minimum(values, clip_value_max)
    # Assert that the shape is compatible with the initial shape,
    # to prevent unintentional broadcasting.
    values.shape.assert_is_compatible_with(t_min.shape)

    t_max = math_ops.maximum(t_min, clip_value_min, name=name)
    values.shape.assert_is_compatible_with(t_max.shape)

    if isinstance(t, indexed_slices.IndexedSlices):
      t_max = indexed_slices.IndexedSlices(t_max, t.indices, t.dense_shape)

    return t_max
  # TODO(scottzhu): switch to use new implementation in 2 weeks.
  # return gen_math_ops.clip_by_value(
  #     t, clip_value_min, clip_value_max, name=name)
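

# A minimal sketch of `clip_by_value` on a `tf.IndexedSlices`, assuming eager
# execution; the helper name below is illustrative, not part of the original
# module. Only `.values` are clipped, while `.indices` and `.dense_shape`
# pass through unchanged:
def _demo_clip_by_value_indexed_slices():
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  slices = tf.IndexedSlices(
      values=tf.constant([[-5.0, 0.5], [3.0, 9.0]]),
      indices=tf.constant([0, 2]),
      dense_shape=tf.constant([4, 2]))
  clipped = tf.clip_by_value(slices, clip_value_min=-1.0, clip_value_max=1.0)
  # clipped.values  -> [[-1., 0.5], [1., 1.]]
  # clipped.indices -> [0, 2]  (unchanged)
  return clipped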


@ops.RegisterGradient("ClipByValue")
def _clip_by_value_grad(op, grad):
  """Returns grad of clip_by_value."""
  x = op.inputs[0]  # the clipped tensor
  y = op.inputs[1]  # clip_value_min
  z = op.inputs[2]  # clip_value_max
  gdtype = grad.dtype
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  sz = array_ops.shape(z)
  gradshape = array_ops.shape(grad)
  zeros = array_ops.zeros(gradshape, gdtype)
  xymask = math_ops.less(x, y)     # where x was clipped to the min
  xzmask = math_ops.greater(x, z)  # where x was clipped to the max
  _, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  _, rz = gen_array_ops.broadcast_gradient_args(sx, sz)
  # Pass the gradient through where x was not clipped; route it to the
  # min/max inputs where it was, reducing over any broadcast dimensions.
  xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad)
  ygrad = array_ops.where(xymask, grad, zeros)
  zgrad = array_ops.where(xzmask, grad, zeros)
  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
  gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz)
  return xgrad, gy, gz
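

# A minimal sketch of the gradient behavior registered above, assuming eager
# execution; the helper name is illustrative. `grad` flows through unchanged
# where the input lies inside [clip_value_min, clip_value_max] and is zeroed
# where the input was clipped:
def _demo_clip_by_value_grad():
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  x = tf.constant([-2.0, 0.0, 2.0])
  with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.clip_by_value(x, -1.0, 1.0)
  # d(clip)/dx is 1 inside the interval, 0 where the value was clipped:
  # tape.gradient(y, x) -> [0., 1., 0.]
  return tape.gradient(y, x)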


@tf_export("clip_by_norm")
@dispatch.add_dispatch_support
def clip_by_norm(t, clip_norm, axes=None, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its L2-norm is less than or equal to `clip_norm`,
  along the dimensions given in `axes`. Specifically, in the default case
  where all dimensions are used for calculation, if the L2-norm of `t` is
  already less than or equal to `clip_norm`, then `t` is not modified. If
  the L2-norm is greater than `clip_norm`, then this operation returns a
  tensor of the same type and shape as `t` with its values set to:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  As another example, if `t` is a matrix and `axes == [1]`, then each row
  of the output will have L2-norm less than or equal to `clip_norm`. If
  `axes == [0]` instead, each column of the output will be clipped.

  Code example:

  >>> some_nums = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.float32)
  >>> tf.clip_by_norm(some_nums, 2.0).numpy()
  array([[0.26967996, 0.5393599 , 0.80903983, 1.0787199 , 1.3483998 ]],
        dtype=float32)

  This operation is typically used to clip gradients before applying them with
  an optimizer. Most gradient data is a collection of differently shaped
  tensors for different parts of the model. Thus, this is a common usage:

  ```
  # Get your gradients after training
  loss_value, grads = grad(model, features, labels)

  # Apply some clipping
  grads = [tf.clip_by_norm(g, norm)
           for g in grads]

  # Continue on with training; apply_gradients expects (gradient, variable)
  # pairs
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  ```

  Args:
    t: A `Tensor` or `IndexedSlices`. This must be a floating point type.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value, also
      a floating point type.
    axes: A 1-D (vector) `Tensor` of type int32 containing the dimensions
      to use for computing the L2-norm. If `None` (the default), uses all
      dimensions.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    ValueError: If the `clip_norm` tensor is not a 0-D scalar tensor.
    TypeError: If the dtype of the input is not a floating point or
      complex type.
  """
  with ops.name_scope(name, "clip_by_norm", [t, clip_norm]) as name:
    values = ops.convert_to_tensor(
        t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
        name="t")

    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    l2sum = math_ops.reduce_sum(values * values, axes, keepdims=True)
    pred = l2sum > 0
    # Two-tap tf.where trick to bypass NaN gradients
    l2sum_safe = array_ops.where(pred, l2sum, array_ops.ones_like(l2sum))
    l2norm = array_ops.where(pred, math_ops.sqrt(l2sum_safe), l2sum)
    intermediate = values * clip_norm
    # Assert that the shape is compatible with the initial shape,
    # to prevent unintentional broadcasting.
    values.shape.assert_is_compatible_with(intermediate.shape)
    values_clip = array_ops.identity(
        intermediate / math_ops.maximum(l2norm, clip_norm), name=name)

    if isinstance(t, indexed_slices.IndexedSlices):
      return indexed_slices.IndexedSlices(values_clip, t.indices, t.dense_shape)

    return values_clip
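

# A minimal sketch of per-axis clipping, assuming eager execution; the helper
# name is illustrative. With `axes=[1]` the norm is computed per row, so each
# row of the result independently has L2-norm at most `clip_norm`:
def _demo_clip_by_norm_rows():
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  t = tf.constant([[3.0, 4.0],   # row norm 5.0 -> scaled down to norm 1.0
                   [0.3, 0.4]])  # row norm 0.5 -> left unchanged
  clipped = tf.clip_by_norm(t, clip_norm=1.0, axes=[1])
  # tf.norm(clipped, axis=1) -> [1. , 0.5]
  return clipped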


@tf_export("linalg.global_norm", v1=["linalg.global_norm", "global_norm"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("global_norm")
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are `None` are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections_abc.Sequence) or
      isinstance(t_list, str)):
    raise TypeError("`t_list` should be a sequence of tensors. Received "
                    f"{type(t_list)}.")
  t_list = list(t_list)
  with ops.name_scope(name, "global_norm", t_list) as name:
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
            name="t_%d" % i) if t is not None else t
        for i, t in enumerate(t_list)
    ]
    half_squared_norms = []
    for v in values:
      if v is not None:
        with ops.colocate_with(v):
          half_squared_norms.append(gen_nn_ops.l2_loss(v))

    half_squared_norm = math_ops.reduce_sum(
        array_ops_stack.stack(half_squared_norms))

    norm = math_ops.sqrt(
        half_squared_norm *
        constant_op.constant(2.0, dtype=half_squared_norm.dtype),
        name="global_norm")

  return norm
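

# A minimal sketch checking `global_norm` against its defining formula,
# assuming eager execution; the helper name is illustrative. `None` entries
# are skipped, as documented above:
def _demo_global_norm():
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  t_list = [tf.constant([3.0, 4.0]), None, tf.constant([12.0])]
  # sqrt(l2norm([3, 4])**2 + l2norm([12])**2) = sqrt(25 + 144) = 13
  # tf.linalg.global_norm(t_list) -> 13.0
  return tf.linalg.global_norm(t_list)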


@tf_export("clip_by_global_norm")
@dispatch.add_dispatch_support
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  If `global_norm == infinity` then the entries in `t_list` are all set to `NaN`
  to signal that an error occurred.

  Any of the entries of `t_list` that are `None` are ignored.

  This is the correct way to perform gradient clipping (Pascanu et al., 2012).
  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.

  References:
    On the difficulty of training Recurrent Neural Networks:
    [Pascanu et al., 2012](http://proceedings.mlr.press/v28/pascanu13.html)
    ([pdf](http://proceedings.mlr.press/v28/pascanu13.pdf))
  """
  if (not isinstance(t_list, collections_abc.Sequence) or
      isinstance(t_list, str)):
    raise TypeError("`t_list` should be a sequence of tensors. Received "
                    f"{type(t_list)}.")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.name_scope(name, "clip_by_global_norm",
                      t_list + [clip_norm]) as name:
    # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
    scale_for_finite = clip_norm * math_ops.minimum(
        1.0 / use_norm,
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
    # If use_norm is any finite number, this is a no-op. For inf/-inf/NaN,
    # this will make scale NaN.
    scale = scale_for_finite + (use_norm - use_norm)

    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, indexed_slices.IndexedSlices) else t,
            name="t_%d" % i) if t is not None else t
        for i, t in enumerate(t_list)
    ]

    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
      else:
        with ops.colocate_with(v):
          values_clipped.append(
              array_ops.identity(v * scale, name="%s_%d" % (name, i)))

    list_clipped = [
        indexed_slices.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, indexed_slices.IndexedSlices) else c_v
        for (c_v, t) in zip(values_clipped, t_list)
    ]

  return list_clipped, use_norm
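

# A minimal sketch of clipping a gradient list jointly, assuming eager
# execution; the helper name is illustrative. Every tensor is scaled by the
# same factor clip_norm / max(global_norm, clip_norm), so relative magnitudes
# within the list are preserved:
def _demo_clip_by_global_norm():
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  grads = [tf.constant([3.0, 4.0]), tf.constant([12.0])]  # global norm 13.0
  clipped, norm = tf.clip_by_global_norm(grads, clip_norm=6.5)
  # norm -> 13.0, and every entry is scaled by 6.5 / 13.0 = 0.5:
  # clipped[0] -> [1.5, 2.], clipped[1] -> [6.]
  return clipped, norm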


@deprecation.deprecated(
    date=None,
    instructions="clip_by_average_norm is deprecated in TensorFlow 2.0. Please "
    "use clip_by_norm(t, clip_norm * tf.cast(tf.size(t), tf.float32), name) "
    "instead.")
@tf_export(v1=["clip_by_average_norm"])
@dispatch.add_dispatch_support
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
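

# A minimal sketch of the replacement suggested in the deprecation notice
# above; the helper name is illustrative. Scaling `clip_norm` by the element
# count makes `clip_by_norm` reproduce `clip_by_average_norm`:
def _demo_clip_by_average_norm_replacement(t, clip_norm):
  import tensorflow as tf  # deferred import to avoid a cycle at module load

  # Equivalent to tf.compat.v1.clip_by_average_norm(t, clip_norm):
  return tf.clip_by_norm(t, clip_norm * tf.cast(tf.size(t), tf.float32))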