1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Implementation of Loss operations for use in neural networks."""
17from tensorflow.python.eager import context
18from tensorflow.python.framework import dtypes
19from tensorflow.python.framework import ops
20from tensorflow.python.ops import array_ops
21from tensorflow.python.ops import cond
22from tensorflow.python.ops import confusion_matrix
23from tensorflow.python.ops import control_flow_ops
24from tensorflow.python.ops import math_ops
25from tensorflow.python.ops import nn
26from tensorflow.python.ops import nn_ops
27from tensorflow.python.ops import weights_broadcast_ops
28from tensorflow.python.ops.losses import util
29from tensorflow.python.util import dispatch
30from tensorflow.python.util.deprecation import deprecated_args
31from tensorflow.python.util.deprecation import deprecated_argument_lookup
32from tensorflow.python.util.tf_export import tf_export
35@tf_export(v1=["losses.Reduction"])
36class Reduction:
37 """Types of loss reduction.
39 Contains the following values:
41 * `NONE`: Un-reduced weighted losses with the same shape as input.
42 * `SUM`: Scalar sum of weighted losses.
43 * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
44 * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
45 * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
46 weights. DEPRECATED.
47 * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
48 """
50 NONE = "none"
51 SUM = "weighted_sum"
52 SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
53 MEAN = "weighted_mean"
54 SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
55 SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS
57 @classmethod
58 def all(cls):
59 return (
60 cls.NONE,
61 cls.SUM,
62 cls.MEAN,
63 cls.SUM_OVER_BATCH_SIZE,
64 cls.SUM_OVER_NONZERO_WEIGHTS,
65 cls.SUM_BY_NONZERO_WEIGHTS)
67 @classmethod
68 def validate(cls, key):
69 if key not in cls.all():
70 raise ValueError(f"Invalid Reduction Key {key}. Key should be one of "
71 f"{cls.all()}.")
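(Editor's note: the following is an illustrative sketch, not part of the original file. It shows how a reduction key is typically chosen and validated, assuming TF 2.x with the `tf.compat.v1` API available and eager execution enabled.)

```python
import tensorflow as tf

# Each Reduction constant is just a string key; `validate` rejects anything
# that is not in `Reduction.all()`.
reduction = tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE
tf.compat.v1.losses.Reduction.validate(reduction)  # passes silently

try:
  tf.compat.v1.losses.Reduction.validate("not_a_reduction")
except ValueError as e:
  print(e)  # Invalid Reduction Key not_a_reduction. Key should be one of (...)
```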
74def _safe_mean(losses, num_present):
75 """Computes a safe mean of the losses.
77 Args:
78 losses: `Tensor` whose elements contain individual loss measurements.
79 num_present: The number of measurable elements in `losses`.
81 Returns:
82 A scalar representing the mean of `losses`. If `num_present` is zero,
83 then zero is returned.
84 """
85 total_loss = math_ops.reduce_sum(losses)
86 return math_ops.div_no_nan(total_loss, num_present, name="value")
89def _num_present(losses, weights, per_batch=False):
90 """Computes the number of elements in the loss function induced by `weights`.
92 A given weights tensor induces different numbers of usable elements in the
93 `losses` tensor. The `weights` tensor is broadcast across `losses` for all
94 possible dimensions. For example, if `losses` is a tensor of dimension
95 `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
96 in effect, tiled to match the shape of `losses`. Following this effective
97 tile, the total number of present elements is the number of non-zero weights.
99 Args:
100 losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
101 weights: `Tensor` of shape `[]`, `[batch_size]` or
102 `[batch_size, d1, ... dK]`, where K < N.
103 per_batch: Whether to return the number of elements per batch or as a sum
104 total.
106 Returns:
107 The number of present (non-zero) elements in the losses tensor. If
108 `per_batch` is `True`, the value is returned as a tensor of size
109 `[batch_size]`. Otherwise, a single scalar tensor is returned.
110 """
111 if ((isinstance(weights, float) and weights != 0.0) or
112 (context.executing_eagerly() and weights._rank() == 0 # pylint: disable=protected-access
113 and not math_ops.equal(weights, 0.0))):
114 return _num_elements(losses)
115 with ops.name_scope(None, "num_present", (losses, weights)) as scope:
116 weights = math_ops.cast(weights, dtype=dtypes.float32)
117 present = array_ops.where(
118 math_ops.equal(weights, 0.0),
119 array_ops.zeros_like(weights),
120 array_ops.ones_like(weights))
121 present = weights_broadcast_ops.broadcast_weights(present, losses)
122 if per_batch:
123 return math_ops.reduce_sum(
124 present,
125 axis=math_ops.range(1, array_ops.rank(present)),
126 keepdims=True,
127 name=scope)
128 return math_ops.reduce_sum(present, name=scope)
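(Editor's note: `_num_present` supplies the denominator used by the `SUM_BY_NONZERO_WEIGHTS` reduction. The sketch below is an editor-added approximation of that count using only public ops, assuming eager TF 2.x; the shapes mirror the `[4, 5, 6, 3]` vs. `[4, 5]` example in the docstring and the weight values are made up.)

```python
import tensorflow as tf

losses = tf.zeros([4, 5, 6, 3])                            # [batch, d1, d2, d3]
weights = tf.constant([[0., 1., 1., 0., 2.]] * 4)          # [4, 5], broadcast over d2, d3

# Effectively tile `weights` to the shape of `losses`, then count non-zeros.
present = tf.cast(tf.not_equal(weights, 0.0), tf.float32)
present = tf.broadcast_to(tf.reshape(present, [4, 5, 1, 1]), losses.shape)
print(tf.reduce_sum(present).numpy())  # 12 non-zero weights * 6 * 3 = 216.0
```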
131def _num_elements(losses):
132 """Computes the number of elements in `losses` tensor."""
133 with ops.name_scope(None, "num_elements", values=[losses]) as scope:
134 return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)
137@tf_export(v1=["losses.compute_weighted_loss"])
138@dispatch.add_dispatch_support
139def compute_weighted_loss(
140 losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
141 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
142 """Computes the weighted loss.
144 Args:
145 losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
146 weights: Optional `Tensor` whose rank is either 0, or the same rank as
147 `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
148 be either `1`, or the same as the corresponding `losses` dimension).
149 scope: the scope for the operations performed in computing the loss.
150 loss_collection: the loss will be added to these collections.
151 reduction: Type of reduction to apply to loss.
153 Returns:
154 Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
155 `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
157 Raises:
158 ValueError: If `weights` is `None` or the shape is not compatible with
159 `losses`, or if the number of dimensions (rank) of either `losses` or
160 `weights` is missing.
162 Note:
163 When calculating the gradient of a weighted loss contributions from
164 both `losses` and `weights` are considered. If your `weights` depend
165 on some model parameters but you do not want this to affect the loss
166 gradient, you need to apply `tf.stop_gradient` to `weights` before
167 passing them to `compute_weighted_loss`.
169 @compatibility(eager)
170 The `loss_collection` argument is ignored when executing eagerly. Consider
171 holding on to the return value or collecting losses via a `tf.keras.Model`.
172 @end_compatibility
173 """
174 Reduction.validate(reduction)
175 with ops.name_scope(scope, "weighted_loss", (losses, weights)):
176 # Save the `reduction` argument for loss normalization when distributing
177 # to multiple replicas. Used only for estimator + v1 optimizer flow.
178 ops.get_default_graph()._last_loss_reduction = reduction # pylint: disable=protected-access
180 def compute_loss(losses, weights, loss_collection, reduction):
181 losses = ops.convert_to_tensor(losses)
182 input_dtype = losses.dtype
183 losses = math_ops.cast(losses, dtype=dtypes.float32)
184 weights = math_ops.cast(weights, dtype=dtypes.float32)
185 weighted_losses = math_ops.multiply(losses, weights)
186 if reduction == Reduction.NONE:
187 loss = weighted_losses
188 else:
189 loss = math_ops.reduce_sum(weighted_losses)
190 if reduction == Reduction.MEAN:
191 loss = _safe_mean(
192 loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
193 elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
194 reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
195 loss = _safe_mean(loss, _num_present(losses, weights))
196 elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
197 loss = _safe_mean(loss, _num_elements(losses))
199 # Convert the result back to the input type.
200 loss = math_ops.cast(loss, input_dtype)
201 util.add_loss(loss, loss_collection)
202 return loss
204 # Skip the assert_broadcastable in XLA context because asserts are not
205 # supported so it only causes unnecessary ops. Also skip it because it uses
206 # a DenseToDenseSetOperation op that is incompatible with XLA when
207 # the shape(s) are dynamic.
208 if control_flow_ops.get_enclosing_xla_context() is not None:
209 return compute_loss(losses, weights, loss_collection, reduction)
210 else:
211 with ops.control_dependencies(
212 (weights_broadcast_ops.assert_broadcastable(weights, losses),)):
213 return compute_loss(losses, weights, loss_collection, reduction)
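(Editor's note: an illustrative sketch of the reduction branches above, not part of the original file; assumes eager TF 2.x and hand-picked numbers.)

```python
import tensorflow as tf

losses = tf.constant([1.0, 2.0, 3.0, 4.0])
weights = tf.constant([1.0, 1.0, 0.0, 0.5])
R = tf.compat.v1.losses.Reduction

# Weighted losses are [1, 2, 0, 2]; their sum is 5.
print(tf.compat.v1.losses.compute_weighted_loss(
    losses, weights, reduction=R.SUM).numpy())                  # 5.0
# MEAN divides by the sum of the (broadcast) weights: 5 / 2.5.
print(tf.compat.v1.losses.compute_weighted_loss(
    losses, weights, reduction=R.MEAN).numpy())                 # 2.0
# SUM_OVER_BATCH_SIZE divides by the number of elements: 5 / 4.
print(tf.compat.v1.losses.compute_weighted_loss(
    losses, weights, reduction=R.SUM_OVER_BATCH_SIZE).numpy())  # 1.25
# The default SUM_BY_NONZERO_WEIGHTS divides by the number of non-zero
# weights: 5 / 3.
print(tf.compat.v1.losses.compute_weighted_loss(
    losses, weights).numpy())                                   # ~1.667
```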
216@tf_export(v1=["losses.absolute_difference"])
217@dispatch.add_dispatch_support
218def absolute_difference(
219 labels, predictions, weights=1.0, scope=None,
220 loss_collection=ops.GraphKeys.LOSSES,
221 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
222 """Adds an Absolute Difference loss to the training procedure.
224 `weights` acts as a coefficient for the loss. If a scalar is provided, then
225 the loss is simply scaled by the given value. If `weights` is a `Tensor` of
226 shape `[batch_size]`, then the total loss for each sample of the batch is
227 rescaled by the corresponding element in the `weights` vector. If the shape of
228 `weights` matches the shape of `predictions`, then the loss of each
229 measurable element of `predictions` is scaled by the corresponding value of
230 `weights`.
232 Args:
233 labels: The ground truth output tensor, same dimensions as 'predictions'.
234 predictions: The predicted outputs.
235 weights: Optional `Tensor` whose rank is either 0, or the same rank as
236 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
237 be either `1`, or the same as the corresponding `losses` dimension).
238 scope: The scope for the operations performed in computing the loss.
239 loss_collection: collection to which this loss will be added.
240 reduction: Type of reduction to apply to loss.
242 Returns:
243 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
244 shape as `labels`; otherwise, it is scalar.
246 Raises:
247 ValueError: If the shape of `predictions` doesn't match that of
248 `labels` or if the shape of `weights` is invalid or if `labels`
249 or `predictions` is None.
251 @compatibility(eager)
252 The `loss_collection` argument is ignored when executing eagerly. Consider
253 holding on to the return value or collecting losses via a `tf.keras.Model`.
254 @end_compatibility
255 """
256 if labels is None:
257 raise ValueError("Argument `labels` must not be None.")
258 if predictions is None:
259 raise ValueError("Argument `predictions` must not be None.")
260 with ops.name_scope(scope, "absolute_difference",
261 (predictions, labels, weights)) as scope:
262 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
263 labels = math_ops.cast(labels, dtype=dtypes.float32)
264 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
265 losses = math_ops.abs(math_ops.subtract(predictions, labels))
266 return compute_weighted_loss(
267 losses, weights, scope, loss_collection, reduction=reduction)
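(Editor's note: a minimal usage sketch, not from the original file; assumes eager TF 2.x and made-up numbers.)

```python
import tensorflow as tf

labels = tf.constant([[1.0, 2.0], [3.0, 4.0]])
predictions = tf.constant([[1.5, 2.0], [2.0, 6.0]])

# |predictions - labels| = [[0.5, 0.0], [1.0, 2.0]]; with unit weights and the
# default SUM_BY_NONZERO_WEIGHTS reduction this is simply the mean.
loss = tf.compat.v1.losses.absolute_difference(labels, predictions)
print(loss.numpy())  # 0.875
```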
270@tf_export(v1=["losses.cosine_distance"])
271@dispatch.add_dispatch_support
272@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
273def cosine_distance(
274 labels, predictions, axis=None, weights=1.0, scope=None,
275 loss_collection=ops.GraphKeys.LOSSES,
276 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
277 dim=None):
278 """Adds a cosine-distance loss to the training procedure.
280 Note that the function assumes that `predictions` and `labels` are already
281 unit-normalized.
283 Args:
284 labels: `Tensor` whose shape matches 'predictions'
285 predictions: An arbitrary matrix.
286 axis: The dimension along which the cosine distance is computed.
287 weights: Optional `Tensor` whose rank is either 0, or the same rank as
288 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
289 be either `1`, or the same as the corresponding `losses` dimension).
290 scope: The scope for the operations performed in computing the loss.
291 loss_collection: collection to which this loss will be added.
292 reduction: Type of reduction to apply to loss.
293 dim: The old (deprecated) name for `axis`.
295 Returns:
296 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
297 shape as `labels`; otherwise, it is scalar.
299 Raises:
300 ValueError: If `predictions` shape doesn't match `labels` shape, or
301 `axis`, `labels`, `predictions` or `weights` is `None`.
303 @compatibility(eager)
304 The `loss_collection` argument is ignored when executing eagerly. Consider
305 holding on to the return value or collecting losses via a `tf.keras.Model`.
306 @end_compatibility
307 """
308 axis = deprecated_argument_lookup("axis", axis, "dim", dim)
309 if axis is None:
310 raise ValueError("You must specify argument `axis`.")
311 if labels is None:
312 raise ValueError("Argument `labels` must not be None.")
313 if predictions is None:
314 raise ValueError("Argument `predictions` must not be None.")
315 with ops.name_scope(scope, "cosine_distance_loss",
316 (predictions, labels, weights)) as scope:
317 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
318 labels = math_ops.cast(labels, dtype=dtypes.float32)
319 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
321 radial_diffs = math_ops.multiply(predictions, labels)
322 losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
323 return compute_weighted_loss(
324 losses, weights, scope, loss_collection, reduction=reduction)
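(Editor's note: illustrative sketch, not part of the original file; assumes eager TF 2.x. The inputs are chosen to already be unit-normalized along `axis`, as the docstring requires.)

```python
import tensorflow as tf

labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
predictions = tf.constant([[0.6, 0.8], [0.0, 1.0]])

# Per-row distance is 1 - <labels, predictions>: [0.4, 0.0]. The default
# reduction divides the sum by the number of loss elements with non-zero
# weight (2), giving 0.2.
loss = tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1)
print(loss.numpy())  # 0.2
```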
327@tf_export(v1=["losses.hinge_loss"])
328@dispatch.add_dispatch_support
329def hinge_loss(labels, logits, weights=1.0, scope=None,
330 loss_collection=ops.GraphKeys.LOSSES,
331 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
332 """Adds a hinge loss to the training procedure.
334 Args:
335 labels: The ground truth output tensor. Its shape should match the shape of
336 logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
337 the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
338 logits: The logits, a float tensor. Note that logits are assumed to be
339 unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
340 (resp. negative) binary prediction.
341 weights: Optional `Tensor` whose rank is either 0, or the same rank as
342 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
343 be either `1`, or the same as the corresponding `losses` dimension).
344 scope: The scope for the operations performed in computing the loss.
345 loss_collection: collection to which the loss will be added.
346 reduction: Type of reduction to apply to loss.
348 Returns:
349 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
350 shape as `labels`; otherwise, it is scalar.
352 Raises:
353 ValueError: If the shapes of `logits` and `labels` don't match or
354 if `labels` or `logits` is None.
356 @compatibility(eager)
357 The `loss_collection` argument is ignored when executing eagerly. Consider
358 holding on to the return value or collecting losses via a `tf.keras.Model`.
359 @end_compatibility
360 """
361 if labels is None:
362 raise ValueError("Argument `labels` must not be None.")
363 if logits is None:
364 raise ValueError("Argument `logits` must not be None.")
365 with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
366 logits = math_ops.cast(logits, dtype=dtypes.float32)
367 labels = math_ops.cast(labels, dtype=dtypes.float32)
368 logits.get_shape().assert_is_compatible_with(labels.get_shape())
369 # We first need to convert binary labels to -1/1 labels (as floats).
370 all_ones = array_ops.ones_like(labels)
371 labels = math_ops.subtract(2 * labels, all_ones)
372 losses = nn_ops.relu(
373 math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
374 return compute_weighted_loss(
375 losses, weights, scope, loss_collection, reduction=reduction)
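(Editor's note: a small hand-worked sketch of the {0,1} -> {-1,+1} conversion described above; not from the original file, assumes eager TF 2.x.)

```python
import tensorflow as tf

labels = tf.constant([0.0, 0.0, 1.0, 1.0])   # {0, 1} labels
logits = tf.constant([-2.0, 0.5, 0.3, 3.0])  # unbounded, 0-centered scores

# Internally labels become {-1, +1} and each loss is max(0, 1 - label * logit):
# [max(0, 1-2), max(0, 1+0.5), max(0, 1-0.3), max(0, 1-3)] = [0, 1.5, 0.7, 0]
loss = tf.compat.v1.losses.hinge_loss(labels, logits)
print(loss.numpy())  # (0 + 1.5 + 0.7 + 0) / 4 = 0.55
```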
378@tf_export(v1=["losses.huber_loss"])
379@dispatch.add_dispatch_support
380def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
381 loss_collection=ops.GraphKeys.LOSSES,
382 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
383 """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.
385 For each value x in `error=labels-predictions`, the following is calculated:
387 ```
388 0.5 * x^2 if |x| <= d
389 0.5 * d^2 + d * (|x| - d) if |x| > d
390 ```
392 where d is `delta`.
394 `weights` acts as a coefficient for the loss. If a scalar is provided, then
395 the loss is simply scaled by the given value. If `weights` is a tensor of size
396 `[batch_size]`, then the total loss for each sample of the batch is rescaled
397 by the corresponding element in the `weights` vector. If the shape of
398 `weights` matches the shape of `predictions`, then the loss of each
399 measurable element of `predictions` is scaled by the corresponding value of
400 `weights`.
402 Args:
403 labels: The ground truth output tensor, same dimensions as 'predictions'.
404 predictions: The predicted outputs.
405 weights: Optional `Tensor` whose rank is either 0, or the same rank as
406 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
407 be either `1`, or the same as the corresponding `losses` dimension).
408 delta: `float`, the point where the huber loss function changes from a
409 quadratic to linear.
410 scope: The scope for the operations performed in computing the loss.
411 loss_collection: collection to which the loss will be added.
412 reduction: Type of reduction to apply to loss.
414 Returns:
415 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
416 shape as `labels`; otherwise, it is scalar.
418 Raises:
419 ValueError: If the shape of `predictions` doesn't match that of `labels` or
420 if the shape of `weights` is invalid. Also if `labels` or
421 `predictions` is None.
423 @compatibility(eager)
424 The `loss_collection` argument is ignored when executing eagerly. Consider
425 holding on to the return value or collecting losses via a `tf.keras.Model`.
426 @end_compatibility
427 """
428 if labels is None:
429 raise ValueError("Argument `labels` must not be None.")
430 if predictions is None:
431 raise ValueError("Argument `predictions` must not be None.")
432 with ops.name_scope(scope, "huber_loss",
433 (predictions, labels, weights)) as scope:
434 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
435 labels = math_ops.cast(labels, dtype=dtypes.float32)
436 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
437 error = math_ops.subtract(predictions, labels)
438 abs_error = math_ops.abs(error)
439 quadratic = math_ops.minimum(abs_error, delta)
440 # The following expression is the same in value as
441 # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
442 # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
443 # This is necessary to avoid doubling the gradient, since there is already a
444 # nonzero contribution to the gradient from the quadratic term.
445 linear = math_ops.subtract(abs_error, quadratic)
446 losses = math_ops.add(
447 math_ops.multiply(
448 ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
449 math_ops.multiply(quadratic, quadratic)),
450 math_ops.multiply(delta, linear))
451 return compute_weighted_loss(
452 losses, weights, scope, loss_collection, reduction=reduction)
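(Editor's note: a sketch of the quadratic/linear split defined above; not from the original file, assumes eager TF 2.x and made-up values.)

```python
import tensorflow as tf

labels = tf.constant([0.0, 0.0, 0.0])
predictions = tf.constant([0.5, 1.0, 3.0])   # errors are 0.5, 1.0, 3.0
delta = 1.0

# Per element: 0.5*x^2 while |x| <= delta, then 0.5*delta^2 + delta*(|x|-delta):
# [0.5*0.25, 0.5*1.0, 0.5 + 1.0*(3.0 - 1.0)] = [0.125, 0.5, 2.5]
loss = tf.compat.v1.losses.huber_loss(labels, predictions, delta=delta)
print(loss.numpy())  # (0.125 + 0.5 + 2.5) / 3 = 1.0416667
```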
455@tf_export(v1=["losses.log_loss"])
456@dispatch.add_dispatch_support
457def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
458 loss_collection=ops.GraphKeys.LOSSES,
459 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
460 """Adds a Log Loss term to the training procedure.
462 `weights` acts as a coefficient for the loss. If a scalar is provided, then
463 the loss is simply scaled by the given value. If `weights` is a tensor of size
464 `[batch_size]`, then the total loss for each sample of the batch is rescaled
465 by the corresponding element in the `weights` vector. If the shape of
466 `weights` matches the shape of `predictions`, then the loss of each
467 measurable element of `predictions` is scaled by the corresponding value of
468 `weights`.
470 Args:
471 labels: The ground truth output tensor, same dimensions as 'predictions'.
472 predictions: The predicted outputs.
473 weights: Optional `Tensor` whose rank is either 0, or the same rank as
474 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
475 be either `1`, or the same as the corresponding `losses` dimension).
476 epsilon: A small increment to add to avoid taking a log of zero.
477 scope: The scope for the operations performed in computing the loss.
478 loss_collection: collection to which the loss will be added.
479 reduction: Type of reduction to apply to loss.
481 Returns:
482 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
483 shape as `labels`; otherwise, it is scalar.
485 Raises:
486 ValueError: If the shape of `predictions` doesn't match that of `labels` or
487 if the shape of `weights` is invalid. Also if `labels` or `predictions`
488 is None.
490 @compatibility(eager)
491 The `loss_collection` argument is ignored when executing eagerly. Consider
492 holding on to the return value or collecting losses via a `tf.keras.Model`.
493 @end_compatibility
494 """
495 if labels is None:
496 raise ValueError("Argument `labels` must not be None.")
497 if predictions is None:
498 raise ValueError("Argument `predictions` must not be None.")
499 with ops.name_scope(scope, "log_loss",
500 (predictions, labels, weights)) as scope:
501 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
502 labels = math_ops.cast(labels, dtype=dtypes.float32)
503 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
504 losses = -math_ops.multiply(
505 labels,
506 math_ops.log(predictions + epsilon)) - math_ops.multiply(
507 (1 - labels), math_ops.log(1 - predictions + epsilon))
508 return compute_weighted_loss(
509 losses, weights, scope, loss_collection, reduction=reduction)
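(Editor's note: an illustrative cross-check of the log-loss formula above; not from the original file, assumes eager TF 2.x and made-up probabilities.)

```python
import numpy as np
import tensorflow as tf

labels = tf.constant([1.0, 0.0, 1.0])
predictions = tf.constant([0.9, 0.2, 0.6])

# Per element: -y*log(p + eps) - (1-y)*log(1 - p + eps); the default reduction
# averages over the non-zero weights (here, all 3 elements).
loss = tf.compat.v1.losses.log_loss(labels, predictions)
expected = -(np.log(0.9) + np.log(0.8) + np.log(0.6)) / 3
print(loss.numpy(), expected)  # both roughly 0.2798
```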
512# TODO(b/37208492): Add reduction arg.
513@tf_export(v1=["losses.mean_pairwise_squared_error"])
514@dispatch.add_dispatch_support
515def mean_pairwise_squared_error(
516 labels, predictions, weights=1.0, scope=None,
517 loss_collection=ops.GraphKeys.LOSSES):
518 """Adds a pairwise-errors-squared loss to the training procedure.
520 Unlike `mean_squared_error`, which is a measure of the differences between
521 corresponding elements of `predictions` and `labels`,
522 `mean_pairwise_squared_error` is a measure of the differences between pairs of
523 corresponding elements of `predictions` and `labels`.
525 For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], the
526 three pairs of differences are summed to compute the loss:
527 loss = [ ((a-b) - (x-y))^2 + ((a-c) - (x-z))^2 + ((b-c) - (y-z))^2 ] / 3
529 Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
530 corresponding pairs are computed within each batch sample but not across
531 samples within a batch. For example, if `predictions` represents a batch of
532 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
533 is drawn from each image, but not across images.
535 `weights` acts as a coefficient for the loss. If a scalar is provided, then
536 the loss is simply scaled by the given value. If `weights` is a tensor of size
537 `[batch_size]`, then the total loss for each sample of the batch is rescaled
538 by the corresponding element in the `weights` vector.
540 Args:
541 labels: The ground truth output tensor, whose shape must match the shape of
542 `predictions`.
543 predictions: The predicted outputs, a tensor of size
544 `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
545 `predictions`.
546 weights: Coefficients for the loss a scalar, a tensor of shape
547 `[batch_size]` or a tensor whose shape matches `predictions`.
548 scope: The scope for the operations performed in computing the loss.
549 loss_collection: collection to which the loss will be added.
551 Returns:
552 A scalar `Tensor` that returns the weighted loss.
554 Raises:
555 ValueError: If the shape of `predictions` doesn't match that of `labels` or
556 if the shape of `weights` is invalid. Also if `labels` or `predictions`
557 is None.
559 @compatibility(eager)
560 The `loss_collection` argument is ignored when executing eagerly. Consider
561 holding on to the return value or collecting losses via a `tf.keras.Model`.
562 @end_compatibility
563 """
564 if labels is None:
565 raise ValueError("Argument `labels` must not be None.")
566 if predictions is None:
567 raise ValueError("Argument `predictions` must not be None.")
568 with ops.name_scope(scope, "mean_pairwise_squared_error",
569 (predictions, labels, weights)) as scope:
570 weights = math_ops.cast(weights, dtype=dtypes.float32)
571 labels = math_ops.cast(labels, dtype=dtypes.float32)
573 def compute_loss(labels, predictions, weights, loss_collection):
574 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
575 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
577 diffs = math_ops.subtract(predictions, labels)
579 axis = math_ops.range(1, array_ops.rank(diffs))
581 sum_squares_diff_per_batch = math_ops.reduce_sum(
582 math_ops.square(diffs), axis=axis, keepdims=True)
583 num_present_per_batch = _num_present(diffs, weights, per_batch=True)
585 term1 = 2.0 * math_ops.div_no_nan(
586 sum_squares_diff_per_batch,
587 math_ops.maximum(num_present_per_batch - 1, 0),
588 name="value")
590 sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
591 term2 = 2.0 * math_ops.div_no_nan(
592 math_ops.square(sum_diff),
593 math_ops.maximum(
594 math_ops.multiply(num_present_per_batch,
595 num_present_per_batch - 1), 0),
596 name="value")
598 weighted_losses = math_ops.multiply(term1 - term2, weights)
599 loss = math_ops.reduce_sum(weighted_losses)
601 mean_loss = array_ops.where(
602 math_ops.reduce_sum(num_present_per_batch) > 0,
603 loss,
604 array_ops.zeros_like(loss),
605 name="value")
606 util.add_loss(mean_loss, loss_collection)
607 return mean_loss
609 # Skip the assert_broadcastable in XLA context because asserts are not
610 # supported so it only causes unnecessary ops. Also skip it because it uses
611 # a DenseToDenseSetOperation op that is incompatible with XLA when
612 # the shape(s) are dynamic.
613 if control_flow_ops.get_enclosing_xla_context() is not None:
614 return compute_loss(labels, predictions, weights, loss_collection)
615 else:
616 with ops.control_dependencies(
617 (weights_broadcast_ops.assert_broadcastable(weights, labels),)):
618 return compute_loss(labels, predictions, weights, loss_collection)
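(Editor's note: an illustrative sketch mirroring the `[a, b, c]` / `[x, y, z]` example in the docstring; not from the original file, assumes eager TF 2.x and made-up values.)

```python
import tensorflow as tf

# One batch sample with three components.
labels = tf.constant([[4.0, 8.0, 12.0]])
predictions = tf.constant([[8.0, 1.0, 3.0]])

# Pairwise differences of labels:      (a-b, a-c, b-c) = (-4, -8, -4)
# Pairwise differences of predictions: (x-y, x-z, y-z) = ( 7,  5, -2)
# loss = [(-4-7)^2 + (-8-5)^2 + (-4+2)^2] / 3 = (121 + 169 + 4) / 3 = 98.0
loss = tf.compat.v1.losses.mean_pairwise_squared_error(labels, predictions)
print(loss.numpy())  # 98.0
```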
621@tf_export(v1=["losses.mean_squared_error"])
622@dispatch.add_dispatch_support
623def mean_squared_error(
624 labels, predictions, weights=1.0, scope=None,
625 loss_collection=ops.GraphKeys.LOSSES,
626 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
627 """Adds a Sum-of-Squares loss to the training procedure.
629 `weights` acts as a coefficient for the loss. If a scalar is provided, then
630 the loss is simply scaled by the given value. If `weights` is a tensor of size
631 `[batch_size]`, then the total loss for each sample of the batch is rescaled
632 by the corresponding element in the `weights` vector. If the shape of
633 `weights` matches the shape of `predictions`, then the loss of each
634 measurable element of `predictions` is scaled by the corresponding value of
635 `weights`.
637 Args:
638 labels: The ground truth output tensor, same dimensions as 'predictions'.
639 predictions: The predicted outputs.
640 weights: Optional `Tensor` whose rank is either 0, or the same rank as
641 `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
642 be either `1`, or the same as the corresponding `losses` dimension).
643 scope: The scope for the operations performed in computing the loss.
644 loss_collection: collection to which the loss will be added.
645 reduction: Type of reduction to apply to loss.
647 Returns:
648 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
649 shape as `labels`; otherwise, it is scalar.
651 Raises:
652 ValueError: If the shape of `predictions` doesn't match that of `labels` or
653 if the shape of `weights` is invalid. Also if `labels` or `predictions`
654 is None.
656 @compatibility(TF2)
658 `tf.compat.v1.losses.mean_squared_error` is mostly compatible with eager
659 execution and `tf.function`. But, the `loss_collection` argument is
660 ignored when executing eagerly and no loss will be written to the loss
661 collections. You will need to either hold on to the return value manually
662 or rely on `tf.keras.Model` loss tracking.
665 To switch to native TF2 style, instantiate the
666 `tf.keras.losses.MeanSquaredError` class and call the object instead.
669 #### Structural Mapping to Native TF2
671 Before:
673 ```python
674 loss = tf.compat.v1.losses.mean_squared_error(
675 labels=labels,
676 predictions=predictions,
677 weights=weights,
678 reduction=reduction)
679 ```
681 After:
683 ```python
684 loss_fn = tf.keras.losses.MeanSquaredError(
685 reduction=reduction)
686 loss = loss_fn(
687 y_true=labels,
688 y_pred=predictions,
689 sample_weight=weights)
690 ```
692 #### How to Map Arguments
694 | TF1 Arg Name | TF2 Arg Name | Note |
695 | :-------------------- | :--------------- | :------------------------- |
696 | `labels` | `y_true` | In `__call__()` method |
697 | `predictions` | `y_pred` | In `__call__()` method |
698 | `weights` | `sample_weight` | In `__call__()` method. |
699 : : : The shape requirements for `sample_weight` are different from :
700 : : : `weights`. Please check the [argument definition][api_docs] for :
701 : : : details. :
702 | `scope` | Not supported | - |
703 | `loss_collection` | Not supported | Losses should be tracked |
704 : : : explicitly or with Keras APIs, for example, [add_loss][add_loss], :
705 : : : instead of via collections :
706 | `reduction` | `reduction` | In constructor. Value of |
707 : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
708 : : : `tf.compat.v1.losses.Reduction.SUM`, :
709 : : : `tf.compat.v1.losses.Reduction.NONE` in :
710 : : : `tf.compat.v1.losses.mean_squared_error` correspond to :
711 : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
712 : : : `tf.keras.losses.Reduction.SUM`, :
713 : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
714 : : : used other value for `reduction`, including the default value :
715 : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
716 : : : no directly corresponding value. Please modify the loss :
717 : : : implementation manually. :
719 [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
720 [api_docs]:https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError#__call__
723 #### Before & After Usage Example
725 Before:
727 >>> y_true = [1, 2, 3]
728 >>> y_pred = [1, 3, 5]
729 >>> weights = [0, 1, 0.25]
730 >>> # samples with zero-weight are excluded from calculation when `reduction`
731 >>> # argument is set to default value `Reduction.SUM_BY_NONZERO_WEIGHTS`
732 >>> tf.compat.v1.losses.mean_squared_error(
733 ... labels=y_true,
734 ... predictions=y_pred,
735 ... weights=weights).numpy()
736 1.0
738 >>> tf.compat.v1.losses.mean_squared_error(
739 ... labels=y_true,
740 ... predictions=y_pred,
741 ... weights=weights,
742 ... reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE).numpy()
743 0.66667
745 After:
747 >>> y_true = [[1.0], [2.0], [3.0]]
748 >>> y_pred = [[1.0], [3.0], [5.0]]
749 >>> weights = [1, 1, 0.25]
750 >>> mse = tf.keras.losses.MeanSquaredError(
751 ... reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)
752 >>> mse(y_true=y_true, y_pred=y_pred, sample_weight=weights).numpy()
753 0.66667
755 @end_compatibility
756 """
757 if labels is None:
758 raise ValueError("Argument `labels` must not be None.")
759 if predictions is None:
760 raise ValueError("Argument `predictions` must not be None.")
761 with ops.name_scope(scope, "mean_squared_error",
762 (predictions, labels, weights)) as scope:
763 predictions = math_ops.cast(predictions, dtype=dtypes.float32)
764 labels = math_ops.cast(labels, dtype=dtypes.float32)
765 predictions.get_shape().assert_is_compatible_with(labels.get_shape())
766 losses = math_ops.squared_difference(predictions, labels)
767 return compute_weighted_loss(
768 losses, weights, scope, loss_collection, reduction=reduction)
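(Editor's note: for reference, the plain-NumPy arithmetic behind the doctest values shown in the docstring above; an editor-added cross-check, not part of the original file.)

```python
import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.0, 3.0, 5.0])
weights = np.array([0.0, 1.0, 0.25])

weighted_sq_err = weights * (y_pred - y_true) ** 2        # [0.0, 1.0, 1.0]
print(weighted_sq_err.sum() / np.count_nonzero(weights))  # SUM_BY_NONZERO_WEIGHTS -> 1.0
print(weighted_sq_err.sum() / weighted_sq_err.size)       # SUM_OVER_BATCH_SIZE -> 0.666...
```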
771@tf_export(v1=["losses.sigmoid_cross_entropy"])
772@dispatch.add_dispatch_support
773def sigmoid_cross_entropy(
774 multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
775 loss_collection=ops.GraphKeys.LOSSES,
776 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
777 """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.
779 `weights` acts as a coefficient for the loss. If a scalar is provided,
780 then the loss is simply scaled by the given value. If `weights` is a
781 tensor of shape `[batch_size]`, then the loss weights apply to each
782 corresponding sample.
784 If `label_smoothing` is nonzero, smooth the labels towards 1/2:
786 new_multi_class_labels = multi_class_labels * (1 - label_smoothing)
787 + 0.5 * label_smoothing
789 Args:
790 multi_class_labels: `[batch_size, num_classes]` target integer labels in
791 `{0, 1}`.
792 logits: Float `[batch_size, num_classes]` logits outputs of the network.
793 weights: Optional `Tensor` whose rank is either 0, or the same rank as
794 `multi_class_labels`, and must be broadcastable to `multi_class_labels`
795 (i.e., all dimensions must be either `1`, or the same as the
796 corresponding `losses` dimension).
797 label_smoothing: If greater than `0` then smooth the labels.
798 scope: The scope for the operations performed in computing the loss.
799 loss_collection: collection to which the loss will be added.
800 reduction: Type of reduction to apply to loss.
802 Returns:
803 Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
804 `NONE`, this has the same shape as `logits`; otherwise, it is scalar.
806 Raises:
807 ValueError: If the shape of `logits` doesn't match that of
808 `multi_class_labels` or if the shape of `weights` is invalid, or if
809 `weights` is None. Also if `multi_class_labels` or `logits` is None.
811 @compatibility(eager)
812 The `loss_collection` argument is ignored when executing eagerly. Consider
813 holding on to the return value or collecting losses via a `tf.keras.Model`.
814 @end_compatibility
815 """
816 if multi_class_labels is None:
817 raise ValueError("Argument `multi_class_labels` must not be None.")
818 if logits is None:
819 raise ValueError("Argument `logits` must not be None.")
820 with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
821 (logits, multi_class_labels, weights)) as scope:
822 logits = ops.convert_to_tensor(logits)
823 multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
824 logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())
826 if label_smoothing > 0:
827 multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
828 0.5 * label_smoothing)
830 losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
831 logits=logits,
832 name="xentropy")
833 return compute_weighted_loss(
834 losses, weights, scope, loss_collection, reduction=reduction)
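(Editor's note: an illustrative sketch of the label-smoothing behaviour described above; not from the original file, assumes eager TF 2.x and made-up labels/logits.)

```python
import tensorflow as tf

multi_class_labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
logits = tf.constant([[2.0, -1.0], [-0.5, 1.5]])

# Without smoothing this is the element-wise sigmoid cross-entropy, averaged
# over the non-zero weights (here: all 4 elements).
loss = tf.compat.v1.losses.sigmoid_cross_entropy(multi_class_labels, logits)

# With label_smoothing=0.2 the targets are first pulled towards 0.5:
# label * 0.8 + 0.1, i.e. 1 -> 0.9 and 0 -> 0.1.
smoothed = tf.compat.v1.losses.sigmoid_cross_entropy(
    multi_class_labels, logits, label_smoothing=0.2)
print(loss.numpy(), smoothed.numpy())
```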
837@tf_export(v1=["losses.softmax_cross_entropy"])
838@dispatch.add_dispatch_support
839def softmax_cross_entropy(
840 onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
841 loss_collection=ops.GraphKeys.LOSSES,
842 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
843 r"""Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.
845 `weights` acts as a coefficient for the loss. If a scalar is provided,
846 then the loss is simply scaled by the given value. If `weights` is a
847 tensor of shape `[batch_size]`, then the loss weights apply to each
848 corresponding sample.
850 If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
851 new_onehot_labels = onehot_labels * (1 - label_smoothing)
852 + label_smoothing / num_classes
854 Note that `onehot_labels` and `logits` must have the same shape,
855 e.g. `[batch_size, num_classes]`. The shape of `weights` must be
856 broadcastable to loss, whose shape is decided by the shape of `logits`.
857 In case the shape of `logits` is `[batch_size, num_classes]`, loss is
858 a `Tensor` of shape `[batch_size]`.
860 Args:
861 onehot_labels: One-hot-encoded labels.
862 logits: Logits outputs of the network.
863 weights: Optional `Tensor` that is broadcastable to loss.
864 label_smoothing: If greater than 0 then smooth the labels.
865 scope: the scope for the operations performed in computing the loss.
866 loss_collection: collection to which the loss will be added.
867 reduction: Type of reduction to apply to loss.
869 Returns:
870 Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
871 `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.
873 Raises:
874 ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
875 or if the shape of `weights` is invalid or if `weights` is None. Also if
876 `onehot_labels` or `logits` is None.
878 @compatibility(TF2)
880 `tf.compat.v1.losses.softmax_cross_entropy` is mostly compatible with eager
881 execution and `tf.function`. But, the `loss_collection` argument is
882 ignored when executing eagerly and no loss will be written to the loss
883 collections. You will need to either hold on to the return value manually
884 or rely on `tf.keras.Model` loss tracking.
887 To switch to native TF2 style, instantiate the
888 `tf.keras.losses.CategoricalCrossentropy` class with `from_logits` set
889 as `True` and call the object instead.
892 #### Structural Mapping to Native TF2
894 Before:
896 ```python
897 loss = tf.compat.v1.losses.softmax_cross_entropy(
898 onehot_labels=onehot_labels,
899 logits=logits,
900 weights=weights,
901 label_smoothing=smoothing)
902 ```
904 After:
906 ```python
907 loss_fn = tf.keras.losses.CategoricalCrossentropy(
908 from_logits=True,
909 label_smoothing=smoothing)
910 loss = loss_fn(
911 y_true=onehot_labels,
912 y_pred=logits,
913 sample_weight=weights)
914 ```
916 #### How to Map Arguments
918 | TF1 Arg Name | TF2 Arg Name | Note |
919 | :-------------------- | :--------------- | :------------------------- |
920 | - | `from_logits` | Set `from_logits` as True |
921 : : : to have identical behavior :
922 | `onehot_labels` | `y_true` | In `__call__()` method |
923 | `logits` | `y_pred` | In `__call__()` method |
924 | `weights` | `sample_weight` | In `__call__()` method |
925 | `label_smoothing` | `label_smoothing`| In constructor |
926 | `scope` | Not supported | - |
927 | `loss_collection` | Not supported | Losses should be tracked |
928 : : : explicitly or with Keras :
929 : : : APIs, for example, :
930 : : : [add_loss][add_loss], :
931 : : : instead of via collections :
932 | `reduction` | `reduction` | In constructor. Value of |
933 : : : `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
934 : : : `tf.compat.v1.losses.Reduction.SUM`, :
935 : : : `tf.compat.v1.losses.Reduction.NONE` in :
936 : : : `tf.compat.v1.losses.softmax_cross_entropy` correspond to :
937 : : : `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, :
938 : : : `tf.keras.losses.Reduction.SUM`, :
939 : : : `tf.keras.losses.Reduction.NONE`, respectively. If you :
940 : : : used other value for `reduction`, including the default value :
941 : : : `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, there is :
942 : : : no directly corresponding value. Please modify the loss :
943 : : : implementation manually. :
945 [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
948 #### Before & After Usage Example
950 Before:
952 >>> y_true = [[0, 1, 0], [0, 0, 1]]
953 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
954 >>> weights = [0.3, 0.7]
955 >>> smoothing = 0.2
956 >>> tf.compat.v1.losses.softmax_cross_entropy(y_true, y_pred, weights=weights,
957 ... label_smoothing=smoothing).numpy()
958 0.57618
960 After:
962 >>> cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
963 ... label_smoothing=smoothing)
964 >>> cce(y_true, y_pred, sample_weight=weights).numpy()
965 0.57618
967 @end_compatibility
968 """
969 if onehot_labels is None:
970 raise ValueError("Argument `onehot_labels` must not be None.")
971 if logits is None:
972 raise ValueError("Argument `logits` must not be None.")
973 with ops.name_scope(scope, "softmax_cross_entropy_loss",
974 (logits, onehot_labels, weights)) as scope:
975 logits = ops.convert_to_tensor(logits)
976 onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
977 logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())
979 if label_smoothing > 0:
980 num_classes = math_ops.cast(
981 array_ops.shape(onehot_labels)[-1], logits.dtype)
982 smooth_positives = 1.0 - label_smoothing
983 smooth_negatives = label_smoothing / num_classes
984 onehot_labels = onehot_labels * smooth_positives + smooth_negatives
986 onehot_labels = array_ops.stop_gradient(
987 onehot_labels, name="labels_stop_gradient")
988 losses = nn.softmax_cross_entropy_with_logits_v2(
989 labels=onehot_labels, logits=logits, name="xentropy")
991 return compute_weighted_loss(
992 losses, weights, scope, loss_collection, reduction=reduction)
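(Editor's note: an illustrative sketch of the 1/num_classes label smoothing described above; not from the original file, assumes eager TF 2.x and made-up labels/logits.)

```python
import tensorflow as tf

onehot_labels = tf.constant([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
logits = tf.constant([[1.0, 3.0, 0.5], [0.2, 0.2, 2.0]])

# With 3 classes and label_smoothing=0.3 each label becomes
# label * (1 - 0.3) + 0.3 / 3, i.e. 1 -> 0.8 and 0 -> 0.1.
loss = tf.compat.v1.losses.softmax_cross_entropy(
    onehot_labels, logits, label_smoothing=0.3)
print(loss.numpy())  # per-sample cross-entropies averaged over the batch
```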
995# TODO(ptucker): Merge this with similar method in metrics_impl.
996def _remove_squeezable_dimensions(
997 labels, predictions, weights=None, expected_rank_diff=0):
998 """Internal version of _remove_squeezable_dimensions which handles weights.
1000 Squeezes `predictions` and `labels` if their ranks differ from expected by
1001 exactly 1.
1002 Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.
1004 This will use static shape if available. Otherwise, it will add graph
1005 operations, which could result in a performance hit.
1007 Args:
1008 labels: Label values, a `Tensor` whose dimensions match `predictions`.
1009 predictions: Predicted values, a `Tensor` of arbitrary dimensions.
1010 weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
1011 and its rank is 1 more than the new rank of `labels`.
1012 expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
1014 Returns:
1015 Tuple of `predictions`, `labels` and `weights`, possibly with the last
1016 dimension squeezed.
1017 """
1018 labels, predictions = confusion_matrix.remove_squeezable_dimensions(
1019 labels, predictions, expected_rank_diff=expected_rank_diff)
1021 if weights is not None:
1022 weights = ops.convert_to_tensor(weights)
1023 labels_rank = labels.get_shape().ndims
1024 weights_shape = weights.get_shape()
1025 weights_rank = weights_shape.ndims
1027 if (labels_rank is not None) and (weights_rank is not None):
1028 # Use static rank.
1029 rank_diff = weights_rank - labels_rank
1030 if rank_diff == 1:
1031 weights = array_ops.squeeze(weights, [-1])
1032 return labels, predictions, weights
1034 # Use dynamic rank.
1035 rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
1036 if (weights_rank is None) or (
1037 weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
1038 weights = cond.cond(
1039 math_ops.equal(1, rank_diff),
1040 lambda: array_ops.squeeze(weights, [-1]),
1041 lambda: weights)
1043 return labels, predictions, weights
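(Editor's note: a rough public-ops approximation of the squeeze this helper performs, used below by `sparse_softmax_cross_entropy`; an editor-added sketch, not the module's actual code path, assuming eager TF 2.x.)

```python
import tensorflow as tf

labels = tf.constant([[1], [0], [2]])   # shape [3, 1]
logits = tf.zeros([3, 4])               # shape [3, 4]

# sparse_softmax_cross_entropy expects rank(logits) - rank(labels) == 1; here
# the difference is 0, so the trailing singleton dimension gets squeezed away.
if logits.shape.rank - labels.shape.rank != 1:
  labels = tf.squeeze(labels, axis=[-1])
print(labels.shape)  # (3,)
```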
1046@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
1047@dispatch.add_dispatch_support
1048def sparse_softmax_cross_entropy(
1049 labels, logits, weights=1.0, scope=None,
1050 loss_collection=ops.GraphKeys.LOSSES,
1051 reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
1052 """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.
1054 `weights` acts as a coefficient for the loss. If a scalar is provided,
1055 then the loss is simply scaled by the given value. If `weights` is a
1056 tensor of shape `[batch_size]`, then the loss weights apply to each
1057 corresponding sample.
1059 Args:
1060 labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
1061 `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
1062 must be an index in `[0, num_classes)`. Other values will raise an
1063 exception when this op is run on CPU, and return `NaN` for corresponding
1064 loss and gradient rows on GPU.
1065 logits: Unscaled log probabilities of shape
1066 `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
1067 `float64`.
1068 weights: Coefficients for the loss. This must be scalar or broadcastable to
1069 `labels` (i.e. same rank and each dimension is either 1 or the same).
1070 scope: the scope for the operations performed in computing the loss.
1071 loss_collection: collection to which the loss will be added.
1072 reduction: Type of reduction to apply to loss.
1074 Returns:
1075 Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
1076 `NONE`, this has the same shape as `labels`; otherwise, it is scalar.
1078 Raises:
1079 ValueError: If the shapes of `logits`, `labels`, and `weights` are
1080 incompatible, or if any of them are None.
1082 @compatibility(eager)
1083 The `loss_collection` argument is ignored when executing eagerly. Consider
1084 holding on to the return value or collecting losses via a `tf.keras.Model`.
1085 @end_compatibility
1086 """
1087 if labels is None:
1088 raise ValueError("Argument `labels` must not be None.")
1089 if logits is None:
1090 raise ValueError("Argument `logits` must not be None.")
1091 with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
1092 (logits, labels, weights)) as scope:
1093 # As documented above in Args, labels contain class IDs and logits contains
1094 # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
1095 # therefore, expected_rank_diff=1.
1096 labels, logits, weights = _remove_squeezable_dimensions(
1097 labels, logits, weights, expected_rank_diff=1)
1098 losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
1099 logits=logits,
1100 name="xentropy")
1101 return compute_weighted_loss(
1102 losses, weights, scope, loss_collection, reduction=reduction)
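(Editor's note: a closing usage sketch, not part of the original file; assumes eager TF 2.x and made-up class indices/logits.)

```python
import tensorflow as tf

labels = tf.constant([1, 2])                     # class indices, shape [batch]
logits = tf.constant([[0.1, 2.0, 0.3],
                      [0.2, 0.4, 3.0]])          # shape [batch, num_classes]

# Equivalent to softmax_cross_entropy with one-hot labels, but takes the class
# indices directly; with unit weights the default reduction averages over the
# batch.
loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)
print(loss.numpy())

# The same per-example values via the underlying nn op.
per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)
print(per_example.numpy())
```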