Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/keras/losses.py: 47% (352 statements)
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15# pylint: disable=g-classes-have-attributes
16"""Built-in loss functions."""
18import abc
19import functools
21from tensorflow.python.autograph.core import ag_ctx
22from tensorflow.python.autograph.impl import api as autograph
23from tensorflow.python.distribute import distribute_lib
24from tensorflow.python.eager import context
25from tensorflow.python.framework import constant_op
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import smart_cond
28from tensorflow.python.framework import tensor_conversion
29from tensorflow.python.framework import tensor_spec
30from tensorflow.python.framework import tensor_util
31from tensorflow.python.keras import backend
32from tensorflow.python.keras.utils import losses_utils
33from tensorflow.python.keras.utils import tf_utils
34from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
35from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
36from tensorflow.python.ops import array_ops
37from tensorflow.python.ops import cond
38from tensorflow.python.ops import math_ops
39from tensorflow.python.ops import nn
40from tensorflow.python.ops.losses import losses_impl
41from tensorflow.python.ops.ragged import ragged_map_ops
42from tensorflow.python.ops.ragged import ragged_tensor
43from tensorflow.python.ops.ragged import ragged_util
44from tensorflow.python.util import dispatch
45from tensorflow.python.util.tf_export import keras_export
46from tensorflow.tools.docs import doc_controls
49@keras_export('keras.losses.Loss')
50class Loss:
51 """Loss base class.
53 To be implemented by subclasses:
54 * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`.
56 Example subclass implementation:
58 ```python
59 class MeanSquaredError(Loss):
61 def call(self, y_true, y_pred):
62 y_pred = tf.convert_to_tensor(y_pred)
63 y_true = tf.cast(y_true, y_pred.dtype)
64 return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
65 ```
67 When used with `tf.distribute.Strategy`, outside of built-in training loops
68 such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction
69 types, and reduce losses explicitly in your training loop. Using 'AUTO' or
70 'SUM_OVER_BATCH_SIZE' will raise an error.
72 Please see this custom training [tutorial](
73 https://www.tensorflow.org/tutorials/distribute/custom_training) for more
74 details on this.
76 You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
78 ```python
79 with strategy.scope():
80 loss_obj = tf.keras.losses.CategoricalCrossentropy(
81 reduction=tf.keras.losses.Reduction.NONE)
82 ....
83 loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
84 (1. / global_batch_size))
85 ```
86 """
88 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
89 """Initializes `Loss` class.
91 Args:
92 reduction: Type of `tf.keras.losses.Reduction` to apply to
93 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
94 option will be determined by the usage context. For almost all cases
95 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
96 `tf.distribute.Strategy`, outside of built-in training loops such as
97 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
98 will raise an error. Please see this custom training [tutorial](
99 https://www.tensorflow.org/tutorials/distribute/custom_training) for
100 more details.
101 name: Optional name for the instance.
102 """
103 losses_utils.ReductionV2.validate(reduction)
104 self.reduction = reduction
105 self.name = name
106 # SUM_OVER_BATCH_SIZE is only allowed in losses managed by `fit` or
107 # CannedEstimators.
108 self._allow_sum_over_batch_size = False
109 self._set_name_scope()
111 def _set_name_scope(self):
112 """Creates a valid `name_scope` name."""
113 if self.name is None:
114 self._name_scope = self.__class__.__name__
115 elif self.name == '<lambda>':
116 self._name_scope = 'lambda'
117 else:
118 # E.g. '_my_loss' => 'my_loss'
119 self._name_scope = self.name.strip('_')
121 def __call__(self, y_true, y_pred, sample_weight=None):
122 """Invokes the `Loss` instance.
124 Args:
125 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
126 sparse loss functions such as sparse categorical crossentropy where
127 shape = `[batch_size, d0, .. dN-1]`
128 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
129 sample_weight: Optional `sample_weight` acts as a coefficient for the
130 loss. If a scalar is provided, then the loss is simply scaled by the
131 given value. If `sample_weight` is a tensor of size `[batch_size]`, then
132 the total loss for each sample of the batch is rescaled by the
133 corresponding element in the `sample_weight` vector. If the shape of
134 `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
135 this shape), then each loss element of `y_pred` is scaled
136 by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
137 functions reduce by 1 dimension, usually axis=-1.)
139 Returns:
140 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
141 shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1`
142 because all loss functions reduce by 1 dimension, usually axis=-1.)
144 Raises:
145 ValueError: If the shape of `sample_weight` is invalid.
146 """
147 # If we are wrapping a lambda function, strip '<>' from the name as it is not
148 # accepted in a scope name.
149 graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
150 y_true, y_pred, sample_weight)
151 with backend.name_scope(self._name_scope), graph_ctx:
152 if context.executing_eagerly():
153 call_fn = self.call
154 else:
155 call_fn = autograph.tf_convert(self.call, ag_ctx.control_status_ctx())
156 losses = call_fn(y_true, y_pred)
157 return losses_utils.compute_weighted_loss(
158 losses, sample_weight, reduction=self._get_reduction())
160 @classmethod
161 def from_config(cls, config):
162 """Instantiates a `Loss` from its config (output of `get_config()`).
164 Args:
165 config: Output of `get_config()`.
167 Returns:
168 A `Loss` instance.
169 """
170 return cls(**config)
172 def get_config(self):
173 """Returns the config dictionary for a `Loss` instance."""
174 return {'reduction': self.reduction, 'name': self.name}
176 @abc.abstractmethod
177 @doc_controls.for_subclass_implementers
178 def call(self, y_true, y_pred):
179 """Invokes the `Loss` instance.
181 Args:
182 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
183 sparse loss functions such as sparse categorical crossentropy where
184 shape = `[batch_size, d0, .. dN-1]`
185 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
187 Returns:
188 Loss values with the shape `[batch_size, d0, .. dN-1]`.
189 """
190 raise NotImplementedError('Must be implemented in subclasses.')
192 def _get_reduction(self):
193 """Handles `AUTO` reduction cases and returns the reduction value."""
194 if (not self._allow_sum_over_batch_size and
195 distribute_lib.has_strategy() and
196 (self.reduction == losses_utils.ReductionV2.AUTO or
197 self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)):
198 raise ValueError(
199 'Please use `tf.keras.losses.Reduction.SUM` or '
200 '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are '
201 'used with `tf.distribute.Strategy` outside of the built-in training '
202 'loops. You can implement '
203 '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch '
204 'size like:\n```\nwith strategy.scope():\n'
205 ' loss_obj = tf.keras.losses.CategoricalCrossentropy('
206 'reduction=tf.keras.losses.Reduction.NONE)\n....\n'
207 ' loss = tf.reduce_sum(loss_obj(labels, predictions)) * '
208 '(1. / global_batch_size)\n```\nPlease see '
209 'https://www.tensorflow.org/tutorials/distribute/custom_training'
210 ' for more details.')
212 if self.reduction == losses_utils.ReductionV2.AUTO:
213 return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
214 return self.reduction
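# --- Illustrative sketch (not part of the original file) ---------------------
# A minimal `Loss` subclass along the lines described in the class docstring,
# written against the public `tf.keras` API. The class and function names are
# hypothetical examples, not symbols defined by this module.
def _example_loss_subclass():
  import tensorflow as tf

  class ExampleMeanAbsoluteError(tf.keras.losses.Loss):

    def call(self, y_true, y_pred):
      y_pred = tf.convert_to_tensor(y_pred)
      y_true = tf.cast(y_true, y_pred.dtype)
      # `call` returns per-sample losses; `__call__` then applies
      # `sample_weight` and the configured `reduction`.
      return tf.reduce_mean(tf.abs(y_pred - y_true), axis=-1)

  loss_obj = ExampleMeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
  # Per-sample losses for these inputs: [0.5, 0.5].
  return loss_obj([[0., 1.], [0., 0.]], [[1., 1.], [1., 0.]])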
217class LossFunctionWrapper(Loss):
218 """Wraps a loss function in the `Loss` class."""
220 def __init__(self,
221 fn,
222 reduction=losses_utils.ReductionV2.AUTO,
223 name=None,
224 **kwargs):
225 """Initializes `LossFunctionWrapper` class.
227 Args:
228 fn: The loss function to wrap, with signature `fn(y_true, y_pred,
229 **kwargs)`.
230 reduction: Type of `tf.keras.losses.Reduction` to apply to
231 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
232 option will be determined by the usage context. For almost all cases
233 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
234 `tf.distribute.Strategy`, outside of built-in training loops such as
235 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
236 will raise an error. Please see this custom training [tutorial](
237 https://www.tensorflow.org/tutorials/distribute/custom_training) for
238 more details.
239 name: Optional name for the instance.
240 **kwargs: The keyword arguments that are passed on to `fn`.
241 """
242 super().__init__(reduction=reduction, name=name)
243 self.fn = fn
244 self._fn_kwargs = kwargs
246 def call(self, y_true, y_pred):
247 """Invokes the `LossFunctionWrapper` instance.
249 Args:
250 y_true: Ground truth values.
251 y_pred: The predicted values.
253 Returns:
254 Loss values per sample.
255 """
256 if tensor_util.is_tf_type(y_pred) and tensor_util.is_tf_type(y_true):
257 y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
259 ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
260 return ag_fn(y_true, y_pred, **self._fn_kwargs)
262 def get_config(self):
263 config = {}
264 for k, v in self._fn_kwargs.items():
265 config[k] = backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v
266 base_config = super().get_config()
267 return dict(list(base_config.items()) + list(config.items()))
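# --- Illustrative sketch (not part of the original file) ---------------------
# `LossFunctionWrapper` is internal to Keras, but the sketch below shows the
# `fn(y_true, y_pred, **kwargs)` contract it wraps: extra keyword arguments
# given to the wrapper are stored and forwarded to `fn` on every call. The
# function `scaled_absolute_error` is a hypothetical example.
def _example_loss_function_wrapper():
  import tensorflow as tf

  def scaled_absolute_error(y_true, y_pred, scale=1.0):
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    return scale * tf.reduce_mean(tf.abs(y_pred - y_true), axis=-1)

  loss_obj = LossFunctionWrapper(
      scaled_absolute_error, name='scaled_mae', scale=2.0)
  # `scale=2.0` is passed through to `scaled_absolute_error`.
  return loss_obj([[0., 1.]], [[1., 1.]])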
270@keras_export('keras.losses.MeanSquaredError')
271class MeanSquaredError(LossFunctionWrapper):
272 """Computes the mean of squares of errors between labels and predictions.
274 `loss = square(y_true - y_pred)`
276 Standalone usage:
278 >>> y_true = [[0., 1.], [0., 0.]]
279 >>> y_pred = [[1., 1.], [1., 0.]]
280 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
281 >>> mse = tf.keras.losses.MeanSquaredError()
282 >>> mse(y_true, y_pred).numpy()
283 0.5
285 >>> # Calling with 'sample_weight'.
286 >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
287 0.25
289 >>> # Using 'sum' reduction type.
290 >>> mse = tf.keras.losses.MeanSquaredError(
291 ... reduction=tf.keras.losses.Reduction.SUM)
292 >>> mse(y_true, y_pred).numpy()
293 1.0
295 >>> # Using 'none' reduction type.
296 >>> mse = tf.keras.losses.MeanSquaredError(
297 ... reduction=tf.keras.losses.Reduction.NONE)
298 >>> mse(y_true, y_pred).numpy()
299 array([0.5, 0.5], dtype=float32)
301 Usage with the `compile()` API:
303 ```python
304 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
305 ```
306 """
308 def __init__(self,
309 reduction=losses_utils.ReductionV2.AUTO,
310 name='mean_squared_error'):
311 """Initializes `MeanSquaredError` instance.
313 Args:
314 reduction: Type of `tf.keras.losses.Reduction` to apply to
315 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
316 option will be determined by the usage context. For almost all cases
317 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
318 `tf.distribute.Strategy`, outside of built-in training loops such as
319 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
320 will raise an error. Please see this custom training [tutorial](
321 https://www.tensorflow.org/tutorials/distribute/custom_training) for
322 more details.
323 name: Optional name for the instance. Defaults to 'mean_squared_error'.
324 """
325 super().__init__(mean_squared_error, name=name, reduction=reduction)
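# --- Illustrative sketch (not part of the original file) ---------------------
# The arithmetic behind the doctest values above, assuming the default
# SUM_OVER_BATCH_SIZE reduction: per-sample squared errors are averaged over
# the last axis, and weighted losses are summed and divided by the batch size.
def _example_mse_arithmetic():
  import numpy as np
  y_true = np.array([[0., 1.], [0., 0.]])
  y_pred = np.array([[1., 1.], [1., 0.]])
  per_sample = np.mean(np.square(y_pred - y_true), axis=-1)     # [0.5, 0.5]
  unweighted = per_sample.mean()                                 # 0.5
  weighted = np.sum(per_sample * [0.7, 0.3]) / len(per_sample)   # 0.25
  return per_sample, unweighted, weighted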
328@keras_export('keras.losses.MeanAbsoluteError')
329class MeanAbsoluteError(LossFunctionWrapper):
330 """Computes the mean of absolute difference between labels and predictions.
332 `loss = abs(y_true - y_pred)`
334 Standalone usage:
336 >>> y_true = [[0., 1.], [0., 0.]]
337 >>> y_pred = [[1., 1.], [1., 0.]]
338 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
339 >>> mae = tf.keras.losses.MeanAbsoluteError()
340 >>> mae(y_true, y_pred).numpy()
341 0.5
343 >>> # Calling with 'sample_weight'.
344 >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
345 0.25
347 >>> # Using 'sum' reduction type.
348 >>> mae = tf.keras.losses.MeanAbsoluteError(
349 ... reduction=tf.keras.losses.Reduction.SUM)
350 >>> mae(y_true, y_pred).numpy()
351 1.0
353 >>> # Using 'none' reduction type.
354 >>> mae = tf.keras.losses.MeanAbsoluteError(
355 ... reduction=tf.keras.losses.Reduction.NONE)
356 >>> mae(y_true, y_pred).numpy()
357 array([0.5, 0.5], dtype=float32)
359 Usage with the `compile()` API:
361 ```python
362 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
363 ```
364 """
366 def __init__(self,
367 reduction=losses_utils.ReductionV2.AUTO,
368 name='mean_absolute_error'):
369 """Initializes `MeanAbsoluteError` instance.
371 Args:
372 reduction: Type of `tf.keras.losses.Reduction` to apply to
373 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
374 option will be determined by the usage context. For almost all cases
375 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
376 `tf.distribute.Strategy`, outside of built-in training loops such as
377 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
378 will raise an error. Please see this custom training [tutorial](
379 https://www.tensorflow.org/tutorials/distribute/custom_training) for
380 more details.
381 name: Optional name for the instance. Defaults to 'mean_absolute_error'.
382 """
383 super().__init__(mean_absolute_error, name=name, reduction=reduction)
386@keras_export('keras.losses.MeanAbsolutePercentageError')
387class MeanAbsolutePercentageError(LossFunctionWrapper):
388 """Computes the mean absolute percentage error between `y_true` and `y_pred`.
390 `loss = 100 * abs((y_true - y_pred) / y_true)`
392 Standalone usage:
394 >>> y_true = [[2., 1.], [2., 3.]]
395 >>> y_pred = [[1., 1.], [1., 0.]]
396 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
397 >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
398 >>> mape(y_true, y_pred).numpy()
399 50.
401 >>> # Calling with 'sample_weight'.
402 >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
403 20.
405 >>> # Using 'sum' reduction type.
406 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
407 ... reduction=tf.keras.losses.Reduction.SUM)
408 >>> mape(y_true, y_pred).numpy()
409 100.
411 >>> # Using 'none' reduction type.
412 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
413 ... reduction=tf.keras.losses.Reduction.NONE)
414 >>> mape(y_true, y_pred).numpy()
415 array([25., 75.], dtype=float32)
417 Usage with the `compile()` API:
419 ```python
420 model.compile(optimizer='sgd',
421 loss=tf.keras.losses.MeanAbsolutePercentageError())
422 ```
423 """
425 def __init__(self,
426 reduction=losses_utils.ReductionV2.AUTO,
427 name='mean_absolute_percentage_error'):
428 """Initializes `MeanAbsolutePercentageError` instance.
430 Args:
431 reduction: Type of `tf.keras.losses.Reduction` to apply to
432 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
433 option will be determined by the usage context. For almost all cases
434 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
435 `tf.distribute.Strategy`, outside of built-in training loops such as
436 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
437 will raise an error. Please see this custom training [tutorial](
438 https://www.tensorflow.org/tutorials/distribute/custom_training) for
439 more details.
440 name: Optional name for the instance. Defaults to
441 'mean_absolute_percentage_error'.
442 """
443 super().__init__(
444 mean_absolute_percentage_error, name=name, reduction=reduction)
447@keras_export('keras.losses.MeanSquaredLogarithmicError')
448class MeanSquaredLogarithmicError(LossFunctionWrapper):
449 """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
451 `loss = square(log(y_true + 1.) - log(y_pred + 1.))`
453 Standalone usage:
455 >>> y_true = [[0., 1.], [0., 0.]]
456 >>> y_pred = [[1., 1.], [1., 0.]]
457 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
458 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
459 >>> msle(y_true, y_pred).numpy()
460 0.240
462 >>> # Calling with 'sample_weight'.
463 >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
464 0.120
466 >>> # Using 'sum' reduction type.
467 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
468 ... reduction=tf.keras.losses.Reduction.SUM)
469 >>> msle(y_true, y_pred).numpy()
470 0.480
472 >>> # Using 'none' reduction type.
473 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
474 ... reduction=tf.keras.losses.Reduction.NONE)
475 >>> msle(y_true, y_pred).numpy()
476 array([0.240, 0.240], dtype=float32)
478 Usage with the `compile()` API:
480 ```python
481 model.compile(optimizer='sgd',
482 loss=tf.keras.losses.MeanSquaredLogarithmicError())
483 ```
484 """
486 def __init__(self,
487 reduction=losses_utils.ReductionV2.AUTO,
488 name='mean_squared_logarithmic_error'):
489 """Initializes `MeanSquaredLogarithmicError` instance.
491 Args:
492 reduction: Type of `tf.keras.losses.Reduction` to apply to
493 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
494 option will be determined by the usage context. For almost all cases
495 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
496 `tf.distribute.Strategy`, outside of built-in training loops such as
497 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
498 will raise an error. Please see this custom training [tutorial](
499 https://www.tensorflow.org/tutorials/distribute/custom_training) for
500 more details.
501 name: Optional name for the instance. Defaults to
502 'mean_squared_logarithmic_error'.
503 """
504 super().__init__(
505 mean_squared_logarithmic_error, name=name, reduction=reduction)
508@keras_export('keras.losses.BinaryCrossentropy')
509class BinaryCrossentropy(LossFunctionWrapper):
510 """Computes the cross-entropy loss between true labels and predicted labels.
512 Use this cross-entropy loss for binary (0 or 1) classification applications.
513 The loss function requires the following inputs:
515 - `y_true` (true label): This is either 0 or 1.
516 - `y_pred` (predicted value): This is the model's prediction, i.e., a single
517 floating-point value which either represents a
518 [logit](https://en.wikipedia.org/wiki/Logit), (i.e., a value in [-inf, inf]
519 when `from_logits=True`) or a probability (i.e., a value in [0., 1.] when
520 `from_logits=False`).
522 **Recommended Usage:** (set `from_logits=True`)
524 With `tf.keras` API:
526 ```python
527 model.compile(
528 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
529 ....
530 )
531 ```
533 As a standalone function:
535 >>> # Example 1: (batch_size = 1, number of samples = 4)
536 >>> y_true = [0, 1, 0, 0]
537 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
538 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
539 >>> bce(y_true, y_pred).numpy()
540 0.865
542 >>> # Example 2: (batch_size = 2, number of samples = 4)
543 >>> y_true = [[0, 1], [0, 0]]
544 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
545 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
546 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
547 >>> bce(y_true, y_pred).numpy()
548 0.865
549 >>> # Using 'sample_weight' attribute
550 >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
551 0.243
552 >>> # Using 'sum' reduction type.
553 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
554 ... reduction=tf.keras.losses.Reduction.SUM)
555 >>> bce(y_true, y_pred).numpy()
556 1.730
557 >>> # Using 'none' reduction type.
558 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
559 ... reduction=tf.keras.losses.Reduction.NONE)
560 >>> bce(y_true, y_pred).numpy()
561 array([0.235, 1.496], dtype=float32)
563 **Default Usage:** (set `from_logits=False`)
565 >>> # Make the following updates to the above "Recommended Usage" section
566 >>> # 1. Set `from_logits=False`
567 >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
568 >>> # 2. Update `y_pred` to use probabilities instead of logits
569 >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
570 """
572 def __init__(self,
573 from_logits=False,
574 label_smoothing=0,
575 axis=-1,
576 reduction=losses_utils.ReductionV2.AUTO,
577 name='binary_crossentropy'):
578 """Initializes `BinaryCrossentropy` instance.
580 Args:
581 from_logits: Whether to interpret `y_pred` as a tensor of
582 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
583 assume that `y_pred` contains probabilities (i.e., values in [0, 1]).
584 label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0,
585 we compute the loss between the predicted labels and a smoothed version
586 of the true labels, where the smoothing squeezes the labels towards 0.5.
587 Larger values of `label_smoothing` correspond to heavier smoothing.
588 axis: The axis along which to compute crossentropy (the features axis).
589 Defaults to -1.
590 reduction: Type of `tf.keras.losses.Reduction` to apply to
591 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
592 option will be determined by the usage context. For almost all cases
593 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
594 `tf.distribute.Strategy`, outside of built-in training loops such as
595 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
596 will raise an error. Please see this custom training [tutorial](
597 https://www.tensorflow.org/tutorials/distribute/custom_training) for
598 more details.
599 name: Name for the op. Defaults to 'binary_crossentropy'.
600 """
601 super().__init__(
602 binary_crossentropy,
603 name=name,
604 reduction=reduction,
605 from_logits=from_logits,
606 label_smoothing=label_smoothing,
607 axis=axis)
608 self.from_logits = from_logits
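# --- Illustrative sketch (not part of the original file) ---------------------
# The relationship between the two `from_logits` modes described above:
# passing raw logits with `from_logits=True` is, up to clipping of
# probabilities, equivalent to passing `sigmoid(logits)` with
# `from_logits=False`. Both calls below give approximately 0.865.
def _example_bce_from_logits():
  import tensorflow as tf
  y_true = [[0., 1.], [0., 0.]]
  logits = [[-18.6, 0.51], [2.94, -12.8]]
  bce_from_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  bce_from_probs = tf.keras.losses.BinaryCrossentropy(from_logits=False)
  probs = tf.sigmoid(logits)
  return bce_from_logits(y_true, logits), bce_from_probs(y_true, probs)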
611@keras_export('keras.losses.CategoricalCrossentropy')
612class CategoricalCrossentropy(LossFunctionWrapper):
613 """Computes the crossentropy loss between the labels and predictions.
615 Use this crossentropy loss function when there are two or more label classes.
616 We expect labels to be provided in a `one_hot` representation. If you want to
617 provide labels as integers, please use `SparseCategoricalCrossentropy` loss.
618 There should be `# classes` floating point values per feature.
620 In the snippet below, there are `# classes` floating point values per
621 example. The shapes of both `y_pred` and `y_true` are
622 `[batch_size, num_classes]`.
624 Standalone usage:
626 >>> y_true = [[0, 1, 0], [0, 0, 1]]
627 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
628 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
629 >>> cce = tf.keras.losses.CategoricalCrossentropy()
630 >>> cce(y_true, y_pred).numpy()
631 1.177
633 >>> # Calling with 'sample_weight'.
634 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
635 0.814
637 >>> # Using 'sum' reduction type.
638 >>> cce = tf.keras.losses.CategoricalCrossentropy(
639 ... reduction=tf.keras.losses.Reduction.SUM)
640 >>> cce(y_true, y_pred).numpy()
641 2.354
643 >>> # Using 'none' reduction type.
644 >>> cce = tf.keras.losses.CategoricalCrossentropy(
645 ... reduction=tf.keras.losses.Reduction.NONE)
646 >>> cce(y_true, y_pred).numpy()
647 array([0.0513, 2.303], dtype=float32)
649 Usage with the `compile()` API:
651 ```python
652 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())
653 ```
654 """
656 def __init__(self,
657 from_logits=False,
658 label_smoothing=0,
659 axis=-1,
660 reduction=losses_utils.ReductionV2.AUTO,
661 name='categorical_crossentropy'):
662 """Initializes `CategoricalCrossentropy` instance.
664 Args:
665 from_logits: Whether `y_pred` is expected to be a logits tensor. By
666 default, we assume that `y_pred` encodes a probability distribution.
667 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
668 meaning the confidence on label values is relaxed. For example, if
669 `0.1`, use `0.1 / num_classes` for non-target labels and
670 `0.9 + 0.1 / num_classes` for target labels.
671 axis: The axis along which to compute crossentropy (the features axis).
672 Defaults to -1.
673 reduction: Type of `tf.keras.losses.Reduction` to apply to
674 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
675 option will be determined by the usage context. For almost all cases
676 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
677 `tf.distribute.Strategy`, outside of built-in training loops such as
678 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
679 will raise an error. Please see this custom training [tutorial](
680 https://www.tensorflow.org/tutorials/distribute/custom_training) for
681 more details.
682 name: Optional name for the instance.
683 Defaults to 'categorical_crossentropy'.
684 """
685 super().__init__(
686 categorical_crossentropy,
687 name=name,
688 reduction=reduction,
689 from_logits=from_logits,
690 label_smoothing=label_smoothing,
691 axis=axis)
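# --- Illustrative sketch (not part of the original file) ---------------------
# The label smoothing rule quoted in the `label_smoothing` argument above,
# written out with NumPy for `label_smoothing=0.1` and three classes: the
# target entry becomes 0.9 + 0.1 / 3 and each non-target entry 0.1 / 3.
def _example_label_smoothing(label_smoothing=0.1, num_classes=3):
  import numpy as np
  y_true = np.array([[0., 1., 0.]])
  smoothed = y_true * (1. - label_smoothing) + label_smoothing / num_classes
  return smoothed   # [[0.0333..., 0.9333..., 0.0333...]]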
694@keras_export('keras.losses.SparseCategoricalCrossentropy')
695class SparseCategoricalCrossentropy(LossFunctionWrapper):
696 """Computes the crossentropy loss between the labels and predictions.
698 Use this crossentropy loss function when there are two or more label classes.
699 We expect labels to be provided as integers. If you want to provide labels
700 using `one-hot` representation, please use `CategoricalCrossentropy` loss.
701 There should be `# classes` floating point values per feature for `y_pred`
702 and a single floating point value per feature for `y_true`.
704 In the snippet below, there is a single floating point value per example for
705 `y_true` and `# classes` floating point values per example for `y_pred`.
706 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
707 `[batch_size, num_classes]`.
709 Standalone usage:
711 >>> y_true = [1, 2]
712 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
713 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
714 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
715 >>> scce(y_true, y_pred).numpy()
716 1.177
718 >>> # Calling with 'sample_weight'.
719 >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
720 0.814
722 >>> # Using 'sum' reduction type.
723 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
724 ... reduction=tf.keras.losses.Reduction.SUM)
725 >>> scce(y_true, y_pred).numpy()
726 2.354
728 >>> # Using 'none' reduction type.
729 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
730 ... reduction=tf.keras.losses.Reduction.NONE)
731 >>> scce(y_true, y_pred).numpy()
732 array([0.0513, 2.303], dtype=float32)
734 Usage with the `compile()` API:
736 ```python
737 model.compile(optimizer='sgd',
738 loss=tf.keras.losses.SparseCategoricalCrossentropy())
739 ```
740 """
742 def __init__(self,
743 from_logits=False,
744 reduction=losses_utils.ReductionV2.AUTO,
745 name='sparse_categorical_crossentropy'):
746 """Initializes `SparseCategoricalCrossentropy` instance.
748 Args:
749 from_logits: Whether `y_pred` is expected to be a logits tensor. By
750 default, we assume that `y_pred` encodes a probability distribution.
751 reduction: Type of `tf.keras.losses.Reduction` to apply to
752 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
753 option will be determined by the usage context. For almost all cases
754 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
755 `tf.distribute.Strategy`, outside of built-in training loops such as
756 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
757 will raise an error. Please see this custom training [tutorial](
758 https://www.tensorflow.org/tutorials/distribute/custom_training) for
759 more details.
760 name: Optional name for the instance. Defaults to
761 'sparse_categorical_crossentropy'.
762 """
763 super().__init__(
764 sparse_categorical_crossentropy,
765 name=name,
766 reduction=reduction,
767 from_logits=from_logits)
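# --- Illustrative sketch (not part of the original file) ---------------------
# The correspondence between sparse integer labels and the one-hot labels
# expected by `CategoricalCrossentropy`, using the same inputs as the
# doctests above; both results are approximately 1.177.
def _example_sparse_vs_one_hot():
  import tensorflow as tf
  y_true = [1, 2]
  y_pred = [[0.05, 0.95, 0.], [0.1, 0.8, 0.1]]
  scce = tf.keras.losses.SparseCategoricalCrossentropy()
  cce = tf.keras.losses.CategoricalCrossentropy()
  return scce(y_true, y_pred), cce(tf.one_hot(y_true, depth=3), y_pred)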
770@keras_export('keras.losses.Hinge')
771class Hinge(LossFunctionWrapper):
772 """Computes the hinge loss between `y_true` and `y_pred`.
774 `loss = maximum(1 - y_true * y_pred, 0)`
776 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
777 provided, we will convert them to -1 or 1.
779 Standalone usage:
781 >>> y_true = [[0., 1.], [0., 0.]]
782 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
783 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
784 >>> h = tf.keras.losses.Hinge()
785 >>> h(y_true, y_pred).numpy()
786 1.3
788 >>> # Calling with 'sample_weight'.
789 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
790 0.55
792 >>> # Using 'sum' reduction type.
793 >>> h = tf.keras.losses.Hinge(
794 ... reduction=tf.keras.losses.Reduction.SUM)
795 >>> h(y_true, y_pred).numpy()
796 2.6
798 >>> # Using 'none' reduction type.
799 >>> h = tf.keras.losses.Hinge(
800 ... reduction=tf.keras.losses.Reduction.NONE)
801 >>> h(y_true, y_pred).numpy()
802 array([1.1, 1.5], dtype=float32)
804 Usage with the `compile()` API:
806 ```python
807 model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
808 ```
809 """
811 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'):
812 """Initializes `Hinge` instance.
814 Args:
815 reduction: Type of `tf.keras.losses.Reduction` to apply to
816 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
817 option will be determined by the usage context. For almost all cases
818 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
819 `tf.distribute.Strategy`, outside of built-in training loops such as
820 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
821 will raise an error. Please see this custom training [tutorial](
822 https://www.tensorflow.org/tutorials/distribute/custom_training) for
823 more details.
824 name: Optional name for the instance. Defaults to 'hinge'.
825 """
826 super().__init__(hinge, name=name, reduction=reduction)
829@keras_export('keras.losses.SquaredHinge')
830class SquaredHinge(LossFunctionWrapper):
831 """Computes the squared hinge loss between `y_true` and `y_pred`.
833 `loss = square(maximum(1 - y_true * y_pred, 0))`
835 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
836 provided, we will convert them to -1 or 1.
838 Standalone usage:
840 >>> y_true = [[0., 1.], [0., 0.]]
841 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
842 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
843 >>> h = tf.keras.losses.SquaredHinge()
844 >>> h(y_true, y_pred).numpy()
845 1.86
847 >>> # Calling with 'sample_weight'.
848 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
849 0.73
851 >>> # Using 'sum' reduction type.
852 >>> h = tf.keras.losses.SquaredHinge(
853 ... reduction=tf.keras.losses.Reduction.SUM)
854 >>> h(y_true, y_pred).numpy()
855 3.72
857 >>> # Using 'none' reduction type.
858 >>> h = tf.keras.losses.SquaredHinge(
859 ... reduction=tf.keras.losses.Reduction.NONE)
860 >>> h(y_true, y_pred).numpy()
861 array([1.46, 2.26], dtype=float32)
863 Usage with the `compile()` API:
865 ```python
866 model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
867 ```
868 """
870 def __init__(self,
871 reduction=losses_utils.ReductionV2.AUTO,
872 name='squared_hinge'):
873 """Initializes `SquaredHinge` instance.
875 Args:
876 reduction: Type of `tf.keras.losses.Reduction` to apply to
877 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
878 option will be determined by the usage context. For almost all cases
879 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
880 `tf.distribute.Strategy`, outside of built-in training loops such as
881 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
882 will raise an error. Please see this custom training [tutorial](
883 https://www.tensorflow.org/tutorials/distribute/custom_training) for
884 more details.
885 name: Optional name for the instance. Defaults to 'squared_hinge'.
886 """
887 super().__init__(squared_hinge, name=name, reduction=reduction)
890@keras_export('keras.losses.CategoricalHinge')
891class CategoricalHinge(LossFunctionWrapper):
892 """Computes the categorical hinge loss between `y_true` and `y_pred`.
894 `loss = maximum(neg - pos + 1, 0)`
895 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`
897 Standalone usage:
899 >>> y_true = [[0, 1], [0, 0]]
900 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
901 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
902 >>> h = tf.keras.losses.CategoricalHinge()
903 >>> h(y_true, y_pred).numpy()
904 1.4
906 >>> # Calling with 'sample_weight'.
907 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
908 0.6
910 >>> # Using 'sum' reduction type.
911 >>> h = tf.keras.losses.CategoricalHinge(
912 ... reduction=tf.keras.losses.Reduction.SUM)
913 >>> h(y_true, y_pred).numpy()
914 2.8
916 >>> # Using 'none' reduction type.
917 >>> h = tf.keras.losses.CategoricalHinge(
918 ... reduction=tf.keras.losses.Reduction.NONE)
919 >>> h(y_true, y_pred).numpy()
920 array([1.2, 1.6], dtype=float32)
922 Usage with the `compile()` API:
924 ```python
925 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
926 ```
927 """
929 def __init__(self,
930 reduction=losses_utils.ReductionV2.AUTO,
931 name='categorical_hinge'):
932 """Initializes `CategoricalHinge` instance.
934 Args:
935 reduction: Type of `tf.keras.losses.Reduction` to apply to
936 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
937 option will be determined by the usage context. For almost all cases
938 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
939 `tf.distribute.Strategy`, outside of built-in training loops such as
940 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
941 will raise an error. Please see this custom training [tutorial](
942 https://www.tensorflow.org/tutorials/distribute/custom_training) for
943 more details.
944 name: Optional name for the instance. Defaults to 'categorical_hinge'.
945 """
946 super().__init__(categorical_hinge, name=name, reduction=reduction)
949@keras_export('keras.losses.Poisson')
950class Poisson(LossFunctionWrapper):
951 """Computes the Poisson loss between `y_true` and `y_pred`.
953 `loss = y_pred - y_true * log(y_pred)`
955 Standalone usage:
957 >>> y_true = [[0., 1.], [0., 0.]]
958 >>> y_pred = [[1., 1.], [0., 0.]]
959 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
960 >>> p = tf.keras.losses.Poisson()
961 >>> p(y_true, y_pred).numpy()
962 0.5
964 >>> # Calling with 'sample_weight'.
965 >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
966 0.4
968 >>> # Using 'sum' reduction type.
969 >>> p = tf.keras.losses.Poisson(
970 ... reduction=tf.keras.losses.Reduction.SUM)
971 >>> p(y_true, y_pred).numpy()
972 0.999
974 >>> # Using 'none' reduction type.
975 >>> p = tf.keras.losses.Poisson(
976 ... reduction=tf.keras.losses.Reduction.NONE)
977 >>> p(y_true, y_pred).numpy()
978 array([0.999, 0.], dtype=float32)
980 Usage with the `compile()` API:
982 ```python
983 model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
984 ```
985 """
987 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'):
988 """Initializes `Poisson` instance.
990 Args:
991 reduction: Type of `tf.keras.losses.Reduction` to apply to
992 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
993 option will be determined by the usage context. For almost all cases
994 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
995 `tf.distribute.Strategy`, outside of built-in training loops such as
996 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
997 will raise an error. Please see this custom training [tutorial](
998 https://www.tensorflow.org/tutorials/distribute/custom_training) for
999 more details.
1000 name: Optional name for the instance. Defaults to 'poisson'.
1001 """
1002 super().__init__(poisson, name=name, reduction=reduction)
1005@keras_export('keras.losses.LogCosh')
1006class LogCosh(LossFunctionWrapper):
1007 """Computes the logarithm of the hyperbolic cosine of the prediction error.
1009 `logcosh = log((exp(x) + exp(-x))/2)`,
1010 where x is the error `y_pred - y_true`.
1012 Standalone usage:
1014 >>> y_true = [[0., 1.], [0., 0.]]
1015 >>> y_pred = [[1., 1.], [0., 0.]]
1016 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1017 >>> l = tf.keras.losses.LogCosh()
1018 >>> l(y_true, y_pred).numpy()
1019 0.108
1021 >>> # Calling with 'sample_weight'.
1022 >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1023 0.087
1025 >>> # Using 'sum' reduction type.
1026 >>> l = tf.keras.losses.LogCosh(
1027 ... reduction=tf.keras.losses.Reduction.SUM)
1028 >>> l(y_true, y_pred).numpy()
1029 0.217
1031 >>> # Using 'none' reduction type.
1032 >>> l = tf.keras.losses.LogCosh(
1033 ... reduction=tf.keras.losses.Reduction.NONE)
1034 >>> l(y_true, y_pred).numpy()
1035 array([0.217, 0.], dtype=float32)
1037 Usage with the `compile()` API:
1039 ```python
1040 model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
1041 ```
1042 """
1044 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='log_cosh'):
1045 """Initializes `LogCosh` instance.
1047 Args:
1048 reduction: Type of `tf.keras.losses.Reduction` to apply to
1049 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1050 option will be determined by the usage context. For almost all cases
1051 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1052 `tf.distribute.Strategy`, outside of built-in training loops such as
1053 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1054 will raise an error. Please see this custom training [tutorial](
1055 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1056 more details.
1057 name: Optional name for the instance. Defaults to 'log_cosh'.
1058 """
1059 super().__init__(log_cosh, name=name, reduction=reduction)
1062@keras_export('keras.losses.KLDivergence')
1063class KLDivergence(LossFunctionWrapper):
1064 """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.
1066 `loss = y_true * log(y_true / y_pred)`
1068 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
1070 Standalone usage:
1072 >>> y_true = [[0, 1], [0, 0]]
1073 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1074 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1075 >>> kl = tf.keras.losses.KLDivergence()
1076 >>> kl(y_true, y_pred).numpy()
1077 0.458
1079 >>> # Calling with 'sample_weight'.
1080 >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1081 0.366
1083 >>> # Using 'sum' reduction type.
1084 >>> kl = tf.keras.losses.KLDivergence(
1085 ... reduction=tf.keras.losses.Reduction.SUM)
1086 >>> kl(y_true, y_pred).numpy()
1087 0.916
1089 >>> # Using 'none' reduction type.
1090 >>> kl = tf.keras.losses.KLDivergence(
1091 ... reduction=tf.keras.losses.Reduction.NONE)
1092 >>> kl(y_true, y_pred).numpy()
1093 array([0.916, -3.08e-06], dtype=float32)
1095 Usage with the `compile()` API:
1097 ```python
1098 model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
1099 ```
1100 """
1102 def __init__(self,
1103 reduction=losses_utils.ReductionV2.AUTO,
1104 name='kl_divergence'):
1105 """Initializes `KLDivergence` instance.
1107 Args:
1108 reduction: Type of `tf.keras.losses.Reduction` to apply to
1109 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1110 option will be determined by the usage context. For almost all cases
1111 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1112 `tf.distribute.Strategy`, outside of built-in training loops such as
1113 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1114 will raise an error. Please see this custom training [tutorial](
1115 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1116 more details.
1117 name: Optional name for the instance. Defaults to 'kl_divergence'.
1118 """
1119 super().__init__(kl_divergence, name=name, reduction=reduction)
1122@keras_export('keras.losses.Huber')
1123class Huber(LossFunctionWrapper):
1124 """Computes the Huber loss between `y_true` and `y_pred`.
1126 For each value x in `error = y_true - y_pred`:
1128 ```
1129 loss = 0.5 * x^2 if |x| <= d
1130 loss = 0.5 * d^2 + d * (|x| - d) if |x| > d
1131 ```
1132 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1134 Standalone usage:
1136 >>> y_true = [[0, 1], [0, 0]]
1137 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1138 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1139 >>> h = tf.keras.losses.Huber()
1140 >>> h(y_true, y_pred).numpy()
1141 0.155
1143 >>> # Calling with 'sample_weight'.
1144 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1145 0.09
1147 >>> # Using 'sum' reduction type.
1148 >>> h = tf.keras.losses.Huber(
1149 ... reduction=tf.keras.losses.Reduction.SUM)
1150 >>> h(y_true, y_pred).numpy()
1151 0.31
1153 >>> # Using 'none' reduction type.
1154 >>> h = tf.keras.losses.Huber(
1155 ... reduction=tf.keras.losses.Reduction.NONE)
1156 >>> h(y_true, y_pred).numpy()
1157 array([0.18, 0.13], dtype=float32)
1159 Usage with the `compile()` API:
1161 ```python
1162 model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
1163 ```
1164 """
1166 def __init__(self,
1167 delta=1.0,
1168 reduction=losses_utils.ReductionV2.AUTO,
1169 name='huber_loss'):
1170 """Initializes `Huber` instance.
1172 Args:
1173 delta: A float, the point where the Huber loss function changes from a
1174 quadratic to linear.
1175 reduction: Type of `tf.keras.losses.Reduction` to apply to
1176 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1177 option will be determined by the usage context. For almost all cases
1178 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1179 `tf.distribute.Strategy`, outside of built-in training loops such as
1180 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1181 will raise an error. Please see this custom training [tutorial](
1182 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1183 more details.
1184 name: Optional name for the instance. Defaults to 'huber_loss'.
1185 """
1186 super().__init__(huber, name=name, reduction=reduction, delta=delta)
1189@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
1190 'keras.metrics.MSE', 'keras.losses.mean_squared_error',
1191 'keras.losses.mse', 'keras.losses.MSE')
1192@dispatch.add_dispatch_support
1193def mean_squared_error(y_true, y_pred):
1194 """Computes the mean squared error between labels and predictions.
1196 After computing the squared distance between the inputs, the mean value over
1197 the last dimension is returned.
1199 `loss = mean(square(y_true - y_pred), axis=-1)`
1201 Standalone usage:
1203 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1204 >>> y_pred = np.random.random(size=(2, 3))
1205 >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
1206 >>> assert loss.shape == (2,)
1207 >>> assert np.array_equal(
1208 ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))
1210 Args:
1211 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1212 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1214 Returns:
1215 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1216 """
1217 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1218 y_true = math_ops.cast(y_true, y_pred.dtype)
1219 return backend.mean(math_ops.squared_difference(y_pred, y_true), axis=-1)
1222def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):
1223 """Apply a loss function on a per batch basis.
1225 Args:
1226 loss_fn: The loss function
1227 y_true: truth values (RaggedTensor)
1228 y_pred: predicted values (RaggedTensor)
1229 y_pred_extra_dim: whether y_pred has an additional dimension compared to
1230 y_true
1232 Returns:
1233 Loss-function result. A dense tensor if the output has a single dimension
1234 (per-batch loss value); a ragged tensor otherwise.
1235 """
1237 def rt_is_equiv_dense(rt):
1238 """Returns true if this RaggedTensor has the same row_lenghts across
1240 all ragged dimensions and thus can be converted to a dense tensor
1241 without loss of information.
1243 Args:
1244 rt: RaggedTensor.
1245 """
1246 return math_ops.reduce_all([
1247 math_ops.equal(
1248 math_ops.reduce_variance(math_ops.cast(row_lens, backend.floatx())),
1249 constant_op.constant([0.])) for row_lens in rt.nested_row_lengths()
1250 ])
1252 def _convert_to_dense(inputs):
1253 return tuple(
1254 rt.to_tensor() if isinstance(rt, ragged_tensor.RaggedTensor) else rt
1255 for rt in inputs)
1257 def _call_loss(inputs, ragged_output):
1258 """ Adapt the result to ragged or dense tensor according to the expected
1260 output type. This is done so that all the return values of the map
1261 operation have the same type.
1262 """
1263 r = loss_fn(*inputs)
1264 if ragged_output and not isinstance(r, ragged_tensor.RaggedTensor):
1265 r = ragged_tensor.RaggedTensor.from_tensor(r)
1266 elif not ragged_output and isinstance(r, ragged_tensor.RaggedTensor):
1267 r = r.to_tensor()
1268 return r
1270 def _wrapper(inputs, ragged_output):
1271 _, y_pred = inputs
1272 if isinstance(y_pred, ragged_tensor.RaggedTensor):
1273 return cond.cond(
1274 rt_is_equiv_dense(y_pred),
1275 lambda: _call_loss(_convert_to_dense(inputs), ragged_output),
1276 lambda: _call_loss(inputs, ragged_output))
1278 return loss_fn(*inputs)
1280 if not isinstance(y_true, ragged_tensor.RaggedTensor):
1281 return loss_fn(y_true, y_pred.to_tensor())
1283 lshape = y_pred.shape.as_list()[1:-1]
1284 if len(lshape) > 0:
1285 spec = ragged_tensor.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype)
1286 else:
1287 spec = tensor_spec.TensorSpec(shape=[], dtype=y_pred.dtype)
1289 nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)]
1290 if y_pred_extra_dim:
1291 # The last dimension of a categorical prediction may be ragged or not.
1292 rdims = [len(slist) for slist in nested_splits_list]
1293 if rdims[0] == rdims[1] - 1:
1294 nested_splits_list[1] = nested_splits_list[1][:-1]
1296 map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1)
1298 assertion_list = ragged_util.assert_splits_match(nested_splits_list)
1299 with ops.control_dependencies(assertion_list):
1300 return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec)
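# --- Illustrative sketch (not part of the original file) ---------------------
# What the ragged dispatch below enables at the public API level: each ragged
# row is treated as one sample, so for 2-D ragged inputs the per-sample losses
# come back as a dense `[batch_size]` tensor.
def _example_ragged_mse():
  import tensorflow as tf
  y_true = tf.ragged.constant([[0., 1., 1.], [1., 0.]])
  y_pred = tf.ragged.constant([[1., 1., 0.], [0., 0.]])
  # Row 0: mean([1., 0., 1.]) ~= 0.667; row 1: mean([1., 0.]) = 0.5.
  return tf.keras.losses.mean_squared_error(y_true, y_pred)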
1303@dispatch.dispatch_for_types(mean_squared_error, ragged_tensor.RaggedTensor)
1304def _ragged_tensor_mse(y_true, y_pred):
1305 """Implements support for handling RaggedTensors.
1307 Args:
1308 y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
1309 y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.
1311 Returns:
1312 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1313 When the number of dimensions of the batch feature vector [d0, .. dN] is
1314 greater than one, the return value is a RaggedTensor. Otherwise, a dense
1315 tensor with dimensions [batch_size] is returned.
1316 """
1317 return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)
1320@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
1321 'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
1322 'keras.losses.mae', 'keras.losses.MAE')
1323@dispatch.add_dispatch_support
1324def mean_absolute_error(y_true, y_pred):
1325 """Computes the mean absolute error between labels and predictions.
1327 `loss = mean(abs(y_true - y_pred), axis=-1)`
1329 Standalone usage:
1331 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1332 >>> y_pred = np.random.random(size=(2, 3))
1333 >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
1334 >>> assert loss.shape == (2,)
1335 >>> assert np.array_equal(
1336 ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))
1338 Args:
1339 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1340 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1342 Returns:
1343 Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
1344 """
1345 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1346 y_true = math_ops.cast(y_true, y_pred.dtype)
1347 return backend.mean(math_ops.abs(y_pred - y_true), axis=-1)
1350@dispatch.dispatch_for_types(mean_absolute_error, ragged_tensor.RaggedTensor)
1351def _ragged_tensor_mae(y_true, y_pred):
1352 """RaggedTensor adapter for mean_absolute_error."""
1353 return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred)
1356@keras_export('keras.metrics.mean_absolute_percentage_error',
1357 'keras.metrics.mape', 'keras.metrics.MAPE',
1358 'keras.losses.mean_absolute_percentage_error',
1359 'keras.losses.mape', 'keras.losses.MAPE')
1360@dispatch.add_dispatch_support
1361def mean_absolute_percentage_error(y_true, y_pred):
1362 """Computes the mean absolute percentage error between `y_true` and `y_pred`.
1364 `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`
1366 Standalone usage:
1368 >>> y_true = np.random.random(size=(2, 3))
1369 >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero
1370 >>> y_pred = np.random.random(size=(2, 3))
1371 >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
1372 >>> assert loss.shape == (2,)
1373 >>> assert np.array_equal(
1374 ... loss.numpy(),
1375 ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))
1377 Args:
1378 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1379 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1381 Returns:
1382 Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
1383 """
1384 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1385 y_true = math_ops.cast(y_true, y_pred.dtype)
1386 diff = math_ops.abs(
1387 (y_true - y_pred) / backend.maximum(math_ops.abs(y_true),
1388 backend.epsilon()))
1389 return 100. * backend.mean(diff, axis=-1)
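# --- Illustrative sketch (not part of the original file) ---------------------
# Unlike the plain formula in the docstring, the implementation above clamps
# `abs(y_true)` to `backend.epsilon()` (1e-7 by default), so zero targets
# produce a large but finite loss instead of a division-by-zero inf.
def _example_mape_zero_targets():
  import tensorflow as tf
  y_true = [[0., 1.]]
  y_pred = [[0.5, 1.]]
  # 100 * mean([0.5 / 1e-7, 0.]) is roughly 2.5e8, not inf.
  return tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)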
1392@dispatch.dispatch_for_types(mean_absolute_percentage_error,
1393 ragged_tensor.RaggedTensor)
1394def _ragged_tensor_mape(y_true, y_pred):
1395 """Support RaggedTensors."""
1396 return _ragged_tensor_apply_loss(mean_absolute_percentage_error, y_true,
1397 y_pred)
1400@keras_export('keras.metrics.mean_squared_logarithmic_error',
1401 'keras.metrics.msle', 'keras.metrics.MSLE',
1402 'keras.losses.mean_squared_logarithmic_error',
1403 'keras.losses.msle', 'keras.losses.MSLE')
1404@dispatch.add_dispatch_support
1405def mean_squared_logarithmic_error(y_true, y_pred):
1406 """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
1408 `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`
1410 Standalone usage:
1412 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1413 >>> y_pred = np.random.random(size=(2, 3))
1414 >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
1415 >>> assert loss.shape == (2,)
1416 >>> y_true = np.maximum(y_true, 1e-7)
1417 >>> y_pred = np.maximum(y_pred, 1e-7)
1418 >>> assert np.allclose(
1419 ... loss.numpy(),
1420 ... np.mean(
1421 ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))
1423 Args:
1424 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1425 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1427 Returns:
1428 Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
1429 """
1430 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1431 y_true = math_ops.cast(y_true, y_pred.dtype)
1432 first_log = math_ops.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
1433 second_log = math_ops.log(backend.maximum(y_true, backend.epsilon()) + 1.)
1434 return backend.mean(
1435 math_ops.squared_difference(first_log, second_log), axis=-1)
1438@dispatch.dispatch_for_types(mean_squared_logarithmic_error,
1439 ragged_tensor.RaggedTensor)
1440def _ragged_tensor_msle(y_true, y_pred):
1441 """Implements support for handling RaggedTensors."""
1442 return _ragged_tensor_apply_loss(mean_squared_logarithmic_error, y_true,
1443 y_pred)
1446def _maybe_convert_labels(y_true):
1447 """Converts binary labels into -1/1."""
1448 are_zeros = math_ops.equal(y_true, 0)
1449 are_ones = math_ops.equal(y_true, 1)
1450 is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones))
1452 def _convert_binary_labels():
1453 # Convert the binary labels to -1 or 1.
1454 return 2. * y_true - 1.
1456 updated_y_true = smart_cond.smart_cond(is_binary, _convert_binary_labels,
1457 lambda: y_true)
1458 return updated_y_true
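# --- Illustrative sketch (not part of the original file) ---------------------
# The conversion performed by `_maybe_convert_labels` is simply `2 * y - 1`,
# and it is applied only when every label is exactly 0 or 1.
def _example_convert_binary_labels():
  import numpy as np
  y_true = np.array([[0., 1.], [0., 0.]])
  return 2. * y_true - 1.   # [[-1., 1.], [-1., -1.]]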
1461@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
1462@dispatch.add_dispatch_support
1463def squared_hinge(y_true, y_pred):
1464 """Computes the squared hinge loss between `y_true` and `y_pred`.
1466 `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`
1468 Standalone usage:
1470 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1471 >>> y_pred = np.random.random(size=(2, 3))
1472 >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
1473 >>> assert loss.shape == (2,)
1474 >>> assert np.array_equal(
1475 ... loss.numpy(),
1476 ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))
1478 Args:
1479 y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
1480 If binary (0 or 1) labels are provided, we will convert them to -1 or 1.
1481 shape = `[batch_size, d0, .. dN]`.
1482 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1484 Returns:
1485 Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1486 """
1487 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1488 y_true = math_ops.cast(y_true, y_pred.dtype)
1489 y_true = _maybe_convert_labels(y_true)
1490 return backend.mean(
1491 math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1)
1494@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
1495@dispatch.add_dispatch_support
1496def hinge(y_true, y_pred):
1497 """Computes the hinge loss between `y_true` and `y_pred`.
1499 `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`
1501 Standalone usage:
1503 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1504 >>> y_pred = np.random.random(size=(2, 3))
1505 >>> loss = tf.keras.losses.hinge(y_true, y_pred)
1506 >>> assert loss.shape == (2,)
1507 >>> assert np.array_equal(
1508 ... loss.numpy(),
1509 ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))
1511 Args:
1512 y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
1513 If binary (0 or 1) labels are provided, they will be converted to -1 or 1.
1514 shape = `[batch_size, d0, .. dN]`.
1515 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1517 Returns:
1518 Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1519 """
1520 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1521 y_true = math_ops.cast(y_true, y_pred.dtype)
1522 y_true = _maybe_convert_labels(y_true)
1523 return backend.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1)
1526@keras_export('keras.losses.categorical_hinge')
1527@dispatch.add_dispatch_support
1528def categorical_hinge(y_true, y_pred):
1529 """Computes the categorical hinge loss between `y_true` and `y_pred`.
1531 `loss = maximum(neg - pos + 1, 0)`
1532 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`
1534 Standalone usage:
1536 >>> y_true = np.random.randint(0, 3, size=(2,))
1537 >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
1538 >>> y_pred = np.random.random(size=(2, 3))
1539 >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
1540 >>> assert loss.shape == (2,)
1541 >>> pos = np.sum(y_true * y_pred, axis=-1)
1542 >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
1543 >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))
1545 Args:
1546 y_true: The ground truth values. `y_true` values are expected to be
1547 either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
1548 y_pred: The predicted values.
1550 Returns:
1551 Categorical hinge loss values.
1552 """
1553 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1554 y_true = math_ops.cast(y_true, y_pred.dtype)
1555 pos = math_ops.reduce_sum(y_true * y_pred, axis=-1)
1556 neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
1557 zero = math_ops.cast(0., y_pred.dtype)
1558 return math_ops.maximum(neg - pos + 1., zero)
1561@keras_export('keras.losses.huber', v1=[])
1562@dispatch.add_dispatch_support
1563def huber(y_true, y_pred, delta=1.0):
1564 """Computes Huber loss value.
1566 For each value x in `error = y_pred - y_true`:
1568 ```
1569 loss = 0.5 * x^2 if |x| <= d
1570 loss = d * |x| - 0.5 * d^2 if |x| > d
1571 ```
1572 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1574 Args:
1575 y_true: tensor of true targets.
1576 y_pred: tensor of predicted targets.
1577 delta: A float, the point where the Huber loss function changes from a
1578 quadratic to linear.
1580 Returns:
1581 Tensor with one scalar loss entry per sample.
1582 """
1583 y_pred = math_ops.cast(y_pred, dtype=backend.floatx())
1584 y_true = math_ops.cast(y_true, dtype=backend.floatx())
1585 delta = math_ops.cast(delta, dtype=backend.floatx())
1586 error = math_ops.subtract(y_pred, y_true)
1587 abs_error = math_ops.abs(error)
1588 half = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1589 0.5, dtype=abs_error.dtype
1590 )
1591 return backend.mean(
1592 array_ops.where_v2(abs_error <= delta, half * math_ops.square(error),
1593 delta * abs_error - half * math_ops.square(delta)),
1594 axis=-1)
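# A minimal NumPy reference for the piecewise Huber formula documented above;
# the helper name and sample values are hypothetical. It mirrors the same
# per-sample mean over the last axis that `huber` applies.
def _huber_reference_example(delta=1.0):
  import numpy as np
  y_true = np.array([[0.0, 1.0], [0.0, 0.0]])
  y_pred = np.array([[0.6, 0.4], [0.4, 2.6]])
  error = y_pred - y_true
  abs_error = np.abs(error)
  # Quadratic inside the delta band, linear with slope `delta` outside it.
  per_element = np.where(abs_error <= delta,
                         0.5 * np.square(error),
                         delta * abs_error - 0.5 * delta ** 2)
  return per_element.mean(axis=-1)  # one scalar loss per sample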
1597@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh',
1598 'keras.metrics.log_cosh', 'keras.metrics.logcosh')
1599@dispatch.add_dispatch_support
1600def log_cosh(y_true, y_pred):
1601 """Logarithm of the hyperbolic cosine of the prediction error.
1603 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
1604 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
1605 like the mean squared error, but will not be so strongly affected by the
1606 occasional wildly incorrect prediction.
1608 Standalone usage:
1610 >>> y_true = np.random.random(size=(2, 3))
1611 >>> y_pred = np.random.random(size=(2, 3))
1612 >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
1613 >>> assert loss.shape == (2,)
1614 >>> x = y_pred - y_true
1615 >>> assert np.allclose(
1616 ... loss.numpy(),
1617 ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - np.log(2.), axis=-1),
1618 ... atol=1e-5)
1620 Args:
1621 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1622 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1624 Returns:
1625 Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
1626 """
1627 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1628 y_true = math_ops.cast(y_true, y_pred.dtype)
1630 def _logcosh(x):
1631 return x + math_ops.softplus(-2. * x) - math_ops.cast(
1632 math_ops.log(2.), x.dtype)
1634 return backend.mean(_logcosh(y_pred - y_true), axis=-1)
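# A quick NumPy check of the two regimes described in the docstring above;
# the helper name and tolerances are hypothetical. log(cosh(x)) is close to
# x**2 / 2 for small x and close to |x| - log(2) for large x.
def _log_cosh_regimes_example():
  import numpy as np
  small, large = 1e-3, 20.0
  assert np.isclose(np.log(np.cosh(small)), small ** 2 / 2., atol=1e-9)
  assert np.isclose(np.log(np.cosh(large)), abs(large) - np.log(2.), atol=1e-6)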
1637@keras_export('keras.metrics.categorical_crossentropy',
1638 'keras.losses.categorical_crossentropy')
1639@dispatch.add_dispatch_support
1640def categorical_crossentropy(y_true,
1641 y_pred,
1642 from_logits=False,
1643 label_smoothing=0,
1644 axis=-1):
1645 """Computes the categorical crossentropy loss.
1647 Standalone usage:
1649 >>> y_true = [[0, 1, 0], [0, 0, 1]]
1650 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
1651 >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
1652 >>> assert loss.shape == (2,)
1653 >>> loss.numpy()
1654 array([0.0513, 2.303], dtype=float32)
1656 Args:
1657 y_true: Tensor of one-hot true targets.
1658 y_pred: Tensor of predicted targets.
1659 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1660 we assume that `y_pred` encodes a probability distribution.
1661 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
1662 example, if `0.1`, use `0.1 / num_classes` for non-target labels
1663 and `0.9 + 0.1 / num_classes` for target labels.
1664 axis: Defaults to -1. The dimension along which the entropy is
1665 computed.
1667 Returns:
1668 Categorical crossentropy loss value.
1669 """
1670 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1671 y_true = math_ops.cast(y_true, y_pred.dtype)
1672 label_smoothing = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1673 label_smoothing, dtype=backend.floatx()
1674 )
1676 def _smooth_labels():
1677 num_classes = math_ops.cast(array_ops.shape(y_true)[-1], y_pred.dtype)
1678 return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)
1680 y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
1681 lambda: y_true)
1683 return backend.categorical_crossentropy(
1684 y_true, y_pred, from_logits=from_logits, axis=axis)
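# A minimal NumPy sketch of the label-smoothing arithmetic performed by
# `_smooth_labels` above, for label_smoothing=0.1 and 3 classes; the helper
# name is hypothetical.
def _label_smoothing_example():
  import numpy as np
  y_true = np.array([[0., 1., 0.]])
  label_smoothing, num_classes = 0.1, 3
  smoothed = y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
  # Non-target classes get 0.1 / 3; the target class gets 0.9 + 0.1 / 3.
  assert np.allclose(smoothed, [[0.1 / 3, 0.9 + 0.1 / 3, 0.1 / 3]])
  return smoothed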
1687@dispatch.dispatch_for_types(categorical_crossentropy,
1688 ragged_tensor.RaggedTensor)
1689def _ragged_tensor_categorical_crossentropy(y_true,
1690 y_pred,
1691 from_logits=False,
1692 label_smoothing=0,
1693 axis=-1):
1694 """Implements support for handling RaggedTensors.
1696 Args:
1697 y_true: Tensor of one-hot true targets.
1698 y_pred: Tensor of predicted targets.
1699 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1700 we assume that `y_pred` encodes a probability distribution.
1701 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
1702 example, if `0.1`, use `0.1 / num_classes` for non-target labels
1703 and `0.9 + 0.1 / num_classes` for target labels.
1704 axis: The axis along which to compute crossentropy (the features axis).
1705 Defaults to -1.
1707 Returns:
1708 Categorical crossentropy loss value.
1710 Expected shape: (batch, sequence_len, n_classes) with sequence_len
1711 being variable per batch.
1712 Return shape: (batch, sequence_len).
1714 When used by CategoricalCrossentropy() with the default reduction
1715 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
1716 number of elements independent of the batch. E.g. if the RaggedTensor
1717 has 2 batches with [2, 1] values respectively, the resulting loss is
1718 the sum of the individual loss values divided by 3.
1719 """
1720 fn = functools.partial(
1721 categorical_crossentropy,
1722 from_logits=from_logits,
1723 label_smoothing=label_smoothing,
1724 axis=axis)
1725 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
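# A minimal sketch of the ragged reduction described above, assuming ragged
# inputs are supported end to end by the wrapper class as the docstring
# states; the helper name and sample values are hypothetical.
def _ragged_categorical_crossentropy_example():
  import tensorflow as tf
  # Two sequences with lengths [2, 1]; three classes per timestep.
  y_true = tf.ragged.constant([[[0., 1., 0.], [0., 0., 1.]],
                               [[1., 0., 0.]]], ragged_rank=1)
  y_pred = tf.ragged.constant([[[0.1, 0.8, 0.1], [0.2, 0.2, 0.6]],
                               [[0.7, 0.2, 0.1]]], ragged_rank=1)
  cce = tf.keras.losses.CategoricalCrossentropy()
  # With the default SUM_OVER_BATCH_SIZE reduction, the three per-timestep
  # losses are summed and divided by 3 (the total number of values), not by
  # the number of rows.
  return cce(y_true, y_pred)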
1728@keras_export('keras.metrics.sparse_categorical_crossentropy',
1729 'keras.losses.sparse_categorical_crossentropy')
1730@dispatch.add_dispatch_support
1731def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
1732 """Computes the sparse categorical crossentropy loss.
1734 Standalone usage:
1736 >>> y_true = [1, 2]
1737 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
1738 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
1739 >>> assert loss.shape == (2,)
1740 >>> loss.numpy()
1741 array([0.0513, 2.303], dtype=float32)
1743 Args:
1744 y_true: Ground truth values.
1745 y_pred: The predicted values.
1746 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1747 we assume that `y_pred` encodes a probability distribution.
1748 axis: Defaults to -1. The dimension along which the entropy is
1749 computed.
1751 Returns:
1752 Sparse categorical crossentropy loss value.
1753 """
1754 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1755 y_true = math_ops.cast(y_true, y_pred.dtype)
1756 return backend.sparse_categorical_crossentropy(
1757 y_true, y_pred, from_logits=from_logits, axis=axis)
1760@dispatch.dispatch_for_types(sparse_categorical_crossentropy,
1761 ragged_tensor.RaggedTensor)
1762def _ragged_tensor_sparse_categorical_crossentropy(y_true,
1763 y_pred,
1764 from_logits=False,
1765 axis=-1):
1766 """ Implements support for handling RaggedTensors.
1768 Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len
1769 being variable per batch.
1770 Return shape: (batch, sequence_len).
1772 When used by SparseCategoricalCrossentropy() with the default reduction
1773 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
1774 number of elements independent of the batch. E.g. if the RaggedTensor
1775 has 2 batches with [2, 1] values respectively, the resulting loss is
1776 the sum of the individual loss values divided by 3.
1777 """
1778 fn = functools.partial(
1779 sparse_categorical_crossentropy, from_logits=from_logits, axis=axis)
1780 return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True)
1783@keras_export('keras.metrics.binary_crossentropy',
1784 'keras.losses.binary_crossentropy')
1785@dispatch.add_dispatch_support
1786def binary_crossentropy(y_true,
1787 y_pred,
1788 from_logits=False,
1789 label_smoothing=0,
1790 axis=-1):
1791 """Computes the binary crossentropy loss.
1793 Standalone usage:
1795 >>> y_true = [[0, 1], [0, 0]]
1796 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1797 >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
1798 >>> assert loss.shape == (2,)
1799 >>> loss.numpy()
1800 array([0.916 , 0.714], dtype=float32)
1802 Args:
1803 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1804 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1805 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1806 we assume that `y_pred` encodes a probability distribution.
1807 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
1808 squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
1809 for the target class and `0.5 * label_smoothing` for the non-target class.
1810 axis: The axis along which the mean is computed. Defaults to -1.
1812 Returns:
1813 Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
1814 """
1815 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1816 y_true = math_ops.cast(y_true, y_pred.dtype)
1817 label_smoothing = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1818 label_smoothing, dtype=backend.floatx()
1819 )
1821 def _smooth_labels():
1822 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
1824 y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
1825 lambda: y_true)
1827 return backend.mean(
1828 backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
1829 axis=axis)
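# A minimal NumPy sketch of binary label smoothing as documented above; the
# helper name is hypothetical. Both label values are squeezed towards 0.5 by
# half the smoothing amount.
def _binary_label_smoothing_example():
  import numpy as np
  y_true = np.array([0., 1.])
  label_smoothing = 0.1
  smoothed = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
  assert np.allclose(smoothed, [0.05, 0.95])  # 0 -> 0.05, 1 -> 0.95
  return smoothed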
1832@dispatch.dispatch_for_types(binary_crossentropy, ragged_tensor.RaggedTensor)
1833def _ragged_tensor_binary_crossentropy(y_true,
1834 y_pred,
1835 from_logits=False,
1836 label_smoothing=0,
1837 axis=-1):
1838 """Implements support for handling RaggedTensors.
1840 Args:
1841 y_true: Tensor of true targets.
1842 y_pred: Tensor of predicted targets.
1843 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1844 we assume that `y_pred` encodes a probability distribution.
1845 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
1846 squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
1847 for the target class and `0.5 * label_smoothing` for the non-target class.
1848 axis: Axis along which to compute crossentropy.
1850 Returns:
1851 Binary crossentropy loss value.
1853 Expected shape: (batch, sequence_len) with sequence_len being variable
1854 per batch.
1855 Return shape: (batch,); returns the per batch mean of the loss values.
1857 When used by BinaryCrossentropy() with the default reduction
1858 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over
1859 the number of batches.
1860 """
1861 fn = functools.partial(
1862 binary_crossentropy,
1863 from_logits=from_logits,
1864 label_smoothing=label_smoothing,
1865 axis=axis)
1866 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
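# A minimal sketch of the per-batch behaviour described above, assuming ragged
# inputs are supported end to end by the wrapper class; the helper name and
# sample values are hypothetical.
def _ragged_binary_crossentropy_example():
  import tensorflow as tf
  # Two sequences with lengths [2, 1].
  y_true = tf.ragged.constant([[0., 1.], [1.]])
  y_pred = tf.ragged.constant([[0.4, 0.6], [0.8]])
  bce = tf.keras.losses.BinaryCrossentropy()
  # Each row is first reduced to its own mean loss; the default
  # SUM_OVER_BATCH_SIZE reduction then averages the two per-row values,
  # i.e. divides by 2 (the number of rows).
  return bce(y_true, y_pred)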
1869@keras_export('keras.metrics.kl_divergence',
1870 'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
1871 'keras.metrics.KLD', 'keras.losses.kl_divergence',
1872 'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
1873 'keras.losses.KLD')
1874@dispatch.add_dispatch_support
1875def kl_divergence(y_true, y_pred):
1876 """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.
1878 `loss = y_true * log(y_true / y_pred)`
1880 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
1882 Standalone usage:
1884 >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
1885 >>> y_pred = np.random.random(size=(2, 3))
1886 >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
1887 >>> assert loss.shape == (2,)
1888 >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
1889 >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
1890 >>> assert np.array_equal(
1891 ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))
1893 Args:
1894 y_true: Tensor of true targets.
1895 y_pred: Tensor of predicted targets.
1897 Returns:
1898 A `Tensor` with loss.
1900 Raises:
1901 TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
1902 """
1903 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1904 y_true = math_ops.cast(y_true, y_pred.dtype)
1905 y_true = backend.clip(y_true, backend.epsilon(), 1)
1906 y_pred = backend.clip(y_pred, backend.epsilon(), 1)
1907 return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
1910@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
1911@dispatch.add_dispatch_support
1912def poisson(y_true, y_pred):
1913 """Computes the Poisson loss between y_true and y_pred.
1915 The Poisson loss is the mean of the elements of the `Tensor`
1916 `y_pred - y_true * log(y_pred)`.
1918 Standalone usage:
1920 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1921 >>> y_pred = np.random.random(size=(2, 3))
1922 >>> loss = tf.keras.losses.poisson(y_true, y_pred)
1923 >>> assert loss.shape == (2,)
1924 >>> y_pred = y_pred + 1e-7
1925 >>> assert np.allclose(
1926 ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
1927 ... atol=1e-5)
1929 Args:
1930 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1931 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1933 Returns:
1934 Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.
1936 Raises:
1937 InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
1938 """
1939 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1940 y_true = math_ops.cast(y_true, y_pred.dtype)
1941 return backend.mean(
1942 y_pred - y_true * math_ops.log(y_pred + backend.epsilon()), axis=-1)
1945@keras_export(
1946 'keras.losses.cosine_similarity',
1947 v1=[
1948 'keras.metrics.cosine_proximity',
1949 'keras.metrics.cosine',
1950 'keras.losses.cosine_proximity',
1951 'keras.losses.cosine',
1952 'keras.losses.cosine_similarity',
1953 ])
1954@dispatch.add_dispatch_support
1955def cosine_similarity(y_true, y_pred, axis=-1):
1956 """Computes the cosine similarity between labels and predictions.
1958 Note that the result is a number between -1 and 1, i.e. the negative of the
1959 cosine similarity: 0 indicates orthogonality, values closer to -1 indicate
1960 greater similarity, and values closer to 1 indicate greater
1961 dissimilarity. This makes it usable as a loss function in a setting
1962 where you try to maximize the proximity between predictions and
1963 targets. If either `y_true` or `y_pred` is a zero vector, cosine
1964 similarity will be 0 regardless of the proximity between predictions
1965 and targets.
1967 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
1969 Standalone usage:
1971 >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
1972 >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
1973 >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
1974 >>> loss.numpy()
1975 array([-0., -0.999, 0.999], dtype=float32)
1977 Args:
1978 y_true: Tensor of true targets.
1979 y_pred: Tensor of predicted targets.
1980 axis: Axis along which to determine similarity.
1982 Returns:
1983 Cosine similarity tensor.
1984 """
1985 y_true = nn.l2_normalize(y_true, axis=axis)
1986 y_pred = nn.l2_normalize(y_pred, axis=axis)
1987 return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
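# A minimal NumPy sketch of the sign convention described above; the helper
# name is hypothetical. The returned loss is the negative cosine similarity:
# -1 for identical directions, 0 for orthogonal vectors, +1 for opposite ones.
def _cosine_similarity_sign_example():
  import numpy as np
  def loss(a, b):
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    return -np.sum(a * b)
  assert np.isclose(loss(np.array([1., 1.]), np.array([2., 2.])), -1.)
  assert np.isclose(loss(np.array([1., 0.]), np.array([0., 1.])), 0.)
  assert np.isclose(loss(np.array([1., 0.]), np.array([-1., 0.])), 1.)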
1990@keras_export('keras.losses.CosineSimilarity')
1991class CosineSimilarity(LossFunctionWrapper):
1992 """Computes the cosine similarity between labels and predictions.
1994 Note that the result is a number between -1 and 1, i.e. the negative of the
1995 cosine similarity: 0 indicates orthogonality, values closer to -1 indicate
1996 greater similarity, and values closer to 1 indicate greater
1997 dissimilarity. This makes it usable as a loss function in a setting
1998 where you try to maximize the proximity between predictions and targets.
1999 If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
2000 regardless of the proximity between predictions and targets.
2002 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
2004 Standalone usage:
2006 >>> y_true = [[0., 1.], [1., 1.]]
2007 >>> y_pred = [[1., 0.], [1., 1.]]
2008 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
2009 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
2010 >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
2011 >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
2012 >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
2013 >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
2014 >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2
2015 >>> cosine_loss(y_true, y_pred).numpy()
2016 -0.5
2018 >>> # Calling with 'sample_weight'.
2019 >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
2020 -0.0999
2022 >>> # Using 'sum' reduction type.
2023 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2024 ... reduction=tf.keras.losses.Reduction.SUM)
2025 >>> cosine_loss(y_true, y_pred).numpy()
2026 -0.999
2028 >>> # Using 'none' reduction type.
2029 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2030 ... reduction=tf.keras.losses.Reduction.NONE)
2031 >>> cosine_loss(y_true, y_pred).numpy()
2032 array([-0., -0.999], dtype=float32)
2034 Usage with the `compile()` API:
2036 ```python
2037 model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))
2038 ```
2040 Args:
2041 axis: The axis along which the cosine similarity is computed
2042 (the features axis). Defaults to -1.
2043 reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
2044 Default value is `AUTO`. `AUTO` indicates that the reduction option will
2045 be determined by the usage context. For almost all cases this defaults to
2046 `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of
2047 built-in training loops such as `tf.keras` `compile` and `fit`, using
2048 `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
2049 custom training [tutorial]
2050 (https://www.tensorflow.org/tutorials/distribute/custom_training) for more
2051 details.
2052 name: Optional name for the instance.
2053 """
2055 def __init__(self,
2056 axis=-1,
2057 reduction=losses_utils.ReductionV2.AUTO,
2058 name='cosine_similarity'):
2059 super().__init__(
2060 cosine_similarity, reduction=reduction, name=name, axis=axis)
2063# Aliases.
2065bce = BCE = binary_crossentropy
2066mse = MSE = mean_squared_error
2067mae = MAE = mean_absolute_error
2068mape = MAPE = mean_absolute_percentage_error
2069msle = MSLE = mean_squared_logarithmic_error
2070kld = KLD = kullback_leibler_divergence = kl_divergence
2071logcosh = log_cosh
2072huber_loss = huber
2075def is_categorical_crossentropy(loss):
2076 result = ((isinstance(loss, CategoricalCrossentropy) or
2077 (isinstance(loss, LossFunctionWrapper) and
2078 loss.fn == categorical_crossentropy) or
2079 (hasattr(loss, '__name__') and
2080 loss.__name__ == 'categorical_crossentropy') or
2081 (loss == 'categorical_crossentropy')))
2082 return result
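# A minimal sketch of the identifier forms accepted by
# `is_categorical_crossentropy`; the helper name is hypothetical and it uses
# the loss classes and functions defined in this module.
def _is_categorical_crossentropy_example():
  assert is_categorical_crossentropy(CategoricalCrossentropy())
  assert is_categorical_crossentropy(categorical_crossentropy)
  assert is_categorical_crossentropy('categorical_crossentropy')
  assert not is_categorical_crossentropy(mean_squared_error)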
2085@keras_export('keras.losses.serialize')
2086def serialize(loss):
2087 """Serializes loss function or `Loss` instance.
2089 Args:
2090 loss: A Keras `Loss` instance or a loss function.
2092 Returns:
2093 Loss configuration dictionary.
2094 """
2095 return serialize_keras_object(loss)
2098@keras_export('keras.losses.deserialize')
2099def deserialize(name, custom_objects=None):
2100 """Deserializes a serialized loss class/function instance.
2102 Args:
2103 name: Loss configuration.
2104 custom_objects: Optional dictionary mapping names (strings) to custom
2105 objects (classes and functions) to be considered during deserialization.
2107 Returns:
2108 A Keras `Loss` instance or a loss function.
2109 """
2110 return deserialize_keras_object(
2111 name,
2112 module_objects=globals(),
2113 custom_objects=custom_objects,
2114 printable_module_name='loss function')
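# A minimal round-trip sketch for `serialize` and `deserialize`; the helper
# name is hypothetical, and it assumes the `CategoricalCrossentropy` class
# defined earlier in this module.
def _serialize_deserialize_example():
  loss = CategoricalCrossentropy(from_logits=True)
  config = serialize(loss)          # -> {'class_name': ..., 'config': {...}}
  restored = deserialize(config)
  assert isinstance(restored, CategoricalCrossentropy)
  assert restored.get_config()['from_logits']
  return restored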
2117@keras_export('keras.losses.get')
2118def get(identifier):
2119 """Retrieves a Keras loss as a `function`/`Loss` class instance.
2121 The `identifier` may be the string name of a loss function or `Loss` class.
2123 >>> loss = tf.keras.losses.get("categorical_crossentropy")
2124 >>> type(loss)
2125 <class 'function'>
2126 >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
2127 >>> type(loss)
2128 <class '...keras.losses.CategoricalCrossentropy'>
2130 You can also specify `config` of the loss to this function by passing a dict
2131 containing `class_name` and `config` as an identifier. Also note that the
2132 `class_name` must map to a `Loss` class.
2134 >>> identifier = {"class_name": "CategoricalCrossentropy",
2135 ... "config": {"from_logits": True}}
2136 >>> loss = tf.keras.losses.get(identifier)
2137 >>> type(loss)
2138 <class '...keras.losses.CategoricalCrossentropy'>
2140 Args:
2141 identifier: A loss identifier. One of None or string name of a loss
2142 function/class or loss configuration dictionary or a loss function or a
2143 loss class instance.
2145 Returns:
2146 A Keras loss as a `function`/ `Loss` class instance.
2148 Raises:
2149 ValueError: If `identifier` cannot be interpreted.
2150 """
2151 if identifier is None:
2152 return None
2153 if isinstance(identifier, str):
2154 identifier = str(identifier)
2155 return deserialize(identifier)
2156 if isinstance(identifier, dict):
2157 return deserialize(identifier)
2158 if callable(identifier):
2159 return identifier
2160 raise ValueError(
2161 f'Could not interpret loss function identifier: {identifier}')
2164LABEL_DTYPES_FOR_LOSSES = {
2165 losses_impl.sparse_softmax_cross_entropy: 'int32',
2166 sparse_categorical_crossentropy: 'int32'
2167}