Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/losses.py: 41%
425 statements
coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
16"""Built-in loss functions."""
19import abc
20import functools
21import warnings
23import tensorflow.compat.v2 as tf
25from keras.src import backend
26from keras.src.saving import saving_lib
27from keras.src.saving.legacy import serialization as legacy_serialization
28from keras.src.saving.serialization_lib import deserialize_keras_object
29from keras.src.saving.serialization_lib import serialize_keras_object
30from keras.src.utils import losses_utils
31from keras.src.utils import tf_utils
33# isort: off
34from tensorflow.python.ops.ragged import ragged_map_ops
35from tensorflow.python.ops.ragged import ragged_util
36from tensorflow.python.util import dispatch
37from tensorflow.python.util.tf_export import keras_export
38from tensorflow.tools.docs import doc_controls
41@keras_export("keras.losses.Loss")
42class Loss:
43 """Loss base class.
45 To be implemented by subclasses:
46 * `call()`: Contains the logic for loss calculation using `y_true`,
47 `y_pred`.
49 Example subclass implementation:
51 ```python
52 class MeanSquaredError(Loss):
54 def call(self, y_true, y_pred):
55 return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
56 ```
58 When using a `Loss` under a `tf.distribute.Strategy`, except when passing
59 it to `Model.compile()` for use by `Model.fit()`, please use reduction
60 types 'SUM' or 'NONE', and reduce losses explicitly. Using 'AUTO' or
61 'SUM_OVER_BATCH_SIZE' will raise an error when calling the Loss object
62 from a custom training loop or from user-defined code in `Layer.call()`.
63 Please see this custom training
64 [tutorial](https://www.tensorflow.org/tutorials/distribute/custom_training)
65 for more details on this.
66 """
68 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
69 """Initializes `Loss` class.
71 Args:
72 reduction: Type of `tf.keras.losses.Reduction` to apply to
73 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
74 option will be determined by the usage context. For almost all cases
75 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
76 `tf.distribute.Strategy`, except via `Model.compile()` and
77 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
78 will raise an error. Please see this custom training [tutorial](
79 https://www.tensorflow.org/tutorials/distribute/custom_training)
80 for more details.
81 name: Optional name for the instance.
82 """
83 losses_utils.ReductionV2.validate(reduction)
84 self.reduction = reduction
85 self.name = name
86 # SUM_OVER_BATCH_SIZE is only allowed in losses managed by `fit` or
87 # CannedEstimators.
88 self._allow_sum_over_batch_size = False
89 self._set_name_scope()
91 def _set_name_scope(self):
92 """Creates a valid `name_scope` name."""
93 if self.name is None:
94 self._name_scope = self.__class__.__name__.strip("_")
95 elif self.name == "<lambda>":
96 self._name_scope = "lambda"
97 else:
98 # E.g. '_my_loss' => 'my_loss'
99 self._name_scope = self.name.strip("_")
101 def __call__(self, y_true, y_pred, sample_weight=None):
102 """Invokes the `Loss` instance.
104 Args:
105 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
106 sparse loss functions such as sparse categorical crossentropy where
107 shape = `[batch_size, d0, .. dN-1]`
108 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
109 sample_weight: Optional `sample_weight` acts as a coefficient for the
110 loss. If a scalar is provided, then the loss is simply scaled by the
111 given value. If `sample_weight` is a tensor of size `[batch_size]`,
112 then the total loss for each sample of the batch is rescaled by the
113 corresponding element in the `sample_weight` vector. If the shape of
114 `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
115 broadcasted to this shape), then each loss element of `y_pred` is
116 scaled by the corresponding value of `sample_weight`. (Note
117 on `dN-1`: all loss functions reduce by 1 dimension, usually
118 axis=-1.)
120 Returns:
121 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
122 shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note
123 `dN-1` because all loss functions reduce by 1 dimension, usually
124 axis=-1.)
126 Raises:
127 ValueError: If the shape of `sample_weight` is invalid.
128 """
129 # If we are wrapping a lambda function strip '<>' from the name as it is
130 # not accepted in scope name.
131 graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
132 y_true, y_pred, sample_weight
133 )
134 with backend.name_scope(self._name_scope), graph_ctx:
135 if tf.executing_eagerly():
136 call_fn = self.call
137 else:
138 call_fn = tf.__internal__.autograph.tf_convert(
139 self.call, tf.__internal__.autograph.control_status_ctx()
140 )
142 losses = call_fn(y_true, y_pred)
144 in_mask = losses_utils.get_mask(y_pred)
145 out_mask = losses_utils.get_mask(losses)
147 if in_mask is not None and out_mask is not None:
148 mask = in_mask & out_mask
149 elif in_mask is not None:
150 mask = in_mask
151 elif out_mask is not None:
152 mask = out_mask
153 else:
154 mask = None
156 reduction = self._get_reduction()
157 sample_weight = losses_utils.apply_valid_mask(
158 losses, sample_weight, mask, reduction
159 )
160 return losses_utils.compute_weighted_loss(
161 losses, sample_weight, reduction=reduction
162 )
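To make the `sample_weight` semantics documented in `__call__` above concrete, here is a small illustrative sketch (not part of the module) using the built-in `MeanSquaredError`; the 0.5 and 0.25 values match that class's docstring further below, and a scalar weight simply scales the reduced loss as described:

```python
import tensorflow as tf

y_true = [[0., 1.], [0., 0.]]
y_pred = [[1., 1.], [1., 0.]]
mse = tf.keras.losses.MeanSquaredError()

mse(y_true, y_pred).numpy()                            # 0.5 (per-sample [0.5, 0.5])
mse(y_true, y_pred, sample_weight=2.0).numpy()         # 1.0 (scalar just scales)
# A [batch_size] vector rescales each sample's loss before reduction:
# (0.5 * 0.7 + 0.5 * 0.3) / 2 = 0.25
mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()  # 0.25
```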
164 @classmethod
165 def from_config(cls, config):
166 """Instantiates a `Loss` from its config (output of `get_config()`).
168 Args:
169 config: Output of `get_config()`.
171 Returns:
172 A `Loss` instance.
173 """
174 return cls(**config)
176 def get_config(self):
177 """Returns the config dictionary for a `Loss` instance."""
178 return {"reduction": self.reduction, "name": self.name}
180 @abc.abstractmethod
181 @doc_controls.for_subclass_implementers
182 def call(self, y_true, y_pred):
183 """Invokes the `Loss` instance.
185 Args:
186 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
187 sparse loss functions such as sparse categorical crossentropy where
188 shape = `[batch_size, d0, .. dN-1]`
189 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
191 Returns:
192 Loss values with the shape `[batch_size, d0, .. dN-1]`.
193 """
194 raise NotImplementedError("Must be implemented in subclasses.")
196 def _get_reduction(self):
197 """Handles `AUTO` reduction cases and returns the reduction value."""
198 if (
199 not self._allow_sum_over_batch_size
200 and tf.distribute.has_strategy()
201 and (
202 self.reduction == losses_utils.ReductionV2.AUTO
203 or self.reduction
204 == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
205 )
206 ):
207 raise ValueError(
208 "Please use `tf.keras.losses.Reduction.SUM` or "
209 "`tf.keras.losses.Reduction.NONE` for loss reduction when "
210 "losses are used with `tf.distribute.Strategy`, "
211 "except for specifying losses in `Model.compile()` "
212 "for use by the built-in training looop `Model.fit()`.\n"
213 "Please see https://www.tensorflow.org/tutorials"
214 "/distribute/custom_training for more details."
215 )
217 if self.reduction == losses_utils.ReductionV2.AUTO:
218 return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
219 return self.reduction
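The `ValueError` raised above points users toward explicit reduction. Below is a minimal sketch of what that looks like in a custom training loop under `tf.distribute.Strategy`, following the tutorial linked in the docstring; `GLOBAL_BATCH_SIZE` and the surrounding training code are illustrative placeholders, not part of this module:

```python
import tensorflow as tf

GLOBAL_BATCH_SIZE = 64  # placeholder
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    loss_obj = tf.keras.losses.MeanSquaredError(
        reduction=tf.keras.losses.Reduction.NONE)

def compute_loss(y_true, y_pred):
    # Per-example losses, reduced explicitly by the *global* batch size
    # instead of relying on AUTO / SUM_OVER_BATCH_SIZE (which raises here).
    per_example_loss = loss_obj(y_true, y_pred)
    return tf.nn.compute_average_loss(
        per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)
```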
222@keras_export("keras.__internal__.losses.LossFunctionWrapper", v1=[])
223class LossFunctionWrapper(Loss):
224 """Wraps a loss function in the `Loss` class."""
226 def __init__(
227 self, fn, reduction=losses_utils.ReductionV2.AUTO, name=None, **kwargs
228 ):
229 """Initializes `LossFunctionWrapper` class.
231 Args:
232 fn: The loss function to wrap, with signature `fn(y_true, y_pred,
233 **kwargs)`.
234 reduction: Type of `tf.keras.losses.Reduction` to apply to
235 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
236 option will be determined by the usage context. For almost all cases
237 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
238 `tf.distribute.Strategy`, except via `Model.compile()` and
239 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
240 will raise an error. Please see this custom training [tutorial](
241 https://www.tensorflow.org/tutorials/distribute/custom_training)
242 for more details.
243 name: Optional name for the instance.
244 **kwargs: The keyword arguments that are passed on to `fn`.
245 """
246 super().__init__(reduction=reduction, name=name)
247 self.fn = fn
248 self._fn_kwargs = kwargs
250 def call(self, y_true, y_pred):
251 """Invokes the `LossFunctionWrapper` instance.
253 Args:
254 y_true: Ground truth values.
255 y_pred: The predicted values.
257 Returns:
258 Loss values per sample.
259 """
260 if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
261 y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
262 y_pred, y_true
263 )
265 ag_fn = tf.__internal__.autograph.tf_convert(
266 self.fn, tf.__internal__.autograph.control_status_ctx()
267 )
268 return ag_fn(y_true, y_pred, **self._fn_kwargs)
270 def get_config(self):
271 config = {}
272 for k, v in self._fn_kwargs.items():
273 config[k] = (
274 backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v
275 )
277 if saving_lib.saving_v3_enabled():
278 from keras.src.utils import get_registered_name
280 config["fn"] = get_registered_name(self.fn)
282 base_config = super().get_config()
283 return dict(list(base_config.items()) + list(config.items()))
285 @classmethod
286 def from_config(cls, config):
287 """Instantiates a `Loss` from its config (output of `get_config()`).
289 Args:
290 config: Output of `get_config()`.
292 Returns:
293 A `keras.losses.Loss` instance.
294 """
295 if saving_lib.saving_v3_enabled():
296 fn_name = config.pop("fn", None)
297 if fn_name and cls is LossFunctionWrapper:
298 config["fn"] = get(fn_name)
299 return cls(**config)
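A short sketch of how the wrapper above is typically used: extra keyword arguments are captured in `_fn_kwargs` and forwarded to `fn` on every call. The `scaled_mse` function and its `scale` argument are made up purely for illustration:

```python
import tensorflow as tf

def scaled_mse(y_true, y_pred, scale=1.0):
    # Hypothetical per-sample loss with one extra keyword argument.
    return scale * tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)

loss_obj = LossFunctionWrapper(scaled_mse, name="scaled_mse", scale=2.0)
y_true = tf.constant([[0., 1.]])
y_pred = tf.constant([[1., 1.]])
loss_obj(y_true, y_pred).numpy()  # 0.5 * 2.0 = 1.0
```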
302@keras_export("keras.losses.MeanSquaredError")
303class MeanSquaredError(LossFunctionWrapper):
304 """Computes the mean of squares of errors between labels and predictions.
306 `loss = mean(square(y_true - y_pred))`
308 Standalone usage:
310 >>> y_true = [[0., 1.], [0., 0.]]
311 >>> y_pred = [[1., 1.], [1., 0.]]
312 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
313 >>> mse = tf.keras.losses.MeanSquaredError()
314 >>> mse(y_true, y_pred).numpy()
315 0.5
317 >>> # Calling with 'sample_weight'.
318 >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
319 0.25
321 >>> # Using 'sum' reduction type.
322 >>> mse = tf.keras.losses.MeanSquaredError(
323 ... reduction=tf.keras.losses.Reduction.SUM)
324 >>> mse(y_true, y_pred).numpy()
325 1.0
327 >>> # Using 'none' reduction type.
328 >>> mse = tf.keras.losses.MeanSquaredError(
329 ... reduction=tf.keras.losses.Reduction.NONE)
330 >>> mse(y_true, y_pred).numpy()
331 array([0.5, 0.5], dtype=float32)
333 Usage with the `compile()` API:
335 ```python
336 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
337 ```
338 """
340 def __init__(
341 self, reduction=losses_utils.ReductionV2.AUTO, name="mean_squared_error"
342 ):
343 """Initializes `MeanSquaredError` instance.
345 Args:
346 reduction: Type of `tf.keras.losses.Reduction` to apply to
347 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
348 option will be determined by the usage context. For almost all cases
349 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
350 `tf.distribute.Strategy`, except via `Model.compile()` and
351 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
352 will raise an error. Please see this custom training [tutorial](
353 https://www.tensorflow.org/tutorials/distribute/custom_training)
354 for more details.
355 name: Optional name for the instance. Defaults to
356 'mean_squared_error'.
357 """
358 super().__init__(mean_squared_error, name=name, reduction=reduction)
361@keras_export("keras.losses.MeanAbsoluteError")
362class MeanAbsoluteError(LossFunctionWrapper):
363 """Computes the mean of absolute difference between labels and predictions.
365 `loss = mean(abs(y_true - y_pred))`
367 Standalone usage:
369 >>> y_true = [[0., 1.], [0., 0.]]
370 >>> y_pred = [[1., 1.], [1., 0.]]
371 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
372 >>> mae = tf.keras.losses.MeanAbsoluteError()
373 >>> mae(y_true, y_pred).numpy()
374 0.5
376 >>> # Calling with 'sample_weight'.
377 >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
378 0.25
380 >>> # Using 'sum' reduction type.
381 >>> mae = tf.keras.losses.MeanAbsoluteError(
382 ... reduction=tf.keras.losses.Reduction.SUM)
383 >>> mae(y_true, y_pred).numpy()
384 1.0
386 >>> # Using 'none' reduction type.
387 >>> mae = tf.keras.losses.MeanAbsoluteError(
388 ... reduction=tf.keras.losses.Reduction.NONE)
389 >>> mae(y_true, y_pred).numpy()
390 array([0.5, 0.5], dtype=float32)
392 Usage with the `compile()` API:
394 ```python
395 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
396 ```
397 """
399 def __init__(
400 self,
401 reduction=losses_utils.ReductionV2.AUTO,
402 name="mean_absolute_error",
403 ):
404 """Initializes `MeanAbsoluteError` instance.
406 Args:
407 reduction: Type of `tf.keras.losses.Reduction` to apply to
408 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
409 option will be determined by the usage context. For almost all cases
410 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
411 `tf.distribute.Strategy`, except via `Model.compile()` and
412 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
413 will raise an error. Please see this custom training [tutorial](
414 https://www.tensorflow.org/tutorials/distribute/custom_training)
415 for more details.
416 name: Optional name for the instance. Defaults to
417 'mean_absolute_error'.
418 """
419 super().__init__(mean_absolute_error, name=name, reduction=reduction)
422@keras_export("keras.losses.MeanAbsolutePercentageError")
423class MeanAbsolutePercentageError(LossFunctionWrapper):
424 """Computes the mean absolute percentage error between `y_true` & `y_pred`.
426 Formula:
428 `loss = 100 * abs((y_true - y_pred) / y_true)`
430 Note that to avoid dividing by zero, a small epsilon value
431 is added to the denominator.
433 Standalone usage:
435 >>> y_true = [[2., 1.], [2., 3.]]
436 >>> y_pred = [[1., 1.], [1., 0.]]
437 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
438 >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
439 >>> mape(y_true, y_pred).numpy()
440 50.
442 >>> # Calling with 'sample_weight'.
443 >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
444 20.
446 >>> # Using 'sum' reduction type.
447 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
448 ... reduction=tf.keras.losses.Reduction.SUM)
449 >>> mape(y_true, y_pred).numpy()
450 100.
452 >>> # Using 'none' reduction type.
453 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
454 ... reduction=tf.keras.losses.Reduction.NONE)
455 >>> mape(y_true, y_pred).numpy()
456 array([25., 75.], dtype=float32)
458 Usage with the `compile()` API:
460 ```python
461 model.compile(optimizer='sgd',
462 loss=tf.keras.losses.MeanAbsolutePercentageError())
463 ```
464 """
466 def __init__(
467 self,
468 reduction=losses_utils.ReductionV2.AUTO,
469 name="mean_absolute_percentage_error",
470 ):
471 """Initializes `MeanAbsolutePercentageError` instance.
473 Args:
474 reduction: Type of `tf.keras.losses.Reduction` to apply to
475 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
476 option will be determined by the usage context. For almost all cases
477 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
478 `tf.distribute.Strategy`, except via `Model.compile()` and
479 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
480 will raise an error. Please see this custom training [tutorial](
481 https://www.tensorflow.org/tutorials/distribute/custom_training)
482 for more details.
483 name: Optional name for the instance. Defaults to
484 'mean_absolute_percentage_error'.
485 """
486 super().__init__(
487 mean_absolute_percentage_error, name=name, reduction=reduction
488 )
491@keras_export("keras.losses.MeanSquaredLogarithmicError")
492class MeanSquaredLogarithmicError(LossFunctionWrapper):
493 """Computes the mean squared logarithmic error between `y_true` & `y_pred`.
495 `loss = square(log(y_true + 1.) - log(y_pred + 1.))`
497 Standalone usage:
499 >>> y_true = [[0., 1.], [0., 0.]]
500 >>> y_pred = [[1., 1.], [1., 0.]]
501 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
502 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
503 >>> msle(y_true, y_pred).numpy()
504 0.240
506 >>> # Calling with 'sample_weight'.
507 >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
508 0.120
510 >>> # Using 'sum' reduction type.
511 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
512 ... reduction=tf.keras.losses.Reduction.SUM)
513 >>> msle(y_true, y_pred).numpy()
514 0.480
516 >>> # Using 'none' reduction type.
517 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
518 ... reduction=tf.keras.losses.Reduction.NONE)
519 >>> msle(y_true, y_pred).numpy()
520 array([0.240, 0.240], dtype=float32)
522 Usage with the `compile()` API:
524 ```python
525 model.compile(optimizer='sgd',
526 loss=tf.keras.losses.MeanSquaredLogarithmicError())
527 ```
528 """
530 def __init__(
531 self,
532 reduction=losses_utils.ReductionV2.AUTO,
533 name="mean_squared_logarithmic_error",
534 ):
535 """Initializes `MeanSquaredLogarithmicError` instance.
537 Args:
538 reduction: Type of `tf.keras.losses.Reduction` to apply to
539 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
540 option will be determined by the usage context. For almost all cases
541 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
542 `tf.distribute.Strategy`, except via `Model.compile()` and
543 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
544 will raise an error. Please see this custom training [tutorial](
545 https://www.tensorflow.org/tutorials/distribute/custom_training)
546 for more details.
547 name: Optional name for the instance. Defaults to
548 'mean_squared_logarithmic_error'.
549 """
550 super().__init__(
551 mean_squared_logarithmic_error, name=name, reduction=reduction
552 )
555@keras_export("keras.losses.BinaryCrossentropy")
556class BinaryCrossentropy(LossFunctionWrapper):
557 """Computes the cross-entropy loss between true labels and predicted labels.
559 Use this cross-entropy loss for binary (0 or 1) classification applications.
560 The loss function requires the following inputs:
562 - `y_true` (true label): This is either 0 or 1.
563 - `y_pred` (predicted value): This is the model's prediction, i.e., a single
564 floating-point value which either represents a
565 [logit](https://en.wikipedia.org/wiki/Logit) (i.e., a value in [-inf, inf]
566 when `from_logits=True`) or a probability (i.e., a value in [0., 1.] when
567 `from_logits=False`).
569 **Recommended Usage:** (set `from_logits=True`)
571 With `tf.keras` API:
573 ```python
574 model.compile(
575 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
576 ....
577 )
578 ```
580 As a standalone function:
582 >>> # Example 1: (batch_size = 1, number of samples = 4)
583 >>> y_true = [0, 1, 0, 0]
584 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
585 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
586 >>> bce(y_true, y_pred).numpy()
587 0.865
589 >>> # Example 2: (batch_size = 2, number of samples = 4)
590 >>> y_true = [[0, 1], [0, 0]]
591 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
592 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
593 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
594 >>> bce(y_true, y_pred).numpy()
595 0.865
596 >>> # Using 'sample_weight' attribute
597 >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
598 0.243
599 >>> # Using 'sum' reduction type.
600 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
601 ... reduction=tf.keras.losses.Reduction.SUM)
602 >>> bce(y_true, y_pred).numpy()
603 1.730
604 >>> # Using 'none' reduction type.
605 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
606 ... reduction=tf.keras.losses.Reduction.NONE)
607 >>> bce(y_true, y_pred).numpy()
608 array([0.235, 1.496], dtype=float32)
610 **Default Usage:** (set `from_logits=False`)
612 >>> # Make the following updates to the above "Recommended Usage" section
613 >>> # 1. Set `from_logits=False`
614 >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
615 >>> # 2. Update `y_pred` to use probabilities instead of logits
616 >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
617 """
619 def __init__(
620 self,
621 from_logits=False,
622 label_smoothing=0.0,
623 axis=-1,
624 reduction=losses_utils.ReductionV2.AUTO,
625 name="binary_crossentropy",
626 ):
627 """Initializes `BinaryCrossentropy` instance.
629 Args:
630 from_logits: Whether to interpret `y_pred` as a tensor of
631 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
632 assume that `y_pred` contains probabilities (i.e., values in [0,
633 1]).
634 label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When >
635 0, we compute the loss between the predicted labels and a smoothed
636 version of the true labels, where the smoothing squeezes the labels
637 towards 0.5. Larger values of `label_smoothing` correspond to
638 heavier smoothing.
639 axis: The axis along which to compute crossentropy (the features
640 axis). Defaults to -1.
641 reduction: Type of `tf.keras.losses.Reduction` to apply to
642 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
643 option will be determined by the usage context. For almost all cases
644 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
645 `tf.distribute.Strategy`, except via `Model.compile()` and
646 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
647 will raise an error. Please see this custom training [tutorial](
648 https://www.tensorflow.org/tutorials/distribute/custom_training)
649 for more details.
650 name: Name for the op. Defaults to 'binary_crossentropy'.
651 """
652 super().__init__(
653 binary_crossentropy,
654 name=name,
655 reduction=reduction,
656 from_logits=from_logits,
657 label_smoothing=label_smoothing,
658 axis=axis,
659 )
660 self.from_logits = from_logits
663@keras_export("keras.losses.BinaryFocalCrossentropy")
664class BinaryFocalCrossentropy(LossFunctionWrapper):
665 """Computes focal cross-entropy loss between true labels and predictions.
667 Binary cross-entropy loss is often used for binary (0 or 1) classification
668 tasks. The loss function requires the following inputs:
670 - `y_true` (true label): This is either 0 or 1.
671 - `y_pred` (predicted value): This is the model's prediction, i.e., a single
672 floating-point value which either represents a
673 [logit](https://en.wikipedia.org/wiki/Logit) (i.e., a value in [-inf, inf]
674 when `from_logits=True`) or a probability (i.e., a value in `[0., 1.]` when
675 `from_logits=False`).
677 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
678 helps to apply a "focal factor" to down-weight easy examples and focus more
679 on hard examples. By default, the focal tensor is computed as follows:
681 `focal_factor = (1 - output) ** gamma` for class 1
682 `focal_factor = output ** gamma` for class 0
683 where `gamma` is a focusing parameter. When `gamma=0`, this function is
684 equivalent to the binary crossentropy loss.
686 With the `compile()` API:
688 ```python
689 model.compile(
690 loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True),
691 ....
692 )
693 ```
695 As a standalone function:
697 >>> # Example 1: (batch_size = 1, number of samples = 4)
698 >>> y_true = [0, 1, 0, 0]
699 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
700 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2,
701 ... from_logits=True)
702 >>> loss(y_true, y_pred).numpy()
703 0.691
705 >>> # Apply class weight
706 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
707 ... apply_class_balancing=True, gamma=2, from_logits=True)
708 >>> loss(y_true, y_pred).numpy()
709 0.51
711 >>> # Example 2: (batch_size = 2, number of samples = 4)
712 >>> y_true = [[0, 1], [0, 0]]
713 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
714 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
715 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
716 ... from_logits=True)
717 >>> loss(y_true, y_pred).numpy()
718 0.647
720 >>> # Apply class weight
721 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
722 ... apply_class_balancing=True, gamma=3, from_logits=True)
723 >>> loss(y_true, y_pred).numpy()
724 0.482
726 >>> # Using 'sample_weight' attribute with focal effect
727 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
728 ... from_logits=True)
729 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
730 0.133
732 >>> # Apply class weight
733 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
734 ... apply_class_balancing=True, gamma=3, from_logits=True)
735 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
736 0.097
738 >>> # Using 'sum' reduction type.
739 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4,
740 ... from_logits=True,
741 ... reduction=tf.keras.losses.Reduction.SUM)
742 >>> loss(y_true, y_pred).numpy()
743 1.222
745 >>> # Apply class weight
746 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
747 ... apply_class_balancing=True, gamma=4, from_logits=True,
748 ... reduction=tf.keras.losses.Reduction.SUM)
749 >>> loss(y_true, y_pred).numpy()
750 0.914
752 >>> # Using 'none' reduction type.
753 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
754 ... gamma=5, from_logits=True,
755 ... reduction=tf.keras.losses.Reduction.NONE)
756 >>> loss(y_true, y_pred).numpy()
757 array([0.0017, 1.1561], dtype=float32)
759 >>> # Apply class weight
760 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
761 ... apply_class_balancing=True, gamma=5, from_logits=True,
762 ... reduction=tf.keras.losses.Reduction.NONE)
763 >>> loss(y_true, y_pred).numpy()
764 array([0.0004, 0.8670], dtype=float32)
767 Args:
768 apply_class_balancing: A bool, whether to apply weight balancing on the
769 binary classes 0 and 1.
770 alpha: A weight balancing factor for class 1, default is `0.25` as
771 mentioned in reference [Lin et al., 2018](
772 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
773 `1.0 - alpha`.
774 gamma: A focusing parameter used to compute the focal factor, default is
775 `2.0` as mentioned in the reference
776 [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).
777 from_logits: Whether to interpret `y_pred` as a tensor of
778 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
779 assume that `y_pred` are probabilities (i.e., values in `[0, 1]`).
780 label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When >
781 `0`, we compute the loss between the predicted labels and a smoothed
782 version of the true labels, where the smoothing squeezes the labels
783 towards `0.5`. Larger values of `label_smoothing` correspond to heavier
784 smoothing.
785 axis: The axis along which to compute crossentropy (the features axis).
786 Defaults to `-1`.
787 reduction: Type of `tf.keras.losses.Reduction` to apply to
788 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
789 option will be determined by the usage context. For almost all cases
790 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
791 `tf.distribute.Strategy`, except via `Model.compile()` and
792 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
793 will raise an error. Please see this custom training [tutorial](
794 https://www.tensorflow.org/tutorials/distribute/custom_training)
795 for more details.
796 name: Name for the op. Defaults to 'binary_focal_crossentropy'.
797 """
799 def __init__(
800 self,
801 apply_class_balancing=False,
802 alpha=0.25,
803 gamma=2.0,
804 from_logits=False,
805 label_smoothing=0.0,
806 axis=-1,
807 reduction=losses_utils.ReductionV2.AUTO,
808 name="binary_focal_crossentropy",
809 ):
810 """Initializes `BinaryFocalCrossentropy` instance."""
811 super().__init__(
812 binary_focal_crossentropy,
813 apply_class_balancing=apply_class_balancing,
814 alpha=alpha,
815 gamma=gamma,
816 name=name,
817 reduction=reduction,
818 from_logits=from_logits,
819 label_smoothing=label_smoothing,
820 axis=axis,
821 )
822 self.from_logits = from_logits
823 self.apply_class_balancing = apply_class_balancing
824 self.alpha = alpha
825 self.gamma = gamma
827 def get_config(self):
828 config = {
829 "apply_class_balancing": self.apply_class_balancing,
830 "alpha": self.alpha,
831 "gamma": self.gamma,
832 }
833 base_config = super().get_config()
834 return dict(list(base_config.items()) + list(config.items()))
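As a quick sanity check of the statement in the class docstring that `gamma=0` removes the focal effect, the following sketch compares the focal loss against plain binary crossentropy on arbitrary values; the two results should agree up to float tolerance:

```python
import tensorflow as tf

y_true = [[0., 1.], [0., 0.]]
y_pred = [[0.4, 0.9], [0.2, 0.3]]

focal = tf.keras.losses.BinaryFocalCrossentropy(gamma=0.0)
plain = tf.keras.losses.BinaryCrossentropy()
# With gamma=0 the focal factor (1 - p_t)**gamma is 1 for every example.
print(focal(y_true, y_pred).numpy(), plain(y_true, y_pred).numpy())
```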
837@keras_export("keras.losses.CategoricalCrossentropy")
838class CategoricalCrossentropy(LossFunctionWrapper):
839 """Computes the crossentropy loss between the labels and predictions.
841 Use this crossentropy loss function when there are two or more label
842 classes. We expect labels to be provided in a `one_hot` representation. If
843 you want to provide labels as integers, please use
844 `SparseCategoricalCrossentropy` loss. There should be `# classes` floating
845 point values per feature.
847 In the snippet below, there are `# classes` floating point values per
848 example. The shape of both `y_pred` and `y_true` is
849 `[batch_size, num_classes]`.
851 Standalone usage:
853 >>> y_true = [[0, 1, 0], [0, 0, 1]]
854 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
855 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
856 >>> cce = tf.keras.losses.CategoricalCrossentropy()
857 >>> cce(y_true, y_pred).numpy()
858 1.177
860 >>> # Calling with 'sample_weight'.
861 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
862 0.814
864 >>> # Using 'sum' reduction type.
865 >>> cce = tf.keras.losses.CategoricalCrossentropy(
866 ... reduction=tf.keras.losses.Reduction.SUM)
867 >>> cce(y_true, y_pred).numpy()
868 2.354
870 >>> # Using 'none' reduction type.
871 >>> cce = tf.keras.losses.CategoricalCrossentropy(
872 ... reduction=tf.keras.losses.Reduction.NONE)
873 >>> cce(y_true, y_pred).numpy()
874 array([0.0513, 2.303], dtype=float32)
876 Usage with the `compile()` API:
878 ```python
879 model.compile(optimizer='sgd',
880 loss=tf.keras.losses.CategoricalCrossentropy())
881 ```
882 """
884 def __init__(
885 self,
886 from_logits=False,
887 label_smoothing=0.0,
888 axis=-1,
889 reduction=losses_utils.ReductionV2.AUTO,
890 name="categorical_crossentropy",
891 ):
892 """Initializes `CategoricalCrossentropy` instance.
894 Args:
895 from_logits: Whether `y_pred` is expected to be a logits tensor. By
896 default, we assume that `y_pred` encodes a probability distribution.
897 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
898 meaning the confidence on label values is relaxed. For example, if
899 `0.1`, use `0.1 / num_classes` for non-target labels and
900 `0.9 + 0.1 / num_classes` for target labels.
901 axis: The axis along which to compute crossentropy (the features
902 axis). Defaults to -1.
903 reduction: Type of `tf.keras.losses.Reduction` to apply to
904 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
905 option will be determined by the usage context. For almost all cases
906 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
907 `tf.distribute.Strategy`, except via `Model.compile()` and
908 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
909 will raise an error. Please see this custom training [tutorial](
910 https://www.tensorflow.org/tutorials/distribute/custom_training)
911 for more details.
912 name: Optional name for the instance.
913 Defaults to 'categorical_crossentropy'.
914 """
915 super().__init__(
916 categorical_crossentropy,
917 name=name,
918 reduction=reduction,
919 from_logits=from_logits,
920 label_smoothing=label_smoothing,
921 axis=axis,
922 )
925@keras_export("keras.losses.CategoricalFocalCrossentropy")
926class CategoricalFocalCrossentropy(LossFunctionWrapper):
927 """Computes the alpha balanced focal crossentropy loss.
929 Use this crossentropy loss function when there are two or more label
930 classes and if you want to handle class imbalance without using
931 `class_weights`. We expect labels to be provided in a `one_hot`
932 representation.
934 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
935 helps to apply a focal factor to down-weight easy examples and focus more on
936 hard examples. The general formula for the focal loss (FL)
937 is as follows:
939 `FL(p_t) = −(1 − p_t)^gamma * log(p_t)`
941 where `p_t` is defined as follows:
942 `p_t = output if y_true == 1, else 1 - output`
944 `(1 − p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing
945 parameter. When `gamma` = 0, there is no focal effect on the cross entropy.
946 `gamma` reduces the importance given to simple examples in a smooth manner.
948 The authors use the alpha-balanced variant of focal loss (FL) in the paper:
949 `FL(p_t) = −alpha * (1 − p_t)^gamma * log(p_t)`
951 where `alpha` is the weight factor for the classes. If `alpha` = 1, the
952 loss won't be able to handle class imbalance properly as all
953 classes will have the same weight. This can be a constant or a list of
954 constants. If alpha is a list, it must have the same length as the number
955 of classes.
957 The formula above can be generalized to:
958 `FL(p_t) = alpha * (1 − p_t)^gamma * CrossEntropy(y_true, y_pred)`
960 where the minus sign comes from `CrossEntropy(y_true, y_pred)` (CE).
962 Extending this to the multi-class case is straightforward:
963 `FL(p_t) = alpha * (1 − p_t)^gamma * CategoricalCE(y_true, y_pred)`
965 In the snippet below, there are `# classes` floating point values per
966 example. The shape of both `y_pred` and `y_true` is
967 `[batch_size, num_classes]`.
969 Standalone usage:
971 >>> y_true = [[0., 1., 0.], [0., 0., 1.]]
972 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
973 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
974 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy()
975 >>> cce(y_true, y_pred).numpy()
976 0.23315276
978 >>> # Calling with 'sample_weight'.
979 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
980 0.1632
982 >>> # Using 'sum' reduction type.
983 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
984 ... reduction=tf.keras.losses.Reduction.SUM)
985 >>> cce(y_true, y_pred).numpy()
986 0.46631
988 >>> # Using 'none' reduction type.
989 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(
990 ... reduction=tf.keras.losses.Reduction.NONE)
991 >>> cce(y_true, y_pred).numpy()
992 array([3.2058331e-05, 4.6627346e-01], dtype=float32)
994 Usage with the `compile()` API:
995 ```python
996 model.compile(optimizer='adam',
997 loss=tf.keras.losses.CategoricalFocalCrossentropy())
998 ```
999 Args:
1000 alpha: A weight balancing factor for all classes, default is `0.25` as
1001 mentioned in the reference. It can be a list of floats or a scalar.
1002 In the multi-class case, alpha may be set by inverse class
1003 frequency by using `compute_class_weight` from `sklearn.utils`.
1004 gamma: A focusing parameter, default is `2.0` as mentioned in the
1005 reference. It helps to gradually reduce the importance given to
1006 simple (easy) examples in a smooth manner.
1007 from_logits: Whether `output` is expected to be a logits tensor. By
1008 default, we consider that `output` encodes a probability
1009 distribution.
1010 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
1011 meaning the confidence on label values is relaxed. For example, if
1012 `0.1`, use `0.1 / num_classes` for non-target labels and
1013 `0.9 + 0.1 / num_classes` for target labels.
1014 axis: The axis along which to compute crossentropy (the features
1015 axis). Defaults to -1.
1016 reduction: Type of `tf.keras.losses.Reduction` to apply to
1017 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1018 option will be determined by the usage context. For almost all cases
1019 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1020 `tf.distribute.Strategy`, except via `Model.compile()` and
1021 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1022 will raise an error. Please see this custom training [tutorial](
1023 https://www.tensorflow.org/tutorials/distribute/custom_training)
1024 for more details.
1025 name: Optional name for the instance.
1026 Defaults to 'categorical_focal_crossentropy'.
1027 """
1029 def __init__(
1030 self,
1031 alpha=0.25,
1032 gamma=2.0,
1033 from_logits=False,
1034 label_smoothing=0.0,
1035 axis=-1,
1036 reduction=losses_utils.ReductionV2.AUTO,
1037 name="categorical_focal_crossentropy",
1038 ):
1039 """Initializes `CategoricalFocalCrossentropy` instance."""
1040 super().__init__(
1041 categorical_focal_crossentropy,
1042 alpha=alpha,
1043 gamma=gamma,
1044 name=name,
1045 reduction=reduction,
1046 from_logits=from_logits,
1047 label_smoothing=label_smoothing,
1048 axis=axis,
1049 )
1050 self.from_logits = from_logits
1051 self.alpha = alpha
1052 self.gamma = gamma
1054 def get_config(self):
1055 config = {
1056 "alpha": self.alpha,
1057 "gamma": self.gamma,
1058 }
1059 base_config = super().get_config()
1060 return dict(list(base_config.items()) + list(config.items()))
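Analogously to the binary case, the generalized formula `FL = alpha * (1 − p_t)^gamma * CE` in the docstring suggests that `gamma=0` together with `alpha=1.0` should reduce to plain categorical crossentropy. A small hedged check, re-using the docstring inputs:

```python
import tensorflow as tf

y_true = [[0., 1., 0.], [0., 0., 1.]]
y_pred = [[0.05, 0.95, 0.], [0.1, 0.8, 0.1]]

focal = tf.keras.losses.CategoricalFocalCrossentropy(gamma=0.0, alpha=1.0)
plain = tf.keras.losses.CategoricalCrossentropy()
# Both should print roughly 1.177, matching the CategoricalCrossentropy
# docstring example above.
print(focal(y_true, y_pred).numpy(), plain(y_true, y_pred).numpy())
```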
1063@keras_export("keras.losses.SparseCategoricalCrossentropy")
1064class SparseCategoricalCrossentropy(LossFunctionWrapper):
1065 """Computes the crossentropy loss between the labels and predictions.
1067 Use this crossentropy loss function when there are two or more label
1068 classes. We expect labels to be provided as integers. If you want to
1069 provide labels using `one-hot` representation, please use
1070 `CategoricalCrossentropy` loss. There should be `# classes` floating point
1071 values per feature for `y_pred` and a single floating point value per
1072 feature for `y_true`.
1074 In the snippet below, there is a single floating point value per example for
1075 `y_true` and `# classes` floating point values per example for `y_pred`.
1076 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
1077 `[batch_size, num_classes]`.
1079 Standalone usage:
1081 >>> y_true = [1, 2]
1082 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
1083 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1084 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
1085 >>> scce(y_true, y_pred).numpy()
1086 1.177
1088 >>> # Calling with 'sample_weight'.
1089 >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
1090 0.814
1092 >>> # Using 'sum' reduction type.
1093 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
1094 ... reduction=tf.keras.losses.Reduction.SUM)
1095 >>> scce(y_true, y_pred).numpy()
1096 2.354
1098 >>> # Using 'none' reduction type.
1099 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
1100 ... reduction=tf.keras.losses.Reduction.NONE)
1101 >>> scce(y_true, y_pred).numpy()
1102 array([0.0513, 2.303], dtype=float32)
1104 Usage with the `compile()` API:
1106 ```python
1107 model.compile(optimizer='sgd',
1108 loss=tf.keras.losses.SparseCategoricalCrossentropy())
1109 ```
1110 """
1112 def __init__(
1113 self,
1114 from_logits=False,
1115 ignore_class=None,
1116 reduction=losses_utils.ReductionV2.AUTO,
1117 name="sparse_categorical_crossentropy",
1118 ):
1119 """Initializes `SparseCategoricalCrossentropy` instance.
1121 Args:
1122 from_logits: Whether `y_pred` is expected to be a logits tensor. By
1123 default, we assume that `y_pred` encodes a probability distribution.
1124 ignore_class: Optional integer. The ID of a class to be ignored during
1125 loss computation. This is useful, for example, in segmentation
1126 problems featuring a "void" class (commonly -1 or 255) in
1127 segmentation maps.
1128 By default (`ignore_class=None`), all classes are considered.
1129 reduction: Type of `tf.keras.losses.Reduction` to apply to
1130 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1131 option will be determined by the usage context. For almost all cases
1132 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1133 `tf.distribute.Strategy`, except via `Model.compile()` and
1134 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1135 will raise an error. Please see this custom training [tutorial](
1136 https://www.tensorflow.org/tutorials/distribute/custom_training)
1137 for more details.
1138 name: Optional name for the instance. Defaults to
1139 'sparse_categorical_crossentropy'.
1140 """
1141 super().__init__(
1142 sparse_categorical_crossentropy,
1143 name=name,
1144 reduction=reduction,
1145 from_logits=from_logits,
1146 ignore_class=ignore_class,
1147 )
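The integer-label vs. one-hot distinction described above can be seen directly: the sparse and dense crossentropy classes produce the same value when the labels encode the same targets. A short sketch re-using the docstring inputs:

```python
import tensorflow as tf

y_pred = [[0.05, 0.95, 0.], [0.1, 0.8, 0.1]]

sparse = tf.keras.losses.SparseCategoricalCrossentropy()
dense = tf.keras.losses.CategoricalCrossentropy()
sparse([1, 2], y_pred).numpy()                 # ~1.177 (integer labels)
dense([[0, 1, 0], [0, 0, 1]], y_pred).numpy()  # ~1.177 (one-hot labels)
```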
1150@keras_export("keras.losses.Hinge")
1151class Hinge(LossFunctionWrapper):
1152 """Computes the hinge loss between `y_true` & `y_pred`.
1154 `loss = maximum(1 - y_true * y_pred, 0)`
1156 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
1157 provided we will convert them to -1 or 1.
1159 Standalone usage:
1161 >>> y_true = [[0., 1.], [0., 0.]]
1162 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1163 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1164 >>> h = tf.keras.losses.Hinge()
1165 >>> h(y_true, y_pred).numpy()
1166 1.3
1168 >>> # Calling with 'sample_weight'.
1169 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1170 0.55
1172 >>> # Using 'sum' reduction type.
1173 >>> h = tf.keras.losses.Hinge(
1174 ... reduction=tf.keras.losses.Reduction.SUM)
1175 >>> h(y_true, y_pred).numpy()
1176 2.6
1178 >>> # Using 'none' reduction type.
1179 >>> h = tf.keras.losses.Hinge(
1180 ... reduction=tf.keras.losses.Reduction.NONE)
1181 >>> h(y_true, y_pred).numpy()
1182 array([1.1, 1.5], dtype=float32)
1184 Usage with the `compile()` API:
1186 ```python
1187 model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
1188 ```
1189 """
1191 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="hinge"):
1192 """Initializes `Hinge` instance.
1194 Args:
1195 reduction: Type of `tf.keras.losses.Reduction` to apply to
1196 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1197 option will be determined by the usage context. For almost all cases
1198 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1199 `tf.distribute.Strategy`, except via `Model.compile()` and
1200 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1201 will raise an error. Please see this custom training [tutorial](
1202 https://www.tensorflow.org/tutorials/distribute/custom_training)
1203 for more details.
1204 name: Optional name for the instance. Defaults to 'hinge'.
1205 """
1206 super().__init__(hinge, name=name, reduction=reduction)
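To make the 0/1 to -1/1 label conversion above concrete, the documented per-sample values `[1.1, 1.5]` (and their mean `1.3`) can be reproduced by hand; the `2 * y_true - 1` line is a sketch of an equivalent mapping for binary labels, not the exact internal code:

```python
import numpy as np

y_true = np.array([[0., 1.], [0., 0.]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])

converted = 2. * y_true - 1.  # binary {0, 1} labels mapped to {-1, 1}
per_sample = np.mean(np.maximum(1. - converted * y_pred, 0.), axis=-1)
print(per_sample, per_sample.mean())  # [1.1 1.5] 1.3
```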
1209@keras_export("keras.losses.SquaredHinge")
1210class SquaredHinge(LossFunctionWrapper):
1211 """Computes the squared hinge loss between `y_true` & `y_pred`.
1213 `loss = square(maximum(1 - y_true * y_pred, 0))`
1215 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
1216 provided we will convert them to -1 or 1.
1218 Standalone usage:
1220 >>> y_true = [[0., 1.], [0., 0.]]
1221 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1222 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1223 >>> h = tf.keras.losses.SquaredHinge()
1224 >>> h(y_true, y_pred).numpy()
1225 1.86
1227 >>> # Calling with 'sample_weight'.
1228 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1229 0.73
1231 >>> # Using 'sum' reduction type.
1232 >>> h = tf.keras.losses.SquaredHinge(
1233 ... reduction=tf.keras.losses.Reduction.SUM)
1234 >>> h(y_true, y_pred).numpy()
1235 3.72
1237 >>> # Using 'none' reduction type.
1238 >>> h = tf.keras.losses.SquaredHinge(
1239 ... reduction=tf.keras.losses.Reduction.NONE)
1240 >>> h(y_true, y_pred).numpy()
1241 array([1.46, 2.26], dtype=float32)
1243 Usage with the `compile()` API:
1245 ```python
1246 model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
1247 ```
1248 """
1250 def __init__(
1251 self, reduction=losses_utils.ReductionV2.AUTO, name="squared_hinge"
1252 ):
1253 """Initializes `SquaredHinge` instance.
1255 Args:
1256 reduction: Type of `tf.keras.losses.Reduction` to apply to
1257 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1258 option will be determined by the usage context. For almost all cases
1259 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1260 `tf.distribute.Strategy`, except via `Model.compile()` and
1261 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1262 will raise an error. Please see this custom training [tutorial](
1263 https://www.tensorflow.org/tutorials/distribute/custom_training)
1264 for more details.
1265 name: Optional name for the instance. Defaults to 'squared_hinge'.
1266 """
1267 super().__init__(squared_hinge, name=name, reduction=reduction)
1270@keras_export("keras.losses.CategoricalHinge")
1271class CategoricalHinge(LossFunctionWrapper):
1272 """Computes the categorical hinge loss between `y_true` & `y_pred`.
1274 `loss = maximum(neg - pos + 1, 0)`
1275 where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)`
1277 Standalone usage:
1279 >>> y_true = [[0, 1], [0, 0]]
1280 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1281 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1282 >>> h = tf.keras.losses.CategoricalHinge()
1283 >>> h(y_true, y_pred).numpy()
1284 1.4
1286 >>> # Calling with 'sample_weight'.
1287 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1288 0.6
1290 >>> # Using 'sum' reduction type.
1291 >>> h = tf.keras.losses.CategoricalHinge(
1292 ... reduction=tf.keras.losses.Reduction.SUM)
1293 >>> h(y_true, y_pred).numpy()
1294 2.8
1296 >>> # Using 'none' reduction type.
1297 >>> h = tf.keras.losses.CategoricalHinge(
1298 ... reduction=tf.keras.losses.Reduction.NONE)
1299 >>> h(y_true, y_pred).numpy()
1300 array([1.2, 1.6], dtype=float32)
1302 Usage with the `compile()` API:
1304 ```python
1305 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
1306 ```
1307 """
1309 def __init__(
1310 self, reduction=losses_utils.ReductionV2.AUTO, name="categorical_hinge"
1311 ):
1312 """Initializes `CategoricalHinge` instance.
1314 Args:
1315 reduction: Type of `tf.keras.losses.Reduction` to apply to
1316 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1317 option will be determined by the usage context. For almost all cases
1318 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1319 `tf.distribute.Strategy`, except via `Model.compile()` and
1320 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1321 will raise an error. Please see this custom training [tutorial](
1322 https://www.tensorflow.org/tutorials/distribute/custom_training)
1323 for more details.
1324 name: Optional name for the instance. Defaults to 'categorical_hinge'.
1325 """
1326 super().__init__(categorical_hinge, name=name, reduction=reduction)
1329@keras_export("keras.losses.Poisson")
1330class Poisson(LossFunctionWrapper):
1331 """Computes the Poisson loss between `y_true` & `y_pred`.
1333 `loss = y_pred - y_true * log(y_pred)`
1335 Standalone usage:
1337 >>> y_true = [[0., 1.], [0., 0.]]
1338 >>> y_pred = [[1., 1.], [0., 0.]]
1339 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1340 >>> p = tf.keras.losses.Poisson()
1341 >>> p(y_true, y_pred).numpy()
1342 0.5
1344 >>> # Calling with 'sample_weight'.
1345 >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1346 0.4
1348 >>> # Using 'sum' reduction type.
1349 >>> p = tf.keras.losses.Poisson(
1350 ... reduction=tf.keras.losses.Reduction.SUM)
1351 >>> p(y_true, y_pred).numpy()
1352 0.999
1354 >>> # Using 'none' reduction type.
1355 >>> p = tf.keras.losses.Poisson(
1356 ... reduction=tf.keras.losses.Reduction.NONE)
1357 >>> p(y_true, y_pred).numpy()
1358 array([0.999, 0.], dtype=float32)
1360 Usage with the `compile()` API:
1362 ```python
1363 model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
1364 ```
1365 """
1367 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="poisson"):
1368 """Initializes `Poisson` instance.
1370 Args:
1371 reduction: Type of `tf.keras.losses.Reduction` to apply to
1372 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1373 option will be determined by the usage context. For almost all cases
1374 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1375 `tf.distribute.Strategy`, except via `Model.compile()` and
1376 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1377 will raise an error. Please see this custom training [tutorial](
1378 https://www.tensorflow.org/tutorials/distribute/custom_training)
1379 for more details.
1380 name: Optional name for the instance. Defaults to 'poisson'.
1381 """
1382 super().__init__(poisson, name=name, reduction=reduction)
1385@keras_export("keras.losses.LogCosh")
1386class LogCosh(LossFunctionWrapper):
1387 """Computes the logarithm of the hyperbolic cosine of the prediction error.
1389 `logcosh = log((exp(x) + exp(-x))/2)`,
1390 where x is the error `y_pred - y_true`.
1392 Standalone usage:
1394 >>> y_true = [[0., 1.], [0., 0.]]
1395 >>> y_pred = [[1., 1.], [0., 0.]]
1396 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1397 >>> l = tf.keras.losses.LogCosh()
1398 >>> l(y_true, y_pred).numpy()
1399 0.108
1401 >>> # Calling with 'sample_weight'.
1402 >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1403 0.087
1405 >>> # Using 'sum' reduction type.
1406 >>> l = tf.keras.losses.LogCosh(
1407 ... reduction=tf.keras.losses.Reduction.SUM)
1408 >>> l(y_true, y_pred).numpy()
1409 0.217
1411 >>> # Using 'none' reduction type.
1412 >>> l = tf.keras.losses.LogCosh(
1413 ... reduction=tf.keras.losses.Reduction.NONE)
1414 >>> l(y_true, y_pred).numpy()
1415 array([0.217, 0.], dtype=float32)
1417 Usage with the `compile()` API:
1419 ```python
1420 model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
1421 ```
1422 """
1424 def __init__(
1425 self, reduction=losses_utils.ReductionV2.AUTO, name="log_cosh"
1426 ):
1427 """Initializes `LogCosh` instance.
1429 Args:
1430 reduction: Type of `tf.keras.losses.Reduction` to apply to
1431 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1432 option will be determined by the usage context. For almost all cases
1433 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1434 `tf.distribute.Strategy`, except via `Model.compile()` and
1435 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1436 will raise an error. Please see this custom training [tutorial](
1437 https://www.tensorflow.org/tutorials/distribute/custom_training)
1438 for more details.
1439 name: Optional name for the instance. Defaults to 'log_cosh'.
1440 """
1441 super().__init__(log_cosh, name=name, reduction=reduction)
1444@keras_export("keras.losses.KLDivergence")
1445class KLDivergence(LossFunctionWrapper):
1446 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`.
1448 `loss = y_true * log(y_true / y_pred)`
1450 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
1452 Standalone usage:
1454 >>> y_true = [[0, 1], [0, 0]]
1455 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1456 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1457 >>> kl = tf.keras.losses.KLDivergence()
1458 >>> kl(y_true, y_pred).numpy()
1459 0.458
1461 >>> # Calling with 'sample_weight'.
1462 >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1463 0.366
1465 >>> # Using 'sum' reduction type.
1466 >>> kl = tf.keras.losses.KLDivergence(
1467 ... reduction=tf.keras.losses.Reduction.SUM)
1468 >>> kl(y_true, y_pred).numpy()
1469 0.916
1471 >>> # Using 'none' reduction type.
1472 >>> kl = tf.keras.losses.KLDivergence(
1473 ... reduction=tf.keras.losses.Reduction.NONE)
1474 >>> kl(y_true, y_pred).numpy()
1475 array([0.916, -3.08e-06], dtype=float32)
1477 Usage with the `compile()` API:
1479 ```python
1480 model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
1481 ```
1482 """
1484 def __init__(
1485 self, reduction=losses_utils.ReductionV2.AUTO, name="kl_divergence"
1486 ):
1487 """Initializes `KLDivergence` instance.
1489 Args:
1490 reduction: Type of `tf.keras.losses.Reduction` to apply to
1491 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1492 option will be determined by the usage context. For almost all cases
1493 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1494 `tf.distribute.Strategy`, except via `Model.compile()` and
1495 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1496 will raise an error. Please see this custom training [tutorial](
1497 https://www.tensorflow.org/tutorials/distribute/custom_training)
1498 for more details.
1499 name: Optional name for the instance. Defaults to 'kl_divergence'.
1500 """
1501 super().__init__(kl_divergence, name=name, reduction=reduction)
1504@keras_export("keras.losses.Huber")
1505class Huber(LossFunctionWrapper):
1506 """Computes the Huber loss between `y_true` & `y_pred`.
1508 For each value x in `error = y_true - y_pred`:
1510 ```
1511 loss = 0.5 * x^2 if |x| <= d
1512 loss = 0.5 * d^2 + d * (|x| - d) if |x| > d
1513 ```
1514 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1516 Standalone usage:
1518 >>> y_true = [[0, 1], [0, 0]]
1519 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1520 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1521 >>> h = tf.keras.losses.Huber()
1522 >>> h(y_true, y_pred).numpy()
1523 0.155
1525 >>> # Calling with 'sample_weight'.
1526 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1527 0.09
1529 >>> # Using 'sum' reduction type.
1530 >>> h = tf.keras.losses.Huber(
1531 ... reduction=tf.keras.losses.Reduction.SUM)
1532 >>> h(y_true, y_pred).numpy()
1533 0.31
1535 >>> # Using 'none' reduction type.
1536 >>> h = tf.keras.losses.Huber(
1537 ... reduction=tf.keras.losses.Reduction.NONE)
1538 >>> h(y_true, y_pred).numpy()
1539 array([0.18, 0.13], dtype=float32)
1541 Usage with the `compile()` API:
1543 ```python
1544 model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
1545 ```
1546 """
1548 def __init__(
1549 self,
1550 delta=1.0,
1551 reduction=losses_utils.ReductionV2.AUTO,
1552 name="huber_loss",
1553 ):
1554 """Initializes `Huber` instance.
1556 Args:
1557 delta: A float, the point where the Huber loss function changes from a
1558 quadratic to linear.
1559 reduction: Type of `tf.keras.losses.Reduction` to apply to
1560 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1561 option will be determined by the usage context. For almost all cases
1562 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
1563 `tf.distribute.Strategy`, except via `Model.compile()` and
1564 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1565 will raise an error. Please see this custom training [tutorial](
1566 https://www.tensorflow.org/tutorials/distribute/custom_training)
1567 for more details.
1568 name: Optional name for the instance. Defaults to 'huber_loss'.
1569 """
1570 super().__init__(huber, name=name, reduction=reduction, delta=delta)
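A quick hand check of the piecewise definition against the documented per-sample values, assuming the default `delta=1.0` so every error in this example falls in the quadratic branch:

```python
import numpy as np

y_true = np.array([[0., 1.], [0., 0.]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])

error = y_true - y_pred  # all |x| <= delta = 1.0 here
per_sample = np.mean(0.5 * np.square(error), axis=-1)
print(per_sample, per_sample.mean())  # [0.18 0.13] 0.155
```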
1573@keras_export(
1574 "keras.metrics.mean_squared_error",
1575 "keras.metrics.mse",
1576 "keras.metrics.MSE",
1577 "keras.losses.mean_squared_error",
1578 "keras.losses.mse",
1579 "keras.losses.MSE",
1580)
1581@tf.__internal__.dispatch.add_dispatch_support
1582def mean_squared_error(y_true, y_pred):
1583 """Computes the mean squared error between labels and predictions.
1585 After computing the squared distance between the inputs, the mean value over
1586 the last dimension is returned.
1588 `loss = mean(square(y_true - y_pred), axis=-1)`
1590 Standalone usage:
1592 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1593 >>> y_pred = np.random.random(size=(2, 3))
1594 >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
1595 >>> assert loss.shape == (2,)
1596 >>> assert np.array_equal(
1597 ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))
1599 Args:
1600 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1601 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1603 Returns:
1604 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1605 """
1606 y_pred = tf.convert_to_tensor(y_pred)
1607 y_true = tf.cast(y_true, y_pred.dtype)
1608 return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
1611def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):
1612 Apply a loss function on a per-batch basis.
1614 Args:
1615 loss_fn: The loss function
1616 y_true: truth values (RaggedTensor)
1617 y_pred: predicted values (RaggedTensor)
1618 y_pred_extra_dim: whether y_pred has an additional dimension compared to
1619 y_true
1621 Returns:
1622 Loss-function result. A dense tensor if the output has a single dimension
1623 (per-batch loss value); a ragged tensor otherwise.
1624 """
1626 def rt_is_equiv_dense(rt):
1627 """Returns true if this RaggedTensor has the same row_lengths across
1629 all ragged dimensions and thus can be converted to a dense tensor
1630 without loss of information.
1632 Args:
1633 rt: RaggedTensor.
1634 """
1635 return tf.reduce_all(
1636 [
1637 tf.equal(
1638 tf.math.reduce_variance(
1639 tf.cast(row_lens, backend.floatx())
1640 ),
1641 tf.constant([0.0]),
1642 )
1643 for row_lens in rt.nested_row_lengths()
1644 ]
1645 )
1647 def _convert_to_dense(inputs):
1648 return tuple(
1649 rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt
1650 for rt in inputs
1651 )
1653 def _call_loss(inputs, ragged_output):
1654 """Adapt the result to ragged or dense tensor according to the expected
1656 output type. This is done so that all the return values of the map
1657 operation have the same type.
1658 """
1659 r = loss_fn(*inputs)
1660 if ragged_output and not isinstance(r, tf.RaggedTensor):
1661 r = tf.RaggedTensor.from_tensor(r)
1662 elif not ragged_output and isinstance(r, tf.RaggedTensor):
1663 r = r.to_tensor()
1664 return r
1666 def _wrapper(inputs, ragged_output):
1667 _, y_pred = inputs
1668 if isinstance(y_pred, tf.RaggedTensor):
1669 return tf.cond(
1670 rt_is_equiv_dense(y_pred),
1671 lambda: _call_loss(_convert_to_dense(inputs), ragged_output),
1672 lambda: _call_loss(inputs, ragged_output),
1673 )
1675 return loss_fn(*inputs)
1677 if not isinstance(y_true, tf.RaggedTensor):
1678 return loss_fn(y_true, y_pred.to_tensor())
1680 lshape = y_pred.shape.as_list()[1:-1]
1681 if len(lshape) > 0:
1682 spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype)
1683 else:
1684 spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype)
1686 nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)]
1687 if y_pred_extra_dim:
1688 # The last dimension of a categorical prediction may be ragged or not.
1689 rdims = [len(slist) for slist in nested_splits_list]
1690 if rdims[0] == rdims[1] - 1:
1691 nested_splits_list[1] = nested_splits_list[1][:-1]
1693 map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1)
1695 assertion_list = ragged_util.assert_splits_match(nested_splits_list)
1696 with tf.control_dependencies(assertion_list):
1697 return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec)
1700@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor)
1701def _ragged_tensor_mse(y_true, y_pred):
1702 """Implements support for handling RaggedTensors.
1704 Args:
1705 y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
1706 y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.
1708 Returns:
1709 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1710 When the number of dimensions of the batch feature vector [d0, .. dN] is
1711 greater than one, the return value is a RaggedTensor. Otherwise, a dense
1712 tensor with dimensions [batch_size] is returned.
1713 """
1714 return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)
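A minimal sketch of what this dispatch looks like from the caller's side (the input values below are assumed for illustration, not taken from the library docs): when both arguments are RaggedTensors with matching row lengths, the loss is mapped over each example and one value per row is returned.

```python
import tensorflow as tf

# Both inputs are ragged with the same row lengths ([3, 2]), so the
# RaggedTensor dispatcher maps mean_squared_error over each example.
y_true = tf.ragged.constant([[0.0, 1.0, 2.0], [3.0, 4.0]])
y_pred = tf.ragged.constant([[0.5, 1.0, 2.5], [2.0, 4.0]])

loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
print(loss)  # dense tensor of shape (2,): one MSE value per example
```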
1717@keras_export(
1718 "keras.metrics.mean_absolute_error",
1719 "keras.metrics.mae",
1720 "keras.metrics.MAE",
1721 "keras.losses.mean_absolute_error",
1722 "keras.losses.mae",
1723 "keras.losses.MAE",
1724)
1725@tf.__internal__.dispatch.add_dispatch_support
1726def mean_absolute_error(y_true, y_pred):
1727 """Computes the mean absolute error between labels and predictions.
1729 `loss = mean(abs(y_true - y_pred), axis=-1)`
1731 Standalone usage:
1733 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1734 >>> y_pred = np.random.random(size=(2, 3))
1735 >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
1736 >>> assert loss.shape == (2,)
1737 >>> assert np.array_equal(
1738 ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))
1740 Args:
1741 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1742 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1744 Returns:
1745 Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
1746 """
1747 y_pred = tf.convert_to_tensor(y_pred)
1748 y_true = tf.cast(y_true, y_pred.dtype)
1749 return backend.mean(tf.abs(y_pred - y_true), axis=-1)
1752@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor)
1753def _ragged_tensor_mae(y_true, y_pred):
1754 """RaggedTensor adapter for mean_absolute_error."""
1755 return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred)
1758@keras_export(
1759 "keras.metrics.mean_absolute_percentage_error",
1760 "keras.metrics.mape",
1761 "keras.metrics.MAPE",
1762 "keras.losses.mean_absolute_percentage_error",
1763 "keras.losses.mape",
1764 "keras.losses.MAPE",
1765)
1766@tf.__internal__.dispatch.add_dispatch_support
1767def mean_absolute_percentage_error(y_true, y_pred):
1768 """Computes the mean absolute percentage error between `y_true` & `y_pred`.
1770 `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`
1772 Standalone usage:
1774 >>> y_true = np.random.random(size=(2, 3))
1775 >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero
1776 >>> y_pred = np.random.random(size=(2, 3))
1777 >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
1778 >>> assert loss.shape == (2,)
1779 >>> assert np.array_equal(
1780 ... loss.numpy(),
1781 ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))
1783 Args:
1784 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1785 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1787 Returns:
1788 Mean absolute percentage error values. shape = `[batch_size, d0, ..
1789 dN-1]`.
1790 """
1791 y_pred = tf.convert_to_tensor(y_pred)
1792 y_true = tf.cast(y_true, y_pred.dtype)
1793 diff = tf.abs(
1794 (y_true - y_pred) / backend.maximum(tf.abs(y_true), backend.epsilon())
1795 )
1796 return 100.0 * backend.mean(diff, axis=-1)
1799@dispatch.dispatch_for_types(mean_absolute_percentage_error, tf.RaggedTensor)
1800def _ragged_tensor_mape(y_true, y_pred):
1801 """Support RaggedTensors."""
1802 return _ragged_tensor_apply_loss(
1803 mean_absolute_percentage_error, y_true, y_pred
1804 )
1807@keras_export(
1808 "keras.metrics.mean_squared_logarithmic_error",
1809 "keras.metrics.msle",
1810 "keras.metrics.MSLE",
1811 "keras.losses.mean_squared_logarithmic_error",
1812 "keras.losses.msle",
1813 "keras.losses.MSLE",
1814)
1815@tf.__internal__.dispatch.add_dispatch_support
1816def mean_squared_logarithmic_error(y_true, y_pred):
1817 """Computes the mean squared logarithmic error between `y_true` & `y_pred`.
1819 `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`
1821 Standalone usage:
1823 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1824 >>> y_pred = np.random.random(size=(2, 3))
1825 >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
1826 >>> assert loss.shape == (2,)
1827 >>> y_true = np.maximum(y_true, 1e-7)
1828 >>> y_pred = np.maximum(y_pred, 1e-7)
1829 >>> assert np.allclose(
1830 ... loss.numpy(),
1831 ... np.mean(
1832 ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))
1834 Args:
1835 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1836 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1838 Returns:
1839 Mean squared logarithmic error values. shape = `[batch_size, d0, ..
1840 dN-1]`.
1841 """
1842 y_pred = tf.convert_to_tensor(y_pred)
1843 y_true = tf.cast(y_true, y_pred.dtype)
1844 first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.0)
1845 second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.0)
1846 return backend.mean(
1847 tf.math.squared_difference(first_log, second_log), axis=-1
1848 )
1851@dispatch.dispatch_for_types(mean_squared_logarithmic_error, tf.RaggedTensor)
1852def _ragged_tensor_msle(y_true, y_pred):
1853 """Implements support for handling RaggedTensors."""
1854 return _ragged_tensor_apply_loss(
1855 mean_squared_logarithmic_error, y_true, y_pred
1856 )
1859def _maybe_convert_labels(y_true):
1860 """Converts binary labels into -1/1."""
1861 are_zeros = tf.equal(y_true, 0)
1862 are_ones = tf.equal(y_true, 1)
1863 is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones))
1865 def _convert_binary_labels():
1866 # Convert the binary labels to -1 or 1.
1867 return 2.0 * y_true - 1.0
1869 updated_y_true = tf.__internal__.smart_cond.smart_cond(
1870 is_binary, _convert_binary_labels, lambda: y_true
1871 )
1872 return updated_y_true
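A tiny sketch (assumed values) of the mapping applied above when every label is 0 or 1: `y -> 2*y - 1` sends `{0, 1}` to `{-1, +1}`. Labels that are already -1/+1 fail the "is binary" check and are left untouched.

```python
import numpy as np

# Binary 0/1 labels are remapped to -1/+1; anything else is passed through.
y_binary = np.array([0.0, 1.0, 1.0, 0.0])
print(2.0 * y_binary - 1.0)  # [-1.  1.  1. -1.]
```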
1875@keras_export("keras.metrics.squared_hinge", "keras.losses.squared_hinge")
1876@tf.__internal__.dispatch.add_dispatch_support
1877def squared_hinge(y_true, y_pred):
1878 """Computes the squared hinge loss between `y_true` & `y_pred`.
1880 `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`
1882 Standalone usage:
1884 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1885 >>> y_pred = np.random.random(size=(2, 3))
1886 >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
1887 >>> assert loss.shape == (2,)
1888 >>> assert np.array_equal(
1889 ... loss.numpy(),
1890 ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))
1892 Args:
1893 y_true: The ground truth values. `y_true` values are expected to be -1 or
1894 1. If binary (0 or 1) labels are provided, they will be converted to -1 or
1895 1. shape = `[batch_size, d0, .. dN]`.
1896 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1898 Returns:
1899 Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1900 """
1901 y_pred = tf.convert_to_tensor(y_pred)
1902 y_true = tf.cast(y_true, y_pred.dtype)
1903 y_true = _maybe_convert_labels(y_true)
1904 return backend.mean(
1905 tf.square(tf.maximum(1.0 - y_true * y_pred, 0.0)), axis=-1
1906 )
1909@keras_export("keras.metrics.hinge", "keras.losses.hinge")
1910@tf.__internal__.dispatch.add_dispatch_support
1911def hinge(y_true, y_pred):
1912 """Computes the hinge loss between `y_true` & `y_pred`.
1914 `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`
1916 Standalone usage:
1918 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1919 >>> y_pred = np.random.random(size=(2, 3))
1920 >>> loss = tf.keras.losses.hinge(y_true, y_pred)
1921 >>> assert loss.shape == (2,)
1922 >>> assert np.array_equal(
1923 ... loss.numpy(),
1924 ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))
1926 Args:
1927 y_true: The ground truth values. `y_true` values are expected to be -1 or
1928 1. If binary (0 or 1) labels are provided, they will be converted to -1
1929 or 1. shape = `[batch_size, d0, .. dN]`.
1930 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1932 Returns:
1933 Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1934 """
1935 y_pred = tf.convert_to_tensor(y_pred)
1936 y_true = tf.cast(y_true, y_pred.dtype)
1937 y_true = _maybe_convert_labels(y_true)
1938 return backend.mean(tf.maximum(1.0 - y_true * y_pred, 0.0), axis=-1)
1941@keras_export("keras.losses.categorical_hinge")
1942@tf.__internal__.dispatch.add_dispatch_support
1943def categorical_hinge(y_true, y_pred):
1944 """Computes the categorical hinge loss between `y_true` & `y_pred`.
1946 `loss = maximum(neg - pos + 1, 0)`
1947 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`
1949 Standalone usage:
1951 >>> y_true = np.random.randint(0, 3, size=(2,))
1952 >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
1953 >>> y_pred = np.random.random(size=(2, 3))
1954 >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
1955 >>> assert loss.shape == (2,)
1956 >>> pos = np.sum(y_true * y_pred, axis=-1)
1957 >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
1958 >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))
1960 Args:
1961 y_true: The ground truth values. `y_true` values are expected to be
1962 either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
1963 y_pred: The predicted values.
1965 Returns:
1966 Categorical hinge loss values.
1967 """
1968 y_pred = tf.convert_to_tensor(y_pred)
1969 y_true = tf.cast(y_true, y_pred.dtype)
1970 pos = tf.reduce_sum(y_true * y_pred, axis=-1)
1971 neg = tf.reduce_max((1.0 - y_true) * y_pred, axis=-1)
1972 zero = tf.cast(0.0, y_pred.dtype)
1973 return tf.maximum(neg - pos + 1.0, zero)
1976@keras_export("keras.losses.huber", v1=[])
1977@tf.__internal__.dispatch.add_dispatch_support
1978def huber(y_true, y_pred, delta=1.0):
1979 """Computes Huber loss value.
1981 For each value x in `error = y_true - y_pred`:
1983 ```
1984 loss = 0.5 * x^2 if |x| <= d
1985 loss = d * |x| - 0.5 * d^2 if |x| > d
1986 ```
1987 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1989 Args:
1990 y_true: tensor of true targets.
1991 y_pred: tensor of predicted targets.
1992 delta: A float, the point where the Huber loss function changes from a
1993 quadratic to a linear function.
1995 Returns:
1996 Tensor with one scalar loss entry per sample.
1997 """
1998 y_pred = tf.cast(y_pred, dtype=backend.floatx())
1999 y_true = tf.cast(y_true, dtype=backend.floatx())
2000 delta = tf.cast(delta, dtype=backend.floatx())
2001 error = tf.subtract(y_pred, y_true)
2002 abs_error = tf.abs(error)
2003 half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)
2004 return backend.mean(
2005 tf.where(
2006 abs_error <= delta,
2007 half * tf.square(error),
2008 delta * abs_error - half * tf.square(delta),
2009 ),
2010 axis=-1,
2011 )
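A rough NumPy re-statement of the piecewise rule above (illustrative only, not the library code): quadratic for `|error| <= delta`, linear beyond it. With the same inputs as the `Huber` class docstring, it reproduces the 'none'-reduction output of approximately `[0.18, 0.13]`.

```python
import numpy as np

# Quadratic inside |error| <= delta, linear outside; mean over the last axis.
def huber_np(y_true, y_pred, delta=1.0):
    error = np.asarray(y_pred, dtype=float) - np.asarray(y_true, dtype=float)
    abs_error = np.abs(error)
    quadratic = 0.5 * np.square(error)
    linear = delta * abs_error - 0.5 * delta ** 2
    return np.mean(np.where(abs_error <= delta, quadratic, linear), axis=-1)

print(huber_np([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]))  # ~[0.18 0.13]
```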
2014@keras_export(
2015 "keras.losses.log_cosh",
2016 "keras.losses.logcosh",
2017 "keras.metrics.log_cosh",
2018 "keras.metrics.logcosh",
2019)
2020@tf.__internal__.dispatch.add_dispatch_support
2021def log_cosh(y_true, y_pred):
2022 """Logarithm of the hyperbolic cosine of the prediction error.
2024 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
2025 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
2026 like the mean squared error, but will not be so strongly affected by the
2027 occasional wildly incorrect prediction.
2029 Standalone usage:
2031 >>> y_true = np.random.random(size=(2, 3))
2032 >>> y_pred = np.random.random(size=(2, 3))
2033 >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
2034 >>> assert loss.shape == (2,)
2035 >>> x = y_pred - y_true
2036 >>> assert np.allclose(
2037 ... loss.numpy(),
2038 ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.),
2039 ... axis=-1),
2040 ... atol=1e-5)
2042 Args:
2043 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
2044 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
2046 Returns:
2047 Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
2048 """
2049 y_pred = tf.convert_to_tensor(y_pred)
2050 y_true = tf.cast(y_true, y_pred.dtype)
2052 def _logcosh(x):
2053 return (
2054 x + tf.math.softplus(-2.0 * x) - tf.cast(tf.math.log(2.0), x.dtype)
2055 )
2057 return backend.mean(_logcosh(y_pred - y_true), axis=-1)
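A quick sanity sketch (illustrative): the numerically stable form used above, `x + softplus(-2x) - log(2)`, is mathematically equal to `log(cosh(x))`.

```python
import numpy as np

# Verify the identity log(cosh(x)) == x + softplus(-2x) - log(2) numerically.
x = np.linspace(-5.0, 5.0, 11)
stable = x + np.log1p(np.exp(-2.0 * x)) - np.log(2.0)  # x + softplus(-2x) - log 2
print(np.allclose(stable, np.log(np.cosh(x))))  # True
```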
2060@keras_export(
2061 "keras.metrics.categorical_crossentropy",
2062 "keras.losses.categorical_crossentropy",
2063)
2064@tf.__internal__.dispatch.add_dispatch_support
2065def categorical_crossentropy(
2066 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1
2067):
2068 """Computes the categorical crossentropy loss.
2070 Standalone usage:
2072 >>> y_true = [[0, 1, 0], [0, 0, 1]]
2073 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
2074 >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
2075 >>> assert loss.shape == (2,)
2076 >>> loss.numpy()
2077 array([0.0513, 2.303], dtype=float32)
2079 Args:
2080 y_true: Tensor of one-hot true targets.
2081 y_pred: Tensor of predicted targets.
2082 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2083 default, we assume that `y_pred` encodes a probability distribution.
2084 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
2085 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2086 and `0.9 + 0.1 / num_classes` for target labels.
2087 axis: Defaults to -1. The dimension along which the entropy is
2088 computed.
2090 Returns:
2091 Categorical crossentropy loss value.
2092 """
2093 if isinstance(axis, bool):
2094 raise ValueError(
2095 "`axis` must be of type `int`. "
2096 f"Received: axis={axis} of type {type(axis)}"
2097 )
2098 y_pred = tf.convert_to_tensor(y_pred)
2099 y_true = tf.cast(y_true, y_pred.dtype)
2100 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)
2102 if y_pred.shape[-1] == 1:
2103 warnings.warn(
2104 "In loss categorical_crossentropy, expected "
2105 "y_pred.shape to be (batch_size, num_classes) "
2106 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. "
2107 "Consider using 'binary_crossentropy' if you only have 2 classes.",
2108 SyntaxWarning,
2109 stacklevel=2,
2110 )
2112 def _smooth_labels():
2113 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)
2114 return y_true * (1.0 - label_smoothing) + (
2115 label_smoothing / num_classes
2116 )
2118 y_true = tf.__internal__.smart_cond.smart_cond(
2119 label_smoothing, _smooth_labels, lambda: y_true
2120 )
2122 return backend.categorical_crossentropy(
2123 y_true, y_pred, from_logits=from_logits, axis=axis
2124 )
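A small sketch (assumed values) of the `_smooth_labels` step above: with `label_smoothing=0.1` and 3 classes, the one-hot row `[0, 1, 0]` becomes `[0.1/3, 0.9 + 0.1/3, 0.1/3]`, matching the Args description.

```python
import numpy as np

# Smooth a one-hot label row exactly as _smooth_labels does.
y_true = np.array([0.0, 1.0, 0.0])
label_smoothing, num_classes = 0.1, 3
print(y_true * (1.0 - label_smoothing) + label_smoothing / num_classes)
# -> [0.0333... 0.9333... 0.0333...]
```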
2127@dispatch.dispatch_for_types(categorical_crossentropy, tf.RaggedTensor)
2128def _ragged_tensor_categorical_crossentropy(
2129 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1
2130):
2131 """Implements support for handling RaggedTensors.
2133 Args:
2134 y_true: Tensor of one-hot true targets.
2135 y_pred: Tensor of predicted targets.
2136 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2137 default, we assume that `y_pred` encodes a probability distribution.
2138 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
2139 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2140 and `0.9 + 0.1 / num_classes` for target labels.
2141 axis: The axis along which to compute crossentropy (the features axis).
2142 Defaults to -1.
2144 Returns:
2145 Categorical crossentropy loss value.
2147 Expected shape: (batch, sequence_len, n_classes) with sequence_len
2148 being variable per batch.
2149 Return shape: (batch, sequence_len).
2151 When used by CategoricalCrossentropy() with the default reduction
2152 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
2153 total number of elements, independent of the batch size. E.g. if the RaggedTensor
2154 has 2 batches with [2, 1] values respectively, the resulting loss is
2155 the sum of the individual loss values divided by 3.
2156 """
2157 fn = functools.partial(
2158 categorical_crossentropy,
2159 from_logits=from_logits,
2160 label_smoothing=label_smoothing,
2161 axis=axis,
2162 )
2163 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
2166@keras_export(
2167 "keras.metrics.categorical_focal_crossentropy",
2168 "keras.losses.categorical_focal_crossentropy",
2169)
2170@tf.__internal__.dispatch.add_dispatch_support
2171def categorical_focal_crossentropy(
2172 y_true,
2173 y_pred,
2174 alpha=0.25,
2175 gamma=2.0,
2176 from_logits=False,
2177 label_smoothing=0.0,
2178 axis=-1,
2179):
2180 """Computes the categorical focal crossentropy loss.
2182 Standalone usage:
2183 >>> y_true = [[0, 1, 0], [0, 0, 1]]
2184 >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]]
2185 >>> loss = tf.keras.losses.categorical_focal_crossentropy(y_true, y_pred)
2186 >>> assert loss.shape == (2,)
2187 >>> loss.numpy()
2188 array([2.63401289e-04, 6.75912094e-01], dtype=float32)
2190 Args:
2191 y_true: Tensor of one-hot true targets.
2192 y_pred: Tensor of predicted targets.
2193 alpha: A weight balancing factor for all classes, default is `0.25` as
2194 mentioned in the reference. It can be a list of floats or a scalar.
2195 In the multi-class case, alpha may be set by inverse class
2196 frequency by using `compute_class_weight` from `sklearn.utils`.
2197 gamma: A focusing parameter, default is `2.0` as mentioned in the
2198 reference. It helps to gradually reduce the importance given to
2199 simple examples in a smooth manner. When `gamma` = 0, there is
2200 no focal effect on the categorical crossentropy.
2201 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2202 default, we assume that `y_pred` encodes a probability
2203 distribution.
2204 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
2205 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2206 and `0.9 + 0.1 / num_classes` for target labels.
2207 axis: Defaults to -1. The dimension along which the entropy is
2208 computed.
2210 Returns:
2211 Categorical focal crossentropy loss value.
2212 """
2213 if isinstance(axis, bool):
2214 raise ValueError(
2215 "`axis` must be of type `int`. "
2216 f"Received: axis={axis} of type {type(axis)}"
2217 )
2218 y_pred = tf.convert_to_tensor(y_pred)
2219 y_true = tf.cast(y_true, y_pred.dtype)
2220 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)
2222 if y_pred.shape[-1] == 1:
2223 warnings.warn(
2224 "In loss categorical_focal_crossentropy, expected "
2225 "y_pred.shape to be (batch_size, num_classes) "
2226 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. "
2227 "Consider using 'binary_crossentropy' if you only have 2 classes.",
2228 SyntaxWarning,
2229 stacklevel=2,
2230 )
2232 def _smooth_labels():
2233 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)
2234 return y_true * (1.0 - label_smoothing) + (
2235 label_smoothing / num_classes
2236 )
2238 y_true = tf.__internal__.smart_cond.smart_cond(
2239 label_smoothing, _smooth_labels, lambda: y_true
2240 )
2242 return backend.categorical_focal_crossentropy(
2243 target=y_true,
2244 output=y_pred,
2245 alpha=alpha,
2246 gamma=gamma,
2247 from_logits=from_logits,
2248 axis=axis,
2249 )
2252@dispatch.dispatch_for_types(categorical_focal_crossentropy, tf.RaggedTensor)
2253def _ragged_tensor_categorical_focal_crossentropy(
2254 y_true,
2255 y_pred,
2256 alpha=0.25,
2257 gamma=2.0,
2258 from_logits=False,
2259 label_smoothing=0.0,
2260 axis=-1,
2261):
2262 """Implements support for handling RaggedTensors.
2264 Expected shape: (batch, sequence_len, n_classes) with sequence_len
2265 being variable per batch.
2266 Return shape: (batch, sequence_len).
2267 When used by CategoricalFocalCrossentropy() with the default reduction
2268 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
2269 total number of elements, independent of the batch size. E.g. if the RaggedTensor
2270 has 2 batches with [2, 1] values respectively, the resulting loss is
2271 the sum of the individual loss values divided by 3.
2273 Args:
2274 alpha: A weight balancing factor for all classes, default is `0.25` as
2275 mentioned in the reference. It can be a list of floats or a scalar.
2276 In the multi-class case, alpha may be set by inverse class
2277 frequency by using `compute_class_weight` from `sklearn.utils`.
2278 gamma: A focusing parameter, default is `2.0` as mentioned in the
2279 reference. It helps to gradually reduce the importance given to
2280 simple examples in a smooth manner. When `gamma` = 0, there is
2281 no focal effect on the categorical crossentropy.
2282 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2283 default, we assume that `y_pred` encodes a probability distribution.
2284 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
2285 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2286 and `0.9 + 0.1 / num_classes` for target labels.
2287 axis: Defaults to -1. The dimension along which the entropy is
2288 computed.
2290 Returns:
2291 Categorical focal crossentropy loss value.
2292 """
2293 fn = functools.partial(
2294 categorical_focal_crossentropy,
2295 alpha=alpha,
2296 gamma=gamma,
2297 from_logits=from_logits,
2298 label_smoothing=label_smoothing,
2299 axis=axis,
2300 )
2301 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
2304@keras_export(
2305 "keras.metrics.sparse_categorical_crossentropy",
2306 "keras.losses.sparse_categorical_crossentropy",
2307)
2308@tf.__internal__.dispatch.add_dispatch_support
2309def sparse_categorical_crossentropy(
2310 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None
2311):
2312 """Computes the sparse categorical crossentropy loss.
2314 Standalone usage:
2316 >>> y_true = [1, 2]
2317 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
2318 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
2319 >>> assert loss.shape == (2,)
2320 >>> loss.numpy()
2321 array([0.0513, 2.303], dtype=float32)
2323 >>> y_true = [[[ 0, 2],
2324 ... [-1, -1]],
2325 ... [[ 0, 2],
2326 ... [-1, -1]]]
2327 >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],
2328 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]],
2329 ... [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]],
2330 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]]
2331 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(
2332 ... y_true, y_pred, ignore_class=-1)
2333 >>> loss.numpy()
2334 array([[[2.3841855e-07, 2.3841855e-07],
2335 [0.0000000e+00, 0.0000000e+00]],
2336 [[2.3841855e-07, 6.9314730e-01],
2337 [0.0000000e+00, 0.0000000e+00]]], dtype=float32)
2339 Args:
2340 y_true: Ground truth values.
2341 y_pred: The predicted values.
2342 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2343 default, we assume that `y_pred` encodes a probability distribution.
2344 axis: Defaults to -1. The dimension along which the entropy is
2345 computed.
2346 ignore_class: Optional integer. The ID of a class to be ignored during
2347 loss computation. This is useful, for example, in segmentation
2348 problems featuring a "void" class (commonly -1 or 255) in segmentation
2349 maps. By default (`ignore_class=None`), all classes are considered.
2351 Returns:
2352 Sparse categorical crossentropy loss value.
2353 """
2354 return backend.sparse_categorical_crossentropy(
2355 y_true,
2356 y_pred,
2357 from_logits=from_logits,
2358 ignore_class=ignore_class,
2359 axis=axis,
2360 )
2363@dispatch.dispatch_for_types(sparse_categorical_crossentropy, tf.RaggedTensor)
2364def _ragged_tensor_sparse_categorical_crossentropy(
2365 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None
2366):
2367 """Implements support for handling RaggedTensors.
2369 Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len
2370 being variable per batch.
2371 Return shape: (batch, sequence_len).
2373 When used by SparseCategoricalCrossentropy() with the default reduction
2374 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
2375 total number of elements, independent of the batch size. E.g. if the RaggedTensor
2376 has 2 batches with [2, 1] values respectively, the resulting loss is
2377 the sum of the individual loss values divided by 3.
2378 """
2379 fn = functools.partial(
2380 sparse_categorical_crossentropy,
2381 from_logits=from_logits,
2382 ignore_class=ignore_class,
2383 axis=axis,
2384 )
2385 return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True)
2388@keras_export(
2389 "keras.metrics.binary_crossentropy", "keras.losses.binary_crossentropy"
2390)
2391@tf.__internal__.dispatch.add_dispatch_support
2392def binary_crossentropy(
2393 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1
2394):
2395 """Computes the binary crossentropy loss.
2397 Standalone usage:
2399 >>> y_true = [[0, 1], [0, 0]]
2400 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
2401 >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
2402 >>> assert loss.shape == (2,)
2403 >>> loss.numpy()
2404 array([0.916 , 0.714], dtype=float32)
2406 Args:
2407 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
2408 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
2409 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2410 default, we assume that `y_pred` encodes a probability distribution.
2411 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
2412 squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
2413 for the target class and `0.5 * label_smoothing` for the non-target
2414 class.
2415 axis: The axis along which the mean is computed. Defaults to -1.
2417 Returns:
2418 Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
2419 """
2420 y_pred = tf.convert_to_tensor(y_pred)
2421 y_true = tf.cast(y_true, y_pred.dtype)
2422 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)
2424 def _smooth_labels():
2425 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
2427 y_true = tf.__internal__.smart_cond.smart_cond(
2428 label_smoothing, _smooth_labels, lambda: y_true
2429 )
2431 return backend.mean(
2432 backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
2433 axis=axis,
2434 )
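A small sketch (assumed values) of the binary `_smooth_labels` step above: labels are squeezed towards 0.5, so with `label_smoothing=0.2` a 1 becomes 0.9 and a 0 becomes 0.1.

```python
import numpy as np

# Binary label smoothing squeezes hard labels towards 0.5.
y_true = np.array([0.0, 1.0])
label_smoothing = 0.2
print(y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing)  # [0.1 0.9]
```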
2437@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor)
2438def _ragged_tensor_binary_crossentropy(
2439 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1
2440):
2441 """Implements support for handling RaggedTensors.
2443 Args:
2444 y_true: Tensor of one-hot true targets.
2445 y_pred: Tensor of predicted targets.
2446 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2447 default, we assume that `y_pred` encodes a probability distribution.
2448 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
2449 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2450 and `0.9 + 0.1 / num_classes` for target labels.
2451 axis: Axis along which to compute crossentropy.
2453 Returns:
2454 Binary crossentropy loss value.
2456 Expected shape: (batch, sequence_len) with sequence_len being variable
2457 per batch.
2458 Return shape: (batch,); returns the per-batch mean of the loss values.
2460 When used by BinaryCrossentropy() with the default reduction
2461 (SUM_OVER_BATCH_SIZE), the reduction averages the per-batch losses over
2462 the number of batches.
2463 """
2464 fn = functools.partial(
2465 binary_crossentropy,
2466 from_logits=from_logits,
2467 label_smoothing=label_smoothing,
2468 axis=axis,
2469 )
2470 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
2473@keras_export(
2474 "keras.metrics.binary_focal_crossentropy",
2475 "keras.losses.binary_focal_crossentropy",
2476)
2477@tf.__internal__.dispatch.add_dispatch_support
2478def binary_focal_crossentropy(
2479 y_true,
2480 y_pred,
2481 apply_class_balancing=False,
2482 alpha=0.25,
2483 gamma=2.0,
2484 from_logits=False,
2485 label_smoothing=0.0,
2486 axis=-1,
2487):
2488 """Computes the binary focal crossentropy loss.
2490 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
2491 helps to apply a focal factor to down-weight easy examples and focus more on
2492 hard examples. By default, the focal tensor is computed as follows:
2494 `focal_factor = (1 - output)**gamma` for class 1
2495 `focal_factor = output**gamma` for class 0
2496 where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal
2497 effect on the binary crossentropy loss.
2499 If `apply_class_balancing == True`, this function also takes into account a
2500 weight balancing factor for the binary classes 0 and 1 as follows:
2502 `weight = alpha` for class 1 (`target == 1`)
2503 `weight = 1 - alpha` for class 0
2504 where `alpha` is a float in the range of `[0, 1]`.
2506 Standalone usage:
2508 >>> y_true = [[0, 1], [0, 0]]
2509 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
2510 >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred,
2511 ... gamma=2)
2512 >>> assert loss.shape == (2,)
2513 >>> loss.numpy()
2514 array([0.330, 0.206], dtype=float32)
2516 Args:
2517 y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`.
2518 y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`.
2519 apply_class_balancing: A bool, whether to apply weight balancing on the
2520 binary classes 0 and 1.
2521 alpha: A weight balancing factor for class 1, default is `0.25` as
2522 mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
2523 gamma: A focusing parameter, default is `2.0` as mentioned in the
2524 reference.
2525 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2526 default, we assume that `y_pred` encodes a probability distribution.
2527 label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the
2528 labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 *
2529 label_smoothing` for the target class and `0.5 * label_smoothing` for
2530 the non-target class.
2531 axis: The axis along which the mean is computed. Defaults to `-1`.
2533 Returns:
2534 Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
2535 """
2536 y_pred = tf.convert_to_tensor(y_pred)
2537 y_true = tf.cast(y_true, y_pred.dtype)
2538 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)
2540 def _smooth_labels():
2541 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
2543 y_true = tf.__internal__.smart_cond.smart_cond(
2544 label_smoothing, _smooth_labels, lambda: y_true
2545 )
2547 return backend.mean(
2548 backend.binary_focal_crossentropy(
2549 target=y_true,
2550 output=y_pred,
2551 apply_class_balancing=apply_class_balancing,
2552 alpha=alpha,
2553 gamma=gamma,
2554 from_logits=from_logits,
2555 ),
2556 axis=axis,
2557 )
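A rough sketch (assumed probabilities, not the library code) of the focal modulation described above: the focal factor is `(1 - p)**gamma` for class 1 and `p**gamma` for class 0, so easy, confident examples are down-weighted relative to hard ones.

```python
import numpy as np

# Focal modulation of the per-element binary crossentropy.
p = np.array([0.9, 0.6])   # predicted probability of class 1
y = np.array([1.0, 1.0])   # both targets are class 1
gamma = 2.0
bce = -(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))
focal_factor = np.where(y == 1.0, (1.0 - p) ** gamma, p ** gamma)
print(focal_factor * bce)  # the easy example (p=0.9) contributes far less
```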
2560@dispatch.dispatch_for_types(binary_focal_crossentropy, tf.RaggedTensor)
2561def _ragged_tensor_binary_focal_crossentropy(
2562 y_true,
2563 y_pred,
2564 apply_class_balancing=False,
2565 alpha=0.25,
2566 gamma=2.0,
2567 from_logits=False,
2568 label_smoothing=0.0,
2569 axis=-1,
2570):
2571 """Implements support for handling RaggedTensors.
2573 Expected shape: `(batch, sequence_len)` with sequence_len being variable per
2574 batch.
2575 Return shape: `(batch,)`; returns the per-batch mean of the loss values.
2577 When used by BinaryFocalCrossentropy() with the default reduction
2578 (SUM_OVER_BATCH_SIZE), the reduction averages the per-batch losses over
2579 the number of batches.
2581 Args:
2582 y_true: Tensor of one-hot true targets.
2583 y_pred: Tensor of predicted targets.
2584 apply_class_balancing: A bool, whether to apply weight balancing on the
2585 binary classes 0 and 1.
2586 alpha: A weight balancing factor for class 1, default is `0.25` as
2587 mentioned in the reference [Lin et al., 2018](
2588 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
2589 `1.0 - alpha`.
2590 gamma: A focusing parameter, default is `2.0` as mentioned in the
2591 reference.
2592 from_logits: Whether `y_pred` is expected to be a logits tensor. By
2593 default, we assume that `y_pred` encodes a probability distribution.
2594 label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For
2595 example, if `0.1`, use `0.1 / num_classes` for non-target labels
2596 and `0.9 + 0.1 / num_classes` for target labels.
2597 axis: Axis along which to compute crossentropy.
2599 Returns:
2600 Binary focal crossentropy loss value.
2601 """
2602 fn = functools.partial(
2603 binary_focal_crossentropy,
2604 apply_class_balancing=apply_class_balancing,
2605 alpha=alpha,
2606 gamma=gamma,
2607 from_logits=from_logits,
2608 label_smoothing=label_smoothing,
2609 axis=axis,
2610 )
2611 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
2614@keras_export(
2615 "keras.metrics.kl_divergence",
2616 "keras.metrics.kullback_leibler_divergence",
2617 "keras.metrics.kld",
2618 "keras.metrics.KLD",
2619 "keras.losses.kl_divergence",
2620 "keras.losses.kullback_leibler_divergence",
2621 "keras.losses.kld",
2622 "keras.losses.KLD",
2623)
2624@tf.__internal__.dispatch.add_dispatch_support
2625def kl_divergence(y_true, y_pred):
2626 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`.
2628 `loss = y_true * log(y_true / y_pred)`
2630 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
2632 Standalone usage:
2634 >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
2635 >>> y_pred = np.random.random(size=(2, 3))
2636 >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
2637 >>> assert loss.shape == (2,)
2638 >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
2639 >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
2640 >>> assert np.array_equal(
2641 ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))
2643 Args:
2644 y_true: Tensor of true targets.
2645 y_pred: Tensor of predicted targets.
2647 Returns:
2648 A `Tensor` with loss.
2650 Raises:
2651 TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
2652 """
2653 y_pred = tf.convert_to_tensor(y_pred)
2654 y_true = tf.cast(y_true, y_pred.dtype)
2655 y_true = backend.clip(y_true, backend.epsilon(), 1)
2656 y_pred = backend.clip(y_pred, backend.epsilon(), 1)
2657 return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)
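An illustrative numeric check (assumed distributions) of the formula above: `sum(y_true * log(y_true / y_pred))` over the last axis, with both distributions already clipped away from zero.

```python
import numpy as np

# KL divergence of [0.25, 0.75] from [0.5, 0.5].
y_true = np.array([[0.25, 0.75]])
y_pred = np.array([[0.5, 0.5]])
print(np.sum(y_true * np.log(y_true / y_pred), axis=-1))  # ~[0.1308]
```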
2660@keras_export("keras.metrics.poisson", "keras.losses.poisson")
2661@tf.__internal__.dispatch.add_dispatch_support
2662def poisson(y_true, y_pred):
2663 """Computes the Poisson loss between y_true and y_pred.
2665 The Poisson loss is the mean of the elements of the `Tensor`
2666 `y_pred - y_true * log(y_pred)`.
2668 Standalone usage:
2670 >>> y_true = np.random.randint(0, 2, size=(2, 3))
2671 >>> y_pred = np.random.random(size=(2, 3))
2672 >>> loss = tf.keras.losses.poisson(y_true, y_pred)
2673 >>> assert loss.shape == (2,)
2674 >>> y_pred = y_pred + 1e-7
2675 >>> assert np.allclose(
2676 ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
2677 ... atol=1e-5)
2679 Args:
2680 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
2681 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
2683 Returns:
2684 Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.
2686 Raises:
2687 InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
2688 """
2689 y_pred = tf.convert_to_tensor(y_pred)
2690 y_true = tf.cast(y_true, y_pred.dtype)
2691 return backend.mean(
2692 y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1
2693 )
2696@keras_export(
2697 "keras.losses.cosine_similarity",
2698 v1=[
2699 "keras.metrics.cosine_proximity",
2700 "keras.metrics.cosine",
2701 "keras.losses.cosine_proximity",
2702 "keras.losses.cosine",
2703 "keras.losses.cosine_similarity",
2704 ],
2705)
2706@tf.__internal__.dispatch.add_dispatch_support
2707def cosine_similarity(y_true, y_pred, axis=-1):
2708 """Computes the cosine similarity between labels and predictions.
2710 Note that the result is a number between -1 and 1. When it is a negative
2711 number between -1 and 0, 0 indicates orthogonality and values closer to -1
2712 indicate greater similarity. Values closer to 1 indicate greater
2713 dissimilarity. This makes it usable as a loss function in a setting
2714 where you try to maximize the proximity between predictions and
2715 targets. If either `y_true` or `y_pred` is a zero vector, cosine
2716 similarity will be 0 regardless of the proximity between predictions
2717 and targets.
2719 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
2721 Standalone usage:
2723 >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
2724 >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
2725 >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
2726 >>> loss.numpy()
2727 array([-0., -0.999, 0.999], dtype=float32)
2729 Args:
2730 y_true: Tensor of true targets.
2731 y_pred: Tensor of predicted targets.
2732 axis: Axis along which to determine similarity.
2734 Returns:
2735 Cosine similarity tensor.
2736 """
2737 y_true = tf.linalg.l2_normalize(y_true, axis=axis)
2738 y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)
2739 return -tf.reduce_sum(y_true * y_pred, axis=axis)
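A small sketch (assumed vectors): L2-normalize both inputs, take the dot product along the feature axis, and negate it, mirroring the loss above.

```python
import numpy as np

# Negated cosine similarity of [0, 1] and [1, 1].
y_true = np.array([[0.0, 1.0]])
y_pred = np.array([[1.0, 1.0]])
t = y_true / np.linalg.norm(y_true, axis=-1, keepdims=True)
p = y_pred / np.linalg.norm(y_pred, axis=-1, keepdims=True)
print(-np.sum(t * p, axis=-1))  # ~[-0.7071]
```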
2742@keras_export("keras.losses.CosineSimilarity")
2743class CosineSimilarity(LossFunctionWrapper):
2744 """Computes the cosine similarity between labels and predictions.
2746 Note that the result is a number between -1 and 1. When it is a negative
2747 number between -1 and 0, 0 indicates orthogonality and values closer to -1
2748 indicate greater similarity. Values closer to 1 indicate greater
2749 dissimilarity. This makes it usable as a loss function in a setting
2750 where you try to maximize the proximity between predictions and targets.
2751 If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
2752 regardless of the proximity between predictions and targets.
2754 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
2756 Standalone usage:
2758 >>> y_true = [[0., 1.], [1., 1.]]
2759 >>> y_pred = [[1., 0.], [1., 1.]]
2760 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
2761 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
2762 >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
2763 >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
2764 >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
2765 >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
2766 >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2
2767 >>> cosine_loss(y_true, y_pred).numpy()
2768 -0.5
2770 >>> # Calling with 'sample_weight'.
2771 >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
2772 -0.0999
2774 >>> # Using 'sum' reduction type.
2775 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2776 ... reduction=tf.keras.losses.Reduction.SUM)
2777 >>> cosine_loss(y_true, y_pred).numpy()
2778 -0.999
2780 >>> # Using 'none' reduction type.
2781 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2782 ... reduction=tf.keras.losses.Reduction.NONE)
2783 >>> cosine_loss(y_true, y_pred).numpy()
2784 array([-0., -0.999], dtype=float32)
2786 Usage with the `compile()` API:
2788 ```python
2789 model.compile(optimizer='sgd',
2790 loss=tf.keras.losses.CosineSimilarity(axis=1))
2791 ```
2793 Args:
2794 axis: The axis along which the cosine similarity is computed
2795 (the features axis). Defaults to -1.
2796 reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
2797 Default value is `AUTO`. `AUTO` indicates that the reduction option will
2798 be determined by the usage context. For almost all cases this defaults
2799 to `SUM_OVER_BATCH_SIZE`. When used under a
2800 `tf.distribute.Strategy`, except via `Model.compile()` and
2801 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
2802 will raise an error. Please see this custom training [tutorial](
2803 https://www.tensorflow.org/tutorials/distribute/custom_training)
2804 for more details.
2805 name: Optional name for the instance.
2806 """
2808 def __init__(
2809 self,
2810 axis=-1,
2811 reduction=losses_utils.ReductionV2.AUTO,
2812 name="cosine_similarity",
2813 ):
2814 super().__init__(
2815 cosine_similarity, reduction=reduction, name=name, axis=axis
2816 )
2819# Aliases.
2821bce = BCE = binary_crossentropy
2822mse = MSE = mean_squared_error
2823mae = MAE = mean_absolute_error
2824mape = MAPE = mean_absolute_percentage_error
2825msle = MSLE = mean_squared_logarithmic_error
2826kld = KLD = kullback_leibler_divergence = kl_divergence
2827logcosh = log_cosh
2828huber_loss = huber
2831def is_categorical_crossentropy(loss):
2832 result = (
2833 isinstance(loss, CategoricalCrossentropy)
2834 or (
2835 isinstance(loss, LossFunctionWrapper)
2836 and loss.fn == categorical_crossentropy
2837 )
2838 or (
2839 hasattr(loss, "__name__")
2840 and loss.__name__ == "categorical_crossentropy"
2841 )
2842 or (loss == "categorical_crossentropy")
2843 )
2844 return result
2847@keras_export("keras.losses.serialize")
2848def serialize(loss, use_legacy_format=False):
2849 """Serializes loss function or `Loss` instance.
2851 Args:
2852 loss: A Keras `Loss` instance or a loss function.
2854 Returns:
2855 Loss configuration dictionary.
2856 """
2857 if use_legacy_format:
2858 return legacy_serialization.serialize_keras_object(loss)
2859 return serialize_keras_object(loss)
2862@keras_export("keras.losses.deserialize")
2863def deserialize(name, custom_objects=None, use_legacy_format=False):
2864 """Deserializes a serialized loss class/function instance.
2866 Args:
2867 name: Loss configuration.
2868 custom_objects: Optional dictionary mapping names (strings) to custom
2869 objects (classes and functions) to be considered during
2870 deserialization.
2872 Returns:
2873 A Keras `Loss` instance or a loss function.
2874 """
2875 if use_legacy_format:
2876 return legacy_serialization.deserialize_keras_object(
2877 name,
2878 module_objects=globals(),
2879 custom_objects=custom_objects,
2880 printable_module_name="loss function",
2881 )
2882 return deserialize_keras_object(
2883 name,
2884 module_objects=globals(),
2885 custom_objects=custom_objects,
2886 printable_module_name="loss function",
2887 )
2890@keras_export("keras.losses.get")
2891def get(identifier):
2892 """Retrieves a Keras loss as a `function`/`Loss` class instance.
2894 The `identifier` may be the string name of a loss function or `Loss` class.
2896 >>> loss = tf.keras.losses.get("categorical_crossentropy")
2897 >>> type(loss)
2898 <class 'function'>
2899 >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
2900 >>> type(loss)
2901 <class '...keras.losses.CategoricalCrossentropy'>
2903 You can also specify `config` of the loss to this function by passing a dict
2904 containing `class_name` and `config` as an identifier. Also note that the
2905 `class_name` must map to a `Loss` class.
2907 >>> identifier = {"class_name": "CategoricalCrossentropy",
2908 ... "config": {"from_logits": True}}
2909 >>> loss = tf.keras.losses.get(identifier)
2910 >>> type(loss)
2911 <class '...keras.losses.CategoricalCrossentropy'>
2913 Args:
2914 identifier: A loss identifier. One of `None`, a string name of a loss
2915 function/class, a loss configuration dictionary, a loss function, or a
2916 loss class instance.
2918 Returns:
2919 A Keras loss as a `function`/`Loss` class instance.
2921 Raises:
2922 ValueError: If `identifier` cannot be interpreted.
2923 """
2924 if identifier is None:
2925 return None
2926 if isinstance(identifier, str):
2927 identifier = str(identifier)
2928 use_legacy_format = "module" not in identifier
2929 return deserialize(identifier, use_legacy_format=use_legacy_format)
2930 if isinstance(identifier, dict):
2931 return deserialize(identifier)
2932 if callable(identifier):
2933 return identifier
2934 raise ValueError(
2935 f"Could not interpret loss function identifier: {identifier}"
2936 )
2939LABEL_DTYPES_FOR_LOSSES = {
2940 tf.compat.v1.losses.sparse_softmax_cross_entropy: "int32",
2941 sparse_categorical_crossentropy: "int32",
2942}