Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/keras/losses.py: 47% (352 statements)
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15# pylint: disable=g-classes-have-attributes
16"""Built-in loss functions."""
18import abc
19import functools
21from tensorflow.python.autograph.core import ag_ctx
22from tensorflow.python.autograph.impl import api as autograph
23from tensorflow.python.distribute import distribute_lib
24from tensorflow.python.eager import context
25from tensorflow.python.framework import constant_op
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import smart_cond
28from tensorflow.python.framework import tensor_conversion
29from tensorflow.python.framework import tensor_spec
30from tensorflow.python.framework import tensor_util
31from tensorflow.python.keras import backend
32from tensorflow.python.keras.utils import losses_utils
33from tensorflow.python.keras.utils import tf_utils
34from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
35from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
36from tensorflow.python.ops import array_ops
37from tensorflow.python.ops import cond
38from tensorflow.python.ops import math_ops
39from tensorflow.python.ops import nn
40from tensorflow.python.ops.losses import losses_impl
41from tensorflow.python.ops.ragged import ragged_map_ops
42from tensorflow.python.ops.ragged import ragged_tensor
43from tensorflow.python.ops.ragged import ragged_util
44from tensorflow.python.util import dispatch
45from tensorflow.python.util.tf_export import keras_export
46from tensorflow.tools.docs import doc_controls
49@keras_export('keras.losses.Loss')
50class Loss:
51 """Loss base class.
53 To be implemented by subclasses:
54 * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`.
56 Example subclass implementation:
58 ```python
59 class MeanSquaredError(Loss):
61 def call(self, y_true, y_pred):
62 y_pred = tf.convert_to_tensor(y_pred)
63 y_true = tf.cast(y_true, y_pred.dtype)
64 return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
65 ```
67 When used with `tf.distribute.Strategy`, outside of built-in training loops
68 such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction
69 types, and reduce losses explicitly in your training loop. Using 'AUTO' or
70 'SUM_OVER_BATCH_SIZE' will raise an error.
72 Please see this custom training [tutorial](
73 https://www.tensorflow.org/tutorials/distribute/custom_training) for more
74 details on this.
76 You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
78 ```python
79 with strategy.scope():
80 loss_obj = tf.keras.losses.CategoricalCrossentropy(
81 reduction=tf.keras.losses.Reduction.NONE)
82 ....
83 loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
84 (1. / global_batch_size))
85 ```
86 """
88 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
89 """Initializes `Loss` class.
91 Args:
92 reduction: Type of `tf.keras.losses.Reduction` to apply to
93 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
94 option will be determined by the usage context. For almost all cases
95 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
96 `tf.distribute.Strategy`, outside of built-in training loops such as
97 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
98 will raise an error. Please see this custom training [tutorial](
99 https://www.tensorflow.org/tutorials/distribute/custom_training) for
100 more details.
101 name: Optional name for the instance.
102 """
103 losses_utils.ReductionV2.validate(reduction)
104 self.reduction = reduction
105 self.name = name
106 # SUM_OVER_BATCH_SIZE is only allowed in losses managed by `fit` or
107 # CannedEstimators.
108 self._allow_sum_over_batch_size = False
109 self._set_name_scope()
111 def _set_name_scope(self):
112 """Creates a valid `name_scope` name."""
113 if self.name is None:
114 self._name_scope = self.__class__.__name__
115 elif self.name == '<lambda>':
116 self._name_scope = 'lambda'
117 else:
118 # E.g. '_my_loss' => 'my_loss'
119 self._name_scope = self.name.strip('_')
121 def __call__(self, y_true, y_pred, sample_weight=None):
122 """Invokes the `Loss` instance.
124 Args:
125 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
126 sparse loss functions such as sparse categorical crossentropy where
127 shape = `[batch_size, d0, .. dN-1]`
128 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
129 sample_weight: Optional `sample_weight` acts as a coefficient for the
130 loss. If a scalar is provided, then the loss is simply scaled by the
131 given value. If `sample_weight` is a tensor of size `[batch_size]`, then
132 the total loss for each sample of the batch is rescaled by the
133 corresponding element in the `sample_weight` vector. If the shape of
134 `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
135 this shape), then each loss element of `y_pred` is scaled
136 by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
137 functions reduce by 1 dimension, usually axis=-1.)
139 Returns:
140 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
141 shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1`
142 because all loss functions reduce by 1 dimension, usually axis=-1.)
144 Raises:
145 ValueError: If the shape of `sample_weight` is invalid.
146 """
147 # If we are wrapping a lambda function, strip '<>' from the name as it is not
148 # accepted in a scope name.
149 graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
150 y_true, y_pred, sample_weight)
151 with backend.name_scope(self._name_scope), graph_ctx:
152 if context.executing_eagerly():
153 call_fn = self.call
154 else:
155 call_fn = autograph.tf_convert(self.call, ag_ctx.control_status_ctx())
156 losses = call_fn(y_true, y_pred)
157 return losses_utils.compute_weighted_loss(
158 losses, sample_weight, reduction=self._get_reduction())
160 @classmethod
161 def from_config(cls, config):
162 """Instantiates a `Loss` from its config (output of `get_config()`).
164 Args:
165 config: Output of `get_config()`.
167 Returns:
168 A `Loss` instance.
169 """
170 return cls(**config)
172 def get_config(self):
173 """Returns the config dictionary for a `Loss` instance."""
174 return {'reduction': self.reduction, 'name': self.name}
176 @abc.abstractmethod
177 @doc_controls.for_subclass_implementers
178 def call(self, y_true, y_pred):
179 """Invokes the `Loss` instance.
181 Args:
182 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
183 sparse loss functions such as sparse categorical crossentropy where
184 shape = `[batch_size, d0, .. dN-1]`
185 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
187 Returns:
188 Loss values with the shape `[batch_size, d0, .. dN-1]`.
189 """
190 raise NotImplementedError('Must be implemented in subclasses.')
192 def _get_reduction(self):
193 """Handles `AUTO` reduction cases and returns the reduction value."""
194 if (not self._allow_sum_over_batch_size and
195 distribute_lib.has_strategy() and
196 (self.reduction == losses_utils.ReductionV2.AUTO or
197 self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)):
198 raise ValueError(
199 'Please use `tf.keras.losses.Reduction.SUM` or '
200 '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are '
201 'used with `tf.distribute.Strategy` outside of the built-in training '
202 'loops. You can implement '
203 '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch '
204 'size like:\n```\nwith strategy.scope():\n'
205 ' loss_obj = tf.keras.losses.CategoricalCrossentropy('
206 'reduction=tf.keras.losses.Reduction.NONE)\n....\n'
207 ' loss = tf.reduce_sum(loss_obj(labels, predictions)) * '
208 '(1. / global_batch_size)\n```\nPlease see '
209 'https://www.tensorflow.org/tutorials/distribute/custom_training'
210 ' for more details.')
212 if self.reduction == losses_utils.ReductionV2.AUTO:
213 return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE
214 return self.reduction
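# --- Illustrative sketch (not part of the original file) ---------------------
# A minimal `Loss` subclass along the lines described in the class docstring,
# written against the public `tf.keras` API. The class and function names are
# hypothetical examples, not symbols defined by this module.
def _example_loss_subclass():
  import tensorflow as tf

  class ExampleMeanAbsoluteError(tf.keras.losses.Loss):

    def call(self, y_true, y_pred):
      y_pred = tf.convert_to_tensor(y_pred)
      y_true = tf.cast(y_true, y_pred.dtype)
      # `call` returns per-sample losses; `__call__` then applies
      # `sample_weight` and the configured `reduction`.
      return tf.reduce_mean(tf.abs(y_pred - y_true), axis=-1)

  loss_obj = ExampleMeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
  # Per-sample losses for these inputs: [0.5, 0.5].
  return loss_obj([[0., 1.], [0., 0.]], [[1., 1.], [1., 0.]])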
217class LossFunctionWrapper(Loss):
218 """Wraps a loss function in the `Loss` class."""
220 def __init__(self,
221 fn,
222 reduction=losses_utils.ReductionV2.AUTO,
223 name=None,
224 **kwargs):
225 """Initializes `LossFunctionWrapper` class.
227 Args:
228 fn: The loss function to wrap, with signature `fn(y_true, y_pred,
229 **kwargs)`.
230 reduction: Type of `tf.keras.losses.Reduction` to apply to
231 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
232 option will be determined by the usage context. For almost all cases
233 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
234 `tf.distribute.Strategy`, outside of built-in training loops such as
235 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
236 will raise an error. Please see this custom training [tutorial](
237 https://www.tensorflow.org/tutorials/distribute/custom_training) for
238 more details.
239 name: Optional name for the instance.
240 **kwargs: The keyword arguments that are passed on to `fn`.
241 """
242 super().__init__(reduction=reduction, name=name)
243 self.fn = fn
244 self._fn_kwargs = kwargs
246 def call(self, y_true, y_pred):
247 """Invokes the `LossFunctionWrapper` instance.
249 Args:
250 y_true: Ground truth values.
251 y_pred: The predicted values.
253 Returns:
254 Loss values per sample.
255 """
256 if tensor_util.is_tf_type(y_pred) and tensor_util.is_tf_type(y_true):
257 y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
259 ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
260 return ag_fn(y_true, y_pred, **self._fn_kwargs)
262 def get_config(self):
263 config = {}
264 for k, v in self._fn_kwargs.items():
265 config[k] = backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v
266 base_config = super().get_config()
267 return dict(list(base_config.items()) + list(config.items()))
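# --- Illustrative sketch (not part of the original file) ---------------------
# `LossFunctionWrapper` is internal to Keras, but the sketch below shows the
# `fn(y_true, y_pred, **kwargs)` contract it wraps: extra keyword arguments
# given to the wrapper are stored and forwarded to `fn` on every call. The
# function `scaled_absolute_error` is a hypothetical example.
def _example_loss_function_wrapper():
  import tensorflow as tf

  def scaled_absolute_error(y_true, y_pred, scale=1.0):
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    return scale * tf.reduce_mean(tf.abs(y_pred - y_true), axis=-1)

  loss_obj = LossFunctionWrapper(
      scaled_absolute_error, name='scaled_mae', scale=2.0)
  # `scale=2.0` is passed through to `scaled_absolute_error`.
  return loss_obj([[0., 1.]], [[1., 1.]])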
270@keras_export('keras.losses.MeanSquaredError')
271class MeanSquaredError(LossFunctionWrapper):
272 """Computes the mean of squares of errors between labels and predictions.
274 `loss = square(y_true - y_pred)`
276 Standalone usage:
278 >>> y_true = [[0., 1.], [0., 0.]]
279 >>> y_pred = [[1., 1.], [1., 0.]]
280 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
281 >>> mse = tf.keras.losses.MeanSquaredError()
282 >>> mse(y_true, y_pred).numpy()
283 0.5
285 >>> # Calling with 'sample_weight'.
286 >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
287 0.25
289 >>> # Using 'sum' reduction type.
290 >>> mse = tf.keras.losses.MeanSquaredError(
291 ... reduction=tf.keras.losses.Reduction.SUM)
292 >>> mse(y_true, y_pred).numpy()
293 1.0
295 >>> # Using 'none' reduction type.
296 >>> mse = tf.keras.losses.MeanSquaredError(
297 ... reduction=tf.keras.losses.Reduction.NONE)
298 >>> mse(y_true, y_pred).numpy()
299 array([0.5, 0.5], dtype=float32)
301 Usage with the `compile()` API:
303 ```python
304 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
305 ```
306 """
308 def __init__(self,
309 reduction=losses_utils.ReductionV2.AUTO,
310 name='mean_squared_error'):
311 """Initializes `MeanSquaredError` instance.
313 Args:
314 reduction: Type of `tf.keras.losses.Reduction` to apply to
315 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
316 option will be determined by the usage context. For almost all cases
317 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
318 `tf.distribute.Strategy`, outside of built-in training loops such as
319 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
320 will raise an error. Please see this custom training [tutorial](
321 https://www.tensorflow.org/tutorials/distribute/custom_training) for
322 more details.
323 name: Optional name for the instance. Defaults to 'mean_squared_error'.
324 """
325 super().__init__(mean_squared_error, name=name, reduction=reduction)
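# --- Illustrative sketch (not part of the original file) ---------------------
# The arithmetic behind the doctest values above, assuming the default
# SUM_OVER_BATCH_SIZE reduction: per-sample squared errors are averaged over
# the last axis, and weighted losses are summed and divided by the batch size.
def _example_mse_arithmetic():
  import numpy as np
  y_true = np.array([[0., 1.], [0., 0.]])
  y_pred = np.array([[1., 1.], [1., 0.]])
  per_sample = np.mean(np.square(y_pred - y_true), axis=-1)     # [0.5, 0.5]
  unweighted = per_sample.mean()                                 # 0.5
  weighted = np.sum(per_sample * [0.7, 0.3]) / len(per_sample)   # 0.25
  return per_sample, unweighted, weighted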
328@keras_export('keras.losses.MeanAbsoluteError')
329class MeanAbsoluteError(LossFunctionWrapper):
330 """Computes the mean of absolute difference between labels and predictions.
332 `loss = abs(y_true - y_pred)`
334 Standalone usage:
336 >>> y_true = [[0., 1.], [0., 0.]]
337 >>> y_pred = [[1., 1.], [1., 0.]]
338 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
339 >>> mae = tf.keras.losses.MeanAbsoluteError()
340 >>> mae(y_true, y_pred).numpy()
341 0.5
343 >>> # Calling with 'sample_weight'.
344 >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
345 0.25
347 >>> # Using 'sum' reduction type.
348 >>> mae = tf.keras.losses.MeanAbsoluteError(
349 ... reduction=tf.keras.losses.Reduction.SUM)
350 >>> mae(y_true, y_pred).numpy()
351 1.0
353 >>> # Using 'none' reduction type.
354 >>> mae = tf.keras.losses.MeanAbsoluteError(
355 ... reduction=tf.keras.losses.Reduction.NONE)
356 >>> mae(y_true, y_pred).numpy()
357 array([0.5, 0.5], dtype=float32)
359 Usage with the `compile()` API:
361 ```python
362 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
363 ```
364 """
366 def __init__(self,
367 reduction=losses_utils.ReductionV2.AUTO,
368 name='mean_absolute_error'):
369 """Initializes `MeanAbsoluteError` instance.
371 Args:
372 reduction: Type of `tf.keras.losses.Reduction` to apply to
373 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
374 option will be determined by the usage context. For almost all cases
375 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
376 `tf.distribute.Strategy`, outside of built-in training loops such as
377 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
378 will raise an error. Please see this custom training [tutorial](
379 https://www.tensorflow.org/tutorials/distribute/custom_training) for
380 more details.
381 name: Optional name for the instance. Defaults to 'mean_absolute_error'.
382 """
383 super().__init__(mean_absolute_error, name=name, reduction=reduction)
386@keras_export('keras.losses.MeanAbsolutePercentageError')
387class MeanAbsolutePercentageError(LossFunctionWrapper):
388 """Computes the mean absolute percentage error between `y_true` and `y_pred`.
390 `loss = 100 * abs((y_true - y_pred) / y_true)`
392 Standalone usage:
394 >>> y_true = [[2., 1.], [2., 3.]]
395 >>> y_pred = [[1., 1.], [1., 0.]]
396 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
397 >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
398 >>> mape(y_true, y_pred).numpy()
399 50.
401 >>> # Calling with 'sample_weight'.
402 >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
403 20.
405 >>> # Using 'sum' reduction type.
406 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
407 ... reduction=tf.keras.losses.Reduction.SUM)
408 >>> mape(y_true, y_pred).numpy()
409 100.
411 >>> # Using 'none' reduction type.
412 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
413 ... reduction=tf.keras.losses.Reduction.NONE)
414 >>> mape(y_true, y_pred).numpy()
415 array([25., 75.], dtype=float32)
417 Usage with the `compile()` API:
419 ```python
420 model.compile(optimizer='sgd',
421 loss=tf.keras.losses.MeanAbsolutePercentageError())
422 ```
423 """
425 def __init__(self,
426 reduction=losses_utils.ReductionV2.AUTO,
427 name='mean_absolute_percentage_error'):
428 """Initializes `MeanAbsolutePercentageError` instance.
430 Args:
431 reduction: Type of `tf.keras.losses.Reduction` to apply to
432 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
433 option will be determined by the usage context. For almost all cases
434 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
435 `tf.distribute.Strategy`, outside of built-in training loops such as
436 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
437 will raise an error. Please see this custom training [tutorial](
438 https://www.tensorflow.org/tutorials/distribute/custom_training) for
439 more details.
440 name: Optional name for the instance. Defaults to
441 'mean_absolute_percentage_error'.
442 """
443 super().__init__(
444 mean_absolute_percentage_error, name=name, reduction=reduction)
447@keras_export('keras.losses.MeanSquaredLogarithmicError')
448class MeanSquaredLogarithmicError(LossFunctionWrapper):
449 """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
451 `loss = square(log(y_true + 1.) - log(y_pred + 1.))`
453 Standalone usage:
455 >>> y_true = [[0., 1.], [0., 0.]]
456 >>> y_pred = [[1., 1.], [1., 0.]]
457 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
458 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
459 >>> msle(y_true, y_pred).numpy()
460 0.240
462 >>> # Calling with 'sample_weight'.
463 >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
464 0.120
466 >>> # Using 'sum' reduction type.
467 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
468 ... reduction=tf.keras.losses.Reduction.SUM)
469 >>> msle(y_true, y_pred).numpy()
470 0.480
472 >>> # Using 'none' reduction type.
473 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
474 ... reduction=tf.keras.losses.Reduction.NONE)
475 >>> msle(y_true, y_pred).numpy()
476 array([0.240, 0.240], dtype=float32)
478 Usage with the `compile()` API:
480 ```python
481 model.compile(optimizer='sgd',
482 loss=tf.keras.losses.MeanSquaredLogarithmicError())
483 ```
484 """
486 def __init__(self,
487 reduction=losses_utils.ReductionV2.AUTO,
488 name='mean_squared_logarithmic_error'):
489 """Initializes `MeanSquaredLogarithmicError` instance.
491 Args:
492 reduction: Type of `tf.keras.losses.Reduction` to apply to
493 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
494 option will be determined by the usage context. For almost all cases
495 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
496 `tf.distribute.Strategy`, outside of built-in training loops such as
497 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
498 will raise an error. Please see this custom training [tutorial](
499 https://www.tensorflow.org/tutorials/distribute/custom_training) for
500 more details.
501 name: Optional name for the instance. Defaults to
502 'mean_squared_logarithmic_error'.
503 """
504 super().__init__(
505 mean_squared_logarithmic_error, name=name, reduction=reduction)
508@keras_export('keras.losses.BinaryCrossentropy')
509class BinaryCrossentropy(LossFunctionWrapper):
510 """Computes the cross-entropy loss between true labels and predicted labels.
512 Use this cross-entropy loss for binary (0 or 1) classification applications.
513 The loss function requires the following inputs:
515 - `y_true` (true label): This is either 0 or 1.
516 - `y_pred` (predicted value): This is the model's prediction, i.e., a single
517 floating-point value which either represents a
518 [logit](https://en.wikipedia.org/wiki/Logit), (i.e., a value in [-inf, inf]
519 when `from_logits=True`) or a probability (i.e., a value in [0., 1.] when
520 `from_logits=False`).
522 **Recommended Usage:** (set `from_logits=True`)
524 With `tf.keras` API:
526 ```python
527 model.compile(
528 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
529 ....
530 )
531 ```
533 As a standalone function:
535 >>> # Example 1: (batch_size = 1, number of samples = 4)
536 >>> y_true = [0, 1, 0, 0]
537 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
538 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
539 >>> bce(y_true, y_pred).numpy()
540 0.865
542 >>> # Example 2: (batch_size = 2, number of samples = 4)
543 >>> y_true = [[0, 1], [0, 0]]
544 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
545 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
546 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
547 >>> bce(y_true, y_pred).numpy()
548 0.865
549 >>> # Using 'sample_weight' attribute
550 >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
551 0.243
552 >>> # Using 'sum' reduction type.
553 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
554 ... reduction=tf.keras.losses.Reduction.SUM)
555 >>> bce(y_true, y_pred).numpy()
556 1.730
557 >>> # Using 'none' reduction type.
558 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
559 ... reduction=tf.keras.losses.Reduction.NONE)
560 >>> bce(y_true, y_pred).numpy()
561 array([0.235, 1.496], dtype=float32)
563 **Default Usage:** (set `from_logits=False`)
565 >>> # Make the following updates to the above "Recommended Usage" section
566 >>> # 1. Set `from_logits=False`
567 >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
568 >>> # 2. Update `y_pred` to use probabilities instead of logits
569 >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
570 """
572 def __init__(self,
573 from_logits=False,
574 label_smoothing=0,
575 axis=-1,
576 reduction=losses_utils.ReductionV2.AUTO,
577 name='binary_crossentropy'):
578 """Initializes `BinaryCrossentropy` instance.
580 Args:
581 from_logits: Whether to interpret `y_pred` as a tensor of
582 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
583 assume that `y_pred` contains probabilities (i.e., values in [0, 1]).
584 label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0,
585 we compute the loss between the predicted labels and a smoothed version
586 of the true labels, where the smoothing squeezes the labels towards 0.5.
587 Larger values of `label_smoothing` correspond to heavier smoothing.
588 axis: The axis along which to compute crossentropy (the features axis).
589 Defaults to -1.
590 reduction: Type of `tf.keras.losses.Reduction` to apply to
591 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
592 option will be determined by the usage context. For almost all cases
593 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
594 `tf.distribute.Strategy`, outside of built-in training loops such as
595 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
596 will raise an error. Please see this custom training [tutorial](
597 https://www.tensorflow.org/tutorials/distribute/custom_training) for
598 more details.
599 name: Name for the op. Defaults to 'binary_crossentropy'.
600 """
601 super().__init__(
602 binary_crossentropy,
603 name=name,
604 reduction=reduction,
605 from_logits=from_logits,
606 label_smoothing=label_smoothing,
607 axis=axis)
608 self.from_logits = from_logits
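# --- Illustrative sketch (not part of the original file) ---------------------
# The relationship between the two `from_logits` modes described above:
# passing raw logits with `from_logits=True` is, up to clipping of
# probabilities, equivalent to passing `sigmoid(logits)` with
# `from_logits=False`. Both calls below give approximately 0.865.
def _example_bce_from_logits():
  import tensorflow as tf
  y_true = [[0., 1.], [0., 0.]]
  logits = [[-18.6, 0.51], [2.94, -12.8]]
  bce_from_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  bce_from_probs = tf.keras.losses.BinaryCrossentropy(from_logits=False)
  probs = tf.sigmoid(logits)
  return bce_from_logits(y_true, logits), bce_from_probs(y_true, probs)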
611@keras_export('keras.losses.CategoricalCrossentropy')
612class CategoricalCrossentropy(LossFunctionWrapper):
613 """Computes the crossentropy loss between the labels and predictions.
615 Use this crossentropy loss function when there are two or more label classes.
616 We expect labels to be provided in a `one_hot` representation. If you want to
617 provide labels as integers, please use `SparseCategoricalCrossentropy` loss.
618 There should be `# classes` floating point values per feature.
620 In the snippet below, there are `# classes` floating point values per
621 example. The shapes of both `y_pred` and `y_true` are
622 `[batch_size, num_classes]`.
624 Standalone usage:
626 >>> y_true = [[0, 1, 0], [0, 0, 1]]
627 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
628 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
629 >>> cce = tf.keras.losses.CategoricalCrossentropy()
630 >>> cce(y_true, y_pred).numpy()
631 1.177
633 >>> # Calling with 'sample_weight'.
634 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
635 0.814
637 >>> # Using 'sum' reduction type.
638 >>> cce = tf.keras.losses.CategoricalCrossentropy(
639 ... reduction=tf.keras.losses.Reduction.SUM)
640 >>> cce(y_true, y_pred).numpy()
641 2.354
643 >>> # Using 'none' reduction type.
644 >>> cce = tf.keras.losses.CategoricalCrossentropy(
645 ... reduction=tf.keras.losses.Reduction.NONE)
646 >>> cce(y_true, y_pred).numpy()
647 array([0.0513, 2.303], dtype=float32)
649 Usage with the `compile()` API:
651 ```python
652 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())
653 ```
654 """
656 def __init__(self,
657 from_logits=False,
658 label_smoothing=0,
659 axis=-1,
660 reduction=losses_utils.ReductionV2.AUTO,
661 name='categorical_crossentropy'):
662 """Initializes `CategoricalCrossentropy` instance.
664 Args:
665 from_logits: Whether `y_pred` is expected to be a logits tensor. By
666 default, we assume that `y_pred` encodes a probability distribution.
667 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
668 meaning the confidence on label values is relaxed. For example, if
669 `0.1`, use `0.1 / num_classes` for non-target labels and
670 `0.9 + 0.1 / num_classes` for target labels.
671 axis: The axis along which to compute crossentropy (the features axis).
672 Defaults to -1.
673 reduction: Type of `tf.keras.losses.Reduction` to apply to
674 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
675 option will be determined by the usage context. For almost all cases
676 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
677 `tf.distribute.Strategy`, outside of built-in training loops such as
678 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
679 will raise an error. Please see this custom training [tutorial](
680 https://www.tensorflow.org/tutorials/distribute/custom_training) for
681 more details.
682 name: Optional name for the instance.
683 Defaults to 'categorical_crossentropy'.
684 """
685 super().__init__(
686 categorical_crossentropy,
687 name=name,
688 reduction=reduction,
689 from_logits=from_logits,
690 label_smoothing=label_smoothing,
691 axis=axis)
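# --- Illustrative sketch (not part of the original file) ---------------------
# The label smoothing rule quoted in the `label_smoothing` argument above,
# written out with NumPy for `label_smoothing=0.1` and three classes: the
# target entry becomes 0.9 + 0.1 / 3 and each non-target entry 0.1 / 3.
def _example_label_smoothing(label_smoothing=0.1, num_classes=3):
  import numpy as np
  y_true = np.array([[0., 1., 0.]])
  smoothed = y_true * (1. - label_smoothing) + label_smoothing / num_classes
  return smoothed   # [[0.0333..., 0.9333..., 0.0333...]]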
694@keras_export('keras.losses.SparseCategoricalCrossentropy')
695class SparseCategoricalCrossentropy(LossFunctionWrapper):
696 """Computes the crossentropy loss between the labels and predictions.
698 Use this crossentropy loss function when there are two or more label classes.
699 We expect labels to be provided as integers. If you want to provide labels
700 using `one-hot` representation, please use `CategoricalCrossentropy` loss.
701 There should be `# classes` floating point values per feature for `y_pred`
702 and a single floating point value per feature for `y_true`.
704 In the snippet below, there is a single floating point value per example for
705 `y_true` and `# classes` floating point values per example for `y_pred`.
706 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
707 `[batch_size, num_classes]`.
709 Standalone usage:
711 >>> y_true = [1, 2]
712 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
713 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
714 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
715 >>> scce(y_true, y_pred).numpy()
716 1.177
718 >>> # Calling with 'sample_weight'.
719 >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
720 0.814
722 >>> # Using 'sum' reduction type.
723 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
724 ... reduction=tf.keras.losses.Reduction.SUM)
725 >>> scce(y_true, y_pred).numpy()
726 2.354
728 >>> # Using 'none' reduction type.
729 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
730 ... reduction=tf.keras.losses.Reduction.NONE)
731 >>> scce(y_true, y_pred).numpy()
732 array([0.0513, 2.303], dtype=float32)
734 Usage with the `compile()` API:
736 ```python
737 model.compile(optimizer='sgd',
738 loss=tf.keras.losses.SparseCategoricalCrossentropy())
739 ```
740 """
742 def __init__(self,
743 from_logits=False,
744 reduction=losses_utils.ReductionV2.AUTO,
745 name='sparse_categorical_crossentropy'):
746 """Initializes `SparseCategoricalCrossentropy` instance.
748 Args:
749 from_logits: Whether `y_pred` is expected to be a logits tensor. By
750 default, we assume that `y_pred` encodes a probability distribution.
751 reduction: Type of `tf.keras.losses.Reduction` to apply to
752 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
753 option will be determined by the usage context. For almost all cases
754 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
755 `tf.distribute.Strategy`, outside of built-in training loops such as
756 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
757 will raise an error. Please see this custom training [tutorial](
758 https://www.tensorflow.org/tutorials/distribute/custom_training) for
759 more details.
760 name: Optional name for the instance. Defaults to
761 'sparse_categorical_crossentropy'.
762 """
763 super().__init__(
764 sparse_categorical_crossentropy,
765 name=name,
766 reduction=reduction,
767 from_logits=from_logits)
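# --- Illustrative sketch (not part of the original file) ---------------------
# The correspondence between sparse integer labels and the one-hot labels
# expected by `CategoricalCrossentropy`, using the same inputs as the
# doctests above; both results are approximately 1.177.
def _example_sparse_vs_one_hot():
  import tensorflow as tf
  y_true = [1, 2]
  y_pred = [[0.05, 0.95, 0.], [0.1, 0.8, 0.1]]
  scce = tf.keras.losses.SparseCategoricalCrossentropy()
  cce = tf.keras.losses.CategoricalCrossentropy()
  return scce(y_true, y_pred), cce(tf.one_hot(y_true, depth=3), y_pred)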
770@keras_export('keras.losses.Hinge')
771class Hinge(LossFunctionWrapper):
772 """Computes the hinge loss between `y_true` and `y_pred`.
774 `loss = maximum(1 - y_true * y_pred, 0)`
776 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
777 provided, we will convert them to -1 or 1.
779 Standalone usage:
781 >>> y_true = [[0., 1.], [0., 0.]]
782 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
783 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
784 >>> h = tf.keras.losses.Hinge()
785 >>> h(y_true, y_pred).numpy()
786 1.3
788 >>> # Calling with 'sample_weight'.
789 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
790 0.55
792 >>> # Using 'sum' reduction type.
793 >>> h = tf.keras.losses.Hinge(
794 ... reduction=tf.keras.losses.Reduction.SUM)
795 >>> h(y_true, y_pred).numpy()
796 2.6
798 >>> # Using 'none' reduction type.
799 >>> h = tf.keras.losses.Hinge(
800 ... reduction=tf.keras.losses.Reduction.NONE)
801 >>> h(y_true, y_pred).numpy()
802 array([1.1, 1.5], dtype=float32)
804 Usage with the `compile()` API:
806 ```python
807 model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
808 ```
809 """
811 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'):
812 """Initializes `Hinge` instance.
814 Args:
815 reduction: Type of `tf.keras.losses.Reduction` to apply to
816 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
817 option will be determined by the usage context. For almost all cases
818 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
819 `tf.distribute.Strategy`, outside of built-in training loops such as
820 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
821 will raise an error. Please see this custom training [tutorial](
822 https://www.tensorflow.org/tutorials/distribute/custom_training) for
823 more details.
824 name: Optional name for the instance. Defaults to 'hinge'.
825 """
826 super().__init__(hinge, name=name, reduction=reduction)
829@keras_export('keras.losses.SquaredHinge')
830class SquaredHinge(LossFunctionWrapper):
831 """Computes the squared hinge loss between `y_true` and `y_pred`.
833 `loss = square(maximum(1 - y_true * y_pred, 0))`
835 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
836 provided, we will convert them to -1 or 1.
838 Standalone usage:
840 >>> y_true = [[0., 1.], [0., 0.]]
841 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
842 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
843 >>> h = tf.keras.losses.SquaredHinge()
844 >>> h(y_true, y_pred).numpy()
845 1.86
847 >>> # Calling with 'sample_weight'.
848 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
849 0.73
851 >>> # Using 'sum' reduction type.
852 >>> h = tf.keras.losses.SquaredHinge(
853 ... reduction=tf.keras.losses.Reduction.SUM)
854 >>> h(y_true, y_pred).numpy()
855 3.72
857 >>> # Using 'none' reduction type.
858 >>> h = tf.keras.losses.SquaredHinge(
859 ... reduction=tf.keras.losses.Reduction.NONE)
860 >>> h(y_true, y_pred).numpy()
861 array([1.46, 2.26], dtype=float32)
863 Usage with the `compile()` API:
865 ```python
866 model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
867 ```
868 """
870 def __init__(self,
871 reduction=losses_utils.ReductionV2.AUTO,
872 name='squared_hinge'):
873 """Initializes `SquaredHinge` instance.
875 Args:
876 reduction: Type of `tf.keras.losses.Reduction` to apply to
877 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
878 option will be determined by the usage context. For almost all cases
879 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
880 `tf.distribute.Strategy`, outside of built-in training loops such as
881 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
882 will raise an error. Please see this custom training [tutorial](
883 https://www.tensorflow.org/tutorials/distribute/custom_training) for
884 more details.
885 name: Optional name for the instance. Defaults to 'squared_hinge'.
886 """
887 super().__init__(squared_hinge, name=name, reduction=reduction)
890@keras_export('keras.losses.CategoricalHinge')
891class CategoricalHinge(LossFunctionWrapper):
892 """Computes the categorical hinge loss between `y_true` and `y_pred`.
894 `loss = maximum(neg - pos + 1, 0)`
895 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`
897 Standalone usage:
899 >>> y_true = [[0, 1], [0, 0]]
900 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
901 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
902 >>> h = tf.keras.losses.CategoricalHinge()
903 >>> h(y_true, y_pred).numpy()
904 1.4
906 >>> # Calling with 'sample_weight'.
907 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
908 0.6
910 >>> # Using 'sum' reduction type.
911 >>> h = tf.keras.losses.CategoricalHinge(
912 ... reduction=tf.keras.losses.Reduction.SUM)
913 >>> h(y_true, y_pred).numpy()
914 2.8
916 >>> # Using 'none' reduction type.
917 >>> h = tf.keras.losses.CategoricalHinge(
918 ... reduction=tf.keras.losses.Reduction.NONE)
919 >>> h(y_true, y_pred).numpy()
920 array([1.2, 1.6], dtype=float32)
922 Usage with the `compile()` API:
924 ```python
925 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
926 ```
927 """
929 def __init__(self,
930 reduction=losses_utils.ReductionV2.AUTO,
931 name='categorical_hinge'):
932 """Initializes `CategoricalHinge` instance.
934 Args:
935 reduction: Type of `tf.keras.losses.Reduction` to apply to
936 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
937 option will be determined by the usage context. For almost all cases
938 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
939 `tf.distribute.Strategy`, outside of built-in training loops such as
940 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
941 will raise an error. Please see this custom training [tutorial](
942 https://www.tensorflow.org/tutorials/distribute/custom_training) for
943 more details.
944 name: Optional name for the instance. Defaults to 'categorical_hinge'.
945 """
946 super().__init__(categorical_hinge, name=name, reduction=reduction)
949@keras_export('keras.losses.Poisson')
950class Poisson(LossFunctionWrapper):
951 """Computes the Poisson loss between `y_true` and `y_pred`.
953 `loss = y_pred - y_true * log(y_pred)`
955 Standalone usage:
957 >>> y_true = [[0., 1.], [0., 0.]]
958 >>> y_pred = [[1., 1.], [0., 0.]]
959 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
960 >>> p = tf.keras.losses.Poisson()
961 >>> p(y_true, y_pred).numpy()
962 0.5
964 >>> # Calling with 'sample_weight'.
965 >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
966 0.4
968 >>> # Using 'sum' reduction type.
969 >>> p = tf.keras.losses.Poisson(
970 ... reduction=tf.keras.losses.Reduction.SUM)
971 >>> p(y_true, y_pred).numpy()
972 0.999
974 >>> # Using 'none' reduction type.
975 >>> p = tf.keras.losses.Poisson(
976 ... reduction=tf.keras.losses.Reduction.NONE)
977 >>> p(y_true, y_pred).numpy()
978 array([0.999, 0.], dtype=float32)
980 Usage with the `compile()` API:
982 ```python
983 model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
984 ```
985 """
987 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'):
988 """Initializes `Poisson` instance.
990 Args:
991 reduction: Type of `tf.keras.losses.Reduction` to apply to
992 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
993 option will be determined by the usage context. For almost all cases
994 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
995 `tf.distribute.Strategy`, outside of built-in training loops such as
996 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
997 will raise an error. Please see this custom training [tutorial](
998 https://www.tensorflow.org/tutorials/distribute/custom_training) for
999 more details.
1000 name: Optional name for the instance. Defaults to 'poisson'.
1001 """
1002 super().__init__(poisson, name=name, reduction=reduction)
1005@keras_export('keras.losses.LogCosh')
1006class LogCosh(LossFunctionWrapper):
1007 """Computes the logarithm of the hyperbolic cosine of the prediction error.
1009 `logcosh = log((exp(x) + exp(-x))/2)`,
1010 where x is the error `y_pred - y_true`.
1012 Standalone usage:
1014 >>> y_true = [[0., 1.], [0., 0.]]
1015 >>> y_pred = [[1., 1.], [0., 0.]]
1016 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1017 >>> l = tf.keras.losses.LogCosh()
1018 >>> l(y_true, y_pred).numpy()
1019 0.108
1021 >>> # Calling with 'sample_weight'.
1022 >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1023 0.087
1025 >>> # Using 'sum' reduction type.
1026 >>> l = tf.keras.losses.LogCosh(
1027 ... reduction=tf.keras.losses.Reduction.SUM)
1028 >>> l(y_true, y_pred).numpy()
1029 0.217
1031 >>> # Using 'none' reduction type.
1032 >>> l = tf.keras.losses.LogCosh(
1033 ... reduction=tf.keras.losses.Reduction.NONE)
1034 >>> l(y_true, y_pred).numpy()
1035 array([0.217, 0.], dtype=float32)
1037 Usage with the `compile()` API:
1039 ```python
1040 model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
1041 ```
1042 """
1044 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='log_cosh'):
1045 """Initializes `LogCosh` instance.
1047 Args:
1048 reduction: Type of `tf.keras.losses.Reduction` to apply to
1049 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1050 option will be determined by the usage context. For almost all cases
1051 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1052 `tf.distribute.Strategy`, outside of built-in training loops such as
1053 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1054 will raise an error. Please see this custom training [tutorial](
1055 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1056 more details.
1057 name: Optional name for the instance. Defaults to 'log_cosh'.
1058 """
1059 super().__init__(log_cosh, name=name, reduction=reduction)
1062@keras_export('keras.losses.KLDivergence')
1063class KLDivergence(LossFunctionWrapper):
1064 """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.
1066 `loss = y_true * log(y_true / y_pred)`
1068 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
1070 Standalone usage:
1072 >>> y_true = [[0, 1], [0, 0]]
1073 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1074 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1075 >>> kl = tf.keras.losses.KLDivergence()
1076 >>> kl(y_true, y_pred).numpy()
1077 0.458
1079 >>> # Calling with 'sample_weight'.
1080 >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
1081 0.366
1083 >>> # Using 'sum' reduction type.
1084 >>> kl = tf.keras.losses.KLDivergence(
1085 ... reduction=tf.keras.losses.Reduction.SUM)
1086 >>> kl(y_true, y_pred).numpy()
1087 0.916
1089 >>> # Using 'none' reduction type.
1090 >>> kl = tf.keras.losses.KLDivergence(
1091 ... reduction=tf.keras.losses.Reduction.NONE)
1092 >>> kl(y_true, y_pred).numpy()
1093 array([0.916, -3.08e-06], dtype=float32)
1095 Usage with the `compile()` API:
1097 ```python
1098 model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
1099 ```
1100 """
1102 def __init__(self,
1103 reduction=losses_utils.ReductionV2.AUTO,
1104 name='kl_divergence'):
1105 """Initializes `KLDivergence` instance.
1107 Args:
1108 reduction: Type of `tf.keras.losses.Reduction` to apply to
1109 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1110 option will be determined by the usage context. For almost all cases
1111 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1112 `tf.distribute.Strategy`, outside of built-in training loops such as
1113 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1114 will raise an error. Please see this custom training [tutorial](
1115 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1116 more details.
1117 name: Optional name for the instance. Defaults to 'kl_divergence'.
1118 """
1119 super().__init__(kl_divergence, name=name, reduction=reduction)
1122@keras_export('keras.losses.Huber')
1123class Huber(LossFunctionWrapper):
1124 """Computes the Huber loss between `y_true` and `y_pred`.
1126 For each value x in `error = y_true - y_pred`:
1128 ```
1129 loss = 0.5 * x^2 if |x| <= d
1130 loss = 0.5 * d^2 + d * (|x| - d) if |x| > d
1131 ```
1132 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1134 Standalone usage:
1136 >>> y_true = [[0, 1], [0, 0]]
1137 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1138 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
1139 >>> h = tf.keras.losses.Huber()
1140 >>> h(y_true, y_pred).numpy()
1141 0.155
1143 >>> # Calling with 'sample_weight'.
1144 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
1145 0.09
1147 >>> # Using 'sum' reduction type.
1148 >>> h = tf.keras.losses.Huber(
1149 ... reduction=tf.keras.losses.Reduction.SUM)
1150 >>> h(y_true, y_pred).numpy()
1151 0.31
1153 >>> # Using 'none' reduction type.
1154 >>> h = tf.keras.losses.Huber(
1155 ... reduction=tf.keras.losses.Reduction.NONE)
1156 >>> h(y_true, y_pred).numpy()
1157 array([0.18, 0.13], dtype=float32)
1159 Usage with the `compile()` API:
1161 ```python
1162 model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
1163 ```
1164 """
1166 def __init__(self,
1167 delta=1.0,
1168 reduction=losses_utils.ReductionV2.AUTO,
1169 name='huber_loss'):
1170 """Initializes `Huber` instance.
1172 Args:
1173 delta: A float, the point where the Huber loss function changes from a
1174 quadratic to linear.
1175 reduction: Type of `tf.keras.losses.Reduction` to apply to
1176 loss. Default value is `AUTO`. `AUTO` indicates that the reduction
1177 option will be determined by the usage context. For almost all cases
1178 this defaults to `SUM_OVER_BATCH_SIZE`. When used with
1179 `tf.distribute.Strategy`, outside of built-in training loops such as
1180 `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
1181 will raise an error. Please see this custom training [tutorial](
1182 https://www.tensorflow.org/tutorials/distribute/custom_training) for
1183 more details.
1184 name: Optional name for the instance. Defaults to 'huber_loss'.
1185 """
1186 super().__init__(huber, name=name, reduction=reduction, delta=delta)
1189@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
1190 'keras.metrics.MSE', 'keras.losses.mean_squared_error',
1191 'keras.losses.mse', 'keras.losses.MSE')
1192@dispatch.add_dispatch_support
1193def mean_squared_error(y_true, y_pred):
1194 """Computes the mean squared error between labels and predictions.
1196 After computing the squared distance between the inputs, the mean value over
1197 the last dimension is returned.
1199 `loss = mean(square(y_true - y_pred), axis=-1)`
1201 Standalone usage:
1203 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1204 >>> y_pred = np.random.random(size=(2, 3))
1205 >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
1206 >>> assert loss.shape == (2,)
1207 >>> assert np.array_equal(
1208 ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))
1210 Args:
1211 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1212 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1214 Returns:
1215 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1216 """
1217 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1218 y_true = math_ops.cast(y_true, y_pred.dtype)
1219 return backend.mean(math_ops.squared_difference(y_pred, y_true), axis=-1)
1222def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):
1223 """Apply a loss function on a per batch basis.
1225 Args:
1226 loss_fn: The loss function
1227 y_true: truth values (RaggedTensor)
1228 y_pred: predicted values (RaggedTensor)
1229 y_pred_extra_dim: whether y_pred has an additional dimension compared to
1230 y_true
1232 Returns:
1233 Loss-function result. A dense tensor if the output has a single dimension
1234 (per-batch loss value); a ragged tensor otherwise.
1235 """
1237 def rt_is_equiv_dense(rt):
1238 """Returns true if this RaggedTensor has the same row_lenghts across
1240 all ragged dimensions and thus can be converted to a dense tensor
1241 without loss of information.
1243 Args:
1244 rt: RaggedTensor.
1245 """
1246 return math_ops.reduce_all([
1247 math_ops.equal(
1248 math_ops.reduce_variance(math_ops.cast(row_lens, backend.floatx())),
1249 constant_op.constant([0.])) for row_lens in rt.nested_row_lengths()
1250 ])
1252 def _convert_to_dense(inputs):
1253 return tuple(
1254 rt.to_tensor() if isinstance(rt, ragged_tensor.RaggedTensor) else rt
1255 for rt in inputs)
1257 def _call_loss(inputs, ragged_output):
1258 """ Adapt the result to ragged or dense tensor according to the expected
1260 output type. This is done so that all the return values of the map
1261 operation have the same type.
1262 """
1263 r = loss_fn(*inputs)
1264 if ragged_output and not isinstance(r, ragged_tensor.RaggedTensor):
1265 r = ragged_tensor.RaggedTensor.from_tensor(r)
1266 elif not ragged_output and isinstance(r, ragged_tensor.RaggedTensor):
1267 r = r.to_tensor()
1268 return r
1270 def _wrapper(inputs, ragged_output):
1271 _, y_pred = inputs
1272 if isinstance(y_pred, ragged_tensor.RaggedTensor):
1273 return cond.cond(
1274 rt_is_equiv_dense(y_pred),
1275 lambda: _call_loss(_convert_to_dense(inputs), ragged_output),
1276 lambda: _call_loss(inputs, ragged_output))
1278 return loss_fn(*inputs)
1280 if not isinstance(y_true, ragged_tensor.RaggedTensor):
1281 return loss_fn(y_true, y_pred.to_tensor())
1283 lshape = y_pred.shape.as_list()[1:-1]
1284 if len(lshape) > 0:
1285 spec = ragged_tensor.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype)
1286 else:
1287 spec = tensor_spec.TensorSpec(shape=[], dtype=y_pred.dtype)
1289 nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)]
1290 if y_pred_extra_dim:
1291 # The last dimension of a categorical prediction may be ragged or not.
1292 rdims = [len(slist) for slist in nested_splits_list]
1293 if rdims[0] == rdims[1] - 1:
1294 nested_splits_list[1] = nested_splits_list[1][:-1]
1296 map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1)
1298 assertion_list = ragged_util.assert_splits_match(nested_splits_list)
1299 with ops.control_dependencies(assertion_list):
1300 return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec)
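# --- Illustrative sketch (not part of the original file) ---------------------
# What the ragged dispatch below enables at the public API level: each ragged
# row is treated as one sample, so for 2-D ragged inputs the per-sample losses
# come back as a dense `[batch_size]` tensor.
def _example_ragged_mse():
  import tensorflow as tf
  y_true = tf.ragged.constant([[0., 1., 1.], [1., 0.]])
  y_pred = tf.ragged.constant([[1., 1., 0.], [0., 0.]])
  # Row 0: mean([1., 0., 1.]) ~= 0.667; row 1: mean([1., 0.]) = 0.5.
  return tf.keras.losses.mean_squared_error(y_true, y_pred)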
1303@dispatch.dispatch_for_types(mean_squared_error, ragged_tensor.RaggedTensor)
1304def _ragged_tensor_mse(y_true, y_pred):
1305 """Implements support for handling RaggedTensors.
1307 Args:
1308 y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
1309 y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.
1311 Returns:
1312 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
1313 When the number of dimensions of the batch feature vector [d0, .. dN] is
1314 greater than one, the return value is a RaggedTensor. Otherwise, a dense
1315 tensor with dimensions [batch_size] is returned.
1316 """
1317 return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)
1320@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
1321 'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
1322 'keras.losses.mae', 'keras.losses.MAE')
1323@dispatch.add_dispatch_support
1324def mean_absolute_error(y_true, y_pred):
1325 """Computes the mean absolute error between labels and predictions.
1327 `loss = mean(abs(y_true - y_pred), axis=-1)`
1329 Standalone usage:
1331 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1332 >>> y_pred = np.random.random(size=(2, 3))
1333 >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
1334 >>> assert loss.shape == (2,)
1335 >>> assert np.array_equal(
1336 ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))
1338 Args:
1339 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1340 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1342 Returns:
1343 Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
1344 """
1345 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1346 y_true = math_ops.cast(y_true, y_pred.dtype)
1347 return backend.mean(math_ops.abs(y_pred - y_true), axis=-1)
1350@dispatch.dispatch_for_types(mean_absolute_error, ragged_tensor.RaggedTensor)
1351def _ragged_tensor_mae(y_true, y_pred):
1352 """RaggedTensor adapter for mean_absolute_error."""
1353 return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred)
1356@keras_export('keras.metrics.mean_absolute_percentage_error',
1357 'keras.metrics.mape', 'keras.metrics.MAPE',
1358 'keras.losses.mean_absolute_percentage_error',
1359 'keras.losses.mape', 'keras.losses.MAPE')
1360@dispatch.add_dispatch_support
1361def mean_absolute_percentage_error(y_true, y_pred):
1362 """Computes the mean absolute percentage error between `y_true` and `y_pred`.
1364 `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`
1366 Standalone usage:
1368 >>> y_true = np.random.random(size=(2, 3))
1369 >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero
1370 >>> y_pred = np.random.random(size=(2, 3))
1371 >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
1372 >>> assert loss.shape == (2,)
1373 >>> assert np.array_equal(
1374 ... loss.numpy(),
1375 ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))
1377 Args:
1378 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1379 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1381 Returns:
1382 Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`.
1383 """
1384 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1385 y_true = math_ops.cast(y_true, y_pred.dtype)
1386 diff = math_ops.abs(
1387 (y_true - y_pred) / backend.maximum(math_ops.abs(y_true),
1388 backend.epsilon()))
1389 return 100. * backend.mean(diff, axis=-1)
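# --- Illustrative sketch (not part of the original file) ---------------------
# Unlike the plain formula in the docstring, the implementation above clamps
# `abs(y_true)` to `backend.epsilon()` (1e-7 by default), so zero targets
# produce a large but finite loss instead of a division-by-zero inf.
def _example_mape_zero_targets():
  import tensorflow as tf
  y_true = [[0., 1.]]
  y_pred = [[0.5, 1.]]
  # 100 * mean([0.5 / 1e-7, 0.]) is roughly 2.5e8, not inf.
  return tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)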
1392@dispatch.dispatch_for_types(mean_absolute_percentage_error,
1393 ragged_tensor.RaggedTensor)
1394def _ragged_tensor_mape(y_true, y_pred):
1395 """Support RaggedTensors."""
1396 return _ragged_tensor_apply_loss(mean_absolute_percentage_error, y_true,
1397 y_pred)
1400@keras_export('keras.metrics.mean_squared_logarithmic_error',
1401 'keras.metrics.msle', 'keras.metrics.MSLE',
1402 'keras.losses.mean_squared_logarithmic_error',
1403 'keras.losses.msle', 'keras.losses.MSLE')
1404@dispatch.add_dispatch_support
1405def mean_squared_logarithmic_error(y_true, y_pred):
1406 """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
1408 `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`
1410 Standalone usage:
1412 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1413 >>> y_pred = np.random.random(size=(2, 3))
1414 >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
1415 >>> assert loss.shape == (2,)
1416 >>> y_true = np.maximum(y_true, 1e-7)
1417 >>> y_pred = np.maximum(y_pred, 1e-7)
1418 >>> assert np.allclose(
1419 ... loss.numpy(),
1420 ... np.mean(
1421 ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))
1423 Args:
1424 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1425 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1427 Returns:
1428 Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`.
1429 """
1430 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1431 y_true = math_ops.cast(y_true, y_pred.dtype)
1432 first_log = math_ops.log(backend.maximum(y_pred, backend.epsilon()) + 1.)
1433 second_log = math_ops.log(backend.maximum(y_true, backend.epsilon()) + 1.)
1434 return backend.mean(
1435 math_ops.squared_difference(first_log, second_log), axis=-1)
1438@dispatch.dispatch_for_types(mean_squared_logarithmic_error,
1439 ragged_tensor.RaggedTensor)
1440def _ragged_tensor_msle(y_true, y_pred):
1441 """Implements support for handling RaggedTensors."""
1442 return _ragged_tensor_apply_loss(mean_squared_logarithmic_error, y_true,
1443 y_pred)
1446def _maybe_convert_labels(y_true):
1447 """Converts binary labels into -1/1."""
1448 are_zeros = math_ops.equal(y_true, 0)
1449 are_ones = math_ops.equal(y_true, 1)
1450 is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones))
1452 def _convert_binary_labels():
1453 # Convert the binary labels to -1 or 1.
1454 return 2. * y_true - 1.
1456 updated_y_true = smart_cond.smart_cond(is_binary, _convert_binary_labels,
1457 lambda: y_true)
1458 return updated_y_true
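# --- Illustrative sketch (not part of the original file) ---------------------
# The conversion performed by `_maybe_convert_labels` is simply `2 * y - 1`,
# and it is applied only when every label is exactly 0 or 1.
def _example_convert_binary_labels():
  import numpy as np
  y_true = np.array([[0., 1.], [0., 0.]])
  return 2. * y_true - 1.   # [[-1., 1.], [-1., -1.]]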
1461@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
1462@dispatch.add_dispatch_support
1463def squared_hinge(y_true, y_pred):
1464 """Computes the squared hinge loss between `y_true` and `y_pred`.
1466 `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`
1468 Standalone usage:
1470 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1471 >>> y_pred = np.random.random(size=(2, 3))
1472 >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
1473 >>> assert loss.shape == (2,)
1474 >>> assert np.array_equal(
1475 ... loss.numpy(),
1476 ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))
1478 Args:
1479 y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
1480 If binary (0 or 1) labels are provided, we will convert them to -1 or 1.
1481 shape = `[batch_size, d0, .. dN]`.
1482 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1484 Returns:
1485 Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1486 """
1487 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1488 y_true = math_ops.cast(y_true, y_pred.dtype)
1489 y_true = _maybe_convert_labels(y_true)
1490 return backend.mean(
1491 math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1)
1494@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
1495@dispatch.add_dispatch_support
1496def hinge(y_true, y_pred):
1497 """Computes the hinge loss between `y_true` and `y_pred`.
1499 `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`
1501 Standalone usage:
1503 >>> y_true = np.random.choice([-1, 1], size=(2, 3))
1504 >>> y_pred = np.random.random(size=(2, 3))
1505 >>> loss = tf.keras.losses.hinge(y_true, y_pred)
1506 >>> assert loss.shape == (2,)
1507 >>> assert np.array_equal(
1508 ... loss.numpy(),
1509 ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))
1511 Args:
1512 y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
1513 If binary (0 or 1) labels are provided, they will be converted to -1 or 1.
1514 shape = `[batch_size, d0, .. dN]`.
1515 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1517 Returns:
1518 Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
1519 """
1520 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1521 y_true = math_ops.cast(y_true, y_pred.dtype)
1522 y_true = _maybe_convert_labels(y_true)
1523 return backend.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1)
1526@keras_export('keras.losses.categorical_hinge')
1527@dispatch.add_dispatch_support
1528def categorical_hinge(y_true, y_pred):
1529 """Computes the categorical hinge loss between `y_true` and `y_pred`.
1531 `loss = maximum(neg - pos + 1, 0)`
1532 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`
1534 Standalone usage:
1536 >>> y_true = np.random.randint(0, 3, size=(2,))
1537 >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
1538 >>> y_pred = np.random.random(size=(2, 3))
1539 >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
1540 >>> assert loss.shape == (2,)
1541 >>> pos = np.sum(y_true * y_pred, axis=-1)
1542 >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
1543 >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))
1545 Args:
1546 y_true: The ground truth values. `y_true` values are expected to be
1547 either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
1548 y_pred: The predicted values.
1550 Returns:
1551 Categorical hinge loss values.
1552 """
1553 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1554 y_true = math_ops.cast(y_true, y_pred.dtype)
1555 pos = math_ops.reduce_sum(y_true * y_pred, axis=-1)
1556 neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
1557 zero = math_ops.cast(0., y_pred.dtype)
1558 return math_ops.maximum(neg - pos + 1., zero)
1561@keras_export('keras.losses.huber', v1=[])
1562@dispatch.add_dispatch_support
1563def huber(y_true, y_pred, delta=1.0):
1564 """Computes Huber loss value.
1566 For each value x in `error = y_pred - y_true`:
1568 ```
1569 loss = 0.5 * x^2 if |x| <= d
1570 loss = d * |x| - 0.5 * d^2 if |x| > d
1571 ```
1572 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
1574 Args:
1575 y_true: tensor of true targets.
1576 y_pred: tensor of predicted targets.
1577 delta: A float, the point where the Huber loss function changes from a
1578 quadratic to linear.
1580 Returns:
1581 Tensor with one scalar loss entry per sample.
1582 """
1583 y_pred = math_ops.cast(y_pred, dtype=backend.floatx())
1584 y_true = math_ops.cast(y_true, dtype=backend.floatx())
1585 delta = math_ops.cast(delta, dtype=backend.floatx())
1586 error = math_ops.subtract(y_pred, y_true)
1587 abs_error = math_ops.abs(error)
1588 half = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1589 0.5, dtype=abs_error.dtype
1590 )
1591 return backend.mean(
1592 array_ops.where_v2(abs_error <= delta, half * math_ops.square(error),
1593 delta * abs_error - half * math_ops.square(delta)),
1594 axis=-1)
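# A minimal NumPy reference for the piecewise Huber formula documented above;
# the helper name and sample values are hypothetical. It mirrors the same
# per-sample mean over the last axis that `huber` applies.
def _huber_reference_example(delta=1.0):
  import numpy as np
  y_true = np.array([[0.0, 1.0], [0.0, 0.0]])
  y_pred = np.array([[0.6, 0.4], [0.4, 2.6]])
  error = y_pred - y_true
  abs_error = np.abs(error)
  # Quadratic inside the delta band, linear with slope `delta` outside it.
  per_element = np.where(abs_error <= delta,
                         0.5 * np.square(error),
                         delta * abs_error - 0.5 * delta ** 2)
  return per_element.mean(axis=-1)  # one scalar loss per sample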
1597@keras_export('keras.losses.log_cosh', 'keras.losses.logcosh',
1598 'keras.metrics.log_cosh', 'keras.metrics.logcosh')
1599@dispatch.add_dispatch_support
1600def log_cosh(y_true, y_pred):
1601 """Logarithm of the hyperbolic cosine of the prediction error.
1603 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
1604 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
1605 like the mean squared error, but will not be so strongly affected by the
1606 occasional wildly incorrect prediction.
1608 Standalone usage:
1610 >>> y_true = np.random.random(size=(2, 3))
1611 >>> y_pred = np.random.random(size=(2, 3))
1612 >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
1613 >>> assert loss.shape == (2,)
1614 >>> x = y_pred - y_true
1615 >>> assert np.allclose(
1616 ... loss.numpy(),
1617 ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - np.log(2.), axis=-1),
1618 ... atol=1e-5)
1620 Args:
1621 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1622 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1624 Returns:
1625 Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
1626 """
1627 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1628 y_true = math_ops.cast(y_true, y_pred.dtype)
1630 def _logcosh(x):
1631 return x + math_ops.softplus(-2. * x) - math_ops.cast(
1632 math_ops.log(2.), x.dtype)
1634 return backend.mean(_logcosh(y_pred - y_true), axis=-1)
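# A quick NumPy check of the two regimes described in the docstring above;
# the helper name and tolerances are hypothetical. log(cosh(x)) is close to
# x**2 / 2 for small x and close to |x| - log(2) for large x.
def _log_cosh_regimes_example():
  import numpy as np
  small, large = 1e-3, 20.0
  assert np.isclose(np.log(np.cosh(small)), small ** 2 / 2., atol=1e-9)
  assert np.isclose(np.log(np.cosh(large)), abs(large) - np.log(2.), atol=1e-6)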
1637@keras_export('keras.metrics.categorical_crossentropy',
1638 'keras.losses.categorical_crossentropy')
1639@dispatch.add_dispatch_support
1640def categorical_crossentropy(y_true,
1641 y_pred,
1642 from_logits=False,
1643 label_smoothing=0,
1644 axis=-1):
1645 """Computes the categorical crossentropy loss.
1647 Standalone usage:
1649 >>> y_true = [[0, 1, 0], [0, 0, 1]]
1650 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
1651 >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
1652 >>> assert loss.shape == (2,)
1653 >>> loss.numpy()
1654 array([0.0513, 2.303], dtype=float32)
1656 Args:
1657 y_true: Tensor of one-hot true targets.
1658 y_pred: Tensor of predicted targets.
1659 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1660 we assume that `y_pred` encodes a probability distribution.
1661 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
1662 example, if `0.1`, use `0.1 / num_classes` for non-target labels
1663 and `0.9 + 0.1 / num_classes` for target labels.
1664 axis: Defaults to -1. The dimension along which the entropy is
1665 computed.
1667 Returns:
1668 Categorical crossentropy loss value.
1669 """
1670 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1671 y_true = math_ops.cast(y_true, y_pred.dtype)
1672 label_smoothing = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1673 label_smoothing, dtype=backend.floatx()
1674 )
1676 def _smooth_labels():
1677 num_classes = math_ops.cast(array_ops.shape(y_true)[-1], y_pred.dtype)
1678 return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)
1680 y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
1681 lambda: y_true)
1683 return backend.categorical_crossentropy(
1684 y_true, y_pred, from_logits=from_logits, axis=axis)
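# A minimal NumPy sketch of the label-smoothing arithmetic performed by
# `_smooth_labels` above, for label_smoothing=0.1 and 3 classes; the helper
# name is hypothetical.
def _label_smoothing_example():
  import numpy as np
  y_true = np.array([[0., 1., 0.]])
  label_smoothing, num_classes = 0.1, 3
  smoothed = y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
  # Non-target classes get 0.1 / 3; the target class gets 0.9 + 0.1 / 3.
  assert np.allclose(smoothed, [[0.1 / 3, 0.9 + 0.1 / 3, 0.1 / 3]])
  return smoothed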
1687@dispatch.dispatch_for_types(categorical_crossentropy,
1688 ragged_tensor.RaggedTensor)
1689def _ragged_tensor_categorical_crossentropy(y_true,
1690 y_pred,
1691 from_logits=False,
1692 label_smoothing=0,
1693 axis=-1):
1694 """Implements support for handling RaggedTensors.
1696 Args:
1697 y_true: Tensor of one-hot true targets.
1698 y_pred: Tensor of predicted targets.
1699 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1700 we assume that `y_pred` encodes a probability distribution.
1701 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
1702 example, if `0.1`, use `0.1 / num_classes` for non-target labels
1703 and `0.9 + 0.1 / num_classes` for target labels.
1704 axis: The axis along which to compute crossentropy (the features axis).
1705 Defaults to -1.
1707 Returns:
1708 Categorical crossentropy loss value.
1710 Expected shape: (batch, sequence_len, n_classes) with sequence_len
1711 being variable per batch.
1712 Return shape: (batch, sequence_len).
1714 When used by CategoricalCrossentropy() with the default reduction
1715 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
1716 number of elements independent of the batch. E.g. if the RaggedTensor
1717 has 2 batches with [2, 1] values respectively, the resulting loss is
1718 the sum of the individual loss values divided by 3.
1719 """
1720 fn = functools.partial(
1721 categorical_crossentropy,
1722 from_logits=from_logits,
1723 label_smoothing=label_smoothing,
1724 axis=axis)
1725 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
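# A minimal sketch of the ragged reduction described above, assuming ragged
# inputs are supported end to end by the wrapper class as the docstring
# states; the helper name and sample values are hypothetical.
def _ragged_categorical_crossentropy_example():
  import tensorflow as tf
  # Two sequences with lengths [2, 1]; three classes per timestep.
  y_true = tf.ragged.constant([[[0., 1., 0.], [0., 0., 1.]],
                               [[1., 0., 0.]]], ragged_rank=1)
  y_pred = tf.ragged.constant([[[0.1, 0.8, 0.1], [0.2, 0.2, 0.6]],
                               [[0.7, 0.2, 0.1]]], ragged_rank=1)
  cce = tf.keras.losses.CategoricalCrossentropy()
  # With the default SUM_OVER_BATCH_SIZE reduction, the three per-timestep
  # losses are summed and divided by 3 (the total number of values), not by
  # the number of rows.
  return cce(y_true, y_pred)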
1728@keras_export('keras.metrics.sparse_categorical_crossentropy',
1729 'keras.losses.sparse_categorical_crossentropy')
1730@dispatch.add_dispatch_support
1731def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
1732 """Computes the sparse categorical crossentropy loss.
1734 Standalone usage:
1736 >>> y_true = [1, 2]
1737 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
1738 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
1739 >>> assert loss.shape == (2,)
1740 >>> loss.numpy()
1741 array([0.0513, 2.303], dtype=float32)
1743 Args:
1744 y_true: Ground truth values.
1745 y_pred: The predicted values.
1746 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1747 we assume that `y_pred` encodes a probability distribution.
1748 axis: Defaults to -1. The dimension along which the entropy is
1749 computed.
1751 Returns:
1752 Sparse categorical crossentropy loss value.
1753 """
1754 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1755 y_true = math_ops.cast(y_true, y_pred.dtype)
1756 return backend.sparse_categorical_crossentropy(
1757 y_true, y_pred, from_logits=from_logits, axis=axis)
1760@dispatch.dispatch_for_types(sparse_categorical_crossentropy,
1761 ragged_tensor.RaggedTensor)
1762def _ragged_tensor_sparse_categorical_crossentropy(y_true,
1763 y_pred,
1764 from_logits=False,
1765 axis=-1):
1766 """ Implements support for handling RaggedTensors.
1768 Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len
1769 being variable per batch.
1770 Return shape: (batch, sequence_len).
1772 When used by SparseCategoricalCrossentropy() with the default reduction
1773 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
1774 number of elements independent of the batch. E.g. if the RaggedTensor
1775 has 2 batches with [2, 1] values respectively, the resulting loss is
1776 the sum of the individual loss values divided by 3.
1777 """
1778 fn = functools.partial(
1779 sparse_categorical_crossentropy, from_logits=from_logits, axis=axis)
1780 return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True)
1783@keras_export('keras.metrics.binary_crossentropy',
1784 'keras.losses.binary_crossentropy')
1785@dispatch.add_dispatch_support
1786def binary_crossentropy(y_true,
1787 y_pred,
1788 from_logits=False,
1789 label_smoothing=0,
1790 axis=-1):
1791 """Computes the binary crossentropy loss.
1793 Standalone usage:
1795 >>> y_true = [[0, 1], [0, 0]]
1796 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
1797 >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
1798 >>> assert loss.shape == (2,)
1799 >>> loss.numpy()
1800 array([0.916 , 0.714], dtype=float32)
1802 Args:
1803 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1804 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1805 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1806 we assume that `y_pred` encodes a probability distribution.
1807 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
1808 squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
1809 for the target class and `0.5 * label_smoothing` for the non-target class.
1810 axis: The axis along which the mean is computed. Defaults to -1.
1812 Returns:
1813 Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
1814 """
1815 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1816 y_true = math_ops.cast(y_true, y_pred.dtype)
1817 label_smoothing = tensor_conversion.convert_to_tensor_v2_with_dispatch(
1818 label_smoothing, dtype=backend.floatx()
1819 )
1821 def _smooth_labels():
1822 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
1824 y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
1825 lambda: y_true)
1827 return backend.mean(
1828 backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
1829 axis=axis)
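# A minimal NumPy sketch of binary label smoothing as documented above; the
# helper name is hypothetical. Both label values are squeezed towards 0.5 by
# half the smoothing amount.
def _binary_label_smoothing_example():
  import numpy as np
  y_true = np.array([0., 1.])
  label_smoothing = 0.1
  smoothed = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
  assert np.allclose(smoothed, [0.05, 0.95])  # 0 -> 0.05, 1 -> 0.95
  return smoothed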
1832@dispatch.dispatch_for_types(binary_crossentropy, ragged_tensor.RaggedTensor)
1833def _ragged_tensor_binary_crossentropy(y_true,
1834 y_pred,
1835 from_logits=False,
1836 label_smoothing=0,
1837 axis=-1):
1838 """Implements support for handling RaggedTensors.
1840 Args:
1841 y_true: Tensor of true targets.
1842 y_pred: Tensor of predicted targets.
1843 from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
1844 we assume that `y_pred` encodes a probability distribution.
1845 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
1846 squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
1847 for the target class and `0.5 * label_smoothing` for the non-target class.
1848 axis: Axis along which to compute crossentropy.
1850 Returns:
1851 Binary crossentropy loss value.
1853 Expected shape: (batch, sequence_len) with sequence_len being variable
1854 per batch.
1855 Return shape: (batch,); returns the per batch mean of the loss values.
1857 When used by BinaryCrossentropy() with the default reduction
1858 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over
1859 the number of batches.
1860 """
1861 fn = functools.partial(
1862 binary_crossentropy,
1863 from_logits=from_logits,
1864 label_smoothing=label_smoothing,
1865 axis=axis)
1866 return _ragged_tensor_apply_loss(fn, y_true, y_pred)
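# A minimal sketch of the per-batch behaviour described above, assuming ragged
# inputs are supported end to end by the wrapper class; the helper name and
# sample values are hypothetical.
def _ragged_binary_crossentropy_example():
  import tensorflow as tf
  # Two sequences with lengths [2, 1].
  y_true = tf.ragged.constant([[0., 1.], [1.]])
  y_pred = tf.ragged.constant([[0.4, 0.6], [0.8]])
  bce = tf.keras.losses.BinaryCrossentropy()
  # Each row is first reduced to its own mean loss; the default
  # SUM_OVER_BATCH_SIZE reduction then averages the two per-row values,
  # i.e. divides by 2 (the number of rows).
  return bce(y_true, y_pred)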
1869@keras_export('keras.metrics.kl_divergence',
1870 'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
1871 'keras.metrics.KLD', 'keras.losses.kl_divergence',
1872 'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
1873 'keras.losses.KLD')
1874@dispatch.add_dispatch_support
1875def kl_divergence(y_true, y_pred):
1876 """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.
1878 `loss = y_true * log(y_true / y_pred)`
1880 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
1882 Standalone usage:
1884 >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
1885 >>> y_pred = np.random.random(size=(2, 3))
1886 >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
1887 >>> assert loss.shape == (2,)
1888 >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
1889 >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
1890 >>> assert np.array_equal(
1891 ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))
1893 Args:
1894 y_true: Tensor of true targets.
1895 y_pred: Tensor of predicted targets.
1897 Returns:
1898 A `Tensor` with loss.
1900 Raises:
1901 TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
1902 """
1903 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1904 y_true = math_ops.cast(y_true, y_pred.dtype)
1905 y_true = backend.clip(y_true, backend.epsilon(), 1)
1906 y_pred = backend.clip(y_pred, backend.epsilon(), 1)
1907 return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
1910@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
1911@dispatch.add_dispatch_support
1912def poisson(y_true, y_pred):
1913 """Computes the Poisson loss between y_true and y_pred.
1915 The Poisson loss is the mean of the elements of the `Tensor`
1916 `y_pred - y_true * log(y_pred)`.
1918 Standalone usage:
1920 >>> y_true = np.random.randint(0, 2, size=(2, 3))
1921 >>> y_pred = np.random.random(size=(2, 3))
1922 >>> loss = tf.keras.losses.poisson(y_true, y_pred)
1923 >>> assert loss.shape == (2,)
1924 >>> y_pred = y_pred + 1e-7
1925 >>> assert np.allclose(
1926 ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
1927 ... atol=1e-5)
1929 Args:
1930 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
1931 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
1933 Returns:
1934 Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.
1936 Raises:
1937 InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
1938 """
1939 y_pred = tensor_conversion.convert_to_tensor_v2_with_dispatch(y_pred)
1940 y_true = math_ops.cast(y_true, y_pred.dtype)
1941 return backend.mean(
1942 y_pred - y_true * math_ops.log(y_pred + backend.epsilon()), axis=-1)
1945@keras_export(
1946 'keras.losses.cosine_similarity',
1947 v1=[
1948 'keras.metrics.cosine_proximity',
1949 'keras.metrics.cosine',
1950 'keras.losses.cosine_proximity',
1951 'keras.losses.cosine',
1952 'keras.losses.cosine_similarity',
1953 ])
1954@dispatch.add_dispatch_support
1955def cosine_similarity(y_true, y_pred, axis=-1):
1956 """Computes the cosine similarity between labels and predictions.
1958 Note that the result is a number between -1 and 1, i.e. the negative of the
1959 cosine similarity: 0 indicates orthogonality, values closer to -1 indicate
1960 greater similarity, and values closer to 1 indicate greater
1961 dissimilarity. This makes it usable as a loss function in a setting
1962 where you try to maximize the proximity between predictions and
1963 targets. If either `y_true` or `y_pred` is a zero vector, cosine
1964 similarity will be 0 regardless of the proximity between predictions
1965 and targets.
1967 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
1969 Standalone usage:
1971 >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
1972 >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
1973 >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
1974 >>> loss.numpy()
1975 array([-0., -0.999, 0.999], dtype=float32)
1977 Args:
1978 y_true: Tensor of true targets.
1979 y_pred: Tensor of predicted targets.
1980 axis: Axis along which to determine similarity.
1982 Returns:
1983 Cosine similarity tensor.
1984 """
1985 y_true = nn.l2_normalize(y_true, axis=axis)
1986 y_pred = nn.l2_normalize(y_pred, axis=axis)
1987 return -math_ops.reduce_sum(y_true * y_pred, axis=axis)
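# A minimal NumPy sketch of the sign convention described above; the helper
# name is hypothetical. The returned loss is the negative cosine similarity:
# -1 for identical directions, 0 for orthogonal vectors, +1 for opposite ones.
def _cosine_similarity_sign_example():
  import numpy as np
  def loss(a, b):
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    return -np.sum(a * b)
  assert np.isclose(loss(np.array([1., 1.]), np.array([2., 2.])), -1.)
  assert np.isclose(loss(np.array([1., 0.]), np.array([0., 1.])), 0.)
  assert np.isclose(loss(np.array([1., 0.]), np.array([-1., 0.])), 1.)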
1990@keras_export('keras.losses.CosineSimilarity')
1991class CosineSimilarity(LossFunctionWrapper):
1992 """Computes the cosine similarity between labels and predictions.
1994 Note that the result is a number between -1 and 1, i.e. the negative of the
1995 cosine similarity: 0 indicates orthogonality, values closer to -1 indicate
1996 greater similarity, and values closer to 1 indicate greater
1997 dissimilarity. This makes it usable as a loss function in a setting
1998 where you try to maximize the proximity between predictions and targets.
1999 If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
2000 regardless of the proximity between predictions and targets.
2002 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
2004 Standalone usage:
2006 >>> y_true = [[0., 1.], [1., 1.]]
2007 >>> y_pred = [[1., 0.], [1., 1.]]
2008 >>> # Using 'auto'/'sum_over_batch_size' reduction type.
2009 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
2010 >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
2011 >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
2012 >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
2013 >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
2014 >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2
2015 >>> cosine_loss(y_true, y_pred).numpy()
2016 -0.5
2018 >>> # Calling with 'sample_weight'.
2019 >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
2020 -0.0999
2022 >>> # Using 'sum' reduction type.
2023 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2024 ... reduction=tf.keras.losses.Reduction.SUM)
2025 >>> cosine_loss(y_true, y_pred).numpy()
2026 -0.999
2028 >>> # Using 'none' reduction type.
2029 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
2030 ... reduction=tf.keras.losses.Reduction.NONE)
2031 >>> cosine_loss(y_true, y_pred).numpy()
2032 array([-0., -0.999], dtype=float32)
2034 Usage with the `compile()` API:
2036 ```python
2037 model.compile(optimizer='sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))
2038 ```
2040 Args:
2041 axis: The axis along which the cosine similarity is computed
2042 (the features axis). Defaults to -1.
2043 reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
2044 Default value is `AUTO`. `AUTO` indicates that the reduction option will
2045 be determined by the usage context. For almost all cases this defaults to
2046 `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of
2047 built-in training loops such as `tf.keras` `compile` and `fit`, using
2048 `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
2049 custom training [tutorial]
2050 (https://www.tensorflow.org/tutorials/distribute/custom_training) for more
2051 details.
2052 name: Optional name for the instance.
2053 """
2055 def __init__(self,
2056 axis=-1,
2057 reduction=losses_utils.ReductionV2.AUTO,
2058 name='cosine_similarity'):
2059 super().__init__(
2060 cosine_similarity, reduction=reduction, name=name, axis=axis)
2063# Aliases.
2065bce = BCE = binary_crossentropy
2066mse = MSE = mean_squared_error
2067mae = MAE = mean_absolute_error
2068mape = MAPE = mean_absolute_percentage_error
2069msle = MSLE = mean_squared_logarithmic_error
2070kld = KLD = kullback_leibler_divergence = kl_divergence
2071logcosh = log_cosh
2072huber_loss = huber
2075def is_categorical_crossentropy(loss):
2076 result = ((isinstance(loss, CategoricalCrossentropy) or
2077 (isinstance(loss, LossFunctionWrapper) and
2078 loss.fn == categorical_crossentropy) or
2079 (hasattr(loss, '__name__') and
2080 loss.__name__ == 'categorical_crossentropy') or
2081 (loss == 'categorical_crossentropy')))
2082 return result
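# A minimal sketch of the identifier forms accepted by
# `is_categorical_crossentropy`; the helper name is hypothetical and it uses
# the loss classes and functions defined in this module.
def _is_categorical_crossentropy_example():
  assert is_categorical_crossentropy(CategoricalCrossentropy())
  assert is_categorical_crossentropy(categorical_crossentropy)
  assert is_categorical_crossentropy('categorical_crossentropy')
  assert not is_categorical_crossentropy(mean_squared_error)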
2085@keras_export('keras.losses.serialize')
2086def serialize(loss):
2087 """Serializes loss function or `Loss` instance.
2089 Args:
2090 loss: A Keras `Loss` instance or a loss function.
2092 Returns:
2093 Loss configuration dictionary.
2094 """
2095 return serialize_keras_object(loss)
2098@keras_export('keras.losses.deserialize')
2099def deserialize(name, custom_objects=None):
2100 """Deserializes a serialized loss class/function instance.
2102 Args:
2103 name: Loss configuration.
2104 custom_objects: Optional dictionary mapping names (strings) to custom
2105 objects (classes and functions) to be considered during deserialization.
2107 Returns:
2108 A Keras `Loss` instance or a loss function.
2109 """
2110 return deserialize_keras_object(
2111 name,
2112 module_objects=globals(),
2113 custom_objects=custom_objects,
2114 printable_module_name='loss function')
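# A minimal round-trip sketch for `serialize` and `deserialize`; the helper
# name is hypothetical, and it assumes the `CategoricalCrossentropy` class
# defined earlier in this module.
def _serialize_deserialize_example():
  loss = CategoricalCrossentropy(from_logits=True)
  config = serialize(loss)          # -> {'class_name': ..., 'config': {...}}
  restored = deserialize(config)
  assert isinstance(restored, CategoricalCrossentropy)
  assert restored.get_config()['from_logits']
  return restored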
2117@keras_export('keras.losses.get')
2118def get(identifier):
2119 """Retrieves a Keras loss as a `function`/`Loss` class instance.
2121 The `identifier` may be the string name of a loss function or `Loss` class.
2123 >>> loss = tf.keras.losses.get("categorical_crossentropy")
2124 >>> type(loss)
2125 <class 'function'>
2126 >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
2127 >>> type(loss)
2128 <class '...keras.losses.CategoricalCrossentropy'>
2130 You can also specify `config` of the loss to this function by passing a dict
2131 containing `class_name` and `config` as an identifier. Also note that the
2132 `class_name` must map to a `Loss` class.
2134 >>> identifier = {"class_name": "CategoricalCrossentropy",
2135 ... "config": {"from_logits": True}}
2136 >>> loss = tf.keras.losses.get(identifier)
2137 >>> type(loss)
2138 <class '...keras.losses.CategoricalCrossentropy'>
2140 Args:
2141 identifier: A loss identifier. One of None or string name of a loss
2142 function/class or loss configuration dictionary or a loss function or a
2143 loss class instance.
2145 Returns:
2146 A Keras loss as a `function`/ `Loss` class instance.
2148 Raises:
2149 ValueError: If `identifier` cannot be interpreted.
2150 """
2151 if identifier is None:
2152 return None
2153 if isinstance(identifier, str):
2154 identifier = str(identifier)
2155 return deserialize(identifier)
2156 if isinstance(identifier, dict):
2157 return deserialize(identifier)
2158 if callable(identifier):
2159 return identifier
2160 raise ValueError(
2161 f'Could not interpret loss function identifier: {identifier}')
2164LABEL_DTYPES_FOR_LOSSES = {
2165 losses_impl.sparse_softmax_cross_entropy: 'int32',
2166 sparse_categorical_crossentropy: 'int32'
2167}