Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/losses.py: 41%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

16"""Built-in loss functions."""

19import abc

20import functools

21import warnings

23import tensorflow.compat.v2 as tf

25from keras.src import backend

26from keras.src.saving import saving_lib

27from keras.src.saving.legacy import serialization as legacy_serialization

28from keras.src.saving.serialization_lib import deserialize_keras_object

29from keras.src.saving.serialization_lib import serialize_keras_object

30from keras.src.utils import losses_utils

31from keras.src.utils import tf_utils

33# isort: off

34from tensorflow.python.ops.ragged import ragged_map_ops

35from tensorflow.python.ops.ragged import ragged_util

36from tensorflow.python.util import dispatch

37from tensorflow.python.util.tf_export import keras_export

38from tensorflow.tools.docs import doc_controls

41@keras_export("keras.losses.Loss")

42class Loss:

43 """Loss base class.

45 To be implemented by subclasses:

46 * `call()`: Contains the logic for loss calculation using `y_true`,

47 `y_pred`.

49 Example subclass implementation:

51 ```python

52 class MeanSquaredError(Loss):

54 def call(self, y_true, y_pred):

55 return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)

56 ```

58 When using a Loss under a `tf.distribute.Strategy`, except passing it

59 to `Model.compile()` for use by `Model.fit()`, please use reduction

60 types 'SUM' or 'NONE', and reduce losses explicitly. Using 'AUTO' or

61 'SUM_OVER_BATCH_SIZE' will raise an error when calling the Loss object

62 from a custom training loop or from user-defined code in `Layer.call()`.

63 Please see this custom training

64 [tutorial](https://www.tensorflow.org/tutorials/distribute/custom_training)

65 for more details on this.

66 """

68 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):

69 """Initializes `Loss` class.

71 Args:

72 reduction: Type of `tf.keras.losses.Reduction` to apply to

73 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

74 option will be determined by the usage context. For almost all cases

75 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

76 `tf.distribute.Strategy`, except via `Model.compile()` and

77 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

78 will raise an error. Please see this custom training [tutorial](

79 https://www.tensorflow.org/tutorials/distribute/custom_training)

80 for more details.

81 name: Optional name for the instance.

82 """

83 losses_utils.ReductionV2.validate(reduction)

84 self.reduction = reduction

85 self.name = name

86 # SUM_OVER_BATCH is only allowed in losses managed by `fit` or

87 # CannedEstimators.

88 self._allow_sum_over_batch_size = False

89 self._set_name_scope()

91 def _set_name_scope(self):

92 """Creates a valid `name_scope` name."""

93 if self.name is None:

94 self._name_scope = self.__class__.__name__.strip("_")

95 elif self.name == "<lambda>":

96 self._name_scope = "lambda"

97 else:

98 # E.g. '_my_loss' => 'my_loss'

99 self._name_scope = self.name.strip("_")

100

101 def __call__(self, y_true, y_pred, sample_weight=None):

102 """Invokes the `Loss` instance.

103

104 Args:

105 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except

106 sparse loss functions such as sparse categorical crossentropy where

107 shape = `[batch_size, d0, .. dN-1]`

108 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

109 sample_weight: Optional `sample_weight` acts as a coefficient for the

110 loss. If a scalar is provided, then the loss is simply scaled by the

111 given value. If `sample_weight` is a tensor of size `[batch_size]`,

112 then the total loss for each sample of the batch is rescaled by the

113 corresponding element in the `sample_weight` vector. If the shape of

114 `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be

115 broadcasted to this shape), then each loss element of `y_pred` is

116 scaled by the corresponding value of `sample_weight`. (Note

117 on`dN-1`: all loss functions reduce by 1 dimension, usually

118 axis=-1.)

119

120 Returns:

121 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has

122 shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note

123 `dN-1` because all loss functions reduce by 1 dimension, usually

124 axis=-1.)

125

126 Raises:

127 ValueError: If the shape of `sample_weight` is invalid.

128 """

129 # If we are wrapping a lambda function strip '<>' from the name as it is

130 # not accepted in scope name.

131 graph_ctx = tf_utils.graph_context_for_symbolic_tensors(

132 y_true, y_pred, sample_weight

133 )

134 with backend.name_scope(self._name_scope), graph_ctx:

135 if tf.executing_eagerly():

136 call_fn = self.call

137 else:

138 call_fn = tf.__internal__.autograph.tf_convert(

139 self.call, tf.__internal__.autograph.control_status_ctx()

140 )

141

142 losses = call_fn(y_true, y_pred)

143

144 in_mask = losses_utils.get_mask(y_pred)

145 out_mask = losses_utils.get_mask(losses)

146

147 if in_mask is not None and out_mask is not None:

148 mask = in_mask & out_mask

149 elif in_mask is not None:

150 mask = in_mask

151 elif out_mask is not None:

152 mask = out_mask

153 else:

154 mask = None

155

156 reduction = self._get_reduction()

157 sample_weight = losses_utils.apply_valid_mask(

158 losses, sample_weight, mask, reduction

159 )

160 return losses_utils.compute_weighted_loss(

161 losses, sample_weight, reduction=reduction

162 )

163

164 @classmethod

165 def from_config(cls, config):

166 """Instantiates a `Loss` from its config (output of `get_config()`).

167

168 Args:

169 config: Output of `get_config()`.

170

171 Returns:

172 A `Loss` instance.

173 """

174 return cls(**config)

175

176 def get_config(self):

177 """Returns the config dictionary for a `Loss` instance."""

178 return {"reduction": self.reduction, "name": self.name}

179

180 @abc.abstractmethod

181 @doc_controls.for_subclass_implementers

182 def call(self, y_true, y_pred):

183 """Invokes the `Loss` instance.

184

185 Args:

186 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except

187 sparse loss functions such as sparse categorical crossentropy where

188 shape = `[batch_size, d0, .. dN-1]`

189 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

190

191 Returns:

192 Loss values with the shape `[batch_size, d0, .. dN-1]`.

193 """

194 raise NotImplementedError("Must be implemented in subclasses.")

195

196 def _get_reduction(self):

197 """Handles `AUTO` reduction cases and returns the reduction value."""

198 if (

199 not self._allow_sum_over_batch_size

200 and tf.distribute.has_strategy()

201 and (

202 self.reduction == losses_utils.ReductionV2.AUTO

203 or self.reduction

204 == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

205 )

206 ):

207 raise ValueError(

208 "Please use `tf.keras.losses.Reduction.SUM` or "

209 "`tf.keras.losses.Reduction.NONE` for loss reduction when "

210 "losses are used with `tf.distribute.Strategy`, "

211 "except for specifying losses in `Model.compile()` "

212 "for use by the built-in training looop `Model.fit()`.\n"

213 "Please see https://www.tensorflow.org/tutorials"

214 "/distribute/custom_training for more details."

215 )

216

217 if self.reduction == losses_utils.ReductionV2.AUTO:

218 return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

219 return self.reduction

220

221

222@keras_export("keras.__internal__.losses.LossFunctionWrapper", v1=[])

223class LossFunctionWrapper(Loss):

224 """Wraps a loss function in the `Loss` class."""

225

226 def __init__(

227 self, fn, reduction=losses_utils.ReductionV2.AUTO, name=None, **kwargs

228 ):

229 """Initializes `LossFunctionWrapper` class.

230

231 Args:

232 fn: The loss function to wrap, with signature `fn(y_true, y_pred,

233 **kwargs)`.

234 reduction: Type of `tf.keras.losses.Reduction` to apply to

235 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

236 option will be determined by the usage context. For almost all cases

237 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

238 `tf.distribute.Strategy`, except via `Model.compile()` and

239 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

240 will raise an error. Please see this custom training [tutorial](

241 https://www.tensorflow.org/tutorials/distribute/custom_training)

242 for more details.

243 name: Optional name for the instance.

244 **kwargs: The keyword arguments that are passed on to `fn`.

245 """

246 super().__init__(reduction=reduction, name=name)

247 self.fn = fn

248 self._fn_kwargs = kwargs

249

250 def call(self, y_true, y_pred):

251 """Invokes the `LossFunctionWrapper` instance.

252

253 Args:

254 y_true: Ground truth values.

255 y_pred: The predicted values.

256

257 Returns:

258 Loss values per sample.

259 """

260 if tf.is_tensor(y_pred) and tf.is_tensor(y_true):

261 y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(

262 y_pred, y_true

263 )

264

265 ag_fn = tf.__internal__.autograph.tf_convert(

266 self.fn, tf.__internal__.autograph.control_status_ctx()

267 )

268 return ag_fn(y_true, y_pred, **self._fn_kwargs)

269

270 def get_config(self):

271 config = {}

272 for k, v in self._fn_kwargs.items():

273 config[k] = (

274 backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v

275 )

276

277 if saving_lib.saving_v3_enabled():

278 from keras.src.utils import get_registered_name

279

280 config["fn"] = get_registered_name(self.fn)

281

282 base_config = super().get_config()

283 return dict(list(base_config.items()) + list(config.items()))

284

285 @classmethod

286 def from_config(cls, config):

287 """Instantiates a `Loss` from its config (output of `get_config()`).

288

289 Args:

290 config: Output of `get_config()`.

291

292 Returns:

293 A `keras.losses.Loss` instance.

294 """

295 if saving_lib.saving_v3_enabled():

296 fn_name = config.pop("fn", None)

297 if fn_name and cls is LossFunctionWrapper:

298 config["fn"] = get(fn_name)

299 return cls(**config)

300

301

302@keras_export("keras.losses.MeanSquaredError")

303class MeanSquaredError(LossFunctionWrapper):

304 """Computes the mean of squares of errors between labels and predictions.

305

306 `loss = mean(square(y_true - y_pred))`

307

308 Standalone usage:

309

310 >>> y_true = [[0., 1.], [0., 0.]]

311 >>> y_pred = [[1., 1.], [1., 0.]]

312 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

313 >>> mse = tf.keras.losses.MeanSquaredError()

314 >>> mse(y_true, y_pred).numpy()

315 0.5

316

317 >>> # Calling with 'sample_weight'.

318 >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()

319 0.25

320

321 >>> # Using 'sum' reduction type.

322 >>> mse = tf.keras.losses.MeanSquaredError(

323 ... reduction=tf.keras.losses.Reduction.SUM)

324 >>> mse(y_true, y_pred).numpy()

325 1.0

326

327 >>> # Using 'none' reduction type.

328 >>> mse = tf.keras.losses.MeanSquaredError(

329 ... reduction=tf.keras.losses.Reduction.NONE)

330 >>> mse(y_true, y_pred).numpy()

331 array([0.5, 0.5], dtype=float32)

332

333 Usage with the `compile()` API:

334

335 ```python

336 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())

337 ```

338 """

339

340 def __init__(

341 self, reduction=losses_utils.ReductionV2.AUTO, name="mean_squared_error"

342 ):

343 """Initializes `MeanSquaredError` instance.

344

345 Args:

346 reduction: Type of `tf.keras.losses.Reduction` to apply to

347 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

348 option will be determined by the usage context. For almost all cases

349 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

350 `tf.distribute.Strategy`, except via `Model.compile()` and

351 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

352 will raise an error. Please see this custom training [tutorial](

353 https://www.tensorflow.org/tutorials/distribute/custom_training)

354 for more details.

355 name: Optional name for the instance. Defaults to

356 'mean_squared_error'.

357 """

358 super().__init__(mean_squared_error, name=name, reduction=reduction)

359

360

361@keras_export("keras.losses.MeanAbsoluteError")

362class MeanAbsoluteError(LossFunctionWrapper):

363 """Computes the mean of absolute difference between labels and predictions.

364

365 `loss = mean(abs(y_true - y_pred))`

366

367 Standalone usage:

368

369 >>> y_true = [[0., 1.], [0., 0.]]

370 >>> y_pred = [[1., 1.], [1., 0.]]

371 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

372 >>> mae = tf.keras.losses.MeanAbsoluteError()

373 >>> mae(y_true, y_pred).numpy()

374 0.5

375

376 >>> # Calling with 'sample_weight'.

377 >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()

378 0.25

379

380 >>> # Using 'sum' reduction type.

381 >>> mae = tf.keras.losses.MeanAbsoluteError(

382 ... reduction=tf.keras.losses.Reduction.SUM)

383 >>> mae(y_true, y_pred).numpy()

384 1.0

385

386 >>> # Using 'none' reduction type.

387 >>> mae = tf.keras.losses.MeanAbsoluteError(

388 ... reduction=tf.keras.losses.Reduction.NONE)

389 >>> mae(y_true, y_pred).numpy()

390 array([0.5, 0.5], dtype=float32)

391

392 Usage with the `compile()` API:

393

394 ```python

395 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())

396 ```

397 """

398

399 def __init__(

400 self,

401 reduction=losses_utils.ReductionV2.AUTO,

402 name="mean_absolute_error",

403 ):

404 """Initializes `MeanAbsoluteError` instance.

405

406 Args:

407 reduction: Type of `tf.keras.losses.Reduction` to apply to

408 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

409 option will be determined by the usage context. For almost all cases

410 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

411 `tf.distribute.Strategy`, except via `Model.compile()` and

412 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

413 will raise an error. Please see this custom training [tutorial](

414 https://www.tensorflow.org/tutorials/distribute/custom_training)

415 for more details.

416 name: Optional name for the instance. Defaults to

417 'mean_absolute_error'.

418 """

419 super().__init__(mean_absolute_error, name=name, reduction=reduction)

420

421

422@keras_export("keras.losses.MeanAbsolutePercentageError")

423class MeanAbsolutePercentageError(LossFunctionWrapper):

424 """Computes the mean absolute percentage error between `y_true` & `y_pred`.

425

426 Formula:

427

428 `loss = 100 * abs((y_true - y_pred) / y_true)`

429

430 Note that to avoid dividing by zero, a small epsilon value

431 is added to the denominator.

432

433 Standalone usage:

434

435 >>> y_true = [[2., 1.], [2., 3.]]

436 >>> y_pred = [[1., 1.], [1., 0.]]

437 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

438 >>> mape = tf.keras.losses.MeanAbsolutePercentageError()

439 >>> mape(y_true, y_pred).numpy()

440 50.

441

442 >>> # Calling with 'sample_weight'.

443 >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()

444 20.

445

446 >>> # Using 'sum' reduction type.

447 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(

448 ... reduction=tf.keras.losses.Reduction.SUM)

449 >>> mape(y_true, y_pred).numpy()

450 100.

451

452 >>> # Using 'none' reduction type.

453 >>> mape = tf.keras.losses.MeanAbsolutePercentageError(

454 ... reduction=tf.keras.losses.Reduction.NONE)

455 >>> mape(y_true, y_pred).numpy()

456 array([25., 75.], dtype=float32)

457

458 Usage with the `compile()` API:

459

460 ```python

461 model.compile(optimizer='sgd',

462 loss=tf.keras.losses.MeanAbsolutePercentageError())

463 ```

464 """

465

466 def __init__(

467 self,

468 reduction=losses_utils.ReductionV2.AUTO,

469 name="mean_absolute_percentage_error",

470 ):

471 """Initializes `MeanAbsolutePercentageError` instance.

472

473 Args:

474 reduction: Type of `tf.keras.losses.Reduction` to apply to

475 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

476 option will be determined by the usage context. For almost all cases

477 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

478 `tf.distribute.Strategy`, except via `Model.compile()` and

479 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

480 will raise an error. Please see this custom training [tutorial](

481 https://www.tensorflow.org/tutorials/distribute/custom_training)

482 for more details.

483 name: Optional name for the instance. Defaults to

484 'mean_absolute_percentage_error'.

485 """

486 super().__init__(

487 mean_absolute_percentage_error, name=name, reduction=reduction

488 )

489

490

491@keras_export("keras.losses.MeanSquaredLogarithmicError")

492class MeanSquaredLogarithmicError(LossFunctionWrapper):

493 """Computes the mean squared logarithmic error between `y_true` & `y_pred`.

494

495 `loss = square(log(y_true + 1.) - log(y_pred + 1.))`

496

497 Standalone usage:

498

499 >>> y_true = [[0., 1.], [0., 0.]]

500 >>> y_pred = [[1., 1.], [1., 0.]]

501 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

502 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()

503 >>> msle(y_true, y_pred).numpy()

504 0.240

505

506 >>> # Calling with 'sample_weight'.

507 >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()

508 0.120

509

510 >>> # Using 'sum' reduction type.

511 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(

512 ... reduction=tf.keras.losses.Reduction.SUM)

513 >>> msle(y_true, y_pred).numpy()

514 0.480

515

516 >>> # Using 'none' reduction type.

517 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(

518 ... reduction=tf.keras.losses.Reduction.NONE)

519 >>> msle(y_true, y_pred).numpy()

520 array([0.240, 0.240], dtype=float32)

521

522 Usage with the `compile()` API:

523

524 ```python

525 model.compile(optimizer='sgd',

526 loss=tf.keras.losses.MeanSquaredLogarithmicError())

527 ```

528 """

529

530 def __init__(

531 self,

532 reduction=losses_utils.ReductionV2.AUTO,

533 name="mean_squared_logarithmic_error",

534 ):

535 """Initializes `MeanSquaredLogarithmicError` instance.

536

537 Args:

538 reduction: Type of `tf.keras.losses.Reduction` to apply to

539 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

540 option will be determined by the usage context. For almost all cases

541 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

542 `tf.distribute.Strategy`, except via `Model.compile()` and

543 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

544 will raise an error. Please see this custom training [tutorial](

545 https://www.tensorflow.org/tutorials/distribute/custom_training)

546 for more details.

547 name: Optional name for the instance. Defaults to

548 'mean_squared_logarithmic_error'.

549 """

550 super().__init__(

551 mean_squared_logarithmic_error, name=name, reduction=reduction

552 )

553

554

555@keras_export("keras.losses.BinaryCrossentropy")

556class BinaryCrossentropy(LossFunctionWrapper):

557 """Computes the cross-entropy loss between true labels and predicted labels.

558

559 Use this cross-entropy loss for binary (0 or 1) classification applications.

560 The loss function requires the following inputs:

561

562 - `y_true` (true label): This is either 0 or 1.

563 - `y_pred` (predicted value): This is the model's prediction, i.e, a single

564 floating-point value which either represents a

565 [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]

566 when `from_logits=True`) or a probability (i.e, value in [0., 1.] when

567 `from_logits=False`).

568

569 **Recommended Usage:** (set `from_logits=True`)

570

571 With `tf.keras` API:

572

573 ```python

574 model.compile(

575 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),

576 ....

577 )

578 ```

579

580 As a standalone function:

581

582 >>> # Example 1: (batch_size = 1, number of samples = 4)

583 >>> y_true = [0, 1, 0, 0]

584 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]

585 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

586 >>> bce(y_true, y_pred).numpy()

587 0.865

588

589 >>> # Example 2: (batch_size = 2, number of samples = 4)

590 >>> y_true = [[0, 1], [0, 0]]

591 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]

592 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.

593 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

594 >>> bce(y_true, y_pred).numpy()

595 0.865

596 >>> # Using 'sample_weight' attribute

597 >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

598 0.243

599 >>> # Using 'sum' reduction` type.

600 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,

601 ... reduction=tf.keras.losses.Reduction.SUM)

602 >>> bce(y_true, y_pred).numpy()

603 1.730

604 >>> # Using 'none' reduction type.

605 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,

606 ... reduction=tf.keras.losses.Reduction.NONE)

607 >>> bce(y_true, y_pred).numpy()

608 array([0.235, 1.496], dtype=float32)

609

610 **Default Usage:** (set `from_logits=False`)

611

612 >>> # Make the following updates to the above "Recommended Usage" section

613 >>> # 1. Set `from_logits=False`

614 >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')

615 >>> # 2. Update `y_pred` to use probabilities instead of logits

616 >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]

617 """

618

619 def __init__(

620 self,

621 from_logits=False,

622 label_smoothing=0.0,

623 axis=-1,

624 reduction=losses_utils.ReductionV2.AUTO,

625 name="binary_crossentropy",

626 ):

627 """Initializes `BinaryCrossentropy` instance.

628

629 Args:

630 from_logits: Whether to interpret `y_pred` as a tensor of

631 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we

632 assume that `y_pred` contains probabilities (i.e., values in [0,

633 1]).

634 label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When >

635 0, we compute the loss between the predicted labels and a smoothed

636 version of the true labels, where the smoothing squeezes the labels

637 towards 0.5. Larger values of `label_smoothing` correspond to

638 heavier smoothing.

639 axis: The axis along which to compute crossentropy (the features

640 axis). Defaults to -1.

641 reduction: Type of `tf.keras.losses.Reduction` to apply to

642 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

643 option will be determined by the usage context. For almost all cases

644 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

645 `tf.distribute.Strategy`, except via `Model.compile()` and

646 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

647 will raise an error. Please see this custom training [tutorial](

648 https://www.tensorflow.org/tutorials/distribute/custom_training)

649 for more details.

650 name: Name for the op. Defaults to 'binary_crossentropy'.

651 """

652 super().__init__(

653 binary_crossentropy,

654 name=name,

655 reduction=reduction,

656 from_logits=from_logits,

657 label_smoothing=label_smoothing,

658 axis=axis,

659 )

660 self.from_logits = from_logits

661

662

663@keras_export("keras.losses.BinaryFocalCrossentropy")

664class BinaryFocalCrossentropy(LossFunctionWrapper):

665 """Computes focal cross-entropy loss between true labels and predictions.

666

667 Binary cross-entropy loss is often used for binary (0 or 1) classification

668 tasks. The loss function requires the following inputs:

669

670 - `y_true` (true label): This is either 0 or 1.

671 - `y_pred` (predicted value): This is the model's prediction, i.e, a single

672 floating-point value which either represents a

673 [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]

674 when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when

675 `from_logits=False`).

676

677 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it

678 helps to apply a "focal factor" to down-weight easy examples and focus more

679 on hard examples. By default, the focal tensor is computed as follows:

680

681 `focal_factor = (1 - output) ** gamma` for class 1

682 `focal_factor = output ** gamma` for class 0

683 where `gamma` is a focusing parameter. When `gamma=0`, this function is

684 equivalent to the binary crossentropy loss.

685

686 With the `compile()` API:

687

688 ```python

689 model.compile(

690 loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True),

691 ....

692 )

693 ```

694

695 As a standalone function:

696

697 >>> # Example 1: (batch_size = 1, number of samples = 4)

698 >>> y_true = [0, 1, 0, 0]

699 >>> y_pred = [-18.6, 0.51, 2.94, -12.8]

700 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2,

701 ... from_logits=True)

702 >>> loss(y_true, y_pred).numpy()

703 0.691

704

705 >>> # Apply class weight

706 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

707 ... apply_class_balancing=True, gamma=2, from_logits=True)

708 >>> loss(y_true, y_pred).numpy()

709 0.51

710

711 >>> # Example 2: (batch_size = 2, number of samples = 4)

712 >>> y_true = [[0, 1], [0, 0]]

713 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]

714 >>> # Using default 'auto'/'sum_over_batch_size' reduction type.

715 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,

716 ... from_logits=True)

717 >>> loss(y_true, y_pred).numpy()

718 0.647

719

720 >>> # Apply class weight

721 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

722 ... apply_class_balancing=True, gamma=3, from_logits=True)

723 >>> loss(y_true, y_pred).numpy()

724 0.482

725

726 >>> # Using 'sample_weight' attribute with focal effect

727 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,

728 ... from_logits=True)

729 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

730 0.133

731

732 >>> # Apply class weight

733 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

734 ... apply_class_balancing=True, gamma=3, from_logits=True)

735 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

736 0.097

737

738 >>> # Using 'sum' reduction` type.

739 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4,

740 ... from_logits=True,

741 ... reduction=tf.keras.losses.Reduction.SUM)

742 >>> loss(y_true, y_pred).numpy()

743 1.222

744

745 >>> # Apply class weight

746 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

747 ... apply_class_balancing=True, gamma=4, from_logits=True,

748 ... reduction=tf.keras.losses.Reduction.SUM)

749 >>> loss(y_true, y_pred).numpy()

750 0.914

751

752 >>> # Using 'none' reduction type.

753 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

754 ... gamma=5, from_logits=True,

755 ... reduction=tf.keras.losses.Reduction.NONE)

756 >>> loss(y_true, y_pred).numpy()

757 array([0.0017 1.1561], dtype=float32)

758

759 >>> # Apply class weight

760 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(

761 ... apply_class_balancing=True, gamma=5, from_logits=True,

762 ... reduction=tf.keras.losses.Reduction.NONE)

763 >>> loss(y_true, y_pred).numpy()

764 array([0.0004 0.8670], dtype=float32)

765

766

767 Args:

768 apply_class_balancing: A bool, whether to apply weight balancing on the

769 binary classes 0 and 1.

770 alpha: A weight balancing factor for class 1, default is `0.25` as

771 mentioned in reference [Lin et al., 2018](

772 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is

773 `1.0 - alpha`.

774 gamma: A focusing parameter used to compute the focal factor, default is

775 `2.0` as mentioned in the reference

776 [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).

777 from_logits: Whether to interpret `y_pred` as a tensor of

778 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we

779 assume that `y_pred` are probabilities (i.e., values in `[0, 1]`).

780 label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When >

781 `0`, we compute the loss between the predicted labels and a smoothed

782 version of the true labels, where the smoothing squeezes the labels

783 towards `0.5`. Larger values of `label_smoothing` correspond to heavier

784 smoothing.

785 axis: The axis along which to compute crossentropy (the features axis).

786 Defaults to `-1`.

787 reduction: Type of `tf.keras.losses.Reduction` to apply to

788 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

789 option will be determined by the usage context. For almost all cases

790 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

791 `tf.distribute.Strategy`, except via `Model.compile()` and

792 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

793 will raise an error. Please see this custom training [tutorial](

794 https://www.tensorflow.org/tutorials/distribute/custom_training)

795 for more details.

796 name: Name for the op. Defaults to 'binary_focal_crossentropy'.

797 """

798

799 def __init__(

800 self,

801 apply_class_balancing=False,

802 alpha=0.25,

803 gamma=2.0,

804 from_logits=False,

805 label_smoothing=0.0,

806 axis=-1,

807 reduction=losses_utils.ReductionV2.AUTO,

808 name="binary_focal_crossentropy",

809 ):

810 """Initializes `BinaryFocalCrossentropy` instance."""

811 super().__init__(

812 binary_focal_crossentropy,

813 apply_class_balancing=apply_class_balancing,

814 alpha=alpha,

815 gamma=gamma,

816 name=name,

817 reduction=reduction,

818 from_logits=from_logits,

819 label_smoothing=label_smoothing,

820 axis=axis,

821 )

822 self.from_logits = from_logits

823 self.apply_class_balancing = apply_class_balancing

824 self.alpha = alpha

825 self.gamma = gamma

826

827 def get_config(self):

828 config = {

829 "apply_class_balancing": self.apply_class_balancing,

830 "alpha": self.alpha,

831 "gamma": self.gamma,

832 }

833 base_config = super().get_config()

834 return dict(list(base_config.items()) + list(config.items()))

835

836

837@keras_export("keras.losses.CategoricalCrossentropy")

838class CategoricalCrossentropy(LossFunctionWrapper):

839 """Computes the crossentropy loss between the labels and predictions.

840

841 Use this crossentropy loss function when there are two or more label

842 classes. We expect labels to be provided in a `one_hot` representation. If

843 you want to provide labels as integers, please use

844 `SparseCategoricalCrossentropy` loss. There should be `# classes` floating

845 point values per feature.

846

847 In the snippet below, there is `# classes` floating pointing values per

848 example. The shape of both `y_pred` and `y_true` are

849 `[batch_size, num_classes]`.

850

851 Standalone usage:

852

853 >>> y_true = [[0, 1, 0], [0, 0, 1]]

854 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

855 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

856 >>> cce = tf.keras.losses.CategoricalCrossentropy()

857 >>> cce(y_true, y_pred).numpy()

858 1.177

859

860 >>> # Calling with 'sample_weight'.

861 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()

862 0.814

863

864 >>> # Using 'sum' reduction type.

865 >>> cce = tf.keras.losses.CategoricalCrossentropy(

866 ... reduction=tf.keras.losses.Reduction.SUM)

867 >>> cce(y_true, y_pred).numpy()

868 2.354

869

870 >>> # Using 'none' reduction type.

871 >>> cce = tf.keras.losses.CategoricalCrossentropy(

872 ... reduction=tf.keras.losses.Reduction.NONE)

873 >>> cce(y_true, y_pred).numpy()

874 array([0.0513, 2.303], dtype=float32)

875

876 Usage with the `compile()` API:

877

878 ```python

879 model.compile(optimizer='sgd',

880 loss=tf.keras.losses.CategoricalCrossentropy())

881 ```

882 """

883

884 def __init__(

885 self,

886 from_logits=False,

887 label_smoothing=0.0,

888 axis=-1,

889 reduction=losses_utils.ReductionV2.AUTO,

890 name="categorical_crossentropy",

891 ):

892 """Initializes `CategoricalCrossentropy` instance.

893

894 Args:

895 from_logits: Whether `y_pred` is expected to be a logits tensor. By

896 default, we assume that `y_pred` encodes a probability distribution.

897 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,

898 meaning the confidence on label values are relaxed. For example, if

899 `0.1`, use `0.1 / num_classes` for non-target labels and

900 `0.9 + 0.1 / num_classes` for target labels.

901 axis: The axis along which to compute crossentropy (the features

902 axis). Defaults to -1.

903 reduction: Type of `tf.keras.losses.Reduction` to apply to

904 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

905 option will be determined by the usage context. For almost all cases

906 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

907 `tf.distribute.Strategy`, except via `Model.compile()` and

908 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

909 will raise an error. Please see this custom training [tutorial](

910 https://www.tensorflow.org/tutorials/distribute/custom_training)

911 for more details.

912 name: Optional name for the instance.

913 Defaults to 'categorical_crossentropy'.

914 """

915 super().__init__(

916 categorical_crossentropy,

917 name=name,

918 reduction=reduction,

919 from_logits=from_logits,

920 label_smoothing=label_smoothing,

921 axis=axis,

922 )

923

924

925@keras_export("keras.losses.CategoricalFocalCrossentropy")

926class CategoricalFocalCrossentropy(LossFunctionWrapper):

927 """Computes the alpha balanced focal crossentropy loss.

928

929 Use this crossentropy loss function when there are two or more label

930 classes and if you want to handle class imbalance without using

931 `class_weights`. We expect labels to be provided in a `one_hot`

932 representation.

933

934 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it

935 helps to apply a focal factor to down-weight easy examples and focus more on

936 hard examples. The general formula for the focal loss (FL)

937 is as follows:

938

939 `FL(p_t) = (1 − p_t)^gamma * log(p_t)`

940

941 where `p_t` is defined as follows:

942 `p_t = output if y_true == 1, else 1 - output`

943

944 `(1 − p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing

945 parameter. When `gamma` = 0, there is no focal effect on the cross entropy.

946 `gamma` reduces the importance given to simple examples in a smooth manner.

947

948 The authors use alpha-balanced variant of focal loss (FL) in the paper:

949 `FL(p_t) = −alpha * (1 − p_t)^gamma * log(p_t)`

950

951 where `alpha` is the weight factor for the classes. If `alpha` = 1, the

952 loss won't be able to handle class imbalance properly as all

953 classes will have the same weight. This can be a constant or a list of

954 constants. If alpha is a list, it must have the same length as the number

955 of classes.

956

957 The formula above can be generalized to:

958 `FL(p_t) = alpha * (1 − p_t)^gamma * CrossEntropy(y_true, y_pred)`

959

960 where minus comes from `CrossEntropy(y_true, y_pred)` (CE).

961

962 Extending this to multi-class case is straightforward:

963 `FL(p_t) = alpha * (1 − p_t)^gamma * CategoricalCE(y_true, y_pred)`

964

965 In the snippet below, there is `# classes` floating pointing values per

966 example. The shape of both `y_pred` and `y_true` are

967 `[batch_size, num_classes]`.

968

969 Standalone usage:

970

971 >>> y_true = [[0., 1., 0.], [0., 0., 1.]]

972 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

973 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

974 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy()

975 >>> cce(y_true, y_pred).numpy()

976 0.23315276

977

978 >>> # Calling with 'sample_weight'.

979 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()

980 0.1632

981

982 >>> # Using 'sum' reduction type.

983 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(

984 ... reduction=tf.keras.losses.Reduction.SUM)

985 >>> cce(y_true, y_pred).numpy()

986 0.46631

987

988 >>> # Using 'none' reduction type.

989 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy(

990 ... reduction=tf.keras.losses.Reduction.NONE)

991 >>> cce(y_true, y_pred).numpy()

992 array([3.2058331e-05, 4.6627346e-01], dtype=float32)

993

994 Usage with the `compile()` API:

995 ```python

996 model.compile(optimizer='adam',

997 loss=tf.keras.losses.CategoricalFocalCrossentropy())

998 ```

999 Args:

1000 alpha: A weight balancing factor for all classes, default is `0.25` as

1001 mentioned in the reference. It can be a list of floats or a scalar.

1002 In the multi-class case, alpha may be set by inverse class

1003 frequency by using `compute_class_weight` from `sklearn.utils`.

1004 gamma: A focusing parameter, default is `2.0` as mentioned in the

1005 reference. It helps to gradually reduce the importance given to

1006 simple (easy) examples in a smooth manner.

1007 from_logits: Whether `output` is expected to be a logits tensor. By

1008 default, we consider that `output` encodes a probability

1009 distribution.

1010 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,

1011 meaning the confidence on label values are relaxed. For example, if

1012 `0.1`, use `0.1 / num_classes` for non-target labels and

1013 `0.9 + 0.1 / num_classes` for target labels.

1014 axis: The axis along which to compute crossentropy (the features

1015 axis). Defaults to -1.

1016 reduction: Type of `tf.keras.losses.Reduction` to apply to

1017 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1018 option will be determined by the usage context. For almost all cases

1019 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1020 `tf.distribute.Strategy`, except via `Model.compile()` and

1021 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1022 will raise an error. Please see this custom training [tutorial](

1023 https://www.tensorflow.org/tutorials/distribute/custom_training)

1024 for more details.

1025 name: Optional name for the instance.

1026 Defaults to 'categorical_focal_crossentropy'.

1027 """

1028

1029 def __init__(

1030 self,

1031 alpha=0.25,

1032 gamma=2.0,

1033 from_logits=False,

1034 label_smoothing=0.0,

1035 axis=-1,

1036 reduction=losses_utils.ReductionV2.AUTO,

1037 name="categorical_focal_crossentropy",

1038 ):

1039 """Initializes `CategoricalFocalCrossentropy` instance."""

1040 super().__init__(

1041 categorical_focal_crossentropy,

1042 alpha=alpha,

1043 gamma=gamma,

1044 name=name,

1045 reduction=reduction,

1046 from_logits=from_logits,

1047 label_smoothing=label_smoothing,

1048 axis=axis,

1049 )

1050 self.from_logits = from_logits

1051 self.alpha = alpha

1052 self.gamma = gamma

1053

1054 def get_config(self):

1055 config = {

1056 "alpha": self.alpha,

1057 "gamma": self.gamma,

1058 }

1059 base_config = super().get_config()

1060 return dict(list(base_config.items()) + list(config.items()))

1061

1062

1063@keras_export("keras.losses.SparseCategoricalCrossentropy")

1064class SparseCategoricalCrossentropy(LossFunctionWrapper):

1065 """Computes the crossentropy loss between the labels and predictions.

1066

1067 Use this crossentropy loss function when there are two or more label

1068 classes. We expect labels to be provided as integers. If you want to

1069 provide labels using `one-hot` representation, please use

1070 `CategoricalCrossentropy` loss. There should be `# classes` floating point

1071 values per feature for `y_pred` and a single floating point value per

1072 feature for `y_true`.

1073

1074 In the snippet below, there is a single floating point value per example for

1075 `y_true` and `# classes` floating pointing values per example for `y_pred`.

1076 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is

1077 `[batch_size, num_classes]`.

1078

1079 Standalone usage:

1080

1081 >>> y_true = [1, 2]

1082 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

1083 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1084 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()

1085 >>> scce(y_true, y_pred).numpy()

1086 1.177

1087

1088 >>> # Calling with 'sample_weight'.

1089 >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()

1090 0.814

1091

1092 >>> # Using 'sum' reduction type.

1093 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(

1094 ... reduction=tf.keras.losses.Reduction.SUM)

1095 >>> scce(y_true, y_pred).numpy()

1096 2.354

1097

1098 >>> # Using 'none' reduction type.

1099 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(

1100 ... reduction=tf.keras.losses.Reduction.NONE)

1101 >>> scce(y_true, y_pred).numpy()

1102 array([0.0513, 2.303], dtype=float32)

1103

1104 Usage with the `compile()` API:

1105

1106 ```python

1107 model.compile(optimizer='sgd',

1108 loss=tf.keras.losses.SparseCategoricalCrossentropy())

1109 ```

1110 """

1111

1112 def __init__(

1113 self,

1114 from_logits=False,

1115 ignore_class=None,

1116 reduction=losses_utils.ReductionV2.AUTO,

1117 name="sparse_categorical_crossentropy",

1118 ):

1119 """Initializes `SparseCategoricalCrossentropy` instance.

1120

1121 Args:

1122 from_logits: Whether `y_pred` is expected to be a logits tensor. By

1123 default, we assume that `y_pred` encodes a probability distribution.

1124 ignore_class: Optional integer. The ID of a class to be ignored during

1125 loss computation. This is useful, for example, in segmentation

1126 problems featuring a "void" class (commonly -1 or 255) in

1127 segmentation maps.

1128 By default (`ignore_class=None`), all classes are considered.

1129 reduction: Type of `tf.keras.losses.Reduction` to apply to

1130 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1131 option will be determined by the usage context. For almost all cases

1132 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1133 `tf.distribute.Strategy`, except via `Model.compile()` and

1134 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1135 will raise an error. Please see this custom training [tutorial](

1136 https://www.tensorflow.org/tutorials/distribute/custom_training)

1137 for more details.

1138 name: Optional name for the instance. Defaults to

1139 'sparse_categorical_crossentropy'.

1140 """

1141 super().__init__(

1142 sparse_categorical_crossentropy,

1143 name=name,

1144 reduction=reduction,

1145 from_logits=from_logits,

1146 ignore_class=ignore_class,

1147 )

1148

1149

1150@keras_export("keras.losses.Hinge")

1151class Hinge(LossFunctionWrapper):

1152 """Computes the hinge loss between `y_true` & `y_pred`.

1153

1154 `loss = maximum(1 - y_true * y_pred, 0)`

1155

1156 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are

1157 provided we will convert them to -1 or 1.

1158

1159 Standalone usage:

1160

1161 >>> y_true = [[0., 1.], [0., 0.]]

1162 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

1163 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1164 >>> h = tf.keras.losses.Hinge()

1165 >>> h(y_true, y_pred).numpy()

1166 1.3

1167

1168 >>> # Calling with 'sample_weight'.

1169 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()

1170 0.55

1171

1172 >>> # Using 'sum' reduction type.

1173 >>> h = tf.keras.losses.Hinge(

1174 ... reduction=tf.keras.losses.Reduction.SUM)

1175 >>> h(y_true, y_pred).numpy()

1176 2.6

1177

1178 >>> # Using 'none' reduction type.

1179 >>> h = tf.keras.losses.Hinge(

1180 ... reduction=tf.keras.losses.Reduction.NONE)

1181 >>> h(y_true, y_pred).numpy()

1182 array([1.1, 1.5], dtype=float32)

1183

1184 Usage with the `compile()` API:

1185

1186 ```python

1187 model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())

1188 ```

1189 """

1190

1191 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="hinge"):

1192 """Initializes `Hinge` instance.

1193

1194 Args:

1195 reduction: Type of `tf.keras.losses.Reduction` to apply to

1196 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1197 option will be determined by the usage context. For almost all cases

1198 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1199 `tf.distribute.Strategy`, except via `Model.compile()` and

1200 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1201 will raise an error. Please see this custom training [tutorial](

1202 https://www.tensorflow.org/tutorials/distribute/custom_training)

1203 for more details.

1204 name: Optional name for the instance. Defaults to 'hinge'.

1205 """

1206 super().__init__(hinge, name=name, reduction=reduction)

1207

1208

1209@keras_export("keras.losses.SquaredHinge")

1210class SquaredHinge(LossFunctionWrapper):

1211 """Computes the squared hinge loss between `y_true` & `y_pred`.

1212

1213 `loss = square(maximum(1 - y_true * y_pred, 0))`

1214

1215 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are

1216 provided we will convert them to -1 or 1.

1217

1218 Standalone usage:

1219

1220 >>> y_true = [[0., 1.], [0., 0.]]

1221 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

1222 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1223 >>> h = tf.keras.losses.SquaredHinge()

1224 >>> h(y_true, y_pred).numpy()

1225 1.86

1226

1227 >>> # Calling with 'sample_weight'.

1228 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()

1229 0.73

1230

1231 >>> # Using 'sum' reduction type.

1232 >>> h = tf.keras.losses.SquaredHinge(

1233 ... reduction=tf.keras.losses.Reduction.SUM)

1234 >>> h(y_true, y_pred).numpy()

1235 3.72

1236

1237 >>> # Using 'none' reduction type.

1238 >>> h = tf.keras.losses.SquaredHinge(

1239 ... reduction=tf.keras.losses.Reduction.NONE)

1240 >>> h(y_true, y_pred).numpy()

1241 array([1.46, 2.26], dtype=float32)

1242

1243 Usage with the `compile()` API:

1244

1245 ```python

1246 model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())

1247 ```

1248 """

1249

1250 def __init__(

1251 self, reduction=losses_utils.ReductionV2.AUTO, name="squared_hinge"

1252 ):

1253 """Initializes `SquaredHinge` instance.

1254

1255 Args:

1256 reduction: Type of `tf.keras.losses.Reduction` to apply to

1257 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1258 option will be determined by the usage context. For almost all cases

1259 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1260 `tf.distribute.Strategy`, except via `Model.compile()` and

1261 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1262 will raise an error. Please see this custom training [tutorial](

1263 https://www.tensorflow.org/tutorials/distribute/custom_training)

1264 for more details.

1265 name: Optional name for the instance. Defaults to 'squared_hinge'.

1266 """

1267 super().__init__(squared_hinge, name=name, reduction=reduction)

1268

1269

1270@keras_export("keras.losses.CategoricalHinge")

1271class CategoricalHinge(LossFunctionWrapper):

1272 """Computes the categorical hinge loss between `y_true` & `y_pred`.

1273

1274 `loss = maximum(neg - pos + 1, 0)`

1275 where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

1276

1277 Standalone usage:

1278

1279 >>> y_true = [[0, 1], [0, 0]]

1280 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

1281 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1282 >>> h = tf.keras.losses.CategoricalHinge()

1283 >>> h(y_true, y_pred).numpy()

1284 1.4

1285

1286 >>> # Calling with 'sample_weight'.

1287 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()

1288 0.6

1289

1290 >>> # Using 'sum' reduction type.

1291 >>> h = tf.keras.losses.CategoricalHinge(

1292 ... reduction=tf.keras.losses.Reduction.SUM)

1293 >>> h(y_true, y_pred).numpy()

1294 2.8

1295

1296 >>> # Using 'none' reduction type.

1297 >>> h = tf.keras.losses.CategoricalHinge(

1298 ... reduction=tf.keras.losses.Reduction.NONE)

1299 >>> h(y_true, y_pred).numpy()

1300 array([1.2, 1.6], dtype=float32)

1301

1302 Usage with the `compile()` API:

1303

1304 ```python

1305 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())

1306 ```

1307 """

1308

1309 def __init__(

1310 self, reduction=losses_utils.ReductionV2.AUTO, name="categorical_hinge"

1311 ):

1312 """Initializes `CategoricalHinge` instance.

1313

1314 Args:

1315 reduction: Type of `tf.keras.losses.Reduction` to apply to

1316 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1317 option will be determined by the usage context. For almost all cases

1318 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1319 `tf.distribute.Strategy`, except via `Model.compile()` and

1320 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1321 will raise an error. Please see this custom training [tutorial](

1322 https://www.tensorflow.org/tutorials/distribute/custom_training)

1323 for more details.

1324 name: Optional name for the instance. Defaults to 'categorical_hinge'.

1325 """

1326 super().__init__(categorical_hinge, name=name, reduction=reduction)

1327

1328

1329@keras_export("keras.losses.Poisson")

1330class Poisson(LossFunctionWrapper):

1331 """Computes the Poisson loss between `y_true` & `y_pred`.

1332

1333 `loss = y_pred - y_true * log(y_pred)`

1334

1335 Standalone usage:

1336

1337 >>> y_true = [[0., 1.], [0., 0.]]

1338 >>> y_pred = [[1., 1.], [0., 0.]]

1339 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1340 >>> p = tf.keras.losses.Poisson()

1341 >>> p(y_true, y_pred).numpy()

1342 0.5

1343

1344 >>> # Calling with 'sample_weight'.

1345 >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

1346 0.4

1347

1348 >>> # Using 'sum' reduction type.

1349 >>> p = tf.keras.losses.Poisson(

1350 ... reduction=tf.keras.losses.Reduction.SUM)

1351 >>> p(y_true, y_pred).numpy()

1352 0.999

1353

1354 >>> # Using 'none' reduction type.

1355 >>> p = tf.keras.losses.Poisson(

1356 ... reduction=tf.keras.losses.Reduction.NONE)

1357 >>> p(y_true, y_pred).numpy()

1358 array([0.999, 0.], dtype=float32)

1359

1360 Usage with the `compile()` API:

1361

1362 ```python

1363 model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())

1364 ```

1365 """

1366

1367 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="poisson"):

1368 """Initializes `Poisson` instance.

1369

1370 Args:

1371 reduction: Type of `tf.keras.losses.Reduction` to apply to

1372 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1373 option will be determined by the usage context. For almost all cases

1374 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1375 `tf.distribute.Strategy`, except via `Model.compile()` and

1376 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1377 will raise an error. Please see this custom training [tutorial](

1378 https://www.tensorflow.org/tutorials/distribute/custom_training)

1379 for more details.

1380 name: Optional name for the instance. Defaults to 'poisson'.

1381 """

1382 super().__init__(poisson, name=name, reduction=reduction)

1383

1384

1385@keras_export("keras.losses.LogCosh")

1386class LogCosh(LossFunctionWrapper):

1387 """Computes the logarithm of the hyperbolic cosine of the prediction error.

1388

1389 `logcosh = log((exp(x) + exp(-x))/2)`,

1390 where x is the error `y_pred - y_true`.

1391

1392 Standalone usage:

1393

1394 >>> y_true = [[0., 1.], [0., 0.]]

1395 >>> y_pred = [[1., 1.], [0., 0.]]

1396 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1397 >>> l = tf.keras.losses.LogCosh()

1398 >>> l(y_true, y_pred).numpy()

1399 0.108

1400

1401 >>> # Calling with 'sample_weight'.

1402 >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

1403 0.087

1404

1405 >>> # Using 'sum' reduction type.

1406 >>> l = tf.keras.losses.LogCosh(

1407 ... reduction=tf.keras.losses.Reduction.SUM)

1408 >>> l(y_true, y_pred).numpy()

1409 0.217

1410

1411 >>> # Using 'none' reduction type.

1412 >>> l = tf.keras.losses.LogCosh(

1413 ... reduction=tf.keras.losses.Reduction.NONE)

1414 >>> l(y_true, y_pred).numpy()

1415 array([0.217, 0.], dtype=float32)

1416

1417 Usage with the `compile()` API:

1418

1419 ```python

1420 model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())

1421 ```

1422 """

1423

1424 def __init__(

1425 self, reduction=losses_utils.ReductionV2.AUTO, name="log_cosh"

1426 ):

1427 """Initializes `LogCosh` instance.

1428

1429 Args:

1430 reduction: Type of `tf.keras.losses.Reduction` to apply to

1431 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1432 option will be determined by the usage context. For almost all cases

1433 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1434 `tf.distribute.Strategy`, except via `Model.compile()` and

1435 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1436 will raise an error. Please see this custom training [tutorial](

1437 https://www.tensorflow.org/tutorials/distribute/custom_training)

1438 for more details.

1439 name: Optional name for the instance. Defaults to 'log_cosh'.

1440 """

1441 super().__init__(log_cosh, name=name, reduction=reduction)

1442

1443

1444@keras_export("keras.losses.KLDivergence")

1445class KLDivergence(LossFunctionWrapper):

1446 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`.

1447

1448 `loss = y_true * log(y_true / y_pred)`

1449

1450 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

1451

1452 Standalone usage:

1453

1454 >>> y_true = [[0, 1], [0, 0]]

1455 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

1456 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1457 >>> kl = tf.keras.losses.KLDivergence()

1458 >>> kl(y_true, y_pred).numpy()

1459 0.458

1460

1461 >>> # Calling with 'sample_weight'.

1462 >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

1463 0.366

1464

1465 >>> # Using 'sum' reduction type.

1466 >>> kl = tf.keras.losses.KLDivergence(

1467 ... reduction=tf.keras.losses.Reduction.SUM)

1468 >>> kl(y_true, y_pred).numpy()

1469 0.916

1470

1471 >>> # Using 'none' reduction type.

1472 >>> kl = tf.keras.losses.KLDivergence(

1473 ... reduction=tf.keras.losses.Reduction.NONE)

1474 >>> kl(y_true, y_pred).numpy()

1475 array([0.916, -3.08e-06], dtype=float32)

1476

1477 Usage with the `compile()` API:

1478

1479 ```python

1480 model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())

1481 ```

1482 """

1483

1484 def __init__(

1485 self, reduction=losses_utils.ReductionV2.AUTO, name="kl_divergence"

1486 ):

1487 """Initializes `KLDivergence` instance.

1488

1489 Args:

1490 reduction: Type of `tf.keras.losses.Reduction` to apply to

1491 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1492 option will be determined by the usage context. For almost all cases

1493 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1494 `tf.distribute.Strategy`, except via `Model.compile()` and

1495 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1496 will raise an error. Please see this custom training [tutorial](

1497 https://www.tensorflow.org/tutorials/distribute/custom_training)

1498 for more details.

1499 name: Optional name for the instance. Defaults to 'kl_divergence'.

1500 """

1501 super().__init__(kl_divergence, name=name, reduction=reduction)

1502

1503

1504@keras_export("keras.losses.Huber")

1505class Huber(LossFunctionWrapper):

1506 """Computes the Huber loss between `y_true` & `y_pred`.

1507

1508 For each value x in `error = y_true - y_pred`:

1509

1510 ```

1511 loss = 0.5 * x^2 if |x| <= d

1512 loss = 0.5 * d^2 + d * (|x| - d) if |x| > d

1513 ```

1514 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

1515

1516 Standalone usage:

1517

1518 >>> y_true = [[0, 1], [0, 0]]

1519 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

1520 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

1521 >>> h = tf.keras.losses.Huber()

1522 >>> h(y_true, y_pred).numpy()

1523 0.155

1524

1525 >>> # Calling with 'sample_weight'.

1526 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()

1527 0.09

1528

1529 >>> # Using 'sum' reduction type.

1530 >>> h = tf.keras.losses.Huber(

1531 ... reduction=tf.keras.losses.Reduction.SUM)

1532 >>> h(y_true, y_pred).numpy()

1533 0.31

1534

1535 >>> # Using 'none' reduction type.

1536 >>> h = tf.keras.losses.Huber(

1537 ... reduction=tf.keras.losses.Reduction.NONE)

1538 >>> h(y_true, y_pred).numpy()

1539 array([0.18, 0.13], dtype=float32)

1540

1541 Usage with the `compile()` API:

1542

1543 ```python

1544 model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())

1545 ```

1546 """

1547

1548 def __init__(

1549 self,

1550 delta=1.0,

1551 reduction=losses_utils.ReductionV2.AUTO,

1552 name="huber_loss",

1553 ):

1554 """Initializes `Huber` instance.

1555

1556 Args:

1557 delta: A float, the point where the Huber loss function changes from a

1558 quadratic to linear.

1559 reduction: Type of `tf.keras.losses.Reduction` to apply to

1560 loss. Default value is `AUTO`. `AUTO` indicates that the reduction

1561 option will be determined by the usage context. For almost all cases

1562 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a

1563 `tf.distribute.Strategy`, except via `Model.compile()` and

1564 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

1565 will raise an error. Please see this custom training [tutorial](

1566 https://www.tensorflow.org/tutorials/distribute/custom_training)

1567 for more details.

1568 name: Optional name for the instance. Defaults to 'huber_loss'.

1569 """

1570 super().__init__(huber, name=name, reduction=reduction, delta=delta)

1571

1572

1573@keras_export(

1574 "keras.metrics.mean_squared_error",

1575 "keras.metrics.mse",

1576 "keras.metrics.MSE",

1577 "keras.losses.mean_squared_error",

1578 "keras.losses.mse",

1579 "keras.losses.MSE",

1580)

1581@tf.__internal__.dispatch.add_dispatch_support

1582def mean_squared_error(y_true, y_pred):

1583 """Computes the mean squared error between labels and predictions.

1584

1585 After computing the squared distance between the inputs, the mean value over

1586 the last dimension is returned.

1587

1588 `loss = mean(square(y_true - y_pred), axis=-1)`

1589

1590 Standalone usage:

1591

1592 >>> y_true = np.random.randint(0, 2, size=(2, 3))

1593 >>> y_pred = np.random.random(size=(2, 3))

1594 >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)

1595 >>> assert loss.shape == (2,)

1596 >>> assert np.array_equal(

1597 ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

1598

1599 Args:

1600 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

1601 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1602

1603 Returns:

1604 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.

1605 """

1606 y_pred = tf.convert_to_tensor(y_pred)

1607 y_true = tf.cast(y_true, y_pred.dtype)

1608 return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

1609

1610

1611def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):

1612 """Apply a loss function on a per batch basis.

1613

1614 Args:

1615 loss_fn: The loss function

1616 y_true: truth values (RaggedTensor)

1617 y_pred: predicted values (RaggedTensor)

1618 y_pred_extra_dim: whether y_pred has an additional dimension compared to

1619 y_true

1620

1621 Returns:

1622 Loss-function result. A dense tensor if the output has a single dimension

1623 (per-batch loss value); a ragged tensor otherwise.

1624 """

1625

1626 def rt_is_equiv_dense(rt):

1627 """Returns true if this RaggedTensor has the same row_lengths across

1628

1629 all ragged dimensions and thus can be converted to a dense tensor

1630 without loss of information.

1631

1632 Args:

1633 rt: RaggedTensor.

1634 """

1635 return tf.reduce_all(

1636 [

1637 tf.equal(

1638 tf.math.reduce_variance(

1639 tf.cast(row_lens, backend.floatx())

1640 ),

1641 tf.constant([0.0]),

1642 )

1643 for row_lens in rt.nested_row_lengths()

1644 ]

1645 )

1646

1647 def _convert_to_dense(inputs):

1648 return tuple(

1649 rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt

1650 for rt in inputs

1651 )

1652

1653 def _call_loss(inputs, ragged_output):

1654 """Adapt the result to ragged or dense tensor according to the expected

1655

1656 output type. This is done so that all the return values of the map

1657 operation have the same type.

1658 """

1659 r = loss_fn(*inputs)

1660 if ragged_output and not isinstance(r, tf.RaggedTensor):

1661 r = tf.RaggedTensor.from_tensor(r)

1662 elif not ragged_output and isinstance(r, tf.RaggedTensor):

1663 r = r.to_tensor()

1664 return r

1665

1666 def _wrapper(inputs, ragged_output):

1667 _, y_pred = inputs

1668 if isinstance(y_pred, tf.RaggedTensor):

1669 return tf.cond(

1670 rt_is_equiv_dense(y_pred),

1671 lambda: _call_loss(_convert_to_dense(inputs), ragged_output),

1672 lambda: _call_loss(inputs, ragged_output),

1673 )

1674

1675 return loss_fn(*inputs)

1676

1677 if not isinstance(y_true, tf.RaggedTensor):

1678 return loss_fn(y_true, y_pred.to_tensor())

1679

1680 lshape = y_pred.shape.as_list()[1:-1]

1681 if len(lshape) > 0:

1682 spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype)

1683 else:

1684 spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype)

1685

1686 nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)]

1687 if y_pred_extra_dim:

1688 # The last dimension of a categorical prediction may be ragged or not.

1689 rdims = [len(slist) for slist in nested_splits_list]

1690 if rdims[0] == rdims[1] - 1:

1691 nested_splits_list[1] = nested_splits_list[1][:-1]

1692

1693 map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1)

1694

1695 assertion_list = ragged_util.assert_splits_match(nested_splits_list)

1696 with tf.control_dependencies(assertion_list):

1697 return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec)

1698

1699

1700@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor)

1701def _ragged_tensor_mse(y_true, y_pred):

1702 """Implements support for handling RaggedTensors.

1703

1704 Args:

1705 y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.

1706 y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.

1707

1708 Returns:

1709 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.

1710 When the number of dimensions of the batch feature vector [d0, .. dN] is

1711 greater than one the return value is a RaggedTensor. Otherwise a Dense

1712 tensor with dimensions [batch_size] is returned.

1713 """

1714 return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)

1715

1716

1717@keras_export(

1718 "keras.metrics.mean_absolute_error",

1719 "keras.metrics.mae",

1720 "keras.metrics.MAE",

1721 "keras.losses.mean_absolute_error",

1722 "keras.losses.mae",

1723 "keras.losses.MAE",

1724)

1725@tf.__internal__.dispatch.add_dispatch_support

1726def mean_absolute_error(y_true, y_pred):

1727 """Computes the mean absolute error between labels and predictions.

1728

1729 `loss = mean(abs(y_true - y_pred), axis=-1)`

1730

1731 Standalone usage:

1732

1733 >>> y_true = np.random.randint(0, 2, size=(2, 3))

1734 >>> y_pred = np.random.random(size=(2, 3))

1735 >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)

1736 >>> assert loss.shape == (2,)

1737 >>> assert np.array_equal(

1738 ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

1739

1740 Args:

1741 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

1742 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1743

1744 Returns:

1745 Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.

1746 """

1747 y_pred = tf.convert_to_tensor(y_pred)

1748 y_true = tf.cast(y_true, y_pred.dtype)

1749 return backend.mean(tf.abs(y_pred - y_true), axis=-1)

1750

1751

1752@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor)

1753def _ragged_tensor_mae(y_true, y_pred):

1754 """RaggedTensor adapter for mean_absolute_error."""

1755 return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred)

1756

1757

1758@keras_export(

1759 "keras.metrics.mean_absolute_percentage_error",

1760 "keras.metrics.mape",

1761 "keras.metrics.MAPE",

1762 "keras.losses.mean_absolute_percentage_error",

1763 "keras.losses.mape",

1764 "keras.losses.MAPE",

1765)

1766@tf.__internal__.dispatch.add_dispatch_support

1767def mean_absolute_percentage_error(y_true, y_pred):

1768 """Computes the mean absolute percentage error between `y_true` & `y_pred`.

1769

1770 `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

1771

1772 Standalone usage:

1773

1774 >>> y_true = np.random.random(size=(2, 3))

1775 >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero

1776 >>> y_pred = np.random.random(size=(2, 3))

1777 >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)

1778 >>> assert loss.shape == (2,)

1779 >>> assert np.array_equal(

1780 ... loss.numpy(),

1781 ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

1782

1783 Args:

1784 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

1785 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1786

1787 Returns:

1788 Mean absolute percentage error values. shape = `[batch_size, d0, ..

1789 dN-1]`.

1790 """

1791 y_pred = tf.convert_to_tensor(y_pred)

1792 y_true = tf.cast(y_true, y_pred.dtype)

1793 diff = tf.abs(

1794 (y_true - y_pred) / backend.maximum(tf.abs(y_true), backend.epsilon())

1795 )

1796 return 100.0 * backend.mean(diff, axis=-1)

1797

1798

1799@dispatch.dispatch_for_types(mean_absolute_percentage_error, tf.RaggedTensor)

1800def _ragged_tensor_mape(y_true, y_pred):

1801 """Support RaggedTensors."""

1802 return _ragged_tensor_apply_loss(

1803 mean_absolute_percentage_error, y_true, y_pred

1804 )

1805

1806

1807@keras_export(

1808 "keras.metrics.mean_squared_logarithmic_error",

1809 "keras.metrics.msle",

1810 "keras.metrics.MSLE",

1811 "keras.losses.mean_squared_logarithmic_error",

1812 "keras.losses.msle",

1813 "keras.losses.MSLE",

1814)

1815@tf.__internal__.dispatch.add_dispatch_support

1816def mean_squared_logarithmic_error(y_true, y_pred):

1817 """Computes the mean squared logarithmic error between `y_true` & `y_pred`.

1818

1819 `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

1820

1821 Standalone usage:

1822

1823 >>> y_true = np.random.randint(0, 2, size=(2, 3))

1824 >>> y_pred = np.random.random(size=(2, 3))

1825 >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)

1826 >>> assert loss.shape == (2,)

1827 >>> y_true = np.maximum(y_true, 1e-7)

1828 >>> y_pred = np.maximum(y_pred, 1e-7)

1829 >>> assert np.allclose(

1830 ... loss.numpy(),

1831 ... np.mean(

1832 ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

1833

1834 Args:

1835 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

1836 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1837

1838 Returns:

1839 Mean squared logarithmic error values. shape = `[batch_size, d0, ..

1840 dN-1]`.

1841 """

1842 y_pred = tf.convert_to_tensor(y_pred)

1843 y_true = tf.cast(y_true, y_pred.dtype)

1844 first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.0)

1845 second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.0)

1846 return backend.mean(

1847 tf.math.squared_difference(first_log, second_log), axis=-1

1848 )

1849

1850

1851@dispatch.dispatch_for_types(mean_squared_logarithmic_error, tf.RaggedTensor)

1852def _ragged_tensor_msle(y_true, y_pred):

1853 """Implements support for handling RaggedTensors."""

1854 return _ragged_tensor_apply_loss(

1855 mean_squared_logarithmic_error, y_true, y_pred

1856 )

1857

1858

1859def _maybe_convert_labels(y_true):

1860 """Converts binary labels into -1/1."""

1861 are_zeros = tf.equal(y_true, 0)

1862 are_ones = tf.equal(y_true, 1)

1863 is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones))

1864

1865 def _convert_binary_labels():

1866 # Convert the binary labels to -1 or 1.

1867 return 2.0 * y_true - 1.0

1868

1869 updated_y_true = tf.__internal__.smart_cond.smart_cond(

1870 is_binary, _convert_binary_labels, lambda: y_true

1871 )

1872 return updated_y_true

1873

1874

1875@keras_export("keras.metrics.squared_hinge", "keras.losses.squared_hinge")

1876@tf.__internal__.dispatch.add_dispatch_support

1877def squared_hinge(y_true, y_pred):

1878 """Computes the squared hinge loss between `y_true` & `y_pred`.

1879

1880 `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`

1881

1882 Standalone usage:

1883

1884 >>> y_true = np.random.choice([-1, 1], size=(2, 3))

1885 >>> y_pred = np.random.random(size=(2, 3))

1886 >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)

1887 >>> assert loss.shape == (2,)

1888 >>> assert np.array_equal(

1889 ... loss.numpy(),

1890 ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))

1891

1892 Args:

1893 y_true: The ground truth values. `y_true` values are expected to be -1 or

1894 1. If binary (0 or 1) labels are provided we will convert them to -1 or

1895 1. shape = `[batch_size, d0, .. dN]`.

1896 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1897

1898 Returns:

1899 Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.

1900 """

1901 y_pred = tf.convert_to_tensor(y_pred)

1902 y_true = tf.cast(y_true, y_pred.dtype)

1903 y_true = _maybe_convert_labels(y_true)

1904 return backend.mean(

1905 tf.square(tf.maximum(1.0 - y_true * y_pred, 0.0)), axis=-1

1906 )

1907

1908

1909@keras_export("keras.metrics.hinge", "keras.losses.hinge")

1910@tf.__internal__.dispatch.add_dispatch_support

1911def hinge(y_true, y_pred):

1912 """Computes the hinge loss between `y_true` & `y_pred`.

1913

1914 `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`

1915

1916 Standalone usage:

1917

1918 >>> y_true = np.random.choice([-1, 1], size=(2, 3))

1919 >>> y_pred = np.random.random(size=(2, 3))

1920 >>> loss = tf.keras.losses.hinge(y_true, y_pred)

1921 >>> assert loss.shape == (2,)

1922 >>> assert np.array_equal(

1923 ... loss.numpy(),

1924 ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))

1925

1926 Args:

1927 y_true: The ground truth values. `y_true` values are expected to be -1 or

1928 1. If binary (0 or 1) labels are provided they will be converted to -1

1929 or 1. shape = `[batch_size, d0, .. dN]`.

1930 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

1931

1932 Returns:

1933 Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.

1934 """

1935 y_pred = tf.convert_to_tensor(y_pred)

1936 y_true = tf.cast(y_true, y_pred.dtype)

1937 y_true = _maybe_convert_labels(y_true)

1938 return backend.mean(tf.maximum(1.0 - y_true * y_pred, 0.0), axis=-1)

1939

1940

1941@keras_export("keras.losses.categorical_hinge")

1942@tf.__internal__.dispatch.add_dispatch_support

1943def categorical_hinge(y_true, y_pred):

1944 """Computes the categorical hinge loss between `y_true` & `y_pred`.

1945

1946 `loss = maximum(neg - pos + 1, 0)`

1947 where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

1948

1949 Standalone usage:

1950

1951 >>> y_true = np.random.randint(0, 3, size=(2,))

1952 >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)

1953 >>> y_pred = np.random.random(size=(2, 3))

1954 >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)

1955 >>> assert loss.shape == (2,)

1956 >>> pos = np.sum(y_true * y_pred, axis=-1)

1957 >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)

1958 >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))

1959

1960 Args:

1961 y_true: The ground truth values. `y_true` values are expected to be

1962 either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).

1963 y_pred: The predicted values.

1964

1965 Returns:

1966 Categorical hinge loss values.

1967 """

1968 y_pred = tf.convert_to_tensor(y_pred)

1969 y_true = tf.cast(y_true, y_pred.dtype)

1970 pos = tf.reduce_sum(y_true * y_pred, axis=-1)

1971 neg = tf.reduce_max((1.0 - y_true) * y_pred, axis=-1)

1972 zero = tf.cast(0.0, y_pred.dtype)

1973 return tf.maximum(neg - pos + 1.0, zero)

1974

1975

1976@keras_export("keras.losses.huber", v1=[])

1977@tf.__internal__.dispatch.add_dispatch_support

1978def huber(y_true, y_pred, delta=1.0):

1979 """Computes Huber loss value.

1980

1981 For each value x in `error = y_true - y_pred`:

1982

1983 ```

1984 loss = 0.5 * x^2 if |x| <= d

1985 loss = d * |x| - 0.5 * d^2 if |x| > d

1986 ```

1987 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

1988

1989 Args:

1990 y_true: tensor of true targets.

1991 y_pred: tensor of predicted targets.

1992 delta: A float, the point where the Huber loss function changes from a

1993 quadratic to linear.

1994

1995 Returns:

1996 Tensor with one scalar loss entry per sample.

1997 """

1998 y_pred = tf.cast(y_pred, dtype=backend.floatx())

1999 y_true = tf.cast(y_true, dtype=backend.floatx())

2000 delta = tf.cast(delta, dtype=backend.floatx())

2001 error = tf.subtract(y_pred, y_true)

2002 abs_error = tf.abs(error)

2003 half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)

2004 return backend.mean(

2005 tf.where(

2006 abs_error <= delta,

2007 half * tf.square(error),

2008 delta * abs_error - half * tf.square(delta),

2009 ),

2010 axis=-1,

2011 )

2012

2013

2014@keras_export(

2015 "keras.losses.log_cosh",

2016 "keras.losses.logcosh",

2017 "keras.metrics.log_cosh",

2018 "keras.metrics.logcosh",

2019)

2020@tf.__internal__.dispatch.add_dispatch_support

2021def log_cosh(y_true, y_pred):

2022 """Logarithm of the hyperbolic cosine of the prediction error.

2023

2024 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and

2025 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly

2026 like the mean squared error, but will not be so strongly affected by the

2027 occasional wildly incorrect prediction.

2028

2029 Standalone usage:

2030

2031 >>> y_true = np.random.random(size=(2, 3))

2032 >>> y_pred = np.random.random(size=(2, 3))

2033 >>> loss = tf.keras.losses.logcosh(y_true, y_pred)

2034 >>> assert loss.shape == (2,)

2035 >>> x = y_pred - y_true

2036 >>> assert np.allclose(

2037 ... loss.numpy(),

2038 ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.),

2039 ... axis=-1),

2040 ... atol=1e-5)

2041

2042 Args:

2043 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

2044 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

2045

2046 Returns:

2047 Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.

2048 """

2049 y_pred = tf.convert_to_tensor(y_pred)

2050 y_true = tf.cast(y_true, y_pred.dtype)

2051

2052 def _logcosh(x):

2053 return (

2054 x + tf.math.softplus(-2.0 * x) - tf.cast(tf.math.log(2.0), x.dtype)

2055 )

2056

2057 return backend.mean(_logcosh(y_pred - y_true), axis=-1)

2058

2059

2060@keras_export(

2061 "keras.metrics.categorical_crossentropy",

2062 "keras.losses.categorical_crossentropy",

2063)

2064@tf.__internal__.dispatch.add_dispatch_support

2065def categorical_crossentropy(

2066 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1

2067):

2068 """Computes the categorical crossentropy loss.

2069

2070 Standalone usage:

2071

2072 >>> y_true = [[0, 1, 0], [0, 0, 1]]

2073 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

2074 >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

2075 >>> assert loss.shape == (2,)

2076 >>> loss.numpy()

2077 array([0.0513, 2.303], dtype=float32)

2078

2079 Args:

2080 y_true: Tensor of one-hot true targets.

2081 y_pred: Tensor of predicted targets.

2082 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2083 default, we assume that `y_pred` encodes a probability distribution.

2084 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For

2085 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2086 and `0.9 + 0.1 / num_classes` for target labels.

2087 axis: Defaults to -1. The dimension along which the entropy is

2088 computed.

2089

2090 Returns:

2091 Categorical crossentropy loss value.

2092 """

2093 if isinstance(axis, bool):

2094 raise ValueError(

2095 "`axis` must be of type `int`. "

2096 f"Received: axis={axis} of type {type(axis)}"

2097 )

2098 y_pred = tf.convert_to_tensor(y_pred)

2099 y_true = tf.cast(y_true, y_pred.dtype)

2100 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)

2101

2102 if y_pred.shape[-1] == 1:

2103 warnings.warn(

2104 "In loss categorical_crossentropy, expected "

2105 "y_pred.shape to be (batch_size, num_classes) "

2106 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. "

2107 "Consider using 'binary_crossentropy' if you only have 2 classes.",

2108 SyntaxWarning,

2109 stacklevel=2,

2110 )

2111

2112 def _smooth_labels():

2113 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)

2114 return y_true * (1.0 - label_smoothing) + (

2115 label_smoothing / num_classes

2116 )

2117

2118 y_true = tf.__internal__.smart_cond.smart_cond(

2119 label_smoothing, _smooth_labels, lambda: y_true

2120 )

2121

2122 return backend.categorical_crossentropy(

2123 y_true, y_pred, from_logits=from_logits, axis=axis

2124 )

2125

2126

2127@dispatch.dispatch_for_types(categorical_crossentropy, tf.RaggedTensor)

2128def _ragged_tensor_categorical_crossentropy(

2129 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1

2130):

2131 """Implements support for handling RaggedTensors.

2132

2133 Args:

2134 y_true: Tensor of one-hot true targets.

2135 y_pred: Tensor of predicted targets.

2136 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2137 default, we assume that `y_pred` encodes a probability distribution.

2138 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For

2139 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2140 and `0.9 + 0.1 / num_classes` for target labels.

2141 axis: The axis along which to compute crossentropy (the features axis).

2142 Defaults to -1.

2143

2144 Returns:

2145 Categorical crossentropy loss value.

2146

2147 Expected shape: (batch, sequence_len, n_classes) with sequence_len

2148 being variable per batch.

2149 Return shape: (batch, sequence_len).

2150

2151 When used by CategoricalCrossentropy() with the default reduction

2152 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the

2153 number of elements independent of the batch. E.g. if the RaggedTensor

2154 has 2 batches with [2, 1] values respectively the resulting loss is

2155 the sum of the individual loss values divided by 3.

2156 """

2157 fn = functools.partial(

2158 categorical_crossentropy,

2159 from_logits=from_logits,

2160 label_smoothing=label_smoothing,

2161 axis=axis,

2162 )

2163 return _ragged_tensor_apply_loss(fn, y_true, y_pred)

2164

2165

2166@keras_export(

2167 "keras.metrics.categorical_focal_crossentropy",

2168 "keras.losses.categorical_focal_crossentropy",

2169)

2170@tf.__internal__.dispatch.add_dispatch_support

2171def categorical_focal_crossentropy(

2172 y_true,

2173 y_pred,

2174 alpha=0.25,

2175 gamma=2.0,

2176 from_logits=False,

2177 label_smoothing=0.0,

2178 axis=-1,

2179):

2180 """Computes the categorical focal crossentropy loss.

2181

2182 Standalone usage:

2183 >>> y_true = [[0, 1, 0], [0, 0, 1]]

2184 >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]]

2185 >>> loss = tf.keras.losses.categorical_focal_crossentropy(y_true, y_pred)

2186 >>> assert loss.shape == (2,)

2187 >>> loss.numpy()

2188 array([2.63401289e-04, 6.75912094e-01], dtype=float32)

2189

2190 Args:

2191 y_true: Tensor of one-hot true targets.

2192 y_pred: Tensor of predicted targets.

2193 alpha: A weight balancing factor for all classes, default is `0.25` as

2194 mentioned in the reference. It can be a list of floats or a scalar.

2195 In the multi-class case, alpha may be set by inverse class

2196 frequency by using `compute_class_weight` from `sklearn.utils`.

2197 gamma: A focusing parameter, default is `2.0` as mentioned in the

2198 reference. It helps to gradually reduce the importance given to

2199 simple examples in a smooth manner. When `gamma` = 0, there is

2200 no focal effect on the categorical crossentropy.

2201 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2202 default, we assume that `y_pred` encodes a probability

2203 distribution.

2204 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For

2205 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2206 and `0.9 + 0.1 / num_classes` for target labels.

2207 axis: Defaults to -1. The dimension along which the entropy is

2208 computed.

2209

2210 Returns:

2211 Categorical focal crossentropy loss value.

2212 """

2213 if isinstance(axis, bool):

2214 raise ValueError(

2215 "`axis` must be of type `int`. "

2216 f"Received: axis={axis} of type {type(axis)}"

2217 )

2218 y_pred = tf.convert_to_tensor(y_pred)

2219 y_true = tf.cast(y_true, y_pred.dtype)

2220 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)

2221

2222 if y_pred.shape[-1] == 1:

2223 warnings.warn(

2224 "In loss categorical_focal_crossentropy, expected "

2225 "y_pred.shape to be (batch_size, num_classes) "

2226 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. "

2227 "Consider using 'binary_crossentropy' if you only have 2 classes.",

2228 SyntaxWarning,

2229 stacklevel=2,

2230 )

2231

2232 def _smooth_labels():

2233 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)

2234 return y_true * (1.0 - label_smoothing) + (

2235 label_smoothing / num_classes

2236 )

2237

2238 y_true = tf.__internal__.smart_cond.smart_cond(

2239 label_smoothing, _smooth_labels, lambda: y_true

2240 )

2241

2242 return backend.categorical_focal_crossentropy(

2243 target=y_true,

2244 output=y_pred,

2245 alpha=alpha,

2246 gamma=gamma,

2247 from_logits=from_logits,

2248 axis=axis,

2249 )

2250

2251

2252@dispatch.dispatch_for_types(categorical_focal_crossentropy, tf.RaggedTensor)

2253def _ragged_tensor_categorical_focal_crossentropy(

2254 y_true,

2255 y_pred,

2256 alpha=0.25,

2257 gamma=2.0,

2258 from_logits=False,

2259 label_smoothing=0.0,

2260 axis=-1,

2261):

2262 """Implements support for handling RaggedTensors.

2263

2264 Expected shape: (batch, sequence_len, n_classes) with sequence_len

2265 being variable per batch.

2266 Return shape: (batch, sequence_len).

2267 When used by CategoricalFocalCrossentropy() with the default reduction

2268 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the

2269 number of elements independent of the batch. E.g. if the RaggedTensor

2270 has 2 batches with [2, 1] values respectively the resulting loss is

2271 the sum of the individual loss values divided by 3.

2272

2273 Args:

2274 alpha: A weight balancing factor for all classes, default is `0.25` as

2275 mentioned in the reference. It can be a list of floats or a scalar.

2276 In the multi-class case, alpha may be set by inverse class

2277 frequency by using `compute_class_weight` from `sklearn.utils`.

2278 gamma: A focusing parameter, default is `2.0` as mentioned in the

2279 reference. It helps to gradually reduce the importance given to

2280 simple examples in a smooth manner. When `gamma` = 0, there is

2281 no focal effect on the categorical crossentropy.

2282 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2283 default, we assume that `y_pred` encodes a probability distribution.

2284 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For

2285 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2286 and `0.9 + 0.1 / num_classes` for target labels.

2287 axis: Defaults to -1. The dimension along which the entropy is

2288 computed.

2289

2290 Returns:

2291 Categorical focal crossentropy loss value.

2292 """

2293 fn = functools.partial(

2294 categorical_focal_crossentropy,

2295 alpha=alpha,

2296 gamma=gamma,

2297 from_logits=from_logits,

2298 label_smoothing=label_smoothing,

2299 axis=axis,

2300 )

2301 return _ragged_tensor_apply_loss(fn, y_true, y_pred)

2302

2303

2304@keras_export(

2305 "keras.metrics.sparse_categorical_crossentropy",

2306 "keras.losses.sparse_categorical_crossentropy",

2307)

2308@tf.__internal__.dispatch.add_dispatch_support

2309def sparse_categorical_crossentropy(

2310 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None

2311):

2312 """Computes the sparse categorical crossentropy loss.

2313

2314 Standalone usage:

2315

2316 >>> y_true = [1, 2]

2317 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

2318 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)

2319 >>> assert loss.shape == (2,)

2320 >>> loss.numpy()

2321 array([0.0513, 2.303], dtype=float32)

2322

2323 >>> y_true = [[[ 0, 2],

2324 ... [-1, -1]],

2325 ... [[ 0, 2],

2326 ... [-1, -1]]]

2327 >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],

2328 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]],

2329 ... [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]],

2330 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]]

2331 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(

2332 ... y_true, y_pred, ignore_class=-1)

2333 >>> loss.numpy()

2334 array([[[2.3841855e-07, 2.3841855e-07],

2335 [0.0000000e+00, 0.0000000e+00]],

2336 [[2.3841855e-07, 6.9314730e-01],

2337 [0.0000000e+00, 0.0000000e+00]]], dtype=float32)

2338

2339 Args:

2340 y_true: Ground truth values.

2341 y_pred: The predicted values.

2342 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2343 default, we assume that `y_pred` encodes a probability distribution.

2344 axis: Defaults to -1. The dimension along which the entropy is

2345 computed.

2346 ignore_class: Optional integer. The ID of a class to be ignored during

2347 loss computation. This is useful, for example, in segmentation

2348 problems featuring a "void" class (commonly -1 or 255) in segmentation

2349 maps. By default (`ignore_class=None`), all classes are considered.

2350

2351 Returns:

2352 Sparse categorical crossentropy loss value.

2353 """

2354 return backend.sparse_categorical_crossentropy(

2355 y_true,

2356 y_pred,

2357 from_logits=from_logits,

2358 ignore_class=ignore_class,

2359 axis=axis,

2360 )

2361

2362

2363@dispatch.dispatch_for_types(sparse_categorical_crossentropy, tf.RaggedTensor)

2364def _ragged_tensor_sparse_categorical_crossentropy(

2365 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None

2366):

2367 """Implements support for handling RaggedTensors.

2368

2369 Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len

2370 being variable per batch.

2371 Return shape: (batch, sequence_len).

2372

2373 When used by SparseCategoricalCrossentropy() with the default reduction

2374 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the

2375 number of elements independent of the batch. E.g. if the RaggedTensor

2376 has 2 batches with [2, 1] values respectively, the resulting loss is

2377 the sum of the individual loss values divided by 3.

2378 """

2379 fn = functools.partial(

2380 sparse_categorical_crossentropy,

2381 from_logits=from_logits,

2382 ignore_class=ignore_class,

2383 axis=axis,

2384 )

2385 return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True)

2386

2387

2388@keras_export(

2389 "keras.metrics.binary_crossentropy", "keras.losses.binary_crossentropy"

2390)

2391@tf.__internal__.dispatch.add_dispatch_support

2392def binary_crossentropy(

2393 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1

2394):

2395 """Computes the binary crossentropy loss.

2396

2397 Standalone usage:

2398

2399 >>> y_true = [[0, 1], [0, 0]]

2400 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

2401 >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)

2402 >>> assert loss.shape == (2,)

2403 >>> loss.numpy()

2404 array([0.916 , 0.714], dtype=float32)

2405

2406 Args:

2407 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

2408 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

2409 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2410 default, we assume that `y_pred` encodes a probability distribution.

2411 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by

2412 squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`

2413 for the target class and `0.5 * label_smoothing` for the non-target

2414 class.

2415 axis: The axis along which the mean is computed. Defaults to -1.

2416

2417 Returns:

2418 Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.

2419 """

2420 y_pred = tf.convert_to_tensor(y_pred)

2421 y_true = tf.cast(y_true, y_pred.dtype)

2422 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)

2423

2424 def _smooth_labels():

2425 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

2426

2427 y_true = tf.__internal__.smart_cond.smart_cond(

2428 label_smoothing, _smooth_labels, lambda: y_true

2429 )

2430

2431 return backend.mean(

2432 backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),

2433 axis=axis,

2434 )

2435

2436

2437@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor)

2438def _ragged_tensor_binary_crossentropy(

2439 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1

2440):

2441 """Implements support for handling RaggedTensors.

2442

2443 Args:

2444 y_true: Tensor of one-hot true targets.

2445 y_pred: Tensor of predicted targets.

2446 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2447 default, we assume that `y_pred` encodes a probability distribution.

2448 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For

2449 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2450 and `0.9 + 0.1 / num_classes` for target labels.

2451 axis: Axis along which to compute crossentropy.

2452

2453 Returns:

2454 Binary crossentropy loss value.

2455

2456 Expected shape: (batch, sequence_len) with sequence_len being variable

2457 per batch.

2458 Return shape: (batch,); returns the per batch mean of the loss values.

2459

2460 When used by BinaryCrossentropy() with the default reduction

2461 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over

2462 the number of batches.

2463 """

2464 fn = functools.partial(

2465 binary_crossentropy,

2466 from_logits=from_logits,

2467 label_smoothing=label_smoothing,

2468 axis=axis,

2469 )

2470 return _ragged_tensor_apply_loss(fn, y_true, y_pred)

2471

2472

2473@keras_export(

2474 "keras.metrics.binary_focal_crossentropy",

2475 "keras.losses.binary_focal_crossentropy",

2476)

2477@tf.__internal__.dispatch.add_dispatch_support

2478def binary_focal_crossentropy(

2479 y_true,

2480 y_pred,

2481 apply_class_balancing=False,

2482 alpha=0.25,

2483 gamma=2.0,

2484 from_logits=False,

2485 label_smoothing=0.0,

2486 axis=-1,

2487):

2488 """Computes the binary focal crossentropy loss.

2489

2490 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it

2491 helps to apply a focal factor to down-weight easy examples and focus more on

2492 hard examples. By default, the focal tensor is computed as follows:

2493

2494 `focal_factor = (1 - output)**gamma` for class 1

2495 `focal_factor = output**gamma` for class 0

2496 where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal

2497 effect on the binary crossentropy loss.

2498

2499 If `apply_class_balancing == True`, this function also takes into account a

2500 weight balancing factor for the binary classes 0 and 1 as follows:

2501

2502 `weight = alpha` for class 1 (`target == 1`)

2503 `weight = 1 - alpha` for class 0

2504 where `alpha` is a float in the range of `[0, 1]`.

2505

2506 Standalone usage:

2507

2508 >>> y_true = [[0, 1], [0, 0]]

2509 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]

2510 >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred,

2511 ... gamma=2)

2512 >>> assert loss.shape == (2,)

2513 >>> loss.numpy()

2514 array([0.330, 0.206], dtype=float32)

2515

2516 Args:

2517 y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`.

2518 y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`.

2519 apply_class_balancing: A bool, whether to apply weight balancing on the

2520 binary classes 0 and 1.

2521 alpha: A weight balancing factor for class 1, default is `0.25` as

2522 mentioned in the reference. The weight for class 0 is `1.0 - alpha`.

2523 gamma: A focusing parameter, default is `2.0` as mentioned in the

2524 reference.

2525 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2526 default, we assume that `y_pred` encodes a probability distribution.

2527 label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the

2528 labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 *

2529 label_smoothing` for the target class and `0.5 * label_smoothing` for

2530 the non-target class.

2531 axis: The axis along which the mean is computed. Defaults to `-1`.

2532

2533 Returns:

2534 Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.

2535 """

2536 y_pred = tf.convert_to_tensor(y_pred)

2537 y_true = tf.cast(y_true, y_pred.dtype)

2538 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype)

2539

2540 def _smooth_labels():

2541 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

2542

2543 y_true = tf.__internal__.smart_cond.smart_cond(

2544 label_smoothing, _smooth_labels, lambda: y_true

2545 )

2546

2547 return backend.mean(

2548 backend.binary_focal_crossentropy(

2549 target=y_true,

2550 output=y_pred,

2551 apply_class_balancing=apply_class_balancing,

2552 alpha=alpha,

2553 gamma=gamma,

2554 from_logits=from_logits,

2555 ),

2556 axis=axis,

2557 )

2558

2559

2560@dispatch.dispatch_for_types(binary_focal_crossentropy, tf.RaggedTensor)

2561def _ragged_tensor_binary_focal_crossentropy(

2562 y_true,

2563 y_pred,

2564 apply_class_balancing=False,

2565 alpha=0.25,

2566 gamma=2.0,

2567 from_logits=False,

2568 label_smoothing=0.0,

2569 axis=-1,

2570):

2571 """Implements support for handling RaggedTensors.

2572

2573 Expected shape: `(batch, sequence_len)` with sequence_len being variable per

2574 batch.

2575 Return shape: `(batch,)`; returns the per batch mean of the loss values.

2576

2577 When used by BinaryFocalCrossentropy() with the default reduction

2578 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over

2579 the number of batches.

2580

2581 Args:

2582 y_true: Tensor of one-hot true targets.

2583 y_pred: Tensor of predicted targets.

2584 apply_class_balancing: A bool, whether to apply weight balancing on the

2585 binary classes 0 and 1.

2586 alpha: A weight balancing factor for class 1, default is `0.25` as

2587 mentioned in the reference [Lin et al., 2018](

2588 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is

2589 `1.0 - alpha`.

2590 gamma: A focusing parameter, default is `2.0` as mentioned in the

2591 reference.

2592 from_logits: Whether `y_pred` is expected to be a logits tensor. By

2593 default, we assume that `y_pred` encodes a probability distribution.

2594 label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For

2595 example, if `0.1`, use `0.1 / num_classes` for non-target labels

2596 and `0.9 + 0.1 / num_classes` for target labels.

2597 axis: Axis along which to compute crossentropy.

2598

2599 Returns:

2600 Binary focal crossentropy loss value.

2601 """

2602 fn = functools.partial(

2603 binary_focal_crossentropy,

2604 apply_class_balancing=apply_class_balancing,

2605 alpha=alpha,

2606 gamma=gamma,

2607 from_logits=from_logits,

2608 label_smoothing=label_smoothing,

2609 axis=axis,

2610 )

2611 return _ragged_tensor_apply_loss(fn, y_true, y_pred)

2612

2613

2614@keras_export(

2615 "keras.metrics.kl_divergence",

2616 "keras.metrics.kullback_leibler_divergence",

2617 "keras.metrics.kld",

2618 "keras.metrics.KLD",

2619 "keras.losses.kl_divergence",

2620 "keras.losses.kullback_leibler_divergence",

2621 "keras.losses.kld",

2622 "keras.losses.KLD",

2623)

2624@tf.__internal__.dispatch.add_dispatch_support

2625def kl_divergence(y_true, y_pred):

2626 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`.

2627

2628 `loss = y_true * log(y_true / y_pred)`

2629

2630 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

2631

2632 Standalone usage:

2633

2634 >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)

2635 >>> y_pred = np.random.random(size=(2, 3))

2636 >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)

2637 >>> assert loss.shape == (2,)

2638 >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)

2639 >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)

2640 >>> assert np.array_equal(

2641 ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

2642

2643 Args:

2644 y_true: Tensor of true targets.

2645 y_pred: Tensor of predicted targets.

2646

2647 Returns:

2648 A `Tensor` with loss.

2649

2650 Raises:

2651 TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.

2652 """

2653 y_pred = tf.convert_to_tensor(y_pred)

2654 y_true = tf.cast(y_true, y_pred.dtype)

2655 y_true = backend.clip(y_true, backend.epsilon(), 1)

2656 y_pred = backend.clip(y_pred, backend.epsilon(), 1)

2657 return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1)

2658

2659

2660@keras_export("keras.metrics.poisson", "keras.losses.poisson")

2661@tf.__internal__.dispatch.add_dispatch_support

2662def poisson(y_true, y_pred):

2663 """Computes the Poisson loss between y_true and y_pred.

2664

2665 The Poisson loss is the mean of the elements of the `Tensor`

2666 `y_pred - y_true * log(y_pred)`.

2667

2668 Standalone usage:

2669

2670 >>> y_true = np.random.randint(0, 2, size=(2, 3))

2671 >>> y_pred = np.random.random(size=(2, 3))

2672 >>> loss = tf.keras.losses.poisson(y_true, y_pred)

2673 >>> assert loss.shape == (2,)

2674 >>> y_pred = y_pred + 1e-7

2675 >>> assert np.allclose(

2676 ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),

2677 ... atol=1e-5)

2678

2679 Args:

2680 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.

2681 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

2682

2683 Returns:

2684 Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.

2685

2686 Raises:

2687 InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.

2688 """

2689 y_pred = tf.convert_to_tensor(y_pred)

2690 y_true = tf.cast(y_true, y_pred.dtype)

2691 return backend.mean(

2692 y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1

2693 )

2694

2695

2696@keras_export(

2697 "keras.losses.cosine_similarity",

2698 v1=[

2699 "keras.metrics.cosine_proximity",

2700 "keras.metrics.cosine",

2701 "keras.losses.cosine_proximity",

2702 "keras.losses.cosine",

2703 "keras.losses.cosine_similarity",

2704 ],

2705)

2706@tf.__internal__.dispatch.add_dispatch_support

2707def cosine_similarity(y_true, y_pred, axis=-1):

2708 """Computes the cosine similarity between labels and predictions.

2709

2710 Note that it is a number between -1 and 1. When it is a negative number

2711 between -1 and 0, 0 indicates orthogonality and values closer to -1

2712 indicate greater similarity. The values closer to 1 indicate greater

2713 dissimilarity. This makes it usable as a loss function in a setting

2714 where you try to maximize the proximity between predictions and

2715 targets. If either `y_true` or `y_pred` is a zero vector, cosine

2716 similarity will be 0 regardless of the proximity between predictions

2717 and targets.

2718

2719 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

2720

2721 Standalone usage:

2722

2723 >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]

2724 >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]

2725 >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)

2726 >>> loss.numpy()

2727 array([-0., -0.999, 0.999], dtype=float32)

2728

2729 Args:

2730 y_true: Tensor of true targets.

2731 y_pred: Tensor of predicted targets.

2732 axis: Axis along which to determine similarity.

2733

2734 Returns:

2735 Cosine similarity tensor.

2736 """

2737 y_true = tf.linalg.l2_normalize(y_true, axis=axis)

2738 y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)

2739 return -tf.reduce_sum(y_true * y_pred, axis=axis)

2740

2741

2742@keras_export("keras.losses.CosineSimilarity")

2743class CosineSimilarity(LossFunctionWrapper):

2744 """Computes the cosine similarity between labels and predictions.

2745

2746 Note that it is a number between -1 and 1. When it is a negative number

2747 between -1 and 0, 0 indicates orthogonality and values closer to -1

2748 indicate greater similarity. The values closer to 1 indicate greater

2749 dissimilarity. This makes it usable as a loss function in a setting

2750 where you try to maximize the proximity between predictions and targets.

2751 If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0

2752 regardless of the proximity between predictions and targets.

2753

2754 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

2755

2756 Standalone usage:

2757

2758 >>> y_true = [[0., 1.], [1., 1.]]

2759 >>> y_pred = [[1., 0.], [1., 1.]]

2760 >>> # Using 'auto'/'sum_over_batch_size' reduction type.

2761 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)

2762 >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]

2763 >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]

2764 >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]

2765 >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))

2766 >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2

2767 >>> cosine_loss(y_true, y_pred).numpy()

2768 -0.5

2769

2770 >>> # Calling with 'sample_weight'.

2771 >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()

2772 -0.0999

2773

2774 >>> # Using 'sum' reduction type.

2775 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,

2776 ... reduction=tf.keras.losses.Reduction.SUM)

2777 >>> cosine_loss(y_true, y_pred).numpy()

2778 -0.999

2779

2780 >>> # Using 'none' reduction type.

2781 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,

2782 ... reduction=tf.keras.losses.Reduction.NONE)

2783 >>> cosine_loss(y_true, y_pred).numpy()

2784 array([-0., -0.999], dtype=float32)

2785

2786 Usage with the `compile()` API:

2787

2788 ```python

2789 model.compile(optimizer='sgd',

2790 loss=tf.keras.losses.CosineSimilarity(axis=1))

2791 ```

2792

2793 Args:

2794 axis: The axis along which the cosine similarity is computed

2795 (the features axis). Defaults to -1.

2796 reduction: Type of `tf.keras.losses.Reduction` to apply to loss.

2797 Default value is `AUTO`. `AUTO` indicates that the reduction option will

2798 be determined by the usage context. For almost all cases this defaults

2799 to `SUM_OVER_BATCH_SIZE`. When used under a

2800 `tf.distribute.Strategy`, except via `Model.compile()` and

2801 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`

2802 will raise an error. Please see this custom training [tutorial](

2803 https://www.tensorflow.org/tutorials/distribute/custom_training)

2804 for more details.

2805 name: Optional name for the instance.

2806 """

2807

2808 def __init__(

2809 self,

2810 axis=-1,

2811 reduction=losses_utils.ReductionV2.AUTO,

2812 name="cosine_similarity",

2813 ):

2814 super().__init__(

2815 cosine_similarity, reduction=reduction, name=name, axis=axis

2816 )

2817

2818

2819# Aliases.

2820

2821bce = BCE = binary_crossentropy

2822mse = MSE = mean_squared_error

2823mae = MAE = mean_absolute_error

2824mape = MAPE = mean_absolute_percentage_error

2825msle = MSLE = mean_squared_logarithmic_error

2826kld = KLD = kullback_leibler_divergence = kl_divergence

2827logcosh = log_cosh

2828huber_loss = huber

2829

2830

2831def is_categorical_crossentropy(loss):

2832 result = (

2833 isinstance(loss, CategoricalCrossentropy)

2834 or (

2835 isinstance(loss, LossFunctionWrapper)

2836 and loss.fn == categorical_crossentropy

2837 )

2838 or (

2839 hasattr(loss, "__name__")

2840 and loss.__name__ == "categorical_crossentropy"

2841 )

2842 or (loss == "categorical_crossentropy")

2843 )

2844 return result

2845

2846

2847@keras_export("keras.losses.serialize")

2848def serialize(loss, use_legacy_format=False):

2849 """Serializes loss function or `Loss` instance.

2850

2851 Args:

2852 loss: A Keras `Loss` instance or a loss function.

2853

2854 Returns:

2855 Loss configuration dictionary.

2856 """

2857 if use_legacy_format:

2858 return legacy_serialization.serialize_keras_object(loss)

2859 return serialize_keras_object(loss)

2860

2861

2862@keras_export("keras.losses.deserialize")

2863def deserialize(name, custom_objects=None, use_legacy_format=False):

2864 """Deserializes a serialized loss class/function instance.

2865

2866 Args:

2867 name: Loss configuration.

2868 custom_objects: Optional dictionary mapping names (strings) to custom

2869 objects (classes and functions) to be considered during

2870 deserialization.

2871

2872 Returns:

2873 A Keras `Loss` instance or a loss function.

2874 """

2875 if use_legacy_format:

2876 return legacy_serialization.deserialize_keras_object(

2877 name,

2878 module_objects=globals(),

2879 custom_objects=custom_objects,

2880 printable_module_name="loss function",

2881 )

2882 return deserialize_keras_object(

2883 name,

2884 module_objects=globals(),

2885 custom_objects=custom_objects,

2886 printable_module_name="loss function",

2887 )

2888

2889

2890@keras_export("keras.losses.get")

2891def get(identifier):

2892 """Retrieves a Keras loss as a `function`/`Loss` class instance.

2893

2894 The `identifier` may be the string name of a loss function or `Loss` class.

2895

2896 >>> loss = tf.keras.losses.get("categorical_crossentropy")

2897 >>> type(loss)

2898 <class 'function'>

2899 >>> loss = tf.keras.losses.get("CategoricalCrossentropy")

2900 >>> type(loss)

2901 <class '...keras.losses.CategoricalCrossentropy'>

2902

2903 You can also specify `config` of the loss to this function by passing dict

2904 containing `class_name` and `config` as an identifier. Also note that the

2905 `class_name` must map to a `Loss` class

2906

2907 >>> identifier = {"class_name": "CategoricalCrossentropy",

2908 ... "config": {"from_logits": True}}

2909 >>> loss = tf.keras.losses.get(identifier)

2910 >>> type(loss)

2911 <class '...keras.losses.CategoricalCrossentropy'>

2912

2913 Args:

2914 identifier: A loss identifier. One of None or string name of a loss

2915 function/class or loss configuration dictionary or a loss function or a

2916 loss class instance.

2917

2918 Returns:

2919 A Keras loss as a `function`/ `Loss` class instance.

2920

2921 Raises:

2922 ValueError: If `identifier` cannot be interpreted.

2923 """

2924 if identifier is None:

2925 return None

2926 if isinstance(identifier, str):

2927 identifier = str(identifier)

2928 use_legacy_format = "module" not in identifier

2929 return deserialize(identifier, use_legacy_format=use_legacy_format)

2930 if isinstance(identifier, dict):

2931 return deserialize(identifier)

2932 if callable(identifier):

2933 return identifier

2934 raise ValueError(

2935 f"Could not interpret loss function identifier: {identifier}"

2936 )

2937

2938

2939LABEL_DTYPES_FOR_LOSSES = {

2940 tf.compat.v1.losses.sparse_softmax_cross_entropy: "int32",

2941 sparse_categorical_crossentropy: "int32",

2942}

2943