Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/losses.py: 41%

425 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15 

16"""Built-in loss functions.""" 

17 

18 

19import abc 

20import functools 

21import warnings 

22 

23import tensorflow.compat.v2 as tf 

24 

25from keras.src import backend 

26from keras.src.saving import saving_lib 

27from keras.src.saving.legacy import serialization as legacy_serialization 

28from keras.src.saving.serialization_lib import deserialize_keras_object 

29from keras.src.saving.serialization_lib import serialize_keras_object 

30from keras.src.utils import losses_utils 

31from keras.src.utils import tf_utils 

32 

33# isort: off 

34from tensorflow.python.ops.ragged import ragged_map_ops 

35from tensorflow.python.ops.ragged import ragged_util 

36from tensorflow.python.util import dispatch 

37from tensorflow.python.util.tf_export import keras_export 

38from tensorflow.tools.docs import doc_controls 

39 

40 

41@keras_export("keras.losses.Loss") 

42class Loss: 

43 """Loss base class. 

44 

45 To be implemented by subclasses: 

46 * `call()`: Contains the logic for loss calculation using `y_true`, 

47 `y_pred`. 

48 

49 Example subclass implementation: 

50 

51 ```python 

52 class MeanSquaredError(Loss): 

53 

54 def call(self, y_true, y_pred): 

55 return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1) 

56 ``` 

57 

58 When using a Loss under a `tf.distribute.Strategy`, except when passing it 

59 to `Model.compile()` for use by `Model.fit()`, please use reduction 

60 types 'SUM' or 'NONE', and reduce losses explicitly. Using 'AUTO' or 

61 'SUM_OVER_BATCH_SIZE' will raise an error when calling the Loss object 

62 from a custom training loop or from user-defined code in `Layer.call()`. 

63 Please see this custom training 

64 [tutorial](https://www.tensorflow.org/tutorials/distribute/custom_training) 

65 for more details on this. 

66 """ 

67 

68 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None): 

69 """Initializes `Loss` class. 

70 

71 Args: 

72 reduction: Type of `tf.keras.losses.Reduction` to apply to 

73 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

74 option will be determined by the usage context. For almost all cases 

75 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

76 `tf.distribute.Strategy`, except via `Model.compile()` and 

77 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

78 will raise an error. Please see this custom training [tutorial]( 

79 https://www.tensorflow.org/tutorials/distribute/custom_training) 

80 for more details. 

81 name: Optional name for the instance. 

82 """ 

83 losses_utils.ReductionV2.validate(reduction) 

84 self.reduction = reduction 

85 self.name = name 

86 # SUM_OVER_BATCH_SIZE is only allowed in losses managed by `fit` or 

87 # CannedEstimators. 

88 self._allow_sum_over_batch_size = False 

89 self._set_name_scope() 

90 

91 def _set_name_scope(self): 

92 """Creates a valid `name_scope` name.""" 

93 if self.name is None: 

94 self._name_scope = self.__class__.__name__.strip("_") 

95 elif self.name == "<lambda>": 

96 self._name_scope = "lambda" 

97 else: 

98 # E.g. '_my_loss' => 'my_loss' 

99 self._name_scope = self.name.strip("_") 

100 

101 def __call__(self, y_true, y_pred, sample_weight=None): 

102 """Invokes the `Loss` instance. 

103 

104 Args: 

105 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except 

106 sparse loss functions such as sparse categorical crossentropy where 

107 shape = `[batch_size, d0, .. dN-1]` 

108 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` 

109 sample_weight: Optional `sample_weight` acts as a coefficient for the 

110 loss. If a scalar is provided, then the loss is simply scaled by the 

111 given value. If `sample_weight` is a tensor of size `[batch_size]`, 

112 then the total loss for each sample of the batch is rescaled by the 

113 corresponding element in the `sample_weight` vector. If the shape of 

114 `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be 

115 broadcasted to this shape), then each loss element of `y_pred` is 

116 scaled by the corresponding value of `sample_weight`. (Note 

117 on `dN-1`: all loss functions reduce by 1 dimension, usually 

118 axis=-1.) 

119 

120 Returns: 

121 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has 

122 shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note 

123 `dN-1` because all loss functions reduce by 1 dimension, usually 

124 axis=-1.) 

125 

126 Raises: 

127 ValueError: If the shape of `sample_weight` is invalid. 

128 """ 

129 # If we are wrapping a lambda function, strip '<>' from the name as it is 

130 # not accepted in a scope name. 

131 graph_ctx = tf_utils.graph_context_for_symbolic_tensors( 

132 y_true, y_pred, sample_weight 

133 ) 

134 with backend.name_scope(self._name_scope), graph_ctx: 

135 if tf.executing_eagerly(): 

136 call_fn = self.call 

137 else: 

138 call_fn = tf.__internal__.autograph.tf_convert( 

139 self.call, tf.__internal__.autograph.control_status_ctx() 

140 ) 

141 

142 losses = call_fn(y_true, y_pred) 

143 

144 in_mask = losses_utils.get_mask(y_pred) 

145 out_mask = losses_utils.get_mask(losses) 

146 
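# Note (added comment, not part of the original file): `in_mask` is the Keras
# mask attached to `y_pred` (if any) and `out_mask` is the mask attached to the
# per-sample `losses`; the branches below intersect whichever masks exist so
# that masked entries are excluded from the weighted reduction performed by
# `losses_utils.compute_weighted_loss` further down.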

147 if in_mask is not None and out_mask is not None: 

148 mask = in_mask & out_mask 

149 elif in_mask is not None: 

150 mask = in_mask 

151 elif out_mask is not None: 

152 mask = out_mask 

153 else: 

154 mask = None 

155 

156 reduction = self._get_reduction() 

157 sample_weight = losses_utils.apply_valid_mask( 

158 losses, sample_weight, mask, reduction 

159 ) 

160 return losses_utils.compute_weighted_loss( 

161 losses, sample_weight, reduction=reduction 

162 ) 

163 

164 @classmethod 

165 def from_config(cls, config): 

166 """Instantiates a `Loss` from its config (output of `get_config()`). 

167 

168 Args: 

169 config: Output of `get_config()`. 

170 

171 Returns: 

172 A `Loss` instance. 

173 """ 

174 return cls(**config) 

175 

176 def get_config(self): 

177 """Returns the config dictionary for a `Loss` instance.""" 

178 return {"reduction": self.reduction, "name": self.name} 

179 

180 @abc.abstractmethod 

181 @doc_controls.for_subclass_implementers 

182 def call(self, y_true, y_pred): 

183 """Invokes the `Loss` instance. 

184 

185 Args: 

186 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except 

187 sparse loss functions such as sparse categorical crossentropy where 

188 shape = `[batch_size, d0, .. dN-1]` 

189 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` 

190 

191 Returns: 

192 Loss values with the shape `[batch_size, d0, .. dN-1]`. 

193 """ 

194 raise NotImplementedError("Must be implemented in subclasses.") 

195 

196 def _get_reduction(self): 

197 """Handles `AUTO` reduction cases and returns the reduction value.""" 

198 if ( 

199 not self._allow_sum_over_batch_size 

200 and tf.distribute.has_strategy() 

201 and ( 

202 self.reduction == losses_utils.ReductionV2.AUTO 

203 or self.reduction 

204 == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE 

205 ) 

206 ): 

207 raise ValueError( 

208 "Please use `tf.keras.losses.Reduction.SUM` or " 

209 "`tf.keras.losses.Reduction.NONE` for loss reduction when " 

210 "losses are used with `tf.distribute.Strategy`, " 

211 "except for specifying losses in `Model.compile()` " 

212 "for use by the built-in training looop `Model.fit()`.\n" 

213 "Please see https://www.tensorflow.org/tutorials" 

214 "/distribute/custom_training for more details." 

215 ) 

216 

217 if self.reduction == losses_utils.ReductionV2.AUTO: 

218 return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE 

219 return self.reduction 

220 

221 
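# Illustrative sketch (added, not part of the original file): the docstring and
# error message above ask for `Reduction.SUM` or `Reduction.NONE` plus an
# explicit reduction when a loss is called under `tf.distribute.Strategy`
# outside `Model.fit()`. A minimal custom-training-loop pattern, assuming a
# caller-defined `global_batch_size`, could look like:
#
#   loss_obj = tf.keras.losses.MeanSquaredError(
#       reduction=tf.keras.losses.Reduction.NONE)
#   per_example = loss_obj(y_true, y_pred)  # shape [batch_size]
#   loss = tf.reduce_sum(per_example) / global_batch_size
#
# `tf.nn.compute_average_loss(per_example, global_batch_size=global_batch_size)`
# performs the same scaling.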

222@keras_export("keras.__internal__.losses.LossFunctionWrapper", v1=[]) 

223class LossFunctionWrapper(Loss): 

224 """Wraps a loss function in the `Loss` class.""" 

225 

226 def __init__( 

227 self, fn, reduction=losses_utils.ReductionV2.AUTO, name=None, **kwargs 

228 ): 

229 """Initializes `LossFunctionWrapper` class. 

230 

231 Args: 

232 fn: The loss function to wrap, with signature `fn(y_true, y_pred, 

233 **kwargs)`. 

234 reduction: Type of `tf.keras.losses.Reduction` to apply to 

235 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

236 option will be determined by the usage context. For almost all cases 

237 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

238 `tf.distribute.Strategy`, except via `Model.compile()` and 

239 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

240 will raise an error. Please see this custom training [tutorial]( 

241 https://www.tensorflow.org/tutorials/distribute/custom_training) 

242 for more details. 

243 name: Optional name for the instance. 

244 **kwargs: The keyword arguments that are passed on to `fn`. 

245 """ 

246 super().__init__(reduction=reduction, name=name) 

247 self.fn = fn 

248 self._fn_kwargs = kwargs 

249 

250 def call(self, y_true, y_pred): 

251 """Invokes the `LossFunctionWrapper` instance. 

252 

253 Args: 

254 y_true: Ground truth values. 

255 y_pred: The predicted values. 

256 

257 Returns: 

258 Loss values per sample. 

259 """ 

260 if tf.is_tensor(y_pred) and tf.is_tensor(y_true): 

261 y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( 

262 y_pred, y_true 

263 ) 

264 

265 ag_fn = tf.__internal__.autograph.tf_convert( 

266 self.fn, tf.__internal__.autograph.control_status_ctx() 

267 ) 

268 return ag_fn(y_true, y_pred, **self._fn_kwargs) 

269 

270 def get_config(self): 

271 config = {} 

272 for k, v in self._fn_kwargs.items(): 

273 config[k] = ( 

274 backend.eval(v) if tf_utils.is_tensor_or_variable(v) else v 

275 ) 

276 

277 if saving_lib.saving_v3_enabled(): 

278 from keras.src.utils import get_registered_name 

279 

280 config["fn"] = get_registered_name(self.fn) 

281 

282 base_config = super().get_config() 

283 return dict(list(base_config.items()) + list(config.items())) 

284 

285 @classmethod 

286 def from_config(cls, config): 

287 """Instantiates a `Loss` from its config (output of `get_config()`). 

288 

289 Args: 

290 config: Output of `get_config()`. 

291 

292 Returns: 

293 A `keras.losses.Loss` instance. 

294 """ 

295 if saving_lib.saving_v3_enabled(): 

296 fn_name = config.pop("fn", None) 

297 if fn_name and cls is LossFunctionWrapper: 

298 config["fn"] = get(fn_name) 

299 return cls(**config) 

300 

301 
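# Illustrative sketch (added, not part of the original file): the built-in loss
# classes below are thin `LossFunctionWrapper` subclasses. A custom function
# can be wrapped the same way; extra keyword arguments passed to the
# constructor are forwarded to the wrapped function on every call. Here
# `scaled_mse` is a hypothetical example function:
#
#   def scaled_mse(y_true, y_pred, scale=1.0):
#       return scale * tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
#
#   loss_obj = LossFunctionWrapper(scaled_mse, name="scaled_mse", scale=0.5)
#   # loss_obj(y_true, y_pred) calls scaled_mse(y_true, y_pred, scale=0.5)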

302@keras_export("keras.losses.MeanSquaredError") 

303class MeanSquaredError(LossFunctionWrapper): 

304 """Computes the mean of squares of errors between labels and predictions. 

305 

306 `loss = mean(square(y_true - y_pred))` 

307 

308 Standalone usage: 

309 

310 >>> y_true = [[0., 1.], [0., 0.]] 

311 >>> y_pred = [[1., 1.], [1., 0.]] 

312 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

313 >>> mse = tf.keras.losses.MeanSquaredError() 

314 >>> mse(y_true, y_pred).numpy() 

315 0.5 

316 

317 >>> # Calling with 'sample_weight'. 

318 >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() 

319 0.25 

320 

321 >>> # Using 'sum' reduction type. 

322 >>> mse = tf.keras.losses.MeanSquaredError( 

323 ... reduction=tf.keras.losses.Reduction.SUM) 

324 >>> mse(y_true, y_pred).numpy() 

325 1.0 

326 

327 >>> # Using 'none' reduction type. 

328 >>> mse = tf.keras.losses.MeanSquaredError( 

329 ... reduction=tf.keras.losses.Reduction.NONE) 

330 >>> mse(y_true, y_pred).numpy() 

331 array([0.5, 0.5], dtype=float32) 

332 

333 Usage with the `compile()` API: 

334 

335 ```python 

336 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError()) 

337 ``` 

338 """ 

339 

340 def __init__( 

341 self, reduction=losses_utils.ReductionV2.AUTO, name="mean_squared_error" 

342 ): 

343 """Initializes `MeanSquaredError` instance. 

344 

345 Args: 

346 reduction: Type of `tf.keras.losses.Reduction` to apply to 

347 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

348 option will be determined by the usage context. For almost all cases 

349 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

350 `tf.distribute.Strategy`, except via `Model.compile()` and 

351 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

352 will raise an error. Please see this custom training [tutorial]( 

353 https://www.tensorflow.org/tutorials/distribute/custom_training) 

354 for more details. 

355 name: Optional name for the instance. Defaults to 

356 'mean_squared_error'. 

357 """ 

358 super().__init__(mean_squared_error, name=name, reduction=reduction) 

359 

360 

361@keras_export("keras.losses.MeanAbsoluteError") 

362class MeanAbsoluteError(LossFunctionWrapper): 

363 """Computes the mean of absolute difference between labels and predictions. 

364 

365 `loss = mean(abs(y_true - y_pred))` 

366 

367 Standalone usage: 

368 

369 >>> y_true = [[0., 1.], [0., 0.]] 

370 >>> y_pred = [[1., 1.], [1., 0.]] 

371 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

372 >>> mae = tf.keras.losses.MeanAbsoluteError() 

373 >>> mae(y_true, y_pred).numpy() 

374 0.5 

375 

376 >>> # Calling with 'sample_weight'. 

377 >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() 

378 0.25 

379 

380 >>> # Using 'sum' reduction type. 

381 >>> mae = tf.keras.losses.MeanAbsoluteError( 

382 ... reduction=tf.keras.losses.Reduction.SUM) 

383 >>> mae(y_true, y_pred).numpy() 

384 1.0 

385 

386 >>> # Using 'none' reduction type. 

387 >>> mae = tf.keras.losses.MeanAbsoluteError( 

388 ... reduction=tf.keras.losses.Reduction.NONE) 

389 >>> mae(y_true, y_pred).numpy() 

390 array([0.5, 0.5], dtype=float32) 

391 

392 Usage with the `compile()` API: 

393 

394 ```python 

395 model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError()) 

396 ``` 

397 """ 

398 

399 def __init__( 

400 self, 

401 reduction=losses_utils.ReductionV2.AUTO, 

402 name="mean_absolute_error", 

403 ): 

404 """Initializes `MeanAbsoluteError` instance. 

405 

406 Args: 

407 reduction: Type of `tf.keras.losses.Reduction` to apply to 

408 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

409 option will be determined by the usage context. For almost all cases 

410 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

411 `tf.distribute.Strategy`, except via `Model.compile()` and 

412 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

413 will raise an error. Please see this custom training [tutorial]( 

414 https://www.tensorflow.org/tutorials/distribute/custom_training) 

415 for more details. 

416 name: Optional name for the instance. Defaults to 

417 'mean_absolute_error'. 

418 """ 

419 super().__init__(mean_absolute_error, name=name, reduction=reduction) 

420 

421 

422@keras_export("keras.losses.MeanAbsolutePercentageError") 

423class MeanAbsolutePercentageError(LossFunctionWrapper): 

424 """Computes the mean absolute percentage error between `y_true` & `y_pred`. 

425 

426 Formula: 

427 

428 `loss = 100 * abs((y_true - y_pred) / y_true)` 

429 

430 Note that to avoid dividing by zero, a small epsilon value 

431 is added to the denominator. 

432 

433 Standalone usage: 

434 

435 >>> y_true = [[2., 1.], [2., 3.]] 

436 >>> y_pred = [[1., 1.], [1., 0.]] 

437 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

438 >>> mape = tf.keras.losses.MeanAbsolutePercentageError() 

439 >>> mape(y_true, y_pred).numpy() 

440 50. 

441 

442 >>> # Calling with 'sample_weight'. 

443 >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() 

444 20. 

445 

446 >>> # Using 'sum' reduction type. 

447 >>> mape = tf.keras.losses.MeanAbsolutePercentageError( 

448 ... reduction=tf.keras.losses.Reduction.SUM) 

449 >>> mape(y_true, y_pred).numpy() 

450 100. 

451 

452 >>> # Using 'none' reduction type. 

453 >>> mape = tf.keras.losses.MeanAbsolutePercentageError( 

454 ... reduction=tf.keras.losses.Reduction.NONE) 

455 >>> mape(y_true, y_pred).numpy() 

456 array([25., 75.], dtype=float32) 

457 

458 Usage with the `compile()` API: 

459 

460 ```python 

461 model.compile(optimizer='sgd', 

462 loss=tf.keras.losses.MeanAbsolutePercentageError()) 

463 ``` 

464 """ 

465 

466 def __init__( 

467 self, 

468 reduction=losses_utils.ReductionV2.AUTO, 

469 name="mean_absolute_percentage_error", 

470 ): 

471 """Initializes `MeanAbsolutePercentageError` instance. 

472 

473 Args: 

474 reduction: Type of `tf.keras.losses.Reduction` to apply to 

475 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

476 option will be determined by the usage context. For almost all cases 

477 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

478 `tf.distribute.Strategy`, except via `Model.compile()` and 

479 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

480 will raise an error. Please see this custom training [tutorial]( 

481 https://www.tensorflow.org/tutorials/distribute/custom_training) 

482 for more details. 

483 name: Optional name for the instance. Defaults to 

484 'mean_absolute_percentage_error'. 

485 """ 

486 super().__init__( 

487 mean_absolute_percentage_error, name=name, reduction=reduction 

488 ) 

489 

490 

491@keras_export("keras.losses.MeanSquaredLogarithmicError") 

492class MeanSquaredLogarithmicError(LossFunctionWrapper): 

493 """Computes the mean squared logarithmic error between `y_true` & `y_pred`. 

494 

495 `loss = square(log(y_true + 1.) - log(y_pred + 1.))` 

496 

497 Standalone usage: 

498 

499 >>> y_true = [[0., 1.], [0., 0.]] 

500 >>> y_pred = [[1., 1.], [1., 0.]] 

501 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

502 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError() 

503 >>> msle(y_true, y_pred).numpy() 

504 0.240 

505 

506 >>> # Calling with 'sample_weight'. 

507 >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() 

508 0.120 

509 

510 >>> # Using 'sum' reduction type. 

511 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( 

512 ... reduction=tf.keras.losses.Reduction.SUM) 

513 >>> msle(y_true, y_pred).numpy() 

514 0.480 

515 

516 >>> # Using 'none' reduction type. 

517 >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( 

518 ... reduction=tf.keras.losses.Reduction.NONE) 

519 >>> msle(y_true, y_pred).numpy() 

520 array([0.240, 0.240], dtype=float32) 

521 

522 Usage with the `compile()` API: 

523 

524 ```python 

525 model.compile(optimizer='sgd', 

526 loss=tf.keras.losses.MeanSquaredLogarithmicError()) 

527 ``` 

528 """ 

529 

530 def __init__( 

531 self, 

532 reduction=losses_utils.ReductionV2.AUTO, 

533 name="mean_squared_logarithmic_error", 

534 ): 

535 """Initializes `MeanSquaredLogarithmicError` instance. 

536 

537 Args: 

538 reduction: Type of `tf.keras.losses.Reduction` to apply to 

539 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

540 option will be determined by the usage context. For almost all cases 

541 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

542 `tf.distribute.Strategy`, except via `Model.compile()` and 

543 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

544 will raise an error. Please see this custom training [tutorial]( 

545 https://www.tensorflow.org/tutorials/distribute/custom_training) 

546 for more details. 

547 name: Optional name for the instance. Defaults to 

548 'mean_squared_logarithmic_error'. 

549 """ 

550 super().__init__( 

551 mean_squared_logarithmic_error, name=name, reduction=reduction 

552 ) 

553 

554 

555@keras_export("keras.losses.BinaryCrossentropy") 

556class BinaryCrossentropy(LossFunctionWrapper): 

557 """Computes the cross-entropy loss between true labels and predicted labels. 

558 

559 Use this cross-entropy loss for binary (0 or 1) classification applications. 

560 The loss function requires the following inputs: 

561 

562 - `y_true` (true label): This is either 0 or 1. 

563 - `y_pred` (predicted value): This is the model's prediction, i.e., a single 

564 floating-point value which either represents a 

565 [logit](https://en.wikipedia.org/wiki/Logit) (i.e., value in [-inf, inf] 

566 when `from_logits=True`) or a probability (i.e., value in [0., 1.] when 

567 `from_logits=False`). 

568 

569 **Recommended Usage:** (set `from_logits=True`) 

570 

571 With `tf.keras` API: 

572 

573 ```python 

574 model.compile( 

575 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), 

576 .... 

577 ) 

578 ``` 

579 

580 As a standalone function: 

581 

582 >>> # Example 1: (batch_size = 1, number of samples = 4) 

583 >>> y_true = [0, 1, 0, 0] 

584 >>> y_pred = [-18.6, 0.51, 2.94, -12.8] 

585 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) 

586 >>> bce(y_true, y_pred).numpy() 

587 0.865 

588 

589 >>> # Example 2: (batch_size = 2, number of samples = 4) 

590 >>> y_true = [[0, 1], [0, 0]] 

591 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] 

592 >>> # Using default 'auto'/'sum_over_batch_size' reduction type. 

593 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) 

594 >>> bce(y_true, y_pred).numpy() 

595 0.865 

596 >>> # Using the 'sample_weight' argument 

597 >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

598 0.243 

599 >>> # Using 'sum' reduction type. 

600 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, 

601 ... reduction=tf.keras.losses.Reduction.SUM) 

602 >>> bce(y_true, y_pred).numpy() 

603 1.730 

604 >>> # Using 'none' reduction type. 

605 >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, 

606 ... reduction=tf.keras.losses.Reduction.NONE) 

607 >>> bce(y_true, y_pred).numpy() 

608 array([0.235, 1.496], dtype=float32) 

609 

610 **Default Usage:** (set `from_logits=False`) 

611 

612 >>> # Make the following updates to the above "Recommended Usage" section 

613 >>> # 1. Set `from_logits=False` 

614 >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False) 

615 >>> # 2. Update `y_pred` to use probabilities instead of logits 

616 >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]] 

617 """ 

618 

619 def __init__( 

620 self, 

621 from_logits=False, 

622 label_smoothing=0.0, 

623 axis=-1, 

624 reduction=losses_utils.ReductionV2.AUTO, 

625 name="binary_crossentropy", 

626 ): 

627 """Initializes `BinaryCrossentropy` instance. 

628 

629 Args: 

630 from_logits: Whether to interpret `y_pred` as a tensor of 

631 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we 

632 assume that `y_pred` contains probabilities (i.e., values in [0, 

633 1]). 

634 label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 

635 0, we compute the loss between the predicted labels and a smoothed 

636 version of the true labels, where the smoothing squeezes the labels 

637 towards 0.5. Larger values of `label_smoothing` correspond to 

638 heavier smoothing. 

639 axis: The axis along which to compute crossentropy (the features 

640 axis). Defaults to -1. 

641 reduction: Type of `tf.keras.losses.Reduction` to apply to 

642 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

643 option will be determined by the usage context. For almost all cases 

644 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

645 `tf.distribute.Strategy`, except via `Model.compile()` and 

646 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

647 will raise an error. Please see this custom training [tutorial]( 

648 https://www.tensorflow.org/tutorials/distribute/custom_training) 

649 for more details. 

650 name: Name for the op. Defaults to 'binary_crossentropy'. 

651 """ 

652 super().__init__( 

653 binary_crossentropy, 

654 name=name, 

655 reduction=reduction, 

656 from_logits=from_logits, 

657 label_smoothing=label_smoothing, 

658 axis=axis, 

659 ) 

660 self.from_logits = from_logits 

661 

662 

663@keras_export("keras.losses.BinaryFocalCrossentropy") 

664class BinaryFocalCrossentropy(LossFunctionWrapper): 

665 """Computes focal cross-entropy loss between true labels and predictions. 

666 

667 Binary cross-entropy loss is often used for binary (0 or 1) classification 

668 tasks. The loss function requires the following inputs: 

669 

670 - `y_true` (true label): This is either 0 or 1. 

671 - `y_pred` (predicted value): This is the model's prediction, i.e., a single 

672 floating-point value which either represents a 

673 [logit](https://en.wikipedia.org/wiki/Logit) (i.e., value in [-inf, inf] 

674 when `from_logits=True`) or a probability (i.e., value in `[0., 1.]` when 

675 `from_logits=False`). 

676 

677 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it 

678 helps to apply a "focal factor" to down-weight easy examples and focus more 

679 on hard examples. By default, the focal tensor is computed as follows: 

680 

681 `focal_factor = (1 - output) ** gamma` for class 1 

682 `focal_factor = output ** gamma` for class 0 

683 where `gamma` is a focusing parameter. When `gamma=0`, this function is 

684 equivalent to the binary crossentropy loss. 

685 

686 With the `compile()` API: 

687 

688 ```python 

689 model.compile( 

690 loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True), 

691 .... 

692 ) 

693 ``` 

694 

695 As a standalone function: 

696 

697 >>> # Example 1: (batch_size = 1, number of samples = 4) 

698 >>> y_true = [0, 1, 0, 0] 

699 >>> y_pred = [-18.6, 0.51, 2.94, -12.8] 

700 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2, 

701 ... from_logits=True) 

702 >>> loss(y_true, y_pred).numpy() 

703 0.691 

704 

705 >>> # Apply class weight 

706 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

707 ... apply_class_balancing=True, gamma=2, from_logits=True) 

708 >>> loss(y_true, y_pred).numpy() 

709 0.51 

710 

711 >>> # Example 2: (batch_size = 2, number of samples = 4) 

712 >>> y_true = [[0, 1], [0, 0]] 

713 >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] 

714 >>> # Using default 'auto'/'sum_over_batch_size' reduction type. 

715 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3, 

716 ... from_logits=True) 

717 >>> loss(y_true, y_pred).numpy() 

718 0.647 

719 

720 >>> # Apply class weight 

721 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

722 ... apply_class_balancing=True, gamma=3, from_logits=True) 

723 >>> loss(y_true, y_pred).numpy() 

724 0.482 

725 

726 >>> # Using the 'sample_weight' argument with focal effect 

727 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3, 

728 ... from_logits=True) 

729 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

730 0.133 

731 

732 >>> # Apply class weight 

733 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

734 ... apply_class_balancing=True, gamma=3, from_logits=True) 

735 >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

736 0.097 

737 

738 >>> # Using 'sum' reduction type. 

739 >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4, 

740 ... from_logits=True, 

741 ... reduction=tf.keras.losses.Reduction.SUM) 

742 >>> loss(y_true, y_pred).numpy() 

743 1.222 

744 

745 >>> # Apply class weight 

746 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

747 ... apply_class_balancing=True, gamma=4, from_logits=True, 

748 ... reduction=tf.keras.losses.Reduction.SUM) 

749 >>> loss(y_true, y_pred).numpy() 

750 0.914 

751 

752 >>> # Using 'none' reduction type. 

753 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

754 ... gamma=5, from_logits=True, 

755 ... reduction=tf.keras.losses.Reduction.NONE) 

756 >>> loss(y_true, y_pred).numpy() 

757 array([0.0017, 1.1561], dtype=float32) 

758 

759 >>> # Apply class weight 

760 >>> loss = tf.keras.losses.BinaryFocalCrossentropy( 

761 ... apply_class_balancing=True, gamma=5, from_logits=True, 

762 ... reduction=tf.keras.losses.Reduction.NONE) 

763 >>> loss(y_true, y_pred).numpy() 

764 array([0.0004, 0.8670], dtype=float32) 

765 

766 

767 Args: 

768 apply_class_balancing: A bool, whether to apply weight balancing on the 

769 binary classes 0 and 1. 

770 alpha: A weight balancing factor for class 1, default is `0.25` as 

771 mentioned in the reference [Lin et al., 2018]( 

772 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is 

773 `1.0 - alpha`. 

774 gamma: A focusing parameter used to compute the focal factor, default is 

775 `2.0` as mentioned in the reference 

776 [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). 

777 from_logits: Whether to interpret `y_pred` as a tensor of 

778 [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we 

779 assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). 

780 label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When > 

781 `0`, we compute the loss between the predicted labels and a smoothed 

782 version of the true labels, where the smoothing squeezes the labels 

783 towards `0.5`. Larger values of `label_smoothing` correspond to heavier 

784 smoothing. 

785 axis: The axis along which to compute crossentropy (the features axis). 

786 Defaults to `-1`. 

787 reduction: Type of `tf.keras.losses.Reduction` to apply to 

788 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

789 option will be determined by the usage context. For almost all cases 

790 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

791 `tf.distribute.Strategy`, except via `Model.compile()` and 

792 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

793 will raise an error. Please see this custom training [tutorial]( 

794 https://www.tensorflow.org/tutorials/distribute/custom_training) 

795 for more details. 

796 name: Name for the op. Defaults to 'binary_focal_crossentropy'. 

797 """ 

798 

799 def __init__( 

800 self, 

801 apply_class_balancing=False, 

802 alpha=0.25, 

803 gamma=2.0, 

804 from_logits=False, 

805 label_smoothing=0.0, 

806 axis=-1, 

807 reduction=losses_utils.ReductionV2.AUTO, 

808 name="binary_focal_crossentropy", 

809 ): 

810 """Initializes `BinaryFocalCrossentropy` instance.""" 

811 super().__init__( 

812 binary_focal_crossentropy, 

813 apply_class_balancing=apply_class_balancing, 

814 alpha=alpha, 

815 gamma=gamma, 

816 name=name, 

817 reduction=reduction, 

818 from_logits=from_logits, 

819 label_smoothing=label_smoothing, 

820 axis=axis, 

821 ) 

822 self.from_logits = from_logits 

823 self.apply_class_balancing = apply_class_balancing 

824 self.alpha = alpha 

825 self.gamma = gamma 

826 

827 def get_config(self): 

828 config = { 

829 "apply_class_balancing": self.apply_class_balancing, 

830 "alpha": self.alpha, 

831 "gamma": self.gamma, 

832 } 

833 base_config = super().get_config() 

834 return dict(list(base_config.items()) + list(config.items())) 

835 

836 

837@keras_export("keras.losses.CategoricalCrossentropy") 

838class CategoricalCrossentropy(LossFunctionWrapper): 

839 """Computes the crossentropy loss between the labels and predictions. 

840 

841 Use this crossentropy loss function when there are two or more label 

842 classes. We expect labels to be provided in a `one_hot` representation. If 

843 you want to provide labels as integers, please use 

844 `SparseCategoricalCrossentropy` loss. There should be `# classes` floating 

845 point values per feature. 

846 

847 In the snippet below, there are `# classes` floating point values per 

848 example. The shapes of both `y_pred` and `y_true` are 

849 `[batch_size, num_classes]`. 

850 

851 Standalone usage: 

852 

853 >>> y_true = [[0, 1, 0], [0, 0, 1]] 

854 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 

855 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

856 >>> cce = tf.keras.losses.CategoricalCrossentropy() 

857 >>> cce(y_true, y_pred).numpy() 

858 1.177 

859 

860 >>> # Calling with 'sample_weight'. 

861 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() 

862 0.814 

863 

864 >>> # Using 'sum' reduction type. 

865 >>> cce = tf.keras.losses.CategoricalCrossentropy( 

866 ... reduction=tf.keras.losses.Reduction.SUM) 

867 >>> cce(y_true, y_pred).numpy() 

868 2.354 

869 

870 >>> # Using 'none' reduction type. 

871 >>> cce = tf.keras.losses.CategoricalCrossentropy( 

872 ... reduction=tf.keras.losses.Reduction.NONE) 

873 >>> cce(y_true, y_pred).numpy() 

874 array([0.0513, 2.303], dtype=float32) 

875 

876 Usage with the `compile()` API: 

877 

878 ```python 

879 model.compile(optimizer='sgd', 

880 loss=tf.keras.losses.CategoricalCrossentropy()) 

881 ``` 

882 """ 

883 

884 def __init__( 

885 self, 

886 from_logits=False, 

887 label_smoothing=0.0, 

888 axis=-1, 

889 reduction=losses_utils.ReductionV2.AUTO, 

890 name="categorical_crossentropy", 

891 ): 

892 """Initializes `CategoricalCrossentropy` instance. 

893 

894 Args: 

895 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

896 default, we assume that `y_pred` encodes a probability distribution. 

897 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, 

898 meaning the confidence on label values is relaxed. For example, if 

899 `0.1`, use `0.1 / num_classes` for non-target labels and 

900 `0.9 + 0.1 / num_classes` for target labels. 

901 axis: The axis along which to compute crossentropy (the features 

902 axis). Defaults to -1. 

903 reduction: Type of `tf.keras.losses.Reduction` to apply to 

904 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

905 option will be determined by the usage context. For almost all cases 

906 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

907 `tf.distribute.Strategy`, except via `Model.compile()` and 

908 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

909 will raise an error. Please see this custom training [tutorial]( 

910 https://www.tensorflow.org/tutorials/distribute/custom_training) 

911 for more details. 

912 name: Optional name for the instance. 

913 Defaults to 'categorical_crossentropy'. 

914 """ 

915 super().__init__( 

916 categorical_crossentropy, 

917 name=name, 

918 reduction=reduction, 

919 from_logits=from_logits, 

920 label_smoothing=label_smoothing, 

921 axis=axis, 

922 ) 

923 

924 

925@keras_export("keras.losses.CategoricalFocalCrossentropy") 

926class CategoricalFocalCrossentropy(LossFunctionWrapper): 

927 """Computes the alpha balanced focal crossentropy loss. 

928 

929 Use this crossentropy loss function when there are two or more label 

930 classes and if you want to handle class imbalance without using 

931 `class_weights`. We expect labels to be provided in a `one_hot` 

932 representation. 

933 

934 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it 

935 helps to apply a focal factor to down-weight easy examples and focus more on 

936 hard examples. The general formula for the focal loss (FL) 

937 is as follows: 

938 

939 `FL(p_t) = −(1 − p_t)^gamma * log(p_t)` 

940 

941 where `p_t` is defined as follows: 

942 `p_t = output if y_true == 1, else 1 - output` 

943 

944 `(1 − p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing 

945 parameter. When `gamma` = 0, there is no focal effect on the cross entropy. 

946 `gamma` reduces the importance given to simple examples in a smooth manner. 

947 

948 The authors use the alpha-balanced variant of focal loss (FL) in the paper: 

949 `FL(p_t) = −alpha * (1 − p_t)^gamma * log(p_t)` 

950 

951 where `alpha` is the weight factor for the classes. If `alpha` = 1, the 

952 loss won't be able to handle class imbalance properly as all 

953 classes will have the same weight. This can be a constant or a list of 

954 constants. If alpha is a list, it must have the same length as the number 

955 of classes. 

956 

957 The formula above can be generalized to: 

958 `FL(p_t) = alpha * (1 − p_t)^gamma * CrossEntropy(y_true, y_pred)` 

959 

960 where the minus sign comes from `CrossEntropy(y_true, y_pred)` (CE). 

961 

962 Extending this to the multi-class case is straightforward: 

963 `FL(p_t) = alpha * (1 − p_t)^gamma * CategoricalCE(y_true, y_pred)` 

964 

965 In the snippet below, there are `# classes` floating point values per 

966 example. The shapes of both `y_pred` and `y_true` are 

967 `[batch_size, num_classes]`. 

968 

969 Standalone usage: 

970 

971 >>> y_true = [[0., 1., 0.], [0., 0., 1.]] 

972 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 

973 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

974 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy() 

975 >>> cce(y_true, y_pred).numpy() 

976 0.23315276 

977 

978 >>> # Calling with 'sample_weight'. 

979 >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() 

980 0.1632 

981 

982 >>> # Using 'sum' reduction type. 

983 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy( 

984 ... reduction=tf.keras.losses.Reduction.SUM) 

985 >>> cce(y_true, y_pred).numpy() 

986 0.46631 

987 

988 >>> # Using 'none' reduction type. 

989 >>> cce = tf.keras.losses.CategoricalFocalCrossentropy( 

990 ... reduction=tf.keras.losses.Reduction.NONE) 

991 >>> cce(y_true, y_pred).numpy() 

992 array([3.2058331e-05, 4.6627346e-01], dtype=float32) 

993 

994 Usage with the `compile()` API: 

995 ```python 

996 model.compile(optimizer='adam', 

997 loss=tf.keras.losses.CategoricalFocalCrossentropy()) 

998 ``` 

999 Args: 

1000 alpha: A weight balancing factor for all classes, default is `0.25` as 

1001 mentioned in the reference. It can be a list of floats or a scalar. 

1002 In the multi-class case, alpha may be set by inverse class 

1003 frequency by using `compute_class_weight` from `sklearn.utils`. 

1004 gamma: A focusing parameter, default is `2.0` as mentioned in the 

1005 reference. It helps to gradually reduce the importance given to 

1006 simple (easy) examples in a smooth manner. 

1007 from_logits: Whether `output` is expected to be a logits tensor. By 

1008 default, we consider that `output` encodes a probability 

1009 distribution. 

1010 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, 

1011 meaning the confidence on label values is relaxed. For example, if 

1012 `0.1`, use `0.1 / num_classes` for non-target labels and 

1013 `0.9 + 0.1 / num_classes` for target labels. 

1014 axis: The axis along which to compute crossentropy (the features 

1015 axis). Defaults to -1. 

1016 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1017 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1018 option will be determined by the usage context. For almost all cases 

1019 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1020 `tf.distribute.Strategy`, except via `Model.compile()` and 

1021 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1022 will raise an error. Please see this custom training [tutorial]( 

1023 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1024 for more details. 

1025 name: Optional name for the instance. 

1026 Defaults to 'categorical_focal_crossentropy'. 

1027 """ 

1028 

1029 def __init__( 

1030 self, 

1031 alpha=0.25, 

1032 gamma=2.0, 

1033 from_logits=False, 

1034 label_smoothing=0.0, 

1035 axis=-1, 

1036 reduction=losses_utils.ReductionV2.AUTO, 

1037 name="categorical_focal_crossentropy", 

1038 ): 

1039 """Initializes `CategoricalFocalCrossentropy` instance.""" 

1040 super().__init__( 

1041 categorical_focal_crossentropy, 

1042 alpha=alpha, 

1043 gamma=gamma, 

1044 name=name, 

1045 reduction=reduction, 

1046 from_logits=from_logits, 

1047 label_smoothing=label_smoothing, 

1048 axis=axis, 

1049 ) 

1050 self.from_logits = from_logits 

1051 self.alpha = alpha 

1052 self.gamma = gamma 

1053 

1054 def get_config(self): 

1055 config = { 

1056 "alpha": self.alpha, 

1057 "gamma": self.gamma, 

1058 } 

1059 base_config = super().get_config() 

1060 return dict(list(base_config.items()) + list(config.items())) 

1061 

1062 

1063@keras_export("keras.losses.SparseCategoricalCrossentropy") 

1064class SparseCategoricalCrossentropy(LossFunctionWrapper): 

1065 """Computes the crossentropy loss between the labels and predictions. 

1066 

1067 Use this crossentropy loss function when there are two or more label 

1068 classes. We expect labels to be provided as integers. If you want to 

1069 provide labels using `one-hot` representation, please use 

1070 `CategoricalCrossentropy` loss. There should be `# classes` floating point 

1071 values per feature for `y_pred` and a single floating point value per 

1072 feature for `y_true`. 

1073 

1074 In the snippet below, there is a single floating point value per example for 

1075 `y_true` and `# classes` floating point values per example for `y_pred`. 

1076 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is 

1077 `[batch_size, num_classes]`. 

1078 

1079 Standalone usage: 

1080 

1081 >>> y_true = [1, 2] 

1082 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 

1083 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1084 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy() 

1085 >>> scce(y_true, y_pred).numpy() 

1086 1.177 

1087 

1088 >>> # Calling with 'sample_weight'. 

1089 >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() 

1090 0.814 

1091 

1092 >>> # Using 'sum' reduction type. 

1093 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( 

1094 ... reduction=tf.keras.losses.Reduction.SUM) 

1095 >>> scce(y_true, y_pred).numpy() 

1096 2.354 

1097 

1098 >>> # Using 'none' reduction type. 

1099 >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( 

1100 ... reduction=tf.keras.losses.Reduction.NONE) 

1101 >>> scce(y_true, y_pred).numpy() 

1102 array([0.0513, 2.303], dtype=float32) 

1103 

1104 Usage with the `compile()` API: 

1105 

1106 ```python 

1107 model.compile(optimizer='sgd', 

1108 loss=tf.keras.losses.SparseCategoricalCrossentropy()) 

1109 ``` 

1110 """ 

1111 

1112 def __init__( 

1113 self, 

1114 from_logits=False, 

1115 ignore_class=None, 

1116 reduction=losses_utils.ReductionV2.AUTO, 

1117 name="sparse_categorical_crossentropy", 

1118 ): 

1119 """Initializes `SparseCategoricalCrossentropy` instance. 

1120 

1121 Args: 

1122 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

1123 default, we assume that `y_pred` encodes a probability distribution. 

1124 ignore_class: Optional integer. The ID of a class to be ignored during 

1125 loss computation. This is useful, for example, in segmentation 

1126 problems featuring a "void" class (commonly -1 or 255) in 

1127 segmentation maps. 

1128 By default (`ignore_class=None`), all classes are considered. 

1129 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1130 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1131 option will be determined by the usage context. For almost all cases 

1132 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1133 `tf.distribute.Strategy`, except via `Model.compile()` and 

1134 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1135 will raise an error. Please see this custom training [tutorial]( 

1136 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1137 for more details. 

1138 name: Optional name for the instance. Defaults to 

1139 'sparse_categorical_crossentropy'. 

1140 """ 

1141 super().__init__( 

1142 sparse_categorical_crossentropy, 

1143 name=name, 

1144 reduction=reduction, 

1145 from_logits=from_logits, 

1146 ignore_class=ignore_class, 

1147 ) 

1148 

1149 

1150@keras_export("keras.losses.Hinge") 

1151class Hinge(LossFunctionWrapper): 

1152 """Computes the hinge loss between `y_true` & `y_pred`. 

1153 

1154 `loss = maximum(1 - y_true * y_pred, 0)` 

1155 

1156 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are 

1157 provided, we will convert them to -1 or 1. 

1158 

1159 Standalone usage: 

1160 

1161 >>> y_true = [[0., 1.], [0., 0.]] 

1162 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

1163 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1164 >>> h = tf.keras.losses.Hinge() 

1165 >>> h(y_true, y_pred).numpy() 

1166 1.3 

1167 

1168 >>> # Calling with 'sample_weight'. 

1169 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() 

1170 0.55 

1171 

1172 >>> # Using 'sum' reduction type. 

1173 >>> h = tf.keras.losses.Hinge( 

1174 ... reduction=tf.keras.losses.Reduction.SUM) 

1175 >>> h(y_true, y_pred).numpy() 

1176 2.6 

1177 

1178 >>> # Using 'none' reduction type. 

1179 >>> h = tf.keras.losses.Hinge( 

1180 ... reduction=tf.keras.losses.Reduction.NONE) 

1181 >>> h(y_true, y_pred).numpy() 

1182 array([1.1, 1.5], dtype=float32) 

1183 

1184 Usage with the `compile()` API: 

1185 

1186 ```python 

1187 model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge()) 

1188 ``` 

1189 """ 

1190 

1191 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="hinge"): 

1192 """Initializes `Hinge` instance. 

1193 

1194 Args: 

1195 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1196 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1197 option will be determined by the usage context. For almost all cases 

1198 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1199 `tf.distribute.Strategy`, except via `Model.compile()` and 

1200 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1201 will raise an error. Please see this custom training [tutorial]( 

1202 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1203 for more details. 

1204 name: Optional name for the instance. Defaults to 'hinge'. 

1205 """ 

1206 super().__init__(hinge, name=name, reduction=reduction) 

1207 

1208 

1209@keras_export("keras.losses.SquaredHinge") 

1210class SquaredHinge(LossFunctionWrapper): 

1211 """Computes the squared hinge loss between `y_true` & `y_pred`. 

1212 

1213 `loss = square(maximum(1 - y_true * y_pred, 0))` 

1214 

1215 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are 

1216 provided, we will convert them to -1 or 1. 

1217 

1218 Standalone usage: 

1219 

1220 >>> y_true = [[0., 1.], [0., 0.]] 

1221 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

1222 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1223 >>> h = tf.keras.losses.SquaredHinge() 

1224 >>> h(y_true, y_pred).numpy() 

1225 1.86 

1226 

1227 >>> # Calling with 'sample_weight'. 

1228 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() 

1229 0.73 

1230 

1231 >>> # Using 'sum' reduction type. 

1232 >>> h = tf.keras.losses.SquaredHinge( 

1233 ... reduction=tf.keras.losses.Reduction.SUM) 

1234 >>> h(y_true, y_pred).numpy() 

1235 3.72 

1236 

1237 >>> # Using 'none' reduction type. 

1238 >>> h = tf.keras.losses.SquaredHinge( 

1239 ... reduction=tf.keras.losses.Reduction.NONE) 

1240 >>> h(y_true, y_pred).numpy() 

1241 array([1.46, 2.26], dtype=float32) 

1242 

1243 Usage with the `compile()` API: 

1244 

1245 ```python 

1246 model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge()) 

1247 ``` 

1248 """ 

1249 

1250 def __init__( 

1251 self, reduction=losses_utils.ReductionV2.AUTO, name="squared_hinge" 

1252 ): 

1253 """Initializes `SquaredHinge` instance. 

1254 

1255 Args: 

1256 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1257 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1258 option will be determined by the usage context. For almost all cases 

1259 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1260 `tf.distribute.Strategy`, except via `Model.compile()` and 

1261 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1262 will raise an error. Please see this custom training [tutorial]( 

1263 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1264 for more details. 

1265 name: Optional name for the instance. Defaults to 'squared_hinge'. 

1266 """ 

1267 super().__init__(squared_hinge, name=name, reduction=reduction) 

1268 

1269 

1270@keras_export("keras.losses.CategoricalHinge") 

1271class CategoricalHinge(LossFunctionWrapper): 

1272 """Computes the categorical hinge loss between `y_true` & `y_pred`. 

1273 

1274 `loss = maximum(neg - pos + 1, 0)` 

1275 where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)` 

1276 

1277 Standalone usage: 

1278 

1279 >>> y_true = [[0, 1], [0, 0]] 

1280 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

1281 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1282 >>> h = tf.keras.losses.CategoricalHinge() 

1283 >>> h(y_true, y_pred).numpy() 

1284 1.4 

1285 

1286 >>> # Calling with 'sample_weight'. 

1287 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() 

1288 0.6 

1289 

1290 >>> # Using 'sum' reduction type. 

1291 >>> h = tf.keras.losses.CategoricalHinge( 

1292 ... reduction=tf.keras.losses.Reduction.SUM) 

1293 >>> h(y_true, y_pred).numpy() 

1294 2.8 

1295 

1296 >>> # Using 'none' reduction type. 

1297 >>> h = tf.keras.losses.CategoricalHinge( 

1298 ... reduction=tf.keras.losses.Reduction.NONE) 

1299 >>> h(y_true, y_pred).numpy() 

1300 array([1.2, 1.6], dtype=float32) 

1301 

1302 Usage with the `compile()` API: 

1303 

1304 ```python 

1305 model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge()) 

1306 ``` 

1307 """ 

1308 

1309 def __init__( 

1310 self, reduction=losses_utils.ReductionV2.AUTO, name="categorical_hinge" 

1311 ): 

1312 """Initializes `CategoricalHinge` instance. 

1313 

1314 Args: 

1315 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1316 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1317 option will be determined by the usage context. For almost all cases 

1318 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1319 `tf.distribute.Strategy`, except via `Model.compile()` and 

1320 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1321 will raise an error. Please see this custom training [tutorial]( 

1322 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1323 for more details. 

1324 name: Optional name for the instance. Defaults to 'categorical_hinge'. 

1325 """ 

1326 super().__init__(categorical_hinge, name=name, reduction=reduction) 

1327 

1328 

1329@keras_export("keras.losses.Poisson") 

1330class Poisson(LossFunctionWrapper): 

1331 """Computes the Poisson loss between `y_true` & `y_pred`. 

1332 

1333 `loss = y_pred - y_true * log(y_pred)` 

1334 

1335 Standalone usage: 

1336 

1337 >>> y_true = [[0., 1.], [0., 0.]] 

1338 >>> y_pred = [[1., 1.], [0., 0.]] 

1339 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1340 >>> p = tf.keras.losses.Poisson() 

1341 >>> p(y_true, y_pred).numpy() 

1342 0.5 

1343 

1344 >>> # Calling with 'sample_weight'. 

1345 >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

1346 0.4 

1347 

1348 >>> # Using 'sum' reduction type. 

1349 >>> p = tf.keras.losses.Poisson( 

1350 ... reduction=tf.keras.losses.Reduction.SUM) 

1351 >>> p(y_true, y_pred).numpy() 

1352 0.999 

1353 

1354 >>> # Using 'none' reduction type. 

1355 >>> p = tf.keras.losses.Poisson( 

1356 ... reduction=tf.keras.losses.Reduction.NONE) 

1357 >>> p(y_true, y_pred).numpy() 

1358 array([0.999, 0.], dtype=float32) 

1359 

1360 Usage with the `compile()` API: 

1361 

1362 ```python 

1363 model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson()) 

1364 ``` 

1365 """ 

1366 

1367 def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="poisson"): 

1368 """Initializes `Poisson` instance. 

1369 

1370 Args: 

1371 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1372 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1373 option will be determined by the usage context. For almost all cases 

1374 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1375 `tf.distribute.Strategy`, except via `Model.compile()` and 

1376 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1377 will raise an error. Please see this custom training [tutorial]( 

1378 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1379 for more details. 

1380 name: Optional name for the instance. Defaults to 'poisson'. 

1381 """ 

1382 super().__init__(poisson, name=name, reduction=reduction) 

1383 

1384 

1385@keras_export("keras.losses.LogCosh") 

1386class LogCosh(LossFunctionWrapper): 

1387 """Computes the logarithm of the hyperbolic cosine of the prediction error. 

1388 

1389 `logcosh = log((exp(x) + exp(-x))/2)`, 

1390 where x is the error `y_pred - y_true`. 

1391 

1392 Standalone usage: 

1393 

1394 >>> y_true = [[0., 1.], [0., 0.]] 

1395 >>> y_pred = [[1., 1.], [0., 0.]] 

1396 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1397 >>> l = tf.keras.losses.LogCosh() 

1398 >>> l(y_true, y_pred).numpy() 

1399 0.108 

1400 

1401 >>> # Calling with 'sample_weight'. 

1402 >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

1403 0.087 

1404 

1405 >>> # Using 'sum' reduction type. 

1406 >>> l = tf.keras.losses.LogCosh( 

1407 ... reduction=tf.keras.losses.Reduction.SUM) 

1408 >>> l(y_true, y_pred).numpy() 

1409 0.217 

1410 

1411 >>> # Using 'none' reduction type. 

1412 >>> l = tf.keras.losses.LogCosh( 

1413 ... reduction=tf.keras.losses.Reduction.NONE) 

1414 >>> l(y_true, y_pred).numpy() 

1415 array([0.217, 0.], dtype=float32) 

1416 

1417 Usage with the `compile()` API: 

1418 

1419 ```python 

1420 model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh()) 

1421 ``` 

1422 """ 

1423 

1424 def __init__( 

1425 self, reduction=losses_utils.ReductionV2.AUTO, name="log_cosh" 

1426 ): 

1427 """Initializes `LogCosh` instance. 

1428 

1429 Args: 

1430 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1431 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1432 option will be determined by the usage context. For almost all cases 

1433 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1434 `tf.distribute.Strategy`, except via `Model.compile()` and 

1435 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1436 will raise an error. Please see this custom training [tutorial]( 

1437 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1438 for more details. 

1439 name: Optional name for the instance. Defaults to 'log_cosh'. 

1440 """ 

1441 super().__init__(log_cosh, name=name, reduction=reduction) 

1442 

1443 

1444@keras_export("keras.losses.KLDivergence") 

1445class KLDivergence(LossFunctionWrapper): 

1446 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. 

1447 

1448 `loss = y_true * log(y_true / y_pred)` 

1449 

1450 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence 

1451 

1452 Standalone usage: 

1453 

1454 >>> y_true = [[0, 1], [0, 0]] 

1455 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

1456 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1457 >>> kl = tf.keras.losses.KLDivergence() 

1458 >>> kl(y_true, y_pred).numpy() 

1459 0.458 

1460 

1461 >>> # Calling with 'sample_weight'. 

1462 >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

1463 0.366 

1464 

1465 >>> # Using 'sum' reduction type. 

1466 >>> kl = tf.keras.losses.KLDivergence( 

1467 ... reduction=tf.keras.losses.Reduction.SUM) 

1468 >>> kl(y_true, y_pred).numpy() 

1469 0.916 

1470 

1471 >>> # Using 'none' reduction type. 

1472 >>> kl = tf.keras.losses.KLDivergence( 

1473 ... reduction=tf.keras.losses.Reduction.NONE) 

1474 >>> kl(y_true, y_pred).numpy() 

1475 array([0.916, -3.08e-06], dtype=float32) 

1476 

1477 Usage with the `compile()` API: 

1478 

1479 ```python 

1480 model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence()) 

1481 ``` 

1482 """ 

1483 

1484 def __init__( 

1485 self, reduction=losses_utils.ReductionV2.AUTO, name="kl_divergence" 

1486 ): 

1487 """Initializes `KLDivergence` instance. 

1488 

1489 Args: 

1490 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1491 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1492 option will be determined by the usage context. For almost all cases 

1493 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1494 `tf.distribute.Strategy`, except via `Model.compile()` and 

1495 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1496 will raise an error. Please see this custom training [tutorial]( 

1497 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1498 for more details. 

1499 name: Optional name for the instance. Defaults to 'kl_divergence'. 

1500 """ 

1501 super().__init__(kl_divergence, name=name, reduction=reduction) 

1502 

1503 

1504@keras_export("keras.losses.Huber") 

1505class Huber(LossFunctionWrapper): 

1506 """Computes the Huber loss between `y_true` & `y_pred`. 

1507 

1508 For each value x in `error = y_true - y_pred`: 

1509 

1510 ``` 

1511 loss = 0.5 * x^2 if |x| <= d 

1512 loss = 0.5 * d^2 + d * (|x| - d) if |x| > d 

1513 ``` 

1514 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss 

1515 

1516 Standalone usage: 

1517 

1518 >>> y_true = [[0, 1], [0, 0]] 

1519 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

1520 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

1521 >>> h = tf.keras.losses.Huber() 

1522 >>> h(y_true, y_pred).numpy() 

1523 0.155 

1524 

1525 >>> # Calling with 'sample_weight'. 

1526 >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() 

1527 0.09 

1528 

1529 >>> # Using 'sum' reduction type. 

1530 >>> h = tf.keras.losses.Huber( 

1531 ... reduction=tf.keras.losses.Reduction.SUM) 

1532 >>> h(y_true, y_pred).numpy() 

1533 0.31 

1534 

1535 >>> # Using 'none' reduction type. 

1536 >>> h = tf.keras.losses.Huber( 

1537 ... reduction=tf.keras.losses.Reduction.NONE) 

1538 >>> h(y_true, y_pred).numpy() 

1539 array([0.18, 0.13], dtype=float32) 

1540 

1541 Usage with the `compile()` API: 

1542 

1543 ```python 

1544 model.compile(optimizer='sgd', loss=tf.keras.losses.Huber()) 

1545 ``` 

1546 """ 

1547 

1548 def __init__( 

1549 self, 

1550 delta=1.0, 

1551 reduction=losses_utils.ReductionV2.AUTO, 

1552 name="huber_loss", 

1553 ): 

1554 """Initializes `Huber` instance. 

1555 

1556 Args: 

1557      delta: A float, the point where the Huber loss function changes from 

1558        quadratic to linear. 

1559 reduction: Type of `tf.keras.losses.Reduction` to apply to 

1560 loss. Default value is `AUTO`. `AUTO` indicates that the reduction 

1561 option will be determined by the usage context. For almost all cases 

1562 this defaults to `SUM_OVER_BATCH_SIZE`. When used under a 

1563 `tf.distribute.Strategy`, except via `Model.compile()` and 

1564 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

1565 will raise an error. Please see this custom training [tutorial]( 

1566 https://www.tensorflow.org/tutorials/distribute/custom_training) 

1567 for more details. 

1568 name: Optional name for the instance. Defaults to 'huber_loss'. 

1569 """ 

1570 super().__init__(huber, name=name, reduction=reduction, delta=delta) 

1571 

1572 

1573@keras_export( 

1574 "keras.metrics.mean_squared_error", 

1575 "keras.metrics.mse", 

1576 "keras.metrics.MSE", 

1577 "keras.losses.mean_squared_error", 

1578 "keras.losses.mse", 

1579 "keras.losses.MSE", 

1580) 

1581@tf.__internal__.dispatch.add_dispatch_support 

1582def mean_squared_error(y_true, y_pred): 

1583 """Computes the mean squared error between labels and predictions. 

1584 

1585 After computing the squared distance between the inputs, the mean value over 

1586 the last dimension is returned. 

1587 

1588 `loss = mean(square(y_true - y_pred), axis=-1)` 

1589 

1590 Standalone usage: 

1591 

1592 >>> y_true = np.random.randint(0, 2, size=(2, 3)) 

1593 >>> y_pred = np.random.random(size=(2, 3)) 

1594 >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred) 

1595 >>> assert loss.shape == (2,) 

1596 >>> assert np.array_equal( 

1597 ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1)) 

1598 

1599 Args: 

1600 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

1601 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1602 

1603 Returns: 

1604 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. 

1605 """ 

1606 y_pred = tf.convert_to_tensor(y_pred) 

1607 y_true = tf.cast(y_true, y_pred.dtype) 

1608 return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1) 

1609 

1610 

1611def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False): 

1612 """Apply a loss function on a per batch basis. 

1613 

1614 Args: 

1615 loss_fn: The loss function 

1616 y_true: truth values (RaggedTensor) 

1617 y_pred: predicted values (RaggedTensor) 

1618 y_pred_extra_dim: whether y_pred has an additional dimension compared to 

1619 y_true 

1620 

1621 Returns: 

1622 Loss-function result. A dense tensor if the output has a single dimension 

1623 (per-batch loss value); a ragged tensor otherwise. 

1624 """ 

1625 

1626 def rt_is_equiv_dense(rt): 

1627 """Returns true if this RaggedTensor has the same row_lengths across 

1628 

1629 all ragged dimensions and thus can be converted to a dense tensor 

1630 without loss of information. 

1631 

1632 Args: 

1633 rt: RaggedTensor. 

1634 """ 

1635 return tf.reduce_all( 

1636 [ 

1637 tf.equal( 

1638 tf.math.reduce_variance( 

1639 tf.cast(row_lens, backend.floatx()) 

1640 ), 

1641 tf.constant([0.0]), 

1642 ) 

1643 for row_lens in rt.nested_row_lengths() 

1644 ] 

1645 ) 

1646 

1647 def _convert_to_dense(inputs): 

1648 return tuple( 

1649 rt.to_tensor() if isinstance(rt, tf.RaggedTensor) else rt 

1650 for rt in inputs 

1651 ) 

1652 

1653 def _call_loss(inputs, ragged_output): 

1654 """Adapt the result to ragged or dense tensor according to the expected 

1655 

1656 output type. This is done so that all the return values of the map 

1657 operation have the same type. 

1658 """ 

1659 r = loss_fn(*inputs) 

1660 if ragged_output and not isinstance(r, tf.RaggedTensor): 

1661 r = tf.RaggedTensor.from_tensor(r) 

1662 elif not ragged_output and isinstance(r, tf.RaggedTensor): 

1663 r = r.to_tensor() 

1664 return r 

1665 

1666 def _wrapper(inputs, ragged_output): 

1667 _, y_pred = inputs 

1668 if isinstance(y_pred, tf.RaggedTensor): 

1669 return tf.cond( 

1670 rt_is_equiv_dense(y_pred), 

1671 lambda: _call_loss(_convert_to_dense(inputs), ragged_output), 

1672 lambda: _call_loss(inputs, ragged_output), 

1673 ) 

1674 

1675 return loss_fn(*inputs) 

1676 

1677 if not isinstance(y_true, tf.RaggedTensor): 

1678 return loss_fn(y_true, y_pred.to_tensor()) 

1679 

1680 lshape = y_pred.shape.as_list()[1:-1] 

1681 if len(lshape) > 0: 

1682 spec = tf.RaggedTensorSpec(shape=lshape, dtype=y_pred.dtype) 

1683 else: 

1684 spec = tf.TensorSpec(shape=[], dtype=y_pred.dtype) 

1685 

1686 nested_splits_list = [rt.nested_row_splits for rt in (y_true, y_pred)] 

1687 if y_pred_extra_dim: 

1688 # The last dimension of a categorical prediction may be ragged or not. 

1689 rdims = [len(slist) for slist in nested_splits_list] 

1690 if rdims[0] == rdims[1] - 1: 

1691 nested_splits_list[1] = nested_splits_list[1][:-1] 

1692 

1693 map_fn = functools.partial(_wrapper, ragged_output=len(lshape) > 1) 

1694 

1695 assertion_list = ragged_util.assert_splits_match(nested_splits_list) 

1696 with tf.control_dependencies(assertion_list): 

1697 return ragged_map_ops.map_fn(map_fn, elems=(y_true, y_pred), dtype=spec) 
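# Illustrative sketch (added; values assumed, not part of the library source):
# the ragged dispatches below route RaggedTensor inputs through
# _ragged_tensor_apply_loss, which maps the wrapped loss over each ragged row.
# With a single ragged dimension the result is a dense per-example tensor.
# Assumes the module-level `tf` import.
y_true = tf.ragged.constant([[0.0, 1.0, 1.0], [1.0]])
y_pred = tf.ragged.constant([[0.1, 0.8, 0.9], [0.6]])
per_example_mse = mean_squared_error(y_true, y_pred)  # dense, shape (2,)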

1698 

1699 

1700@dispatch.dispatch_for_types(mean_squared_error, tf.RaggedTensor) 

1701def _ragged_tensor_mse(y_true, y_pred): 

1702 """Implements support for handling RaggedTensors. 

1703 

1704 Args: 

1705 y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`. 

1706 y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`. 

1707 

1708 Returns: 

1709 Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. 

1710 When the number of dimensions of the batch feature vector [d0, .. dN] is 

1711 greater than one the return value is a RaggedTensor. Otherwise a Dense 

1712 tensor with dimensions [batch_size] is returned. 

1713 """ 

1714 return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred) 

1715 

1716 

1717@keras_export( 

1718 "keras.metrics.mean_absolute_error", 

1719 "keras.metrics.mae", 

1720 "keras.metrics.MAE", 

1721 "keras.losses.mean_absolute_error", 

1722 "keras.losses.mae", 

1723 "keras.losses.MAE", 

1724) 

1725@tf.__internal__.dispatch.add_dispatch_support 

1726def mean_absolute_error(y_true, y_pred): 

1727 """Computes the mean absolute error between labels and predictions. 

1728 

1729 `loss = mean(abs(y_true - y_pred), axis=-1)` 

1730 

1731 Standalone usage: 

1732 

1733 >>> y_true = np.random.randint(0, 2, size=(2, 3)) 

1734 >>> y_pred = np.random.random(size=(2, 3)) 

1735 >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred) 

1736 >>> assert loss.shape == (2,) 

1737 >>> assert np.array_equal( 

1738 ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1)) 

1739 

1740 Args: 

1741 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

1742 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1743 

1744 Returns: 

1745 Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`. 

1746 """ 

1747 y_pred = tf.convert_to_tensor(y_pred) 

1748 y_true = tf.cast(y_true, y_pred.dtype) 

1749 return backend.mean(tf.abs(y_pred - y_true), axis=-1) 

1750 

1751 

1752@dispatch.dispatch_for_types(mean_absolute_error, tf.RaggedTensor) 

1753def _ragged_tensor_mae(y_true, y_pred): 

1754 """RaggedTensor adapter for mean_absolute_error.""" 

1755 return _ragged_tensor_apply_loss(mean_absolute_error, y_true, y_pred) 

1756 

1757 

1758@keras_export( 

1759 "keras.metrics.mean_absolute_percentage_error", 

1760 "keras.metrics.mape", 

1761 "keras.metrics.MAPE", 

1762 "keras.losses.mean_absolute_percentage_error", 

1763 "keras.losses.mape", 

1764 "keras.losses.MAPE", 

1765) 

1766@tf.__internal__.dispatch.add_dispatch_support 

1767def mean_absolute_percentage_error(y_true, y_pred): 

1768 """Computes the mean absolute percentage error between `y_true` & `y_pred`. 

1769 

1770 `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)` 

1771 

1772 Standalone usage: 

1773 

1774 >>> y_true = np.random.random(size=(2, 3)) 

1775 >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero 

1776 >>> y_pred = np.random.random(size=(2, 3)) 

1777 >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred) 

1778 >>> assert loss.shape == (2,) 

1779 >>> assert np.array_equal( 

1780 ... loss.numpy(), 

1781 ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1)) 

1782 

1783 Args: 

1784 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

1785 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1786 

1787 Returns: 

1788 Mean absolute percentage error values. shape = `[batch_size, d0, .. 

1789 dN-1]`. 

1790 """ 

1791 y_pred = tf.convert_to_tensor(y_pred) 

1792 y_true = tf.cast(y_true, y_pred.dtype) 

1793 diff = tf.abs( 

1794 (y_true - y_pred) / backend.maximum(tf.abs(y_true), backend.epsilon()) 

1795 ) 

1796 return 100.0 * backend.mean(diff, axis=-1) 

1797 

1798 

1799@dispatch.dispatch_for_types(mean_absolute_percentage_error, tf.RaggedTensor) 

1800def _ragged_tensor_mape(y_true, y_pred): 

1801 """Support RaggedTensors.""" 

1802 return _ragged_tensor_apply_loss( 

1803 mean_absolute_percentage_error, y_true, y_pred 

1804 ) 

1805 

1806 

1807@keras_export( 

1808 "keras.metrics.mean_squared_logarithmic_error", 

1809 "keras.metrics.msle", 

1810 "keras.metrics.MSLE", 

1811 "keras.losses.mean_squared_logarithmic_error", 

1812 "keras.losses.msle", 

1813 "keras.losses.MSLE", 

1814) 

1815@tf.__internal__.dispatch.add_dispatch_support 

1816def mean_squared_logarithmic_error(y_true, y_pred): 

1817 """Computes the mean squared logarithmic error between `y_true` & `y_pred`. 

1818 

1819 `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)` 

1820 

1821 Standalone usage: 

1822 

1823 >>> y_true = np.random.randint(0, 2, size=(2, 3)) 

1824 >>> y_pred = np.random.random(size=(2, 3)) 

1825 >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred) 

1826 >>> assert loss.shape == (2,) 

1827 >>> y_true = np.maximum(y_true, 1e-7) 

1828 >>> y_pred = np.maximum(y_pred, 1e-7) 

1829 >>> assert np.allclose( 

1830 ... loss.numpy(), 

1831 ... np.mean( 

1832 ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1)) 

1833 

1834 Args: 

1835 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

1836 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1837 

1838 Returns: 

1839 Mean squared logarithmic error values. shape = `[batch_size, d0, .. 

1840 dN-1]`. 

1841 """ 

1842 y_pred = tf.convert_to_tensor(y_pred) 

1843 y_true = tf.cast(y_true, y_pred.dtype) 

1844 first_log = tf.math.log(backend.maximum(y_pred, backend.epsilon()) + 1.0) 

1845 second_log = tf.math.log(backend.maximum(y_true, backend.epsilon()) + 1.0) 

1846 return backend.mean( 

1847 tf.math.squared_difference(first_log, second_log), axis=-1 

1848 ) 

1849 

1850 

1851@dispatch.dispatch_for_types(mean_squared_logarithmic_error, tf.RaggedTensor) 

1852def _ragged_tensor_msle(y_true, y_pred): 

1853 """Implements support for handling RaggedTensors.""" 

1854 return _ragged_tensor_apply_loss( 

1855 mean_squared_logarithmic_error, y_true, y_pred 

1856 ) 

1857 

1858 

1859def _maybe_convert_labels(y_true): 

1860 """Converts binary labels into -1/1.""" 

1861 are_zeros = tf.equal(y_true, 0) 

1862 are_ones = tf.equal(y_true, 1) 

1863 is_binary = tf.reduce_all(tf.logical_or(are_zeros, are_ones)) 

1864 

1865 def _convert_binary_labels(): 

1866 # Convert the binary labels to -1 or 1. 

1867 return 2.0 * y_true - 1.0 

1868 

1869 updated_y_true = tf.__internal__.smart_cond.smart_cond( 

1870 is_binary, _convert_binary_labels, lambda: y_true 

1871 ) 

1872 return updated_y_true 
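# Quick sketch (added; values assumed) of the conversion performed above:
# binary {0, 1} labels become {-1, 1} via 2 * y - 1, while labels that are
# not purely 0/1 are returned unchanged. Assumes the module-level `tf` import.
labels = tf.constant([[0.0, 1.0], [1.0, 0.0]])
converted = _maybe_convert_labels(labels)  # -> [[-1., 1.], [1., -1.]]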

1873 

1874 

1875@keras_export("keras.metrics.squared_hinge", "keras.losses.squared_hinge") 

1876@tf.__internal__.dispatch.add_dispatch_support 

1877def squared_hinge(y_true, y_pred): 

1878 """Computes the squared hinge loss between `y_true` & `y_pred`. 

1879 

1880 `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)` 

1881 

1882 Standalone usage: 

1883 

1884 >>> y_true = np.random.choice([-1, 1], size=(2, 3)) 

1885 >>> y_pred = np.random.random(size=(2, 3)) 

1886 >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred) 

1887 >>> assert loss.shape == (2,) 

1888 >>> assert np.array_equal( 

1889 ... loss.numpy(), 

1890 ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1)) 

1891 

1892 Args: 

1893 y_true: The ground truth values. `y_true` values are expected to be -1 or 

1894 1. If binary (0 or 1) labels are provided we will convert them to -1 or 

1895 1. shape = `[batch_size, d0, .. dN]`. 

1896 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1897 

1898 Returns: 

1899 Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`. 

1900 """ 

1901 y_pred = tf.convert_to_tensor(y_pred) 

1902 y_true = tf.cast(y_true, y_pred.dtype) 

1903 y_true = _maybe_convert_labels(y_true) 

1904 return backend.mean( 

1905 tf.square(tf.maximum(1.0 - y_true * y_pred, 0.0)), axis=-1 

1906 ) 

1907 

1908 

1909@keras_export("keras.metrics.hinge", "keras.losses.hinge") 

1910@tf.__internal__.dispatch.add_dispatch_support 

1911def hinge(y_true, y_pred): 

1912 """Computes the hinge loss between `y_true` & `y_pred`. 

1913 

1914 `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)` 

1915 

1916 Standalone usage: 

1917 

1918 >>> y_true = np.random.choice([-1, 1], size=(2, 3)) 

1919 >>> y_pred = np.random.random(size=(2, 3)) 

1920 >>> loss = tf.keras.losses.hinge(y_true, y_pred) 

1921 >>> assert loss.shape == (2,) 

1922 >>> assert np.array_equal( 

1923 ... loss.numpy(), 

1924 ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1)) 

1925 

1926 Args: 

1927 y_true: The ground truth values. `y_true` values are expected to be -1 or 

1928 1. If binary (0 or 1) labels are provided they will be converted to -1 

1929 or 1. shape = `[batch_size, d0, .. dN]`. 

1930 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

1931 

1932 Returns: 

1933 Hinge loss values. shape = `[batch_size, d0, .. dN-1]`. 

1934 """ 

1935 y_pred = tf.convert_to_tensor(y_pred) 

1936 y_true = tf.cast(y_true, y_pred.dtype) 

1937 y_true = _maybe_convert_labels(y_true) 

1938 return backend.mean(tf.maximum(1.0 - y_true * y_pred, 0.0), axis=-1) 

1939 

1940 

1941@keras_export("keras.losses.categorical_hinge") 

1942@tf.__internal__.dispatch.add_dispatch_support 

1943def categorical_hinge(y_true, y_pred): 

1944 """Computes the categorical hinge loss between `y_true` & `y_pred`. 

1945 

1946 `loss = maximum(neg - pos + 1, 0)` 

1947    where `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)` 

1948 

1949 Standalone usage: 

1950 

1951 >>> y_true = np.random.randint(0, 3, size=(2,)) 

1952 >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3) 

1953 >>> y_pred = np.random.random(size=(2, 3)) 

1954 >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred) 

1955 >>> assert loss.shape == (2,) 

1956 >>> pos = np.sum(y_true * y_pred, axis=-1) 

1957 >>> neg = np.amax((1. - y_true) * y_pred, axis=-1) 

1958 >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.)) 

1959 

1960 Args: 

1961 y_true: The ground truth values. `y_true` values are expected to be 

1962 either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor). 

1963 y_pred: The predicted values. 

1964 

1965 Returns: 

1966 Categorical hinge loss values. 

1967 """ 

1968 y_pred = tf.convert_to_tensor(y_pred) 

1969 y_true = tf.cast(y_true, y_pred.dtype) 

1970 pos = tf.reduce_sum(y_true * y_pred, axis=-1) 

1971 neg = tf.reduce_max((1.0 - y_true) * y_pred, axis=-1) 

1972 zero = tf.cast(0.0, y_pred.dtype) 

1973 return tf.maximum(neg - pos + 1.0, zero) 

1974 

1975 

1976@keras_export("keras.losses.huber", v1=[]) 

1977@tf.__internal__.dispatch.add_dispatch_support 

1978def huber(y_true, y_pred, delta=1.0): 

1979 """Computes Huber loss value. 

1980 

1981 For each value x in `error = y_true - y_pred`: 

1982 

1983 ``` 

1984 loss = 0.5 * x^2 if |x| <= d 

1985 loss = d * |x| - 0.5 * d^2 if |x| > d 

1986 ``` 

1987 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss 

1988 

1989 Args: 

1990 y_true: tensor of true targets. 

1991 y_pred: tensor of predicted targets. 

1992      delta: A float, the point where the Huber loss function changes from 

1993        quadratic to linear. 

1994 

1995 Returns: 

1996 Tensor with one scalar loss entry per sample. 

1997 """ 

1998 y_pred = tf.cast(y_pred, dtype=backend.floatx()) 

1999 y_true = tf.cast(y_true, dtype=backend.floatx()) 

2000 delta = tf.cast(delta, dtype=backend.floatx()) 

2001 error = tf.subtract(y_pred, y_true) 

2002 abs_error = tf.abs(error) 

2003 half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype) 

2004 return backend.mean( 

2005 tf.where( 

2006 abs_error <= delta, 

2007 half * tf.square(error), 

2008 delta * abs_error - half * tf.square(delta), 

2009 ), 

2010 axis=-1, 

2011 ) 
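# Hedged standalone sketch (added; values assumed) of the piecewise definition
# above: every |error| here is <= delta, so the loss reduces to the quadratic
# branch, mean(0.5 * error**2, axis=-1) per sample.
y_true = [[0.0, 1.0], [0.0, 0.0]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]
per_sample = huber(y_true, y_pred, delta=1.0)  # approx. [0.18, 0.13]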

2012 

2013 

2014@keras_export( 

2015 "keras.losses.log_cosh", 

2016 "keras.losses.logcosh", 

2017 "keras.metrics.log_cosh", 

2018 "keras.metrics.logcosh", 

2019) 

2020@tf.__internal__.dispatch.add_dispatch_support 

2021def log_cosh(y_true, y_pred): 

2022 """Logarithm of the hyperbolic cosine of the prediction error. 

2023 

2024 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and 

2025 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly 

2026 like the mean squared error, but will not be so strongly affected by the 

2027 occasional wildly incorrect prediction. 

2028 

2029 Standalone usage: 

2030 

2031 >>> y_true = np.random.random(size=(2, 3)) 

2032 >>> y_pred = np.random.random(size=(2, 3)) 

2033 >>> loss = tf.keras.losses.logcosh(y_true, y_pred) 

2034 >>> assert loss.shape == (2,) 

2035 >>> x = y_pred - y_true 

2036 >>> assert np.allclose( 

2037 ... loss.numpy(), 

2038 ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.), 

2039 ... axis=-1), 

2040 ... atol=1e-5) 

2041 

2042 Args: 

2043 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

2044 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

2045 

2046 Returns: 

2047 Logcosh error values. shape = `[batch_size, d0, .. dN-1]`. 

2048 """ 

2049 y_pred = tf.convert_to_tensor(y_pred) 

2050 y_true = tf.cast(y_true, y_pred.dtype) 

2051 

2052 def _logcosh(x): 

2053 return ( 

2054 x + tf.math.softplus(-2.0 * x) - tf.cast(tf.math.log(2.0), x.dtype) 

2055 ) 

2056 

2057 return backend.mean(_logcosh(y_pred - y_true), axis=-1) 
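# Explanatory note (added; not part of the library source): the softplus form
# used in _logcosh above is an overflow-safe rewrite of log(cosh(x)):
#   log(cosh(x)) = log((e^x + e^-x) / 2)
#                = x + log(1 + e^(-2x)) - log(2)
#                = x + softplus(-2x) - log(2)
# A quick numeric spot check of the identity:
import numpy as np
x = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
assert np.allclose(
    np.log(np.cosh(x)), x + np.log1p(np.exp(-2.0 * x)) - np.log(2.0)
)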

2058 

2059 

2060@keras_export( 

2061 "keras.metrics.categorical_crossentropy", 

2062 "keras.losses.categorical_crossentropy", 

2063) 

2064@tf.__internal__.dispatch.add_dispatch_support 

2065def categorical_crossentropy( 

2066 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 

2067): 

2068 """Computes the categorical crossentropy loss. 

2069 

2070 Standalone usage: 

2071 

2072 >>> y_true = [[0, 1, 0], [0, 0, 1]] 

2073 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 

2074 >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred) 

2075 >>> assert loss.shape == (2,) 

2076 >>> loss.numpy() 

2077 array([0.0513, 2.303], dtype=float32) 

2078 

2079 Args: 

2080 y_true: Tensor of one-hot true targets. 

2081 y_pred: Tensor of predicted targets. 

2082 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2083 default, we assume that `y_pred` encodes a probability distribution. 

2084 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For 

2085 example, if `0.1`, use `0.1 / num_classes` for non-target labels 

2086 and `0.9 + 0.1 / num_classes` for target labels. 

2087 axis: Defaults to -1. The dimension along which the entropy is 

2088 computed. 

2089 

2090 Returns: 

2091 Categorical crossentropy loss value. 

2092 """ 

2093 if isinstance(axis, bool): 

2094 raise ValueError( 

2095 "`axis` must be of type `int`. " 

2096 f"Received: axis={axis} of type {type(axis)}" 

2097 ) 

2098 y_pred = tf.convert_to_tensor(y_pred) 

2099 y_true = tf.cast(y_true, y_pred.dtype) 

2100 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) 

2101 

2102 if y_pred.shape[-1] == 1: 

2103 warnings.warn( 

2104 "In loss categorical_crossentropy, expected " 

2105 "y_pred.shape to be (batch_size, num_classes) " 

2106 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " 

2107 "Consider using 'binary_crossentropy' if you only have 2 classes.", 

2108 SyntaxWarning, 

2109 stacklevel=2, 

2110 ) 

2111 

2112 def _smooth_labels(): 

2113 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype) 

2114 return y_true * (1.0 - label_smoothing) + ( 

2115 label_smoothing / num_classes 

2116 ) 

2117 

2118 y_true = tf.__internal__.smart_cond.smart_cond( 

2119 label_smoothing, _smooth_labels, lambda: y_true 

2120 ) 

2121 

2122 return backend.categorical_crossentropy( 

2123 y_true, y_pred, from_logits=from_logits, axis=axis 

2124 ) 
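# Sketch (added; values assumed) of the label-smoothing arithmetic from the
# Args section: with label_smoothing=0.1 and 3 classes, a one-hot target
# [0, 1, 0] is smoothed to roughly [0.0333, 0.9333, 0.0333] before the
# crossentropy is taken, i.e. label_smoothing / num_classes off-target and
# (1 - label_smoothing) + label_smoothing / num_classes on-target.
y_true = [[0.0, 1.0, 0.0]]
y_pred = [[0.05, 0.90, 0.05]]
loss = categorical_crossentropy(y_true, y_pred, label_smoothing=0.1)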

2125 

2126 

2127@dispatch.dispatch_for_types(categorical_crossentropy, tf.RaggedTensor) 

2128def _ragged_tensor_categorical_crossentropy( 

2129 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 

2130): 

2131 """Implements support for handling RaggedTensors. 

2132 

2133 Args: 

2134 y_true: Tensor of one-hot true targets. 

2135 y_pred: Tensor of predicted targets. 

2136 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2137 default, we assume that `y_pred` encodes a probability distribution. 

2138 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For 

2139 example, if `0.1`, use `0.1 / num_classes` for non-target labels 

2140 and `0.9 + 0.1 / num_classes` for target labels. 

2141 axis: The axis along which to compute crossentropy (the features axis). 

2142 Defaults to -1. 

2143 

2144 Returns: 

2145 Categorical crossentropy loss value. 

2146 

2147 Expected shape: (batch, sequence_len, n_classes) with sequence_len 

2148 being variable per batch. 

2149 Return shape: (batch, sequence_len). 

2150 

2151 When used by CategoricalCrossentropy() with the default reduction 

2152 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the 

2153 number of elements independent of the batch. E.g. if the RaggedTensor 

2154 has 2 batches with [2, 1] values respectively the resulting loss is 

2155 the sum of the individual loss values divided by 3. 

2156 """ 

2157 fn = functools.partial( 

2158 categorical_crossentropy, 

2159 from_logits=from_logits, 

2160 label_smoothing=label_smoothing, 

2161 axis=axis, 

2162 ) 

2163 return _ragged_tensor_apply_loss(fn, y_true, y_pred) 

2164 

2165 

2166@keras_export( 

2167 "keras.metrics.categorical_focal_crossentropy", 

2168 "keras.losses.categorical_focal_crossentropy", 

2169) 

2170@tf.__internal__.dispatch.add_dispatch_support 

2171def categorical_focal_crossentropy( 

2172 y_true, 

2173 y_pred, 

2174 alpha=0.25, 

2175 gamma=2.0, 

2176 from_logits=False, 

2177 label_smoothing=0.0, 

2178 axis=-1, 

2179): 

2180 """Computes the categorical focal crossentropy loss. 

2181 

2182 Standalone usage: 

2183 >>> y_true = [[0, 1, 0], [0, 0, 1]] 

2184 >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]] 

2185 >>> loss = tf.keras.losses.categorical_focal_crossentropy(y_true, y_pred) 

2186 >>> assert loss.shape == (2,) 

2187 >>> loss.numpy() 

2188 array([2.63401289e-04, 6.75912094e-01], dtype=float32) 

2189 

2190 Args: 

2191 y_true: Tensor of one-hot true targets. 

2192 y_pred: Tensor of predicted targets. 

2193 alpha: A weight balancing factor for all classes, default is `0.25` as 

2194 mentioned in the reference. It can be a list of floats or a scalar. 

2195 In the multi-class case, alpha may be set by inverse class 

2196 frequency by using `compute_class_weight` from `sklearn.utils`. 

2197 gamma: A focusing parameter, default is `2.0` as mentioned in the 

2198 reference. It helps to gradually reduce the importance given to 

2199 simple examples in a smooth manner. When `gamma` = 0, there is 

2200 no focal effect on the categorical crossentropy. 

2201 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2202 default, we assume that `y_pred` encodes a probability 

2203 distribution. 

2204 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For 

2205 example, if `0.1`, use `0.1 / num_classes` for non-target labels 

2206 and `0.9 + 0.1 / num_classes` for target labels. 

2207 axis: Defaults to -1. The dimension along which the entropy is 

2208 computed. 

2209 

2210 Returns: 

2211 Categorical focal crossentropy loss value. 

2212 """ 

2213 if isinstance(axis, bool): 

2214 raise ValueError( 

2215 "`axis` must be of type `int`. " 

2216 f"Received: axis={axis} of type {type(axis)}" 

2217 ) 

2218 y_pred = tf.convert_to_tensor(y_pred) 

2219 y_true = tf.cast(y_true, y_pred.dtype) 

2220 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) 

2221 

2222 if y_pred.shape[-1] == 1: 

2223 warnings.warn( 

2224 "In loss categorical_focal_crossentropy, expected " 

2225 "y_pred.shape to be (batch_size, num_classes) " 

2226 f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " 

2227 "Consider using 'binary_crossentropy' if you only have 2 classes.", 

2228 SyntaxWarning, 

2229 stacklevel=2, 

2230 ) 

2231 

2232 def _smooth_labels(): 

2233 num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype) 

2234 return y_true * (1.0 - label_smoothing) + ( 

2235 label_smoothing / num_classes 

2236 ) 

2237 

2238 y_true = tf.__internal__.smart_cond.smart_cond( 

2239 label_smoothing, _smooth_labels, lambda: y_true 

2240 ) 

2241 

2242 return backend.categorical_focal_crossentropy( 

2243 target=y_true, 

2244 output=y_pred, 

2245 alpha=alpha, 

2246 gamma=gamma, 

2247 from_logits=from_logits, 

2248 axis=axis, 

2249 ) 

2250 

2251 

2252@dispatch.dispatch_for_types(categorical_focal_crossentropy, tf.RaggedTensor) 

2253def _ragged_tensor_categorical_focal_crossentropy( 

2254 y_true, 

2255 y_pred, 

2256 alpha=0.25, 

2257 gamma=2.0, 

2258 from_logits=False, 

2259 label_smoothing=0.0, 

2260 axis=-1, 

2261): 

2262 """Implements support for handling RaggedTensors. 

2263 

2264 Expected shape: (batch, sequence_len, n_classes) with sequence_len 

2265 being variable per batch. 

2266 Return shape: (batch, sequence_len). 

2267 When used by CategoricalFocalCrossentropy() with the default reduction 

2268 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the 

2269 number of elements independent of the batch. E.g. if the RaggedTensor 

2270 has 2 batches with [2, 1] values respectively the resulting loss is 

2271 the sum of the individual loss values divided by 3. 

2272 

2273 Args: 

2274 alpha: A weight balancing factor for all classes, default is `0.25` as 

2275 mentioned in the reference. It can be a list of floats or a scalar. 

2276 In the multi-class case, alpha may be set by inverse class 

2277 frequency by using `compute_class_weight` from `sklearn.utils`. 

2278 gamma: A focusing parameter, default is `2.0` as mentioned in the 

2279 reference. It helps to gradually reduce the importance given to 

2280 simple examples in a smooth manner. When `gamma` = 0, there is 

2281 no focal effect on the categorical crossentropy. 

2282 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2283 default, we assume that `y_pred` encodes a probability distribution. 

2284 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For 

2285 example, if `0.1`, use `0.1 / num_classes` for non-target labels 

2286 and `0.9 + 0.1 / num_classes` for target labels. 

2287 axis: Defaults to -1. The dimension along which the entropy is 

2288 computed. 

2289 

2290 Returns: 

2291 Categorical focal crossentropy loss value. 

2292 """ 

2293 fn = functools.partial( 

2294 categorical_focal_crossentropy, 

2295 alpha=alpha, 

2296 gamma=gamma, 

2297 from_logits=from_logits, 

2298 label_smoothing=label_smoothing, 

2299 axis=axis, 

2300 ) 

2301 return _ragged_tensor_apply_loss(fn, y_true, y_pred) 

2302 

2303 

2304@keras_export( 

2305 "keras.metrics.sparse_categorical_crossentropy", 

2306 "keras.losses.sparse_categorical_crossentropy", 

2307) 

2308@tf.__internal__.dispatch.add_dispatch_support 

2309def sparse_categorical_crossentropy( 

2310 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None 

2311): 

2312 """Computes the sparse categorical crossentropy loss. 

2313 

2314 Standalone usage: 

2315 

2316 >>> y_true = [1, 2] 

2317 >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 

2318 >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred) 

2319 >>> assert loss.shape == (2,) 

2320 >>> loss.numpy() 

2321 array([0.0513, 2.303], dtype=float32) 

2322 

2323 >>> y_true = [[[ 0, 2], 

2324 ... [-1, -1]], 

2325 ... [[ 0, 2], 

2326 ... [-1, -1]]] 

2327 >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], 

2328 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]], 

2329 ... [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]], 

2330 ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]] 

2331 >>> loss = tf.keras.losses.sparse_categorical_crossentropy( 

2332 ... y_true, y_pred, ignore_class=-1) 

2333 >>> loss.numpy() 

2334 array([[[2.3841855e-07, 2.3841855e-07], 

2335 [0.0000000e+00, 0.0000000e+00]], 

2336 [[2.3841855e-07, 6.9314730e-01], 

2337 [0.0000000e+00, 0.0000000e+00]]], dtype=float32) 

2338 

2339 Args: 

2340 y_true: Ground truth values. 

2341 y_pred: The predicted values. 

2342 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2343 default, we assume that `y_pred` encodes a probability distribution. 

2344 axis: Defaults to -1. The dimension along which the entropy is 

2345 computed. 

2346 ignore_class: Optional integer. The ID of a class to be ignored during 

2347 loss computation. This is useful, for example, in segmentation 

2348 problems featuring a "void" class (commonly -1 or 255) in segmentation 

2349 maps. By default (`ignore_class=None`), all classes are considered. 

2350 

2351 Returns: 

2352 Sparse categorical crossentropy loss value. 

2353 """ 

2354 return backend.sparse_categorical_crossentropy( 

2355 y_true, 

2356 y_pred, 

2357 from_logits=from_logits, 

2358 ignore_class=ignore_class, 

2359 axis=axis, 

2360 ) 

2361 

2362 

2363@dispatch.dispatch_for_types(sparse_categorical_crossentropy, tf.RaggedTensor) 

2364def _ragged_tensor_sparse_categorical_crossentropy( 

2365 y_true, y_pred, from_logits=False, axis=-1, ignore_class=None 

2366): 

2367 """Implements support for handling RaggedTensors. 

2368 

2369 Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len 

2370 being variable per batch. 

2371 Return shape: (batch, sequence_len). 

2372 

2373 When used by SparseCategoricalCrossentropy() with the default reduction 

2374 (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the 

2375 number of elements independent of the batch. E.g. if the RaggedTensor 

2376 has 2 batches with [2, 1] values respectively, the resulting loss is 

2377 the sum of the individual loss values divided by 3. 

2378 """ 

2379 fn = functools.partial( 

2380 sparse_categorical_crossentropy, 

2381 from_logits=from_logits, 

2382 ignore_class=ignore_class, 

2383 axis=axis, 

2384 ) 

2385 return _ragged_tensor_apply_loss(fn, y_true, y_pred, y_pred_extra_dim=True) 

2386 

2387 

2388@keras_export( 

2389 "keras.metrics.binary_crossentropy", "keras.losses.binary_crossentropy" 

2390) 

2391@tf.__internal__.dispatch.add_dispatch_support 

2392def binary_crossentropy( 

2393 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 

2394): 

2395 """Computes the binary crossentropy loss. 

2396 

2397 Standalone usage: 

2398 

2399 >>> y_true = [[0, 1], [0, 0]] 

2400 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

2401 >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred) 

2402 >>> assert loss.shape == (2,) 

2403 >>> loss.numpy() 

2404 array([0.916 , 0.714], dtype=float32) 

2405 

2406 Args: 

2407 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

2408 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

2409 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2410 default, we assume that `y_pred` encodes a probability distribution. 

2411 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by 

2412      squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing` 

2413 for the target class and `0.5 * label_smoothing` for the non-target 

2414 class. 

2415 axis: The axis along which the mean is computed. Defaults to -1. 

2416 

2417 Returns: 

2418 Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. 

2419 """ 

2420 y_pred = tf.convert_to_tensor(y_pred) 

2421 y_true = tf.cast(y_true, y_pred.dtype) 

2422 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) 

2423 

2424 def _smooth_labels(): 

2425 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing 

2426 

2427 y_true = tf.__internal__.smart_cond.smart_cond( 

2428 label_smoothing, _smooth_labels, lambda: y_true 

2429 ) 

2430 

2431 return backend.mean( 

2432 backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits), 

2433 axis=axis, 

2434 ) 
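# Sketch (added; values assumed) of binary label smoothing: targets are
# squeezed towards 0.5, so with label_smoothing=0.2 a 1 becomes 0.9 and a 0
# becomes 0.1 before the crossentropy is computed.
y_true = [[0.0, 1.0], [0.0, 0.0]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]
loss = binary_crossentropy(y_true, y_pred, label_smoothing=0.2)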

2435 

2436 

2437@dispatch.dispatch_for_types(binary_crossentropy, tf.RaggedTensor) 

2438def _ragged_tensor_binary_crossentropy( 

2439 y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 

2440): 

2441 """Implements support for handling RaggedTensors. 

2442 

2443 Args: 

2444 y_true: Tensor of one-hot true targets. 

2445 y_pred: Tensor of predicted targets. 

2446 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2447 default, we assume that `y_pred` encodes a probability distribution. 

2448      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by 

2449        squeezing them towards 0.5, i.e., using `1. - 0.5 * label_smoothing` 

2450        for the target class and `0.5 * label_smoothing` for the non-target class. 

2451 axis: Axis along which to compute crossentropy. 

2452 

2453 Returns: 

2454 Binary crossentropy loss value. 

2455 

2456 Expected shape: (batch, sequence_len) with sequence_len being variable 

2457 per batch. 

2458 Return shape: (batch,); returns the per batch mean of the loss values. 

2459 

2460 When used by BinaryCrossentropy() with the default reduction 

2461 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over 

2462 the number of batches. 

2463 """ 

2464 fn = functools.partial( 

2465 binary_crossentropy, 

2466 from_logits=from_logits, 

2467 label_smoothing=label_smoothing, 

2468 axis=axis, 

2469 ) 

2470 return _ragged_tensor_apply_loss(fn, y_true, y_pred) 

2471 

2472 

2473@keras_export( 

2474 "keras.metrics.binary_focal_crossentropy", 

2475 "keras.losses.binary_focal_crossentropy", 

2476) 

2477@tf.__internal__.dispatch.add_dispatch_support 

2478def binary_focal_crossentropy( 

2479 y_true, 

2480 y_pred, 

2481 apply_class_balancing=False, 

2482 alpha=0.25, 

2483 gamma=2.0, 

2484 from_logits=False, 

2485 label_smoothing=0.0, 

2486 axis=-1, 

2487): 

2488 """Computes the binary focal crossentropy loss. 

2489 

2490 According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it 

2491 helps to apply a focal factor to down-weight easy examples and focus more on 

2492 hard examples. By default, the focal tensor is computed as follows: 

2493 

2494 `focal_factor = (1 - output)**gamma` for class 1 

2495 `focal_factor = output**gamma` for class 0 

2496 where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal 

2497 effect on the binary crossentropy loss. 

2498 

2499 If `apply_class_balancing == True`, this function also takes into account a 

2500 weight balancing factor for the binary classes 0 and 1 as follows: 

2501 

2502 `weight = alpha` for class 1 (`target == 1`) 

2503 `weight = 1 - alpha` for class 0 

2504 where `alpha` is a float in the range of `[0, 1]`. 

2505 

2506 Standalone usage: 

2507 

2508 >>> y_true = [[0, 1], [0, 0]] 

2509 >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] 

2510 >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred, 

2511 ... gamma=2) 

2512 >>> assert loss.shape == (2,) 

2513 >>> loss.numpy() 

2514 array([0.330, 0.206], dtype=float32) 

2515 

2516 Args: 

2517 y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. 

2518 y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. 

2519 apply_class_balancing: A bool, whether to apply weight balancing on the 

2520 binary classes 0 and 1. 

2521 alpha: A weight balancing factor for class 1, default is `0.25` as 

2522 mentioned in the reference. The weight for class 0 is `1.0 - alpha`. 

2523 gamma: A focusing parameter, default is `2.0` as mentioned in the 

2524 reference. 

2525 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2526 default, we assume that `y_pred` encodes a probability distribution. 

2527 label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the 

2528 labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 * 

2529 label_smoothing` for the target class and `0.5 * label_smoothing` for 

2530 the non-target class. 

2531 axis: The axis along which the mean is computed. Defaults to `-1`. 

2532 

2533 Returns: 

2534 Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. 

2535 """ 

2536 y_pred = tf.convert_to_tensor(y_pred) 

2537 y_true = tf.cast(y_true, y_pred.dtype) 

2538 label_smoothing = tf.convert_to_tensor(label_smoothing, dtype=y_pred.dtype) 

2539 

2540 def _smooth_labels(): 

2541 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing 

2542 

2543 y_true = tf.__internal__.smart_cond.smart_cond( 

2544 label_smoothing, _smooth_labels, lambda: y_true 

2545 ) 

2546 

2547 return backend.mean( 

2548 backend.binary_focal_crossentropy( 

2549 target=y_true, 

2550 output=y_pred, 

2551 apply_class_balancing=apply_class_balancing, 

2552 alpha=alpha, 

2553 gamma=gamma, 

2554 from_logits=from_logits, 

2555 ), 

2556 axis=axis, 

2557 ) 
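# Rough sketch (added; values assumed) of the weighting described above: for
# a class-1 element predicted with probability p, the crossentropy is scaled
# by roughly alpha * (1 - p) ** gamma, and by (1 - alpha) * p ** gamma for
# class-0 elements when apply_class_balancing=True, so easy, confident
# predictions contribute little to the loss.
y_true = [[0.0, 1.0], [0.0, 0.0]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]
loss = binary_focal_crossentropy(
    y_true, y_pred, apply_class_balancing=True, alpha=0.25, gamma=2.0
)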

2558 

2559 

2560@dispatch.dispatch_for_types(binary_focal_crossentropy, tf.RaggedTensor) 

2561def _ragged_tensor_binary_focal_crossentropy( 

2562 y_true, 

2563 y_pred, 

2564 apply_class_balancing=False, 

2565 alpha=0.25, 

2566 gamma=2.0, 

2567 from_logits=False, 

2568 label_smoothing=0.0, 

2569 axis=-1, 

2570): 

2571 """Implements support for handling RaggedTensors. 

2572 

2573 Expected shape: `(batch, sequence_len)` with sequence_len being variable per 

2574 batch. 

2575 Return shape: `(batch,)`; returns the per batch mean of the loss values. 

2576 

2577 When used by BinaryFocalCrossentropy() with the default reduction 

2578 (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over 

2579 the number of batches. 

2580 

2581 Args: 

2582 y_true: Tensor of one-hot true targets. 

2583 y_pred: Tensor of predicted targets. 

2584 apply_class_balancing: A bool, whether to apply weight balancing on the 

2585 binary classes 0 and 1. 

2586 alpha: A weight balancing factor for class 1, default is `0.25` as 

2587 mentioned in the reference [Lin et al., 2018]( 

2588 https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is 

2589 `1.0 - alpha`. 

2590 gamma: A focusing parameter, default is `2.0` as mentioned in the 

2591 reference. 

2592 from_logits: Whether `y_pred` is expected to be a logits tensor. By 

2593 default, we assume that `y_pred` encodes a probability distribution. 

2594      label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by 

2595        squeezing them towards 0.5, i.e., using `1. - 0.5 * label_smoothing` 

2596        for the target class and `0.5 * label_smoothing` for the non-target class. 

2597 axis: Axis along which to compute crossentropy. 

2598 

2599 Returns: 

2600 Binary focal crossentropy loss value. 

2601 """ 

2602 fn = functools.partial( 

2603 binary_focal_crossentropy, 

2604 apply_class_balancing=apply_class_balancing, 

2605 alpha=alpha, 

2606 gamma=gamma, 

2607 from_logits=from_logits, 

2608 label_smoothing=label_smoothing, 

2609 axis=axis, 

2610 ) 

2611 return _ragged_tensor_apply_loss(fn, y_true, y_pred) 

2612 

2613 

2614@keras_export( 

2615 "keras.metrics.kl_divergence", 

2616 "keras.metrics.kullback_leibler_divergence", 

2617 "keras.metrics.kld", 

2618 "keras.metrics.KLD", 

2619 "keras.losses.kl_divergence", 

2620 "keras.losses.kullback_leibler_divergence", 

2621 "keras.losses.kld", 

2622 "keras.losses.KLD", 

2623) 

2624@tf.__internal__.dispatch.add_dispatch_support 

2625def kl_divergence(y_true, y_pred): 

2626 """Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. 

2627 

2628 `loss = y_true * log(y_true / y_pred)` 

2629 

2630 See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence 

2631 

2632 Standalone usage: 

2633 

2634 >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64) 

2635 >>> y_pred = np.random.random(size=(2, 3)) 

2636 >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred) 

2637 >>> assert loss.shape == (2,) 

2638 >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1) 

2639 >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1) 

2640 >>> assert np.array_equal( 

2641 ... loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1)) 

2642 

2643 Args: 

2644 y_true: Tensor of true targets. 

2645 y_pred: Tensor of predicted targets. 

2646 

2647 Returns: 

2648 A `Tensor` with loss. 

2649 

2650 Raises: 

2651 TypeError: If `y_true` cannot be cast to the `y_pred.dtype`. 

2652 """ 

2653 y_pred = tf.convert_to_tensor(y_pred) 

2654 y_true = tf.cast(y_true, y_pred.dtype) 

2655 y_true = backend.clip(y_true, backend.epsilon(), 1) 

2656 y_pred = backend.clip(y_pred, backend.epsilon(), 1) 

2657 return tf.reduce_sum(y_true * tf.math.log(y_true / y_pred), axis=-1) 

2658 

2659 

2660@keras_export("keras.metrics.poisson", "keras.losses.poisson") 

2661@tf.__internal__.dispatch.add_dispatch_support 

2662def poisson(y_true, y_pred): 

2663 """Computes the Poisson loss between y_true and y_pred. 

2664 

2665 The Poisson loss is the mean of the elements of the `Tensor` 

2666 `y_pred - y_true * log(y_pred)`. 

2667 

2668 Standalone usage: 

2669 

2670 >>> y_true = np.random.randint(0, 2, size=(2, 3)) 

2671 >>> y_pred = np.random.random(size=(2, 3)) 

2672 >>> loss = tf.keras.losses.poisson(y_true, y_pred) 

2673 >>> assert loss.shape == (2,) 

2674 >>> y_pred = y_pred + 1e-7 

2675 >>> assert np.allclose( 

2676 ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1), 

2677 ... atol=1e-5) 

2678 

2679 Args: 

2680 y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. 

2681 y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. 

2682 

2683 Returns: 

2684 Poisson loss value. shape = `[batch_size, d0, .. dN-1]`. 

2685 

2686 Raises: 

2687 InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes. 

2688 """ 

2689 y_pred = tf.convert_to_tensor(y_pred) 

2690 y_true = tf.cast(y_true, y_pred.dtype) 

2691 return backend.mean( 

2692 y_pred - y_true * tf.math.log(y_pred + backend.epsilon()), axis=-1 

2693 ) 

2694 

2695 

2696@keras_export( 

2697 "keras.losses.cosine_similarity", 

2698 v1=[ 

2699 "keras.metrics.cosine_proximity", 

2700 "keras.metrics.cosine", 

2701 "keras.losses.cosine_proximity", 

2702 "keras.losses.cosine", 

2703 "keras.losses.cosine_similarity", 

2704 ], 

2705) 

2706@tf.__internal__.dispatch.add_dispatch_support 

2707def cosine_similarity(y_true, y_pred, axis=-1): 

2708 """Computes the cosine similarity between labels and predictions. 

2709 

2710    Note that the result is a number between -1 and 1. Because the value 

2711    returned is the negative of the cosine similarity, values closer to -1 

2712    indicate greater similarity, 0 indicates orthogonality, and values 

2713    closer to 1 indicate greater dissimilarity. This makes it usable as a 

2714    loss function in a setting where you try to maximize the proximity 

2715    between predictions and targets. If either `y_true` or `y_pred` is a 

2716    zero vector, cosine similarity will be 0 regardless of the proximity 

2717    between predictions and targets. 

2718 

2719 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` 

2720 

2721 Standalone usage: 

2722 

2723 >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] 

2724 >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] 

2725 >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1) 

2726 >>> loss.numpy() 

2727 array([-0., -0.999, 0.999], dtype=float32) 

2728 

2729 Args: 

2730 y_true: Tensor of true targets. 

2731 y_pred: Tensor of predicted targets. 

2732 axis: Axis along which to determine similarity. 

2733 

2734 Returns: 

2735 Cosine similarity tensor. 

2736 """ 

2737 y_true = tf.linalg.l2_normalize(y_true, axis=axis) 

2738 y_pred = tf.linalg.l2_normalize(y_pred, axis=axis) 

2739 return -tf.reduce_sum(y_true * y_pred, axis=axis) 
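# Small sketch (added; values assumed) of the sign convention: identical
# directions give -1, orthogonal vectors give 0, and opposite directions give
# +1, which is why minimizing this value maximizes similarity.
anchor = [[1.0, 0.0]]
same = cosine_similarity(anchor, [[2.0, 0.0]])         # ~ -1.0
orthogonal = cosine_similarity(anchor, [[0.0, 3.0]])   # ~  0.0
opposite = cosine_similarity(anchor, [[-1.0, 0.0]])    # ~ +1.0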

2740 

2741 

2742@keras_export("keras.losses.CosineSimilarity") 

2743class CosineSimilarity(LossFunctionWrapper): 

2744 """Computes the cosine similarity between labels and predictions. 

2745 

2746    Note that the result is a number between -1 and 1. Because the value 

2747    returned is the negative of the cosine similarity, values closer to -1 

2748    indicate greater similarity, 0 indicates orthogonality, and values closer 

2749    to 1 indicate greater dissimilarity. This makes it usable as a loss 

2750    function in a setting where you try to maximize the proximity between 

2751    predictions and targets. If either `y_true` or `y_pred` is a zero vector, 

2752    cosine similarity will be 0 regardless of the proximity between predictions and targets. 

2753 

2754 `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` 

2755 

2756 Standalone usage: 

2757 

2758 >>> y_true = [[0., 1.], [1., 1.]] 

2759 >>> y_pred = [[1., 0.], [1., 1.]] 

2760 >>> # Using 'auto'/'sum_over_batch_size' reduction type. 

2761 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1) 

2762 >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]] 

2763 >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]] 

2764 >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] 

2765    >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1)) 

2766 >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2 

2767 >>> cosine_loss(y_true, y_pred).numpy() 

2768 -0.5 

2769 

2770 >>> # Calling with 'sample_weight'. 

2771 >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() 

2772 -0.0999 

2773 

2774 >>> # Using 'sum' reduction type. 

2775 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, 

2776 ... reduction=tf.keras.losses.Reduction.SUM) 

2777 >>> cosine_loss(y_true, y_pred).numpy() 

2778 -0.999 

2779 

2780 >>> # Using 'none' reduction type. 

2781 >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, 

2782 ... reduction=tf.keras.losses.Reduction.NONE) 

2783 >>> cosine_loss(y_true, y_pred).numpy() 

2784 array([-0., -0.999], dtype=float32) 

2785 

2786 Usage with the `compile()` API: 

2787 

2788 ```python 

2789 model.compile(optimizer='sgd', 

2790 loss=tf.keras.losses.CosineSimilarity(axis=1)) 

2791 ``` 

2792 

2793 Args: 

2794 axis: The axis along which the cosine similarity is computed 

2795 (the features axis). Defaults to -1. 

2796 reduction: Type of `tf.keras.losses.Reduction` to apply to loss. 

2797 Default value is `AUTO`. `AUTO` indicates that the reduction option will 

2798 be determined by the usage context. For almost all cases this defaults 

2799 to `SUM_OVER_BATCH_SIZE`. When used under a 

2800 `tf.distribute.Strategy`, except via `Model.compile()` and 

2801 `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` 

2802 will raise an error. Please see this custom training [tutorial]( 

2803 https://www.tensorflow.org/tutorials/distribute/custom_training) 

2804 for more details. 

2805 name: Optional name for the instance. 

2806 """ 

2807 

2808 def __init__( 

2809 self, 

2810 axis=-1, 

2811 reduction=losses_utils.ReductionV2.AUTO, 

2812 name="cosine_similarity", 

2813 ): 

2814 super().__init__( 

2815 cosine_similarity, reduction=reduction, name=name, axis=axis 

2816 ) 

2817 

2818 

2819# Aliases. 

2820 

2821bce = BCE = binary_crossentropy 

2822mse = MSE = mean_squared_error 

2823mae = MAE = mean_absolute_error 

2824mape = MAPE = mean_absolute_percentage_error 

2825msle = MSLE = mean_squared_logarithmic_error 

2826kld = KLD = kullback_leibler_divergence = kl_divergence 

2827logcosh = log_cosh 

2828huber_loss = huber 

2829 

2830 

2831def is_categorical_crossentropy(loss): 

2832 result = ( 

2833 isinstance(loss, CategoricalCrossentropy) 

2834 or ( 

2835 isinstance(loss, LossFunctionWrapper) 

2836 and loss.fn == categorical_crossentropy 

2837 ) 

2838 or ( 

2839 hasattr(loss, "__name__") 

2840 and loss.__name__ == "categorical_crossentropy" 

2841 ) 

2842 or (loss == "categorical_crossentropy") 

2843 ) 

2844 return result 
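# Illustrative sketch (added): the helper recognizes the loss whether it is
# given as a string, as the bare function, or as a Loss class instance.
assert is_categorical_crossentropy("categorical_crossentropy")
assert is_categorical_crossentropy(categorical_crossentropy)
assert is_categorical_crossentropy(CategoricalCrossentropy())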

2845 

2846 

2847@keras_export("keras.losses.serialize") 

2848def serialize(loss, use_legacy_format=False): 

2849 """Serializes loss function or `Loss` instance. 

2850 

2851 Args: 

2852 loss: A Keras `Loss` instance or a loss function. 

2853 

2854 Returns: 

2855 Loss configuration dictionary. 

2856 """ 

2857 if use_legacy_format: 

2858 return legacy_serialization.serialize_keras_object(loss) 

2859 return serialize_keras_object(loss) 

2860 

2861 

2862@keras_export("keras.losses.deserialize") 

2863def deserialize(name, custom_objects=None, use_legacy_format=False): 

2864 """Deserializes a serialized loss class/function instance. 

2865 

2866 Args: 

2867 name: Loss configuration. 

2868 custom_objects: Optional dictionary mapping names (strings) to custom 

2869 objects (classes and functions) to be considered during 

2870 deserialization. 

2871 

2872 Returns: 

2873 A Keras `Loss` instance or a loss function. 

2874 """ 

2875 if use_legacy_format: 

2876 return legacy_serialization.deserialize_keras_object( 

2877 name, 

2878 module_objects=globals(), 

2879 custom_objects=custom_objects, 

2880 printable_module_name="loss function", 

2881 ) 

2882 return deserialize_keras_object( 

2883 name, 

2884 module_objects=globals(), 

2885 custom_objects=custom_objects, 

2886 printable_module_name="loss function", 

2887 ) 
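# Hedged round-trip sketch (added; not from the source): serialize a built-in
# loss to its config dictionary and rebuild an equivalent instance from it.
loss = CategoricalCrossentropy(from_logits=True)
config = serialize(loss)
restored = deserialize(config)
# `restored` is expected to behave like the original `loss` instance.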

2888 

2889 

2890@keras_export("keras.losses.get") 

2891def get(identifier): 

2892 """Retrieves a Keras loss as a `function`/`Loss` class instance. 

2893 

2894 The `identifier` may be the string name of a loss function or `Loss` class. 

2895 

2896 >>> loss = tf.keras.losses.get("categorical_crossentropy") 

2897 >>> type(loss) 

2898 <class 'function'> 

2899 >>> loss = tf.keras.losses.get("CategoricalCrossentropy") 

2900 >>> type(loss) 

2901 <class '...keras.losses.CategoricalCrossentropy'> 

2902 

2903 You can also specify `config` of the loss to this function by passing dict 

2904 containing `class_name` and `config` as an identifier. Also note that the 

2905 `class_name` must map to a `Loss` class 

2906 

2907 >>> identifier = {"class_name": "CategoricalCrossentropy", 

2908 ... "config": {"from_logits": True}} 

2909 >>> loss = tf.keras.losses.get(identifier) 

2910 >>> type(loss) 

2911 <class '...keras.losses.CategoricalCrossentropy'> 

2912 

2913 Args: 

2914 identifier: A loss identifier. One of None or string name of a loss 

2915 function/class or loss configuration dictionary or a loss function or a 

2916 loss class instance. 

2917 

2918 Returns: 

2919 A Keras loss as a `function`/ `Loss` class instance. 

2920 

2921 Raises: 

2922 ValueError: If `identifier` cannot be interpreted. 

2923 """ 

2924 if identifier is None: 

2925 return None 

2926 if isinstance(identifier, str): 

2927 identifier = str(identifier) 

2928 use_legacy_format = "module" not in identifier 

2929 return deserialize(identifier, use_legacy_format=use_legacy_format) 

2930 if isinstance(identifier, dict): 

2931 return deserialize(identifier) 

2932 if callable(identifier): 

2933 return identifier 

2934 raise ValueError( 

2935 f"Could not interpret loss function identifier: {identifier}" 

2936 ) 

2937 

2938 

2939LABEL_DTYPES_FOR_LOSSES = { 

2940 tf.compat.v1.losses.sparse_softmax_cross_entropy: "int32", 

2941 sparse_categorical_crossentropy: "int32", 

2942} 

2943