
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities related to loss functions."""

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src.engine import keras_tensor
from keras.src.utils import tf_utils

# isort: off
from tensorflow.python.util.tf_export import keras_export


@keras_export("keras.losses.Reduction", v1=[])
class ReductionV2:
    """Types of loss reduction.

    Contains the following values:

    * `AUTO`: Indicates that the reduction option will be determined by the
      usage context. For almost all cases this defaults to
      `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside
      of built-in training loops such as `tf.keras` `compile` and `fit`, we
      expect the reduction value to be `SUM` or `NONE`. Using `AUTO` in that
      case will raise an error.
    * `NONE`: No **additional** reduction is applied to the output of the
      wrapped loss function. When non-scalar losses are returned to Keras
      functions like `fit`/`evaluate`, the unreduced vector loss is passed to
      the optimizer but the reported loss will be a scalar value.

      Caution: **Verify the shape of the outputs when using**
      `Reduction.NONE`. The builtin loss functions wrapped by the loss
      classes reduce one dimension (`axis=-1`, or `axis` if specified by the
      loss function). `Reduction.NONE` just means that no **additional**
      reduction is applied by the class wrapper. For categorical losses with
      an example input shape of `[batch, W, H, n_classes]` the `n_classes`
      dimension is reduced. For pointwise losses you must include a dummy
      axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without
      the dummy axis `[batch, W, H]` will be incorrectly reduced to
      `[batch, W]`.

    * `SUM`: Scalar sum of weighted losses.
    * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in
      losses. This reduction type is not supported when used with
      `tf.distribute.Strategy` outside of built-in training loops like
      `tf.keras` `compile`/`fit`.

      You can implement 'SUM_OVER_BATCH_SIZE' using the global batch size
      like:

      ```
      with strategy.scope():
          loss_obj = tf.keras.losses.CategoricalCrossentropy(
              reduction=tf.keras.losses.Reduction.NONE)
          ....
          loss = tf.reduce_sum(loss_obj(labels, predictions)) * (
              1.0 / global_batch_size
          )
      ```

    Please see the [custom training guide](
    https://www.tensorflow.org/tutorials/distribute/custom_training) for more
    details on this.
    """

    AUTO = "auto"
    NONE = "none"
    SUM = "sum"
    SUM_OVER_BATCH_SIZE = "sum_over_batch_size"

    @classmethod
    def all(cls):
        return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)

    @classmethod
    def validate(cls, key):
        if key not in cls.all():
            raise ValueError(
                f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"'
            )
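# Illustrative sketch (not part of the original module): `validate` accepts
# only the four string constants defined above and raises for anything else.
def _example_reduction_validate():
    ReductionV2.validate(ReductionV2.SUM)  # passes silently
    try:
        ReductionV2.validate("mean")  # not a valid reduction key
    except ValueError as e:
        print(e)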

def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None
):
    """Squeeze last dim if ranks differ from expected by exactly 1.

    In the common case where we expect shapes to match, `expected_rank_diff`
    defaults to 0, and we squeeze the last dimension of the larger rank if
    they differ by 1.

    But, for example, if `labels` contains class IDs and `predictions`
    contains 1 probability per class, we expect `predictions` to have 1 more
    dimension than `labels`, so `expected_rank_diff` would be 1. In this
    case, we'd squeeze `labels` if `rank(predictions) - rank(labels) == 0`,
    and `predictions` if `rank(predictions) - rank(labels) == 2`.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      labels: Label values, a `Tensor` whose dimensions match `predictions`.
      predictions: Predicted values, a `Tensor` of arbitrary dimensions.
      expected_rank_diff: Expected result of
        `rank(predictions) - rank(labels)`.
      name: Name of the op.

    Returns:
      Tuple of `labels` and `predictions`, possibly with last dim squeezed.
    """
    with backend.name_scope(name or "remove_squeezable_dimensions"):
        if not tf_utils.is_tensor_or_extension_type(predictions):
            predictions = tf.convert_to_tensor(predictions)
        if not tf_utils.is_tensor_or_extension_type(labels):
            labels = tf.convert_to_tensor(labels)
        predictions_shape = predictions.shape
        predictions_rank = predictions_shape.ndims
        labels_shape = labels.shape
        labels_rank = labels_shape.ndims
        if (labels_rank is not None) and (predictions_rank is not None):
            # Use static rank.
            rank_diff = predictions_rank - labels_rank
            if rank_diff == expected_rank_diff + 1 and predictions_shape.dims[
                -1
            ].is_compatible_with(1):
                predictions = tf.squeeze(predictions, [-1])
            elif rank_diff == expected_rank_diff - 1 and labels_shape.dims[
                -1
            ].is_compatible_with(1):
                labels = tf.squeeze(labels, [-1])
            return labels, predictions

        # Use dynamic rank.
        rank_diff = tf.rank(predictions) - tf.rank(labels)
        if (predictions_rank is None) or (
            predictions_shape.dims[-1].is_compatible_with(1)
        ):
            predictions = tf.cond(
                tf.equal(expected_rank_diff + 1, rank_diff),
                lambda: tf.squeeze(predictions, [-1]),
                lambda: predictions,
            )
        if (labels_rank is None) or (
            labels_shape.dims[-1].is_compatible_with(1)
        ):
            labels = tf.cond(
                tf.equal(expected_rank_diff - 1, rank_diff),
                lambda: tf.squeeze(labels, [-1]),
                lambda: labels,
            )
        return labels, predictions
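# Illustrative sketch (not part of the original module): with
# `expected_rank_diff=1`, sparse integer labels of shape `[batch, 1]` are
# squeezed to `[batch]` so they line up with `[batch, n_classes]` logits.
def _example_remove_squeezable_dimensions():
    labels = tf.constant([[1], [0]])  # shape (2, 1)
    predictions = tf.constant([[0.1, 0.9], [0.8, 0.2]])  # shape (2, 2)
    labels, predictions = remove_squeezable_dimensions(
        labels, predictions, expected_rank_diff=1
    )
    # labels.shape == (2,), predictions.shape == (2, 2)
    return labels, predictions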

def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
    """Squeeze or expand last dimension if needed.

    1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
       (using `remove_squeezable_dimensions`).
    2. Squeezes or expands last dim of `sample_weight` if its rank differs
       by 1 from the new rank of `y_pred`. If `sample_weight` is scalar, it
       is kept scalar.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
      y_true: Optional label `Tensor` whose dimensions match `y_pred`.
      sample_weight: Optional weight scalar or `Tensor` whose dimensions
        match `y_pred`.

    Returns:
      Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly
      has the last dimension squeezed; `sample_weight` could be extended by
      one dimension. If `sample_weight` is None, `(y_pred, y_true)` is
      returned.
    """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:
        # If a sparse matrix is provided as `y_true`, the last dimension in
        # `y_pred` may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
        # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]
        # (shape=(3, 3)). In this case, we should not try to remove the
        # squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = tf.rank(y_pred) - tf.rank(y_true)
            squeeze_dims = lambda: remove_squeezable_dimensions(y_true, y_pred)
            is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1])
            maybe_squeeze_dims = lambda: tf.cond(
                is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred)
            )
            y_true, y_pred = tf.cond(
                tf.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims
            )

    if sample_weight is None:
        return y_pred, y_true

    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = tf.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = tf.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = tf.rank(sample_weight)
    rank_diff = weights_rank_tensor - tf.rank(y_pred)
    maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        expand_weights = lambda: tf.expand_dims(sample_weight, [-1])
        return tf.cond(
            tf.equal(rank_diff, -1), expand_weights, lambda: sample_weight
        )

    def _maybe_adjust_weights():
        return tf.cond(
            tf.equal(rank_diff, 1), maybe_squeeze_weights, _maybe_expand_weights
        )

    # Squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = tf.cond(
        tf.equal(weights_rank_tensor, 0),
        lambda: sample_weight,
        _maybe_adjust_weights,
    )
    return y_pred, y_true, sample_weight
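# Illustrative sketch (not part of the original module): a per-sample weight
# vector of shape `[batch]` is expanded to `[batch, 1]` so it broadcasts
# against predictions of shape `[batch, 1]`.
def _example_squeeze_or_expand_dimensions():
    y_pred = tf.ones([4, 1])
    sample_weight = tf.constant([1.0, 1.0, 0.5, 0.0])  # shape (4,)
    y_pred, _, sample_weight = squeeze_or_expand_dimensions(
        y_pred, None, sample_weight
    )
    # sample_weight.shape == (4, 1), matching the rank of y_pred.
    return y_pred, sample_weight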

def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.

    Args:
      losses: `Tensor` whose elements contain individual loss measurements.
      num_present: The number of measurable elements in `losses`.

    Returns:
      A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
    """
    total_loss = tf.reduce_sum(losses)
    return tf.math.divide_no_nan(total_loss, num_present, name="value")

def _num_elements(losses):
    """Computes the number of elements in the `losses` tensor."""
    with backend.name_scope("num_elements") as scope:
        return tf.cast(tf.size(losses, name=scope), dtype=losses.dtype)

def reduce_weighted_loss(
    weighted_losses, reduction=ReductionV2.SUM_OVER_BATCH_SIZE
):
    """Reduces the individual weighted loss measurements."""
    if reduction == ReductionV2.NONE:
        loss = weighted_losses
    else:
        loss = tf.reduce_sum(weighted_losses)
        if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
            loss = _safe_mean(loss, _num_elements(weighted_losses))
    return loss
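# Illustrative sketch (not part of the original module): the three
# non-`AUTO` reductions applied to the same weighted losses.
def _example_reduce_weighted_loss():
    weighted_losses = tf.constant([1.0, 2.0, 3.0])
    unreduced = reduce_weighted_loss(weighted_losses, ReductionV2.NONE)
    # unreduced == [1.0, 2.0, 3.0]
    total = reduce_weighted_loss(weighted_losses, ReductionV2.SUM)
    # total == 6.0
    mean = reduce_weighted_loss(
        weighted_losses, ReductionV2.SUM_OVER_BATCH_SIZE
    )
    # mean == 6.0 / 3 == 2.0
    return unreduced, total, mean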

@keras_export("keras.__internal__.losses.compute_weighted_loss", v1=[])
def compute_weighted_loss(
    losses,
    sample_weight=None,
    reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
    name=None,
):
    """Computes the weighted loss.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same
        rank as `losses`, or broadcastable to `losses`.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        the loss. Default value is `SUM_OVER_BATCH_SIZE`.
      name: Optional name for the op.

    Raises:
      ValueError: If the shape of `sample_weight` is not compatible with
        `losses`.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
    """
    ReductionV2.validate(reduction)

    # If this function is called directly, then we just default 'AUTO' to
    # 'SUM_OVER_BATCH_SIZE'. Eg. Canned estimator use cases.
    if reduction == ReductionV2.AUTO:
        reduction = ReductionV2.SUM_OVER_BATCH_SIZE
    if sample_weight is None:
        sample_weight = 1.0
    with backend.name_scope(name or "weighted_loss"):
        # Save the `reduction` argument for loss normalization when
        # distributing to multiple replicas. Used only for the
        # estimator + v1 optimizer flow.
        tf.compat.v1.get_default_graph()._last_loss_reduction = reduction

        if not isinstance(losses, (keras_tensor.KerasTensor, tf.RaggedTensor)):
            losses = tf.convert_to_tensor(losses)

        if not isinstance(
            sample_weight, (keras_tensor.KerasTensor, tf.RaggedTensor)
        ):
            sample_weight = tf.convert_to_tensor(sample_weight)

        # Convert any non-float dtypes to floats so that no precision is
        # lost for dtypes like int or bool.
        if not losses.dtype.is_floating:
            input_dtype = losses.dtype
            losses = tf.cast(losses, "float32")
            input_casted = True
        else:
            input_casted = False
        sample_weight = tf.cast(sample_weight, losses.dtype)
        # Update dimensions of `sample_weight` to match `losses` if
        # possible.
        (
            losses,
            _,
            sample_weight,
        ) = squeeze_or_expand_dimensions(losses, None, sample_weight)
        weighted_losses = tf.multiply(losses, sample_weight)

        # Apply the reduction function to the individual weighted losses.
        loss = reduce_weighted_loss(weighted_losses, reduction)
        if input_casted:
            # Convert the result back to the input type.
            loss = tf.cast(loss, input_dtype)
        return loss
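# Illustrative sketch (not part of the original module): per-element losses
# weighted by a 0/1 sample weight, then averaged over *all* elements
# (`SUM_OVER_BATCH_SIZE` divides by the total element count, not by the
# number of non-zero weights).
def _example_compute_weighted_loss():
    losses = tf.constant([1.0, 2.0, 3.0, 4.0])
    sample_weight = tf.constant([1.0, 1.0, 0.0, 0.0])
    loss = compute_weighted_loss(losses, sample_weight)
    # (1*1 + 2*1 + 3*0 + 4*0) / 4 == 0.75
    return loss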

def scale_loss_for_distribution(loss_value):
    """Scales and returns the given loss value by the number of replicas."""
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    if num_replicas > 1:
        loss_value *= 1.0 / num_replicas
    return loss_value
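# Illustrative sketch (not part of the original module): under the default
# single-replica strategy `num_replicas_in_sync` is 1, so the loss is
# returned unchanged; inside e.g. a 4-replica MirroredStrategy scope it
# would be multiplied by 1/4.
def _example_scale_loss_for_distribution():
    loss = scale_loss_for_distribution(tf.constant(4.0))  # still 4.0 here
    return loss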

def cast_losses_to_common_dtype(losses):
    """Cast a list of losses to a common dtype.

    If any loss is floating-point, they will all be cast to the most precise
    floating-point loss. Otherwise the losses are not cast. We also skip
    casting losses if there are any complex losses.

    Args:
      losses: A list of losses.

    Returns:
      `losses`, but they have been cast to a common dtype.
    """
    highest_float = None
    for loss in losses:
        if loss.dtype.is_floating:
            if highest_float is None or loss.dtype.size > highest_float.size:
                highest_float = loss.dtype
            elif {loss.dtype, highest_float} == {"bfloat16", "float16"}:
                highest_float = "float32"
        if loss.dtype.is_complex:
            # If we find any complex losses, do not cast any losses.
            return losses
    if highest_float:
        losses = [tf.cast(loss, highest_float) for loss in losses]
    return losses
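# Illustrative sketch (not part of the original module): mixed float16 and
# float32 losses are promoted to the most precise floating dtype present.
def _example_cast_losses_to_common_dtype():
    losses = [
        tf.constant(1.0, dtype=tf.float16),
        tf.constant(2.0, dtype=tf.float32),
    ]
    losses = cast_losses_to_common_dtype(losses)
    # Both entries now have dtype float32.
    return losses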

def get_mask(y_p):
    """Returns the Keras mask from a tensor."""
    return getattr(y_p, "_keras_mask", None)

def apply_mask(y_p, sw, mask):
    """Applies any mask on predictions to sample weights."""
    if mask is not None:
        mask = tf.cast(mask, y_p.dtype)
        if sw is not None:
            sw = tf.cast(sw, mask.dtype)
            mask, _, sw = squeeze_or_expand_dimensions(mask, sample_weight=sw)
            sw *= mask
        else:
            sw = mask
    return sw
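# Illustrative sketch (not part of the original module): a boolean Keras
# mask is cast to the prediction dtype and folded into the sample weights,
# so masked-out positions end up with weight 0.
def _example_apply_mask():
    y_pred = tf.constant([0.2, 0.7, 0.9])
    sample_weight = tf.constant([1.0, 1.0, 1.0])
    mask = tf.constant([True, True, False])
    sw = apply_mask(y_pred, sample_weight, mask)
    # sw == [1.0, 1.0, 0.0]
    return sw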

def apply_valid_mask(losses, sw, mask, reduction):
    """Redistributes sample weights considering only valid entries."""
    if mask is not None:
        mask = tf.cast(mask, losses.dtype)

        if reduction in (ReductionV2.AUTO, ReductionV2.SUM_OVER_BATCH_SIZE):
            # Valid entries have weight `total/valid`, while invalid ones
            # have 0. When summed over the batch, they will be reduced to:
            #
            #   mean(loss * sample_weight * total / valid)
            #     = sum(loss * sample_weight * total / valid) / total
            #     = sum(loss * sample_weight) / total * total / valid
            #     = sum(loss * sample_weight) / valid
            total = tf.cast(tf.size(mask), losses.dtype)
            valid = tf.reduce_sum(mask)
            mask *= total / valid

    return apply_mask(losses, sw, mask)
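# Illustrative sketch (not part of the original module): with
# `SUM_OVER_BATCH_SIZE`, `apply_valid_mask` rescales the mask by
# total/valid so that averaging over all entries equals averaging over the
# valid entries only.
def _example_apply_valid_mask():
    losses = tf.constant([1.0, 2.0, 3.0, 100.0])  # last entry is invalid
    sample_weight = tf.constant([1.0, 1.0, 1.0, 1.0])
    mask = tf.constant([True, True, True, False])
    sw = apply_valid_mask(
        losses, sample_weight, mask, ReductionV2.SUM_OVER_BATCH_SIZE
    )
    # sw == [4/3, 4/3, 4/3, 0]; mean(losses * sw) == mean([1, 2, 3]) == 2.0
    return sw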