# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities related to loss functions."""

import tensorflow.compat.v2 as tf

from keras.src import backend
from keras.src.engine import keras_tensor
from keras.src.utils import tf_utils

# isort: off
from tensorflow.python.util.tf_export import keras_export


@keras_export("keras.losses.Reduction", v1=[])
class ReductionV2:
    """Types of loss reduction.

    Contains the following values:

    * `AUTO`: Indicates that the reduction option will be determined by the
      usage context. For almost all cases this defaults to
      `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside
      of built-in training loops such as `tf.keras` `compile` and `fit`, we
      expect reduction value to be `SUM` or `NONE`. Using `AUTO` in that case
      will raise an error.
    * `NONE`: No **additional** reduction is applied to the output of the
      wrapped loss function. When non-scalar losses are returned to Keras
      functions like `fit`/`evaluate`, the unreduced vector loss is passed to
      the optimizer but the reported loss will be a scalar value.

      Caution: **Verify the shape of the outputs when using**
      `Reduction.NONE`. The builtin loss functions wrapped by the loss
      classes reduce one dimension (`axis=-1`, or `axis` if specified by the
      loss function). `Reduction.NONE` just means that no **additional**
      reduction is applied by the class wrapper. For categorical losses with
      an example input shape of `[batch, W, H, n_classes]` the `n_classes`
      dimension is reduced. For pointwise losses you must include a dummy
      axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without
      the dummy axis `[batch, W, H]` will be incorrectly reduced to
      `[batch, W]`.

    * `SUM`: Scalar sum of weighted losses.
    * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in
      losses. This reduction type is not supported when used with
      `tf.distribute.Strategy` outside of built-in training loops like
      `tf.keras` `compile`/`fit`.

      You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:

      ```
      with strategy.scope():
          loss_obj = tf.keras.losses.CategoricalCrossentropy(
              reduction=tf.keras.losses.Reduction.NONE)
          ....
          loss = tf.reduce_sum(loss_obj(labels, predictions)) *
              (1. / global_batch_size)
      ```

    Please see the [custom training guide](
    https://www.tensorflow.org/tutorials/distribute/custom_training) for more
    details on this.
    """

    AUTO = "auto"
    NONE = "none"
    SUM = "sum"
    SUM_OVER_BATCH_SIZE = "sum_over_batch_size"

    @classmethod
    def all(cls):
        return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)

    @classmethod
    def validate(cls, key):
        if key not in cls.all():
            raise ValueError(
                f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"'
            )

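# Illustrative sketch, not part of the original module: the reduction keys
# above are plain strings, and `validate` rejects anything outside `all()`.
# The helper name `_example_reduction_validation` is hypothetical.
def _example_reduction_validation():
    ReductionV2.validate(ReductionV2.SUM_OVER_BATCH_SIZE)  # passes silently
    try:
        ReductionV2.validate("mean")  # not a supported reduction key
    except ValueError:
        pass  # expected: "mean" is not in ReductionV2.all()
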
def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None
):
    """Squeeze last dim if ranks differ from expected by exactly 1.

    In the common case where we expect shapes to match, `expected_rank_diff`
    defaults to 0, and we squeeze the last dimension of the larger rank if
    they differ by 1.

    But, for example, if `labels` contains class IDs and `predictions`
    contains 1 probability per class, we expect `predictions` to have 1 more
    dimension than `labels`, so `expected_rank_diff` would be 1. In this
    case, we'd squeeze `labels` if
    `rank(predictions) - rank(labels) == 0`, and `predictions` if
    `rank(predictions) - rank(labels) == 2`.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      labels: Label values, a `Tensor` whose dimensions match `predictions`.
      predictions: Predicted values, a `Tensor` of arbitrary dimensions.
      expected_rank_diff: Expected result of
        `rank(predictions) - rank(labels)`.
      name: Name of the op.

    Returns:
      Tuple of `labels` and `predictions`, possibly with last dim squeezed.
    """
    with backend.name_scope(name or "remove_squeezable_dimensions"):
        if not tf_utils.is_tensor_or_extension_type(predictions):
            predictions = tf.convert_to_tensor(predictions)
        if not tf_utils.is_tensor_or_extension_type(labels):
            labels = tf.convert_to_tensor(labels)
        predictions_shape = predictions.shape
        predictions_rank = predictions_shape.ndims
        labels_shape = labels.shape
        labels_rank = labels_shape.ndims
        if (labels_rank is not None) and (predictions_rank is not None):
            # Use static rank.
            rank_diff = predictions_rank - labels_rank
            if rank_diff == expected_rank_diff + 1 and predictions_shape.dims[
                -1
            ].is_compatible_with(1):
                predictions = tf.squeeze(predictions, [-1])
            elif rank_diff == expected_rank_diff - 1 and labels_shape.dims[
                -1
            ].is_compatible_with(1):
                labels = tf.squeeze(labels, [-1])
            return labels, predictions

        # Use dynamic rank.
        rank_diff = tf.rank(predictions) - tf.rank(labels)
        if (predictions_rank is None) or (
            predictions_shape.dims[-1].is_compatible_with(1)
        ):
            predictions = tf.cond(
                tf.equal(expected_rank_diff + 1, rank_diff),
                lambda: tf.squeeze(predictions, [-1]),
                lambda: predictions,
            )
        if (labels_rank is None) or (
            labels_shape.dims[-1].is_compatible_with(1)
        ):
            labels = tf.cond(
                tf.equal(expected_rank_diff - 1, rank_diff),
                lambda: tf.squeeze(labels, [-1]),
                lambda: labels,
            )
        return labels, predictions

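# Illustrative sketch, not part of the original module: with the default
# `expected_rank_diff=0`, a trailing singleton dimension on the higher-rank
# argument is squeezed so both tensors end up with matching ranks. Assumes
# eager execution; the helper name is hypothetical.
def _example_remove_squeezable_dimensions():
    labels = tf.constant([1.0, 0.0, 1.0])              # shape (3,)
    predictions = tf.constant([[0.9], [0.2], [0.8]])   # shape (3, 1)
    labels, predictions = remove_squeezable_dimensions(labels, predictions)
    # `predictions` is squeezed to shape (3,); `labels` is unchanged.
    return labels, predictions
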
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
    """Squeeze or expand last dimension if needed.

    1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
    (using `remove_squeezable_dimensions`).
    2. Squeezes or expands last dim of `sample_weight` if its rank differs by
    1 from the new rank of `y_pred`. If `sample_weight` is scalar, it is kept
    scalar.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
      y_true: Optional label `Tensor` whose dimensions match `y_pred`.
      sample_weight: Optional weight scalar or `Tensor` whose dimensions
        match `y_pred`.

    Returns:
      Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly
      has the last dimension squeezed, and `sample_weight` could be extended
      by one dimension. If `sample_weight` is None, (y_pred, y_true) is
      returned.
    """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:

        # If a sparse matrix is provided as `y_true`, the last dimension in
        # `y_pred` may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)), y_pred =
        # [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3)). In
        # this case, we should not try to remove the squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = tf.rank(y_pred) - tf.rank(y_true)
            squeeze_dims = lambda: remove_squeezable_dimensions(y_true, y_pred)
            is_last_dim_1 = tf.equal(1, tf.shape(y_pred)[-1])
            maybe_squeeze_dims = lambda: tf.cond(
                is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred)
            )
            y_true, y_pred = tf.cond(
                tf.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims
            )

    if sample_weight is None:
        return y_pred, y_true

    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = tf.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = tf.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = tf.rank(sample_weight)
    rank_diff = weights_rank_tensor - tf.rank(y_pred)
    maybe_squeeze_weights = lambda: tf.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        expand_weights = lambda: tf.expand_dims(sample_weight, [-1])
        return tf.cond(
            tf.equal(rank_diff, -1), expand_weights, lambda: sample_weight
        )

    def _maybe_adjust_weights():
        return tf.cond(
            tf.equal(rank_diff, 1), maybe_squeeze_weights, _maybe_expand_weights
        )

    # Squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = tf.cond(
        tf.equal(weights_rank_tensor, 0),
        lambda: sample_weight,
        _maybe_adjust_weights,
    )
    return y_pred, y_true, sample_weight

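# Illustrative sketch, not part of the original module: a `sample_weight`
# whose rank exceeds that of `y_pred` by one has its trailing singleton
# dimension squeezed so the weights broadcast cleanly against the losses.
# Assumes eager execution; the helper name is hypothetical.
def _example_squeeze_or_expand_dimensions():
    y_pred = tf.constant([0.8, 0.3, 0.6])                # shape (3,)
    y_true = tf.constant([1.0, 0.0, 1.0])                # shape (3,)
    sample_weight = tf.constant([[1.0], [2.0], [0.5]])   # shape (3, 1)
    y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
        y_pred, y_true, sample_weight
    )
    # `sample_weight` is squeezed to shape (3,), matching the rank of
    # `y_pred`; `y_pred` and `y_true` are unchanged.
    return y_pred, y_true, sample_weight
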
def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.

    Args:
      losses: `Tensor` whose elements contain individual loss measurements.
      num_present: The number of measurable elements in `losses`.

    Returns:
      A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
    """
    total_loss = tf.reduce_sum(losses)
    return tf.math.divide_no_nan(total_loss, num_present, name="value")

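# Illustrative sketch, not part of the original module: `tf.math.divide_no_nan`
# is what makes the mean "safe", returning 0 instead of NaN when
# `num_present` is zero. Assumes eager execution; the helper name is
# hypothetical.
def _example_safe_mean():
    losses = tf.constant([1.0, 3.0])
    mean = _safe_mean(losses, tf.constant(2.0))        # 2.0
    empty_mean = _safe_mean(tf.zeros([0]), tf.constant(0.0))  # 0.0, not NaN
    return mean, empty_mean
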
def _num_elements(losses):
    """Computes the number of elements in `losses` tensor."""
    with backend.name_scope("num_elements") as scope:
        return tf.cast(tf.size(losses, name=scope), dtype=losses.dtype)


def reduce_weighted_loss(
    weighted_losses, reduction=ReductionV2.SUM_OVER_BATCH_SIZE
):
    """Reduces the individual weighted loss measurements."""
    if reduction == ReductionV2.NONE:
        loss = weighted_losses
    else:
        loss = tf.reduce_sum(weighted_losses)
        if reduction == ReductionV2.SUM_OVER_BATCH_SIZE:
            loss = _safe_mean(loss, _num_elements(weighted_losses))
    return loss

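# Illustrative sketch, not part of the original module: how the three
# reduction modes relate on a concrete vector of weighted losses. Assumes
# eager execution; the helper name is hypothetical.
def _example_reduce_weighted_loss():
    weighted_losses = tf.constant([1.0, 2.0, 3.0, 6.0])
    unreduced = reduce_weighted_loss(weighted_losses, ReductionV2.NONE)
    total = reduce_weighted_loss(weighted_losses, ReductionV2.SUM)  # 12.0
    mean = reduce_weighted_loss(
        weighted_losses, ReductionV2.SUM_OVER_BATCH_SIZE
    )  # 12.0 / 4 = 3.0
    return unreduced, total, mean
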
@keras_export("keras.__internal__.losses.compute_weighted_loss", v1=[])
def compute_weighted_loss(
    losses,
    sample_weight=None,
    reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
    name=None,
):
    """Computes the weighted loss.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same
        rank as `losses`, or is broadcastable to `losses`.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `SUM_OVER_BATCH_SIZE`.
      name: Optional name for the op.

    Raises:
      ValueError: If the shape of `sample_weight` is not compatible with
        `losses`.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
    """
    ReductionV2.validate(reduction)

    # If this function is called directly, then we just default 'AUTO' to
    # 'SUM_OVER_BATCH_SIZE'. Eg. Canned estimator use cases.
    if reduction == ReductionV2.AUTO:
        reduction = ReductionV2.SUM_OVER_BATCH_SIZE
    if sample_weight is None:
        sample_weight = 1.0
    with backend.name_scope(name or "weighted_loss"):
        # Save the `reduction` argument for loss normalization when
        # distributing to multiple replicas. Used only for estimator + v1
        # optimizer flow.
        tf.compat.v1.get_default_graph()._last_loss_reduction = reduction

        if not isinstance(losses, (keras_tensor.KerasTensor, tf.RaggedTensor)):
            losses = tf.convert_to_tensor(losses)

        if not isinstance(
            sample_weight, (keras_tensor.KerasTensor, tf.RaggedTensor)
        ):
            sample_weight = tf.convert_to_tensor(sample_weight)

        # Convert any non-float dtypes to floats, to avoid losing precision
        # for dtypes like int or bool.
        if not losses.dtype.is_floating:
            input_dtype = losses.dtype
            losses = tf.cast(losses, "float32")
            input_casted = True
        else:
            input_casted = False
        sample_weight = tf.cast(sample_weight, losses.dtype)
        # Update dimensions of `sample_weight` to match with `losses` if
        # possible.
        (
            losses,
            _,
            sample_weight,
        ) = squeeze_or_expand_dimensions(losses, None, sample_weight)
        weighted_losses = tf.multiply(losses, sample_weight)

        # Apply reduction function to the individual weighted losses.
        loss = reduce_weighted_loss(weighted_losses, reduction)
        if input_casted:
            # Convert the result back to the input type.
            loss = tf.cast(loss, input_dtype)
        return loss

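# Illustrative sketch, not part of the original module: per-sample losses are
# multiplied by the sample weights and, under the default
# SUM_OVER_BATCH_SIZE reduction, averaged over all elements:
# sum(losses * sample_weight) / num_elements. Assumes eager execution; the
# helper name is hypothetical.
def _example_compute_weighted_loss():
    losses = tf.constant([1.0, 2.0, 3.0])
    sample_weight = tf.constant([1.0, 0.5, 0.0])
    loss = compute_weighted_loss(losses, sample_weight)
    # (1*1.0 + 2*0.5 + 3*0.0) / 3 = 2.0 / 3
    return loss
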
def scale_loss_for_distribution(loss_value):
    """Scales and returns the given loss value by the number of replicas."""
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    if num_replicas > 1:
        loss_value *= 1.0 / num_replicas
    return loss_value

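# Illustrative sketch, not part of the original module: outside any
# distribution strategy `num_replicas_in_sync` is 1, so the loss comes back
# unchanged; under, say, a 4-replica strategy it would be scaled by 1/4 so
# that summing per-replica contributions recovers the global average. The
# helper name is hypothetical.
def _example_scale_loss_for_distribution():
    loss = tf.constant(8.0)
    return scale_loss_for_distribution(loss)  # 8.0 under the default strategy
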
def cast_losses_to_common_dtype(losses):
    """Cast a list of losses to a common dtype.

    If any loss is floating-point, they will all be cast to the most precise
    floating-point dtype present. Otherwise the losses are not cast. We also
    skip casting losses if there are any complex losses.

    Args:
      losses: A list of losses.

    Returns:
      `losses`, but they have been cast to a common dtype.
    """
    highest_float = None
    for loss in losses:
        if loss.dtype.is_floating:
            if highest_float is None or loss.dtype.size > highest_float.size:
                highest_float = loss.dtype
            elif {loss.dtype, highest_float} == {"bfloat16", "float16"}:
                highest_float = "float32"
        if loss.dtype.is_complex:
            return (
                losses  # If we find any complex losses, do not cast any losses
            )
    if highest_float:
        losses = [tf.cast(loss, highest_float) for loss in losses]
    return losses

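# Illustrative sketch, not part of the original module: a float16 loss and a
# float32 loss are both cast to float32, the most precise floating dtype in
# the list. Assumes eager execution; the helper name is hypothetical.
def _example_cast_losses_to_common_dtype():
    losses = [
        tf.constant(1.0, dtype="float16"),
        tf.constant(2.0, dtype="float32"),
    ]
    losses = cast_losses_to_common_dtype(losses)
    # Both entries now have dtype float32.
    return losses
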
def get_mask(y_p):
    """Returns Keras mask from tensor."""
    return getattr(y_p, "_keras_mask", None)


def apply_mask(y_p, sw, mask):
    """Applies any mask on predictions to sample weights."""
    if mask is not None:
        mask = tf.cast(mask, y_p.dtype)
        if sw is not None:
            sw = tf.cast(sw, mask.dtype)
            mask, _, sw = squeeze_or_expand_dimensions(mask, sample_weight=sw)
            sw *= mask
        else:
            sw = mask
    return sw

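# Illustrative sketch, not part of the original module: a boolean output mask
# (e.g. produced by a `Masking` layer) zeroes out the sample weights of
# masked positions; if no explicit sample weights are given, the mask itself
# becomes the weights. Assumes eager execution; the helper name is
# hypothetical.
def _example_apply_mask():
    y_p = tf.constant([[0.5], [0.2], [0.9]])
    sw = tf.constant([1.0, 1.0, 1.0])
    mask = tf.constant([True, False, True])
    sw = apply_mask(y_p, sw, mask)
    # sw is now [1.0, 0.0, 1.0]; the masked entry no longer contributes.
    return sw
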
def apply_valid_mask(losses, sw, mask, reduction):
    """Redistribute sample weights considering only valid entries."""
    if mask is not None:
        mask = tf.cast(mask, losses.dtype)

        if reduction in (ReductionV2.AUTO, ReductionV2.SUM_OVER_BATCH_SIZE):
            # Valid entries have weight `total/valid`, while invalid ones
            # have 0. When summed over batch, they will be reduced to:
            #
            # mean(loss * sample_weight * total / valid)
            #   = sum(loss * sample_weight * total / valid) / total
            #   = sum(loss * sample_weight) / total * total / valid
            #   = sum(loss * sample_weight) / valid

            total = tf.cast(tf.size(mask), losses.dtype)
            valid = tf.reduce_sum(mask)
            mask *= total / valid

    return apply_mask(losses, sw, mask)

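# Illustrative sketch, not part of the original module: under a
# SUM_OVER_BATCH_SIZE reduction the surviving entries are up-weighted by
# total/valid, so the later division by the *total* element count behaves
# like a mean over the *valid* entries only. Assumes eager execution; the
# helper name is hypothetical.
def _example_apply_valid_mask():
    losses = tf.constant([1.0, 2.0, 3.0, 4.0])
    mask = tf.constant([True, True, False, False])
    sw = apply_valid_mask(losses, None, mask, ReductionV2.SUM_OVER_BATCH_SIZE)
    # sw is [2.0, 2.0, 0.0, 0.0]: total=4, valid=2, so kept entries get 4/2.
    return sw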