Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/optimizers/utils.py: 13% (69 statements)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer utilities."""

import tensorflow.compat.v2 as tf

# isort: off
from tensorflow.python.platform import tf_logging as logging

def all_reduce_sum_gradients(grads_and_vars):
    """Returns all-reduced gradients aggregated via summation.

    Args:
        grads_and_vars: List of (gradient, variable) pairs.

    Returns:
        List of (gradient, variable) pairs where gradients have been
        all-reduced.
    """
    grads_and_vars = list(grads_and_vars)
    filtered_grads_and_vars = filter_empty_gradients(grads_and_vars)
    if filtered_grads_and_vars:
        if tf.__internal__.distribute.strategy_supports_no_merge_call():
            grads = [pair[0] for pair in filtered_grads_and_vars]
            reduced = tf.distribute.get_replica_context().all_reduce(
                tf.distribute.ReduceOp.SUM, grads
            )
        else:
            # TODO(b/183257003): Remove this branch
            reduced = tf.distribute.get_replica_context().merge_call(
                _all_reduce_sum_fn, args=(filtered_grads_and_vars,)
            )
    else:
        reduced = []
    # Copy 'reduced' but add None gradients back in
    reduced_with_nones = []
    reduced_pos = 0
    for g, v in grads_and_vars:
        if g is None:
            reduced_with_nones.append((None, v))
        else:
            reduced_with_nones.append((reduced[reduced_pos], v))
            reduced_pos += 1
    assert reduced_pos == len(reduced), "Failed to add all gradients"
    return reduced_with_nones
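

# --- Illustrative usage sketch (not part of the original module) ---
# Under a single-device MirroredStrategy, each replica computes its own
# gradients inside `strategy.run`, and `all_reduce_sum_gradients` sums them
# across replicas while preserving `None` entries. The helper name and the
# device list below are assumptions made for this example only.
def _example_all_reduce_sum_gradients():
    strategy = tf.distribute.MirroredStrategy(devices=["/cpu:0"])
    with strategy.scope():
        v = tf.Variable(1.0)

    def replica_step():
        with tf.GradientTape() as tape:
            loss = 2.0 * v
        grads = tape.gradient(loss, [v])
        # Returns [(summed_gradient, v)]; with one replica the sum is a no-op.
        return all_reduce_sum_gradients(zip(grads, [v]))

    return strategy.run(replica_step)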


def filter_empty_gradients(grads_and_vars):
    """Filter out `(grad, var)` pairs that have a gradient equal to `None`."""
    grads_and_vars = tuple(grads_and_vars)
    if not grads_and_vars:
        return grads_and_vars

    filtered = []
    vars_with_empty_grads = []
    for grad, var in grads_and_vars:
        if grad is None:
            vars_with_empty_grads.append(var)
        else:
            filtered.append((grad, var))
    filtered = tuple(filtered)

    if not filtered:
        variable = ([v.name for _, v in grads_and_vars],)
        raise ValueError(
            f"No gradients provided for any variable: {variable}. "
            f"Provided `grads_and_vars` is {grads_and_vars}."
        )
    if vars_with_empty_grads:
        logging.warning(
            "Gradients do not exist for variables %s when minimizing the "
            "loss. If you're using `model.compile()`, did you forget to "
            "provide a `loss` argument?",
            ([v.name for v in vars_with_empty_grads]),
        )
    return filtered
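

# --- Illustrative usage sketch (not part of the original module) ---
# `filter_empty_gradients` drops `(None, var)` pairs and logs a warning that
# names the affected variables. The helper name and toy values below are
# assumptions made for this example only.
def _example_filter_empty_gradients():
    v1 = tf.Variable(1.0, name="used")
    v2 = tf.Variable(2.0, name="unused")
    grads_and_vars = [(tf.constant(0.5), v1), (None, v2)]
    # Only (0.5, v1) survives; "unused" is reported in the warning.
    return filter_empty_gradients(grads_and_vars)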


def make_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for clipping by norm."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipnorm` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
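

# --- Illustrative usage sketch (not part of the original module) ---
# Each gradient is clipped independently to an L2 norm of at most `clipnorm`.
# The helper name and toy values below are assumptions made for this example.
def _example_make_gradient_clipnorm_fn():
    clip_fn = make_gradient_clipnorm_fn(clipnorm=1.0)
    v = tf.Variable([0.0, 0.0])
    grads_and_vars = [(tf.constant([3.0, 4.0]), v)]  # gradient norm is 5.0
    # The gradient is rescaled to [0.6, 0.8] (norm 1.0); the variable is kept.
    return clip_fn(grads_and_vars)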


def make_global_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for clipping by global norm."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`global_clipnorm` is not supported with "
                "`CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        grads, variables = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(grads, clipnorm)
        clipped_grads_and_vars = list(zip(clipped_grads, variables))
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
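

# --- Illustrative usage sketch (not part of the original module) ---
# Unlike per-tensor `clipnorm`, the global variant scales *all* gradients by
# the same factor so that their combined norm is at most `clipnorm`. The
# helper name and toy values below are assumptions made for this example.
def _example_make_global_gradient_clipnorm_fn():
    clip_fn = make_global_gradient_clipnorm_fn(clipnorm=1.0)
    v1, v2 = tf.Variable([0.0]), tf.Variable([0.0])
    grads_and_vars = [(tf.constant([3.0]), v1), (tf.constant([4.0]), v2)]
    # Global norm is 5.0, so both gradients are scaled by 1/5.
    return clip_fn(grads_and_vars)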


def make_gradient_clipvalue_fn(clipvalue):
    """Creates a gradient transformation function for clipping by value."""
    if clipvalue is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipvalue_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipvalue` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_value(g, -clipvalue, clipvalue), v)
            for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipvalue_fn
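

# --- Illustrative usage sketch (not part of the original module) ---
# Each gradient element is clamped into [-clipvalue, clipvalue]. The helper
# name and toy values below are assumptions made for this example only.
def _example_make_gradient_clipvalue_fn():
    clip_fn = make_gradient_clipvalue_fn(clipvalue=0.5)
    v = tf.Variable([0.0, 0.0, 0.0])
    grads_and_vars = [(tf.constant([2.0, -3.0, 0.1]), v)]
    # The gradient becomes [0.5, -0.5, 0.1]; the variable is unchanged.
    return clip_fn(grads_and_vars)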


# `merge_call` target: sums gradients across replicas through the strategy's
# cross-replica reduction.
def _all_reduce_sum_fn(distribution, grads_and_vars):
    return distribution.extended.batch_reduce_to(
        tf.distribute.ReduceOp.SUM, grads_and_vars
    )