Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/optimizers/utils.py: 13% (69 statements)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer utilities."""

import tensorflow.compat.v2 as tf

# isort: off
from tensorflow.python.platform import tf_logging as logging

def all_reduce_sum_gradients(grads_and_vars):
    """Returns all-reduced gradients aggregated via summation.

    Args:
        grads_and_vars: List of (gradient, variable) pairs.

    Returns:
        List of (gradient, variable) pairs where gradients have been
        all-reduced.
    """
    grads_and_vars = list(grads_and_vars)
    filtered_grads_and_vars = filter_empty_gradients(grads_and_vars)
    if filtered_grads_and_vars:
        if tf.__internal__.distribute.strategy_supports_no_merge_call():
            grads = [pair[0] for pair in filtered_grads_and_vars]
            reduced = tf.distribute.get_replica_context().all_reduce(
                tf.distribute.ReduceOp.SUM, grads
            )
        else:
            # TODO(b/183257003): Remove this branch
            reduced = tf.distribute.get_replica_context().merge_call(
                _all_reduce_sum_fn, args=(filtered_grads_and_vars,)
            )
    else:
        reduced = []
    # Copy 'reduced' but add None gradients back in
    reduced_with_nones = []
    reduced_pos = 0
    for g, v in grads_and_vars:
        if g is None:
            reduced_with_nones.append((None, v))
        else:
            reduced_with_nones.append((reduced[reduced_pos], v))
            reduced_pos += 1
    assert reduced_pos == len(reduced), "Failed to add all gradients"
    return reduced_with_nones
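

# --- Illustrative usage sketch (not part of the original module) ---
# Under a single-device MirroredStrategy, each replica computes its own
# gradients inside `strategy.run`, and `all_reduce_sum_gradients` sums them
# across replicas while preserving `None` entries. The helper name and the
# device list below are assumptions made for this example only.
def _example_all_reduce_sum_gradients():
    strategy = tf.distribute.MirroredStrategy(devices=["/cpu:0"])
    with strategy.scope():
        v = tf.Variable(1.0)

    def replica_step():
        with tf.GradientTape() as tape:
            loss = 2.0 * v
        grads = tape.gradient(loss, [v])
        # Returns [(summed_gradient, v)]; with one replica the sum is a no-op.
        return all_reduce_sum_gradients(zip(grads, [v]))

    return strategy.run(replica_step)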


def filter_empty_gradients(grads_and_vars):
    """Filter out `(grad, var)` pairs that have a gradient equal to `None`."""
    grads_and_vars = tuple(grads_and_vars)
    if not grads_and_vars:
        return grads_and_vars

    filtered = []
    vars_with_empty_grads = []
    for grad, var in grads_and_vars:
        if grad is None:
            vars_with_empty_grads.append(var)
        else:
            filtered.append((grad, var))
    filtered = tuple(filtered)

    if not filtered:
        variable = ([v.name for _, v in grads_and_vars],)
        raise ValueError(
            f"No gradients provided for any variable: {variable}. "
            f"Provided `grads_and_vars` is {grads_and_vars}."
        )
    if vars_with_empty_grads:
        logging.warning(
            "Gradients do not exist for variables %s when minimizing the "
            "loss. If you're using `model.compile()`, did you forget to "
            "provide a `loss` argument?",
            ([v.name for v in vars_with_empty_grads]),
        )
    return filtered
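

# --- Illustrative usage sketch (not part of the original module) ---
# `filter_empty_gradients` drops `(None, var)` pairs and logs a warning that
# names the affected variables. The helper name and toy values below are
# assumptions made for this example only.
def _example_filter_empty_gradients():
    v1 = tf.Variable(1.0, name="used")
    v2 = tf.Variable(2.0, name="unused")
    grads_and_vars = [(tf.constant(0.5), v1), (None, v2)]
    # Only (0.5, v1) survives; "unused" is reported in the warning.
    return filter_empty_gradients(grads_and_vars)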


def make_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for clipping by norm."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipnorm` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
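

# --- Illustrative usage sketch (not part of the original module) ---
# Each gradient is clipped independently to an L2 norm of at most `clipnorm`.
# The helper name and toy values below are assumptions made for this example.
def _example_make_gradient_clipnorm_fn():
    clip_fn = make_gradient_clipnorm_fn(clipnorm=1.0)
    v = tf.Variable([0.0, 0.0])
    grads_and_vars = [(tf.constant([3.0, 4.0]), v)]  # gradient norm is 5.0
    # The gradient is rescaled to [0.6, 0.8] (norm 1.0); the variable is kept.
    return clip_fn(grads_and_vars)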


def make_global_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for clipping by global norm."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`global_clipnorm` is not supported with "
                "`CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        grads, variables = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(grads, clipnorm)
        clipped_grads_and_vars = list(zip(clipped_grads, variables))
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
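

# --- Illustrative usage sketch (not part of the original module) ---
# Unlike per-tensor `clipnorm`, the global variant scales *all* gradients by
# the same factor so that their combined norm is at most `clipnorm`. The
# helper name and toy values below are assumptions made for this example.
def _example_make_global_gradient_clipnorm_fn():
    clip_fn = make_global_gradient_clipnorm_fn(clipnorm=1.0)
    v1, v2 = tf.Variable([0.0]), tf.Variable([0.0])
    grads_and_vars = [(tf.constant([3.0]), v1), (tf.constant([4.0]), v2)]
    # Global norm is 5.0, so both gradients are scaled by 1/5.
    return clip_fn(grads_and_vars)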


def make_gradient_clipvalue_fn(clipvalue):
    """Creates a gradient transformation function for clipping by value."""
    if clipvalue is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipvalue_fn(grads_and_vars):
        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipvalue` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_value(g, -clipvalue, clipvalue), v)
            for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipvalue_fn
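

# --- Illustrative usage sketch (not part of the original module) ---
# Each gradient element is clamped into [-clipvalue, clipvalue]. The helper
# name and toy values below are assumptions made for this example only.
def _example_make_gradient_clipvalue_fn():
    clip_fn = make_gradient_clipvalue_fn(clipvalue=0.5)
    v = tf.Variable([0.0, 0.0, 0.0])
    grads_and_vars = [(tf.constant([2.0, -3.0, 0.1]), v)]
    # The gradient becomes [0.5, -0.5, 0.1]; the variable is unchanged.
    return clip_fn(grads_and_vars)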


# `merge_call` target: sums gradients across replicas through the strategy's
# cross-replica reduction.
def _all_reduce_sum_fn(distribution, grads_and_vars):
    return distribution.extended.batch_reduce_to(
        tf.distribute.ReduceOp.SUM, grads_and_vars
    )