Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/optimizers/utils.py: 13%

69 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer utilities."""

import tensorflow.compat.v2 as tf

# isort: off
from tensorflow.python.platform import tf_logging as logging


def all_reduce_sum_gradients(grads_and_vars):
    """Returns all-reduced gradients aggregated via summation.

    Args:
        grads_and_vars: List of (gradient, variable) pairs.

    Returns:
        List of (gradient, variable) pairs where gradients have been
        all-reduced.
    """
    grads_and_vars = list(grads_and_vars)
    filtered_grads_and_vars = filter_empty_gradients(grads_and_vars)
    if filtered_grads_and_vars:
        if tf.__internal__.distribute.strategy_supports_no_merge_call():
            grads = [pair[0] for pair in filtered_grads_and_vars]
            reduced = tf.distribute.get_replica_context().all_reduce(
                tf.distribute.ReduceOp.SUM, grads
            )
        else:
            # TODO(b/183257003): Remove this branch
            reduced = tf.distribute.get_replica_context().merge_call(
                _all_reduce_sum_fn, args=(filtered_grads_and_vars,)
            )
    else:
        reduced = []
    # Copy 'reduced' but add None gradients back in
    reduced_with_nones = []
    reduced_pos = 0
    for g, v in grads_and_vars:
        if g is None:
            reduced_with_nones.append((None, v))
        else:
            reduced_with_nones.append((reduced[reduced_pos], v))
            reduced_pos += 1
    assert reduced_pos == len(reduced), "Failed to add all gradients"
    return reduced_with_nones
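

# --- Illustrative usage sketch (not part of the original module) -------------
# A minimal example of how `all_reduce_sum_gradients` is typically called
# from inside `tf.distribute.Strategy.run`, i.e. in a replica context.
# `strategy`, `model`, `loss_fn`, `x`, and `y` are hypothetical arguments.
def _example_all_reduce_sum_gradients(strategy, model, loss_fn, x, y):
    def replica_step():
        with tf.GradientTape() as tape:
            loss = loss_fn(y, model(x))
        grads = tape.gradient(loss, model.trainable_variables)
        # Sum per-replica gradients across all replicas; `None` gradients
        # are passed through unchanged.
        return all_reduce_sum_gradients(
            zip(grads, model.trainable_variables)
        )

    return strategy.run(replica_step)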


def filter_empty_gradients(grads_and_vars):
    """Filter out `(grad, var)` pairs that have a gradient equal to `None`."""
    grads_and_vars = tuple(grads_and_vars)
    if not grads_and_vars:
        return grads_and_vars

    filtered = []
    vars_with_empty_grads = []
    for grad, var in grads_and_vars:
        if grad is None:
            vars_with_empty_grads.append(var)
        else:
            filtered.append((grad, var))
    filtered = tuple(filtered)

    if not filtered:
        variable = ([v.name for _, v in grads_and_vars],)
        raise ValueError(
            f"No gradients provided for any variable: {variable}. "
            f"Provided `grads_and_vars` is {grads_and_vars}."
        )
    if vars_with_empty_grads:
        logging.warning(
            "Gradients do not exist for variables %s when minimizing the "
            "loss. If you're using `model.compile()`, did you forget to "
            "provide a `loss` argument?",
            ([v.name for v in vars_with_empty_grads]),
        )
    return filtered
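

# --- Illustrative usage sketch (not part of the original module) -------------
# `filter_empty_gradients` drops `(grad, var)` pairs whose gradient is `None`
# and logs a warning for the skipped variables; it raises a `ValueError` only
# if *every* gradient is `None`. The variables below are hypothetical.
def _example_filter_empty_gradients():
    v0 = tf.Variable(1.0, name="used")
    v1 = tf.Variable(2.0, name="unused")
    with tf.GradientTape() as tape:
        loss = 3.0 * v0  # `v1` does not contribute to the loss.
    grads = tape.gradient(loss, [v0, v1])  # -> [3.0, None]
    # Returns ((3.0, v0),) and warns that no gradient exists for `v1`.
    return filter_empty_gradients(zip(grads, [v0, v1]))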


def make_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for clipping by norm."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):

        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipnorm` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
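

# --- Illustrative usage sketch (not part of the original module) -------------
# The returned transformation clips each gradient tensor *independently* so
# that its own L2 norm does not exceed `clipnorm`. Values are hypothetical.
def _example_gradient_clipnorm():
    clip_fn = make_gradient_clipnorm_fn(clipnorm=1.0)
    v = tf.Variable([3.0, 4.0])
    g = tf.constant([3.0, 4.0])  # norm 5.0, rescaled down to norm 1.0
    return clip_fn([(g, v)])  # -> [([0.6, 0.8], v)]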


def make_global_gradient_clipnorm_fn(clipnorm):
    """Creates a gradient transformation function for global norm clipping."""
    if clipnorm is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipnorm_fn(grads_and_vars):

        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`global_clipnorm` is not supported with "
                "`CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        grads, variables = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(grads, clipnorm)
        clipped_grads_and_vars = list(zip(clipped_grads, variables))
        return clipped_grads_and_vars

    return gradient_clipnorm_fn
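

# --- Illustrative usage sketch (not part of the original module) -------------
# Unlike the per-tensor version above, all gradients are rescaled jointly so
# that their *combined* global norm does not exceed `clipnorm`. Values are
# hypothetical.
def _example_global_gradient_clipnorm():
    clip_fn = make_global_gradient_clipnorm_fn(clipnorm=1.0)
    v1, v2 = tf.Variable([3.0]), tf.Variable([4.0])
    grads_and_vars = [(tf.constant([3.0]), v1), (tf.constant([4.0]), v2)]
    # The global norm is 5.0, so every gradient is scaled by 1/5.
    return clip_fn(grads_and_vars)  # -> [([0.6], v1), ([0.8], v2)]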


def make_gradient_clipvalue_fn(clipvalue):
    """Creates a gradient transformation function for clipping by value."""
    if clipvalue is None:
        return lambda grads_and_vars: grads_and_vars

    def gradient_clipvalue_fn(grads_and_vars):

        if isinstance(
            tf.distribute.get_strategy(),
            (
                tf.distribute.experimental.CentralStorageStrategy,
                tf.compat.v1.distribute.experimental.CentralStorageStrategy,
            ),
        ):
            raise ValueError(
                "`clipvalue` is not supported with `CentralStorageStrategy`. "
                f"The strategy used is {tf.distribute.get_strategy()}."
            )

        clipped_grads_and_vars = [
            (tf.clip_by_value(g, -clipvalue, clipvalue), v)
            for g, v in grads_and_vars
        ]
        return clipped_grads_and_vars

    return gradient_clipvalue_fn
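

# --- Illustrative usage sketch (not part of the original module) -------------
# The returned transformation clamps every gradient element to the interval
# [-clipvalue, clipvalue]. Values are hypothetical.
def _example_gradient_clipvalue():
    clip_fn = make_gradient_clipvalue_fn(clipvalue=0.5)
    v = tf.Variable([1.0, -2.0, 0.1])
    g = tf.constant([1.0, -2.0, 0.1])
    return clip_fn([(g, v)])  # -> [([0.5, -0.5, 0.1], v)]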


def _all_reduce_sum_fn(distribution, grads_and_vars):
    # Cross-replica helper handed to `merge_call` above: batch-reduces the
    # gradients with a SUM across all replicas.
    return distribution.extended.batch_reduce_to(
        tf.distribute.ReduceOp.SUM, grads_and_vars
    )