# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the Policy class for mixed precision training."""

import contextlib

from tensorflow.python.framework import dtypes
from tensorflow.python.keras import backend
from tensorflow.python.keras.engine import base_layer_utils
from tensorflow.python.keras.mixed_precision import device_compatibility_check
from tensorflow.python.keras.mixed_precision import loss_scale as keras_loss_scale_module
from tensorflow.python.keras.utils import generic_utils
from tensorflow.python.platform import tf_logging
from tensorflow.python.training.experimental import mixed_precision_global_state
from tensorflow.python.util.tf_export import keras_export


# pylint: disable=g-classes-have-attributes
@keras_export('keras.mixed_precision.Policy', v1=[])
class Policy(object):
  """A dtype policy for a Keras layer.

  A dtype policy determines a layer's computation and variable dtypes. Each
  layer has a policy. Policies can be passed to the `dtype` argument of layer
  constructors, or a global policy can be set with
  `tf.keras.mixed_precision.set_global_policy`.

  Args:
    name: The policy name, which determines the compute and variable dtypes.
      Can be any dtype name, such as `'float32'` or `'float64'`, which causes
      both the compute and variable dtypes to be that dtype. Can also be the
      string `'mixed_float16'` or `'mixed_bfloat16'`, which causes the compute
      dtype to be float16 or bfloat16 and the variable dtype to be float32.

  Typically you only need to interact with dtype policies when using mixed
  precision, which is the use of float16 or bfloat16 for computations and
  float32 for variables. This is why the term `mixed_precision` appears in the
  API name. Mixed precision can be enabled by passing `'mixed_float16'` or
  `'mixed_bfloat16'` to `tf.keras.mixed_precision.set_global_policy`. See [the
  mixed precision guide](https://www.tensorflow.org/guide/keras/mixed_precision)
  for more information on how to use mixed precision.

  >>> tf.keras.mixed_precision.set_global_policy('mixed_float16')
  >>> layer1 = tf.keras.layers.Dense(10)
  >>> layer1.dtype_policy  # `layer1` will automatically use mixed precision
  <Policy "mixed_float16">
  >>> # Can optionally override layer to use float32 instead of mixed precision.
  >>> layer2 = tf.keras.layers.Dense(10, dtype='float32')
  >>> layer2.dtype_policy
  <Policy "float32">
  >>> # Set policy back to initial float32 for future examples.
  >>> tf.keras.mixed_precision.set_global_policy('float32')

  In the example above, passing `dtype='float32'` to the layer is equivalent
  to passing `dtype=tf.keras.mixed_precision.Policy('float32')`. In general,
  passing a dtype policy name to a layer is equivalent to passing the
  corresponding policy, so it is never necessary to explicitly construct a
  `Policy` object.

  Note: `Model.compile` will automatically wrap an optimizer with a
  `tf.keras.mixed_precision.LossScaleOptimizer` if you use the
  `'mixed_float16'` policy. If you use a custom training loop instead of
  calling `Model.compile`, you should explicitly use a
  `tf.keras.mixed_precision.LossScaleOptimizer` to avoid numeric underflow
  with float16.
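
  For example, here is a minimal sketch of wrapping an optimizer for a custom
  training loop (the choice of SGD is illustrative):

  >>> opt = tf.keras.optimizers.SGD()
  >>> opt = tf.keras.mixed_precision.LossScaleOptimizer(opt)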

  ### How a layer uses its policy's compute dtype

  A layer casts its inputs to its compute dtype. This causes the layer's
  computations and output to also be in the compute dtype. For example:

  >>> x = tf.ones((4, 4, 4, 4), dtype='float64')
  >>> # `layer`'s policy defaults to float32.
  >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2)
  >>> layer.compute_dtype  # Equivalent to layer.dtype_policy.compute_dtype
  'float32'
  >>> # `layer` casts its inputs to its compute dtype and does computations in
  >>> # that dtype.
  >>> y = layer(x)
  >>> y.dtype
  tf.float32

  Note that the base `tf.keras.layers.Layer` class inserts the casts. If
  subclassing your own layer, you do not have to insert any casts.

  Currently, only tensors in the first argument to the layer's `call` method
  are cast (although this will likely be changed in a future minor release).
  For example:

  >>> class MyLayer(tf.keras.layers.Layer):
  ...   # Bug! `b` will not be cast.
  ...   def call(self, a, b):
  ...     return a + 1., b + 1.
  >>> a = tf.constant(1., dtype="float32")
  >>> b = tf.constant(1., dtype="float32")
  >>> layer = MyLayer(dtype="float64")
  >>> x, y = layer(a, b)
  >>> x.dtype
  tf.float64
  >>> y.dtype
  tf.float32

  If writing your own layer with multiple inputs, you should either explicitly
  cast other tensors to `self.compute_dtype` in `call` or accept all tensors
  in the first argument as a list.
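
  For example, here is a minimal sketch of the explicit-cast approach (the
  layer name is illustrative):

  >>> class MyFixedLayer(tf.keras.layers.Layer):
  ...   def call(self, a, b):
  ...     # Only `a` is cast automatically, so cast `b` ourselves.
  ...     b = tf.cast(b, self.compute_dtype)
  ...     return a + 1., b + 1.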

  The casting only occurs in TensorFlow 2. If
  `tf.compat.v1.disable_v2_behavior()` has been called, you can enable the
  casting behavior with `tf.compat.v1.keras.layers.enable_v2_dtype_behavior()`.

  ### How a layer uses its policy's variable dtype

  The default dtype of variables created by `tf.keras.layers.Layer.add_weight`
  is the layer's policy's variable dtype.

  If a layer's compute and variable dtypes differ, `add_weight` will wrap
  floating-point variables with a special wrapper called an `AutoCastVariable`.
  `AutoCastVariable` is identical to the original variable except it casts
  itself to the layer's compute dtype when used within `Layer.call`. This means
  if you are writing a layer, you do not have to explicitly cast the variables
  to the layer's compute dtype. For example:

  >>> class SimpleDense(tf.keras.layers.Layer):
  ...
  ...   def build(self, input_shape):
  ...     # With mixed precision, self.kernel is a float32 AutoCastVariable
  ...     self.kernel = self.add_weight('kernel', (input_shape[-1], 10))
  ...
  ...   def call(self, inputs):
  ...     # With mixed precision, self.kernel will be cast to float16
  ...     return tf.linalg.matmul(inputs, self.kernel)
  ...
  >>> layer = SimpleDense(dtype='mixed_float16')
  >>> y = layer(tf.ones((10, 10)))
  >>> y.dtype
  tf.float16
  >>> layer.kernel.dtype
  tf.float32

  A layer author can prevent a variable from being wrapped with an
  `AutoCastVariable` by passing `experimental_autocast=False` to `add_weight`,
  which is useful if the float32 value of the variable must be accessed within
  the layer.
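
  For example, here is a minimal sketch (the layer and weight names are
  illustrative):

  >>> class LayerWithFloat32Weight(tf.keras.layers.Layer):
  ...   def build(self, input_shape):
  ...     # Not wrapped in an AutoCastVariable, so it stays float32 in call.
  ...     self.w = self.add_weight('w', (), experimental_autocast=False)
  ...   def call(self, inputs):
  ...     # `self.w` is float32 here; cast it manually to match the inputs.
  ...     return inputs + tf.cast(self.w, inputs.dtype)
  >>> layer = LayerWithFloat32Weight(dtype='mixed_float16')
  >>> layer(tf.ones(())).dtype
  tf.float16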

  ### How to write a layer that supports mixed precision and float64

  For the most part, layers will automatically support mixed precision and
  float64 without any additional work, due to the fact the base layer
  automatically casts inputs, creates variables of the correct type, and in
  the case of mixed precision, wraps variables with `AutoCastVariables`.

  The primary case where you need extra work to support mixed precision or
  float64 is when you create a new tensor, such as with `tf.ones` or
  `tf.random.normal`. In such cases, you must create the tensor of the correct
  dtype. For example, if you call `tf.random.normal`, you must pass the
  compute dtype, which is the dtype the inputs have been cast to:

  >>> class AddRandom(tf.keras.layers.Layer):
  ...
  ...   def call(self, inputs):
  ...     # We must pass `dtype=inputs.dtype`, otherwise a TypeError may
  ...     # occur when adding `inputs` to `rand`.
  ...     rand = tf.random.normal(shape=inputs.shape, dtype=inputs.dtype)
  ...     return inputs + rand
  >>> layer = AddRandom(dtype='mixed_float16')
  >>> y = layer(x)
  >>> y.dtype
  tf.float16

  If you did not pass `dtype=inputs.dtype` to `tf.random.normal`, a
  `TypeError` would have occurred. This is because `tf.random.normal`'s dtype
  defaults to `"float32"`, but the input dtype is float16. You cannot add a
  float32 tensor to a float16 tensor.
  """

  def __init__(self, name):
    if isinstance(name, dtypes.DType):
      raise TypeError("'name' must be a string, not a DType. "
                      "Instead, pass DType.name. Got: %s" % (name.name,))
    elif not isinstance(name, str):
      raise TypeError("'name' must be a string, but got: %s" % (name,))
    self._name = name
    self._compute_dtype, self._variable_dtype = self._parse_name(name)
    if name in ('mixed_float16', 'mixed_bfloat16'):
      device_compatibility_check.log_device_compatibility_check(name)

  def _parse_name(self, name):
    """Parses a Policy name into a compute and variable dtype.

    Args:
      name: The name of the policy.

    Returns:
      The (compute_dtype, variable_dtype) pair.
    """
    if name.endswith('_float32_vars'):
      error_msg = ("Policies ending in '_float32_vars' have been removed "
                   "from TensorFlow.")
      if name in ('infer_float32_vars', 'infer_with_float32_vars'):
        error_msg += (" Please use the 'mixed_float16' or 'mixed_bfloat16' "
                      "policy instead.")
      elif name == 'float16_with_float32_vars':
        error_msg += " Please use the 'mixed_float16' policy instead."
      elif name == 'bfloat16_with_float32_vars':
        error_msg += " Please use the 'mixed_bfloat16' policy instead."
      error_msg += " Got policy name: '%s'" % name
      raise ValueError(error_msg)

    if name == 'mixed_float16':
      return 'float16', 'float32'
    elif name == 'mixed_bfloat16':
      return 'bfloat16', 'float32'
    elif name == '_infer':
      # The "_infer" policy exists only for compatibility with TF 1, where
      # "_infer" is the default. With "_infer", the computation and variable
      # dtypes are inferred from the first input the first time the layer is
      # called, which matches the behavior of TF 1 before policies were
      # introduced. Once the layer is called for the first time, the layer's
      # policy will change to the dtype of the first input, and it will no
      # longer have the "_infer" policy.
      #
      # The infer policy should be considered an implementation detail and may
      # be removed in the future.
      return None, None

    try:
      dtype = dtypes.as_dtype(name).name
    except TypeError:
      error = ("Cannot convert value %s to a mixed precision Policy. "
               "Valid policies include 'mixed_float16', 'mixed_bfloat16', "
               "and the name of any dtype such as 'float32'." % (name,))
      raise ValueError(error)
    return dtype, dtype

  @property
  def variable_dtype(self):
    """The variable dtype of this policy.

    This is the dtype layers will create their variables in, unless a layer
    explicitly chooses a different dtype. If this is different than
    `Policy.compute_dtype`, layers will cast variables to the compute dtype to
    avoid type errors.

    Variable regularizers are run in the variable dtype, not the compute
    dtype.

    Returns:
      The variable dtype of this policy, as a string.
    """
    return self._variable_dtype

  @property
  def compute_dtype(self):
    """The compute dtype of this policy.

    This is the dtype layers will do their computations in. Typically layers
    output tensors with the compute dtype as well.

    Note that even if the compute dtype is float16 or bfloat16, hardware
    devices may not do individual adds, multiplies, and other fundamental
    operations in float16 or bfloat16, but instead may do some of them in
    float32 for numeric stability. The compute dtype is the dtype of the
    inputs and outputs of the TensorFlow ops that the layer executes.
    Internally, many TensorFlow ops will do certain internal calculations in
    float32 or some other device-internal intermediate format with higher
    precision than float16/bfloat16, to increase numeric stability.

    For example, a `tf.keras.layers.Dense` layer, when run on a GPU with a
    float16 compute dtype, will pass float16 inputs to `tf.linalg.matmul`.
    But `tf.linalg.matmul` will use float32 intermediate math. The performance
    benefit of float16 is still apparent, due to increased memory bandwidth
    and the fact modern GPUs have specialized hardware for computing matmuls
    on float16 inputs while still keeping intermediate computations in
    float32.

    Returns:
      The compute dtype of this policy, as a string.
    """
    return self._compute_dtype

  @property
  def name(self):
    """Returns the name of this policy."""
    return self._name

  def __repr__(self):
    return '<Policy "%s">' % self._name

  def get_config(self):
    return {'name': self.name}

  @classmethod
  def from_config(cls, config, custom_objects=None):
    del custom_objects
    if 'loss_scale' in config:
      config = config.copy()
      # Policy.get_config in TensorFlow 2.3 and below had a loss_scale. We
      # silently drop it.
      del config['loss_scale']
    return cls(**config)


@keras_export('keras.mixed_precision.experimental.Policy', v1=[])
class PolicyV1(Policy):
  """A deprecated dtype policy for a Keras layer.

  Warning: This class is now deprecated and will be removed soon. Please use
  the non-experimental class `tf.keras.mixed_precision.Policy` instead.

  The difference between this class and the non-experimental class is that
  this class has a `loss_scale` field and the non-experimental class does not.
  The loss scale is only used by `tf.keras.Model.compile`, which automatically
  wraps the optimizer with a `LossScaleOptimizer` if the optimizer is not
  already a `LossScaleOptimizer`. For the non-experimental Policy class,
  `Model.compile` instead wraps the optimizer with a `LossScaleOptimizer` if
  `Policy.name` is "mixed_float16".

  When deserializing objects with an experimental policy using functions like
  `tf.keras.utils.deserialize_keras_object`, the policy will be deserialized
  as the non-experimental `tf.keras.mixed_precision.Policy`, and the loss
  scale will silently be dropped. This is so that SavedModels that are
  generated with an experimental policy can be restored after the experimental
  policy is removed.
  """

  def __init__(self, name, loss_scale='auto'):
    """Constructs the policy.

    The `name` argument determines the compute and variable dtypes and the
    default loss scale, and has no additional effect on the Policy. The
    compute and variable dtypes can only be specified through `name`, and
    cannot be specified directly.

    Args:
      name: A string. Can be one of the following values:
        * Any dtype name, such as 'float32' or 'float64'. Both the variable
          and compute dtypes will be that dtype.
        * 'mixed_float16' or 'mixed_bfloat16': The compute dtype is float16
          or bfloat16, while the variable dtype is float32. With
          'mixed_float16', a dynamic loss scale is used. These policies are
          used for mixed precision training.
      loss_scale: A `tf.compat.v1.mixed_precision.LossScale`, an int (which
        uses a `FixedLossScale`), the string "dynamic" (which uses a
        `DynamicLossScale`), or None (which uses no loss scale). Defaults to
        `"auto"`. In the `"auto"` case: 1) if `name` is `"mixed_float16"`,
        then use `loss_scale="dynamic"`. 2) otherwise, do not use a loss
        scale. Only `tf.keras.Model`s, not layers, use the loss scale, and it
        is only used during `Model.fit`, `Model.train_on_batch`, and other
        similar methods.
    """
    super(PolicyV1, self).__init__(name)
    if loss_scale == 'auto':
      loss_scale = 'dynamic' if name == 'mixed_float16' else None
      self._using_default_loss_scale = True
    else:
      self._using_default_loss_scale = False
    if loss_scale and self._compute_dtype not in (None, 'float16'):
      tf_logging.warning(
          'Creating a Policy with a loss scale is only useful for '
          'float16 policies. You passed loss_scale=%r for policy '
          '%s. Consider not passing any loss_scale instead.' %
          (loss_scale, name))
    self._loss_scale = keras_loss_scale_module.get(loss_scale)

  @property
  def loss_scale(self):
    """Returns the loss scale of this Policy.

    Returns:
      A `tf.compat.v1.mixed_precision.experimental.LossScale`, or None.
    """
    return self._loss_scale

  def __repr__(self):
    return '<PolicyV1 "%s", loss_scale=%s>' % (self._name, self.loss_scale)

  def get_config(self):
    config = {
        'name': self.name
    }
    if not self._using_default_loss_scale:
      # We only include the loss scale if the default loss scale is not used.
      # This allows us to change the loss scale config format without breaking
      # users who use the default loss scale.
      config['loss_scale'] = keras_loss_scale_module.serialize(self.loss_scale)
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    if 'loss_scale' in config and isinstance(config['loss_scale'], dict):
      config = config.copy()
      config['loss_scale'] = keras_loss_scale_module.deserialize(
          config['loss_scale'], custom_objects=custom_objects)
    return cls(**config)


# The current global policy in effect. If None, it means the current value of
# floatx should be used as the policy if the V2 dtype behavior is enabled,
# or "_infer" otherwise.
# TODO(reedwm): Make this thread local?
_global_policy = None


@keras_export('keras.mixed_precision.global_policy',
              'keras.mixed_precision.experimental.global_policy', v1=[])
def global_policy():
  """Returns the global dtype policy.

  The global policy is the default `tf.keras.mixed_precision.Policy` used for
  layers, if no policy is passed to the layer constructor. If no policy has
  been set with `keras.mixed_precision.set_global_policy`, this will return a
  policy constructed from `tf.keras.backend.floatx()` (floatx defaults to
  float32).

  >>> tf.keras.mixed_precision.global_policy()
  <Policy "float32">
  >>> tf.keras.layers.Dense(10).dtype_policy  # Defaults to the global policy
  <Policy "float32">

  If TensorFlow 2 behavior has been disabled with
  `tf.compat.v1.disable_v2_behavior()`, this will instead return a special
  "_infer" policy which infers the dtype from the dtype of the first input the
  first time the layer is called. This behavior matches the behavior that
  existed in TensorFlow 1.

  See `tf.keras.mixed_precision.Policy` for more information on policies.

  Returns:
    The global Policy.
  """
  if _global_policy is None:
    if base_layer_utils.v2_dtype_behavior_enabled():
      return Policy(backend.floatx())
    else:
      return Policy('_infer')
  return _global_policy


def _check_if_mixed_precision_graph_rewrite_is_enabled(policy):
  if mixed_precision_global_state.is_mixed_precision_graph_rewrite_enabled():
    raise ValueError(
        'The global dtype policy cannot be set to "{policy.name}", because '
        'the mixed precision graph rewrite has already been enabled.\n'
        'At most, one of the following can be called:\n\n'
        '  1. tf.compat.v1.train.enable_mixed_precision_graph_rewrite() '
        '(You called this first)\n'
        '  2. tf.keras.mixed_precision.experimental.set_global_policy() with '
        'a mixed precision policy (You called this second)\n\n'
        'You called both functions, which is an error, because both functions '
        'enable you to use mixed precision. If in doubt which function to '
        'use, use the second, as it supports Eager execution and is more '
        'customizable.'.format(policy=policy))


@keras_export('keras.mixed_precision.set_global_policy',
              'keras.mixed_precision.experimental.set_global_policy', v1=[])
def set_global_policy(policy):
  """Sets the global dtype policy.

  The global policy is the default `tf.keras.mixed_precision.Policy` used for
  layers, if no policy is passed to the layer constructor.

  >>> tf.keras.mixed_precision.set_global_policy('mixed_float16')
  >>> tf.keras.mixed_precision.global_policy()
  <Policy "mixed_float16">
  >>> tf.keras.layers.Dense(10).dtype_policy
  <Policy "mixed_float16">
  >>> # Global policy is not used if a policy is directly passed to constructor
  >>> tf.keras.layers.Dense(10, dtype='float64').dtype_policy
  <Policy "float64">
  >>> tf.keras.mixed_precision.set_global_policy('float32')

  If no global policy is set, layers will instead default to a Policy
  constructed from `tf.keras.backend.floatx()`.

  To use mixed precision, the global policy should be set to `'mixed_float16'`
  or `'mixed_bfloat16'`, so that every layer uses a 16-bit compute dtype and
  float32 variable dtype by default.

  Only floating point policies can be set as the global policy, such as
  `'float32'` and `'mixed_float16'`. Non-floating point policies such as
  `'int32'` and `'complex64'` cannot be set as the global policy because most
  layers do not support such policies.

  See `tf.keras.mixed_precision.Policy` for more information.

  Args:
    policy: A Policy, or a string that will be converted to a Policy. Can also
      be None, in which case the global policy will be constructed from
      `tf.keras.backend.floatx()`.
  """
  global _global_policy
  if not base_layer_utils.v2_dtype_behavior_enabled():
    raise ValueError('The global policy can only be set in TensorFlow 2 or if '
                     'V2 dtype behavior has been set. To enable V2 dtype '
                     'behavior, call '
                     '"tf.compat.v1.keras.layers.enable_v2_dtype_behavior()"')
  if policy is not None and not isinstance(policy, Policy):
    policy = Policy(policy)
  is_mixed_policy = (policy is not None and
                     policy.compute_dtype != policy.variable_dtype)
  if is_mixed_policy:
    _check_if_mixed_precision_graph_rewrite_is_enabled(policy)
  if (policy is not None and policy.compute_dtype is not None and
      not dtypes.as_dtype(policy.compute_dtype).is_floating):
    raise ValueError('set_global_policy can only be used to set the global '
                     'policy to floating-point policies, such as "float32" '
                     'and "mixed_float16", but got policy: %s'
                     % (policy.name,))
  _global_policy = policy
  mixed_precision_global_state.set_using_mixed_precision_policy(is_mixed_policy)


# TODO(reedwm): Make this thread local
@contextlib.contextmanager
def policy_scope(policy):
  """A context manager that sets the global Policy under it.

  Args:
    policy: A Policy, or a string that will be converted to a Policy.

  Yields:
    Nothing.
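
  Example (a minimal usage sketch):

  >>> with policy_scope('mixed_float16'):
  ...   layer = tf.keras.layers.Dense(10)
  >>> layer.dtype_policy
  <Policy "mixed_float16">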
531 """
532 old_policy = _global_policy
533 try:
534 set_global_policy(policy)
535 yield
536 finally:
537 set_global_policy(old_policy)


def _is_convertible_to_dtype(dtype):
  try:
    dtypes.as_dtype(dtype)
    return True
  except TypeError:
    return False


def _policy_equivalent_to_dtype(policy):
  """Returns True if the Policy is equivalent to a single dtype.

  A policy is equivalent to a single dtype if the policy's compute and
  variable dtypes are the same and the policy's type is Policy and not a
  subclass of Policy (such as PolicyV1).

  The "_infer" policy is considered equivalent to a single dtype.

  Args:
    policy: A Policy.

  Returns:
    True, if the policy is equivalent to a single dtype.
  """
  # We use type() instead of isinstance because a subclass of Policy is never
  # equivalent to a dtype.
  return (type(policy) == Policy and  # pylint: disable=unidiomatic-typecheck
          list(policy.get_config().keys()) == ['name'] and
          (policy.name == '_infer' or _is_convertible_to_dtype(policy.name)))
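
# A minimal sanity sketch of `_policy_equivalent_to_dtype`; the results below
# follow from the rules in its docstring:
#
#   _policy_equivalent_to_dtype(Policy('float32'))        # True
#   _policy_equivalent_to_dtype(Policy('_infer'))         # True
#   _policy_equivalent_to_dtype(Policy('mixed_float16'))  # False: two dtypes
#   _policy_equivalent_to_dtype(PolicyV1('float32'))      # False: a subclass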


def serialize(policy):
  if _policy_equivalent_to_dtype(policy):
    # We return either None or the policy name for compatibility with older
    # versions of Keras. If the policy name is returned, it is a dtype string
    # such as 'float32'.
    return None if policy.name == '_infer' else policy.name
  return generic_utils.serialize_keras_object(policy)
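
# A minimal sketch of what `serialize` returns (results are illustrative):
#
#   serialize(Policy('float32'))  # -> 'float32', a plain dtype string
#   serialize(Policy('_infer'))   # -> None
#   serialize(Policy('mixed_float16'))
#   # -> a config dict from `serialize_keras_object`, since 'mixed_float16'
#   #    is not equivalent to a single dtype.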


def deserialize(config, custom_objects=None):
  if isinstance(config, str) and _is_convertible_to_dtype(config):
    return Policy(config)
  if config is None:
    return Policy('_infer')
  module_objects = {'Policy': Policy, 'PolicyV1': Policy}
  return generic_utils.deserialize_keras_object(
      config,
      module_objects=module_objects,
      custom_objects=custom_objects,
      printable_module_name='dtype policy')
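
# A minimal sketch of `deserialize` results (illustrative). Note that
# 'PolicyV1' is deliberately mapped to `Policy` above, so serialized V1
# policies come back as the non-experimental class with the loss scale
# dropped:
#
#   deserialize('float16')  # -> <Policy "float16">
#   deserialize(None)       # -> <Policy "_infer">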