Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/framework/config.py: 58%
139 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Functions for configuring TensorFlow execution."""
17from typing import Union
19from tensorflow.python.eager import context
20from tensorflow.python.framework import errors
21from tensorflow.python.util import _pywrap_determinism
22from tensorflow.python.util import _pywrap_tensor_float_32_execution
23from tensorflow.python.util import deprecation
24from tensorflow.python.util.tf_export import tf_export
@tf_export('config.experimental.tensor_float_32_execution_enabled')
def tensor_float_32_execution_enabled():
  """Returns whether TensorFloat-32 is enabled.

  TensorFloat-32 is on by default; it can be toggled with
  `tf.config.experimental.enable_tensor_float_32_execution`.

  Returns:
    True if TensorFloat-32 is enabled (the default) and False otherwise
  """
  # Query the native runtime flag directly.
  return _pywrap_tensor_float_32_execution.is_enabled()
@tf_export('config.experimental.enable_tensor_float_32_execution')
def enable_tensor_float_32_execution(enabled):
  """Enable or disable the use of TensorFloat-32 on supported hardware.

  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format),
  or TF32 for short, is a math mode for NVIDIA Ampere GPUs and above. It makes
  certain float32 ops, such as matrix multiplications and convolutions, run
  much faster at reduced precision. In practice, the reduced precision should
  not impact the convergence of deep learning models.

  TensorFloat-32 is enabled by default and only takes effect on NVIDIA GPUs of
  the Ampere generation or newer; older NVIDIA GPUs and other hardware always
  compute in full float32 precision regardless of this setting. To force full
  float32 precision on all GPUs, disable TensorFloat-32 with this function.
  For example:

  ```python
  x = tf.fill((1024, 1024), 1.0001)
  y = tf.fill((1024, 1024), 1.)
  # TensorFloat-32 is enabled, so matmul is run with reduced precision
  print(tf.linalg.matmul(x, y)[0, 0])  # 1024.0
  tf.config.experimental.enable_tensor_float_32_execution(False)
  # Matmul is run with full precision
  print(tf.linalg.matmul(x, y)[0, 0])  # ~1024.1
  ```

  To check whether TensorFloat-32 execution is currently enabled, use
  `tf.config.experimental.tensor_float_32_execution_enabled`.

  When enabled, float32 inputs of supported ops such as `tf.linalg.matmul` are
  typically rounded from 23 bits of precision to 10 bits, which lets the ops
  use the GPU's tensor cores. TensorFloat-32 keeps float32's dynamic range, so
  it is no more likely to underflow or overflow, and ops still accumulate in
  float32. Enabling or disabling TensorFloat-32 only affects Ampere GPUs and
  above.

  Note that even in supported ops, TensorFloat-32 is only used for certain
  input shapes. Support for more shapes and more ops may be added in the
  future, so the precision of float32 ops may decrease in minor versions of
  TensorFlow.

  TensorFloat-32 is also used for some complex64 ops, although currently in
  fewer cases than for float32.

  Args:
    enabled: Bool indicating whether to enable TensorFloat-32 execution.
  """
  # Forward the flag to the native runtime.
  _pywrap_tensor_float_32_execution.enable(enabled)
@tf_export('config.threading.get_intra_op_parallelism_threads')
def get_intra_op_parallelism_threads():
  """Get number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Returns:
    Number of parallel threads
  """
  ctx = context.context()
  return ctx.intra_op_parallelism_threads
@tf_export('config.threading.set_intra_op_parallelism_threads')
def set_intra_op_parallelism_threads(num_threads):
  """Set number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  ctx = context.context()
  ctx.intra_op_parallelism_threads = num_threads
@tf_export('config.threading.get_inter_op_parallelism_threads')
def get_inter_op_parallelism_threads():
  """Get number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Returns:
    Number of parallel threads
  """
  ctx = context.context()
  return ctx.inter_op_parallelism_threads
@tf_export('config.threading.set_inter_op_parallelism_threads')
def set_inter_op_parallelism_threads(num_threads):
  """Set number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  ctx = context.context()
  ctx.inter_op_parallelism_threads = num_threads
@tf_export('config.optimizer.get_jit')
def get_optimizer_jit() -> str:
  """Returns JIT compilation configuration for code inside `tf.function`.

  Possible return values:
    - `"autoclustering"` if
      [autoclustering](https://www.tensorflow.org/xla#auto-clustering) is
      enabled
    - `""` when no default compilation is applied.
  """
  # Collapse the boolean runtime flag into the public string representation.
  return 'autoclustering' if context.context().optimizer_jit else ''
@tf_export('config.optimizer.set_jit')
@deprecation.deprecated_arg_values(
    None,
    '`True` setting is deprecated, use `autoclustering` instead.',
    warn_once=True,
    jit_config=True)
def set_optimizer_jit(enabled: Union[bool, str]):
  """Configure JIT compilation.

  Note: compilation is only applied to code that is compiled into a
  graph (in TF2 that's only a code inside `tf.function`).

  Args:
    enabled: JIT compilation configuration.
      Possible values:
      - `"autoclustering"` (`True` is a deprecated alias): perform
        [autoclustering](https://www.tensorflow.org/xla#auto-clustering)
        (automatically identify and compile clusters of nodes) on all graphs
        using [XLA](https://www.tensorflow.org/xla).
      - `False`: do not automatically compile any graphs.
  """
  # Anything other than True/'autoclustering' disables JIT.
  context.context().optimizer_jit = enabled in (True, 'autoclustering')
@tf_export('config.optimizer.get_experimental_options')
def get_optimizer_experimental_options():
  """Get experimental optimizer options.

  Refer to tf.config.optimizer.set_experimental_options for a list of current
  options.

  Note that optimizations are only applied in graph mode, (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Returns:
    Dictionary of configured experimental optimizer options
  """
  ctx = context.context()
  return ctx.get_optimizer_experimental_options()
@tf_export('config.optimizer.set_experimental_options')
def set_optimizer_experimental_options(options):
  """Set experimental optimizer options.

  Note that optimizations are only applied in graph mode, (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Args:
    options: Dictionary of experimental optimizer options to configure.
      Valid keys:
      - layout_optimizer: Optimize tensor layouts, e.g. try to use the NCHW
        layout on GPU, which is faster.
      - constant_folding: Fold constants. Statically infer the value of
        tensors when possible, and materialize the result using constants.
      - shape_optimization: Simplify computations made on shapes.
      - remapping: Remap subgraphs onto more efficient implementations.
      - arithmetic_optimization: Simplify arithmetic ops with common
        sub-expression elimination and arithmetic simplification.
      - dependency_optimization: Control dependency optimizations. Remove
        redundant control dependencies, which may enable other optimization.
        This optimizer is also essential for pruning Identity and NoOp nodes.
      - loop_optimization: Loop optimizations.
      - function_optimization: Function optimizations and inlining.
      - debug_stripper: Strips debug-related nodes from the graph.
      - disable_model_pruning: Disable removal of unnecessary ops from the
        graph.
      - scoped_allocator_optimization: Try to allocate some independent Op
        outputs contiguously in order to merge or eliminate downstream Ops.
      - pin_to_host_optimization: Force small ops onto the CPU.
      - implementation_selector: Enable the swap of kernel implementations
        based on the device placement.
      - auto_mixed_precision: Change certain float32 ops to float16 on Volta
        GPUs and above. Without the use of loss scaling, this can cause
        numerical underflow (see
        `keras.mixed_precision.experimental.LossScaleOptimizer`).
      - disable_meta_optimizer: Disable the entire meta optimizer.
      - min_graph_nodes: The minimum number of nodes in a graph to optimizer.
        For smaller graphs, optimization is skipped.
      - auto_parallel: Automatically parallelizes graphs by splitting along
        the batch dimension.
  """
  ctx = context.context()
  ctx.set_optimizer_experimental_options(options)
@tf_export('config.get_soft_device_placement')
def get_soft_device_placement():
  """Return status of soft device placement flag.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP can not be compiled by XLA.  Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Returns:
    A boolean indicating if soft placement is enabled.
  """
  ctx = context.context()
  return ctx.soft_device_placement
@tf_export('config.set_soft_device_placement')
def set_soft_device_placement(enabled):
  """Enable or disable soft device placement.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP can not be compiled by XLA.  Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Note: by default soft device placement is enabled when running in eager mode
  (for convenience) and disabled in graph mode (for performance).

  Args:
    enabled: A boolean indicating whether to enable soft placement.
  """
  ctx = context.context()
  ctx.soft_device_placement = enabled
@tf_export('config.experimental.get_device_policy')
def get_device_policy():
  """Gets the current device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  This function only gets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Returns:
    Current thread device policy
  """
  device_policy = context.context().device_policy
  # Map the internal enum value to its public string name.
  names = {
      context.DEVICE_PLACEMENT_SILENT: 'silent',
      context.DEVICE_PLACEMENT_SILENT_FOR_INT32: 'silent_for_int32',
      context.DEVICE_PLACEMENT_WARN: 'warn',
      context.DEVICE_PLACEMENT_EXPLICIT: 'explicit',
  }
  if device_policy in names:
    return names[device_policy]
  # pylint: disable-next=no-value-for-parameter
  raise errors.InternalError(
      f'Got an invalid device policy: {device_policy!r}.')
@tf_export('config.experimental.set_device_policy')
def set_device_policy(device_policy):
  """Sets the current thread device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  When using the default, an appropriate policy will be picked automatically.
  The default policy may change over time.

  This function only sets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Args:
    device_policy: A device policy.
      Valid values:
      - None: Switch to a system default.
      - 'warn': Copies the tensors which are not on the right device and logs
        a warning.
      - 'explicit': Raises an error if the placement is not as required.
      - 'silent': Silently copies the tensors. Note that this may hide
        performance problems as there is no notification provided when
        operations are blocked on the tensor being copied between devices.
      - 'silent_for_int32': silently copies `int32` tensors, raising errors on
        the other ones.

  Raises:
    ValueError: If an invalid `device_policy` is passed.
  """
  # Tuple table (rather than a dict) so unhashable inputs still fall through
  # to the ValueError below instead of raising TypeError.
  valid_policies = (
      ('silent', context.DEVICE_PLACEMENT_SILENT),
      ('silent_for_int32', context.DEVICE_PLACEMENT_SILENT_FOR_INT32),
      ('warn', context.DEVICE_PLACEMENT_WARN),
      ('explicit', context.DEVICE_PLACEMENT_EXPLICIT),
      (None, None),
  )
  for name, placement in valid_policies:
    if device_policy == name if name is not None else device_policy is None:
      context.context().device_policy = placement
      return
  raise ValueError(
      f'Invalid argument `device_policy`: {device_policy!r}. Please refer to '
      'https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_device_policy '
      'for valid `device_policy` arguments.')
@tf_export('config.experimental.get_synchronous_execution')
def get_synchronous_execution():
  """Gets whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  Returns:
    Current thread execution mode
  """
  ctx = context.context()
  return ctx.execution_mode == context.SYNC
@tf_export('config.experimental.set_synchronous_execution')
def set_synchronous_execution(enable):
  """Specifies whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  When `enable` is set to None, an appropriate value will be picked
  automatically. The value picked may change between TensorFlow releases.

  Args:
    enable: Whether operations should be dispatched synchronously.
      Valid values:
      - None: sets the system default.
      - True: executes each operation synchronously.
      - False: executes each operation asynchronously.
  """
  if enable is None:
    mode = None  # Let the runtime choose its default.
  else:
    mode = context.SYNC if enable else context.ASYNC
  context.context().execution_mode = mode
@tf_export('config.list_physical_devices',
           'config.experimental.list_physical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_physical_devices')
def list_physical_devices(device_type=None):
  """Return a list of physical devices visible to the host runtime.

  Physical devices are hardware devices present on the host machine. By default
  all discovered CPU and GPU devices are considered visible.

  This API allows querying the physical hardware resources prior to runtime
  initialization. Thus, giving an opportunity to call any additional
  configuration APIs. This is in contrast to `tf.config.list_logical_devices`,
  which triggers runtime initialization in order to list the configured
  devices.

  The following example lists the number of visible GPUs on the host.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> print("Num GPUs:", len(physical_devices))
  Num GPUs: ...

  However, the number of GPUs available to the runtime may change during
  runtime initialization due to marking certain devices as not visible or
  configuring multiple logical devices.

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of discovered `tf.config.PhysicalDevice` objects
  """
  ctx = context.context()
  return ctx.list_physical_devices(device_type)
@tf_export('config.list_logical_devices',
           'config.experimental.list_logical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_logical_devices')
def list_logical_devices(device_type=None):
  """Return a list of logical devices created by runtime.

  Logical devices may correspond to physical devices or remote devices in the
  cluster. Operations and tensors may be placed on these devices by using the
  `name` of the `tf.config.LogicalDevice`.

  Calling `tf.config.list_logical_devices` triggers the runtime to configure
  any `tf.config.PhysicalDevice` visible to the runtime, thereby preventing
  further configuration. To avoid runtime initialization, call
  `tf.config.list_physical_devices` instead.

  For example:

  >>> logical_devices = tf.config.list_logical_devices('GPU')
  >>> if len(logical_devices) > 0:
  ...   # Allocate on GPU:0
  ...   with tf.device(logical_devices[0].name):
  ...     one = tf.constant(1)
  ...   # Allocate on GPU:1
  ...   with tf.device(logical_devices[1].name):
  ...     two = tf.constant(2)

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of initialized `LogicalDevice`s
  """
  ctx = context.context()
  return ctx.list_logical_devices(device_type=device_type)
@tf_export('config.get_visible_devices',
           'config.experimental.get_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.get_visible_devices')
def get_visible_devices(device_type=None):
  """Get the list of visible physical devices.

  Returns the list of `PhysicalDevice`s currently marked as visible to the
  runtime. A visible device will have at least one `LogicalDevice` associated
  with it once the runtime is initialized.

  The following example verifies all visible GPUs have been disabled:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable all GPUS
  ...   tf.config.set_visible_devices([], 'GPU')
  ...   visible_devices = tf.config.get_visible_devices()
  ...   for device in visible_devices:
  ...     assert device.device_type != 'GPU'
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of visible `PhysicalDevice`s
  """
  ctx = context.context()
  return ctx.get_visible_devices(device_type)
@tf_export('config.set_visible_devices',
           'config.experimental.set_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.set_visible_devices')
def set_visible_devices(devices, device_type=None):
  """Set the list of visible devices.

  Specifies which `PhysicalDevice` objects are visible to the runtime.
  TensorFlow will only allocate memory and place operations on visible
  physical devices, as otherwise no `LogicalDevice` will be created on them.
  By default all discovered devices are marked as visible.

  The following example demonstrates disabling the first GPU on the machine.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable first GPU
  ...   tf.config.set_visible_devices(physical_devices[1:], 'GPU')
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   # Logical device was not created for first GPU
  ...   assert len(logical_devices) == len(physical_devices) - 1
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    devices: List of `PhysicalDevice`s to make visible
    device_type: (optional) Only configure devices matching this device type.
      For example "CPU" or "GPU". Other devices will be left unaltered.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  ctx = context.context()
  ctx.set_visible_devices(devices, device_type)
# TODO(b/188089869): Redesign memory stats related APIs before move them out of
# experimental.
@tf_export('config.experimental.get_memory_info')
def get_memory_info(device):
  """Get memory info for the chosen device, as a dict.

  This function returns a dict containing information about the device's
  memory usage. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Returns a dict in the form {'current': <current mem usage>,
  ...   #                             'peak': <peak mem usage>}
  ...   tf.config.experimental.get_memory_info('GPU:0')

  Currently returns the following keys:
    - `'current'`: The current memory used by the device, in bytes.
    - `'peak'`: The peak memory used by the device across the run of the
      program, in bytes. Can be reset with
      `tf.config.experimental.reset_memory_stats`.

  More keys may be added in the future, including device-specific keys.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. The dict specifies only the
  current and peak memory that TensorFlow is actually using, not the memory
  that TensorFlow has allocated on the GPU.

  Args:
    device: Device string to get the memory information for, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Returns:
    A dict with keys `'current'` and `'peak'`, specifying the current and peak
    memory usage respectively.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked, like '"CPU:0"'.
  """
  ctx = context.context()
  return ctx.get_memory_info(device)
# TODO(b/188089869): Redesign memory stats related APIs before move them out of
# experimental.
# TODO(b/189498350): Unify the behavior on CPU, GPU and TPU.
@tf_export('config.experimental.reset_memory_stats')
def reset_memory_stats(device):
  """Resets the tracked memory stats for the chosen device.

  This function sets the tracked peak memory for a device to the device's
  current memory usage. This allows you to measure the peak memory usage for a
  specific part of your program. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Sets the peak memory to the current memory.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the first peak memory usage.
  ...   x1 = tf.ones(1000 * 1000, dtype=tf.float64)
  ...   del x1  # Frees the memory referenced by `x1`.
  ...   peak1 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   # Sets the peak memory to the current memory again.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the second peak memory usage.
  ...   x2 = tf.ones(1000 * 1000, dtype=tf.float32)
  ...   del x2
  ...   peak2 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   assert peak2 < peak1  # tf.float32 consumes less memory than tf.float64.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  Args:
    device: Device string to reset the memory stats, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked or clearing memory statistics
      not supported, like '"CPU:0"'.
  """
  ctx = context.context()
  ctx.reset_memory_stats(device)
@deprecation.deprecated(
    None,
    "Use tf.config.experimental.get_memory_info(device)['current'] instead.")
@tf_export('config.experimental.get_memory_usage')
def get_memory_usage(device):
  """Get the current memory usage, in bytes, for the chosen device.

  This function is deprecated in favor of
  `tf.config.experimental.get_memory_info`. Calling this function is
  equivalent to calling
  `tf.config.experimental.get_memory_info()['current']`.

  See https://www.tensorflow.org/api_docs/python/tf/device for specifying
  device strings.

  For example:

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   tf.config.experimental.get_memory_usage('GPU:0')

  Does not work for CPU.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. This function only returns
  the memory that TensorFlow is actually using, not the memory that TensorFlow
  has allocated on the GPU.

  Args:
    device: Device string to get the bytes in use for, e.g. `"GPU:0"`

  Returns:
    Total memory usage in bytes.

  Raises:
    ValueError: Non-existent or CPU device specified.
  """
  # Delegate to the non-deprecated API and pull out the current usage.
  memory_info = get_memory_info(device)
  return memory_info['current']
@tf_export('config.experimental.get_memory_growth')
def get_memory_growth(device):
  """Get if memory growth is enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ...   assert tf.config.experimental.get_memory_growth(physical_devices[0])
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    A boolean indicating the memory growth setting for the `PhysicalDevice`.

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
  """
  ctx = context.context()
  return ctx.get_memory_growth(device)
@tf_export('config.experimental.set_memory_growth')
def set_memory_growth(device, enable):
  """Set if memory growth should be enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device. Memory growth
  cannot be configured on a `PhysicalDevice` with virtual devices configured.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to configure
    enable: (Boolean) Whether to enable or disable memory growth

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
    RuntimeError: Runtime is already initialized.
  """
  ctx = context.context()
  ctx.set_memory_growth(device, enable)
@tf_export('config.experimental.get_device_details')
def get_device_details(device):
  """Returns details about a physical devices.

  This API takes in a `tf.config.PhysicalDevice` returned by
  `tf.config.list_physical_devices`. It returns a dict with string keys
  containing various details about the device. Each key is only supported by a
  subset of devices, so you should not assume the returned dict will have any
  particular key.

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   details = tf.config.experimental.get_device_details(gpu_devices[0])
  ...   details.get('device_name', 'Unknown GPU')

  Currently, details are only returned for GPUs. This function returns an
  empty dict if passed a non-GPU device.

  The returned dict may have the following keys:
  * `'device_name'`: A human-readable name of the device as a string, e.g.
    "Titan V". Unlike `tf.config.PhysicalDevice.name`, this will be the same
    for multiple devices if each device is the same model. Currently only
    available for GPUs.
  * `'compute_capability'`: The
    [compute capability](https://developer.nvidia.com/cuda-gpus) of the device
    as a tuple of two ints, in the form `(major_version, minor_version)`. Only
    available for NVIDIA GPUs

  Note: This is similar to `tf.sysconfig.get_build_info` in that both
  functions can return information relating to GPUs. However, this function
  returns run-time information about a specific device (such as a GPU's
  compute capability), while `tf.sysconfig.get_build_info` returns
  compile-time information about how TensorFlow was built (such as what
  version of CUDA TensorFlow was built for).

  Args:
    device: A `tf.config.PhysicalDevice` returned by
      `tf.config.list_physical_devices` or `tf.config.get_visible_devices`.

  Returns:
    A dict with string keys.
  """
  ctx = context.context()
  return ctx.get_device_details(device)
@tf_export('config.get_logical_device_configuration',
           'config.experimental.get_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.get_virtual_device_configuration')
def get_logical_device_configuration(device):
  """Get the virtual device configuration for a `tf.config.PhysicalDevice`.

  Returns the list of `tf.config.LogicalDeviceConfiguration`
  objects previously configured by a call to
  `tf.config.set_logical_device_configuration`.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> configs = tf.config.get_logical_device_configuration(
  ...   physical_devices[0])
  >>> try:
  ...   assert configs is None
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   configs = tf.config.get_logical_device_configuration(
  ...     physical_devices[0])
  ...   assert len(configs) == 2
  ... except:
  ...   # Cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    List of `tf.config.LogicalDeviceConfiguration` objects or
    `None` if no virtual device configuration has been set for this physical
    device.
  """
  ctx = context.context()
  return ctx.get_logical_device_configuration(device)
@tf_export('config.set_logical_device_configuration',
           'config.experimental.set_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.set_virtual_device_configuration')
def set_logical_device_configuration(device, logical_devices):
  """Set the logical device configuration for a `tf.config.PhysicalDevice`.

  Once the runtime initializes, each visible `tf.config.PhysicalDevice` gets a
  single `tf.config.LogicalDevice` by default. Passing a list of
  `tf.config.LogicalDeviceConfiguration` objects here instead creates several
  logical devices on top of the one physical device.

  The configuration can be changed freely while the runtime is still
  uninitialized; after the runtime has initialized, calling this function
  raises a RuntimeError.

  The following example splits the CPU into 2 logical devices:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported.
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   logical_devices = tf.config.list_logical_devices('CPU')
  ...   assert len(logical_devices) == 2
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ... except:
  ...   # Cannot modify logical devices once initialized.
  ...   pass

  The following example splits the GPU into 2 logical devices with 100 MB each:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=100),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=100)])
  ...
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   assert len(logical_devices) == len(physical_devices) + 1
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=10),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=10)])
  ... except:
  ...   # Invalid device or cannot modify logical devices once initialized.
  ...   pass

  Args:
    device: The `PhysicalDevice` to configure.
    logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration`
      objects to allocate for the specified `PhysicalDevice`. If None, the
      default configuration will be used.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  # Virtual-device bookkeeping lives on the eager context; delegate to it.
  ctx = context.context()
  ctx.set_logical_device_configuration(device, logical_devices)
@tf_export('config.experimental.enable_mlir_bridge')
def enable_mlir_bridge():
  """Enables experimental MLIR-Based TensorFlow Compiler Bridge.

  The TensorFlow Compiler Bridge (TF Bridge) translates parts of a TensorFlow
  graph into a form that a backend compiler such as XLA can accept as input.
  """
  # The flag is a plain attribute on the eager context.
  ctx = context.context()
  ctx.enable_mlir_bridge = True
@tf_export('config.experimental.disable_mlir_bridge')
def disable_mlir_bridge():
  """Disables experimental MLIR-Based TensorFlow Compiler Bridge."""
  # Clear the same eager-context attribute that enable_mlir_bridge sets.
  ctx = context.context()
  ctx.enable_mlir_bridge = False
@tf_export('config.experimental.enable_op_determinism', v1=[])
def enable_op_determinism():
  """Configures TensorFlow ops to run deterministically.

  When op determinism is enabled, TensorFlow ops will be deterministic. This
  means that if an op is run multiple times with the same inputs on the same
  hardware, it will have the exact same outputs each time. This is useful for
  debugging models. Note that determinism in general comes at the expense of
  lower performance and so your model may run slower when op determinism is
  enabled.

  If you want your TensorFlow program to run deterministically, put the
  following code near the start of your program.

  ```python
  tf.keras.utils.set_random_seed(1)
  tf.config.experimental.enable_op_determinism()
  ```

  Calling `tf.keras.utils.set_random_seed` sets the Python seed, the NumPy seed,
  and the TensorFlow seed. Setting these seeds is necessary to ensure any random
  numbers your program generates are also deterministic.

  By default, op determinism is not enabled, so ops might return different
  results when run with the same inputs. These differences are often caused by
  the use of asynchronous threads within the op nondeterministically changing
  the order in which floating-point numbers are added. Most of these cases of
  nondeterminism occur on GPUs, which have thousands of hardware threads that
  are used to run ops. Enabling determinism directs such ops to use a different
  algorithm, one that does not use threads in a nondeterministic way.

  Another potential source of nondeterminism is `tf.data` based data processing.
  Typically, this can introduce nondeterminism due to the use of parallelism in
  methods such as `Dataset.map` producing inputs or running stateful ops in a
  nondeterministic order. Enabling determinism will remove such sources of
  nondeterminism.

  Enabling determinism will likely make your model or your `tf.data` data
  processing slower. For example, `Dataset.map` can become several orders of
  magnitude slower when the map function has random ops or other stateful ops.
  See the “Determinism and tf.data” section below for more details. In future
  TensorFlow releases, we plan on improving the performance of determinism,
  especially for common scenarios such as `Dataset.map`.

  Certain ops will raise an `UnimplementedError` because they do not yet have a
  deterministic implementation. Additionally, due to bugs, some ops might be
  nondeterministic and not raise an `UnimplementedError`. If you encounter such
  ops, please [file an issue](https://github.com/tensorflow/tensorflow/issues).

  An example of enabling determinism follows. The
  `tf.nn.softmax_cross_entropy_with_logits` op is run multiple times and the
  output is shown to be the same each time. This example would likely fail when
  run on a GPU if determinism were not enabled, because
  `tf.nn.softmax_cross_entropy_with_logits` uses a nondeterministic algorithm on
  GPUs by default.

  ```python
  labels = tf.random.normal((1, 10000))
  logits = tf.random.normal((1, 10000))
  output = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
  for _ in range(5):
    output2 = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                      logits=logits)
    tf.debugging.assert_equal(output, output2)
  ```

  ## Writing deterministic models

  You can make your models deterministic by enabling op determinism. This
  means that you can train a model and finish each run with exactly the same
  trainable variables. This also means that the inferences of your
  previously-trained model will be exactly the same on each run. Typically,
  models can be made deterministic by simply setting the seeds and enabling
  op determinism, as in the example above. However, to guarantee that your
  model operates deterministically, you must meet all the following
  requirements:

  * Call `tf.config.experimental.enable_op_determinism()`, as mentioned above.
  * Reproducibly reset any pseudorandom number generators (PRNGs) you’re using,
    such as by setting the seeds for the default PRNGs in TensorFlow, Python,
    and NumPy, as mentioned above. Note that certain newer NumPy classes like
    `numpy.random.default_rng` ignore the global NumPy seed, so a seed must be
    explicitly passed to such classes, if used.
  * Use the same hardware configuration in every run.
  * Use the same software environment in every run (OS, checkpoints, version of
    CUDA and TensorFlow, environmental variables, etc). Note that determinism is
    not guaranteed across different versions of TensorFlow.
  * Do not use constructs outside TensorFlow that are nondeterministic, such as
    reading from `/dev/random` or using multiple threads/processes in ways that
    influence TensorFlow’s behavior.
  * Ensure your input pipeline is deterministic. If you use `tf.data`, this is
    done automatically (at the expense of performance). See "Determinism and
    tf.data" below for more information.
  * Do not use `tf.compat.v1.Session` and
    `tf.distribute.experimental.ParameterServerStrategy`, which can introduce
    nondeterminism. Besides ops (including `tf.data` ops), these are the only
    known potential sources of nondeterminism within TensorFlow (if you
    find more, please file an issue). Note that `tf.compat.v1.Session` is
    required to use the TF1 API, so determinism cannot be guaranteed when using
    the TF1 API.
  * Do not use nondeterministic custom ops.

  ## Additional details on determinism

  For stateful ops to be deterministic, the state of the system must be the same
  every time the op is run. For example the output of `tf.Variable.sparse_read`
  (obviously) depends on both the variable value and the `indices` function
  parameter. When determinism is enabled, the side effects of stateful ops are
  deterministic.

  TensorFlow’s random ops, such as `tf.random.normal`, will raise a
  `RuntimeError` if determinism is enabled and a seed has not been set. However,
  attempting to generate nondeterministic random numbers using Python or NumPy
  will not raise such errors. Make sure you remember to set the Python and NumPy
  seeds. Calling `tf.keras.utils.set_random_seed` is an easy way to set all
  three seeds.

  Note that latency, memory consumption, throughput, and other performance
  characteristics are *not* made deterministic by enabling op determinism.
  Only op outputs and side effects are made deterministic. Additionally, a model
  may nondeterministically raise a `tf.errors.ResourceExhaustedError` from a
  lack of memory due to the fact that memory consumption is nondeterministic.

  ## Determinism and tf.data

  Enabling deterministic ops makes `tf.data` deterministic in several ways:

  1. For dataset methods with a `deterministic` argument, such as `Dataset.map`
     and `Dataset.batch`, the `deterministic` argument is overridden to be
     `True` irrespective of its setting.
  2. The `tf.data.Option.experimental_deterministic` option is overridden to be
     `True` irrespective of its setting.
  3. In `Dataset.map` and `Dataset.interleave`, if the map or interleave
     function has stateful random ops or other stateful ops, the function will
     run serially instead of in parallel. This means the `num_parallel_calls`
     argument to `map` and `interleave` is effectively ignored.
  4. Prefetching with `Dataset.prefetch` will be disabled if any function run
     as part of the input pipeline has certain stateful ops. Similarly, any
     dataset method with a `num_parallel_calls` argument will be made to run
     serially if any function in the input pipeline has such stateful ops.
     Legacy random ops such as `tf.random.normal` will *not* cause such datasets
     to be changed, but most other stateful ops will.

  Unfortunately, due to (3), performance can be greatly reduced when stateful
  ops are used in `Dataset.map` due to no longer running the map function in
  parallel. A common example of stateful ops used in `Dataset.map` are random
  ops, such as `tf.random.normal`, which are typically used for distortions. One
  way to work around this is to use stateless random ops instead. Alternatively
  you can hoist all random ops into its own separate `Dataset.map` call, making
  the original `Dataset.map` call stateless and thus avoid the need to serialize
  its execution.

  (4) can also cause performance to be reduced, but occurs less frequently than
  (3) because legacy random ops do not cause (4) to take effect. However, unlike
  (3), when there are non-random stateful ops in a user-defined function, every
  `map` and `interleave` dataset is affected, instead of just the `map` or
  `interleave` dataset with the function that has stateful ops. Additionally,
  `prefetch` datasets and any dataset with the `num_parallel_calls` argument are
  also affected.
  """
  # Flips the global determinism flag; presumably read by the C++ op kernels
  # when selecting algorithms -- the binding itself is opaque from here.
  _pywrap_determinism.enable(True)
def disable_op_determinism():
  """Disables op determinism.

  Counterpart of `enable_op_determinism`; clears the same global flag. Unlike
  `enable_op_determinism`, this function carries no `tf_export` decorator, so
  it is not part of the public `tf.config` API surface.
  """
  _pywrap_determinism.enable(False)
def is_op_determinism_enabled():
  """Returns True if op determinism is enabled.

  Reads the global flag toggled by `enable_op_determinism` /
  `disable_op_determinism`. Like `disable_op_determinism`, this helper is not
  exported via `tf_export`.
  """
  return _pywrap_determinism.is_enabled()