Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/framework/config.py: 58%
139 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Functions for configuring TensorFlow execution."""
17from typing import Union
19from tensorflow.python.eager import context
20from tensorflow.python.framework import errors
21from tensorflow.python.util import _pywrap_determinism
22from tensorflow.python.util import _pywrap_tensor_float_32_execution
23from tensorflow.python.util import deprecation
24from tensorflow.python.util.tf_export import tf_export
@tf_export('config.experimental.tensor_float_32_execution_enabled')
def tensor_float_32_execution_enabled():
  """Returns whether TensorFloat-32 is enabled.

  TensorFloat-32 is on by default; it can be toggled with
  `tf.config.experimental.enable_tensor_float_32_execution`.

  Returns:
    True if TensorFloat-32 is enabled (the default) and False otherwise
  """
  # Query the native runtime flag directly.
  return _pywrap_tensor_float_32_execution.is_enabled()
@tf_export('config.experimental.enable_tensor_float_32_execution')
def enable_tensor_float_32_execution(enabled):
  """Enable or disable the use of TensorFloat-32 on supported hardware.

  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format),
  or TF32 for short, is a math mode for NVIDIA Ampere GPUs and above. It makes
  certain float32 ops, such as matrix multiplications and convolutions, run
  much faster at reduced precision. In practice, the reduced precision should
  not impact the convergence of deep learning models.

  TensorFloat-32 is enabled by default and only takes effect on NVIDIA GPUs of
  the Ampere generation or newer; older NVIDIA GPUs and other hardware always
  compute in full float32 precision regardless of this setting. To force full
  float32 precision on all GPUs, disable TensorFloat-32 with this function.
  For example:

  ```python
  x = tf.fill((1024, 1024), 1.0001)
  y = tf.fill((1024, 1024), 1.)
  # TensorFloat-32 is enabled, so matmul is run with reduced precision
  print(tf.linalg.matmul(x, y)[0, 0])  # 1024.0
  tf.config.experimental.enable_tensor_float_32_execution(False)
  # Matmul is run with full precision
  print(tf.linalg.matmul(x, y)[0, 0])  # ~1024.1
  ```

  To check whether TensorFloat-32 execution is currently enabled, use
  `tf.config.experimental.tensor_float_32_execution_enabled`.

  When enabled, float32 inputs of supported ops such as `tf.linalg.matmul` are
  typically rounded from 23 bits of precision to 10 bits, which lets the ops
  use the GPU's tensor cores. TensorFloat-32 keeps float32's dynamic range, so
  it is no more likely to underflow or overflow, and ops still accumulate in
  float32. Enabling or disabling TensorFloat-32 only affects Ampere GPUs and
  above.

  Note that even in supported ops, TensorFloat-32 is only used for certain
  input shapes. Support for more shapes and more ops may be added in the
  future, so the precision of float32 ops may decrease in minor versions of
  TensorFlow.

  TensorFloat-32 is also used for some complex64 ops, although currently in
  fewer cases than for float32.

  Args:
    enabled: Bool indicating whether to enable TensorFloat-32 execution.
  """
  # Forward the flag to the native runtime.
  _pywrap_tensor_float_32_execution.enable(enabled)
@tf_export('config.threading.get_intra_op_parallelism_threads')
def get_intra_op_parallelism_threads():
  """Get number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Returns:
    Number of parallel threads
  """
  ctx = context.context()
  return ctx.intra_op_parallelism_threads
@tf_export('config.threading.set_intra_op_parallelism_threads')
def set_intra_op_parallelism_threads(num_threads):
  """Set number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  ctx = context.context()
  ctx.intra_op_parallelism_threads = num_threads
@tf_export('config.threading.get_inter_op_parallelism_threads')
def get_inter_op_parallelism_threads():
  """Get number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Returns:
    Number of parallel threads
  """
  ctx = context.context()
  return ctx.inter_op_parallelism_threads
@tf_export('config.threading.set_inter_op_parallelism_threads')
def set_inter_op_parallelism_threads(num_threads):
  """Set number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  ctx = context.context()
  ctx.inter_op_parallelism_threads = num_threads
@tf_export('config.optimizer.get_jit')
def get_optimizer_jit() -> str:
  """Returns JIT compilation configuration for code inside `tf.function`.

  Possible return values:
    - `"autoclustering"` if
      [autoclustering](https://www.tensorflow.org/xla#auto-clustering) is
      enabled
    - `""` when no default compilation is applied.
  """
  # Collapse the boolean runtime flag into the public string representation.
  return 'autoclustering' if context.context().optimizer_jit else ''
@tf_export('config.optimizer.set_jit')
@deprecation.deprecated_arg_values(
    None,
    '`True` setting is deprecated, use `autoclustering` instead.',
    warn_once=True,
    jit_config=True)
def set_optimizer_jit(enabled: Union[bool, str]):
  """Configure JIT compilation.

  Note: compilation is only applied to code that is compiled into a
  graph (in TF2 that's only a code inside `tf.function`).

  Args:
    enabled: JIT compilation configuration.
      Possible values:
      - `"autoclustering"` (`True` is a deprecated alias): perform
        [autoclustering](https://www.tensorflow.org/xla#auto-clustering)
        (automatically identify and compile clusters of nodes) on all graphs
        using [XLA](https://www.tensorflow.org/xla).
      - `False`: do not automatically compile any graphs.
  """
  # Anything other than True/'autoclustering' disables JIT.
  context.context().optimizer_jit = enabled in (True, 'autoclustering')
@tf_export('config.optimizer.get_experimental_options')
def get_optimizer_experimental_options():
  """Get experimental optimizer options.

  Refer to tf.config.optimizer.set_experimental_options for a list of current
  options.

  Note that optimizations are only applied in graph mode, (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Returns:
    Dictionary of configured experimental optimizer options
  """
  ctx = context.context()
  return ctx.get_optimizer_experimental_options()
@tf_export('config.optimizer.set_experimental_options')
def set_optimizer_experimental_options(options):
  """Set experimental optimizer options.

  Note that optimizations are only applied in graph mode, (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Args:
    options: Dictionary of experimental optimizer options to configure.
      Valid keys:
      - layout_optimizer: Optimize tensor layouts, e.g. try to use the NCHW
        layout on GPU, which is faster.
      - constant_folding: Fold constants. Statically infer the value of
        tensors when possible, and materialize the result using constants.
      - shape_optimization: Simplify computations made on shapes.
      - remapping: Remap subgraphs onto more efficient implementations.
      - arithmetic_optimization: Simplify arithmetic ops with common
        sub-expression elimination and arithmetic simplification.
      - dependency_optimization: Control dependency optimizations. Remove
        redundant control dependencies, which may enable other optimization.
        This optimizer is also essential for pruning Identity and NoOp nodes.
      - loop_optimization: Loop optimizations.
      - function_optimization: Function optimizations and inlining.
      - debug_stripper: Strips debug-related nodes from the graph.
      - disable_model_pruning: Disable removal of unnecessary ops from the
        graph.
      - scoped_allocator_optimization: Try to allocate some independent Op
        outputs contiguously in order to merge or eliminate downstream Ops.
      - pin_to_host_optimization: Force small ops onto the CPU.
      - implementation_selector: Enable the swap of kernel implementations
        based on the device placement.
      - auto_mixed_precision: Change certain float32 ops to float16 on Volta
        GPUs and above. Without the use of loss scaling, this can cause
        numerical underflow (see
        `keras.mixed_precision.experimental.LossScaleOptimizer`).
      - disable_meta_optimizer: Disable the entire meta optimizer.
      - min_graph_nodes: The minimum number of nodes in a graph to optimizer.
        For smaller graphs, optimization is skipped.
      - auto_parallel: Automatically parallelizes graphs by splitting along
        the batch dimension.
  """
  ctx = context.context()
  ctx.set_optimizer_experimental_options(options)
@tf_export('config.get_soft_device_placement')
def get_soft_device_placement():
  """Return status of soft device placement flag.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP can not be compiled by XLA.  Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Returns:
    A boolean indicating if soft placement is enabled.
  """
  ctx = context.context()
  return ctx.soft_device_placement
@tf_export('config.set_soft_device_placement')
def set_soft_device_placement(enabled):
  """Enable or disable soft device placement.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP can not be compiled by XLA.  Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Note: by default soft device placement is enabled when running in eager mode
  (for convenience) and disabled in graph mode (for performance).

  Args:
    enabled: A boolean indicating whether to enable soft placement.
  """
  ctx = context.context()
  ctx.soft_device_placement = enabled
@tf_export('config.experimental.get_device_policy')
def get_device_policy():
  """Gets the current device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  This function only gets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Returns:
    Current thread device policy
  """
  device_policy = context.context().device_policy
  # Map the internal enum value to its public string name.
  names = {
      context.DEVICE_PLACEMENT_SILENT: 'silent',
      context.DEVICE_PLACEMENT_SILENT_FOR_INT32: 'silent_for_int32',
      context.DEVICE_PLACEMENT_WARN: 'warn',
      context.DEVICE_PLACEMENT_EXPLICIT: 'explicit',
  }
  if device_policy in names:
    return names[device_policy]
  # pylint: disable-next=no-value-for-parameter
  raise errors.InternalError(
      f'Got an invalid device policy: {device_policy!r}.')
@tf_export('config.experimental.set_device_policy')
def set_device_policy(device_policy):
  """Sets the current thread device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  When using the default, an appropriate policy will be picked automatically.
  The default policy may change over time.

  This function only sets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Args:
    device_policy: A device policy.
      Valid values:
      - None: Switch to a system default.
      - 'warn': Copies the tensors which are not on the right device and logs
        a warning.
      - 'explicit': Raises an error if the placement is not as required.
      - 'silent': Silently copies the tensors. Note that this may hide
        performance problems as there is no notification provided when
        operations are blocked on the tensor being copied between devices.
      - 'silent_for_int32': silently copies `int32` tensors, raising errors on
        the other ones.

  Raises:
    ValueError: If an invalid `device_policy` is passed.
  """
  # Tuple table (rather than a dict) so unhashable inputs still fall through
  # to the ValueError below instead of raising TypeError.
  valid_policies = (
      ('silent', context.DEVICE_PLACEMENT_SILENT),
      ('silent_for_int32', context.DEVICE_PLACEMENT_SILENT_FOR_INT32),
      ('warn', context.DEVICE_PLACEMENT_WARN),
      ('explicit', context.DEVICE_PLACEMENT_EXPLICIT),
      (None, None),
  )
  for name, placement in valid_policies:
    if device_policy == name if name is not None else device_policy is None:
      context.context().device_policy = placement
      return
  raise ValueError(
      f'Invalid argument `device_policy`: {device_policy!r}. Please refer to '
      'https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_device_policy '
      'for valid `device_policy` arguments.')
@tf_export('config.experimental.get_synchronous_execution')
def get_synchronous_execution():
  """Gets whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  Returns:
    Current thread execution mode
  """
  ctx = context.context()
  return ctx.execution_mode == context.SYNC
@tf_export('config.experimental.set_synchronous_execution')
def set_synchronous_execution(enable):
  """Specifies whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  When `enable` is set to None, an appropriate value will be picked
  automatically. The value picked may change between TensorFlow releases.

  Args:
    enable: Whether operations should be dispatched synchronously.
      Valid values:
      - None: sets the system default.
      - True: executes each operation synchronously.
      - False: executes each operation asynchronously.
  """
  if enable is None:
    mode = None  # Let the runtime choose its default.
  else:
    mode = context.SYNC if enable else context.ASYNC
  context.context().execution_mode = mode
@tf_export('config.list_physical_devices',
           'config.experimental.list_physical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_physical_devices')
def list_physical_devices(device_type=None):
  """Return a list of physical devices visible to the host runtime.

  Physical devices are hardware devices present on the host machine. By default
  all discovered CPU and GPU devices are considered visible.

  This API allows querying the physical hardware resources prior to runtime
  initialization. Thus, giving an opportunity to call any additional
  configuration APIs. This is in contrast to `tf.config.list_logical_devices`,
  which triggers runtime initialization in order to list the configured
  devices.

  The following example lists the number of visible GPUs on the host.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> print("Num GPUs:", len(physical_devices))
  Num GPUs: ...

  However, the number of GPUs available to the runtime may change during
  runtime initialization due to marking certain devices as not visible or
  configuring multiple logical devices.

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of discovered `tf.config.PhysicalDevice` objects
  """
  ctx = context.context()
  return ctx.list_physical_devices(device_type)
@tf_export('config.list_logical_devices',
           'config.experimental.list_logical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_logical_devices')
def list_logical_devices(device_type=None):
  """Return a list of logical devices created by runtime.

  Logical devices may correspond to physical devices or remote devices in the
  cluster. Operations and tensors may be placed on these devices by using the
  `name` of the `tf.config.LogicalDevice`.

  Calling `tf.config.list_logical_devices` triggers the runtime to configure
  any `tf.config.PhysicalDevice` visible to the runtime, thereby preventing
  further configuration. To avoid runtime initialization, call
  `tf.config.list_physical_devices` instead.

  For example:

  >>> logical_devices = tf.config.list_logical_devices('GPU')
  >>> if len(logical_devices) > 0:
  ...   # Allocate on GPU:0
  ...   with tf.device(logical_devices[0].name):
  ...     one = tf.constant(1)
  ...   # Allocate on GPU:1
  ...   with tf.device(logical_devices[1].name):
  ...     two = tf.constant(2)

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of initialized `LogicalDevice`s
  """
  ctx = context.context()
  return ctx.list_logical_devices(device_type=device_type)
@tf_export('config.get_visible_devices',
           'config.experimental.get_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.get_visible_devices')
def get_visible_devices(device_type=None):
  """Get the list of visible physical devices.

  Returns the list of `PhysicalDevice`s currently marked as visible to the
  runtime. A visible device will have at least one `LogicalDevice` associated
  with it once the runtime is initialized.

  The following example verifies all visible GPUs have been disabled:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable all GPUS
  ...   tf.config.set_visible_devices([], 'GPU')
  ...   visible_devices = tf.config.get_visible_devices()
  ...   for device in visible_devices:
  ...     assert device.device_type != 'GPU'
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of visible `PhysicalDevice`s
  """
  ctx = context.context()
  return ctx.get_visible_devices(device_type)
@tf_export('config.set_visible_devices',
           'config.experimental.set_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.set_visible_devices')
def set_visible_devices(devices, device_type=None):
  """Set the list of visible devices.

  Specifies which `PhysicalDevice` objects are visible to the runtime.
  TensorFlow will only allocate memory and place operations on visible
  physical devices, as otherwise no `LogicalDevice` will be created on them.
  By default all discovered devices are marked as visible.

  The following example demonstrates disabling the first GPU on the machine.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable first GPU
  ...   tf.config.set_visible_devices(physical_devices[1:], 'GPU')
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   # Logical device was not created for first GPU
  ...   assert len(logical_devices) == len(physical_devices) - 1
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    devices: List of `PhysicalDevice`s to make visible
    device_type: (optional) Only configure devices matching this device type.
      For example "CPU" or "GPU". Other devices will be left unaltered.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  ctx = context.context()
  ctx.set_visible_devices(devices, device_type)
# TODO(b/188089869): Redesign memory stats related APIs before move them out of
# experimental.
@tf_export('config.experimental.get_memory_info')
def get_memory_info(device):
  """Get memory info for the chosen device, as a dict.

  This function returns a dict containing information about the device's
  memory usage. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Returns a dict in the form {'current': <current mem usage>,
  ...   #                             'peak': <peak mem usage>}
  ...   tf.config.experimental.get_memory_info('GPU:0')

  Currently returns the following keys:
    - `'current'`: The current memory used by the device, in bytes.
    - `'peak'`: The peak memory used by the device across the run of the
      program, in bytes. Can be reset with
      `tf.config.experimental.reset_memory_stats`.

  More keys may be added in the future, including device-specific keys.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. The dict specifies only the
  current and peak memory that TensorFlow is actually using, not the memory
  that TensorFlow has allocated on the GPU.

  Args:
    device: Device string to get the memory information for, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Returns:
    A dict with keys `'current'` and `'peak'`, specifying the current and peak
    memory usage respectively.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked, like '"CPU:0"'.
  """
  ctx = context.context()
  return ctx.get_memory_info(device)
# TODO(b/188089869): Redesign memory stats related APIs before move them out of
# experimental.
# TODO(b/189498350): Unify the behavior on CPU, GPU and TPU.
@tf_export('config.experimental.reset_memory_stats')
def reset_memory_stats(device):
  """Resets the tracked memory stats for the chosen device.

  This function sets the tracked peak memory for a device to the device's
  current memory usage. This allows you to measure the peak memory usage for a
  specific part of your program. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Sets the peak memory to the current memory.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the first peak memory usage.
  ...   x1 = tf.ones(1000 * 1000, dtype=tf.float64)
  ...   del x1  # Frees the memory referenced by `x1`.
  ...   peak1 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   # Sets the peak memory to the current memory again.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the second peak memory usage.
  ...   x2 = tf.ones(1000 * 1000, dtype=tf.float32)
  ...   del x2
  ...   peak2 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   assert peak2 < peak1  # tf.float32 consumes less memory than tf.float64.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  Args:
    device: Device string to reset the memory stats, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked or clearing memory statistics
      not supported, like '"CPU:0"'.
  """
  ctx = context.context()
  ctx.reset_memory_stats(device)
@deprecation.deprecated(
    None,
    "Use tf.config.experimental.get_memory_info(device)['current'] instead.")
@tf_export('config.experimental.get_memory_usage')
def get_memory_usage(device):
  """Get the current memory usage, in bytes, for the chosen device.

  This function is deprecated in favor of
  `tf.config.experimental.get_memory_info`. Calling this function is
  equivalent to calling
  `tf.config.experimental.get_memory_info()['current']`.

  See https://www.tensorflow.org/api_docs/python/tf/device for specifying
  device strings.

  For example:

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   tf.config.experimental.get_memory_usage('GPU:0')

  Does not work for CPU.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. This function only returns
  the memory that TensorFlow is actually using, not the memory that TensorFlow
  has allocated on the GPU.

  Args:
    device: Device string to get the bytes in use for, e.g. `"GPU:0"`

  Returns:
    Total memory usage in bytes.

  Raises:
    ValueError: Non-existent or CPU device specified.
  """
  # Delegate to the non-deprecated API and pull out the current usage.
  memory_info = get_memory_info(device)
  return memory_info['current']
@tf_export('config.experimental.get_memory_growth')
def get_memory_growth(device):
  """Get if memory growth is enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ...   assert tf.config.experimental.get_memory_growth(physical_devices[0])
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    A boolean indicating the memory growth setting for the `PhysicalDevice`.

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
  """
  ctx = context.context()
  return ctx.get_memory_growth(device)
@tf_export('config.experimental.set_memory_growth')
def set_memory_growth(device, enable):
  """Set if memory growth should be enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device. Memory growth
  cannot be configured on a `PhysicalDevice` with virtual devices configured.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to configure
    enable: (Boolean) Whether to enable or disable memory growth

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
    RuntimeError: Runtime is already initialized.
  """
  ctx = context.context()
  ctx.set_memory_growth(device, enable)
@tf_export('config.experimental.get_device_details')
def get_device_details(device):
  """Returns details about a physical devices.

  This API takes in a `tf.config.PhysicalDevice` returned by
  `tf.config.list_physical_devices`. It returns a dict with string keys
  containing various details about the device. Each key is only supported by a
  subset of devices, so you should not assume the returned dict will have any
  particular key.

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   details = tf.config.experimental.get_device_details(gpu_devices[0])
  ...   details.get('device_name', 'Unknown GPU')

  Currently, details are only returned for GPUs. This function returns an
  empty dict if passed a non-GPU device.

  The returned dict may have the following keys:
  * `'device_name'`: A human-readable name of the device as a string, e.g.
    "Titan V". Unlike `tf.config.PhysicalDevice.name`, this will be the same
    for multiple devices if each device is the same model. Currently only
    available for GPUs.
  * `'compute_capability'`: The
    [compute capability](https://developer.nvidia.com/cuda-gpus) of the device
    as a tuple of two ints, in the form `(major_version, minor_version)`. Only
    available for NVIDIA GPUs

  Note: This is similar to `tf.sysconfig.get_build_info` in that both
  functions can return information relating to GPUs. However, this function
  returns run-time information about a specific device (such as a GPU's
  compute capability), while `tf.sysconfig.get_build_info` returns
  compile-time information about how TensorFlow was built (such as what
  version of CUDA TensorFlow was built for).

  Args:
    device: A `tf.config.PhysicalDevice` returned by
      `tf.config.list_physical_devices` or `tf.config.get_visible_devices`.

  Returns:
    A dict with string keys.
  """
  ctx = context.context()
  return ctx.get_device_details(device)
@tf_export('config.get_logical_device_configuration',
           'config.experimental.get_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.get_virtual_device_configuration')
def get_logical_device_configuration(device):
  """Get the virtual device configuration for a `tf.config.PhysicalDevice`.

  Returns the list of `tf.config.LogicalDeviceConfiguration`
  objects previously configured by a call to
  `tf.config.set_logical_device_configuration`.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> configs = tf.config.get_logical_device_configuration(
  ...   physical_devices[0])
  >>> try:
  ...   assert configs is None
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   configs = tf.config.get_logical_device_configuration(
  ...     physical_devices[0])
  ...   assert len(configs) == 2
  ... except:
  ...   # Cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    List of `tf.config.LogicalDeviceConfiguration` objects or
    `None` if no virtual device configuration has been set for this physical
    device.
  """
  ctx = context.context()
  return ctx.get_logical_device_configuration(device)
@tf_export('config.set_logical_device_configuration',
           'config.experimental.set_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.set_virtual_device_configuration')
def set_logical_device_configuration(device, logical_devices):
  """Set the logical device configuration for a `tf.config.PhysicalDevice`.

  Once the runtime initializes, each visible `tf.config.PhysicalDevice` gets a
  single `tf.config.LogicalDevice` by default. Passing a list of
  `tf.config.LogicalDeviceConfiguration` objects here instead creates several
  logical devices on top of the one physical device.

  The configuration can be changed freely while the runtime is still
  uninitialized; after the runtime has initialized, calling this function
  raises a RuntimeError.

  The following example splits the CPU into 2 logical devices:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported.
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   logical_devices = tf.config.list_logical_devices('CPU')
  ...   assert len(logical_devices) == 2
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ... except:
  ...   # Cannot modify logical devices once initialized.
  ...   pass

  The following example splits the GPU into 2 logical devices with 100 MB each:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=100),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=100)])
  ...
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   assert len(logical_devices) == len(physical_devices) + 1
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=10),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=10)])
  ... except:
  ...   # Invalid device or cannot modify logical devices once initialized.
  ...   pass

  Args:
    device: The `PhysicalDevice` to configure.
    logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration`
      objects to allocate for the specified `PhysicalDevice`. If None, the
      default configuration will be used.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  # Virtual-device bookkeeping lives on the eager context; delegate to it.
  ctx = context.context()
  ctx.set_logical_device_configuration(device, logical_devices)
@tf_export('config.experimental.enable_mlir_bridge')
def enable_mlir_bridge():
  """Enables experimental MLIR-Based TensorFlow Compiler Bridge.

  The TensorFlow Compiler Bridge (TF Bridge) translates parts of a TensorFlow
  graph into a form that a backend compiler such as XLA can accept as input.
  """
  # The flag is a plain attribute on the eager context.
  ctx = context.context()
  ctx.enable_mlir_bridge = True
@tf_export('config.experimental.disable_mlir_bridge')
def disable_mlir_bridge():
  """Disables experimental MLIR-Based TensorFlow Compiler Bridge."""
  # Clear the same eager-context attribute that enable_mlir_bridge sets.
  ctx = context.context()
  ctx.enable_mlir_bridge = False
@tf_export('config.experimental.enable_op_determinism', v1=[])
def enable_op_determinism():
  """Configures TensorFlow ops to run deterministically.

  When op determinism is enabled, TensorFlow ops will be deterministic. This
  means that if an op is run multiple times with the same inputs on the same
  hardware, it will have the exact same outputs each time. This is useful for
  debugging models. Note that determinism in general comes at the expense of
  lower performance and so your model may run slower when op determinism is
  enabled.

  If you want your TensorFlow program to run deterministically, put the
  following code near the start of your program.

  ```python
  tf.keras.utils.set_random_seed(1)
  tf.config.experimental.enable_op_determinism()
  ```

  Calling `tf.keras.utils.set_random_seed` sets the Python seed, the NumPy seed,
  and the TensorFlow seed. Setting these seeds is necessary to ensure any random
  numbers your program generates are also deterministic.

  By default, op determinism is not enabled, so ops might return different
  results when run with the same inputs. These differences are often caused by
  the use of asynchronous threads within the op nondeterministically changing
  the order in which floating-point numbers are added. Most of these cases of
  nondeterminism occur on GPUs, which have thousands of hardware threads that
  are used to run ops. Enabling determinism directs such ops to use a different
  algorithm, one that does not use threads in a nondeterministic way.

  Another potential source of nondeterminism is `tf.data` based data processing.
  Typically, this can introduce nondeterminism due to the use of parallelism in
  methods such as `Dataset.map` producing inputs or running stateful ops in a
  nondeterministic order. Enabling determinism will remove such sources of
  nondeterminism.

  Enabling determinism will likely make your model or your `tf.data` data
  processing slower. For example, `Dataset.map` can become several orders of
  magnitude slower when the map function has random ops or other stateful ops.
  See the “Determinism and tf.data” section below for more details. In future
  TensorFlow releases, we plan on improving the performance of determinism,
  especially for common scenarios such as `Dataset.map`.

  Certain ops will raise an `UnimplementedError` because they do not yet have a
  deterministic implementation. Additionally, due to bugs, some ops might be
  nondeterministic and not raise an `UnimplementedError`. If you encounter such
  ops, please [file an issue](https://github.com/tensorflow/tensorflow/issues).

  An example of enabling determinism follows. The
  `tf.nn.softmax_cross_entropy_with_logits` op is run multiple times and the
  output is shown to be the same each time. This example would likely fail when
  run on a GPU if determinism were not enabled, because
  `tf.nn.softmax_cross_entropy_with_logits` uses a nondeterministic algorithm on
  GPUs by default.

  ```python
  labels = tf.random.normal((1, 10000))
  logits = tf.random.normal((1, 10000))
  output = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
  for _ in range(5):
    output2 = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                      logits=logits)
    tf.debugging.assert_equal(output, output2)
  ```

  ## Writing deterministic models

  You can make your models deterministic by enabling op determinism. This
  means that you can train a model and finish each run with exactly the same
  trainable variables. This also means that the inferences of your
  previously-trained model will be exactly the same on each run. Typically,
  models can be made deterministic by simply setting the seeds and enabling
  op determinism, as in the example above. However, to guarantee that your
  model operates deterministically, you must meet all the following
  requirements:

  * Call `tf.config.experimental.enable_op_determinism()`, as mentioned above.
  * Reproducibly reset any pseudorandom number generators (PRNGs) you’re using,
    such as by setting the seeds for the default PRNGs in TensorFlow, Python,
    and NumPy, as mentioned above. Note that certain newer NumPy classes like
    `numpy.random.default_rng` ignore the global NumPy seed, so a seed must be
    explicitly passed to such classes, if used.
  * Use the same hardware configuration in every run.
  * Use the same software environment in every run (OS, checkpoints, version of
    CUDA and TensorFlow, environmental variables, etc). Note that determinism is
    not guaranteed across different versions of TensorFlow.
  * Do not use constructs outside TensorFlow that are nondeterministic, such as
    reading from `/dev/random` or using multiple threads/processes in ways that
    influence TensorFlow’s behavior.
  * Ensure your input pipeline is deterministic. If you use `tf.data`, this is
    done automatically (at the expense of performance). See "Determinism and
    tf.data" below for more information.
  * Do not use `tf.compat.v1.Session` and
    `tf.distribute.experimental.ParameterServerStrategy`, which can introduce
    nondeterminism. Besides ops (including `tf.data` ops), these are the only
    known potential sources of nondeterminism within TensorFlow (if you
    find more, please file an issue). Note that `tf.compat.v1.Session` is
    required to use the TF1 API, so determinism cannot be guaranteed when using
    the TF1 API.
  * Do not use nondeterministic custom ops.

  ## Additional details on determinism

  For stateful ops to be deterministic, the state of the system must be the same
  every time the op is run. For example the output of `tf.Variable.sparse_read`
  (obviously) depends on both the variable value and the `indices` function
  parameter. When determinism is enabled, the side effects of stateful ops are
  deterministic.

  TensorFlow’s random ops, such as `tf.random.normal`, will raise a
  `RuntimeError` if determinism is enabled and a seed has not been set. However,
  attempting to generate nondeterministic random numbers using Python or NumPy
  will not raise such errors. Make sure you remember to set the Python and NumPy
  seeds. Calling `tf.keras.utils.set_random_seed` is an easy way to set all
  three seeds.

  Note that latency, memory consumption, throughput, and other performance
  characteristics are *not* made deterministic by enabling op determinism.
  Only op outputs and side effects are made deterministic. Additionally, a model
  may nondeterministically raise a `tf.errors.ResourceExhaustedError` from a
  lack of memory due to the fact that memory consumption is nondeterministic.

  ## Determinism and tf.data

  Enabling deterministic ops makes `tf.data` deterministic in several ways:

  1. For dataset methods with a `deterministic` argument, such as `Dataset.map`
     and `Dataset.batch`, the `deterministic` argument is overridden to be
     `True` irrespective of its setting.
  2. The `tf.data.Option.experimental_deterministic` option is overridden to be
     `True` irrespective of its setting.
  3. In `Dataset.map` and `Dataset.interleave`, if the map or interleave
     function has stateful random ops or other stateful ops, the function will
     run serially instead of in parallel. This means the `num_parallel_calls`
     argument to `map` and `interleave` is effectively ignored.
  4. Prefetching with `Dataset.prefetch` will be disabled if any function run
     as part of the input pipeline has certain stateful ops. Similarly, any
     dataset method with a `num_parallel_calls` argument will be made to run
     serially if any function in the input pipeline has such stateful ops.
     Legacy random ops such as `tf.random.normal` will *not* cause such datasets
     to be changed, but most other stateful ops will.

  Unfortunately, due to (3), performance can be greatly reduced when stateful
  ops are used in `Dataset.map` due to no longer running the map function in
  parallel. A common example of stateful ops used in `Dataset.map` are random
  ops, such as `tf.random.normal`, which are typically used for distortions. One
  way to work around this is to use stateless random ops instead. Alternatively
  you can hoist all random ops into its own separate `Dataset.map` call, making
  the original `Dataset.map` call stateless and thus avoid the need to serialize
  its execution.

  (4) can also cause performance to be reduced, but occurs less frequently than
  (3) because legacy random ops do not cause (4) to take effect. However, unlike
  (3), when there are non-random stateful ops in a user-defined function, every
  `map` and `interleave` dataset is affected, instead of just the `map` or
  `interleave` dataset with the function that has stateful ops. Additionally,
  `prefetch` datasets and any dataset with the `num_parallel_calls` argument are
  also affected.
  """
  # Flips the global determinism flag; presumably read by the C++ op kernels
  # when selecting algorithms -- the binding itself is opaque from here.
  _pywrap_determinism.enable(True)
def disable_op_determinism():
  """Disables op determinism.

  Counterpart of `enable_op_determinism`; clears the same global flag. Unlike
  `enable_op_determinism`, this function carries no `tf_export` decorator, so
  it is not part of the public `tf.config` API surface.
  """
  _pywrap_determinism.enable(False)
def is_op_determinism_enabled():
  """Returns True if op determinism is enabled.

  Reads the global flag toggled by `enable_op_determinism` /
  `disable_op_determinism`. Like `disable_op_determinism`, this helper is not
  exported via `tf_export`.
  """
  return _pywrap_determinism.is_enabled()