# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for configuring TensorFlow execution."""

from typing import Union

from tensorflow.python.eager import context
from tensorflow.python.framework import errors
from tensorflow.python.util import _pywrap_determinism
from tensorflow.python.util import _pywrap_tensor_float_32_execution
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


@tf_export('config.experimental.tensor_float_32_execution_enabled')
def tensor_float_32_execution_enabled():
  """Returns whether TensorFloat-32 is enabled.

  By default, TensorFloat-32 is enabled, but this can be changed with
  `tf.config.experimental.enable_tensor_float_32_execution`.

  Returns:
    True if TensorFloat-32 is enabled (the default) and False otherwise.
  """
  return _pywrap_tensor_float_32_execution.is_enabled()


@tf_export('config.experimental.enable_tensor_float_32_execution')
def enable_tensor_float_32_execution(enabled):
  """Enable or disable the use of TensorFloat-32 on supported hardware.

  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format),
  or TF32 for short, is a math mode for NVIDIA Ampere GPUs and above.
  TensorFloat-32 execution causes certain float32 ops, such as matrix
  multiplications and convolutions, to run much faster on such GPUs but with
  reduced precision. This reduced precision should not impact convergence of
  deep learning models in practice.

  TensorFloat-32 is enabled by default. TensorFloat-32 is only supported on
  NVIDIA GPUs starting with the Ampere generation, so older NVIDIA GPUs and
  other hardware will use the full float32 precision regardless of whether
  TensorFloat-32 is enabled or not. If you want to use the full float32
  precision on all GPUs, you can disable TensorFloat-32 execution with this
  function. For example:

  ```python
  x = tf.fill((1024, 1024), 1.0001)
  y = tf.fill((1024, 1024), 1.)
  # TensorFloat-32 is enabled, so matmul is run with reduced precision
  print(tf.linalg.matmul(x, y)[0, 0])  # 1024.0
  tf.config.experimental.enable_tensor_float_32_execution(False)
  # Matmul is run with full precision
  print(tf.linalg.matmul(x, y)[0, 0])  # ~1024.1
  ```

  To check whether TensorFloat-32 execution is currently enabled, use
  `tf.config.experimental.tensor_float_32_execution_enabled`.

  If TensorFloat-32 is enabled, float32 inputs of supported ops, such as
  `tf.linalg.matmul`, will be rounded from 23 bits of precision to 10 bits of
  precision in most cases. This allows the ops to execute much faster by
  utilizing the GPU's tensor cores. TensorFloat-32 has the same dynamic range
  as float32, meaning it is no more likely to underflow or overflow than
  float32. Ops still use float32 accumulation when TensorFloat-32 is enabled.
  Enabling or disabling TensorFloat-32 only affects Ampere GPUs and above.

  Note TensorFloat-32 is not always used in supported ops, as only inputs of
  certain shapes are supported. Support for more input shapes and more ops may
  be added in the future. As a result, precision of float32 ops may decrease
  in minor versions of TensorFlow.

  TensorFloat-32 is also used for some complex64 ops. Currently, TensorFloat-32
  is used in fewer cases for complex64 than it is for float32.

  Args:
    enabled: Bool indicating whether to enable TensorFloat-32 execution.
  """
  _pywrap_tensor_float_32_execution.enable(enabled)
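# A minimal usage sketch (hypothetical helper, not part of the TensorFlow API
# surface): guard a precision-sensitive computation by disabling
# TensorFloat-32 and restoring the previous setting afterwards.
def _example_full_precision(compute_fn):
  # Remember the current TF32 setting so the caller's mode can be restored.
  was_enabled = tensor_float_32_execution_enabled()
  enable_tensor_float_32_execution(False)  # force full float32 precision
  try:
    return compute_fn()
  finally:
    enable_tensor_float_32_execution(was_enabled)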

@tf_export('config.threading.get_intra_op_parallelism_threads')
def get_intra_op_parallelism_threads():
  """Get number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Returns:
    Number of parallel threads
  """
  return context.context().intra_op_parallelism_threads


@tf_export('config.threading.set_intra_op_parallelism_threads')
def set_intra_op_parallelism_threads(num_threads):
  """Set number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  context.context().intra_op_parallelism_threads = num_threads


@tf_export('config.threading.get_inter_op_parallelism_threads')
def get_inter_op_parallelism_threads():
  """Get number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Returns:
    Number of parallel threads
  """
  return context.context().inter_op_parallelism_threads


@tf_export('config.threading.set_inter_op_parallelism_threads')
def set_inter_op_parallelism_threads(num_threads):
  """Set number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Args:
    num_threads: Number of parallel threads
  """
  context.context().inter_op_parallelism_threads = num_threads
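# A minimal configuration sketch (hypothetical helper, for illustration only):
# pin both thread pools; these setters must run before the runtime is
# initialized for the values to take effect.
def _example_configure_threading(intra=4, inter=2):
  set_intra_op_parallelism_threads(intra)  # threads within a single op
  set_inter_op_parallelism_threads(inter)  # threads across independent ops
  return (get_intra_op_parallelism_threads(),
          get_inter_op_parallelism_threads())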

@tf_export('config.optimizer.get_jit')
def get_optimizer_jit() -> str:
  """Returns JIT compilation configuration for code inside `tf.function`.

  Possible return values:
  - `"autoclustering"` if
    [autoclustering](https://www.tensorflow.org/xla#auto-clustering) is enabled
  - `""` when no default compilation is applied.
  """
  if context.context().optimizer_jit:
    return 'autoclustering'
  return ''


@tf_export('config.optimizer.set_jit')
@deprecation.deprecated_arg_values(
    None,
    '`True` setting is deprecated, use `autoclustering` instead.',
    warn_once=True,
    jit_config=True)
def set_optimizer_jit(enabled: Union[bool, str]):
  """Configure JIT compilation.

  Note: compilation is only applied to code that is compiled into a
  graph (in TF2 that's only code inside `tf.function`).

  Args:
    enabled: JIT compilation configuration.
      Possible values:
      - `"autoclustering"` (`True` is a deprecated alias): perform
        [autoclustering](https://www.tensorflow.org/xla#auto-clustering)
        (automatically identify and compile clusters of nodes) on all graphs
        using [XLA](https://www.tensorflow.org/xla).
      - `False`: do not automatically compile any graphs.
  """
  autoclustering_enabled = enabled in (True, 'autoclustering')
  context.context().optimizer_jit = autoclustering_enabled
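# A minimal usage sketch (hypothetical helper, for illustration only): turn on
# XLA autoclustering and confirm the setting via the getter, using only the
# values documented above.
def _example_enable_autoclustering():
  set_optimizer_jit('autoclustering')  # preferred over the deprecated `True`
  assert get_optimizer_jit() == 'autoclustering'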

@tf_export('config.optimizer.get_experimental_options')
def get_optimizer_experimental_options():
  """Get experimental optimizer options.

  Refer to tf.config.optimizer.set_experimental_options for a list of current
  options.

  Note that optimizations are only applied in graph mode (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Returns:
    Dictionary of configured experimental optimizer options
  """
  return context.context().get_optimizer_experimental_options()


@tf_export('config.optimizer.set_experimental_options')
def set_optimizer_experimental_options(options):
  """Set experimental optimizer options.

  Note that optimizations are only applied in graph mode (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Args:
    options: Dictionary of experimental optimizer options to configure.
      Valid keys:
      - layout_optimizer: Optimize tensor layouts, e.g. try to use the NCHW
        layout on GPUs, which is faster.
      - constant_folding: Fold constants. Statically infer the value of
        tensors when possible, and materialize the result using constants.
      - shape_optimization: Simplify computations made on shapes.
      - remapping: Remap subgraphs onto more efficient implementations.
      - arithmetic_optimization: Simplify arithmetic ops with common
        sub-expression elimination and arithmetic simplification.
      - dependency_optimization: Control dependency optimizations. Remove
        redundant control dependencies, which may enable other optimization.
        This optimizer is also essential for pruning Identity and NoOp nodes.
      - loop_optimization: Loop optimizations.
      - function_optimization: Function optimizations and inlining.
      - debug_stripper: Strips debug-related nodes from the graph.
      - disable_model_pruning: Disable removal of unnecessary ops from the
        graph.
      - scoped_allocator_optimization: Try to allocate some independent Op
        outputs contiguously in order to merge or eliminate downstream Ops.
      - pin_to_host_optimization: Force small ops onto the CPU.
      - implementation_selector: Enable the swap of kernel implementations
        based on the device placement.
      - auto_mixed_precision: Change certain float32 ops to float16 on Volta
        GPUs and above. Without the use of loss scaling, this can cause
        numerical underflow (see
        `keras.mixed_precision.experimental.LossScaleOptimizer`).
      - disable_meta_optimizer: Disable the entire meta optimizer.
      - min_graph_nodes: The minimum number of nodes in a graph to optimize.
        For smaller graphs, optimization is skipped.
      - auto_parallel: Automatically parallelizes graphs by splitting along
        the batch dimension.
  """
  context.context().set_optimizer_experimental_options(options)
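# A minimal usage sketch (hypothetical helper, for illustration only): set a
# couple of the documented option keys and read the merged options back.
def _example_set_grappler_options():
  set_optimizer_experimental_options({
      'layout_optimizer': True,   # prefer faster NCHW layouts on GPU
      'constant_folding': True,   # precompute statically-known tensors
  })
  return get_optimizer_experimental_options()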

@tf_export('config.get_soft_device_placement')
def get_soft_device_placement():
  """Return status of soft device placement flag.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP cannot be compiled by XLA. Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Returns:
    A boolean indicating if soft placement is enabled.
  """
  return context.context().soft_device_placement


@tf_export('config.set_soft_device_placement')
def set_soft_device_placement(enabled):
  """Enable or disable soft device placement.

  If enabled, ops can be placed on different devices than the device explicitly
  assigned by the user. This potentially has a large performance cost due to an
  increase in data communication between devices.

  Some cases where soft_device_placement would modify device assignment are:
    1. no GPU/TPU implementation for the OP
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU
    4. an OP cannot be compiled by XLA. Common for TPU which always requires
       the XLA compiler.

  For TPUs, if this option is true, a feature called automatic outside
  compilation is enabled. Automatic outside compilation will move uncompilable
  ops within a TPU program to instead run on the host. This can be used when
  encountering compilation failures due to unsupported ops.

  Note: by default soft device placement is enabled when running in eager mode
  (for convenience) and disabled in graph mode (for performance).

  Args:
    enabled: A boolean indicating whether to enable soft placement.
  """
  context.context().soft_device_placement = enabled
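# A minimal usage sketch (hypothetical helper, for illustration only): opt in
# to soft placement so ops without a kernel on the requested device fall back
# to a supported device instead of raising an error.
def _example_allow_device_fallback():
  set_soft_device_placement(True)
  assert get_soft_device_placement()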

@tf_export('config.experimental.get_device_policy')
def get_device_policy():
  """Gets the current device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  This function only gets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Returns:
    Current thread device policy
  """
  device_policy = context.context().device_policy
  if device_policy == context.DEVICE_PLACEMENT_SILENT:
    return 'silent'
  elif device_policy == context.DEVICE_PLACEMENT_SILENT_FOR_INT32:
    return 'silent_for_int32'
  elif device_policy == context.DEVICE_PLACEMENT_WARN:
    return 'warn'
  elif device_policy == context.DEVICE_PLACEMENT_EXPLICIT:
    return 'explicit'
  else:
    # pylint: disable-next=no-value-for-parameter
    raise errors.InternalError(
        f'Got an invalid device policy: {device_policy!r}.')


@tf_export('config.experimental.set_device_policy')
def set_device_policy(device_policy):
  """Sets the current thread device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  When using the default, an appropriate policy will be picked automatically.
  The default policy may change over time.

  This function only sets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Args:
    device_policy: A device policy.
      Valid values:
      - None: Switch to a system default.
      - 'warn': Copies the tensors which are not on the right device and logs a
        warning.
      - 'explicit': Raises an error if the placement is not as required.
      - 'silent': Silently copies the tensors. Note that this may hide
        performance problems as there is no notification provided when
        operations are blocked on the tensor being copied between devices.
      - 'silent_for_int32': silently copies `int32` tensors, raising errors on
        the other ones.

  Raises:
    ValueError: If an invalid `device_policy` is passed.
  """
  if device_policy == 'silent':
    context.context().device_policy = context.DEVICE_PLACEMENT_SILENT
  elif device_policy == 'silent_for_int32':
    context.context().device_policy = context.DEVICE_PLACEMENT_SILENT_FOR_INT32
  elif device_policy == 'warn':
    context.context().device_policy = context.DEVICE_PLACEMENT_WARN
  elif device_policy == 'explicit':
    context.context().device_policy = context.DEVICE_PLACEMENT_EXPLICIT
  elif device_policy is None:
    context.context().device_policy = None
  else:
    raise ValueError(
        f'Invalid argument `device_policy`: {device_policy!r}. Please refer to '
        'https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_device_policy '
        'for valid `device_policy` arguments.')
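# A minimal usage sketch (hypothetical helper, for illustration only): make
# cross-device tensor copies loud for the current thread while debugging
# placement, then restore the system default.
def _example_warn_on_copies():
  set_device_policy('warn')   # log a warning on each implicit copy
  assert get_device_policy() == 'warn'
  set_device_policy(None)     # back to the system default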

@tf_export('config.experimental.get_synchronous_execution')
def get_synchronous_execution():
  """Gets whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  Returns:
    Current thread execution mode
  """
  return context.context().execution_mode == context.SYNC


@tf_export('config.experimental.set_synchronous_execution')
def set_synchronous_execution(enable):
  """Specifies whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  When `enable` is set to None, an appropriate value will be picked
  automatically. The value picked may change between TensorFlow releases.

  Args:
    enable: Whether operations should be dispatched synchronously.
      Valid values:
      - None: sets the system default.
      - True: executes each operation synchronously.
      - False: executes each operation asynchronously.
  """
  if enable is None:
    context.context().execution_mode = None
  elif enable:
    context.context().execution_mode = context.SYNC
  else:
    context.context().execution_mode = context.ASYNC
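# A minimal usage sketch (hypothetical helper, for illustration only): run a
# block of ops asynchronously, then switch back to synchronous dispatch.
def _example_async_dispatch(run_ops_fn):
  set_synchronous_execution(False)  # ops may return "non-ready" handles
  try:
    run_ops_fn()
  finally:
    set_synchronous_execution(True)  # block until each op completes again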

@tf_export('config.list_physical_devices',
           'config.experimental.list_physical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_physical_devices')
def list_physical_devices(device_type=None):
  """Return a list of physical devices visible to the host runtime.

  Physical devices are hardware devices present on the host machine. By default
  all discovered CPU and GPU devices are considered visible.

  This API allows querying the physical hardware resources prior to runtime
  initialization, thus giving an opportunity to call any additional
  configuration APIs. This is in contrast to `tf.config.list_logical_devices`,
  which triggers runtime initialization in order to list the configured
  devices.

  The following example lists the number of visible GPUs on the host.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> print("Num GPUs:", len(physical_devices))
  Num GPUs: ...

  However, the number of GPUs available to the runtime may change during
  runtime initialization due to marking certain devices as not visible or
  configuring multiple logical devices.

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of discovered `tf.config.PhysicalDevice` objects
  """
  return context.context().list_physical_devices(device_type)

@tf_export('config.list_logical_devices',
           'config.experimental.list_logical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_logical_devices')
def list_logical_devices(device_type=None):
  """Return a list of logical devices created by runtime.

  Logical devices may correspond to physical devices or remote devices in the
  cluster. Operations and tensors may be placed on these devices by using the
  `name` of the `tf.config.LogicalDevice`.

  Calling `tf.config.list_logical_devices` triggers the runtime to configure
  any `tf.config.PhysicalDevice` visible to the runtime, thereby preventing
  further configuration. To avoid runtime initialization, call
  `tf.config.list_physical_devices` instead.

  For example:

  >>> logical_devices = tf.config.list_logical_devices('GPU')
  >>> if len(logical_devices) > 1:
  ...   # Allocate on GPU:0
  ...   with tf.device(logical_devices[0].name):
  ...     one = tf.constant(1)
  ...   # Allocate on GPU:1
  ...   with tf.device(logical_devices[1].name):
  ...     two = tf.constant(2)

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of initialized `LogicalDevice`s
  """
  return context.context().list_logical_devices(device_type=device_type)

@tf_export('config.get_visible_devices',
           'config.experimental.get_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.get_visible_devices')
def get_visible_devices(device_type=None):
  """Get the list of visible physical devices.

  Returns the list of `PhysicalDevice`s currently marked as visible to the
  runtime. A visible device will have at least one `LogicalDevice` associated
  with it once the runtime is initialized.

  The following example verifies all visible GPUs have been disabled:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable all GPUs
  ...   tf.config.set_visible_devices([], 'GPU')
  ...   visible_devices = tf.config.get_visible_devices()
  ...   for device in visible_devices:
  ...     assert device.device_type != 'GPU'
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of visible `PhysicalDevice`s
  """
  return context.context().get_visible_devices(device_type)


@tf_export('config.set_visible_devices',
           'config.experimental.set_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.set_visible_devices')
def set_visible_devices(devices, device_type=None):
  """Set the list of visible devices.

  Specifies which `PhysicalDevice` objects are visible to the runtime.
  TensorFlow will only allocate memory and place operations on visible
  physical devices, as otherwise no `LogicalDevice` will be created on them.
  By default all discovered devices are marked as visible.

  The following example demonstrates disabling the first GPU on the machine.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable first GPU
  ...   tf.config.set_visible_devices(physical_devices[1:], 'GPU')
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   # Logical device was not created for first GPU
  ...   assert len(logical_devices) == len(physical_devices) - 1
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    devices: List of `PhysicalDevice`s to make visible
    device_type: (optional) Only configure devices matching this device type.
      For example "CPU" or "GPU". Other devices will be left unaltered.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_visible_devices(devices, device_type)

# TODO(b/188089869): Redesign memory stats related APIs before moving them out
# of experimental.
@tf_export('config.experimental.get_memory_info')
def get_memory_info(device):
  """Get memory info for the chosen device, as a dict.

  This function returns a dict containing information about the device's memory
  usage. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Returns a dict in the form {'current': <current mem usage>,
  ...   #                             'peak': <peak mem usage>}
  ...   tf.config.experimental.get_memory_info('GPU:0')

  Currently returns the following keys:
  - `'current'`: The current memory used by the device, in bytes.
  - `'peak'`: The peak memory used by the device across the run of the
    program, in bytes. Can be reset with
    `tf.config.experimental.reset_memory_stats`.

  More keys may be added in the future, including device-specific keys.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. The dict specifies only the
  current and peak memory that TensorFlow is actually using, not the memory
  that TensorFlow has allocated on the GPU.

  Args:
    device: Device string to get the memory information for, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Returns:
    A dict with keys `'current'` and `'peak'`, specifying the current and peak
    memory usage respectively.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked, like '"CPU:0"'.
  """
  return context.context().get_memory_info(device)


# TODO(b/188089869): Redesign memory stats related APIs before moving them out
# of experimental.
# TODO(b/189498350): Unify the behavior on CPU, GPU and TPU.
@tf_export('config.experimental.reset_memory_stats')
def reset_memory_stats(device):
  """Resets the tracked memory stats for the chosen device.

  This function sets the tracked peak memory for a device to the device's
  current memory usage. This allows you to measure the peak memory usage for a
  specific part of your program. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Sets the peak memory to the current memory.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the first peak memory usage.
  ...   x1 = tf.ones(1000 * 1000, dtype=tf.float64)
  ...   del x1  # Frees the memory referenced by `x1`.
  ...   peak1 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   # Sets the peak memory to the current memory again.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the second peak memory usage.
  ...   x2 = tf.ones(1000 * 1000, dtype=tf.float32)
  ...   del x2
  ...   peak2 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   assert peak2 < peak1  # tf.float32 consumes less memory than tf.float64.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  Args:
    device: Device string to reset the memory stats, e.g. `"GPU:0"`, `"TPU:0"`.
      See https://www.tensorflow.org/api_docs/python/tf/device for specifying
      device strings.

  Raises:
    ValueError: No device found with the device name, like '"nonexistent"'.
    ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked or clearing memory statistics not
      supported, like '"CPU:0"'.
  """
  context.context().reset_memory_stats(device)

@deprecation.deprecated(
    None,
    "Use tf.config.experimental.get_memory_info(device)['current'] instead.")
@tf_export('config.experimental.get_memory_usage')
def get_memory_usage(device):
  """Get the current memory usage, in bytes, for the chosen device.

  This function is deprecated in favor of
  `tf.config.experimental.get_memory_info`. Calling this function is equivalent
  to calling `tf.config.experimental.get_memory_info(device)['current']`.

  See https://www.tensorflow.org/api_docs/python/tf/device for specifying
  device strings.

  For example:

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   tf.config.experimental.get_memory_usage('GPU:0')

  Does not work for CPU.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. This function only returns
  the memory that TensorFlow is actually using, not the memory that TensorFlow
  has allocated on the GPU.

  Args:
    device: Device string to get the bytes in use for, e.g. `"GPU:0"`

  Returns:
    Total memory usage in bytes.

  Raises:
    ValueError: Non-existent or CPU device specified.
  """
  return get_memory_info(device)['current']
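# A minimal migration sketch (hypothetical helper, for illustration only):
# the deprecated `get_memory_usage` is equivalent to reading the 'current'
# key from `get_memory_info`, which also exposes the 'peak' statistic.
# Requires a GPU or TPU device; raises ValueError on CPU.
def _example_memory_snapshot(device='GPU:0'):
  info = get_memory_info(device)
  return info['current'], info['peak']  # both in bytes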

@tf_export('config.experimental.get_memory_growth')
def get_memory_growth(device):
  """Get if memory growth is enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ...   assert tf.config.experimental.get_memory_growth(physical_devices[0])
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    A boolean indicating the memory growth setting for the `PhysicalDevice`.

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
  """
  return context.context().get_memory_growth(device)


@tf_export('config.experimental.set_memory_growth')
def set_memory_growth(device, enable):
  """Set if memory growth should be enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime
  initialization will not allocate all memory on the device. Memory growth
  cannot be configured on a `PhysicalDevice` with virtual devices configured.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to configure
    enable: (Boolean) Whether to enable or disable memory growth

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_memory_growth(device, enable)

@tf_export('config.experimental.get_device_details')
def get_device_details(device):
  """Returns details about a physical device.

  This API takes in a `tf.config.PhysicalDevice` returned by
  `tf.config.list_physical_devices`. It returns a dict with string keys
  containing various details about the device. Each key is only supported by a
  subset of devices, so you should not assume the returned dict will have any
  particular key.

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   details = tf.config.experimental.get_device_details(gpu_devices[0])
  ...   details.get('device_name', 'Unknown GPU')

  Currently, details are only returned for GPUs. This function returns an
  empty dict if passed a non-GPU device.

  The returned dict may have the following keys:
  * `'device_name'`: A human-readable name of the device as a string, e.g.
    "Titan V". Unlike `tf.config.PhysicalDevice.name`, this will be the same
    for multiple devices if each device is the same model. Currently only
    available for GPUs.
  * `'compute_capability'`: The
    [compute capability](https://developer.nvidia.com/cuda-gpus) of the device
    as a tuple of two ints, in the form `(major_version, minor_version)`. Only
    available for NVIDIA GPUs.

  Note: This is similar to `tf.sysconfig.get_build_info` in that both functions
  can return information relating to GPUs. However, this function returns
  run-time information about a specific device (such as a GPU's compute
  capability), while `tf.sysconfig.get_build_info` returns compile-time
  information about how TensorFlow was built (such as what version of CUDA
  TensorFlow was built for).

  Args:
    device: A `tf.config.PhysicalDevice` returned by
      `tf.config.list_physical_devices` or `tf.config.get_visible_devices`.

  Returns:
    A dict with string keys.
  """
  return context.context().get_device_details(device)

@tf_export('config.get_logical_device_configuration',
           'config.experimental.get_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.get_virtual_device_configuration')
def get_logical_device_configuration(device):
  """Get the virtual device configuration for a `tf.config.PhysicalDevice`.

  Returns the list of `tf.config.LogicalDeviceConfiguration`
  objects previously configured by a call to
  `tf.config.set_logical_device_configuration`.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> configs = tf.config.get_logical_device_configuration(
  ...   physical_devices[0])
  >>> try:
  ...   assert configs is None
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   configs = tf.config.get_logical_device_configuration(
  ...     physical_devices[0])
  ...   assert len(configs) == 2
  ... except:
  ...   # Cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    List of `tf.config.LogicalDeviceConfiguration` objects or
    `None` if no virtual device configuration has been set for this physical
    device.
  """
  return context.context().get_logical_device_configuration(device)

@tf_export('config.set_logical_device_configuration',
           'config.experimental.set_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.set_virtual_device_configuration')
def set_logical_device_configuration(device, logical_devices):
  """Set the logical device configuration for a `tf.config.PhysicalDevice`.

  A visible `tf.config.PhysicalDevice` will by default have a single
  `tf.config.LogicalDevice` associated with it once the runtime is initialized.
  Specifying a list of `tf.config.LogicalDeviceConfiguration` objects allows
  multiple devices to be created on the same `tf.config.PhysicalDevice`.

  Logical device configurations can be modified by calling this function as
  long as the runtime is uninitialized. After the runtime is initialized,
  calling this function raises a RuntimeError.

  The following example splits the CPU into 2 logical devices:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported.
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   logical_devices = tf.config.list_logical_devices('CPU')
  ...   assert len(logical_devices) == 2
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ... except:
  ...   # Cannot modify logical devices once initialized.
  ...   pass

  The following example splits the GPU into 2 logical devices with 100 MB each:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=100),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=100)])
  ...
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   assert len(logical_devices) == len(physical_devices) + 1
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=10),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=10)])
  ... except:
  ...   # Invalid device or cannot modify logical devices once initialized.
  ...   pass

  Args:
    device: The `PhysicalDevice` to configure.
    logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration`
      objects to allocate for the specified `PhysicalDevice`. If None, the
      default configuration will be used.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_logical_device_configuration(device, logical_devices)

@tf_export('config.experimental.enable_mlir_bridge')
def enable_mlir_bridge():
  """Enables experimental MLIR-Based TensorFlow Compiler Bridge.

  The TensorFlow Compiler Bridge (TF Bridge) is responsible for translating
  parts of the TensorFlow graph into a form that can be accepted as an input
  by a backend compiler such as XLA.
  """
  context.context().enable_mlir_bridge = True


@tf_export('config.experimental.disable_mlir_bridge')
def disable_mlir_bridge():
  """Disables experimental MLIR-Based TensorFlow Compiler Bridge."""
  context.context().enable_mlir_bridge = False
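# A minimal usage sketch (hypothetical helper, for illustration only): the
# MLIR bridge is an experimental opt-in, so expose the choice as a single
# boolean toggle that maps onto the paired enable/disable calls above.
def _example_toggle_mlir_bridge(use_bridge):
  if use_bridge:
    enable_mlir_bridge()
  else:
    disable_mlir_bridge()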

@tf_export('config.experimental.enable_op_determinism', v1=[])
def enable_op_determinism():
  """Configures TensorFlow ops to run deterministically.

  When op determinism is enabled, TensorFlow ops will be deterministic. This
  means that if an op is run multiple times with the same inputs on the same
  hardware, it will have the exact same outputs each time. This is useful for
  debugging models. Note that determinism in general comes at the expense of
  lower performance and so your model may run slower when op determinism is
  enabled.

  If you want your TensorFlow program to run deterministically, put the
  following code near the start of your program.

  ```python
  tf.keras.utils.set_random_seed(1)
  tf.config.experimental.enable_op_determinism()
  ```

  Calling `tf.keras.utils.set_random_seed` sets the Python seed, the NumPy
  seed, and the TensorFlow seed. Setting these seeds is necessary to ensure
  any random numbers your program generates are also deterministic.

  By default, op determinism is not enabled, so ops might return different
  results when run with the same inputs. These differences are often caused by
  the use of asynchronous threads within the op nondeterministically changing
  the order in which floating-point numbers are added. Most of these cases of
  nondeterminism occur on GPUs, which have thousands of hardware threads that
  are used to run ops. Enabling determinism directs such ops to use a different
  algorithm, one that does not use threads in a nondeterministic way.

  Another potential source of nondeterminism is `tf.data` based data
  processing. Typically, this can introduce nondeterminism due to the use of
  parallelism in methods such as `Dataset.map` producing inputs or running
  stateful ops in a nondeterministic order. Enabling determinism will remove
  such sources of nondeterminism.

  Enabling determinism will likely make your model or your `tf.data` data
  processing slower. For example, `Dataset.map` can become several orders of
  magnitude slower when the map function has random ops or other stateful ops.
  See the “Determinism and tf.data” section below for more details. In future
  TensorFlow releases, we plan on improving the performance of determinism,
  especially for common scenarios such as `Dataset.map`.

  Certain ops will raise an `UnimplementedError` because they do not yet have a
  deterministic implementation. Additionally, due to bugs, some ops might be
  nondeterministic and not raise an `UnimplementedError`. If you encounter such
  ops, please [file an issue](https://github.com/tensorflow/tensorflow/issues).

  An example of enabling determinism follows. The
  `tf.nn.softmax_cross_entropy_with_logits` op is run multiple times and the
  output is shown to be the same each time. This example would likely fail when
  run on a GPU if determinism were not enabled, because
  `tf.nn.softmax_cross_entropy_with_logits` uses a nondeterministic algorithm
  on GPUs by default.

  ```python
  labels = tf.random.normal((1, 10000))
  logits = tf.random.normal((1, 10000))
  output = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
  for _ in range(5):
    output2 = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                      logits=logits)
    tf.debugging.assert_equal(output, output2)
  ```

  ## Writing deterministic models

  You can make your models deterministic by enabling op determinism. This
  means that you can train a model and finish each run with exactly the same
  trainable variables. This also means that the inferences of your
  previously-trained model will be exactly the same on each run. Typically,
  models can be made deterministic by simply setting the seeds and enabling
  op determinism, as in the example above. However, to guarantee that your
  model operates deterministically, you must meet all the following
  requirements:

  * Call `tf.config.experimental.enable_op_determinism()`, as mentioned above.
  * Reproducibly reset any pseudorandom number generators (PRNGs) you’re using,
    such as by setting the seeds for the default PRNGs in TensorFlow, Python,
    and NumPy, as mentioned above. Note that certain newer NumPy classes like
    `numpy.random.default_rng` ignore the global NumPy seed, so a seed must be
    explicitly passed to such classes, if used.
  * Use the same hardware configuration in every run.
  * Use the same software environment in every run (OS, checkpoints, version of
    CUDA and TensorFlow, environment variables, etc). Note that determinism is
    not guaranteed across different versions of TensorFlow.
  * Do not use constructs outside TensorFlow that are nondeterministic, such as
    reading from `/dev/random` or using multiple threads/processes in ways that
    influence TensorFlow’s behavior.
  * Ensure your input pipeline is deterministic. If you use `tf.data`, this is
    done automatically (at the expense of performance). See "Determinism and
    tf.data" below for more information.
  * Do not use `tf.compat.v1.Session` and
    `tf.distribute.experimental.ParameterServerStrategy`, which can introduce
    nondeterminism. Besides ops (including `tf.data` ops), these are the only
    known potential sources of nondeterminism within TensorFlow (if you find
    more, please file an issue). Note that `tf.compat.v1.Session` is required
    to use the TF1 API, so determinism cannot be guaranteed when using the TF1
    API.
  * Do not use nondeterministic custom ops.

  ## Additional details on determinism

  For stateful ops to be deterministic, the state of the system must be the
  same every time the op is run. For example, the output of
  `tf.Variable.sparse_read` (obviously) depends on both the variable value and
  the `indices` function parameter. When determinism is enabled, the side
  effects of stateful ops are deterministic.

  TensorFlow’s random ops, such as `tf.random.normal`, will raise a
  `RuntimeError` if determinism is enabled and a seed has not been set.
  However, attempting to generate nondeterministic random numbers using Python
  or NumPy will not raise such errors. Make sure you remember to set the
  Python and NumPy seeds. Calling `tf.keras.utils.set_random_seed` is an easy
  way to set all three seeds.

  Note that latency, memory consumption, throughput, and other performance
  characteristics are *not* made deterministic by enabling op determinism.
  Only op outputs and side effects are made deterministic. Additionally, a
  model may nondeterministically raise a `tf.errors.ResourceExhaustedError`
  from a lack of memory due to the fact that memory consumption is
  nondeterministic.

  ## Determinism and tf.data

  Enabling deterministic ops makes `tf.data` deterministic in several ways:

  1. For dataset methods with a `deterministic` argument, such as `Dataset.map`
     and `Dataset.batch`, the `deterministic` argument is overridden to be
     `True` irrespective of its setting.
  2. The `tf.data.Option.experimental_deterministic` option is overridden to be
     `True` irrespective of its setting.
  3. In `Dataset.map` and `Dataset.interleave`, if the map or interleave
     function has stateful random ops or other stateful ops, the function will
     run serially instead of in parallel. This means the `num_parallel_calls`
     argument to `map` and `interleave` is effectively ignored.
  4. Prefetching with `Dataset.prefetch` will be disabled if any function run
     as part of the input pipeline has certain stateful ops. Similarly, any
     dataset method with a `num_parallel_calls` argument will be made to run
     serially if any function in the input pipeline has such stateful ops.
     Legacy random ops such as `tf.random.normal` will *not* cause such
     datasets to be changed, but most other stateful ops will.

  Unfortunately, due to (3), performance can be greatly reduced when stateful
  ops are used in `Dataset.map` due to no longer running the map function in
  parallel. A common example of stateful ops used in `Dataset.map` are random
  ops, such as `tf.random.normal`, which are typically used for distortions.
  One way to work around this is to use stateless random ops instead.
  Alternatively you can hoist all random ops into their own separate
  `Dataset.map` call, making the original `Dataset.map` call stateless and
  thus avoiding the need to serialize its execution.

  (4) can also cause performance to be reduced, but occurs less frequently
  than (3) because legacy random ops do not cause (4) to take effect. However,
  unlike (3), when there are non-random stateful ops in a user-defined
  function, every `map` and `interleave` dataset is affected, instead of just
  the `map` or `interleave` dataset with the function that has stateful ops.
  Additionally, `prefetch` datasets and any dataset with the
  `num_parallel_calls` argument are also affected.
  """
  _pywrap_determinism.enable(True)

def disable_op_determinism():
  """Disables op determinism."""
  _pywrap_determinism.enable(False)


def is_op_determinism_enabled():
  """Returns True if op determinism is enabled."""
  return _pywrap_determinism.is_enabled()
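# A minimal usage sketch (hypothetical helper, for illustration only): run a
# callable with op determinism forced on, restoring the previous mode after.
def _example_run_deterministically(fn):
  was_enabled = is_op_determinism_enabled()
  enable_op_determinism()
  try:
    return fn()
  finally:
    if not was_enabled:
      disable_op_determinism()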