Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/dumping_callback.py: 21%

312 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Dumping op callbacks: Enables dump-based features in tfdbg v2.""" 

16 

17import atexit 

18import os 

19import re 

20import socket 

21import threading 

22import uuid 

23 

24from tensorflow.core.framework import graph_debug_info_pb2 

25from tensorflow.core.framework import tensor_pb2 

26from tensorflow.core.protobuf import debug_event_pb2 

27from tensorflow.python.debug.lib import debug_events_writer 

28from tensorflow.python.debug.lib import op_callbacks_common 

29from tensorflow.python.debug.lib import source_utils 

30from tensorflow.python.eager import function as function_lib 

31from tensorflow.python.framework import constant_op 

32from tensorflow.python.framework import dtypes 

33from tensorflow.python.framework import op_callbacks 

34from tensorflow.python.framework import ops 

35from tensorflow.python.framework import tensor_util 

36from tensorflow.python.ops import array_ops 

37from tensorflow.python.ops import gen_debug_ops 

38from tensorflow.python.platform import tf_logging as logging 

39from tensorflow.python.util import compat 

40from tensorflow.python.util import object_identity 

41from tensorflow.python.util import tf_stack 

42from tensorflow.python.util.tf_export import tf_export 

43 

# Thread-local state for the dumping callback machinery.
_state = threading.local()
# Default tensor-debug mode used by `enable_dump_debug_info()` when the caller
# does not specify one.
DEFAULT_TENSOR_DEBUG_MODE = "NO_TENSOR"

# pylint:disable=protected-access
# Byte-string prefixes that identify op types which are actually compiled
# TF functions (forward / backward / inference FuncGraphs).
_FUNCTION_PREFIXES = (
    compat.as_bytes(function_lib._FORWARD_PREFIX),
    compat.as_bytes(function_lib._BACKWARD_PREFIX),
    compat.as_bytes(function_lib._INFERENCE_PREFIX))
# pylint:enable=protected-access

53 

54 

def is_op_type_function(op_type):
  """Check whether an op type name refers to a TF function (FuncGraph).

  Args:
    op_type: Op type, as a `str` or `bytes` object.

  Returns:
    `True` if `op_type` starts with one of the internal function-name
    prefixes (forward, backward or inference); `False` otherwise.
  """
  return compat.as_bytes(op_type).startswith(_FUNCTION_PREFIXES)

57 

58 

@ops.RegisterGradient("DebugIdentityV2")
def _debug_identity_v2_grad(op, dy):
  """Gradient function for the DebugIdentityV2 op.

  DebugIdentityV2 is a pass-through (identity) op, so its gradient simply
  forwards the incoming gradient unchanged.

  Args:
    op: The forward DebugIdentityV2 op (unused).
    dy: The incoming gradient tensor.

  Returns:
    `dy`, unchanged.
  """
  del op  # Unused: the op is an identity, so the gradient ignores it.
  return dy

64 

65 

66def _get_tfdbg_run_id(): 

67 return str(uuid.uuid4())[:8] 

68 

69 

70def _get_id(): 

71 """Get a short unique ID.""" 

72 return str(uuid.uuid4()) 

73 

74 

def _concrete_tensor_to_proto(tensor):
  """Convert a concrete (eager) tensor's value to a `TensorProto`.

  Args:
    tensor: An eager tensor exposing a `.numpy()` method.

  Returns:
    A `TensorProto` holding the tensor's value.
  """
  return tensor_util.make_tensor_proto(tensor.numpy())

77 

78 

class _DumpingCallback(object):
  """An object holding the states surrounding the dumping callback."""

  def __init__(self,
               dump_root,
               tensor_debug_mode,
               circular_buffer_size,
               op_regex,
               tensor_dtypes):
    self._dump_root = dump_root
    self._tfdbg_run_id = _get_tfdbg_run_id()
    self._tensor_debug_mode = tensor_debug_mode
    self._circular_buffer_size = circular_buffer_size
    self._op_regex = op_regex
    self._tensor_dtypes = tensor_dtypes

    self._hostname = socket.gethostname()
    # A list of source-file paths.
    self._source_file_paths = []
    # A map from stack frame (FileLineCol) to unique ID.
    self._stack_frame_to_id = dict()
    # Mapping op context to unique ID.
    self._context_to_id = dict()
    self._function_to_graph_id = dict()
    self._op_type_to_context_id = dict()
    # Keeps track of counter for symbolic tensors output by in-graph ops.
    # It is used to make unique names for debugger-generated tensors.
    self._symbolic_tensor_counter = 0
    # A map from the names of debugger-generated Identity and DebugIdentityV2
    # tensors to the names of the original instrumented graph tensors. This is
    # applicable to v1 graph mode only.
    self._tensor_aliases = dict()
    self._source_file_paths_lock = threading.Lock()
    self._stack_frame_to_id_lock = threading.Lock()
    self._context_lock = threading.Lock()
    self._symbolic_tensor_counter_lock = threading.Lock()
    # A dict mapping Placeholder tensors to their instrumenting debug tensors.
    # Used only under V1 graph mode, where we can't rely on auto control
    # dependency to execute the debug tensors and hence need to attach the
    # debug tensors as control dependencies of the ops that consume the
    # Placeholder.
    self._placeholder_to_debug_tensor = (
        object_identity.ObjectIdentityDictionary())
    self._writer = None

  def function_callback(self, function):
    """A callback to be called on creation of Functions."""
    graph_id = self._get_context_id(function.graph)
    with self._context_lock:
      # NOTE(cais): We currently store the function (AtomicFunction)
      # as keys of this dict, because weakrefs to them sometimes become
      # unreferenceable by the time the op callback is called. This approach
      # may cause memory leaks due to the holding of the functions. If that's
      # the case, calling `tf.debugging.disable_dump_debug_info()` should
      # cause GC of this object and this dict.
      self._function_to_graph_id[function] = graph_id
    return function

  @property
  def dump_root(self):
    return self._dump_root

  @dump_root.setter
  def dump_root(self, dump_root):
    # Changing the dump root invalidates the cached writer; a new one is
    # created lazily by `get_writer()`.
    if self._dump_root != dump_root:
      self._dump_root = dump_root
      self._writer = None

  @property
  def tfdbg_run_id(self):
    return self._tfdbg_run_id

  @property
  def tensor_debug_mode(self):
    return self._tensor_debug_mode

  @property
  def circular_buffer_size(self):
    return self._circular_buffer_size

  def get_writer(self):
    """Get the debug events writer for the currently configured dump root."""
    if not self._writer:
      self._writer = debug_events_writer.DebugEventsWriter(
          self._dump_root,
          self._tfdbg_run_id,
          circular_buffer_size=self._circular_buffer_size)
    return self._writer

  def _get_context_id(self, context):
    """Get a unique ID for an op-construction context (e.g., a graph).

    If the graph has been encountered before, reuse the same unique ID.
    When encountering a new context (graph), this method writes a DebugEvent
    proto with the debugged_graph field to the proper DebugEvent file.

    Args:
      context: A context to get the unique ID for. Must be hashable. E.g., a
        Graph object.

    Returns:
      A unique ID for the context.
    """
    # Use the double-checked lock pattern to optimize the common case.
    if context in self._context_to_id:  # 1st check, without lock.
      return self._context_to_id[context]
    graph_is_new = False
    with self._context_lock:
      if context not in self._context_to_id:  # 2nd check, with lock.
        graph_is_new = True
        context_id = _get_id()
        self._context_to_id[context] = context_id
    if graph_is_new:
      self.get_writer().WriteDebuggedGraph(debug_event_pb2.DebuggedGraph(
          graph_id=context_id,
          graph_name=getattr(context, "name", None),
          outer_context_id=self._get_outer_context_id(context)))
    return self._context_to_id[context]

  def _get_outer_context_id(self, graph):
    """Get the ID of the immediate outer context of the input graph.

    Args:
      graph: The graph (context) in question.

    Returns:
      If an outer context exists, the immediate outer context name as a string.
      If such an outer context does not exist (i.e., `graph` is itself
      outermost), `None`.
    """
    if hasattr(graph, "outer_graph") and graph.outer_graph:
      return self._get_context_id(graph.outer_graph)
    else:
      return None

  def _write_source_file_content(self, file_path):
    """Send the content of a source file via debug-events writer.

    Args:
      file_path: Path to the source file.

    Returns:
      An int index for the file.
    """
    if file_path in self._source_file_paths:
      return self._source_file_paths.index(file_path)
    with self._source_file_paths_lock:
      if file_path not in self._source_file_paths:
        lines = None
        if source_utils.is_extension_uncompiled_python_source(file_path):
          try:
            lines, _ = source_utils.load_source(file_path)
          except IOError as e:
            # Best effort: source snapshotting failing should not break
            # the debugged program.
            logging.warn(
                "Failed to read source code from path: %s. Reason: %s",
                file_path, e)
        writer = self.get_writer()
        writer.WriteSourceFile(debug_event_pb2.SourceFile(
            file_path=file_path, host_name=self._hostname, lines=lines))
        self._source_file_paths.append(file_path)
      return self._source_file_paths.index(file_path)

  def _process_stack_frames(self):
    """Process stack frames.

    Send the content of source-files, on a best-effort basis.

    Returns:
      A `CodeLocation` proto carrying the hostname and the IDs of the
      processed stack frames.
    """
    stack_frames = tf_stack.extract_stack()
    stack_frame_ids = []
    writer = None
    for file_path, lineno, func, _ in stack_frames:
      abs_path = os.path.abspath(file_path)
      if (abs_path, lineno, func) in self._stack_frame_to_id:
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])
        continue
      with self._stack_frame_to_id_lock:
        if (abs_path, lineno, func) not in self._stack_frame_to_id:
          stack_frame_id = _get_id()
          self._stack_frame_to_id[(abs_path, lineno, func)] = stack_frame_id
          file_index = self._write_source_file_content(abs_path)
          file_line_col = graph_debug_info_pb2.GraphDebugInfo.FileLineCol(
              file_index=file_index, line=lineno, func=func)
          stack_frame_with_id = debug_event_pb2.StackFrameWithId(
              id=stack_frame_id, file_line_col=file_line_col)
          writer = self.get_writer()
          writer.WriteStackFrameWithId(stack_frame_with_id)
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])

    code_location = debug_event_pb2.CodeLocation(
        host_name=self._hostname, stack_frame_ids=stack_frame_ids)
    return code_location

  def _process_v1_graph_mode_tensor(self,
                                    op_type,
                                    tensor,
                                    debug_tensor,
                                    tensor_debug_mode):
    """For V1 graph mode, determine what tensor to output from callback.

    Args:
      op_type: Type of the op that outputs the original symbolic tensor.
      tensor: The original output symbolic tensor.
      debug_tensor: The debugger-instrumented tensor.
      tensor_debug_mode: Debug mode used, a tfdbg TensorDebugMode enum.

    Returns:
      A symbolic tensor to be returned by the dumping op_callback.
    """
    # Placeholders need special treatment under V1 graph mode. The
    # callback can't simply override the Placeholder tensor to a debug tensor,
    # as that would cause the Placeholder op to lack a value.
    if op_type in ("Placeholder", "PlaceholderWithDefault"):
      self._placeholder_to_debug_tensor[tensor] = debug_tensor
      return tensor
    else:
      # TODO(cais): Evaluate performance optimization options. For the
      # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
      # control dependency of `tensor.op` without an additional identity op.
      if (tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR and
          op_type != "Const"):
        # NOTE(b/153716279): Under v1 graph mode, overriding the output tensor
        # of Const ops can lead to downstream errors related to shapes. We opt
        # to use an identity op to avoid this issue at the cost of slightly
        # larger graph size.
        self._tensor_aliases[debug_tensor.name] = tensor.name
        return debug_tensor
      else:
        with self._symbolic_tensor_counter_lock:
          identity_name = "tfdbg_identity_%d" % self._symbolic_tensor_counter
        identity = array_ops.identity(tensor, name=identity_name)
        identity.op._add_control_input(  # pylint: disable=protected-access
            debug_tensor.op)
        self._tensor_aliases[identity.name] = tensor.name
        return identity

  def _instrument_symbolic_tensors(self,
                                   tensors,
                                   op_type,
                                   op_name,
                                   tfdbg_context_id,
                                   tensor_ids):
    """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

    The detailed fashion in which the tensors are instrumented is determined
    by the tensor_debug_mode configured for the currently enabled dumping
    callback.

    Args:
      tensors: A tuple of Tensors to instrument. It is assumed that their
        ordering corresponds to the ordering of output tensors of an original
        op. Output slot indices (0-based) will be generated based on the
        ordering.
      op_type: Type name of the op that emits the Tensors (e.g., "MatMul").
      op_name: Name of the op that emits the Tensors (e.g., "dense_1/MatMul").
      tfdbg_context_id: A unique ID for the context that the op belongs to
        (e.g., a graph).
      tensor_ids: A list of unique ID numbers for the tensors, for tfdbg's
        internal use.

    Returns:
      Non-eager Tensors that override the `tensors` as the output of the op
      that originally generated `tensors`. In some cases (e.g., non-V1 graph
      mode), this may be `None`, as the instrumentation can simply rely on
      automatic control dependencies (see `auto_control_deps.py`) instead of
      tensor overriding.
    """
    tensor_debug_mode = self._tensor_debug_mode
    debug_urls = ["file://%s" % self._dump_root]
    is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
    instrumented_tensors = [] if is_v1_graph_mode else None
    for output_slot, tensor in enumerate(tensors):
      with self._symbolic_tensor_counter_lock:
        debug_identity_name = ("DebugIdentityV2_%d" %
                               self._symbolic_tensor_counter)
      debug_identity_op_kwargs = {
          "tfdbg_context_id": tfdbg_context_id,
          "op_name": op_name,
          "output_slot": output_slot,
          "tensor_debug_mode": self._tensor_debug_mode,
          "debug_urls": debug_urls,
          "name": debug_identity_name,
          "circular_buffer_size": self._circular_buffer_size,
          "tfdbg_run_id": self._tfdbg_run_id,
      }
      if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        if is_v1_graph_mode and not tensor.dtype.is_numpy_compatible:
          # Avoid instrumenting Placeholder under is_v1_graph_mode. Doing that
          # would cause runtime complaint about Placeholders not being fed.
          instrumented_tensors.append(tensor)
          continue
        # Except in V1 graph mode + control flow, debug_identity_v2 triggers
        # auto control dependency because it's a stateful op.
        debug_tensor = gen_debug_ops.debug_identity_v2(
            # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
            # as a low-overhead placeholder, since no actual tensor value is
            # traced.
            constant_op.constant([], dtype=dtypes.float32),
            **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                                 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                                 debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                                 debug_event_pb2.TensorDebugMode.SHAPE):
        dtype = tensor.dtype
        dtype_is_dumpable = (
            tensor_debug_mode in (
                debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                debug_event_pb2.TensorDebugMode.FULL_HEALTH) and
            dtype.is_floating or
            tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE and
            (dtype.is_floating or dtype.is_integer or dtype.is_bool))
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not dtype_is_dumpable):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            gen_debug_ops.debug_numeric_summary_v2(
                tensor,
                tensor_id=tensor_ids[output_slot],
                tensor_debug_mode=self._tensor_debug_mode,
                output_dtype=dtypes.float64), **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under
          # V1 graph mode is known to have issues. TODO(cais): Investigate.
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            tensor, **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      else:
        raise NotImplementedError(
            "Symbolic tensor instrumentation is not implemented for debug mode "
            "%s" % self._tensor_debug_mode)
    return instrumented_tensors

  def _dump_eager_tensors(self,
                          tensors,
                          op_type,
                          input_tensor_ids,
                          output_tensor_device_ids,
                          graph_id=None):
    """Dump the value of eager tensors.

    The destination of the dumping is determined by the dump_root of the
    currently enabled dumping callback. The tensors may be transformed prior to
    dumping (e.g., reduced as summary statistics such as minimum, maximum and
    arithmetic mean). The details of this transformation (if any) depends on
    the tensor_debug_mode of the currently enabled dumping callback.

    Args:
      tensors: The EagerTensors whose values are to be dumped, with or without
        value transform.
      op_type: Type of the op that generates the tensors, as a string.
      input_tensor_ids: IDs of the input EagerTensors to the op.
      output_tensor_device_ids: Debugged-generated IDs for the devices on which
        the output tensors are allocated, as a `list` of `int`s. Must match
        `tensors` in length.
      graph_id: ID of the executed graph, applicable only to eager execution of
        a FuncGraph.

    Returns:
      A tfdbg Execution protocol buffer.
    """
    tensor_debug_mode = self._tensor_debug_mode
    output_tensor_ids = [
        t._id for t in tensors]  # pylint:disable=protected-access
    assert len(tensors) == len(output_tensor_device_ids)
    if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
      return debug_event_pb2.Execution(
          op_type=op_type,
          graph_id=graph_id,
          num_outputs=len(tensors),
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
    elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                               debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                               debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                               debug_event_pb2.TensorDebugMode.SHAPE,
                               debug_event_pb2.TensorDebugMode.FULL_TENSOR):
      execution_proto = debug_event_pb2.Execution(
          op_type=op_type,
          num_outputs=len(tensors),
          graph_id=graph_id,
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
      for tensor in tensors:
        if (self._should_dump_tensor(op_type, tensor.dtype) and
            tensor.dtype.is_numpy_compatible):
          if tensor_debug_mode in (
              debug_event_pb2.TensorDebugMode.CURT_HEALTH,
              debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
              debug_event_pb2.TensorDebugMode.FULL_HEALTH):
            if tensor.dtype.is_floating:
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE:
            if (tensor.dtype.is_floating or tensor.dtype.is_integer or
                tensor.dtype.is_bool):
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
            tensor_proto = _concrete_tensor_to_proto(tensor)
          if tensor_proto:
            execution_proto.tensor_protos.append(tensor_proto)
      return execution_proto
    else:
      raise NotImplementedError(
          "Tensor instrumentation is not implemented for debug mode %s yet " %
          self._tensor_debug_mode)

  def callback(self,
               op_type,
               inputs,
               attrs,
               outputs,
               op_name=None,
               graph=None):
    """Op callback for tracing (dumping) a TF program's execution."""
    del attrs  # Unused

    writer = self.get_writer()
    if graph:
      is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
      context_id = self._get_context_id(graph)  # Innermost context ID.
      output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
      if op_type in ("Const", "Placeholder", "PlaceholderWithDefault"):
        # In some cases, the op name of a Const or Placeholder op in a graph
        # can be duplicate (e.g., `None` or "resource").
        # When this happens, we use the output tensor name to infer
        # the non-duplicated tensor name.
        op_name = outputs[0].name.split(":")[0]
      if is_v1_graph_mode:
        for input_tensor in inputs:
          if input_tensor in self._placeholder_to_debug_tensor and outputs:
            outputs[0].op._add_control_input(  # pylint: disable=protected-access
                self._placeholder_to_debug_tensor[input_tensor].op)
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type=op_type,
          op_name=op_name,
          graph_name=graph.name if hasattr(graph, "name") else None,
          graph_id=context_id,
          input_names=[
              self._lookup_tensor_name(input_tensor) for input_tensor in inputs
          ],
          num_outputs=len(outputs),
          output_tensor_ids=output_tensor_ids,
          code_location=self._process_stack_frames())
      writer.WriteGraphOpCreation(graph_op_creation)
      if outputs and compat.as_bytes(
          op_type) not in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return self._instrument_symbolic_tensors(
            outputs, op_type, op_name, context_id, output_tensor_ids)
    else:
      op_type_bytes = compat.as_bytes(op_type)
      if op_type_bytes == b"DebugNumericSummaryV2":
        # TODO(b/140334369): Remove this special casing logic once op_callback.
        # automatically prevents infinite recursion in eager mode.
        return None
      if op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return None
      context_id = self._func_graph_id_from_func_name(op_type)
      input_ids = [t._id for t in inputs]  # pylint:disable=protected-access
      output_tensor_device_ids = [writer.RegisterDeviceAndGetId(output.device)
                                  for output in outputs] if outputs else []
      writer.WriteExecution(self._dump_eager_tensors(
          outputs, op_type, input_ids, output_tensor_device_ids,
          graph_id=context_id))

  def _lookup_tensor_name(self, tensor):
    """Look up the name of a graph tensor.

    This method maps the name of a debugger-generated Identity or
    DebugIdentityV2 tensor to the name of the original instrumented tensor,
    if `tensor` is such a debugger-created tensor.
    Otherwise, it returns the name of `tensor` as is.

    Args:
      tensor: The graph tensor to look up the name for.

    Returns:
      Name of the original instrumented tensor as known to the debugger.
    """
    return self._tensor_aliases.get(tensor.name, tensor.name)

  def _func_graph_id_from_func_name(self, op_type):
    """Attempt to get the ID of a FuncGraph based on an op type name.

    Also caches the ID for faster access later.

    Args:
      op_type: Op type string, which may be the name of a function.

    Returns:
      If the op_type name does not fit the pattern of a function name (e.g.,
      one that starts with "__inference_"), `None` is returned immediately.
      Else, if the FuncGraph is found, ID of the underlying FuncGraph is
      returned as a string.
      Else, `None` is returned.
    """
    op_type = compat.as_bytes(op_type)
    if is_op_type_function(op_type):
      # op_type for eagerly-executed FuncGraphs have the prefixed and suffixed
      # form such as "__inference_my_function_13579", wherein the middle part
      # "my_function" is the name of the Python function from which the
      # FuncGraph is compiled. Due to the suffix, the op_type is unique for
      # - duplicate Python function names
      # - multiple compilation of the same Python function
      if op_type in self._op_type_to_context_id:
        return self._op_type_to_context_id[op_type]
      with self._context_lock:
        for function in self._function_to_graph_id:
          if function.name == op_type:
            graph_id = self._function_to_graph_id[function]
            self._op_type_to_context_id[op_type] = graph_id
            return graph_id
      return None
    else:
      return None

  def _get_symbolic_tensor_ids(self, num_tensors):
    """Generate `num_tensors` unique IDs from the symbolic-tensor counter."""
    tensor_ids = []
    if num_tensors:
      with self._symbolic_tensor_counter_lock:
        for _ in range(num_tensors):
          self._symbolic_tensor_counter += 1
          tensor_ids.append(self._symbolic_tensor_counter)
    return tensor_ids

  def _should_dump_tensor(self, op_type, dtype):
    """Determine if the given tensor's value will be dumped.

    The determination is made given the configurations such as `op_regex`,
    `tensor_dtypes`.

    Args:
      op_type: Name of the op's type, as a string (e.g., "MatMul").
      dtype: The dtype of the tensor, as a `dtypes.DType` object.

    Returns:
      A bool indicating whether the tensor's value will be dumped.
    """
    should_dump = True
    if self._op_regex:
      should_dump = (should_dump and
                     re.match(self._op_regex, op_type))
    if self._tensor_dtypes:
      if isinstance(self._tensor_dtypes, (list, tuple)):
        should_dump = (should_dump and
                       any(dtype == dtype_item for dtype_item
                           in self._tensor_dtypes))
      else:  # A callable that takes a DType argument and return a boolean.
        should_dump = should_dump and self._tensor_dtypes(dtype)
    return should_dump

671 

672 

673@tf_export("debugging.experimental.enable_dump_debug_info") 

674def enable_dump_debug_info(dump_root, 

675 tensor_debug_mode=DEFAULT_TENSOR_DEBUG_MODE, 

676 circular_buffer_size=1000, 

677 op_regex=None, 

678 tensor_dtypes=None): 

679 """Enable dumping debugging information from a TensorFlow program. 

680 

681 The debugging information is dumped to a directory on the file system 

682 specified as `dump_root`. 

683 

684 The dumped debugging information can be ingested by debugger UIs. 

685 

686 The files in the dump directory contain the following information: 

687 - TensorFlow Function construction (e.g., compilation of Python functions 

688 decorated with @tf.function), the op types, names (if available), context, 

689 the input and output tensors, and the associated stack traces. 

690 - Execution of TensorFlow operations (ops) and Functions and their stack 

691 traces, op types, names (if available) and contexts. In addition, 

692 depending on the value of the `tensor_debug_mode` argument (see Args 

693 section below), the value(s) of the output tensors or more concise 

694 summaries of the tensor values will be dumped. 

695 - A snapshot of Python source files involved in the execution of the 

696 TensorFlow program. 

697 

698 Once enabled, the dumping can be disabled with the corresponding 

699 `disable_dump_debug_info()` method under the same Python namespace. 

700 Calling this method more than once with the same `dump_root` is idempotent. 

701 Calling this method more than once with different `tensor_debug_mode`s 

702 leads to a `ValueError`. 

703 Calling this method more than once with different `circular_buffer_size`s 

704 leads to a `ValueError`. 

705 Calling this method with a different `dump_root` abolishes the 

706 previously-enabled `dump_root`. 

707 

708 Usage example: 

709 

710 ```py 

711 tf.debugging.experimental.enable_dump_debug_info('/tmp/my-tfdbg-dumps') 

712 

713 # Code to build, train and run your TensorFlow model... 

714 ``` 

715 

716 NOTE: If your code is running on TPUs, be sure to call 

717 `tf.config.set_soft_device_placement(True)` before calling 

718 `tf.debugging.experimental.enable_dump_debug_info()` as this API uses 

719 automatic outside compilation on TPUs. For example: 

720 

721 ```py 

722 tf.config.set_soft_device_placement(True) 

723 tf.debugging.experimental.enable_dump_debug_info( 

724 logdir, tensor_debug_mode="FULL_HEALTH") 

725 

726 resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='') 

727 strategy = tf.distribute.TPUStrategy(resolver) 

728 with strategy.scope(): 

729 # ... 

730 ``` 

731 

732 Args: 

733 dump_root: The directory path where the dumping information will be written. 

734 tensor_debug_mode: Debug mode for tensor values, as a string. 

735 The currently supported options are: 

736 - "NO_TENSOR": (Default) Only traces the output tensors of all executed 

737 ops (including those executed eagerly at the Python level or as a part 

738 of a TensorFlow graph) and functions, while not extracting any 

739 information from the values of the tensors. 

740 - "CURT_HEALTH": For each floating-dtype tensor (e.g., tensors of dtypes 

741 such as `float32`, `float64` and `bfloat16`), extracts a binary bit 

742 indicating whether it contains any -infinity, +infinity or NaN. 

743 - "CONCISE_HEALTH": For each floating-dtype tensor, extract total 

744 element count, and counts of -infinity, +infinity and NaN elements. 

745 - "FULL_HEALTH": For each floating-dtype tensor, extracts the dtype, 

746 rank (number of dimensions), total element count, and counts of 

747 -infinity, +infinity and NaN elements. 

748 - "SHAPE": For each tensor (regardless of dtype), extracts its dtype, 

749 rank, total element count and shape. 

750 circular_buffer_size: Size of the circular buffers for execution events. 

751 These circular buffers are designed to reduce the overhead of debugging 

752 dumping. They hold the most recent debug events concerning eager execution 

753 of ops and `tf.function`s and traces of tensor values computed inside 

754 `tf.function`s. They are written to the file system only when the proper 

755 flushing method is called (see description of return values below). 

756 Expected to be an integer. If <= 0, the circular-buffer behavior will be 

757 disabled, i.e., the execution debug events will be written to the file 

758 writers in the same way as non-execution events such as op creations and 

759 source-file snapshots. 

760 op_regex: Dump data from only the tensors from op types that matches to the 

761 regular expression (through Python's `re.match()`). 

762 "Op type" refers to the names of the TensorFlow operations (e.g., 

763 "MatMul", "LogSoftmax"), which may repeat in a TensorFlow 

764 function. It does *not* refer to the names of nodes (e.g., 

765 "dense/MatMul", "dense_1/MatMul_1") which are unique within a function. 

766 - Example 1: Dump tensor data from only MatMul and Relu ops 

767 `op_regex="^(MatMul|Relu)$"`. 

768 - Example 2: Dump tensors from all ops *except* Relu: 

769 `op_regex="(?!^Relu$)"`. 

770 This filter operates in a logical AND relation with `tensor_dtypes`. 

771 tensor_dtypes: Dump data from only the tensors of which the specified 

772 dtypes. This optional argument can be in any of the following format: 

773 - a list or tuple of `DType` objects or strings that can be converted 

774 to `DType` objects via `tf.as_dtype()`. Examples: 

775 - `tensor_dtype=[tf.float32, tf.float64]`, 

776 - `tensor_dtype=["float32", "float64"]`, 

777 - `tensor_dtypes=(tf.int32, tf.bool)`, 

778 - `tensor_dtypes=("int32", "bool")` 

779 - a callable that takes a single `DType` argument and returns a Python 

780 `boolean` indicating whether the dtype is to be included in the data 

781 dumping. Examples: 

782 - `tensor_dtype=lambda dtype: dtype.is_integer`. 

783 This filter operates in a logical AND relation with `op_regex`. 

784 Returns: 

785 A DebugEventsWriter instance used by the dumping callback. The caller 

786 may use its flushing methods, including `FlushNonExecutionFiles()` and 

787 `FlushExecutionFiles()`. 

788 """ 

789 # TODO(cais): Revise the "UIs (currently under construction)" part of the doc 

790 # string above. 

791 # TODO(cais): Add Python code example to the doc string above. 

792 global _state 

793 

794 tensor_debug_mode_keys = debug_event_pb2.TensorDebugMode.keys() 

795 if tensor_debug_mode not in tensor_debug_mode_keys: 

796 raise ValueError( 

797 "Invalid value in tensor_debug_mode ('%s'). Valid options are: %s" % 

798 (tensor_debug_mode, tensor_debug_mode_keys)) 

799 

800 tensor_debug_mode = debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode) 

801 if tensor_debug_mode not in (debug_event_pb2.TensorDebugMode.NO_TENSOR, 

802 debug_event_pb2.TensorDebugMode.CURT_HEALTH, 

803 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH, 

804 debug_event_pb2.TensorDebugMode.FULL_HEALTH, 

805 debug_event_pb2.TensorDebugMode.SHAPE, 

806 debug_event_pb2.TensorDebugMode.FULL_TENSOR): 

807 raise NotImplementedError( 

808 "tfdbg dumping: support for tensor debug mode %s is not " 

809 "implemented yet" % 

810 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode)) 

811 

812 # Validate the types of tensor_dtypes. 

813 if tensor_dtypes is not None: 

814 if (not isinstance(tensor_dtypes, (list, tuple)) and 

815 not callable(tensor_dtypes)): 

816 raise ValueError( 

817 "If specified, tensor_dtypes is expected to be a list, a tuple, or " 

818 "a callable that takes a DType argument and returns a boolean, " 

819 "but received %s" % (tensor_dtypes,)) 

820 if isinstance(tensor_dtypes, (list, tuple)): 

821 tensor_dtypes = [ 

822 dtypes.as_dtype(dtype_item) for dtype_item in tensor_dtypes] 

823 

824 if hasattr(_state, "dumping_callback"): 

825 if _state.dumping_callback.circular_buffer_size != circular_buffer_size: 

826 raise ValueError( 

827 "There is already a dumping callback configured with a different " 

828 "circular-buffer size (%d). Therefore the newly request " 

829 "circular-buffer size (%d) will not be honored." % 

830 (_state.dumping_callback.circular_buffer_size, circular_buffer_size)) 

831 if _state.dumping_callback.tensor_debug_mode != tensor_debug_mode: 

832 raise ValueError( 

833 "There is already a dumping callback configured for dump root " 

834 "%s with a different " 

835 "tensor-debug mode (%s). Therefore the newly request " 

836 "tensor-debug mode (%s) size will not be honored." % 

837 (_state.dumping_callback.dump_root, 

838 tensor_debug_mode_keys[_state.dumping_callback.tensor_debug_mode], 

839 tensor_debug_mode_keys[tensor_debug_mode])) 

840 else: 

841 _state.dumping_callback = _DumpingCallback(dump_root, 

842 tensor_debug_mode, 

843 circular_buffer_size, 

844 op_regex, 

845 tensor_dtypes) 

846 op_callbacks.add_op_callback(_state.dumping_callback.callback) 

847 function_lib.add_function_callback( 

848 _state.dumping_callback.function_callback) 

849 

850 if _state.dumping_callback.dump_root != dump_root: 

851 _state.dumping_callback.dump_root = dump_root 

852 

853 logging.info( 

854 "Enabled dumping callback in thread %s " 

855 "(dump root: %s, tensor debug mode: %s)", 

856 threading.current_thread().name, 

857 _state.dumping_callback.dump_root, 

858 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode)) 

859 

860 atexit.register(disable_dump_debug_info) 

861 return _state.dumping_callback.get_writer() 

862 

863 

864@tf_export("debugging.experimental.disable_dump_debug_info") 

865def disable_dump_debug_info(): 

866 """Disable the currently-enabled debugging dumping. 

867 

868 If the `enable_dump_debug_info()` method under the same Python namespace 

869 has been invoked before, calling this method disables it. If no call to 

870 `enable_dump_debug_info()` has been made, calling this method is a no-op. 

871 Calling this method more than once is idempotent. 

872 """ 

873 if hasattr(_state, "dumping_callback"): 

874 dump_root = _state.dumping_callback.dump_root 

875 tfdbg_run_id = _state.dumping_callback.tfdbg_run_id 

876 debug_events_writer.DebugEventsWriter(dump_root, tfdbg_run_id).Close() 

877 op_callbacks.remove_op_callback(_state.dumping_callback.callback) 

878 function_lib.remove_function_callback( 

879 _state.dumping_callback.function_callback) 

880 delattr(_state, "dumping_callback") 

881 logging.info("Disabled dumping callback in thread %s (dump root: %s)", 

882 threading.current_thread().name, dump_root)