Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/lib/dumping_callback.py: 21%
312 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Dumping op callbacks: Enables dump-based features in tfdbg v2."""
17import atexit
18import os
19import re
20import socket
21import threading
22import uuid
24from tensorflow.core.framework import graph_debug_info_pb2
25from tensorflow.core.framework import tensor_pb2
26from tensorflow.core.protobuf import debug_event_pb2
27from tensorflow.python.debug.lib import debug_events_writer
28from tensorflow.python.debug.lib import op_callbacks_common
29from tensorflow.python.debug.lib import source_utils
30from tensorflow.python.eager import function as function_lib
31from tensorflow.python.framework import constant_op
32from tensorflow.python.framework import dtypes
33from tensorflow.python.framework import op_callbacks
34from tensorflow.python.framework import ops
35from tensorflow.python.framework import tensor_util
36from tensorflow.python.ops import array_ops
37from tensorflow.python.ops import gen_debug_ops
38from tensorflow.python.platform import tf_logging as logging
39from tensorflow.python.util import compat
40from tensorflow.python.util import object_identity
41from tensorflow.python.util import tf_stack
42from tensorflow.python.util.tf_export import tf_export
# Thread-local state. Holds the currently-enabled dumping callback (if any)
# for the current thread.
_state = threading.local()

# Default value of the `tensor_debug_mode` argument of
# `enable_dump_debug_info()`.
DEFAULT_TENSOR_DEBUG_MODE = "NO_TENSOR"

# pylint:disable=protected-access
# Byte-string name prefixes that identify graphs compiled from Python
# functions (forward, backward and inference FuncGraphs).
_FUNCTION_PREFIXES = (
    compat.as_bytes(function_lib._FORWARD_PREFIX),
    compat.as_bytes(function_lib._BACKWARD_PREFIX),
    compat.as_bytes(function_lib._INFERENCE_PREFIX))
# pylint:enable=protected-access
def is_op_type_function(op_type):
  """Check whether an op type name denotes a compiled (Func)Graph.

  Args:
    op_type: Op type, as a string or byte string.

  Returns:
    `True` if `op_type` starts with one of the function-name prefixes
    (forward / backward / inference), `False` otherwise.
  """
  op_type_bytes = compat.as_bytes(op_type)
  return op_type_bytes.startswith(_FUNCTION_PREFIXES)
@ops.RegisterGradient("DebugIdentityV2")
def _debug_identity_v2_grad(op, dy):
  """Gradient function for the DebugIdentityV2 op.

  DebugIdentityV2 behaves as an identity with respect to its input, so the
  incoming gradient is passed through unchanged.

  Args:
    op: The DebugIdentityV2 op (unused).
    dy: The incoming gradient tensor.

  Returns:
    `dy`, unchanged.
  """
  del op  # Unused
  return dy
66def _get_tfdbg_run_id():
67 return str(uuid.uuid4())[:8]
70def _get_id():
71 """Get a short unique ID."""
72 return str(uuid.uuid4())
def _concrete_tensor_to_proto(tensor):
  """Convert a concrete (eager) tensor to a TensorProto.

  Args:
    tensor: An EagerTensor whose value is materialized via `.numpy()`.

  Returns:
    A `TensorProto` holding the tensor's value.
  """
  numpy_value = tensor.numpy()
  return tensor_util.make_tensor_proto(numpy_value)
class _DumpingCallback(object):
  """An object holding the states surrounding the dumping callback."""

  def __init__(self,
               dump_root,
               tensor_debug_mode,
               circular_buffer_size,
               op_regex,
               tensor_dtypes):
    """Constructor of _DumpingCallback.

    Args:
      dump_root: Directory path to which debug events are written.
      tensor_debug_mode: A tfdbg TensorDebugMode enum value determining how
        much information about tensor values is traced.
      circular_buffer_size: Size of the circular buffers used by the
        debug-events writer for execution events.
      op_regex: Optional regex (string or compiled pattern) limiting which op
        types have their tensors dumped (see `_should_dump_tensor()`).
      tensor_dtypes: Optional list/tuple of DTypes, or a callable taking a
        DType and returning a bool, limiting which dtypes are dumped.
    """
    self._dump_root = dump_root
    self._tfdbg_run_id = _get_tfdbg_run_id()
    self._tensor_debug_mode = tensor_debug_mode
    self._circular_buffer_size = circular_buffer_size
    self._op_regex = op_regex
    self._tensor_dtypes = tensor_dtypes

    self._hostname = socket.gethostname()
    # A list of source-file paths.
    self._source_file_paths = []
    # A map from stack frame (FileLineCol) to unique ID.
    self._stack_frame_to_id = dict()
    # Mapping op context to unique ID.
    self._context_to_id = dict()
    # Maps AtomicFunction objects to the ID of their FuncGraph.
    self._function_to_graph_id = dict()
    # Caches FuncGraph IDs keyed by op-type (function-name) byte strings.
    self._op_type_to_context_id = dict()
    # Keeps track of counter for symbolic tensors output by in-graph ops.
    # It is used to make unique names for debugger-generated tensors.
    self._symbolic_tensor_counter = 0
    # A map from the names of debugger-generated Identity and DebugIdentityV2
    # tensors to the names of the original instrumented graph tensors. This is
    # applicable to v1 graph mode only.
    self._tensor_aliases = dict()
    self._source_file_paths_lock = threading.Lock()
    self._stack_frame_to_id_lock = threading.Lock()
    self._context_lock = threading.Lock()
    self._symbolic_tensor_counter_lock = threading.Lock()
    # A dict mapping Placeholder tensors to their instrumenting debug tensors.
    # Used only under V1 graph mode, where we can't rely on auto control
    # dependency to execute the debug tensors and hence need to attach the debug
    # tensors as control dependencies of the ops that consume the Placeholder.
    self._placeholder_to_debug_tensor = (
        object_identity.ObjectIdentityDictionary())
    # DebugEventsWriter instance, created lazily by `get_writer()`.
    self._writer = None

  def function_callback(self, function):
    """A callback to be called on creation of Functions.

    Records the ID of the function's FuncGraph so that later eager executions
    of the function can be associated with the graph (see
    `_func_graph_id_from_func_name()`).

    Args:
      function: The function (AtomicFunction) just created.

    Returns:
      `function`, unchanged.
    """
    graph_id = self._get_context_id(function.graph)
    with self._context_lock:
      # NOTE(cais): We currently store the function (AtomicFunction)
      # as keys of this dict, because weakrefs to them sometimes become
      # unreferenceable by the time the op callback is called. This approach
      # may cause memory leaks due to the holding of the functions. If that's
      # the case, calling `tf.debugging.disable_dump_debug_info()` should
      # cause GC of this object and this dict.
      self._function_to_graph_id[function] = graph_id
    return function

  @property
  def dump_root(self):
    """Path of the directory to which debug events are written."""
    return self._dump_root

  @dump_root.setter
  def dump_root(self, dump_root):
    # Changing the dump root invalidates the current writer; a new one is
    # created lazily by `get_writer()`.
    if self._dump_root != dump_root:
      self._dump_root = dump_root
      self._writer = None

  @property
  def tfdbg_run_id(self):
    """Unique (short random) ID of this tfdbg run."""
    return self._tfdbg_run_id

  @property
  def tensor_debug_mode(self):
    """The configured tfdbg TensorDebugMode enum value."""
    return self._tensor_debug_mode

  @property
  def circular_buffer_size(self):
    """Size of the circular buffers for execution debug events."""
    return self._circular_buffer_size

  def get_writer(self):
    """Get the debug events writer for the currently configured dump root."""
    if not self._writer:
      self._writer = debug_events_writer.DebugEventsWriter(
          self._dump_root,
          self._tfdbg_run_id,
          circular_buffer_size=self._circular_buffer_size)
    return self._writer

  def _get_context_id(self, context):
    """Get a unique ID for an op-construction context (e.g., a graph).

    If the graph has been encountered before, reuse the same unique ID.
    When encountering a new context (graph), this method writes a DebugEvent
    proto with the debugged_graph field to the proper DebugEvent file.

    Args:
      context: A context to get the unique ID for. Must be hashable. E.g., a
        Graph object.

    Returns:
      A unique ID for the context.
    """
    # Use the double-checked lock pattern to optimize the common case.
    if context in self._context_to_id:  # 1st check, without lock.
      return self._context_to_id[context]
    graph_is_new = False
    with self._context_lock:
      if context not in self._context_to_id:  # 2nd check, with lock.
        graph_is_new = True
        context_id = _get_id()
        self._context_to_id[context] = context_id
    # The proto write happens outside the lock to avoid holding it across
    # the recursive `_get_outer_context_id()` call and the writer call.
    if graph_is_new:
      self.get_writer().WriteDebuggedGraph(debug_event_pb2.DebuggedGraph(
          graph_id=context_id,
          graph_name=getattr(context, "name", None),
          outer_context_id=self._get_outer_context_id(context)))
    return self._context_to_id[context]

  def _get_outer_context_id(self, graph):
    """Get the ID of the immediate outer context of the input graph.

    Args:
      graph: The graph (context) in question.

    Returns:
      If an outer context exists, the immediate outer context name as a string.
      If such an outer context does not exist (i.e., `graph` is itself
      outermost), `None`.
    """
    if hasattr(graph, "outer_graph") and graph.outer_graph:
      return self._get_context_id(graph.outer_graph)
    else:
      return None

  def _write_source_file_content(self, file_path):
    """Send the content of a source file via debug-events writer.

    Args:
      file_path: Path to the source file.

    Returns:
      An int index for the file.
    """
    if file_path in self._source_file_paths:
      return self._source_file_paths.index(file_path)
    with self._source_file_paths_lock:
      if file_path not in self._source_file_paths:
        lines = None
        if source_utils.is_extension_uncompiled_python_source(file_path):
          try:
            lines, _ = source_utils.load_source(file_path)
          except IOError as e:
            # Best effort: the SourceFile proto is still written, without
            # the file's lines.
            logging.warn(
                "Failed to read source code from path: %s. Reason: %s",
                file_path, e)
        writer = self.get_writer()
        writer.WriteSourceFile(debug_event_pb2.SourceFile(
            file_path=file_path, host_name=self._hostname, lines=lines))
        self._source_file_paths.append(file_path)
      return self._source_file_paths.index(file_path)

  def _process_stack_frames(self):
    """Process stack frames.

    Send the content of source-files, on a best-effort basis.

    Returns:
      A CodeLocation proto containing the hostname and the IDs of the
      stack frames.
    """
    stack_frames = tf_stack.extract_stack()
    stack_frame_ids = []
    writer = None
    for file_path, lineno, func, _ in stack_frames:
      abs_path = os.path.abspath(file_path)
      if (abs_path, lineno, func) in self._stack_frame_to_id:
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])
        continue
      with self._stack_frame_to_id_lock:
        if (abs_path, lineno, func) not in self._stack_frame_to_id:
          stack_frame_id = _get_id()
          self._stack_frame_to_id[(abs_path, lineno, func)] = stack_frame_id
          file_index = self._write_source_file_content(abs_path)
          file_line_col = graph_debug_info_pb2.GraphDebugInfo.FileLineCol(
              file_index=file_index, line=lineno, func=func)
          stack_frame_with_id = debug_event_pb2.StackFrameWithId(
              id=stack_frame_id, file_line_col=file_line_col)
          writer = self.get_writer()
          writer.WriteStackFrameWithId(stack_frame_with_id)
        stack_frame_ids.append(
            self._stack_frame_to_id[(abs_path, lineno, func)])

    code_location = debug_event_pb2.CodeLocation(
        host_name=self._hostname, stack_frame_ids=stack_frame_ids)
    return code_location

  def _process_v1_graph_mode_tensor(self,
                                    op_type,
                                    tensor,
                                    debug_tensor,
                                    tensor_debug_mode):
    """For V1 graph mode, determine what tensor to output from callback.

    Args:
      op_type: Type of the op that outputs the original symbolic tensor.
      tensor: The original output symbolic tensor.
      debug_tensor: The debugger-instrumented tensor.
      tensor_debug_mode: Debug mode used, a tfdbg TensorDebugMode enum.

    Returns:
      A symbolic tensor to be returned by the dumping op_callback.
    """
    # Placeholders need special treatment under V1 graph mode. The
    # callback can't simply override the Placeholder tensor to a debug tensor,
    # as that would cause the Placeholder op to lack a value.
    if op_type in ("Placeholder", "PlaceholderWithDefault"):
      self._placeholder_to_debug_tensor[tensor] = debug_tensor
      return tensor
    else:
      # TODO(cais): Evaluate performance optimization options. For the
      # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
      # control dependency of `tensor.op` without an additional identity op.
      if (tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR and
          op_type != "Const"):
        # NOTE(b/153716279): Under v1 graph mode, overriding the output tensor
        # of Const ops can lead to downstream errors related to shapes. We opt
        # to use an identity op to avoid this issue at the cost of slightly
        # larger graph size.
        self._tensor_aliases[debug_tensor.name] = tensor.name
        return debug_tensor
      else:
        with self._symbolic_tensor_counter_lock:
          identity_name = "tfdbg_identity_%d" % self._symbolic_tensor_counter
        identity = array_ops.identity(tensor, name=identity_name)
        identity.op._add_control_input(  # pylint: disable=protected-access
            debug_tensor.op)
        self._tensor_aliases[identity.name] = tensor.name
        return identity

  def _instrument_symbolic_tensors(self,
                                   tensors,
                                   op_type,
                                   op_name,
                                   tfdbg_context_id,
                                   tensor_ids):
    """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

    The detailed fashion in which the tensors are instrumented is determined
    by the tensor_debug_mode configured for the currently enabled dumping
    callback.

    Args:
      tensors: A tuple of Tensors to instrument. It is assumed that their
        ordering corresponds to the ordering of output tensors of an original
        op. Output slot indices (0-based) will be generated based on the
        ordering.
      op_type: Type name of the op that emits the Tensors (e.g., "MatMul").
      op_name: Name of the op that emits the Tensors (e.g., "dense_1/MatMul").
      tfdbg_context_id: A unique ID for the context that the op belongs to
        (e.g., a graph).
      tensor_ids: A list of unique ID numbers for the tensors, for tfdbg's
        internal use.

    Returns:
      Non-eager Tensors that override the `tensors` as the output of the op
      that originally generated `tensors`. In some cases (e.g., non-V1 graph
      mode), this may be `None`, as the instrumentation can simply rely on
      automatic control dependencies (see `auto_control_deps.py`) instead of
      tensor overriding.
    """
    tensor_debug_mode = self._tensor_debug_mode
    debug_urls = ["file://%s" % self._dump_root]
    is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
    instrumented_tensors = [] if is_v1_graph_mode else None
    for output_slot, tensor in enumerate(tensors):
      # Generate a unique name for the debug op under the counter lock.
      with self._symbolic_tensor_counter_lock:
        debug_identity_name = ("DebugIdentityV2_%d" %
                               self._symbolic_tensor_counter)
      debug_identity_op_kwargs = {
          "tfdbg_context_id": tfdbg_context_id,
          "op_name": op_name,
          "output_slot": output_slot,
          "tensor_debug_mode": self._tensor_debug_mode,
          "debug_urls": debug_urls,
          "name": debug_identity_name,
          "circular_buffer_size": self._circular_buffer_size,
          "tfdbg_run_id": self._tfdbg_run_id,
      }
      if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        if is_v1_graph_mode and not tensor.dtype.is_numpy_compatible:
          # Avoid instrumenting Placeholder under is_v1_graph_mode. Doing that
          # would cause runtime complaint about Placeholders not being fed.
          instrumented_tensors.append(tensor)
          continue
        # Except in V1 graph mode + control flow, debug_identity_v2 triggers
        # auto control dependency because it's a stateful op.
        debug_tensor = gen_debug_ops.debug_identity_v2(
            # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
            # as a low-overhead placeholder, since no actual tensor value is
            # traced.
            constant_op.constant([], dtype=dtypes.float32),
            **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                                 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                                 debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                                 debug_event_pb2.TensorDebugMode.SHAPE):
        dtype = tensor.dtype
        # Health modes apply only to floating dtypes; SHAPE also covers
        # integer and boolean dtypes.
        dtype_is_dumpable = (
            tensor_debug_mode in (
                debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                debug_event_pb2.TensorDebugMode.FULL_HEALTH) and
            dtype.is_floating or
            tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE and
            (dtype.is_floating or dtype.is_integer or dtype.is_bool))
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not dtype_is_dumpable):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            gen_debug_ops.debug_numeric_summary_v2(
                tensor,
                tensor_id=tensor_ids[output_slot],
                tensor_debug_mode=self._tensor_debug_mode,
                output_dtype=dtypes.float64), **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
        if (not self._should_dump_tensor(op_type, tensor.dtype) or
            not tensor.dtype.is_numpy_compatible):
          # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under
          # V1 graph mode is known to have issues. TODO(cais): Investigate.
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
        debug_tensor = gen_debug_ops.debug_identity_v2(
            tensor, **debug_identity_op_kwargs)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
      else:
        raise NotImplementedError(
            "Symbolic tensor instrumentation is not implemented for debug mode "
            "%s" % self._tensor_debug_mode)
    return instrumented_tensors

  def _dump_eager_tensors(self,
                          tensors,
                          op_type,
                          input_tensor_ids,
                          output_tensor_device_ids,
                          graph_id=None):
    """Dump the value of eager tensors.

    The destination of the dumping is determined by the dump_root of the
    currently enabled dumping callback. The tensors may be transformed prior to
    dumping (e.g., reduced as summary statistics such as minimum, maximum and
    arithmetic mean). The details of this transformation (if any) depends on
    the tensor_debug_mode of the currently enabled dumping callback.

    Args:
      tensors: The EagerTensors whose values are to be dumped, with or without
        value transform.
      op_type: Type of the op that generates the tensors, as a string.
      input_tensor_ids: IDs of the input EagerTensors to the op.
      output_tensor_device_ids: Debugged-generated IDs for the devices on which
        the output tensors are allocated, as a `list` of `int`s. Must match
        `tensors` in length.
      graph_id: ID of the executed graph, applicable only to eager execution of
        a FuncGraph.

    Returns:
      A tfdbg Execution protocol buffer.
    """
    tensor_debug_mode = self._tensor_debug_mode
    output_tensor_ids = [
        t._id for t in tensors]  # pylint:disable=protected-access
    assert len(tensors) == len(output_tensor_device_ids)
    if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
      return debug_event_pb2.Execution(
          op_type=op_type,
          graph_id=graph_id,
          num_outputs=len(tensors),
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
    elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                               debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                               debug_event_pb2.TensorDebugMode.FULL_HEALTH,
                               debug_event_pb2.TensorDebugMode.SHAPE,
                               debug_event_pb2.TensorDebugMode.FULL_TENSOR):
      execution_proto = debug_event_pb2.Execution(
          op_type=op_type,
          num_outputs=len(tensors),
          graph_id=graph_id,
          input_tensor_ids=input_tensor_ids,
          output_tensor_ids=output_tensor_ids,
          output_tensor_device_ids=output_tensor_device_ids,
          tensor_debug_mode=tensor_debug_mode,
          code_location=self._process_stack_frames())
      for tensor in tensors:
        if (self._should_dump_tensor(op_type, tensor.dtype) and
            tensor.dtype.is_numpy_compatible):
          if tensor_debug_mode in (
              debug_event_pb2.TensorDebugMode.CURT_HEALTH,
              debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
              debug_event_pb2.TensorDebugMode.FULL_HEALTH):
            if tensor.dtype.is_floating:
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE:
            if (tensor.dtype.is_floating or tensor.dtype.is_integer or
                tensor.dtype.is_bool):
              tensor_proto = _concrete_tensor_to_proto(
                  gen_debug_ops.debug_numeric_summary_v2(
                      tensor,
                      tensor_debug_mode=tensor_debug_mode,
                      output_dtype=dtypes.float64))
            else:
              # A placeholder for non-floating-type output tensors.
              tensor_proto = tensor_pb2.TensorProto()
          elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
            tensor_proto = _concrete_tensor_to_proto(tensor)
          if tensor_proto:
            execution_proto.tensor_protos.append(tensor_proto)
      return execution_proto
    else:
      raise NotImplementedError(
          "Tensor instrumentation is not implemented for debug mode %s yet " %
          self._tensor_debug_mode)

  def callback(self,
               op_type,
               inputs,
               attrs,
               outputs,
               op_name=None,
               graph=None):
    """Op callback for tracing (dumping) a TF program's execution."""
    del attrs  # Unused

    writer = self.get_writer()
    if graph:
      # Symbolic (graph-construction) branch.
      is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
      context_id = self._get_context_id(graph)  # Innermost context ID.
      output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
      if op_type in ("Const", "Placeholder", "PlaceholderWithDefault"):
        # In some cases, the op name of a Const or Placeholder op in a graph
        # can be duplicate (e.g., `None` or "resource").
        # When this happens, we use the output tensor name to infer
        # the non-duplicated tensor name.
        op_name = outputs[0].name.split(":")[0]
      if is_v1_graph_mode:
        # Attach previously-recorded Placeholder debug tensors as control
        # dependencies of the consuming op (see
        # `_process_v1_graph_mode_tensor()`).
        for input_tensor in inputs:
          if input_tensor in self._placeholder_to_debug_tensor and outputs:
            outputs[0].op._add_control_input(  # pylint: disable=protected-access
                self._placeholder_to_debug_tensor[input_tensor].op)
      graph_op_creation = debug_event_pb2.GraphOpCreation(
          op_type=op_type,
          op_name=op_name,
          graph_name=graph.name if hasattr(graph, "name") else None,
          graph_id=context_id,
          input_names=[
              self._lookup_tensor_name(input_tensor) for input_tensor in inputs
          ],
          num_outputs=len(outputs),
          output_tensor_ids=output_tensor_ids,
          code_location=self._process_stack_frames())
      writer.WriteGraphOpCreation(graph_op_creation)
      if outputs and compat.as_bytes(
          op_type) not in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return self._instrument_symbolic_tensors(
            outputs, op_type, op_name, context_id, output_tensor_ids)
    else:
      # Eager-execution branch.
      op_type_bytes = compat.as_bytes(op_type)
      if op_type_bytes == b"DebugNumericSummaryV2":
        # TODO(b/140334369): Remove this special casing logic once op_callback.
        # automatically prevents infinite recursion in eager mode.
        return None
      if op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
        return None
      context_id = self._func_graph_id_from_func_name(op_type)
      input_ids = [t._id for t in inputs]  # pylint:disable=protected-access
      output_tensor_device_ids = [writer.RegisterDeviceAndGetId(output.device)
                                  for output in outputs] if outputs else []
      writer.WriteExecution(self._dump_eager_tensors(
          outputs, op_type, input_ids, output_tensor_device_ids,
          graph_id=context_id))

  def _lookup_tensor_name(self, tensor):
    """Look up the name of a graph tensor.

    This method maps the name of a debugger-generated Identity or
    DebugIdentityV2 tensor to the name of the original instrumented tensor,
    if `tensor` is such a debugger-created tensor.
    Otherwise, it returns the name of `tensor` as is.

    Args:
      tensor: The graph tensor to look up the name for.

    Returns:
      Name of the original instrumented tensor as known to the debugger.
    """
    return self._tensor_aliases.get(tensor.name, tensor.name)

  def _func_graph_id_from_func_name(self, op_type):
    """Attempt to get the ID of a FuncGraph based on an op type name.

    Also caches the ID for faster access later.

    Args:
      op_type: Op type string, which may be the name of a function.

    Returns:
      If the op_type name does not fit the pattern of a function name (e.g.,
      one that starts with "__inference_"), `None` is returned immediately.
      Else, if the FuncGraph is found, ID of the underlying FuncGraph is
      returned as a string.
      Else, `None` is returned.
    """
    op_type = compat.as_bytes(op_type)
    if is_op_type_function(op_type):
      # op_type for eagerly-executed FuncGraphs have the prefixed and suffixed
      # form such as "__inference_my_function_13579", wherein the middle part
      # "my_function" is the name of the Python function from which the
      # FuncGraph is compiled. Due to the suffix, the op_type is unique for
      # - duplicate Python function names
      # - multiple compilation of the same Python function
      if op_type in self._op_type_to_context_id:
        return self._op_type_to_context_id[op_type]
      with self._context_lock:
        for function in self._function_to_graph_id:
          if function.name == op_type:
            graph_id = self._function_to_graph_id[function]
            self._op_type_to_context_id[op_type] = graph_id
            return graph_id
      return None
    else:
      return None

  def _get_symbolic_tensor_ids(self, num_tensors):
    """Assign `num_tensors` fresh unique IDs from the symbolic-tensor counter.

    Args:
      num_tensors: Number of IDs to generate (may be 0).

    Returns:
      A list of `num_tensors` int IDs, monotonically increasing across calls.
    """
    tensor_ids = []
    if num_tensors:
      with self._symbolic_tensor_counter_lock:
        for _ in range(num_tensors):
          self._symbolic_tensor_counter += 1
          tensor_ids.append(self._symbolic_tensor_counter)
    return tensor_ids

  def _should_dump_tensor(self, op_type, dtype):
    """Determine if the given tensor's value will be dumped.

    The determination is made given the configurations such as `op_regex`,
    `tensor_dtypes`.

    Args:
      op_type: Name of the op's type, as a string (e.g., "MatMul").
      dtype: The dtype of the tensor, as a `dtypes.DType` object.

    Returns:
      A bool indicating whether the tensor's value will be dumped.
    """
    should_dump = True
    if self._op_regex:
      should_dump = (should_dump and
                     re.match(self._op_regex, op_type))
    if self._tensor_dtypes:
      if isinstance(self._tensor_dtypes, (list, tuple)):
        should_dump = (should_dump and
                       any(dtype == dtype_item for dtype_item
                           in self._tensor_dtypes))
      else:  # A callable that takes a DType argument and return a boolean.
        should_dump = should_dump and self._tensor_dtypes(dtype)
    return should_dump
673@tf_export("debugging.experimental.enable_dump_debug_info")
674def enable_dump_debug_info(dump_root,
675 tensor_debug_mode=DEFAULT_TENSOR_DEBUG_MODE,
676 circular_buffer_size=1000,
677 op_regex=None,
678 tensor_dtypes=None):
679 """Enable dumping debugging information from a TensorFlow program.
681 The debugging information is dumped to a directory on the file system
682 specified as `dump_root`.
684 The dumped debugging information can be ingested by debugger UIs.
686 The files in the dump directory contain the following information:
687 - TensorFlow Function construction (e.g., compilation of Python functions
688 decorated with @tf.function), the op types, names (if available), context,
689 the input and output tensors, and the associated stack traces.
690 - Execution of TensorFlow operations (ops) and Functions and their stack
691 traces, op types, names (if available) and contexts. In addition,
692 depending on the value of the `tensor_debug_mode` argument (see Args
693 section below), the value(s) of the output tensors or more concise
694 summaries of the tensor values will be dumped.
695 - A snapshot of Python source files involved in the execution of the
696 TensorFlow program.
698 Once enabled, the dumping can be disabled with the corresponding
699 `disable_dump_debug_info()` method under the same Python namespace.
700 Calling this method more than once with the same `dump_root` is idempotent.
701 Calling this method more than once with different `tensor_debug_mode`s
702 leads to a `ValueError`.
703 Calling this method more than once with different `circular_buffer_size`s
704 leads to a `ValueError`.
705 Calling this method with a different `dump_root` abolishes the
706 previously-enabled `dump_root`.
708 Usage example:
710 ```py
711 tf.debugging.experimental.enable_dump_debug_info('/tmp/my-tfdbg-dumps')
713 # Code to build, train and run your TensorFlow model...
714 ```
716 NOTE: If your code is running on TPUs, be sure to call
717 `tf.config.set_soft_device_placement(True)` before calling
718 `tf.debugging.experimental.enable_dump_debug_info()` as this API uses
719 automatic outside compilation on TPUs. For example:
721 ```py
722 tf.config.set_soft_device_placement(True)
723 tf.debugging.experimental.enable_dump_debug_info(
724 logdir, tensor_debug_mode="FULL_HEALTH")
726 resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
727 strategy = tf.distribute.TPUStrategy(resolver)
728 with strategy.scope():
729 # ...
730 ```
732 Args:
733 dump_root: The directory path where the dumping information will be written.
734 tensor_debug_mode: Debug mode for tensor values, as a string.
735 The currently supported options are:
736 - "NO_TENSOR": (Default) Only traces the output tensors of all executed
737 ops (including those executed eagerly at the Python level or as a part
738 of a TensorFlow graph) and functions, while not extracting any
739 information from the values of the tensors.
740 - "CURT_HEALTH": For each floating-dtype tensor (e.g., tensors of dtypes
741 such as `float32`, `float64` and `bfloat16`), extracts a binary bit
742 indicating whether it contains any -infinity, +infinity or NaN.
743 - "CONCISE_HEALTH": For each floating-dtype tensor, extract total
744 element count, and counts of -infinity, +infinity and NaN elements.
745 - "FULL_HEALTH": For each floating-dtype tensor, extracts the dtype,
746 rank (number of dimensions), total element count, and counts of
747 -infinity, +infinity and NaN elements.
748 - "SHAPE": For each tensor (regardless of dtype), extracts its dtype,
749 rank, total element count and shape.
750 circular_buffer_size: Size of the circular buffers for execution events.
751 These circular buffers are designed to reduce the overhead of debugging
752 dumping. They hold the most recent debug events concerning eager execution
753 of ops and `tf.function`s and traces of tensor values computed inside
754 `tf.function`s. They are written to the file system only when the proper
755 flushing method is called (see description of return values below).
756 Expected to be an integer. If <= 0, the circular-buffer behavior will be
757 disabled, i.e., the execution debug events will be written to the file
758 writers in the same way as non-execution events such as op creations and
759 source-file snapshots.
760 op_regex: Dump data from only the tensors from op types that matches to the
761 regular expression (through Python's `re.match()`).
762 "Op type" refers to the names of the TensorFlow operations (e.g.,
763 "MatMul", "LogSoftmax"), which may repeat in a TensorFlow
764 function. It does *not* refer to the names of nodes (e.g.,
765 "dense/MatMul", "dense_1/MatMul_1") which are unique within a function.
766 - Example 1: Dump tensor data from only MatMul and Relu ops
767 `op_regex="^(MatMul|Relu)$"`.
768 - Example 2: Dump tensors from all ops *except* Relu:
769 `op_regex="(?!^Relu$)"`.
770 This filter operates in a logical AND relation with `tensor_dtypes`.
771 tensor_dtypes: Dump data from only the tensors of which the specified
772 dtypes. This optional argument can be in any of the following format:
773 - a list or tuple of `DType` objects or strings that can be converted
774 to `DType` objects via `tf.as_dtype()`. Examples:
775 - `tensor_dtype=[tf.float32, tf.float64]`,
776 - `tensor_dtype=["float32", "float64"]`,
777 - `tensor_dtypes=(tf.int32, tf.bool)`,
778 - `tensor_dtypes=("int32", "bool")`
779 - a callable that takes a single `DType` argument and returns a Python
780 `boolean` indicating whether the dtype is to be included in the data
781 dumping. Examples:
782 - `tensor_dtype=lambda dtype: dtype.is_integer`.
783 This filter operates in a logical AND relation with `op_regex`.
784 Returns:
785 A DebugEventsWriter instance used by the dumping callback. The caller
786 may use its flushing methods, including `FlushNonExecutionFiles()` and
787 `FlushExecutionFiles()`.
788 """
789 # TODO(cais): Revise the "UIs (currently under construction)" part of the doc
790 # string above.
791 # TODO(cais): Add Python code example to the doc string above.
792 global _state
794 tensor_debug_mode_keys = debug_event_pb2.TensorDebugMode.keys()
795 if tensor_debug_mode not in tensor_debug_mode_keys:
796 raise ValueError(
797 "Invalid value in tensor_debug_mode ('%s'). Valid options are: %s" %
798 (tensor_debug_mode, tensor_debug_mode_keys))
800 tensor_debug_mode = debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode)
801 if tensor_debug_mode not in (debug_event_pb2.TensorDebugMode.NO_TENSOR,
802 debug_event_pb2.TensorDebugMode.CURT_HEALTH,
803 debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
804 debug_event_pb2.TensorDebugMode.FULL_HEALTH,
805 debug_event_pb2.TensorDebugMode.SHAPE,
806 debug_event_pb2.TensorDebugMode.FULL_TENSOR):
807 raise NotImplementedError(
808 "tfdbg dumping: support for tensor debug mode %s is not "
809 "implemented yet" %
810 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
812 # Validate the types of tensor_dtypes.
813 if tensor_dtypes is not None:
814 if (not isinstance(tensor_dtypes, (list, tuple)) and
815 not callable(tensor_dtypes)):
816 raise ValueError(
817 "If specified, tensor_dtypes is expected to be a list, a tuple, or "
818 "a callable that takes a DType argument and returns a boolean, "
819 "but received %s" % (tensor_dtypes,))
820 if isinstance(tensor_dtypes, (list, tuple)):
821 tensor_dtypes = [
822 dtypes.as_dtype(dtype_item) for dtype_item in tensor_dtypes]
824 if hasattr(_state, "dumping_callback"):
825 if _state.dumping_callback.circular_buffer_size != circular_buffer_size:
826 raise ValueError(
827 "There is already a dumping callback configured with a different "
828 "circular-buffer size (%d). Therefore the newly request "
829 "circular-buffer size (%d) will not be honored." %
830 (_state.dumping_callback.circular_buffer_size, circular_buffer_size))
831 if _state.dumping_callback.tensor_debug_mode != tensor_debug_mode:
832 raise ValueError(
833 "There is already a dumping callback configured for dump root "
834 "%s with a different "
835 "tensor-debug mode (%s). Therefore the newly request "
836 "tensor-debug mode (%s) size will not be honored." %
837 (_state.dumping_callback.dump_root,
838 tensor_debug_mode_keys[_state.dumping_callback.tensor_debug_mode],
839 tensor_debug_mode_keys[tensor_debug_mode]))
840 else:
841 _state.dumping_callback = _DumpingCallback(dump_root,
842 tensor_debug_mode,
843 circular_buffer_size,
844 op_regex,
845 tensor_dtypes)
846 op_callbacks.add_op_callback(_state.dumping_callback.callback)
847 function_lib.add_function_callback(
848 _state.dumping_callback.function_callback)
850 if _state.dumping_callback.dump_root != dump_root:
851 _state.dumping_callback.dump_root = dump_root
853 logging.info(
854 "Enabled dumping callback in thread %s "
855 "(dump root: %s, tensor debug mode: %s)",
856 threading.current_thread().name,
857 _state.dumping_callback.dump_root,
858 debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
860 atexit.register(disable_dump_debug_info)
861 return _state.dumping_callback.get_writer()
@tf_export("debugging.experimental.disable_dump_debug_info")
def disable_dump_debug_info():
  """Disable the currently-enabled debugging dumping.

  If the `enable_dump_debug_info()` method under the same Python namespace
  has been invoked before, calling this method disables it. If no call to
  `enable_dump_debug_info()` has been made, calling this method is a no-op.
  Calling this method more than once is idempotent.
  """
  # Guard clause: if dumping was never enabled in this thread, do nothing.
  if not hasattr(_state, "dumping_callback"):
    return
  callback = _state.dumping_callback
  dump_root = callback.dump_root
  # Tear down the debug-events writer associated with this dump root.
  writer = debug_events_writer.DebugEventsWriter(dump_root,
                                                 callback.tfdbg_run_id)
  writer.Close()
  # Unhook the callback from both eager op execution and function tracing.
  op_callbacks.remove_op_callback(callback.callback)
  function_lib.remove_function_callback(callback.function_callback)
  # Remove the thread-local state so a later enable starts fresh.
  delattr(_state, "dumping_callback")
  logging.info("Disabled dumping callback in thread %s (dump root: %s)",
               threading.current_thread().name, dump_root)