Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/debug/cli/tensor_format.py: 9%
236 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Format tensors (ndarrays) for screen display and navigation."""
16import copy
17import re
19import numpy as np
21from tensorflow.python.debug.cli import debugger_cli_common
22from tensorflow.python.debug.lib import debug_data
24_NUMPY_OMISSION = "...,"
25_NUMPY_DEFAULT_EDGE_ITEMS = 3
27_NUMBER_REGEX = re.compile(r"[-+]?([0-9][-+0-9eE\.]+|nan|inf)(\s|,|\])")
29BEGIN_INDICES_KEY = "i0"
30OMITTED_INDICES_KEY = "omitted"
32DEFAULT_TENSOR_ELEMENT_HIGHLIGHT_FONT_ATTR = "bold"
class HighlightOptions(object):
  """Options for highlighting elements of a tensor."""

  def __init__(self,
               criterion,
               description=None,
               font_attr=DEFAULT_TENSOR_ELEMENT_HIGHLIGHT_FONT_ATTR):
    """Constructor of HighlightOptions.

    Args:
      criterion: (callable) A callable of the following signature:
        def to_highlight(X):
          # Args:
          #   X: The tensor to highlight elements in.
          #
          # Returns:
          #   (boolean ndarray) A boolean ndarray of the same shape as X
          #   indicating which elements are to be highlighted (iff True).
        This callable will be used as the argument of np.argwhere() to
        determine which elements of the tensor are to be highlighted.
      description: (str) Description of the highlight criterion embodied by
        criterion.
      font_attr: (str) Font attribute to be applied to the
        highlighted elements.
    """

    self.criterion = criterion
    self.description = description
    self.font_attr = font_attr
def format_tensor(tensor,
                  tensor_label,
                  include_metadata=False,
                  auxiliary_message=None,
                  include_numeric_summary=False,
                  np_printoptions=None,
                  highlight_options=None):
  """Generate a RichTextLines object showing a tensor in formatted style.

  Args:
    tensor: The tensor to be displayed, as a numpy ndarray or other
      appropriate format (e.g., None representing uninitialized tensors).
    tensor_label: A label for the tensor, as a string. If set to None, will
      suppress the tensor name line in the return value.
    include_metadata: Whether metadata such as dtype and shape are to be
      included in the formatted text.
    auxiliary_message: An auxiliary message to display under the tensor label,
      dtype and shape information lines.
    include_numeric_summary: Whether a text summary of the numeric values (if
      applicable) will be included.
    np_printoptions: A dictionary of keyword arguments that are passed to a
      call of np.set_printoptions() to set the text format for display numpy
      ndarrays. Note that the options are set globally and are not restored
      when this function returns.
    highlight_options: (HighlightOptions) options for highlighting elements
      of the tensor.

  Returns:
    A RichTextLines object. Its annotation field has line-by-line markups to
    indicate which indices in the array the first element of each line
    corresponds to.
  """
  lines = []
  font_attr_segs = {}

  if tensor_label is not None:
    lines.append("Tensor \"%s\":" % tensor_label)
    suffix = tensor_label.split(":")[-1]
    # Column 8 is where the label starts, right after the 8-character prefix
    # 'Tensor "'.
    if suffix.isdigit():
      # Suffix is a number. Assume it is the output slot index.
      font_attr_segs[0] = [(8, 8 + len(tensor_label), "bold")]
    else:
      # Suffix is not a number. It is auxiliary information such as the debug
      # op type. In this case, highlight the suffix with a different color.
      debug_op_len = len(suffix)
      proper_len = len(tensor_label) - debug_op_len - 1
      font_attr_segs[0] = [
          (8, 8 + proper_len, "bold"),
          (8 + proper_len + 1, 8 + proper_len + 1 + debug_op_len, "yellow")
      ]

  if isinstance(tensor, debug_data.InconvertibleTensorProto):
    if lines:
      lines.append("")
    lines.extend(str(tensor).split("\n"))
    return debugger_cli_common.RichTextLines(lines)
  elif not isinstance(tensor, np.ndarray):
    # If tensor is not a np.ndarray, return simple text-line representation of
    # the object without annotations.
    if lines:
      lines.append("")
    lines.extend(repr(tensor).split("\n"))
    return debugger_cli_common.RichTextLines(lines)

  if include_metadata:
    lines.append("  dtype: %s" % str(tensor.dtype))
    lines.append("  shape: %s" % str(tensor.shape).replace("L", ""))

  if lines:
    lines.append("")
  formatted = debugger_cli_common.RichTextLines(
      lines, font_attr_segs=font_attr_segs)

  if auxiliary_message:
    formatted.extend(auxiliary_message)

  if include_numeric_summary:
    formatted.append("Numeric summary:")
    formatted.extend(numeric_summary(tensor))
    formatted.append("")

  # Apply custom string formatting options for numpy ndarray. This changes
  # numpy's global print options; they are intentionally left in place.
  if np_printoptions is not None:
    np.set_printoptions(**np_printoptions)

  array_lines = repr(tensor).split("\n")
  # np.bytes_ is the same type that np.string_ aliased; np.string_ itself is
  # no longer available in NumPy 2.
  if tensor.dtype.type is not np.bytes_:
    # Parse array lines to get beginning indices for each line.

    # TODO(cais): Currently, we do not annotate string-type tensors due to
    #   difficulty in escaping sequences. Address this issue.
    annotations = _annotate_ndarray_lines(
        array_lines, tensor, np_printoptions=np_printoptions)
  else:
    annotations = None
  formatted_array = debugger_cli_common.RichTextLines(
      array_lines, annotations=annotations)
  formatted.extend(formatted_array)

  # Perform optional highlighting.
  if highlight_options is not None:
    indices_list = list(np.argwhere(highlight_options.criterion(tensor)))

    total_elements = np.size(tensor)
    # Guard against empty tensors, for which the ratio is undefined.
    if total_elements:
      highlighted_percent = len(indices_list) / float(total_elements) * 100.0
    else:
      highlighted_percent = 0.0
    highlight_summary = "Highlighted%s: %d of %d element(s) (%.2f%%)" % (
        "(%s)" % highlight_options.description if highlight_options.description
        else "", len(indices_list), total_elements, highlighted_percent)

    formatted.lines[0] += " " + highlight_summary

    if indices_list:
      indices_list = [list(indices) for indices in indices_list]

      are_omitted, rows, start_cols, end_cols = locate_tensor_element(
          formatted, indices_list)
      for is_omitted, row, start_col, end_col in zip(are_omitted, rows,
                                                     start_cols, end_cols):
        # Elements hidden by an ellipsis or not found on their line cannot be
        # highlighted.
        if is_omitted or start_col is None or end_col is None:
          continue

        if row in formatted.font_attr_segs:
          formatted.font_attr_segs[row].append(
              (start_col, end_col, highlight_options.font_attr))
        else:
          formatted.font_attr_segs[row] = [(start_col, end_col,
                                            highlight_options.font_attr)]

  return formatted
197def _annotate_ndarray_lines(
198 array_lines, tensor, np_printoptions=None, offset=0):
199 """Generate annotations for line-by-line begin indices of tensor text.
201 Parse the numpy-generated text representation of a numpy ndarray to
202 determine the indices of the first element of each text line (if any
203 element is present in the line).
205 For example, given the following multi-line ndarray text representation:
206 ["array([[ 0. , 0.0625, 0.125 , 0.1875],",
207 " [ 0.25 , 0.3125, 0.375 , 0.4375],",
208 " [ 0.5 , 0.5625, 0.625 , 0.6875],",
209 " [ 0.75 , 0.8125, 0.875 , 0.9375]])"]
210 the generate annotation will be:
211 {0: {BEGIN_INDICES_KEY: [0, 0]},
212 1: {BEGIN_INDICES_KEY: [1, 0]},
213 2: {BEGIN_INDICES_KEY: [2, 0]},
214 3: {BEGIN_INDICES_KEY: [3, 0]}}
216 Args:
217 array_lines: Text lines representing the tensor, as a list of str.
218 tensor: The tensor being formatted as string.
219 np_printoptions: A dictionary of keyword arguments that are passed to a
220 call of np.set_printoptions().
221 offset: Line number offset applied to the line indices in the returned
222 annotation.
224 Returns:
225 An annotation as a dict.
226 """
228 if np_printoptions and "edgeitems" in np_printoptions:
229 edge_items = np_printoptions["edgeitems"]
230 else:
231 edge_items = _NUMPY_DEFAULT_EDGE_ITEMS
233 annotations = {}
235 # Put metadata about the tensor in the annotations["tensor_metadata"].
236 annotations["tensor_metadata"] = {
237 "dtype": tensor.dtype, "shape": tensor.shape}
239 dims = np.shape(tensor)
240 ndims = len(dims)
241 if ndims == 0:
242 # No indices for a 0D tensor.
243 return annotations
245 curr_indices = [0] * len(dims)
246 curr_dim = 0
247 for i, raw_line in enumerate(array_lines):
248 line = raw_line.strip()
250 if not line:
251 # Skip empty lines, which can appear for >= 3D arrays.
252 continue
254 if line == _NUMPY_OMISSION:
255 annotations[offset + i] = {OMITTED_INDICES_KEY: copy.copy(curr_indices)}
256 curr_indices[curr_dim - 1] = dims[curr_dim - 1] - edge_items
257 else:
258 num_lbrackets = line.count("[") # TODO(cais): String array escaping.
259 num_rbrackets = line.count("]")
261 curr_dim += num_lbrackets - num_rbrackets
263 annotations[offset + i] = {BEGIN_INDICES_KEY: copy.copy(curr_indices)}
264 if num_rbrackets == 0:
265 line_content = line[line.rfind("[") + 1:]
266 num_elements = line_content.count(",")
267 curr_indices[curr_dim - 1] += num_elements
268 else:
269 if curr_dim > 0:
270 curr_indices[curr_dim - 1] += 1
271 for k in range(curr_dim, ndims):
272 curr_indices[k] = 0
274 return annotations
def locate_tensor_element(formatted, indices):
  """Locate a tensor element in formatted text lines, given element indices.

  Given a RichTextLines object representing a tensor and indices of the sought
  element, return the row number at which the element is located (if exists).

  Args:
    formatted: A RichTextLines object containing formatted text lines
      representing the tensor.
    indices: Indices of the sought element, as a list of int or a list of list
      of int. The former case is for a single set of indices to look up,
      whereas the latter case is for looking up a batch of indices sets at once.
      In the latter case, the indices must be in ascending order, or a
      ValueError will be raised.

  Returns:
    1) A boolean indicating whether the element falls into an omitted line.
    2) Row index.
    3) Column start index, i.e., the first column in which the representation
       of the specified tensor starts, if it can be determined. If it cannot
       be determined (e.g., due to ellipsis), None.
    4) Column end index, i.e., the column right after the last column that
       represents the specified tensor. Iff it cannot be determined, None.

  The return values described above are based on a single set of indices to
    look up. In the case of batch mode (multiple sets of indices), the return
    values will be lists of the types described above.

  Raises:
    AttributeError: If:
      Input argument "formatted" does not have the required annotations.
    ValueError: If:
      1) Indices do not match the dimensions of the tensor, or
      2) Indices exceed sizes of the tensor, or
      3) Indices contain negative value(s).
      4) If in batch mode, and if not all sets of indices are in ascending
         order.
  """

  if isinstance(indices[0], list):
    indices_list = indices
    input_batch = True
  else:
    indices_list = [indices]
    input_batch = False

  # Check that tensor_metadata is available.
  if "tensor_metadata" not in formatted.annotations:
    raise AttributeError("tensor_metadata is not available in annotations.")

  # Sanity check on input argument.
  _validate_indices_list(indices_list, formatted)

  dims = formatted.annotations["tensor_metadata"]["shape"]
  batch_size = len(indices_list)
  lines = formatted.lines
  annot = formatted.annotations
  # State of the most recent annotated line: its row, text and the indices of
  # its first element.
  prev_r = 0
  prev_line = ""
  prev_indices = [0] * len(dims)

  # Initialize return values
  are_omitted = [None] * batch_size
  row_indices = [None] * batch_size
  start_columns = [None] * batch_size
  end_columns = [None] * batch_size

  batch_pos = 0  # Current position in the batch.

  for r, line in enumerate(lines):
    if r not in annot:
      continue

    if BEGIN_INDICES_KEY in annot[r]:
      indices_key = BEGIN_INDICES_KEY
    elif OMITTED_INDICES_KEY in annot[r]:
      indices_key = OMITTED_INDICES_KEY
    else:
      # Annotation without index information; nothing to compare against.
      continue

    # Requested elements that lie between the start of the previous annotated
    # line and the start of this one live on the previous line.
    matching_indices_list = [
        ind for ind in indices_list[batch_pos:]
        if prev_indices <= ind < annot[r][indices_key]
    ]

    if matching_indices_list:
      num_matches = len(matching_indices_list)

      match_start_columns, match_end_columns = _locate_elements_in_line(
          prev_line, matching_indices_list, prev_indices)

      start_columns[batch_pos:batch_pos + num_matches] = match_start_columns
      end_columns[batch_pos:batch_pos + num_matches] = match_end_columns
      are_omitted[batch_pos:batch_pos + num_matches] = [
          OMITTED_INDICES_KEY in annot[prev_r]
      ] * num_matches
      row_indices[batch_pos:batch_pos + num_matches] = [prev_r] * num_matches

      batch_pos += num_matches
      if batch_pos >= batch_size:
        break

    prev_r = r
    prev_line = line
    prev_indices = annot[r][indices_key]

  if batch_pos < batch_size:
    # Whatever is still unmatched is attributed to the last annotated line.
    matching_indices_list = indices_list[batch_pos:]
    num_matches = len(matching_indices_list)

    match_start_columns, match_end_columns = _locate_elements_in_line(
        prev_line, matching_indices_list, prev_indices)

    start_columns[batch_pos:batch_pos + num_matches] = match_start_columns
    end_columns[batch_pos:batch_pos + num_matches] = match_end_columns
    are_omitted[batch_pos:batch_pos + num_matches] = [
        OMITTED_INDICES_KEY in annot[prev_r]
    ] * num_matches
    row_indices[batch_pos:batch_pos + num_matches] = [prev_r] * num_matches

  if input_batch:
    return are_omitted, row_indices, start_columns, end_columns
  else:
    return are_omitted[0], row_indices[0], start_columns[0], end_columns[0]
401def _validate_indices_list(indices_list, formatted):
402 prev_ind = None
403 for ind in indices_list:
404 # Check indices match tensor dimensions.
405 dims = formatted.annotations["tensor_metadata"]["shape"]
406 if len(ind) != len(dims):
407 raise ValueError("Dimensions mismatch: requested: %d; actual: %d" %
408 (len(ind), len(dims)))
410 # Check indices is within size limits.
411 for req_idx, siz in zip(ind, dims):
412 if req_idx >= siz:
413 raise ValueError("Indices exceed tensor dimensions.")
414 if req_idx < 0:
415 raise ValueError("Indices contain negative value(s).")
417 # Check indices are in ascending order.
418 if prev_ind and ind < prev_ind:
419 raise ValueError("Input indices sets are not in ascending order.")
421 prev_ind = ind
def _locate_elements_in_line(line, indices_list, ref_indices):
  """Determine the start and end indices of an element in a line.

  Args:
    line: (str) the line in which the element is to be sought.
    indices_list: (list of list of int) list of indices of the element to
       search for. Assumes that the indices in the batch are unique and sorted
       in ascending order.
    ref_indices: (list of int) reference indices, i.e., the indices of the
      first element represented in the line.

  Returns:
    start_columns: (list of int) start column indices, if found. If not found,
      None.
    end_columns: (list of int) end column indices, if found. If not found,
      None.
    If found, the element is represented in the left-closed-right-open interval
      [start_column, end_column).
  """

  batch_size = len(indices_list)
  # Position of each sought element within the line, counted in elements from
  # the first element on the line (last-dimension distance).
  offsets = [indices[-1] - ref_indices[-1] for indices in indices_list]

  start_columns = [None] * batch_size
  end_columns = [None] * batch_size

  # Numbers past an in-line ellipsis cannot be counted reliably.
  ellipsis_index = line.find(_NUMPY_OMISSION)
  if ellipsis_index < 0:
    ellipsis_index = len(line)

  batch_pos = 0

  offset_counter = 0
  for match in _NUMBER_REGEX.finditer(line):
    if match.start() > ellipsis_index:
      # Do not attempt to search beyond ellipsis.
      break

    if offset_counter == offsets[batch_pos]:
      start_columns[batch_pos] = match.start()
      # Remove the final comma, right bracket, or whitespace.
      end_columns[batch_pos] = match.end() - 1

      batch_pos += 1
      if batch_pos >= batch_size:
        break

    offset_counter += 1

  return start_columns, end_columns
479def _pad_string_to_length(string, length):
480 return " " * (length - len(string)) + string
def numeric_summary(tensor):
  """Get a text summary of a numeric tensor.

  This summary is only available for numeric (int*, float*, complex*) and
  Boolean tensors.

  Args:
    tensor: (`numpy.ndarray`) the tensor value object to be summarized.

  Returns:
    The summary text as a `RichTextLines` object. If the type of `tensor` is not
    numeric or Boolean, a single-line `RichTextLines` object containing a
    warning message will reflect that.
  """

  def _counts_summary(counts, skip_zeros=True, total_count=None):
    """Format values as a two-row table."""
    if skip_zeros:
      counts = [(count_key, count_val) for count_key, count_val in counts
                if count_val]
    # All columns share one width: the widest key or value plus one space.
    max_common_len = 0
    for count_key, count_val in counts:
      count_val_str = str(count_val)
      common_len = max(len(count_key) + 1, len(count_val_str) + 1)
      max_common_len = max(common_len, max_common_len)

    key_line = debugger_cli_common.RichLine("|")
    val_line = debugger_cli_common.RichLine("|")
    for count_key, count_val in counts:
      count_val_str = str(count_val)
      key_line += _pad_string_to_length(count_key, max_common_len)
      val_line += _pad_string_to_length(count_val_str, max_common_len)
    key_line += " |"
    val_line += " |"

    if total_count is not None:
      # The total gets its own cell, sized independently of the other columns.
      total_key_str = "total"
      total_val_str = str(total_count)
      max_common_len = max(len(total_key_str) + 1, len(total_val_str))
      total_key_str = _pad_string_to_length(total_key_str, max_common_len)
      total_val_str = _pad_string_to_length(total_val_str, max_common_len)
      key_line += total_key_str + " |"
      val_line += total_val_str + " |"

    return debugger_cli_common.rich_text_lines_from_rich_line_list(
        [key_line, val_line])

  if not isinstance(tensor, np.ndarray) or not np.size(tensor):
    return debugger_cli_common.RichTextLines([
        "No numeric summary available due to empty tensor."])
  elif (np.issubdtype(tensor.dtype, np.floating) or
        np.issubdtype(tensor.dtype, np.complexfloating) or
        np.issubdtype(tensor.dtype, np.integer)):
    # Count elements by category: nan, -inf, negative, zero, positive, +inf.
    counts = [
        ("nan", np.sum(np.isnan(tensor))),
        ("-inf", np.sum(np.isneginf(tensor))),
        ("-", np.sum(np.logical_and(
            tensor < 0.0, np.logical_not(np.isneginf(tensor))))),
        ("0", np.sum(tensor == 0.0)),
        ("+", np.sum(np.logical_and(
            tensor > 0.0, np.logical_not(np.isposinf(tensor))))),
        ("+inf", np.sum(np.isposinf(tensor)))]
    output = _counts_summary(counts, total_count=np.size(tensor))

    # Statistics are computed over finite, non-nan elements only.
    valid_array = tensor[
        np.logical_not(np.logical_or(np.isinf(tensor), np.isnan(tensor)))]
    if np.size(valid_array):
      stats = [
          ("min", np.min(valid_array)),
          ("max", np.max(valid_array)),
          ("mean", np.mean(valid_array)),
          ("std", np.std(valid_array))]
      output.extend(_counts_summary(stats, skip_zeros=False))
    return output
  elif tensor.dtype == np.bool_:
    counts = [
        ("False", np.sum(tensor == 0)),
        ("True", np.sum(tensor > 0)),]
    return _counts_summary(counts, total_count=np.size(tensor))
  else:
    return debugger_cli_common.RichTextLines([
        "No numeric summary available due to tensor dtype: %s." % tensor.dtype])