1"""
2Printing tools.
3"""
4from __future__ import annotations
5
6from collections.abc import (
7 Iterable,
8 Mapping,
9 Sequence,
10)
11import sys
12from typing import (
13 Any,
14 Callable,
15 TypeVar,
16 Union,
17)
18from unicodedata import east_asian_width
19
20from pandas._config import get_option
21
22from pandas.core.dtypes.inference import is_sequence
23
24from pandas.io.formats.console import get_console_size
25
26EscapeChars = Union[Mapping[str, str], Iterable[str]]
27_KT = TypeVar("_KT")
28_VT = TypeVar("_VT")
29
30
def adjoin(space: int, *lists: list[str], **kwargs) -> str:
    """
    Glue several columns of strings together side by side.

    Every column except the last is left-justified to the width of its
    widest entry plus ``space``; the last column is justified only to the
    ``len`` of its longest entry. Columns with fewer rows than the tallest
    column get blank rows prepended so that all columns have the same
    number of rows.

    Parameters
    ----------
    space : int
        Number of spaces for padding between adjacent columns.
    *lists : list of str
        The columns being joined; each list holds the rows of one column.
    strlen : callable, optional
        Function used to calculate the length of each str. Needed for
        unicode handling. Defaults to ``len``.
    justfunc : callable, optional
        Function used to justify str, called as
        ``justfunc(texts, max_len, mode="left")``. Needed for unicode
        handling. Defaults to ``_adj_justify``.

    Returns
    -------
    str
        The rows of the combined columns joined by newlines; an empty
        string when no columns are passed.
    """
    strlen = kwargs.pop("strlen", len)
    justfunc = kwargs.pop("justfunc", _adj_justify)

    if not lists:
        # Nothing to join; avoids indexing ``lists[-1]`` on an empty tuple.
        return ""

    # ``default=0`` keeps an empty column from raising inside ``max``.
    widths = [max(map(strlen, col), default=0) + space for col in lists[:-1]]
    # The last column gets no separator padding and is measured with ``len``.
    widths.append(max(map(len, lists[-1]), default=0))
    n_rows = max(map(len, lists))

    columns = []
    for width, col in zip(widths, lists):
        justified = justfunc(col, width, mode="left")
        # Blank rows go on top so shorter columns are bottom-aligned.
        blank_rows = [" " * width] * (n_rows - len(col))
        columns.append(blank_rows + justified)

    return "\n".join("".join(row) for row in zip(*columns))
61
62
63def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
64 """
65 Perform ljust, center, rjust against string or list-like
66 """
67 if mode == "left":
68 return [x.ljust(max_len) for x in texts]
69 elif mode == "center":
70 return [x.center(max_len) for x in texts]
71 else:
72 return [x.rjust(max_len) for x in texts]
73
74
75# Unicode consolidation
76# ---------------------
77#
78# pprinting utility functions for generating Unicode text or
79# bytes(3.x)/str(2.x) representations of objects.
80# Try to use these as much as possible rather than rolling your own.
81#
82# When to use
83# -----------
84#
85# 1) If you're writing code internal to pandas (no I/O directly involved),
86# use pprint_thing().
87#
#    It will always return unicode text which can be handled by other
#    parts of the package without breakage.
90#
91# 2) if you need to write something out to file, use
92# pprint_thing_encoded(encoding).
93#
94# If no encoding is specified, it defaults to utf-8. Since encoding pure
95# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
96# working with straight ascii.
97
98
99def _pprint_seq(
100 seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
101) -> str:
102 """
103 internal. pprinter for iterables. you should probably use pprint_thing()
104 rather than calling this directly.
105
106 bounds length of printed sequence, depending on options
107 """
108 if isinstance(seq, set):
109 fmt = "{{{body}}}"
110 else:
111 fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
112
113 if max_seq_items is False:
114 nitems = len(seq)
115 else:
116 nitems = max_seq_items or get_option("max_seq_items") or len(seq)
117
118 s = iter(seq)
119 # handle sets, no slicing
120 r = [
121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
122 for i in range(min(nitems, len(seq)))
123 ]
124 body = ", ".join(r)
125
126 if nitems < len(seq):
127 body += ", ..."
128 elif isinstance(seq, tuple) and len(seq) == 1:
129 body += ","
130
131 return fmt.format(body=body)
132
133
134def _pprint_dict(
135 seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
136) -> str:
137 """
138 internal. pprinter for iterables. you should probably use pprint_thing()
139 rather than calling this directly.
140 """
141 fmt = "{{{things}}}"
142 pairs = []
143
144 pfmt = "{key}: {val}"
145
146 if max_seq_items is False:
147 nitems = len(seq)
148 else:
149 nitems = max_seq_items or get_option("max_seq_items") or len(seq)
150
151 for k, v in list(seq.items())[:nitems]:
152 pairs.append(
153 pfmt.format(
154 key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
155 val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
156 )
157 )
158
159 if nitems < len(seq):
160 return fmt.format(things=", ".join(pairs) + ", ...")
161 else:
162 return fmt.format(things=", ".join(pairs))
163
164
def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: EscapeChars | None = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: int | None = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with _pprint_seq() and _pprint_dict(); this argument is used to
        keep track of the current nesting level, and limit it to the
        ``display.pprint_nest_depth`` option.
    escape_chars : list or mapping, optional
        Characters to escape. If a mapping is passed, its keys are the
        characters and its values are the replacements. If a plain
        iterable is passed, the replacements are looked up in the default
        table, which only covers tab, newline and carriage return.
    default_escapes : bool, default False
        Whether the replacements of a mapping passed as ``escape_chars``
        are merged into the default table (True) or replace it (False).
    quote_strings : bool, default False
        Whether ``str`` values are wrapped in single quotes.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str

    Notes
    -----
    Objects with a ``__next__`` attribute are rendered with ``str()``
    without being consumed. Dicts are always printed with
    ``quote_strings=True`` and ``escape_chars`` is not forwarded to them.
    """

    def as_escaped_string(
        thing: Any, escape_chars: EscapeChars | None = escape_chars
    ) -> str:
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        # Accept any Mapping, as the EscapeChars alias promises; checking
        # for ``dict`` only would treat other mappings as bare key lists
        # and ignore their replacement values.
        if isinstance(escape_chars, Mapping):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or ()

        result = str(thing)
        for c in escape_chars:
            result = result.replace(c, translate[c])
        return result

    if hasattr(thing, "__next__"):
        # Iterators: do not consume them just to print them.
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = f"'{as_escaped_string(thing)}'"
    else:
        # Scalars, and containers nested deeper than the allowed depth.
        result = as_escaped_string(thing)

    return result
236
237
def pprint_thing_encoded(
    object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """
    Return the pprint_thing() text of ``object`` encoded to bytes.

    ``encoding`` and ``errors`` are handed to ``str.encode`` unchanged.
    """
    return pprint_thing(object).encode(encoding, errors)
243
244
def enable_data_resource_formatter(enable: bool) -> None:
    """
    Switch the ``application/vnd.dataresource+json`` IPython formatter on or off.

    Does nothing when IPython has not been imported or when there is no
    active IPython shell. On enabling, a formatter that calls
    ``_repr_data_resource_`` is registered if the mimetype has none yet.
    """
    if "IPython" not in sys.modules:
        # IPython was never imported, so we cannot be running inside it.
        return
    from IPython import get_ipython

    shell = get_ipython()
    if shell is None:
        # IPython is importable but no shell is running.
        return

    registry = shell.display_formatter.formatters
    mimetype = "application/vnd.dataresource+json"

    if not enable:
        # Only flip the switch off; the formatter itself stays registered.
        if mimetype in registry:
            registry[mimetype].enabled = False
        return

    if mimetype not in registry:
        # First use: define and register the tableschema formatter.
        from IPython.core.formatters import BaseFormatter
        from traitlets import ObjectName

        class TableSchemaFormatter(BaseFormatter):
            print_method = ObjectName("_repr_data_resource_")
            _return_type = (dict,)

        registry[mimetype] = TableSchemaFormatter()

    # Re-enable it in case it had been disabled earlier.
    registry[mimetype].enabled = True
276
277
def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
    """
    Format ``thing`` with pprint_thing(), quoting strings and escaping tab,
    carriage-return and newline characters.

    ``max_seq_items`` is passed straight through to pprint_thing().
    """
    whitespace_escapes = ("\t", "\r", "\n")
    return pprint_thing(
        thing,
        escape_chars=whitespace_escapes,
        quote_strings=True,
        max_seq_items=max_seq_items,
    )
285
286
def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: str | None = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    The values are rendered between square brackets, wrapped onto
    continuation lines once a line would reach the display width, and
    abbreviated with ``...`` when ``obj`` holds more items than the
    ``display.max_seq_items`` option allows. ``name`` itself is not part
    of the result; only its length is used, to compute indentation. The
    returned text ends with a comma followed by whitespace (a single
    space, or a newline plus indentation).

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__ (integer indexing and
        slicing) as well as ``len()``
    formatter : callable
        string formatter for an element
    is_justify : bool
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when a line of values gets wider
        than the display width.

    Returns
    -------
    summary string
    """
    display_width, _ = get_console_size()
    if display_width is None:
        # No console width available: use the display.width option, or 80
        # when that option is falsy.
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    # space1 is appended after the closing bracket when the summary does not
    # stay on one row; space2 starts every continuation line of values.
    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    # A falsy option value means "no limit": fall back to the full length.
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> tuple[str, str]:
        # Append ``value`` to the current ``line``. If that would reach
        # ``display_width``, first flush the stripped line into ``s`` and
        # start a fresh line with ``next_line_prefix``.
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: list[str]) -> int:
        # Widest display length among ``values``; 0 for an empty list.
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    # Zero, one or two values (without per-value line breaks) are formatted
    # directly; everything else goes through the wrapping logic below.
    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:
        if max_seq_items == 1:
            # If max_seq_items=1 show only last element
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif n > max_seq_items:
            # NOTE: n is rebound here; from now on it is the number of
            # values shown at each end, capped at 10.
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            # Not truncated: every value goes into tail.
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            max_items = 1
            # Find the largest item count for which the first tail value
            # still fits; default to 1 if none does.
            for num_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=num_items)
                if len(pprinted_seq) < max_space:
                    max_items = num_items
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        # ``summary`` collects finished lines, ``line`` is the one being built.
        summary = ""
        line = space2

        for head_value in head:
            word = head_value + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for tail_item in tail[:-1]:
            word = tail_item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # close is still ', ' at this point (it is not reassigned above).
        # Now we want to include the ']', but not the trailing space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space: drop the leading ``space2`` prefix the text
        # was built with and put the opening bracket in its place
        summary = "[" + summary[len(space2) :]

    return summary
456
457
458def _justify(
459 head: list[Sequence[str]], tail: list[Sequence[str]]
460) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
461 """
462 Justify items in head and tail, so they are right-aligned when stacked.
463
464 Parameters
465 ----------
466 head : list-like of list-likes of strings
467 tail : list-like of list-likes of strings
468
469 Returns
470 -------
471 tuple of list of tuples of strings
472 Same as head and tail, but items are right aligned when stacked
473 vertically.
474
475 Examples
476 --------
477 >>> _justify([['a', 'b']], [['abc', 'abcd']])
478 ([(' a', ' b')], [('abc', 'abcd')])
479 """
480 combined = head + tail
481
482 # For each position for the sequences in ``combined``,
483 # find the length of the largest string.
484 max_length = [0] * len(combined[0])
485 for inner_seq in combined:
486 length = [len(item) for item in inner_seq]
487 max_length = [max(x, y) for x, y in zip(max_length, length)]
488
489 # justify each item in each list-like in head and tail using max_length
490 head_tuples = [
491 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
492 ]
493 tail_tuples = [
494 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
495 ]
496 return head_tuples, tail_tuples
497
498
class PrettyDict(dict[_KT, _VT]):
    """
    A ``dict`` whose ``repr`` is produced by pprint_thing(), so long
    dictionaries are shown in abbreviated form.
    """

    def __repr__(self) -> str:
        # Delegate to the module's pretty printer instead of dict.__repr__.
        rendered = pprint_thing(self)
        return rendered
504
505
506class _TextAdjustment:
507 def __init__(self) -> None:
508 self.encoding = get_option("display.encoding")
509
510 def len(self, text: str) -> int:
511 return len(text)
512
513 def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
514 """
515 Perform ljust, center, rjust against string or list-like
516 """
517 if mode == "left":
518 return [x.ljust(max_len) for x in texts]
519 elif mode == "center":
520 return [x.center(max_len) for x in texts]
521 else:
522 return [x.rjust(max_len) for x in texts]
523
524 def adjoin(self, space: int, *lists, **kwargs) -> str:
525 return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
526
527
528class _EastAsianTextAdjustment(_TextAdjustment):
529 def __init__(self) -> None:
530 super().__init__()
531 if get_option("display.unicode.ambiguous_as_wide"):
532 self.ambiguous_width = 2
533 else:
534 self.ambiguous_width = 1
535
536 # Definition of East Asian Width
537 # https://unicode.org/reports/tr11/
538 # Ambiguous width can be changed by option
539 self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
540
541 def len(self, text: str) -> int:
542 """
543 Calculate display width considering unicode East Asian Width
544 """
545 if not isinstance(text, str):
546 return len(text)
547
548 return sum(
549 self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text
550 )
551
552 def justify(
553 self, texts: Iterable[str], max_len: int, mode: str = "right"
554 ) -> list[str]:
555 # re-calculate padding space per str considering East Asian Width
556 def _get_pad(t):
557 return max_len - self.len(t) + len(t)
558
559 if mode == "left":
560 return [x.ljust(_get_pad(x)) for x in texts]
561 elif mode == "center":
562 return [x.center(_get_pad(x)) for x in texts]
563 else:
564 return [x.rjust(_get_pad(x)) for x in texts]
565
566
def get_adjustment() -> _TextAdjustment:
    """
    Return a text adjustment object chosen by the
    ``display.unicode.east_asian_width`` option: the East Asian aware one
    when the option is truthy, the plain one otherwise.
    """
    adjustment_cls = (
        _EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else _TextAdjustment
    )
    return adjustment_cls()