Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/formats/printing.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

240 statements  

1""" 

2Printing tools. 

3""" 

4from __future__ import annotations 

5 

6from collections.abc import ( 

7 Iterable, 

8 Mapping, 

9 Sequence, 

10) 

11import sys 

12from typing import ( 

13 Any, 

14 Callable, 

15 TypeVar, 

16 Union, 

17) 

18from unicodedata import east_asian_width 

19 

20from pandas._config import get_option 

21 

22from pandas.core.dtypes.inference import is_sequence 

23 

24from pandas.io.formats.console import get_console_size 

25 

# Type accepted by the ``escape_chars`` argument of ``pprint_thing``: either a
# mapping of character -> replacement text, or a plain iterable of characters.
EscapeChars = Union[Mapping[str, str], Iterable[str]]
# Key and value type variables used to parametrize ``PrettyDict``.
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")

29 

30 

def adjoin(space: int, *lists: list[str], **kwargs) -> str:
    """
    Glues together two sets of strings using the amount of space requested.
    The idea is to prettify.

    Parameters
    ----------
    space : int
        number of spaces for padding
    lists : str
        list of str which being joined
    strlen : callable
        function used to calculate the length of each str. Needed for unicode
        handling.
    justfunc : callable
        function used to justify str. Needed for unicode handling.

    Returns
    -------
    str
        One line per row, rows separated by ``"\\n"``. An empty string is
        returned when no lists are given.
    """
    strlen = kwargs.pop("strlen", len)
    # Only fall back to the module-level justifier when the caller did not
    # supply one.
    justfunc = kwargs.pop("justfunc") if "justfunc" in kwargs else _adj_justify

    if not lists:
        # nothing to glue together
        return ""

    # Width of every column except the last: widest entry plus the requested
    # spacing. ``default=0`` keeps an empty column from raising.
    lengths = [max(map(strlen, x), default=0) + space for x in lists[:-1]]
    # not the last one: it gets no extra spacing and is measured with the
    # builtin ``len`` rather than ``strlen``
    lengths.append(max(map(len, lists[-1]), default=0))

    max_rows = max(map(len, lists))
    columns = []
    for width, lst in zip(lengths, lists):
        justified = justfunc(lst, width, mode="left")
        # Columns with fewer rows are topped up with blank cells placed
        # *before* their own entries so that all columns have ``max_rows`` rows.
        justified = ([" " * width] * (max_rows - len(lst))) + justified
        columns.append(justified)

    return "\n".join("".join(row) for row in zip(*columns))

61 

62 

63def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]: 

64 """ 

65 Perform ljust, center, rjust against string or list-like 

66 """ 

67 if mode == "left": 

68 return [x.ljust(max_len) for x in texts] 

69 elif mode == "center": 

70 return [x.center(max_len) for x in texts] 

71 else: 

72 return [x.rjust(max_len) for x in texts] 

73 

74 

75# Unicode consolidation 

76# --------------------- 

77# 

78# pprinting utility functions for generating Unicode text or 

79# bytes(3.x)/str(2.x) representations of objects. 

80# Try to use these as much as possible rather than rolling your own. 

81# 

82# When to use 

83# ----------- 

84# 

85# 1) If you're writing code internal to pandas (no I/O directly involved), 

86# use pprint_thing(). 

87# 

88# It will always return unicode text which can be handled by other 

89# parts of the package without breakage. 

90# 

91# 2) if you need to write something out to file, use 

92# pprint_thing_encoded(encoding). 

93# 

94# If no encoding is specified, it defaults to utf-8. Since encoding pure 

95# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're 

96# working with straight ascii. 

97 

98 

99def _pprint_seq( 

100 seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds 

101) -> str: 

102 """ 

103 internal. pprinter for iterables. you should probably use pprint_thing() 

104 rather than calling this directly. 

105 

106 bounds length of printed sequence, depending on options 

107 """ 

108 if isinstance(seq, set): 

109 fmt = "{{{body}}}" 

110 else: 

111 fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})" 

112 

113 if max_seq_items is False: 

114 nitems = len(seq) 

115 else: 

116 nitems = max_seq_items or get_option("max_seq_items") or len(seq) 

117 

118 s = iter(seq) 

119 # handle sets, no slicing 

120 r = [ 

121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) 

122 for i in range(min(nitems, len(seq))) 

123 ] 

124 body = ", ".join(r) 

125 

126 if nitems < len(seq): 

127 body += ", ..." 

128 elif isinstance(seq, tuple) and len(seq) == 1: 

129 body += "," 

130 

131 return fmt.format(body=body) 

132 

133 

134def _pprint_dict( 

135 seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds 

136) -> str: 

137 """ 

138 internal. pprinter for iterables. you should probably use pprint_thing() 

139 rather than calling this directly. 

140 """ 

141 fmt = "{{{things}}}" 

142 pairs = [] 

143 

144 pfmt = "{key}: {val}" 

145 

146 if max_seq_items is False: 

147 nitems = len(seq) 

148 else: 

149 nitems = max_seq_items or get_option("max_seq_items") or len(seq) 

150 

151 for k, v in list(seq.items())[:nitems]: 

152 pairs.append( 

153 pfmt.format( 

154 key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), 

155 val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), 

156 ) 

157 ) 

158 

159 if nitems < len(seq): 

160 return fmt.format(things=", ".join(pairs) + ", ...") 

161 else: 

162 return fmt.format(things=", ".join(pairs)) 

163 

164 

def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: EscapeChars | None = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: int | None = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or mapping, optional
        Characters to escape. If a mapping is passed the values are the
        replacements
    default_escapes : bool, default False
        Only used when ``escape_chars`` is a mapping. If True the mapping is
        merged into the default replacements for tab, newline and carriage
        return; otherwise it is used on its own. In both cases only the
        mapping's own keys are escaped.
    quote_strings : bool, default False
        Whether strings are wrapped in single quotes
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str

    Raises
    ------
    KeyError
        If ``escape_chars`` is a non-mapping iterable containing a character
        that has no default replacement.
    """

    def as_escaped_string(
        thing: Any, escape_chars: EscapeChars | None = escape_chars
    ) -> str:
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        # Accept any Mapping, not only dict, in line with the EscapeChars
        # alias; a non-dict mapping must not be treated as a bare list of keys.
        if isinstance(escape_chars, Mapping):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or ()

        result = str(thing)
        for c in escape_chars:
            result = result.replace(c, translate[c])
        return result

    if hasattr(thing, "__next__"):
        # iterators are not consumed, just stringified
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = f"'{as_escaped_string(thing)}'"
    else:
        result = as_escaped_string(thing)

    return result

236 

237 

def pprint_thing_encoded(
    object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """
    Format ``object`` with ``pprint_thing`` and encode the text to bytes.

    ``encoding`` and ``errors`` are handed to ``str.encode``.
    """
    # unicode representation first, bytes second
    return pprint_thing(object).encode(encoding, errors)

243 

244 

def enable_data_resource_formatter(enable: bool) -> None:
    """
    Switch the IPython formatter for ``application/vnd.dataresource+json``.

    Does nothing when IPython has not been imported or when there is no
    active IPython shell. When enabling, a formatter that dispatches to
    ``_repr_data_resource_`` is registered if none exists for the mimetype
    and is then marked enabled. When disabling, an existing formatter is
    marked disabled but stays registered.
    """
    if "IPython" not in sys.modules:
        # definitely not in IPython
        return
    from IPython import get_ipython

    shell = get_ipython()
    if shell is None:
        # still not in IPython
        return

    registry = shell.display_formatter.formatters
    mime = "application/vnd.dataresource+json"

    if not enable:
        # switch off the tableschema mime-type, if it was ever registered
        if mime in registry:
            registry[mime].enabled = False
        return

    if mime not in registry:
        # define tableschema formatter
        from IPython.core.formatters import BaseFormatter
        from traitlets import ObjectName

        class TableSchemaFormatter(BaseFormatter):
            print_method = ObjectName("_repr_data_resource_")
            _return_type = (dict,)

        # register it:
        registry[mime] = TableSchemaFormatter()
    # enable it if it's been disabled:
    registry[mime].enabled = True

276 

277 

def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
    """
    Format ``thing`` with ``pprint_thing`` using quoted strings and escaped
    tab, carriage-return and newline characters.
    """
    options = {
        "escape_chars": ("\t", "\r", "\n"),
        "quote_strings": True,
        "max_seq_items": max_seq_items,
    }
    return pprint_thing(thing, **options)

285 

286 

def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: str | None = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    The values are rendered between square brackets followed by a trailing
    comma; long output is wrapped at the display width and, when ``obj`` has
    more items than the ``display.max_seq_items`` option allows, only the
    leading and trailing items are shown with ``...`` in between.

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : bool
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when a line of values gets wider
        than the display width.

    Returns
    -------
    summary string
    """
    # Prefer the detected console width, then the ``display.width`` option,
    # then a fixed fallback of 80.
    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    # space1 / space2 are the prefixes (newline plus indentation) used for
    # continuation lines; space2 is one character wider than space1.
    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    # a falsy option value means "no limit", i.e. show all n items
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = get_adjustment()

    # Append ``value`` to the current ``line``. If that would reach
    # ``display_width``, the current line is first flushed into ``s`` and a
    # new line is started with ``next_line_prefix``.
    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> tuple[str, str]:
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    # Widest entry of ``values`` as measured by ``adj``; 0 for an empty list.
    def best_len(values: list[str]) -> int:
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:
        if max_seq_items == 1:
            # If max_seq_items=1 show only last element
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif n > max_seq_items:
            # n is rebound here: from now on it is the number of items shown
            # at each end (at most 10), no longer the length of obj
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            max_items = 1
            # try the largest item count first and keep the first one whose
            # rendering of tail[0] is narrower than max_space
            for num_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=num_items)
                if len(pprinted_seq) < max_space:
                    max_items = num_items
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        # ``summary`` collects finished lines, ``line`` is the one being built
        summary = ""
        line = space2

        for head_value in head:
            word = head_value + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for tail_item in tail[:-1]:
            word = tail_item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # close is still ', ' at this point
        # Now we want to include the ']', but not the trailing space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space: the leading space2 prefix is swapped for '['
        summary = "[" + summary[len(space2) :]

    return summary

456 

457 

458def _justify( 

459 head: list[Sequence[str]], tail: list[Sequence[str]] 

460) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]: 

461 """ 

462 Justify items in head and tail, so they are right-aligned when stacked. 

463 

464 Parameters 

465 ---------- 

466 head : list-like of list-likes of strings 

467 tail : list-like of list-likes of strings 

468 

469 Returns 

470 ------- 

471 tuple of list of tuples of strings 

472 Same as head and tail, but items are right aligned when stacked 

473 vertically. 

474 

475 Examples 

476 -------- 

477 >>> _justify([['a', 'b']], [['abc', 'abcd']]) 

478 ([(' a', ' b')], [('abc', 'abcd')]) 

479 """ 

480 combined = head + tail 

481 

482 # For each position for the sequences in ``combined``, 

483 # find the length of the largest string. 

484 max_length = [0] * len(combined[0]) 

485 for inner_seq in combined: 

486 length = [len(item) for item in inner_seq] 

487 max_length = [max(x, y) for x, y in zip(max_length, length)] 

488 

489 # justify each item in each list-like in head and tail using max_length 

490 head_tuples = [ 

491 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head 

492 ] 

493 tail_tuples = [ 

494 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail 

495 ] 

496 return head_tuples, tail_tuples 

497 

498 

class PrettyDict(dict[_KT, _VT]):
    """Dict subclass whose ``repr`` is the abbreviated ``pprint_thing`` form."""

    def __repr__(self) -> str:
        """Return the ``pprint_thing`` rendering of this dict."""
        rendered = pprint_thing(self)
        return rendered

504 

505 

506class _TextAdjustment: 

507 def __init__(self) -> None: 

508 self.encoding = get_option("display.encoding") 

509 

510 def len(self, text: str) -> int: 

511 return len(text) 

512 

513 def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]: 

514 """ 

515 Perform ljust, center, rjust against string or list-like 

516 """ 

517 if mode == "left": 

518 return [x.ljust(max_len) for x in texts] 

519 elif mode == "center": 

520 return [x.center(max_len) for x in texts] 

521 else: 

522 return [x.rjust(max_len) for x in texts] 

523 

524 def adjoin(self, space: int, *lists, **kwargs) -> str: 

525 return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) 

526 

527 

528class _EastAsianTextAdjustment(_TextAdjustment): 

529 def __init__(self) -> None: 

530 super().__init__() 

531 if get_option("display.unicode.ambiguous_as_wide"): 

532 self.ambiguous_width = 2 

533 else: 

534 self.ambiguous_width = 1 

535 

536 # Definition of East Asian Width 

537 # https://unicode.org/reports/tr11/ 

538 # Ambiguous width can be changed by option 

539 self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1} 

540 

541 def len(self, text: str) -> int: 

542 """ 

543 Calculate display width considering unicode East Asian Width 

544 """ 

545 if not isinstance(text, str): 

546 return len(text) 

547 

548 return sum( 

549 self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text 

550 ) 

551 

552 def justify( 

553 self, texts: Iterable[str], max_len: int, mode: str = "right" 

554 ) -> list[str]: 

555 # re-calculate padding space per str considering East Asian Width 

556 def _get_pad(t): 

557 return max_len - self.len(t) + len(t) 

558 

559 if mode == "left": 

560 return [x.ljust(_get_pad(x)) for x in texts] 

561 elif mode == "center": 

562 return [x.center(_get_pad(x)) for x in texts] 

563 else: 

564 return [x.rjust(_get_pad(x)) for x in texts] 

565 

566 

567def get_adjustment() -> _TextAdjustment: 

568 use_east_asian_width = get_option("display.unicode.east_asian_width") 

569 if use_east_asian_width: 

570 return _EastAsianTextAdjustment() 

571 else: 

572 return _TextAdjustment()