Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/formats/format.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

836 statements  

1""" 

2Internal module for formatting output data in csv, html, xml, 

3and latex files. This module also applies to display formatting. 

4""" 

5from __future__ import annotations 

6 

7from collections.abc import ( 

8 Generator, 

9 Hashable, 

10 Mapping, 

11 Sequence, 

12) 

13from contextlib import contextmanager 

14from csv import QUOTE_NONE 

15from decimal import Decimal 

16from functools import partial 

17from io import StringIO 

18import math 

19import re 

20from shutil import get_terminal_size 

21from typing import ( 

22 TYPE_CHECKING, 

23 Any, 

24 Callable, 

25 Final, 

26 cast, 

27) 

28 

29import numpy as np 

30 

31from pandas._config.config import ( 

32 get_option, 

33 set_option, 

34) 

35 

36from pandas._libs import lib 

37from pandas._libs.missing import NA 

38from pandas._libs.tslibs import ( 

39 NaT, 

40 Timedelta, 

41 Timestamp, 

42) 

43from pandas._libs.tslibs.nattype import NaTType 

44 

45from pandas.core.dtypes.common import ( 

46 is_complex_dtype, 

47 is_float, 

48 is_integer, 

49 is_list_like, 

50 is_numeric_dtype, 

51 is_scalar, 

52) 

53from pandas.core.dtypes.dtypes import ( 

54 CategoricalDtype, 

55 DatetimeTZDtype, 

56 ExtensionDtype, 

57) 

58from pandas.core.dtypes.missing import ( 

59 isna, 

60 notna, 

61) 

62 

63from pandas.core.arrays import ( 

64 Categorical, 

65 DatetimeArray, 

66 ExtensionArray, 

67 TimedeltaArray, 

68) 

69from pandas.core.arrays.string_ import StringDtype 

70from pandas.core.base import PandasObject 

71import pandas.core.common as com 

72from pandas.core.indexes.api import ( 

73 Index, 

74 MultiIndex, 

75 PeriodIndex, 

76 ensure_index, 

77) 

78from pandas.core.indexes.datetimes import DatetimeIndex 

79from pandas.core.indexes.timedeltas import TimedeltaIndex 

80from pandas.core.reshape.concat import concat 

81 

82from pandas.io.common import ( 

83 check_parent_directory, 

84 stringify_path, 

85) 

86from pandas.io.formats import printing 

87 

88if TYPE_CHECKING: 

89 from pandas._typing import ( 

90 ArrayLike, 

91 Axes, 

92 ColspaceArgType, 

93 ColspaceType, 

94 CompressionOptions, 

95 FilePath, 

96 FloatFormatType, 

97 FormattersType, 

98 IndexLabel, 

99 SequenceNotStr, 

100 StorageOptions, 

101 WriteBuffer, 

102 ) 

103 

104 from pandas import ( 

105 DataFrame, 

106 Series, 

107 ) 

108 

109 

110common_docstring: Final = """ 

111 Parameters 

112 ---------- 

113 buf : str, Path or StringIO-like, optional, default None 

114 Buffer to write to. If None, the output is returned as a string. 

115 columns : array-like, optional, default None 

116 The subset of columns to write. Writes all columns by default. 

117 col_space : %(col_space_type)s, optional 

118 %(col_space)s. 

119 header : %(header_type)s, optional 

120 %(header)s. 

121 index : bool, optional, default True 

122 Whether to print index (row) labels. 

123 na_rep : str, optional, default 'NaN' 

124 String representation of ``NaN`` to use. 

125 formatters : list, tuple or dict of one-param. functions, optional 

126 Formatter functions to apply to columns' elements by position or 

127 name. 

128 The result of each function must be a unicode string. 

129 List/tuple must be of length equal to the number of columns. 

130 float_format : one-parameter function, optional, default None 

131 Formatter function to apply to columns' elements if they are 

132 floats. This function must return a unicode string and will be 

133 applied only to the non-``NaN`` elements, with ``NaN`` being 

134 handled by ``na_rep``. 

135 sparsify : bool, optional, default True 

136 Set to False for a DataFrame with a hierarchical index to print 

137 every multiindex key at each row. 

138 index_names : bool, optional, default True 

139 Prints the names of the indexes. 

140 justify : str, default None 

141 How to justify the column labels. If None uses the option from 

142 the print configuration (controlled by set_option), 'right' out 

143 of the box. Valid values are 

144 

145 * left 

146 * right 

147 * center 

148 * justify 

149 * justify-all 

150 * start 

151 * end 

152 * inherit 

153 * match-parent 

154 * initial 

155 * unset. 

156 max_rows : int, optional 

157 Maximum number of rows to display in the console. 

158 max_cols : int, optional 

159 Maximum number of columns to display in the console. 

160 show_dimensions : bool, default False 

161 Display DataFrame dimensions (number of rows by number of columns). 

162 decimal : str, default '.' 

163 Character recognized as decimal separator, e.g. ',' in Europe. 

164 """ 

165 

# Accepted values for the ``justify`` argument; this is the same list that
# ``common_docstring`` documents under "Valid values are".
VALID_JUSTIFY_PARAMETERS = (
    "left", "right", "center",
    "justify", "justify-all",
    "start", "end",
    "inherit", "match-parent", "initial", "unset",
)

179 

180return_docstring: Final = """ 

181 Returns 

182 ------- 

183 str or None 

184 If buf is None, returns the result as a string. Otherwise returns 

185 None. 

186 """ 

187 

188 

189class SeriesFormatter: 

190 """ 

191 Implement the main logic of Series.to_string, which underlies 

192 Series.__repr__. 

193 """ 

194 

195 def __init__( 

196 self, 

197 series: Series, 

198 *, 

199 length: bool | str = True, 

200 header: bool = True, 

201 index: bool = True, 

202 na_rep: str = "NaN", 

203 name: bool = False, 

204 float_format: str | None = None, 

205 dtype: bool = True, 

206 max_rows: int | None = None, 

207 min_rows: int | None = None, 

208 ) -> None: 

209 self.series = series 

210 self.buf = StringIO() 

211 self.name = name 

212 self.na_rep = na_rep 

213 self.header = header 

214 self.length = length 

215 self.index = index 

216 self.max_rows = max_rows 

217 self.min_rows = min_rows 

218 

219 if float_format is None: 

220 float_format = get_option("display.float_format") 

221 self.float_format = float_format 

222 self.dtype = dtype 

223 self.adj = printing.get_adjustment() 

224 

225 self._chk_truncate() 

226 

227 def _chk_truncate(self) -> None: 

228 self.tr_row_num: int | None 

229 

230 min_rows = self.min_rows 

231 max_rows = self.max_rows 

232 # truncation determined by max_rows, actual truncated number of rows 

233 # used below by min_rows 

234 is_truncated_vertically = max_rows and (len(self.series) > max_rows) 

235 series = self.series 

236 if is_truncated_vertically: 

237 max_rows = cast(int, max_rows) 

238 if min_rows: 

239 # if min_rows is set (not None or 0), set max_rows to minimum 

240 # of both 

241 max_rows = min(min_rows, max_rows) 

242 if max_rows == 1: 

243 row_num = max_rows 

244 series = series.iloc[:max_rows] 

245 else: 

246 row_num = max_rows // 2 

247 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) 

248 self.tr_row_num = row_num 

249 else: 

250 self.tr_row_num = None 

251 self.tr_series = series 

252 self.is_truncated_vertically = is_truncated_vertically 

253 

254 def _get_footer(self) -> str: 

255 name = self.series.name 

256 footer = "" 

257 

258 index = self.series.index 

259 if ( 

260 isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)) 

261 and index.freq is not None 

262 ): 

263 footer += f"Freq: {index.freqstr}" 

264 

265 if self.name is not False and name is not None: 

266 if footer: 

267 footer += ", " 

268 

269 series_name = printing.pprint_thing(name, escape_chars=("\t", "\r", "\n")) 

270 footer += f"Name: {series_name}" 

271 

272 if self.length is True or ( 

273 self.length == "truncate" and self.is_truncated_vertically 

274 ): 

275 if footer: 

276 footer += ", " 

277 footer += f"Length: {len(self.series)}" 

278 

279 if self.dtype is not False and self.dtype is not None: 

280 dtype_name = getattr(self.tr_series.dtype, "name", None) 

281 if dtype_name: 

282 if footer: 

283 footer += ", " 

284 footer += f"dtype: {printing.pprint_thing(dtype_name)}" 

285 

286 # level infos are added to the end and in a new line, like it is done 

287 # for Categoricals 

288 if isinstance(self.tr_series.dtype, CategoricalDtype): 

289 level_info = self.tr_series._values._get_repr_footer() 

290 if footer: 

291 footer += "\n" 

292 footer += level_info 

293 

294 return str(footer) 

295 

296 def _get_formatted_values(self) -> list[str]: 

297 return format_array( 

298 self.tr_series._values, 

299 None, 

300 float_format=self.float_format, 

301 na_rep=self.na_rep, 

302 leading_space=self.index, 

303 ) 

304 

305 def to_string(self) -> str: 

306 series = self.tr_series 

307 footer = self._get_footer() 

308 

309 if len(series) == 0: 

310 return f"{type(self.series).__name__}([], {footer})" 

311 

312 index = series.index 

313 have_header = _has_names(index) 

314 if isinstance(index, MultiIndex): 

315 fmt_index = index._format_multi(include_names=True, sparsify=None) 

316 adj = printing.get_adjustment() 

317 fmt_index = adj.adjoin(2, *fmt_index).split("\n") 

318 else: 

319 fmt_index = index._format_flat(include_name=True) 

320 fmt_values = self._get_formatted_values() 

321 

322 if self.is_truncated_vertically: 

323 n_header_rows = 0 

324 row_num = self.tr_row_num 

325 row_num = cast(int, row_num) 

326 width = self.adj.len(fmt_values[row_num - 1]) 

327 if width > 3: 

328 dot_str = "..." 

329 else: 

330 dot_str = ".." 

331 # Series uses mode=center because it has single value columns 

332 # DataFrame uses mode=left 

333 dot_str = self.adj.justify([dot_str], width, mode="center")[0] 

334 fmt_values.insert(row_num + n_header_rows, dot_str) 

335 fmt_index.insert(row_num + 1, "") 

336 

337 if self.index: 

338 result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values]) 

339 else: 

340 result = self.adj.adjoin(3, fmt_values) 

341 

342 if self.header and have_header: 

343 result = fmt_index[0] + "\n" + result 

344 

345 if footer: 

346 result += "\n" + footer 

347 

348 return str("".join(result)) 

349 

350 

def get_dataframe_repr_params() -> dict[str, Any]:
    """Return the keyword arguments that make ``DataFrame.to_string`` match ``repr``.

    The values are read from the current ``display.*`` options, so passing
    them to ``DataFrame.to_string`` reproduces ``repr(DataFrame)``. Start
    from this dict when you want to tweak the repr output.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    # A line width is only supplied when wide frames may be wrapped.
    line_width = None
    if get_option("display.expand_frame_repr"):
        line_width, _ = console.get_console_size()

    option_for = {
        "max_rows": "display.max_rows",
        "min_rows": "display.min_rows",
        "max_cols": "display.max_columns",
        "max_colwidth": "display.max_colwidth",
        "show_dimensions": "display.show_dimensions",
    }
    params: dict[str, Any] = {
        key: get_option(option) for key, option in option_for.items()
    }
    params["line_width"] = line_width
    return params

382 

383 

def get_series_repr_params() -> dict[str, Any]:
    """Return the keyword arguments that make ``Series.to_string`` match ``repr``.

    The values are read from the current ``display.*`` options (and the
    terminal height when ``display.max_rows`` is 0), so passing them to
    ``Series.to_string`` reproduces ``repr(series)``. Start from this dict
    when you want to tweak the series repr output.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    terminal_height = get_terminal_size().lines
    max_rows_opt = get_option("display.max_rows")

    if max_rows_opt == 0:
        # A max_rows option of 0 makes both limits follow the terminal height.
        max_rows = min_rows = terminal_height
    else:
        max_rows = max_rows_opt
        min_rows = get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }

413 

414 

415class DataFrameFormatter: 

416 """ 

417 Class for processing dataframe formatting options and data. 

418 

419 Used by DataFrame.to_string, which backs DataFrame.__repr__. 

420 """ 

421 

422 __doc__ = __doc__ if __doc__ else "" 

423 __doc__ += common_docstring + return_docstring 

424 

425 def __init__( 

426 self, 

427 frame: DataFrame, 

428 columns: Axes | None = None, 

429 col_space: ColspaceArgType | None = None, 

430 header: bool | SequenceNotStr[str] = True, 

431 index: bool = True, 

432 na_rep: str = "NaN", 

433 formatters: FormattersType | None = None, 

434 justify: str | None = None, 

435 float_format: FloatFormatType | None = None, 

436 sparsify: bool | None = None, 

437 index_names: bool = True, 

438 max_rows: int | None = None, 

439 min_rows: int | None = None, 

440 max_cols: int | None = None, 

441 show_dimensions: bool | str = False, 

442 decimal: str = ".", 

443 bold_rows: bool = False, 

444 escape: bool = True, 

445 ) -> None: 

446 self.frame = frame 

447 self.columns = self._initialize_columns(columns) 

448 self.col_space = self._initialize_colspace(col_space) 

449 self.header = header 

450 self.index = index 

451 self.na_rep = na_rep 

452 self.formatters = self._initialize_formatters(formatters) 

453 self.justify = self._initialize_justify(justify) 

454 self.float_format = float_format 

455 self.sparsify = self._initialize_sparsify(sparsify) 

456 self.show_index_names = index_names 

457 self.decimal = decimal 

458 self.bold_rows = bold_rows 

459 self.escape = escape 

460 self.max_rows = max_rows 

461 self.min_rows = min_rows 

462 self.max_cols = max_cols 

463 self.show_dimensions = show_dimensions 

464 

465 self.max_cols_fitted = self._calc_max_cols_fitted() 

466 self.max_rows_fitted = self._calc_max_rows_fitted() 

467 

468 self.tr_frame = self.frame 

469 self.truncate() 

470 self.adj = printing.get_adjustment() 

471 

472 def get_strcols(self) -> list[list[str]]: 

473 """ 

474 Render a DataFrame to a list of columns (as lists of strings). 

475 """ 

476 strcols = self._get_strcols_without_index() 

477 

478 if self.index: 

479 str_index = self._get_formatted_index(self.tr_frame) 

480 strcols.insert(0, str_index) 

481 

482 return strcols 

483 

484 @property 

485 def should_show_dimensions(self) -> bool: 

486 return self.show_dimensions is True or ( 

487 self.show_dimensions == "truncate" and self.is_truncated 

488 ) 

489 

490 @property 

491 def is_truncated(self) -> bool: 

492 return bool(self.is_truncated_horizontally or self.is_truncated_vertically) 

493 

494 @property 

495 def is_truncated_horizontally(self) -> bool: 

496 return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted)) 

497 

498 @property 

499 def is_truncated_vertically(self) -> bool: 

500 return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted)) 

501 

502 @property 

503 def dimensions_info(self) -> str: 

504 return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]" 

505 

506 @property 

507 def has_index_names(self) -> bool: 

508 return _has_names(self.frame.index) 

509 

510 @property 

511 def has_column_names(self) -> bool: 

512 return _has_names(self.frame.columns) 

513 

514 @property 

515 def show_row_idx_names(self) -> bool: 

516 return all((self.has_index_names, self.index, self.show_index_names)) 

517 

518 @property 

519 def show_col_idx_names(self) -> bool: 

520 return all((self.has_column_names, self.show_index_names, self.header)) 

521 

522 @property 

523 def max_rows_displayed(self) -> int: 

524 return min(self.max_rows or len(self.frame), len(self.frame)) 

525 

526 def _initialize_sparsify(self, sparsify: bool | None) -> bool: 

527 if sparsify is None: 

528 return get_option("display.multi_sparse") 

529 return sparsify 

530 

531 def _initialize_formatters( 

532 self, formatters: FormattersType | None 

533 ) -> FormattersType: 

534 if formatters is None: 

535 return {} 

536 elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict): 

537 return formatters 

538 else: 

539 raise ValueError( 

540 f"Formatters length({len(formatters)}) should match " 

541 f"DataFrame number of columns({len(self.frame.columns)})" 

542 ) 

543 

544 def _initialize_justify(self, justify: str | None) -> str: 

545 if justify is None: 

546 return get_option("display.colheader_justify") 

547 else: 

548 return justify 

549 

550 def _initialize_columns(self, columns: Axes | None) -> Index: 

551 if columns is not None: 

552 cols = ensure_index(columns) 

553 self.frame = self.frame[cols] 

554 return cols 

555 else: 

556 return self.frame.columns 

557 

558 def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType: 

559 result: ColspaceType 

560 

561 if col_space is None: 

562 result = {} 

563 elif isinstance(col_space, (int, str)): 

564 result = {"": col_space} 

565 result.update({column: col_space for column in self.frame.columns}) 

566 elif isinstance(col_space, Mapping): 

567 for column in col_space.keys(): 

568 if column not in self.frame.columns and column != "": 

569 raise ValueError( 

570 f"Col_space is defined for an unknown column: {column}" 

571 ) 

572 result = col_space 

573 else: 

574 if len(self.frame.columns) != len(col_space): 

575 raise ValueError( 

576 f"Col_space length({len(col_space)}) should match " 

577 f"DataFrame number of columns({len(self.frame.columns)})" 

578 ) 

579 result = dict(zip(self.frame.columns, col_space)) 

580 return result 

581 

582 def _calc_max_cols_fitted(self) -> int | None: 

583 """Number of columns fitting the screen.""" 

584 if not self._is_in_terminal(): 

585 return self.max_cols 

586 

587 width, _ = get_terminal_size() 

588 if self._is_screen_narrow(width): 

589 return width 

590 else: 

591 return self.max_cols 

592 

593 def _calc_max_rows_fitted(self) -> int | None: 

594 """Number of rows with data fitting the screen.""" 

595 max_rows: int | None 

596 

597 if self._is_in_terminal(): 

598 _, height = get_terminal_size() 

599 if self.max_rows == 0: 

600 # rows available to fill with actual data 

601 return height - self._get_number_of_auxiliary_rows() 

602 

603 if self._is_screen_short(height): 

604 max_rows = height 

605 else: 

606 max_rows = self.max_rows 

607 else: 

608 max_rows = self.max_rows 

609 

610 return self._adjust_max_rows(max_rows) 

611 

612 def _adjust_max_rows(self, max_rows: int | None) -> int | None: 

613 """Adjust max_rows using display logic. 

614 

615 See description here: 

616 https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options 

617 

618 GH #37359 

619 """ 

620 if max_rows: 

621 if (len(self.frame) > max_rows) and self.min_rows: 

622 # if truncated, set max_rows showed to min_rows 

623 max_rows = min(self.min_rows, max_rows) 

624 return max_rows 

625 

626 def _is_in_terminal(self) -> bool: 

627 """Check if the output is to be shown in terminal.""" 

628 return bool(self.max_cols == 0 or self.max_rows == 0) 

629 

630 def _is_screen_narrow(self, max_width) -> bool: 

631 return bool(self.max_cols == 0 and len(self.frame.columns) > max_width) 

632 

633 def _is_screen_short(self, max_height) -> bool: 

634 return bool(self.max_rows == 0 and len(self.frame) > max_height) 

635 

636 def _get_number_of_auxiliary_rows(self) -> int: 

637 """Get number of rows occupied by prompt, dots and dimension info.""" 

638 dot_row = 1 

639 prompt_row = 1 

640 num_rows = dot_row + prompt_row 

641 

642 if self.show_dimensions: 

643 num_rows += len(self.dimensions_info.splitlines()) 

644 

645 if self.header: 

646 num_rows += 1 

647 

648 return num_rows 

649 

650 def truncate(self) -> None: 

651 """ 

652 Check whether the frame should be truncated. If so, slice the frame up. 

653 """ 

654 if self.is_truncated_horizontally: 

655 self._truncate_horizontally() 

656 

657 if self.is_truncated_vertically: 

658 self._truncate_vertically() 

659 

660 def _truncate_horizontally(self) -> None: 

661 """Remove columns, which are not to be displayed and adjust formatters. 

662 

663 Attributes affected: 

664 - tr_frame 

665 - formatters 

666 - tr_col_num 

667 """ 

668 assert self.max_cols_fitted is not None 

669 col_num = self.max_cols_fitted // 2 

670 if col_num >= 1: 

671 left = self.tr_frame.iloc[:, :col_num] 

672 right = self.tr_frame.iloc[:, -col_num:] 

673 self.tr_frame = concat((left, right), axis=1) 

674 

675 # truncate formatter 

676 if isinstance(self.formatters, (list, tuple)): 

677 self.formatters = [ 

678 *self.formatters[:col_num], 

679 *self.formatters[-col_num:], 

680 ] 

681 else: 

682 col_num = cast(int, self.max_cols) 

683 self.tr_frame = self.tr_frame.iloc[:, :col_num] 

684 self.tr_col_num = col_num 

685 

686 def _truncate_vertically(self) -> None: 

687 """Remove rows, which are not to be displayed. 

688 

689 Attributes affected: 

690 - tr_frame 

691 - tr_row_num 

692 """ 

693 assert self.max_rows_fitted is not None 

694 row_num = self.max_rows_fitted // 2 

695 if row_num >= 1: 

696 _len = len(self.tr_frame) 

697 _slice = np.hstack([np.arange(row_num), np.arange(_len - row_num, _len)]) 

698 self.tr_frame = self.tr_frame.iloc[_slice] 

699 else: 

700 row_num = cast(int, self.max_rows) 

701 self.tr_frame = self.tr_frame.iloc[:row_num, :] 

702 self.tr_row_num = row_num 

703 

704 def _get_strcols_without_index(self) -> list[list[str]]: 

705 strcols: list[list[str]] = [] 

706 

707 if not is_list_like(self.header) and not self.header: 

708 for i, c in enumerate(self.tr_frame): 

709 fmt_values = self.format_col(i) 

710 fmt_values = _make_fixed_width( 

711 strings=fmt_values, 

712 justify=self.justify, 

713 minimum=int(self.col_space.get(c, 0)), 

714 adj=self.adj, 

715 ) 

716 strcols.append(fmt_values) 

717 return strcols 

718 

719 if is_list_like(self.header): 

720 # cast here since can't be bool if is_list_like 

721 self.header = cast(list[str], self.header) 

722 if len(self.header) != len(self.columns): 

723 raise ValueError( 

724 f"Writing {len(self.columns)} cols " 

725 f"but got {len(self.header)} aliases" 

726 ) 

727 str_columns = [[label] for label in self.header] 

728 else: 

729 str_columns = self._get_formatted_column_labels(self.tr_frame) 

730 

731 if self.show_row_idx_names: 

732 for x in str_columns: 

733 x.append("") 

734 

735 for i, c in enumerate(self.tr_frame): 

736 cheader = str_columns[i] 

737 header_colwidth = max( 

738 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) 

739 ) 

740 fmt_values = self.format_col(i) 

741 fmt_values = _make_fixed_width( 

742 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj 

743 ) 

744 

745 max_len = max(*(self.adj.len(x) for x in fmt_values), header_colwidth) 

746 cheader = self.adj.justify(cheader, max_len, mode=self.justify) 

747 strcols.append(cheader + fmt_values) 

748 

749 return strcols 

750 

751 def format_col(self, i: int) -> list[str]: 

752 frame = self.tr_frame 

753 formatter = self._get_formatter(i) 

754 return format_array( 

755 frame.iloc[:, i]._values, 

756 formatter, 

757 float_format=self.float_format, 

758 na_rep=self.na_rep, 

759 space=self.col_space.get(frame.columns[i]), 

760 decimal=self.decimal, 

761 leading_space=self.index, 

762 ) 

763 

764 def _get_formatter(self, i: str | int) -> Callable | None: 

765 if isinstance(self.formatters, (list, tuple)): 

766 if is_integer(i): 

767 i = cast(int, i) 

768 return self.formatters[i] 

769 else: 

770 return None 

771 else: 

772 if is_integer(i) and i not in self.columns: 

773 i = self.columns[i] 

774 return self.formatters.get(i, None) 

775 

776 def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: 

777 from pandas.core.indexes.multi import sparsify_labels 

778 

779 columns = frame.columns 

780 

781 if isinstance(columns, MultiIndex): 

782 fmt_columns = columns._format_multi(sparsify=False, include_names=False) 

783 fmt_columns = list(zip(*fmt_columns)) 

784 dtypes = self.frame.dtypes._values 

785 

786 # if we have a Float level, they don't use leading space at all 

787 restrict_formatting = any(level.is_floating for level in columns.levels) 

788 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) 

789 

790 def space_format(x, y): 

791 if ( 

792 y not in self.formatters 

793 and need_leadsp[x] 

794 and not restrict_formatting 

795 ): 

796 return " " + y 

797 return y 

798 

799 str_columns_tuple = list( 

800 zip(*([space_format(x, y) for y in x] for x in fmt_columns)) 

801 ) 

802 if self.sparsify and len(str_columns_tuple): 

803 str_columns_tuple = sparsify_labels(str_columns_tuple) 

804 

805 str_columns = [list(x) for x in zip(*str_columns_tuple)] 

806 else: 

807 fmt_columns = columns._format_flat(include_name=False) 

808 dtypes = self.frame.dtypes 

809 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) 

810 str_columns = [ 

811 [" " + x if not self._get_formatter(i) and need_leadsp[x] else x] 

812 for i, x in enumerate(fmt_columns) 

813 ] 

814 # self.str_columns = str_columns 

815 return str_columns 

816 

817 def _get_formatted_index(self, frame: DataFrame) -> list[str]: 

818 # Note: this is only used by to_string() and to_latex(), not by 

819 # to_html(). so safe to cast col_space here. 

820 col_space = {k: cast(int, v) for k, v in self.col_space.items()} 

821 index = frame.index 

822 columns = frame.columns 

823 fmt = self._get_formatter("__index__") 

824 

825 if isinstance(index, MultiIndex): 

826 fmt_index = index._format_multi( 

827 sparsify=self.sparsify, 

828 include_names=self.show_row_idx_names, 

829 formatter=fmt, 

830 ) 

831 else: 

832 fmt_index = [ 

833 index._format_flat(include_name=self.show_row_idx_names, formatter=fmt) 

834 ] 

835 

836 fmt_index = [ 

837 tuple( 

838 _make_fixed_width( 

839 list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj 

840 ) 

841 ) 

842 for x in fmt_index 

843 ] 

844 

845 adjoined = self.adj.adjoin(1, *fmt_index).split("\n") 

846 

847 # empty space for columns 

848 if self.show_col_idx_names: 

849 col_header = [str(x) for x in self._get_column_name_list()] 

850 else: 

851 col_header = [""] * columns.nlevels 

852 

853 if self.header: 

854 return col_header + adjoined 

855 else: 

856 return adjoined 

857 

858 def _get_column_name_list(self) -> list[Hashable]: 

859 names: list[Hashable] = [] 

860 columns = self.frame.columns 

861 if isinstance(columns, MultiIndex): 

862 names.extend("" if name is None else name for name in columns.names) 

863 else: 

864 names.append("" if columns.name is None else columns.name) 

865 return names 

866 

867 

class DataFrameRenderer:
    """Produce the different textual outputs of a DataFrame.

    Called in pandas.core.generic.NDFrame:
        - to_csv
        - to_latex

    Called in pandas.core.frame.DataFrame:
        - to_html
        - to_string

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter with the formatting options.
    """

    def __init__(self, fmt: DataFrameFormatter) -> None:
        self.fmt = fmt

    def to_html(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        classes: str | list | tuple | None = None,
        notebook: bool = False,
        border: int | bool | None = None,
        table_id: str | None = None,
        render_links: bool = False,
    ) -> str | None:
        """
        Render a DataFrame to a html table.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            String, path object (implementing ``os.PathLike[str]``), or file-like
            object implementing a string ``write()`` function. If None, the result is
            returned as a string.
        encoding : str, default "utf-8"
            Set character encoding.
        classes : str or list-like
            classes to include in the `class` attribute of the opening
            ``<table>`` tag, in addition to the default "dataframe".
        notebook : {True, False}, optional, default False
            Whether the generated HTML is for IPython Notebook.
        border : int
            A ``border=border`` attribute is included in the opening
            ``<table>`` tag. Default ``pd.options.display.html.border``.
        table_id : str, optional
            A css id is included in the opening `<table>` tag if specified.
        render_links : bool, default False
            Convert URLs to HTML links.
        """
        from pandas.io.formats.html import (
            HTMLFormatter,
            NotebookFormatter,
        )

        if notebook:
            formatter_cls = NotebookFormatter
        else:
            formatter_cls = HTMLFormatter

        markup = formatter_cls(
            self.fmt,
            classes=classes,
            border=border,
            table_id=table_id,
            render_links=render_links,
        ).to_string()
        return save_to_buffer(markup, buf=buf, encoding=encoding)

    def to_string(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        line_width: int | None = None,
    ) -> str | None:
        """
        Render a DataFrame to a console-friendly tabular output.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            String, path object (implementing ``os.PathLike[str]``), or file-like
            object implementing a string ``write()`` function. If None, the result is
            returned as a string.
        encoding : str, default "utf-8"
            Set character encoding.
        line_width : int, optional
            Width to wrap a line in characters.
        """
        from pandas.io.formats.string import StringFormatter

        text = StringFormatter(self.fmt, line_width=line_width).to_string()
        return save_to_buffer(text, buf=buf, encoding=encoding)

    def to_csv(
        self,
        path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        sep: str = ",",
        columns: Sequence[Hashable] | None = None,
        index_label: IndexLabel | None = None,
        mode: str = "w",
        compression: CompressionOptions = "infer",
        quoting: int | None = None,
        quotechar: str = '"',
        lineterminator: str | None = None,
        chunksize: int | None = None,
        date_format: str | None = None,
        doublequote: bool = True,
        escapechar: str | None = None,
        errors: str = "strict",
        storage_options: StorageOptions | None = None,
    ) -> str | None:
        """
        Render dataframe as comma-separated file.

        Returns the CSV text when ``path_or_buf`` is None, otherwise writes to
        ``path_or_buf`` and returns None.
        """
        from pandas.io.formats.csvs import CSVFormatter

        # Without a target, write into an in-memory buffer and hand back its
        # contents afterwards.
        return_as_string = path_or_buf is None
        if return_as_string:
            path_or_buf = StringIO()

        CSVFormatter(
            path_or_buf=path_or_buf,
            lineterminator=lineterminator,
            sep=sep,
            encoding=encoding,
            errors=errors,
            compression=compression,
            quoting=quoting,
            cols=columns,
            index_label=index_label,
            mode=mode,
            chunksize=chunksize,
            quotechar=quotechar,
            date_format=date_format,
            doublequote=doublequote,
            escapechar=escapechar,
            storage_options=storage_options,
            formatter=self.fmt,
        ).save()

        if not return_as_string:
            return None

        assert isinstance(path_or_buf, StringIO)
        text = path_or_buf.getvalue()
        path_or_buf.close()
        return text

1023 

1024 

1025def save_to_buffer( 

1026 string: str, 

1027 buf: FilePath | WriteBuffer[str] | None = None, 

1028 encoding: str | None = None, 

1029) -> str | None: 

1030 """ 

1031 Perform serialization. Write to buf or return as string if buf is None. 

1032 """ 

1033 with _get_buffer(buf, encoding=encoding) as fd: 

1034 fd.write(string) 

1035 if buf is None: 

1036 # error: "WriteBuffer[str]" has no attribute "getvalue" 

1037 return fd.getvalue() # type: ignore[attr-defined] 

1038 return None 

1039 

1040 

1041@contextmanager 

1042def _get_buffer( 

1043 buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None 

1044) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]: 

1045 """ 

1046 Context manager to open, yield and close buffer for filenames or Path-like 

1047 objects, otherwise yield buf unchanged. 

1048 """ 

1049 if buf is not None: 

1050 buf = stringify_path(buf) 

1051 else: 

1052 buf = StringIO() 

1053 

1054 if encoding is None: 

1055 encoding = "utf-8" 

1056 elif not isinstance(buf, str): 

1057 raise ValueError("buf is not a file name and encoding is specified.") 

1058 

1059 if hasattr(buf, "write"): 

1060 # Incompatible types in "yield" (actual type "Union[str, WriteBuffer[str], 

1061 # StringIO]", expected type "Union[WriteBuffer[str], StringIO]") 

1062 yield buf # type: ignore[misc] 

1063 elif isinstance(buf, str): 

1064 check_parent_directory(str(buf)) 

1065 with open(buf, "w", encoding=encoding, newline="") as f: 

1066 # GH#30034 open instead of codecs.open prevents a file leak 

1067 # if we have an invalid encoding argument. 

1068 # newline="" is needed to roundtrip correctly on 

1069 # windows test_to_latex_filename 

1070 yield f 

1071 else: 

1072 raise TypeError("buf is not a file name and it has no write method") 

1073 

1074 

1075# ---------------------------------------------------------------------- 

1076# Array formatters 

1077 

1078 

1079def format_array( 

1080 values: ArrayLike, 

1081 formatter: Callable | None, 

1082 float_format: FloatFormatType | None = None, 

1083 na_rep: str = "NaN", 

1084 digits: int | None = None, 

1085 space: str | int | None = None, 

1086 justify: str = "right", 

1087 decimal: str = ".", 

1088 leading_space: bool | None = True, 

1089 quoting: int | None = None, 

1090 fallback_formatter: Callable | None = None, 

1091) -> list[str]: 

1092 """ 

1093 Format an array for printing. 

1094 

1095 Parameters 

1096 ---------- 

1097 values : np.ndarray or ExtensionArray 

1098 formatter 

1099 float_format 

1100 na_rep 

1101 digits 

1102 space 

1103 justify 

1104 decimal 

1105 leading_space : bool, optional, default True 

1106 Whether the array should be formatted with a leading space. 

1107 When an array as a column of a Series or DataFrame, we do want 

1108 the leading space to pad between columns. 

1109 

1110 When formatting an Index subclass 

1111 (e.g. IntervalIndex._get_values_for_csv), we don't want the 

1112 leading space since it should be left-aligned. 

1113 fallback_formatter 

1114 

1115 Returns 

1116 ------- 

1117 List[str] 

1118 """ 

1119 fmt_klass: type[_GenericArrayFormatter] 

1120 if lib.is_np_dtype(values.dtype, "M"): 

1121 fmt_klass = _Datetime64Formatter 

1122 values = cast(DatetimeArray, values) 

1123 elif isinstance(values.dtype, DatetimeTZDtype): 

1124 fmt_klass = _Datetime64TZFormatter 

1125 values = cast(DatetimeArray, values) 

1126 elif lib.is_np_dtype(values.dtype, "m"): 

1127 fmt_klass = _Timedelta64Formatter 

1128 values = cast(TimedeltaArray, values) 

1129 elif isinstance(values.dtype, ExtensionDtype): 

1130 fmt_klass = _ExtensionArrayFormatter 

1131 elif lib.is_np_dtype(values.dtype, "fc"): 

1132 fmt_klass = FloatArrayFormatter 

1133 elif lib.is_np_dtype(values.dtype, "iu"): 

1134 fmt_klass = _IntArrayFormatter 

1135 else: 

1136 fmt_klass = _GenericArrayFormatter 

1137 

1138 if space is None: 

1139 space = 12 

1140 

1141 if float_format is None: 

1142 float_format = get_option("display.float_format") 

1143 

1144 if digits is None: 

1145 digits = get_option("display.precision") 

1146 

1147 fmt_obj = fmt_klass( 

1148 values, 

1149 digits=digits, 

1150 na_rep=na_rep, 

1151 float_format=float_format, 

1152 formatter=formatter, 

1153 space=space, 

1154 justify=justify, 

1155 decimal=decimal, 

1156 leading_space=leading_space, 

1157 quoting=quoting, 

1158 fallback_formatter=fallback_formatter, 

1159 ) 

1160 

1161 return fmt_obj.get_result() 

1162 

1163 

1164class _GenericArrayFormatter: 

1165 def __init__( 

1166 self, 

1167 values: ArrayLike, 

1168 digits: int = 7, 

1169 formatter: Callable | None = None, 

1170 na_rep: str = "NaN", 

1171 space: str | int = 12, 

1172 float_format: FloatFormatType | None = None, 

1173 justify: str = "right", 

1174 decimal: str = ".", 

1175 quoting: int | None = None, 

1176 fixed_width: bool = True, 

1177 leading_space: bool | None = True, 

1178 fallback_formatter: Callable | None = None, 

1179 ) -> None: 

1180 self.values = values 

1181 self.digits = digits 

1182 self.na_rep = na_rep 

1183 self.space = space 

1184 self.formatter = formatter 

1185 self.float_format = float_format 

1186 self.justify = justify 

1187 self.decimal = decimal 

1188 self.quoting = quoting 

1189 self.fixed_width = fixed_width 

1190 self.leading_space = leading_space 

1191 self.fallback_formatter = fallback_formatter 

1192 

1193 def get_result(self) -> list[str]: 

1194 fmt_values = self._format_strings() 

1195 return _make_fixed_width(fmt_values, self.justify) 

1196 

1197 def _format_strings(self) -> list[str]: 

1198 if self.float_format is None: 

1199 float_format = get_option("display.float_format") 

1200 if float_format is None: 

1201 precision = get_option("display.precision") 

1202 float_format = lambda x: _trim_zeros_single_float( 

1203 f"{x: .{precision:d}f}" 

1204 ) 

1205 else: 

1206 float_format = self.float_format 

1207 

1208 if self.formatter is not None: 

1209 formatter = self.formatter 

1210 elif self.fallback_formatter is not None: 

1211 formatter = self.fallback_formatter 

1212 else: 

1213 quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE 

1214 formatter = partial( 

1215 printing.pprint_thing, 

1216 escape_chars=("\t", "\r", "\n"), 

1217 quote_strings=quote_strings, 

1218 ) 

1219 

1220 def _format(x): 

1221 if self.na_rep is not None and is_scalar(x) and isna(x): 

1222 if x is None: 

1223 return "None" 

1224 elif x is NA: 

1225 return str(NA) 

1226 elif lib.is_float(x) and np.isinf(x): 

1227 # TODO(3.0): this will be unreachable when use_inf_as_na 

1228 # deprecation is enforced 

1229 return str(x) 

1230 elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)): 

1231 return "NaT" 

1232 return self.na_rep 

1233 elif isinstance(x, PandasObject): 

1234 return str(x) 

1235 elif isinstance(x, StringDtype): 

1236 return repr(x) 

1237 else: 

1238 # object dtype 

1239 return str(formatter(x)) 

1240 

1241 vals = self.values 

1242 if not isinstance(vals, np.ndarray): 

1243 raise TypeError( 

1244 "ExtensionArray formatting should use _ExtensionArrayFormatter" 

1245 ) 

1246 inferred = lib.map_infer(vals, is_float) 

1247 is_float_type = ( 

1248 inferred 

1249 # vals may have 2 or more dimensions 

1250 & np.all(notna(vals), axis=tuple(range(1, len(vals.shape)))) 

1251 ) 

1252 leading_space = self.leading_space 

1253 if leading_space is None: 

1254 leading_space = is_float_type.any() 

1255 

1256 fmt_values = [] 

1257 for i, v in enumerate(vals): 

1258 if (not is_float_type[i] or self.formatter is not None) and leading_space: 

1259 fmt_values.append(f" {_format(v)}") 

1260 elif is_float_type[i]: 

1261 fmt_values.append(float_format(v)) 

1262 else: 

1263 if leading_space is False: 

1264 # False specifically, so that the default is 

1265 # to include a space if we get here. 

1266 tpl = "{v}" 

1267 else: 

1268 tpl = " {v}" 

1269 fmt_values.append(tpl.format(v=_format(v))) 

1270 

1271 return fmt_values 

1272 

1273 

class FloatArrayFormatter(_GenericArrayFormatter):
    """
    Formatter for float and complex NumPy arrays.

    The string conversion happens in ``get_result_as_array``;
    ``_format_strings`` simply wraps that result in a list.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # An explicit float_format turns off the fixed-width handling
            # (zero trimming, chop threshold, scientific fallback).
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is treated as the element formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : optional
            Called as ``float_format(value=v)``; falls back to
            ``self.float_format`` when None.
        threshold : float, optional
            When given, values whose absolute value does not exceed it are
            formatted as ``0.0``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # error: "str" not callable
                # error: Unexpected keyword argument "value" for "__call__" of
                # "EngFormatter"
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            # Only the first "." is swapped for the configured separator.
            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        # Format every element with `formatter`, substituting `na_rep` for
        # missing entries; the input shape is preserved.
        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        # Complex variant: real and imaginary parts are checked for NaN
        # separately so that a partially-missing value keeps its valid part.
        def format_complex_with_na_rep(
            values: ArrayLike, formatter: Callable, na_rep: str
        ):
            real_values = np.real(values).ravel()  # type: ignore[arg-type]
            imag_values = np.imag(values).ravel()  # type: ignore[arg-type]
            real_mask, imag_mask = isna(real_values), isna(imag_values)
            formatted_lst = []
            for val, real_val, imag_val, re_isna, im_isna in zip(
                values.ravel(),
                real_values,
                imag_values,
                real_mask,
                imag_mask,
            ):
                if not re_isna and not im_isna:
                    formatted_lst.append(formatter(val))
                elif not re_isna:  # xxx+nanj
                    formatted_lst.append(f"{formatter(real_val)}+{na_rep}j")
                elif not im_isna:  # nan[+/-]xxxj
                    # The imaginary part may either start with a "-" or a space
                    imag_formatted = formatter(imag_val).strip()
                    if imag_formatted.startswith("-"):
                        formatted_lst.append(f"{na_rep}{imag_formatted}j")
                    else:
                        formatted_lst.append(f"{na_rep}+{imag_formatted}j")
                else:  # nan+nanj
                    formatted_lst.append(f"{na_rep}+{na_rep}j")
            return np.array(formatted_lst).reshape(values.shape)

        # A user-supplied formatter short-circuits everything below.
        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        # The chop threshold option is only consulted in fixed-width mode.
        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep

            # different formatting strategies for complex and non-complex data
            # need to distinguish complex and float NaNs (GH #53762)
            values = self.values
            is_complex = is_complex_dtype(values)

            # separate the wheat from the chaff
            if is_complex:
                values = format_complex_with_na_rep(values, formatter, na_rep)
            else:
                values = format_with_na_rep(values, formatter, na_rep)

            # Fixed-width output additionally has trailing zeros trimmed.
            if self.fixed_width:
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is None:
            if self.fixed_width:
                # The space flag in the format spec reserves a column for the
                # sign of non-negative numbers.
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # A non-callable float_format is applied with the % operator
            # (callables were moved to self.formatter in __init__).
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        abs_vals = np.abs(self.values)
        # this is pretty arbitrary for now
        # large values: more than 8 characters including decimal symbol
        # and first digit, hence > 1e6
        has_large_values = (abs_vals > 1e6).any()
        has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()

        # Second pass: redo the formatting in exponent notation.
        if has_small_values or (too_long and has_large_values):
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> list[str]:
        """Return the result of ``get_result_as_array`` as a list."""
        return list(self.get_result_as_array())

1473 

1474 

1475class _IntArrayFormatter(_GenericArrayFormatter): 

1476 def _format_strings(self) -> list[str]: 

1477 if self.leading_space is False: 

1478 formatter_str = lambda x: f"{x:d}".format(x=x) 

1479 else: 

1480 formatter_str = lambda x: f"{x: d}".format(x=x) 

1481 formatter = self.formatter or formatter_str 

1482 fmt_values = [formatter(x) for x in self.values] 

1483 return fmt_values 

1484 

1485 

1486class _Datetime64Formatter(_GenericArrayFormatter): 

1487 values: DatetimeArray 

1488 

1489 def __init__( 

1490 self, 

1491 values: DatetimeArray, 

1492 nat_rep: str = "NaT", 

1493 date_format: None = None, 

1494 **kwargs, 

1495 ) -> None: 

1496 super().__init__(values, **kwargs) 

1497 self.nat_rep = nat_rep 

1498 self.date_format = date_format 

1499 

1500 def _format_strings(self) -> list[str]: 

1501 """we by definition have DO NOT have a TZ""" 

1502 values = self.values 

1503 

1504 if self.formatter is not None: 

1505 return [self.formatter(x) for x in values] 

1506 

1507 fmt_values = values._format_native_types( 

1508 na_rep=self.nat_rep, date_format=self.date_format 

1509 ) 

1510 return fmt_values.tolist() 

1511 

1512 

1513class _ExtensionArrayFormatter(_GenericArrayFormatter): 

1514 values: ExtensionArray 

1515 

1516 def _format_strings(self) -> list[str]: 

1517 values = self.values 

1518 

1519 formatter = self.formatter 

1520 fallback_formatter = None 

1521 if formatter is None: 

1522 fallback_formatter = values._formatter(boxed=True) 

1523 

1524 if isinstance(values, Categorical): 

1525 # Categorical is special for now, so that we can preserve tzinfo 

1526 array = values._internal_get_values() 

1527 else: 

1528 array = np.asarray(values, dtype=object) 

1529 

1530 fmt_values = format_array( 

1531 array, 

1532 formatter, 

1533 float_format=self.float_format, 

1534 na_rep=self.na_rep, 

1535 digits=self.digits, 

1536 space=self.space, 

1537 justify=self.justify, 

1538 decimal=self.decimal, 

1539 leading_space=self.leading_space, 

1540 quoting=self.quoting, 

1541 fallback_formatter=fallback_formatter, 

1542 ) 

1543 return fmt_values 

1544 

1545 

1546def format_percentiles( 

1547 percentiles: (np.ndarray | Sequence[float]), 

1548) -> list[str]: 

1549 """ 

1550 Outputs rounded and formatted percentiles. 

1551 

1552 Parameters 

1553 ---------- 

1554 percentiles : list-like, containing floats from interval [0,1] 

1555 

1556 Returns 

1557 ------- 

1558 formatted : list of strings 

1559 

1560 Notes 

1561 ----- 

1562 Rounding precision is chosen so that: (1) if any two elements of 

1563 ``percentiles`` differ, they remain different after rounding 

1564 (2) no entry is *rounded* to 0% or 100%. 

1565 Any non-integer is always rounded to at least 1 decimal place. 

1566 

1567 Examples 

1568 -------- 

1569 Keeps all entries different after rounding: 

1570 

1571 >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) 

1572 ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] 

1573 

1574 No element is rounded to 0% or 100% (unless already equal to it). 

1575 Duplicates are allowed: 

1576 

1577 >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) 

1578 ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] 

1579 """ 

1580 percentiles = np.asarray(percentiles) 

1581 

1582 # It checks for np.nan as well 

1583 if ( 

1584 not is_numeric_dtype(percentiles) 

1585 or not np.all(percentiles >= 0) 

1586 or not np.all(percentiles <= 1) 

1587 ): 

1588 raise ValueError("percentiles should all be in the interval [0,1]") 

1589 

1590 percentiles = 100 * percentiles 

1591 prec = get_precision(percentiles) 

1592 percentiles_round_type = percentiles.round(prec).astype(int) 

1593 

1594 int_idx = np.isclose(percentiles_round_type, percentiles) 

1595 

1596 if np.all(int_idx): 

1597 out = percentiles_round_type.astype(str) 

1598 return [i + "%" for i in out] 

1599 

1600 unique_pcts = np.unique(percentiles) 

1601 prec = get_precision(unique_pcts) 

1602 out = np.empty_like(percentiles, dtype=object) 

1603 out[int_idx] = percentiles[int_idx].round().astype(int).astype(str) 

1604 

1605 out[~int_idx] = percentiles[~int_idx].round(prec).astype(str) 

1606 return [i + "%" for i in out] 

1607 

1608 

def get_precision(array: np.ndarray | Sequence[float]) -> int:
    """
    Return the number of decimals needed to tell neighbouring values apart.

    The smallest absolute gap between consecutive entries is used, including
    the gap from 0 to the first entry and from the last entry to 100 when
    those are positive. The result is never less than 1.
    """
    first, last = array[0], array[-1]
    to_begin = first if first > 0 else None
    to_end = 100 - last if last < 100 else None

    gaps = abs(np.ediff1d(array, to_begin=to_begin, to_end=to_end))
    smallest_gap = np.min(gaps)
    prec = -np.floor(np.log10(smallest_gap)).astype(int)
    return max(1, prec)

1617 

1618 

1619def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str: 

1620 if x is NaT: 

1621 return nat_rep 

1622 

1623 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ') 

1624 # so it already uses string formatting rather than strftime (faster). 

1625 return str(x) 

1626 

1627 

1628def _format_datetime64_dateonly( 

1629 x: NaTType | Timestamp, 

1630 nat_rep: str = "NaT", 

1631 date_format: str | None = None, 

1632) -> str: 

1633 if isinstance(x, NaTType): 

1634 return nat_rep 

1635 

1636 if date_format: 

1637 return x.strftime(date_format) 

1638 else: 

1639 # Timestamp._date_repr relies on string formatting (faster than strftime) 

1640 return x._date_repr 

1641 

1642 

def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """
    Return a one-argument callable that turns a datetime64 value into a string.

    The date-only formatter (which honours ``date_format``) is chosen when
    ``is_dates_only`` is truthy; otherwise the full-timestamp formatter.
    """

    def _dates_only(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    def _full(x):
        return _format_datetime64(x, nat_rep=nat_rep)

    return _dates_only if is_dates_only else _full

1655 

1656 

1657class _Datetime64TZFormatter(_Datetime64Formatter): 

1658 values: DatetimeArray 

1659 

1660 def _format_strings(self) -> list[str]: 

1661 """we by definition have a TZ""" 

1662 ido = self.values._is_dates_only 

1663 values = self.values.astype(object) 

1664 formatter = self.formatter or get_format_datetime64( 

1665 ido, date_format=self.date_format 

1666 ) 

1667 fmt_values = [formatter(x) for x in values] 

1668 

1669 return fmt_values 

1670 

1671 

1672class _Timedelta64Formatter(_GenericArrayFormatter): 

1673 values: TimedeltaArray 

1674 

1675 def __init__( 

1676 self, 

1677 values: TimedeltaArray, 

1678 nat_rep: str = "NaT", 

1679 **kwargs, 

1680 ) -> None: 

1681 # TODO: nat_rep is never passed, na_rep is. 

1682 super().__init__(values, **kwargs) 

1683 self.nat_rep = nat_rep 

1684 

1685 def _format_strings(self) -> list[str]: 

1686 formatter = self.formatter or get_format_timedelta64( 

1687 self.values, nat_rep=self.nat_rep, box=False 

1688 ) 

1689 return [formatter(x) for x in self.values] 

1690 

1691 

def get_format_timedelta64(
    values: TimedeltaArray,
    nat_rep: str | float = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.

    Every value is rendered with the same format argument, chosen once from
    ``values._is_dates_only``: None when true, "long" otherwise.

    If box, then show the return in quotes
    """
    fmt = None if values._is_dates_only else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        td = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = td._repr_base(format=fmt)
        return f"'{text}'" if box else text

    return _formatter

1724 

1725 

1726def _make_fixed_width( 

1727 strings: list[str], 

1728 justify: str = "right", 

1729 minimum: int | None = None, 

1730 adj: printing._TextAdjustment | None = None, 

1731) -> list[str]: 

1732 if len(strings) == 0 or justify == "all": 

1733 return strings 

1734 

1735 if adj is None: 

1736 adjustment = printing.get_adjustment() 

1737 else: 

1738 adjustment = adj 

1739 

1740 max_len = max(adjustment.len(x) for x in strings) 

1741 

1742 if minimum is not None: 

1743 max_len = max(minimum, max_len) 

1744 

1745 conf_max = get_option("display.max_colwidth") 

1746 if conf_max is not None and max_len > conf_max: 

1747 max_len = conf_max 

1748 

1749 def just(x: str) -> str: 

1750 if conf_max is not None: 

1751 if (conf_max > 3) & (adjustment.len(x) > max_len): 

1752 x = x[: max_len - 3] + "..." 

1753 return x 

1754 

1755 strings = [just(x) for x in strings] 

1756 result = adjustment.justify(strings, max_len, mode=justify) 

1757 return result 

1758 

1759 

1760def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[str]: 

1761 """ 

1762 Separates the real and imaginary parts from the complex number, and 

1763 executes the _trim_zeros_float method on each of those. 

1764 """ 

1765 real_part, imag_part = [], [] 

1766 for x in str_complexes: 

1767 # Complex numbers are represented as "(-)xxx(+/-)xxxj" 

1768 # The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""] 

1769 # Therefore, the imaginary part is the 4th and 3rd last elements, 

1770 # and the real part is everything before the imaginary part 

1771 trimmed = re.split(r"([j+-])", x) 

1772 real_part.append("".join(trimmed[:-4])) 

1773 imag_part.append("".join(trimmed[-4:-2])) 

1774 

1775 # We want to align the lengths of the real and imaginary parts of each complex 

1776 # number, as well as the lengths the real (resp. complex) parts of all numbers 

1777 # in the array 

1778 n = len(str_complexes) 

1779 padded_parts = _trim_zeros_float(real_part + imag_part, decimal) 

1780 if len(padded_parts) == 0: 

1781 return [] 

1782 padded_length = max(len(part) for part in padded_parts) - 1 

1783 padded = [ 

1784 real_pt # real part, possibly NaN 

1785 + imag_pt[0] # +/- 

1786 + f"{imag_pt[1:]:>{padded_length}}" # complex part (no sign), possibly nan 

1787 + "j" 

1788 for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:]) 

1789 ] 

1790 return padded 

1791 

1792 

1793def _trim_zeros_single_float(str_float: str) -> str: 

1794 """ 

1795 Trims trailing zeros after a decimal point, 

1796 leaving just one if necessary. 

1797 """ 

1798 str_float = str_float.rstrip("0") 

1799 if str_float.endswith("."): 

1800 str_float += "0" 

1801 

1802 return str_float 

1803 

1804 

1805def _trim_zeros_float( 

1806 str_floats: ArrayLike | list[str], decimal: str = "." 

1807) -> list[str]: 

1808 """ 

1809 Trims the maximum number of trailing zeros equally from 

1810 all numbers containing decimals, leaving just one if 

1811 necessary. 

1812 """ 

1813 trimmed = str_floats 

1814 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") 

1815 

1816 def is_number_with_decimal(x) -> bool: 

1817 return re.match(number_regex, x) is not None 

1818 

1819 def should_trim(values: ArrayLike | list[str]) -> bool: 

1820 """ 

1821 Determine if an array of strings should be trimmed. 

1822 

1823 Returns True if all numbers containing decimals (defined by the 

1824 above regular expression) within the array end in a zero, otherwise 

1825 returns False. 

1826 """ 

1827 numbers = [x for x in values if is_number_with_decimal(x)] 

1828 return len(numbers) > 0 and all(x.endswith("0") for x in numbers) 

1829 

1830 while should_trim(trimmed): 

1831 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] 

1832 

1833 # leave one 0 after the decimal points if need be. 

1834 result = [ 

1835 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x 

1836 for x in trimmed 

1837 ] 

1838 return result 

1839 

1840 

1841def _has_names(index: Index) -> bool: 

1842 if isinstance(index, MultiIndex): 

1843 return com.any_not_none(*index.names) 

1844 else: 

1845 return index.name is not None 

1846 

1847 

class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        # accuracy: number of digits after the decimal point (None -> "g"
        # formatting); use_eng_prefix: SI letter instead of "E+xx"/"E-xx".
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        NaN is rendered as 'NaN'; infinities as 'inf' or '-inf'.

        @param num: the value to represent
        @type num: either a numeric value or a string that can be converted to
                   a numeric value (as per decimal.Decimal constructor)

        @return: engineering formatted string
        """
        dnum = Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Keep the sign so that negative infinity is distinguishable.
            return "-inf" if dnum.is_signed() else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Exponent rounded down to a multiple of three.
        if dnum != 0:
            pow10 = Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = Decimal(0)

        # Clamp to the range of exponents that have a prefix.
        pow10 = pow10.min(max(self.ENG_PREFIXES))
        pow10 = pow10.max(min(self.ENG_PREFIXES))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        elif int_pow10 < 0:
            prefix = f"E-{-int_pow10:02d}"
        else:
            prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        return format_str.format(mant=mant, prefix=prefix)

1942 

1943 

def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Format float representation in DataFrame with SI notation.

    Installs an ``EngFormatter`` as the ``display.float_format`` option.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Whether to represent a value with SI prefixes.

    Returns
    -------
    None

    Examples
    --------
    >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6])
    >>> df
                  0
    0  1.000000e-09
    1  1.000000e-03
    2  1.000000e+00
    3  1.000000e+03
    4  1.000000e+06

    >>> pd.set_eng_float_format(accuracy=1)
    >>> df
             0
    0  1.0E-09
    1  1.0E-03
    2  1.0E+00
    3  1.0E+03
    4  1.0E+06

    >>> pd.set_eng_float_format(use_eng_prefix=True)
    >>> df
            0
    0  1.000n
    1  1.000m
    2   1.000
    3  1.000k
    4  1.000M

    >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    >>> df
          0
    0  1.0n
    1  1.0m
    2   1.0
    3  1.0k
    4  1.0M

    >>> pd.set_option("display.float_format", None)  # unset option
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)

2000 

2001 

def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values on for level.
    sentinel : string, optional
        Value which states that no new index starts on there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # still_open[i] stays True while position i has shown only the sentinel
    # in every level processed so far.
    still_open = [True] * len(levels[0])

    all_lengths = []
    for level in levels:
        span_start = 0
        spans = {}

        for pos, key in enumerate(level):
            if still_open[pos] and key == sentinel:
                # Continuation of the current span.
                continue
            # A new span begins here: close the previous one.
            still_open[pos] = False
            spans[span_start] = pos - span_start
            span_start = pos

        # The last span runs to the end of the level.
        spans[span_start] = len(level) - span_start
        all_lengths.append(spans)

    return all_lengths

2043 

2044 

def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    The entries are joined with a newline and written in a single call; no
    trailing newline is added.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Entries that are not already strings are
        converted with ``str``.
    """
    # Convert unconditionally: converting only when some entry already is a
    # str leaves an all-non-str list unconverted, and the join then raises
    # TypeError.
    buf.write("\n".join(str(x) for x in lines))