Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/formats/format.py: 17%

1"""

2Internal module for formatting output data in csv, html, xml,

3and latex files. This module also applies to display formatting.

4"""

5from __future__ import annotations

7from contextlib import contextmanager

8from csv import (

9 QUOTE_NONE,

10 QUOTE_NONNUMERIC,

11)

12from decimal import Decimal

13from functools import partial

14from io import StringIO

15import math

16import re

17from shutil import get_terminal_size

18from typing import (

19 IO,

20 TYPE_CHECKING,

21 Any,

22 Callable,

23 Final,

24 Generator,

25 Hashable,

26 Iterable,

27 List,

28 Mapping,

29 Sequence,

30 cast,

31)

32from unicodedata import east_asian_width

34import numpy as np

36from pandas._config.config import (

37 get_option,

38 set_option,

39)

41from pandas._libs import lib

42from pandas._libs.missing import NA

43from pandas._libs.tslibs import (

44 NaT,

45 Timedelta,

46 Timestamp,

47 get_unit_from_dtype,

48 iNaT,

49 periods_per_day,

50)

51from pandas._libs.tslibs.nattype import NaTType

52from pandas._typing import (

53 ArrayLike,

54 Axes,

55 ColspaceArgType,

56 ColspaceType,

57 CompressionOptions,

58 FilePath,

59 FloatFormatType,

60 FormattersType,

61 IndexLabel,

62 StorageOptions,

63 WriteBuffer,

64)

66from pandas.core.dtypes.common import (

67 is_categorical_dtype,

68 is_complex_dtype,

69 is_datetime64_dtype,

70 is_extension_array_dtype,

71 is_float,

72 is_float_dtype,

73 is_integer,

74 is_integer_dtype,

75 is_list_like,

76 is_numeric_dtype,

77 is_scalar,

78 is_timedelta64_dtype,

79)

80from pandas.core.dtypes.dtypes import DatetimeTZDtype

81from pandas.core.dtypes.missing import (

82 isna,

83 notna,

84)

86from pandas.core.arrays import (

87 Categorical,

88 DatetimeArray,

89 TimedeltaArray,

90)

91from pandas.core.arrays.string_ import StringDtype

92from pandas.core.base import PandasObject

93import pandas.core.common as com

94from pandas.core.construction import extract_array

95from pandas.core.indexes.api import (

96 Index,

97 MultiIndex,

98 PeriodIndex,

99 ensure_index,

100)

101from pandas.core.indexes.datetimes import DatetimeIndex

102from pandas.core.indexes.timedeltas import TimedeltaIndex

103from pandas.core.reshape.concat import concat

104

105from pandas.io.common import (

106 check_parent_directory,

107 stringify_path,

108)

109from pandas.io.formats import printing

110

111if TYPE_CHECKING:

112 from pandas import (

113 DataFrame,

114 Series,

115 )

116

117

118common_docstring: Final = """

119 Parameters

120 ----------

121 buf : str, Path or StringIO-like, optional, default None

122 Buffer to write to. If None, the output is returned as a string.

123 columns : sequence, optional, default None

124 The subset of columns to write. Writes all columns by default.

125 col_space : %(col_space_type)s, optional

126 %(col_space)s.

127 header : %(header_type)s, optional

128 %(header)s.

129 index : bool, optional, default True

130 Whether to print index (row) labels.

131 na_rep : str, optional, default 'NaN'

132 String representation of ``NaN`` to use.

133 formatters : list, tuple or dict of one-param. functions, optional

134 Formatter functions to apply to columns' elements by position or

135 name.

136 The result of each function must be a unicode string.

137 List/tuple must be of length equal to the number of columns.

138 float_format : one-parameter function, optional, default None

139 Formatter function to apply to columns' elements if they are

140 floats. This function must return a unicode string and will be

141 applied only to the non-``NaN`` elements, with ``NaN`` being

142 handled by ``na_rep``.

143

144 .. versionchanged:: 1.2.0

145

146 sparsify : bool, optional, default True

147 Set to False for a DataFrame with a hierarchical index to print

148 every multiindex key at each row.

149 index_names : bool, optional, default True

150 Prints the names of the indexes.

151 justify : str, default None

152 How to justify the column labels. If None uses the option from

153 the print configuration (controlled by set_option), 'right' out

154 of the box. Valid values are

155

156 * left

157 * right

158 * center

159 * justify

160 * justify-all

161 * start

162 * end

163 * inherit

164 * match-parent

165 * initial

166 * unset.

167 max_rows : int, optional

168 Maximum number of rows to display in the console.

169 max_cols : int, optional

170 Maximum number of columns to display in the console.

171 show_dimensions : bool, default False

172 Display DataFrame dimensions (number of rows by number of columns).

173 decimal : str, default '.'

174 Character recognized as decimal separator, e.g. ',' in Europe.

175 """

176

# Accepted values for the ``justify`` argument, in the same order as the
# bullet list in ``common_docstring``.
_VALID_JUSTIFY_PARAMETERS = (
    "left", "right", "center",
    "justify", "justify-all",
    "start", "end",
    "inherit", "match-parent",
    "initial", "unset",
)

190

191return_docstring: Final = """

192 Returns

193 -------

194 str or None

195 If buf is None, returns the result as a string. Otherwise returns

196 None.

197 """

198

199

class CategoricalFormatter:
    """
    Render a Categorical as a bracketed, comma-separated list of values
    followed by an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The array to render.
    buf : IO[str], optional
        Stored on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a ``Length: N`` entry.
    na_rep : str, default "NaN"
        Passed to ``format_array`` as the text for missing values.
    footer : bool, default True
        Whether ``to_string`` appends the footer.
    """

    def __init__(
        self,
        categorical: Categorical,
        buf: IO[str] | None = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ) -> None:
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Build the footer text.

        Returns
        -------
        str
            ``"Length: N"`` (only when ``self.length`` is truthy) and the
            categories description, separated by a newline.
        """
        parts = []
        if self.length:
            parts.append(f"Length: {len(self.categorical)}")

        # Category info always goes last, on its own line when a length
        # entry precedes it.
        parts.append(self.categorical._repr_categories_info())
        return "\n".join(parts)

    def _get_formatted_values(self) -> list[str]:
        """Format the underlying values through ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return the text representation.

        Returns
        -------
        str
            For an empty categorical, the footer alone (or ``""`` when the
            footer is disabled). Otherwise ``"[v1, v2, ...]"`` followed by
            the footer on the next line when it is enabled and non-empty.
        """
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else ""

        # format_array may pad entries; padding is irrelevant in a flat list.
        values = ", ".join(value.strip() for value in self._get_formatted_values())
        lines = ["[" + values + "]"]

        if self.footer:
            footer = self._get_footer()
            if footer:
                lines.append(footer)

        return "\n".join(lines)

262

263

class SeriesFormatter:
    """
    Build the plain-text representation of a Series.

    Parameters
    ----------
    series : Series
        The Series to render.
    buf : IO[str], optional
        Stored on the instance; a new ``StringIO`` is created when omitted.
    length : bool or "truncate", default True
        ``True`` always adds ``Length: N`` to the footer; ``"truncate"``
        adds it only when rows were elided.
    header : bool, default True
        Whether to emit the index header line (only when the index is named).
    index : bool, default True
        Whether to render the index next to the values.
    na_rep : str, default "NaN"
        Text used for missing values.
    name : bool, default False
        Whether the footer includes the Series name.
    float_format : str, optional
        Falls back to the ``display.float_format`` option when None.
    dtype : bool, default True
        Whether the footer includes the dtype name.
    max_rows : int, optional
        Row count above which the Series is truncated.
    min_rows : int, optional
        When truncating, the number of rows kept is
        ``min(min_rows, max_rows)``.
    """

    def __init__(
        self,
        series: Series,
        buf: IO[str] | None = None,
        length: bool | str = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: str | None = None,
        dtype: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> None:
        self.series = series
        self.buf = buf if buf is not None else StringIO()
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        if float_format is None:
            float_format = get_option("display.float_format")
        self.float_format = float_format
        self.dtype = dtype
        self.adj = get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether rows must be elided and precompute the truncated view.

        Sets ``tr_series`` (the rows actually rendered), ``tr_row_num``
        (number of leading rows kept, or None when not truncated) and
        ``is_truncated_vertically``.
        """
        self.tr_row_num: int | None

        min_rows = self.min_rows
        max_rows = self.max_rows
        # truncation determined by max_rows, actual truncated number of rows
        # used below by min_rows
        is_truncated_vertically = max_rows and (len(self.series) > max_rows)
        series = self.series
        if is_truncated_vertically:
            max_rows = cast(int, max_rows)
            if min_rows:
                # if min_rows is set (not None or 0), set max_rows to minimum
                # of both
                max_rows = min(min_rows, max_rows)
            if max_rows == 1:
                # A single row cannot be split into head and tail.
                row_num = max_rows
                series = series.iloc[:max_rows]
            else:
                row_num = max_rows // 2
                series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
            self.tr_row_num = row_num
        else:
            self.tr_row_num = None
        self.tr_series = series
        self.is_truncated_vertically = is_truncated_vertically

    def _get_footer(self) -> str:
        """
        Build the footer text.

        Returns
        -------
        str
            Comma-separated ``Freq``, ``Name``, ``Length`` and ``dtype``
            entries (each only when applicable), followed on a new line by
            the categories description for categorical data.
        """
        name = self.series.name
        entries = []

        if getattr(self.series.index, "freq", None) is not None:
            assert isinstance(
                self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
            )
            entries.append(f"Freq: {self.series.index.freqstr}")

        if self.name is not False and name is not None:
            series_name = printing.pprint_thing(name, escape_chars=("\t", "\r", "\n"))
            entries.append(f"Name: {series_name}")

        if self.length is True or (
            self.length == "truncate" and self.is_truncated_vertically
        ):
            entries.append(f"Length: {len(self.series)}")

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append(f"dtype: {printing.pprint_thing(dtype_name)}")

        footer = ", ".join(entries)

        # level infos are added to the end and in a new line, like it is done
        # for Categoricals
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return footer

    def _get_formatted_index(self) -> tuple[list[str], bool]:
        """
        Format the index of the truncated Series.

        Returns
        -------
        tuple of (list of str, bool)
            The formatted index entries (requested with the name included,
            so element 0 is the header slot) and whether the index carries
            any name.
        """
        index = self.tr_series.index

        if isinstance(index, MultiIndex):
            have_header = any(name for name in index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self) -> list[str]:
        """Format the values of the truncated Series through ``format_array``."""
        return format_array(
            self.tr_series._values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
            leading_space=self.index,
        )

    def to_string(self) -> str:
        """
        Return the full text representation: optional header line, the
        index/value rows (with an ellipsis row when truncated) and the footer.
        """
        series = self.tr_series
        footer = self._get_footer()

        if len(series) == 0:
            return f"{type(self.series).__name__}([], {footer})"

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.is_truncated_vertically:
            row_num = cast(int, self.tr_row_num)
            # Size the ellipsis to the last value shown before the gap.
            width = self.adj.len(fmt_values[row_num - 1])
            dot_str = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dot_str = self.adj.justify([dot_str], width, mode="center")[0]
            fmt_values.insert(row_num, dot_str)
            # fmt_index[0] is the header slot, hence the extra offset of one.
            fmt_index.insert(row_num + 1, "")

        if self.index:
            result = self.adj.adjoin(3, fmt_index[1:], fmt_values)
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return result

422

423

class TextAdjustment:
    """
    Width measurement and padding helpers that count every character as
    one display column.
    """

    def __init__(self) -> None:
        # Read once from the display options when the helper is created.
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text``, i.e. its character count."""
        width = len(text)
        return width

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
        """Pad every entry of ``texts`` to ``max_len`` via ``printing.justify``."""
        padded = printing.justify(texts, max_len, mode=mode)
        return padded

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Join ``lists`` side by side via ``printing.adjoin``, using this
        object's ``len`` and ``justify`` for measuring and padding.
        """
        joined = printing.adjoin(
            space, *lists, strlen=self.len, justfunc=self.justify, **kwargs
        )
        return joined

438

439

class EastAsianTextAdjustment(TextAdjustment):
    """
    Text adjustment that measures strings by Unicode East Asian Width,
    so wide and fullwidth characters occupy two display columns.
    """

    def __init__(self) -> None:
        super().__init__()
        # Width given to characters whose East Asian Width class is not in
        # the table below; controlled by an option.
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        # Ambiguous width can be changed by option
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        total = 0
        for char in text:
            total += self._EAW_MAP.get(east_asian_width(char), self.ambiguous_width)
        return total

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> list[str]:
        """
        Pad each text to ``max_len`` display columns.

        ``mode`` selects ``"left"`` or ``"center"`` alignment; any other
        value right-aligns.
        """

        # re-calculate padding space per str considering East Asian Width
        def _target_width(text):
            return max_len - self.len(text) + len(text)

        if mode == "left":
            method_name = "ljust"
        elif mode == "center":
            method_name = "center"
        else:
            method_name = "rjust"

        return [getattr(item, method_name)(_target_width(item)) for item in texts]

477

478

def get_adjustment() -> TextAdjustment:
    """
    Return the text-adjustment helper selected by the
    ``display.unicode.east_asian_width`` option.
    """
    adjustment_cls = (
        EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else TextAdjustment
    )
    return adjustment_cls()

485

486

def get_dataframe_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(dataFrame) calls using DataFrame.to_string.

    Passing the returned mapping as keyword arguments to
    ``DataFrame.to_string`` gives the same text as ``repr(DataFrame)``,
    which makes it a convenient starting point for a customised repr.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    # A line width is only supplied when wide frames may be wrapped.
    line_width = None
    if get_option("display.expand_frame_repr"):
        line_width = console.get_console_size()[0]

    repr_params = {
        "max_rows": get_option("display.max_rows"),
        "min_rows": get_option("display.min_rows"),
        "max_cols": get_option("display.max_columns"),
        "max_colwidth": get_option("display.max_colwidth"),
        "show_dimensions": get_option("display.show_dimensions"),
        "line_width": line_width,
    }
    return repr_params

518

519

def get_series_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(Series) calls using Series.to_string.

    Supplying these parameters to Series.to_string is equivalent to calling
    ``repr(series)``. This is useful if you want to adjust the series repr output.

    .. versionadded:: 1.4.0

    Returns
    -------
    dict
        Keys ``name``, ``dtype``, ``min_rows``, ``max_rows`` and ``length``.

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    _, height = get_terminal_size()
    max_rows_option = get_option("display.max_rows")

    if max_rows_option == 0:
        # ``display.max_rows == 0``: both limits come from the terminal height.
        max_rows = min_rows = height
    else:
        max_rows = max_rows_option
        min_rows = get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }

556

557

class DataFrameFormatter:
    """Class for processing dataframe formatting options and data."""

    # Extend the class docstring with the shared Parameters and Returns
    # sections defined at module level.
    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring

    def __init__(
        self,
        frame: DataFrame,
        columns: Sequence[Hashable] | None = None,
        col_space: ColspaceArgType | None = None,
        header: bool | Sequence[str] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: FormattersType | None = None,
        justify: str | None = None,
        float_format: FloatFormatType | None = None,
        sparsify: bool | None = None,
        index_names: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
        max_cols: int | None = None,
        show_dimensions: bool | str = False,
        decimal: str = ".",
        bold_rows: bool = False,
        escape: bool = True,
    ) -> None:
        self.frame = frame
        # NOTE: _initialize_columns may replace self.frame with the selected
        # column subset, so it must run before anything reads self.frame.
        self.columns = self._initialize_columns(columns)
        self.col_space = self._initialize_colspace(col_space)
        self.header = header
        self.index = index
        self.na_rep = na_rep
        self.formatters = self._initialize_formatters(formatters)
        self.justify = self._initialize_justify(justify)
        self.float_format = float_format
        self.sparsify = self._initialize_sparsify(sparsify)
        self.show_index_names = index_names
        self.decimal = decimal
        self.bold_rows = bold_rows
        self.escape = escape
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.show_dimensions = show_dimensions

        self.max_cols_fitted = self._calc_max_cols_fitted()
        self.max_rows_fitted = self._calc_max_rows_fitted()

        # tr_frame is the (possibly truncated) frame that actually gets
        # rendered; truncate() narrows it in place when limits are exceeded.
        self.tr_frame = self.frame
        self.truncate()
        self.adj = get_adjustment()

    def get_strcols(self) -> list[list[str]]:
        """
        Render a DataFrame to a list of columns (as lists of strings).
        """
        strcols = self._get_strcols_without_index()

        if self.index:
            str_index = self._get_formatted_index(self.tr_frame)
            strcols.insert(0, str_index)

        return strcols

    @property
    def should_show_dimensions(self) -> bool:
        """Whether the ``[N rows x M columns]`` trailer should be shown."""
        return self.show_dimensions is True or (
            self.show_dimensions == "truncate" and self.is_truncated
        )

    @property
    def is_truncated(self) -> bool:
        """Whether the frame is truncated along either axis."""
        return bool(self.is_truncated_horizontally or self.is_truncated_vertically)

    @property
    def is_truncated_horizontally(self) -> bool:
        """Whether there are more columns than ``max_cols_fitted`` allows."""
        return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))

    @property
    def is_truncated_vertically(self) -> bool:
        """Whether there are more rows than ``max_rows_fitted`` allows."""
        return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))

    @property
    def dimensions_info(self) -> str:
        """The ``[N rows x M columns]`` trailer, preceded by a blank line."""
        return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"

    @property
    def has_index_names(self) -> bool:
        return _has_names(self.frame.index)

    @property
    def has_column_names(self) -> bool:
        return _has_names(self.frame.columns)

    @property
    def show_row_idx_names(self) -> bool:
        """Index names are shown only when present, requested and the index is rendered."""
        return all((self.has_index_names, self.index, self.show_index_names))

    @property
    def show_col_idx_names(self) -> bool:
        """Column names are shown only when present, requested and a header is rendered."""
        return all((self.has_column_names, self.show_index_names, self.header))

    @property
    def max_rows_displayed(self) -> int:
        """``max_rows`` capped at the frame length (frame length when unset or 0)."""
        return min(self.max_rows or len(self.frame), len(self.frame))

    def _initialize_sparsify(self, sparsify: bool | None) -> bool:
        """Fall back to the ``display.multi_sparse`` option when None."""
        if sparsify is None:
            return get_option("display.multi_sparse")
        return sparsify

    def _initialize_formatters(
        self, formatters: FormattersType | None
    ) -> FormattersType:
        """
        Validate ``formatters``.

        Raises
        ------
        ValueError
            If ``formatters`` is not a dict and its length differs from the
            number of frame columns.
        """
        if formatters is None:
            return {}
        elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
            return formatters
        else:
            raise ValueError(
                f"Formatters length({len(formatters)}) should match "
                f"DataFrame number of columns({len(self.frame.columns)})"
            )

    def _initialize_justify(self, justify: str | None) -> str:
        """Fall back to the ``display.colheader_justify`` option when None."""
        if justify is None:
            return get_option("display.colheader_justify")
        else:
            return justify

    def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
        """
        Resolve the columns to render.

        When ``columns`` is given, ``self.frame`` is replaced by the
        corresponding column selection.
        """
        if columns is not None:
            # GH 47231 - columns doesn't have to be `Sequence[str]`
            # Will fix in later PR
            cols = ensure_index(cast(Axes, columns))
            self.frame = self.frame[cols]
            return cols
        else:
            return self.frame.columns

    def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
        """
        Normalise ``col_space`` to a mapping from column label to width.

        The key ``""`` holds the width applied to the index when a scalar
        is given.

        Raises
        ------
        ValueError
            If a mapping names an unknown column, or a sequence does not
            have one entry per column.
        """
        result: ColspaceType

        if col_space is None:
            result = {}
        elif isinstance(col_space, (int, str)):
            result = {"": col_space}
            result.update({column: col_space for column in self.frame.columns})
        elif isinstance(col_space, Mapping):
            for column in col_space:
                if column not in self.frame.columns and column != "":
                    raise ValueError(
                        f"Col_space is defined for an unknown column: {column}"
                    )
            result = col_space
        else:
            if len(self.frame.columns) != len(col_space):
                raise ValueError(
                    f"Col_space length({len(col_space)}) should match "
                    f"DataFrame number of columns({len(self.frame.columns)})"
                )
            result = dict(zip(self.frame.columns, col_space))
        return result

    def _calc_max_cols_fitted(self) -> int | None:
        """Number of columns fitting the screen."""
        if not self._is_in_terminal():
            return self.max_cols

        width, _ = get_terminal_size()
        if self._is_screen_narrow(width):
            return width
        else:
            return self.max_cols

    def _calc_max_rows_fitted(self) -> int | None:
        """Number of rows with data fitting the screen."""
        max_rows: int | None

        if self._is_in_terminal():
            _, height = get_terminal_size()
            if self.max_rows == 0:
                # rows available to fill with actual data
                return height - self._get_number_of_auxillary_rows()

            if self._is_screen_short(height):
                max_rows = height
            else:
                max_rows = self.max_rows
        else:
            max_rows = self.max_rows

        return self._adjust_max_rows(max_rows)

    def _adjust_max_rows(self, max_rows: int | None) -> int | None:
        """Adjust max_rows using display logic.

        See description here:
        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options

        GH #37359
        """
        if max_rows:
            if (len(self.frame) > max_rows) and self.min_rows:
                # if truncated, set max_rows showed to min_rows
                max_rows = min(self.min_rows, max_rows)
        return max_rows

    def _is_in_terminal(self) -> bool:
        """Check if the output is to be shown in terminal."""
        return bool(self.max_cols == 0 or self.max_rows == 0)

    def _is_screen_narrow(self, max_width: int) -> bool:
        """Whether ``max_cols == 0`` and the frame has more columns than ``max_width``."""
        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)

    def _is_screen_short(self, max_height: int) -> bool:
        """Whether ``max_rows == 0`` and the frame has more rows than ``max_height``."""
        return bool(self.max_rows == 0 and len(self.frame) > max_height)

    def _get_number_of_auxillary_rows(self) -> int:
        """Get number of rows occupied by prompt, dots and dimension info."""
        dot_row = 1
        prompt_row = 1
        num_rows = dot_row + prompt_row

        if self.show_dimensions:
            num_rows += len(self.dimensions_info.splitlines())

        if self.header:
            num_rows += 1

        return num_rows

    def truncate(self) -> None:
        """
        Check whether the frame should be truncated. If so, slice the frame up.
        """
        if self.is_truncated_horizontally:
            self._truncate_horizontally()

        if self.is_truncated_vertically:
            self._truncate_vertically()

    def _truncate_horizontally(self) -> None:
        """Remove columns, which are not to be displayed and adjust formatters.

        Attributes affected:
            - tr_frame
            - formatters
            - tr_col_num
        """
        assert self.max_cols_fitted is not None
        col_num = self.max_cols_fitted // 2
        if col_num >= 1:
            left = self.tr_frame.iloc[:, :col_num]
            right = self.tr_frame.iloc[:, -col_num:]
            self.tr_frame = concat((left, right), axis=1)

            # truncate formatter
            if isinstance(self.formatters, (list, tuple)):
                self.formatters = [
                    *self.formatters[:col_num],
                    *self.formatters[-col_num:],
                ]
        else:
            # Fewer than two columns fit: keep leading columns only.
            col_num = cast(int, self.max_cols)
            self.tr_frame = self.tr_frame.iloc[:, :col_num]
        self.tr_col_num = col_num

    def _truncate_vertically(self) -> None:
        """Remove rows, which are not to be displayed.

        Attributes affected:
            - tr_frame
            - tr_row_num
        """
        assert self.max_rows_fitted is not None
        row_num = self.max_rows_fitted // 2
        if row_num >= 1:
            head = self.tr_frame.iloc[:row_num, :]
            tail = self.tr_frame.iloc[-row_num:, :]
            self.tr_frame = concat((head, tail))
        else:
            # Fewer than two rows fit: keep leading rows only.
            row_num = cast(int, self.max_rows)
            self.tr_frame = self.tr_frame.iloc[:row_num, :]
        self.tr_row_num = row_num

    def _get_strcols_without_index(self) -> list[list[str]]:
        """
        Render every column of ``tr_frame`` to a list of equal-width strings,
        with the header cell(s) first unless the header is disabled.

        Raises
        ------
        ValueError
            If ``header`` is list-like and its length differs from the
            number of columns.
        """
        strcols: list[list[str]] = []

        if not is_list_like(self.header) and not self.header:
            # No header requested: values only.
            for i, c in enumerate(self.tr_frame):
                fmt_values = self.format_col(i)
                fmt_values = _make_fixed_width(
                    strings=fmt_values,
                    justify=self.justify,
                    minimum=int(self.col_space.get(c, 0)),
                    adj=self.adj,
                )
                strcols.append(fmt_values)
            return strcols

        if is_list_like(self.header):
            # cast here since can't be bool if is_list_like
            self.header = cast(List[str], self.header)
            if len(self.header) != len(self.columns):
                raise ValueError(
                    f"Writing {len(self.columns)} cols "
                    f"but got {len(self.header)} aliases"
                )
            str_columns = [[label] for label in self.header]
        else:
            str_columns = self._get_formatted_column_labels(self.tr_frame)

        if self.show_row_idx_names:
            # Extra blank header cell for the line holding the index names.
            for x in str_columns:
                x.append("")

        for i, c in enumerate(self.tr_frame):
            cheader = str_columns[i]
            header_colwidth = max(
                int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
            )
            fmt_values = self.format_col(i)
            fmt_values = _make_fixed_width(
                fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
            )

            max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
            cheader = self.adj.justify(cheader, max_len, mode=self.justify)
            strcols.append(cheader + fmt_values)

        return strcols

    def format_col(self, i: int) -> list[str]:
        """Format the values of the ``i``-th column of ``tr_frame``."""
        frame = self.tr_frame
        formatter = self._get_formatter(i)
        return format_array(
            frame.iloc[:, i]._values,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            space=self.col_space.get(frame.columns[i]),
            decimal=self.decimal,
            leading_space=self.index,
        )

    def _get_formatter(self, i: str | int) -> Callable | None:
        """
        Look up the formatter for a column.

        For list/tuple formatters only integer positions resolve; for
        mapping formatters an integer that is not itself a column label is
        translated to the label at that position first.
        """
        if isinstance(self.formatters, (list, tuple)):
            if is_integer(i):
                i = cast(int, i)
                return self.formatters[i]
            else:
                return None
        else:
            if is_integer(i) and i not in self.columns:
                i = self.columns[i]
            return self.formatters.get(i, None)

    def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
        """
        Return, per column of ``frame``, the list of header cells (one per
        column level), with a leading space on labels of numeric columns
        that have no custom formatter.
        """
        from pandas.core.indexes.multi import sparsify_labels

        columns = frame.columns

        if isinstance(columns, MultiIndex):
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = list(zip(*fmt_columns))
            # Take dtypes from ``frame`` itself so they stay positionally
            # aligned with ``fmt_columns`` after horizontal truncation.
            dtypes = frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            restrict_formatting = any(level.is_floating for level in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                if (
                    y not in self.formatters
                    and need_leadsp[x]
                    and not restrict_formatting
                ):
                    return " " + y
                return y

            str_columns = list(
                zip(*([space_format(x, y) for y in x] for x in fmt_columns))
            )
            if self.sparsify and len(str_columns):
                str_columns = sparsify_labels(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            # Same alignment requirement as in the MultiIndex branch.
            dtypes = frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            str_columns = [
                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
                for i, x in enumerate(fmt_columns)
            ]
        return str_columns

    def _get_formatted_index(self, frame: DataFrame) -> list[str]:
        """
        Format the index of ``frame`` as one string per output line,
        preceded by the column-header lines when a header is rendered.
        """
        # Note: this is only used by to_string() and to_latex(), not by
        # to_html(). so safe to cast col_space here.
        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
        index = frame.index
        columns = frame.columns
        fmt = self._get_formatter("__index__")

        if isinstance(index, MultiIndex):
            fmt_index = index.format(
                sparsify=self.sparsify,
                adjoin=False,
                names=self.show_row_idx_names,
                formatter=fmt,
            )
        else:
            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]

        fmt_index = [
            tuple(
                _make_fixed_width(
                    list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
                )
            )
            for x in fmt_index
        ]

        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

        # empty space for columns
        if self.show_col_idx_names:
            col_header = [str(x) for x in self._get_column_name_list()]
        else:
            col_header = [""] * columns.nlevels

        if self.header:
            return col_header + adjoined
        else:
            return adjoined

    def _get_column_name_list(self) -> list[Hashable]:
        """Return the column index name(s), with ``None`` replaced by ``""``."""
        names: list[Hashable] = []
        columns = self.frame.columns
        if isinstance(columns, MultiIndex):
            names.extend("" if name is None else name for name in columns.names)
        else:
            names.append("" if columns.name is None else columns.name)
        return names

1006

1007

1008class DataFrameRenderer:

1009 """Class for creating dataframe output in multiple formats.

1010

1011 Called in pandas.core.generic.NDFrame:

1012 - to_csv

1013 - to_latex

1014

1015 Called in pandas.core.frame.DataFrame:

1016 - to_html

1017 - to_string

1018

1019 Parameters

1020 ----------

1021 fmt : DataFrameFormatter

1022 Formatter with the formatting options.

1023 """

1024

1025 def __init__(self, fmt: DataFrameFormatter) -> None:

1026 self.fmt = fmt

1027

1028 def to_latex(

1029 self,

1030 buf: FilePath | WriteBuffer[str] | None = None,

1031 column_format: str | None = None,

1032 longtable: bool = False,

1033 encoding: str | None = None,

1034 multicolumn: bool = False,

1035 multicolumn_format: str | None = None,

1036 multirow: bool = False,

1037 caption: str | tuple[str, str] | None = None,

1038 label: str | None = None,

1039 position: str | None = None,

1040 ) -> str | None:

1041 """

1042 Render a DataFrame to a LaTeX tabular/longtable environment output.

1043 """

1044 from pandas.io.formats.latex import LatexFormatter

1045

1046 latex_formatter = LatexFormatter(

1047 self.fmt,

1048 longtable=longtable,

1049 column_format=column_format,

1050 multicolumn=multicolumn,

1051 multicolumn_format=multicolumn_format,

1052 multirow=multirow,

1053 caption=caption,

1054 label=label,

1055 position=position,

1056 )

1057 string = latex_formatter.to_string()

1058 return save_to_buffer(string, buf=buf, encoding=encoding)

1059

def to_html(
    self,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
    classes: str | list | tuple | None = None,
    notebook: bool = False,
    border: int | bool | None = None,
    table_id: str | None = None,
    render_links: bool = False,
) -> str | None:
    """
    Render a DataFrame to a html table.

    Parameters
    ----------
    buf : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a string ``write()`` function. If None, the result is
        returned as a string.
    encoding : str, default "utf-8"
        Set character encoding.
    classes : str or list-like
        Classes to include in the `class` attribute of the opening
        ``<table>`` tag, in addition to the default "dataframe".
    notebook : {True, False}, optional, default False
        Whether the generated HTML is for IPython Notebook.
    border : int
        A ``border=border`` attribute is included in the opening
        ``<table>`` tag. Default ``pd.options.display.html.border``.
    table_id : str, optional
        A css id is included in the opening `<table>` tag if specified.
    render_links : bool, default False
        Convert URLs to HTML links.
    """
    from pandas.io.formats.html import (
        HTMLFormatter,
        NotebookFormatter,
    )

    # The notebook flag only selects which formatter class does the work.
    if notebook:
        formatter_cls = NotebookFormatter
    else:
        formatter_cls = HTMLFormatter

    rendered = formatter_cls(
        self.fmt,
        classes=classes,
        border=border,
        table_id=table_id,
        render_links=render_links,
    ).to_string()
    return save_to_buffer(rendered, buf=buf, encoding=encoding)

1110

def to_string(
    self,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
    line_width: int | None = None,
) -> str | None:
    """
    Render a DataFrame to a console-friendly tabular output.

    Parameters
    ----------
    buf : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a string ``write()`` function. If None, the result is
        returned as a string.
    encoding : str, default "utf-8"
        Set character encoding.
    line_width : int, optional
        Width to wrap a line in characters.
    """
    from pandas.io.formats.string import StringFormatter

    rendered = StringFormatter(self.fmt, line_width=line_width).to_string()
    return save_to_buffer(rendered, buf=buf, encoding=encoding)

1136

def to_csv(
    self,
    path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
    encoding: str | None = None,
    sep: str = ",",
    columns: Sequence[Hashable] | None = None,
    index_label: IndexLabel | None = None,
    mode: str = "w",
    compression: CompressionOptions = "infer",
    quoting: int | None = None,
    quotechar: str = '"',
    lineterminator: str | None = None,
    chunksize: int | None = None,
    date_format: str | None = None,
    doublequote: bool = True,
    escapechar: str | None = None,
    errors: str = "strict",
    storage_options: StorageOptions = None,
) -> str | None:
    """
    Render dataframe as comma-separated file.

    Every option is forwarded to ``CSVFormatter``. When ``path_or_buf`` is
    None the output is collected in an in-memory ``StringIO`` and returned
    as a string; otherwise the data is written to ``path_or_buf`` and None
    is returned.
    """
    from pandas.io.formats.csvs import CSVFormatter

    # Remember whether the buffer is ours so we know to hand back its text.
    created_buffer = path_or_buf is None
    if created_buffer:
        path_or_buf = StringIO()

    CSVFormatter(
        path_or_buf=path_or_buf,
        lineterminator=lineterminator,
        sep=sep,
        encoding=encoding,
        errors=errors,
        compression=compression,
        quoting=quoting,
        cols=columns,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        storage_options=storage_options,
        formatter=self.fmt,
    ).save()

    if not created_buffer:
        return None

    assert isinstance(path_or_buf, StringIO)
    content = path_or_buf.getvalue()
    path_or_buf.close()
    return content

1195

1196

def save_to_buffer(
    string: str,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
) -> str | None:
    """
    Write ``string`` to ``buf``, or return it when ``buf`` is None.

    The buffer is obtained from ``get_buffer``; when no ``buf`` was given
    the text written to the temporary buffer is read back and returned.
    """
    with get_buffer(buf, encoding=encoding) as handle:
        handle.write(string)
        if buf is not None:
            return None
        # error: "WriteBuffer[str]" has no attribute "getvalue"
        return handle.getvalue()  # type: ignore[attr-defined]

1211

1212

@contextmanager
def get_buffer(
    buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]:
    """
    Yield a writable text buffer for ``buf``.

    A file name or path-like object is opened for writing and closed on
    exit; an object with a ``write`` attribute is yielded unchanged; None
    yields a fresh ``StringIO``.

    Raises
    ------
    ValueError
        If ``encoding`` is given but ``buf`` is not a file name.
    TypeError
        If ``buf`` is neither a file name nor has a ``write`` method.
    """
    target = StringIO() if buf is None else stringify_path(buf)

    if encoding is None:
        encoding = "utf-8"
    elif not isinstance(target, str):
        raise ValueError("buf is not a file name and encoding is specified.")

    if hasattr(target, "write"):
        # Incompatible types in "yield" (actual type "Union[str, WriteBuffer[str],
        # StringIO]", expected type "Union[WriteBuffer[str], StringIO]")
        yield target  # type: ignore[misc]
        return

    if not isinstance(target, str):
        raise TypeError("buf is not a file name and it has no write method")

    check_parent_directory(str(target))
    # GH#30034 open instead of codecs.open prevents a file leak
    # if we have an invalid encoding argument.
    # newline="" is needed to roundtrip correctly on
    # windows test_to_latex_filename
    with open(target, "w", encoding=encoding, newline="") as f:
        yield f

1245

1246

1247# ----------------------------------------------------------------------

1248# Array formatters

1249

1250

def format_array(
    values: Any,
    formatter: Callable | None,
    float_format: FloatFormatType | None = None,
    na_rep: str = "NaN",
    digits: int | None = None,
    space: str | int | None = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: bool | None = True,
    quoting: int | None = None,
    fallback_formatter: Callable | None = None,
) -> list[str]:
    """
    Format an array for printing.

    A formatter class is chosen from ``values.dtype`` and its
    ``get_result()`` is returned.

    Parameters
    ----------
    values
    formatter
    float_format
        Falls back to the ``display.float_format`` option when None.
    na_rep
    digits
        Falls back to the ``display.precision`` option when None.
    space
        Defaults to 12 when None.
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting
    fallback_formatter

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # Order matters: the datetime-like checks come before the generic
    # extension-array check.
    fmt_klass: type[GenericArrayFormatter]
    if is_datetime64_dtype(dtype):
        fmt_klass = Datetime64Formatter
    elif isinstance(dtype, DatetimeTZDtype):
        fmt_klass = Datetime64TZFormatter
    elif is_timedelta64_dtype(dtype):
        fmt_klass = Timedelta64Formatter
    elif is_extension_array_dtype(dtype):
        fmt_klass = ExtensionArrayFormatter
    elif is_float_dtype(dtype) or is_complex_dtype(dtype):
        fmt_klass = FloatArrayFormatter
    elif is_integer_dtype(dtype):
        fmt_klass = IntArrayFormatter
    else:
        fmt_klass = GenericArrayFormatter

    # Fill in the defaults the caller left unspecified.
    if space is None:
        space = 12
    if float_format is None:
        float_format = get_option("display.float_format")
    if digits is None:
        digits = get_option("display.precision")

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
        fallback_formatter=fallback_formatter,
    ).get_result()

1331

1332

class GenericArrayFormatter:
    """
    Base formatter turning an array of values into a list of strings.

    ``get_result`` formats every element via ``_format_strings`` and then
    pads the strings with ``_make_fixed_width`` according to ``justify``.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Callable | None = None,
        na_rep: str = "NaN",
        space: str | int = 12,
        float_format: FloatFormatType | None = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: int | None = None,
        fixed_width: bool = True,
        leading_space: bool | None = True,
        fallback_formatter: Callable | None = None,
    ) -> None:
        # The data plus the optional per-element formatters.
        self.values = values
        self.formatter = formatter
        self.fallback_formatter = fallback_formatter
        # Numeric rendering options.
        self.digits = digits
        self.float_format = float_format
        self.decimal = decimal
        self.na_rep = na_rep
        # Layout options.
        self.space = space
        self.justify = justify
        self.fixed_width = fixed_width
        self.leading_space = leading_space
        self.quoting = quoting

    def get_result(self) -> list[str]:
        """Return the formatted values, padded to a common width."""
        return _make_fixed_width(self._format_strings(), self.justify)

    def _format_strings(self) -> list[str]:
        """Format each element of ``self.values`` as an (unpadded) string."""
        # Float formatter: explicit argument, then the display option, then
        # a fixed-precision default with trailing zeros trimmed.
        if self.float_format is not None:
            float_format = self.float_format
        else:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")
                float_format = lambda x: _trim_zeros_single_float(
                    f"{x: .{precision:d}f}"
                )

        # Generic formatter: explicit, then fallback, then pretty-printing.
        formatter = self.formatter
        if formatter is None:
            formatter = self.fallback_formatter
        if formatter is None:
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                printing.pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            is_missing = self.na_rep is not None and is_scalar(x) and isna(x)
            if is_missing:
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    if x is NA:
                        return str(NA)
                    if x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            if isinstance(x, PandasObject):
                return str(x)
            if isinstance(x, StringDtype):
                return repr(x)
            # object dtype
            return str(formatter(x))

        vals = extract_array(self.values, extract_numpy=True)
        if not isinstance(vals, np.ndarray):
            raise TypeError(
                "ExtensionArray formatting should use ExtensionArrayFormatter"
            )

        inferred = lib.map_infer(vals, is_float)
        is_float_type = (
            inferred
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )

        leading_space = self.leading_space
        if leading_space is None:
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            float_like = is_float_type[i]
            if (not float_like or self.formatter is not None) and leading_space:
                fmt_values.append(f" {_format(v)}")
            elif float_like:
                fmt_values.append(float_format(v))
            else:
                # False specifically, so that the default is
                # to include a space if we get here.
                pad = "" if leading_space is False else " "
                fmt_values.append(f"{pad}{_format(v)}")

        return fmt_values

1443

1444

class FloatArrayFormatter(GenericArrayFormatter):
    """Formatter for float and complex arrays."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is None or self.formatter is not None:
            return

        # GH21625, GH22270
        self.fixed_width = False
        if callable(self.float_format):
            self.formatter = self.float_format
            self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """Returns a function to be applied on each value to format it"""
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # The returned function is a composition: convert to a string,
        # then swap the decimal symbol, then chop according to threshold.

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                if notna(v):
                    # error: "str" not callable
                    # error: Unexpected keyword argument "value" for "__call__" of
                    # "EngFormatter"
                    return float_format(value=v)  # type: ignore[operator,call-arg]
                return self.na_rep

        else:

            def base_formatter(v):
                if notna(v):
                    return str(v)
                return self.na_rep

        if self.decimal == ".":
            decimal_formatter = base_formatter
        else:

            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if not notna(value):
                return self.na_rep
            # Values at or below the threshold are displayed as zero.
            shown = value if abs(value) > threshold else 0.0
            return decimal_formatter(shown)

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            mask = isna(values)
            cells = [
                na_rep if missing else formatter(val)
                for val, missing in zip(values.ravel(), mask.ravel())
            ]
            return np.array(cells).reshape(values.shape)

        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        threshold = get_option("display.chop_threshold") if self.fixed_width else None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep

            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if not self.fixed_width:
                return values

            if is_complex:
                result = _trim_zeros_complex(values, self.decimal)
            else:
                result = _trim_zeros_float(values, self.decimal)
            return np.asarray(result, dtype="object")

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is not None:
            float_format = lambda value: self.float_format % value
        elif self.fixed_width:
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}f}"
            else:
                fmt_str = "{value:.{digits:d}f}"
            float_format = partial(fmt_str.format, digits=self.digits)
        else:
            float_format = None

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need do convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space
        too_long = False
        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more that 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if not (has_small_values or (too_long and has_large_values)):
            return formatted_values

        if self.leading_space is True:
            fmt_str = "{value: .{digits:d}e}"
        else:
            fmt_str = "{value:.{digits:d}e}"
        float_format = partial(fmt_str.format, digits=self.digits)
        return format_values_with(float_format)

    def _format_strings(self) -> list[str]:
        return list(self.get_result_as_array())

1615

1616

class IntArrayFormatter(GenericArrayFormatter):
    """Formatter for integer arrays."""

    def _format_strings(self) -> list[str]:
        """
        Format each integer, using ``self.formatter`` when one is set.

        The default rendering uses the ``d`` format code, with a leading
        space reserved for the sign unless ``leading_space`` is False.
        """
        # The f-string already yields the final text; chaining
        # ``.format(x=x)`` onto it was redundant.
        if self.leading_space is False:
            formatter_str = lambda x: f"{x:d}"
        else:
            formatter_str = lambda x: f"{x: d}"
        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]

1626

1627

class Datetime64Formatter(GenericArrayFormatter):
    """Formatter for datetime64 values without a timezone."""

    def __init__(
        self,
        values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        # String used for missing values, and the optional date format.
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> list[str]:
        """we by definition have DO NOT have a TZ"""
        index = self.values
        if not isinstance(index, DatetimeIndex):
            index = DatetimeIndex(index)

        custom = self.formatter
        if custom is not None and callable(custom):
            return [custom(x) for x in index]

        # No usable custom formatter: let the underlying array format itself.
        native = index._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return native.tolist()

1654

1655

class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter for extension arrays; delegates to ``format_array``."""

    def _format_strings(self) -> list[str]:
        values = extract_array(self.values, extract_numpy=True)

        # Only ask the array for its own formatter when the caller did not
        # supply one; it is then passed on as the fallback.
        formatter = self.formatter
        fallback_formatter = (
            values._formatter(boxed=True) if formatter is None else None
        )

        if isinstance(values, Categorical):
            # Categorical is special for now, so that we can preserve tzinfo
            array = values._internal_get_values()
        else:
            array = np.asarray(values)

        return format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
            fallback_formatter=fallback_formatter,
        )

1685

1686

def format_percentiles(
    percentiles: (np.ndarray | Sequence[float]),
) -> list[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or has an entry outside [0, 1].

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """
    pcts = np.asarray(percentiles)

    # It checks for np.NaN as well
    with np.errstate(invalid="ignore"):
        in_unit_interval = (
            is_numeric_dtype(pcts) and np.all(pcts >= 0) and np.all(pcts <= 1)
        )
    if not in_unit_interval:
        raise ValueError("percentiles should all be in the interval [0,1]")

    pcts = 100 * pcts
    whole = pcts.round().astype(int)
    is_whole = np.isclose(whole, pcts)

    if np.all(is_whole):
        return [text + "%" for text in whole.astype(str)]

    unique_pcts = np.unique(pcts)
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    gaps = np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)
    prec = -np.floor(np.log10(np.min(gaps))).astype(int)
    prec = max(1, prec)

    out = np.empty_like(pcts, dtype=object)
    out[is_whole] = pcts[is_whole].round().astype(int).astype(str)
    out[~is_whole] = pcts[~is_whole].round(prec).astype(str)
    return [text + "%" for text in out]

1755

1756

def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    """
    Return True if ``values`` has no timezone and no time-of-day component.

    Missing entries (``iNaT``) are ignored by the check.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    if values.tz is not None:
        return False

    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    reso = get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    ppd = periods_per_day(reso)

    values_int = values.asi8
    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    has_time_part = np.logical_and(values_int != iNaT, values_int % ppd != 0)
    return bool(has_time_part.sum() == 0)

1780

1781

1782def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:

1783 if x is NaT:

1784 return nat_rep

1785

1786 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')

1787 # so it already uses string formatting rather than strftime (faster).

1788 return str(x)

1789

1790

1791def _format_datetime64_dateonly(

1792 x: NaTType | Timestamp,

1793 nat_rep: str = "NaT",

1794 date_format: str | None = None,

1795) -> str:

1796 if isinstance(x, NaTType):

1797 return nat_rep

1798

1799 if date_format:

1800 return x.strftime(date_format)

1801 else:

1802 # Timestamp._date_repr relies on string formatting (faster than strftime)

1803 return x._date_repr

1804

1805

def get_format_datetime64(
    is_dates_only_: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """Return a formatter callable taking a datetime64 as input and providing
    a string as output"""

    def _dates_only(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    def _full(x):
        # ``date_format`` is not used when a time component is shown.
        return _format_datetime64(x, nat_rep=nat_rep)

    return _dates_only if is_dates_only_ else _full

1818

1819

def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """given values and a date_format, return a string format"""
    needs_flattening = isinstance(values, np.ndarray) and values.ndim > 1
    if needs_flattening:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    if not is_dates_only(values):
        return date_format
    # Only dates and no timezone: provide a default format
    return date_format or "%Y-%m-%d"

1834

1835

class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter for timezone-aware datetime values."""

    def _format_strings(self) -> list[str]:
        """we by definition have a TZ"""
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)

        # A user-supplied formatter wins over the default one.
        formatter = self.formatter
        if not formatter:
            formatter = get_format_datetime64(
                dates_only, date_format=self.date_format
            )
        return [formatter(x) for x in boxed]

1847

1848

class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter for timedelta64 values."""

    def __init__(
        self,
        values: np.ndarray | TimedeltaIndex,
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        # ``box`` is forwarded to ``get_format_timedelta64``, which wraps
        # each formatted value in single quotes when it is true.
        self.nat_rep = nat_rep
        self.box = box

    def _format_strings(self) -> list[str]:
        formatter = self.formatter
        if not formatter:
            formatter = get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [formatter(x) for x in self.values]

1866

1867

def get_format_timedelta64(
    values: np.ndarray | TimedeltaIndex | TimedeltaArray,
    nat_rep: str | float = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.
    These will all have the same format argument

    If box, then show the return in quotes
    """
    values_int = values.view(np.int64)
    one_day_nanos = 86400 * 10**9

    is_valid = values_int != iNaT
    # error: Unsupported operand types for % ("ExtensionArray" and "int")
    not_midnight = values_int % one_day_nanos != 0  # type: ignore[operator]
    # error: Argument 1 to "__call__" of "ufunc" has incompatible type
    # "Union[Any, ExtensionArray, ndarray]"; expected
    # "Union[Union[int, float, complex, str, bytes, generic],
    # Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    partial_days = np.logical_and(is_valid, not_midnight)  # type: ignore[arg-type]
    even_days = partial_days.sum() == 0

    # Whole-day data gets the short repr; anything else the "long" one.
    repr_format = None if even_days else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        delta = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = delta._repr_base(format=repr_format)
        return f"'{text}'" if box else text

    return _formatter

1913

1914

1915def _make_fixed_width(

1916 strings: list[str],

1917 justify: str = "right",

1918 minimum: int | None = None,

1919 adj: TextAdjustment | None = None,

1920) -> list[str]:

1921 if len(strings) == 0 or justify == "all":

1922 return strings

1923

1924 if adj is None:

1925 adjustment = get_adjustment()

1926 else:

1927 adjustment = adj

1928

1929 max_len = max(adjustment.len(x) for x in strings)

1930

1931 if minimum is not None:

1932 max_len = max(minimum, max_len)

1933

1934 conf_max = get_option("display.max_colwidth")

1935 if conf_max is not None and max_len > conf_max:

1936 max_len = conf_max

1937

1938 def just(x: str) -> str:

1939 if conf_max is not None:

1940 if (conf_max > 3) & (adjustment.len(x) > max_len):

1941 x = x[: max_len - 3] + "..."

1942 return x

1943

1944 strings = [just(x) for x in strings]

1945 result = adjustment.justify(strings, max_len, mode=justify)

1946 return result

1947

1948

1949def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:

1950 """

1951 Separates the real and imaginary parts from the complex number, and

1952 executes the _trim_zeros_float method on each of those.

1953 """

1954 trimmed = [

1955 "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))

1956 for x in str_complexes

1957 ]

1958

1959 # pad strings to the length of the longest trimmed string for alignment

1960 lengths = [len(s) for s in trimmed]

1961 max_length = max(lengths)

1962 padded = [

1963 s[: -((k - 1) // 2 + 1)] # real part

1964 + (max_length - k) // 2 * "0"

1965 + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / -

1966 + s[-((k - 1) // 2) : -1] # imaginary part

1967 + (max_length - k) // 2 * "0"

1968 + s[-1]

1969 for s, k in zip(trimmed, lengths)

1970 ]

1971 return padded

1972

1973

1974def _trim_zeros_single_float(str_float: str) -> str:

1975 """

1976 Trims trailing zeros after a decimal point,

1977 leaving just one if necessary.

1978 """

1979 str_float = str_float.rstrip("0")

1980 if str_float.endswith("."):

1981 str_float += "0"

1982

1983 return str_float

1984

1985

1986def _trim_zeros_float(

1987 str_floats: np.ndarray | list[str], decimal: str = "."

1988) -> list[str]:

1989 """

1990 Trims the maximum number of trailing zeros equally from

1991 all numbers containing decimals, leaving just one if

1992 necessary.

1993 """

1994 trimmed = str_floats

1995 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")

1996

1997 def is_number_with_decimal(x) -> bool:

1998 return re.match(number_regex, x) is not None

1999

2000 def should_trim(values: np.ndarray | list[str]) -> bool:

2001 """

2002 Determine if an array of strings should be trimmed.

2003

2004 Returns True if all numbers containing decimals (defined by the

2005 above regular expression) within the array end in a zero, otherwise

2006 returns False.

2007 """

2008 numbers = [x for x in values if is_number_with_decimal(x)]

2009 return len(numbers) > 0 and all(x.endswith("0") for x in numbers)

2010

2011 while should_trim(trimmed):

2012 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]

2013

2014 # leave one 0 after the decimal points if need be.

2015 result = [

2016 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x

2017 for x in trimmed

2018 ]

2019 return result

2020

2021

2022def _has_names(index: Index) -> bool:

2023 if isinstance(index, MultiIndex):

2024 return com.any_not_none(*index.names)

2025 else:

2026 return index.name is not None

2027

2028

class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        # accuracy: digits after the decimal point (None selects "g" formatting).
        # use_eng_prefix: append an SI letter instead of an "E+XX" exponent.
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        Parameters
        ----------
        num : numeric, or str convertible by ``decimal.Decimal``
            The value to represent.

        Returns
        -------
        str
            Engineering formatted string. NaN is rendered as ``"NaN"`` and
            infinities as ``"inf"`` / ``"-inf"``.
        """
        dnum = Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Keep the sign: negative infinity must not print as "inf".
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Largest multiple of three not above the base-10 exponent.
        if dnum != 0:
            pow10 = Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = Decimal(0)

        # Clamp to the range of known prefixes.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        elif int_pow10 < 0:
            prefix = f"E-{-int_pow10:02d}"
        else:
            prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        return format_str.format(mant=mant, prefix=prefix)

2124

2125

def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Switch the float display of DataFrames to engineering (SI) notation.

    An ``EngFormatter`` built from the arguments is installed as the
    ``display.float_format`` option.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Whether to represent a value with SI prefixes.

    Returns
    -------
    None

    Examples
    --------
    >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6])
    >>> df
                  0
    0  1.000000e-09
    1  1.000000e-03
    2  1.000000e+00
    3  1.000000e+03
    4  1.000000e+06

    >>> pd.set_eng_float_format(accuracy=1)
    >>> df
             0
    0  1.0E-09
    1  1.0E-03
    2  1.0E+00
    3  1.0E+03
    4  1.0E+06

    >>> pd.set_eng_float_format(use_eng_prefix=True)
    >>> df
            0
    0  1.000n
    1  1.000m
    2   1.000
    3  1.000k
    4  1.000M

    >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    >>> df
          0
    0  1.0n
    1  1.0m
    2   1.0
    3  1.0k
    4  1.0M

    >>> pd.set_option("display.float_format", None)  # unset option
    """
    formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", formatter)

2182

2183

def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    Compute, for every level, the run length of each label.

    Parameters
    ----------
    levels : list of lists
        The label values of each level.
    sentinel : string, optional
        Value which states that no new label starts at that position.

    Returns
    -------
    list of dict
        One mapping per level; each key is the position at which a label
        starts and each value is the number of positions it spans.
    """
    if len(levels) == 0:
        return []

    # A position may only be swallowed by the sentinel while every outer
    # level seen so far was also the sentinel at that position.
    still_open = [True] * len(levels[0])

    spans_per_level = []
    for level in levels:
        start = 0
        spans = {}
        for pos, label in enumerate(level):
            if still_open[pos] and label == sentinel:
                continue
            still_open[pos] = False
            spans[start] = pos - start
            start = pos

        # Close the run that extends to the end of the level.
        spans[start] = len(level) - start
        spans_per_level.append(spans)

    return spans_per_level

2225

2226

def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    The entries are joined with newline characters and written with a single
    ``buf.write`` call; no trailing newline is added.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Entries that are not already strings are
        converted with ``str``.
    """
    # ``str.join`` rejects non-string items, so coerce every entry
    # unconditionally rather than only when some entry is already a string.
    buf.write("\n".join(str(x) for x in lines))