Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/formats/format.py: 25%

1"""

2Internal module for formatting output data in csv, html, xml,

3and latex files. This module also applies to display formatting.

4"""

5from __future__ import annotations

7from collections.abc import (

8 Generator,

9 Hashable,

10 Mapping,

11 Sequence,

12)

13from contextlib import contextmanager

14from csv import QUOTE_NONE

15from decimal import Decimal

16from functools import partial

17from io import StringIO

18import math

19import re

20from shutil import get_terminal_size

21from typing import (

22 TYPE_CHECKING,

23 Any,

24 Callable,

25 Final,

26 cast,

27)

29import numpy as np

31from pandas._config.config import (

32 get_option,

33 set_option,

34)

36from pandas._libs import lib

37from pandas._libs.missing import NA

38from pandas._libs.tslibs import (

39 NaT,

40 Timedelta,

41 Timestamp,

42)

43from pandas._libs.tslibs.nattype import NaTType

45from pandas.core.dtypes.common import (

46 is_complex_dtype,

47 is_float,

48 is_integer,

49 is_list_like,

50 is_numeric_dtype,

51 is_scalar,

52)

53from pandas.core.dtypes.dtypes import (

54 CategoricalDtype,

55 DatetimeTZDtype,

56 ExtensionDtype,

57)

58from pandas.core.dtypes.missing import (

59 isna,

60 notna,

61)

63from pandas.core.arrays import (

64 Categorical,

65 DatetimeArray,

66 ExtensionArray,

67 TimedeltaArray,

68)

69from pandas.core.arrays.string_ import StringDtype

70from pandas.core.base import PandasObject

71import pandas.core.common as com

72from pandas.core.indexes.api import (

73 Index,

74 MultiIndex,

75 PeriodIndex,

76 ensure_index,

77)

78from pandas.core.indexes.datetimes import DatetimeIndex

79from pandas.core.indexes.timedeltas import TimedeltaIndex

80from pandas.core.reshape.concat import concat

82from pandas.io.common import (

83 check_parent_directory,

84 stringify_path,

85)

86from pandas.io.formats import printing

88if TYPE_CHECKING:

89 from pandas._typing import (

90 ArrayLike,

91 Axes,

92 ColspaceArgType,

93 ColspaceType,

94 CompressionOptions,

95 FilePath,

96 FloatFormatType,

97 FormattersType,

98 IndexLabel,

99 SequenceNotStr,

100 StorageOptions,

101 WriteBuffer,

102 )

103

104 from pandas import (

105 DataFrame,

106 Series,

107 )

108

109

110common_docstring: Final = """

111 Parameters

112 ----------

113 buf : str, Path or StringIO-like, optional, default None

114 Buffer to write to. If None, the output is returned as a string.

115 columns : array-like, optional, default None

116 The subset of columns to write. Writes all columns by default.

117 col_space : %(col_space_type)s, optional

118 %(col_space)s.

119 header : %(header_type)s, optional

120 %(header)s.

121 index : bool, optional, default True

122 Whether to print index (row) labels.

123 na_rep : str, optional, default 'NaN'

124 String representation of ``NaN`` to use.

125 formatters : list, tuple or dict of one-param. functions, optional

126 Formatter functions to apply to columns' elements by position or

127 name.

128 The result of each function must be a unicode string.

129 List/tuple must be of length equal to the number of columns.

130 float_format : one-parameter function, optional, default None

131 Formatter function to apply to columns' elements if they are

132 floats. This function must return a unicode string and will be

133 applied only to the non-``NaN`` elements, with ``NaN`` being

134 handled by ``na_rep``.

135 sparsify : bool, optional, default True

136 Set to False for a DataFrame with a hierarchical index to print

137 every multiindex key at each row.

138 index_names : bool, optional, default True

139 Prints the names of the indexes.

140 justify : str, default None

141 How to justify the column labels. If None uses the option from

142 the print configuration (controlled by set_option), 'right' out

143 of the box. Valid values are

144

145 * left

146 * right

147 * center

148 * justify

149 * justify-all

150 * start

151 * end

152 * inherit

153 * match-parent

154 * initial

155 * unset.

156 max_rows : int, optional

157 Maximum number of rows to display in the console.

158 max_cols : int, optional

159 Maximum number of columns to display in the console.

160 show_dimensions : bool, default False

161 Display DataFrame dimensions (number of rows by number of columns).

162 decimal : str, default '.'

163 Character recognized as decimal separator, e.g. ',' in Europe.

164 """

165

# Names accepted as a column-label justification mode. The same eleven values
# are listed under ``justify`` in ``common_docstring``.
# NOTE(review): the code that validates ``justify`` against this tuple is not
# in the visible part of this module -- confirm where it is consumed.
VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)

179

180return_docstring: Final = """

181 Returns

182 -------

183 str or None

184 If buf is None, returns the result as a string. Otherwise returns

185 None.

186 """

187

188

189class SeriesFormatter:

190 """

191 Implement the main logic of Series.to_string, which underlies

192 Series.__repr__.

193 """

194

195 def __init__(

196 self,

197 series: Series,

198 *,

199 length: bool | str = True,

200 header: bool = True,

201 index: bool = True,

202 na_rep: str = "NaN",

203 name: bool = False,

204 float_format: str | None = None,

205 dtype: bool = True,

206 max_rows: int | None = None,

207 min_rows: int | None = None,

208 ) -> None:

209 self.series = series

210 self.buf = StringIO()

211 self.name = name

212 self.na_rep = na_rep

213 self.header = header

214 self.length = length

215 self.index = index

216 self.max_rows = max_rows

217 self.min_rows = min_rows

218

219 if float_format is None:

220 float_format = get_option("display.float_format")

221 self.float_format = float_format

222 self.dtype = dtype

223 self.adj = printing.get_adjustment()

224

225 self._chk_truncate()

226

227 def _chk_truncate(self) -> None:

228 self.tr_row_num: int | None

229

230 min_rows = self.min_rows

231 max_rows = self.max_rows

232 # truncation determined by max_rows, actual truncated number of rows

233 # used below by min_rows

234 is_truncated_vertically = max_rows and (len(self.series) > max_rows)

235 series = self.series

236 if is_truncated_vertically:

237 max_rows = cast(int, max_rows)

238 if min_rows:

239 # if min_rows is set (not None or 0), set max_rows to minimum

240 # of both

241 max_rows = min(min_rows, max_rows)

242 if max_rows == 1:

243 row_num = max_rows

244 series = series.iloc[:max_rows]

245 else:

246 row_num = max_rows // 2

247 series = concat((series.iloc[:row_num], series.iloc[-row_num:]))

248 self.tr_row_num = row_num

249 else:

250 self.tr_row_num = None

251 self.tr_series = series

252 self.is_truncated_vertically = is_truncated_vertically

253

254 def _get_footer(self) -> str:

255 name = self.series.name

256 footer = ""

257

258 index = self.series.index

259 if (

260 isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex))

261 and index.freq is not None

262 ):

263 footer += f"Freq: {index.freqstr}"

264

265 if self.name is not False and name is not None:

266 if footer:

267 footer += ", "

268

269 series_name = printing.pprint_thing(name, escape_chars=("\t", "\r", "\n"))

270 footer += f"Name: {series_name}"

271

272 if self.length is True or (

273 self.length == "truncate" and self.is_truncated_vertically

274 ):

275 if footer:

276 footer += ", "

277 footer += f"Length: {len(self.series)}"

278

279 if self.dtype is not False and self.dtype is not None:

280 dtype_name = getattr(self.tr_series.dtype, "name", None)

281 if dtype_name:

282 if footer:

283 footer += ", "

284 footer += f"dtype: {printing.pprint_thing(dtype_name)}"

285

286 # level infos are added to the end and in a new line, like it is done

287 # for Categoricals

288 if isinstance(self.tr_series.dtype, CategoricalDtype):

289 level_info = self.tr_series._values._get_repr_footer()

290 if footer:

291 footer += "\n"

292 footer += level_info

293

294 return str(footer)

295

296 def _get_formatted_values(self) -> list[str]:

297 return format_array(

298 self.tr_series._values,

299 None,

300 float_format=self.float_format,

301 na_rep=self.na_rep,

302 leading_space=self.index,

303 )

304

305 def to_string(self) -> str:

306 series = self.tr_series

307 footer = self._get_footer()

308

309 if len(series) == 0:

310 return f"{type(self.series).__name__}([], {footer})"

311

312 index = series.index

313 have_header = _has_names(index)

314 if isinstance(index, MultiIndex):

315 fmt_index = index._format_multi(include_names=True, sparsify=None)

316 adj = printing.get_adjustment()

317 fmt_index = adj.adjoin(2, *fmt_index).split("\n")

318 else:

319 fmt_index = index._format_flat(include_name=True)

320 fmt_values = self._get_formatted_values()

321

322 if self.is_truncated_vertically:

323 n_header_rows = 0

324 row_num = self.tr_row_num

325 row_num = cast(int, row_num)

326 width = self.adj.len(fmt_values[row_num - 1])

327 if width > 3:

328 dot_str = "..."

329 else:

330 dot_str = ".."

331 # Series uses mode=center because it has single value columns

332 # DataFrame uses mode=left

333 dot_str = self.adj.justify([dot_str], width, mode="center")[0]

334 fmt_values.insert(row_num + n_header_rows, dot_str)

335 fmt_index.insert(row_num + 1, "")

336

337 if self.index:

338 result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])

339 else:

340 result = self.adj.adjoin(3, fmt_values)

341

342 if self.header and have_header:

343 result = fmt_index[0] + "\n" + result

344

345 if footer:

346 result += "\n" + footer

347

348 return str("".join(result))

349

350

def get_dataframe_repr_params() -> dict[str, Any]:
    """Collect the keyword arguments that make DataFrame.to_string match repr.

    The values are read from the ``display.*`` options. ``line_width`` is the
    console width when ``display.expand_frame_repr`` is enabled and None
    otherwise.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    line_width = (
        console.get_console_size()[0]
        if get_option("display.expand_frame_repr")
        else None
    )

    # to_string keyword -> display option it is read from
    option_for_keyword = (
        ("max_rows", "display.max_rows"),
        ("min_rows", "display.min_rows"),
        ("max_cols", "display.max_columns"),
        ("max_colwidth", "display.max_colwidth"),
        ("show_dimensions", "display.show_dimensions"),
    )
    params: dict[str, Any] = {
        keyword: get_option(option) for keyword, option in option_for_keyword
    }
    params["line_width"] = line_width
    return params

382

383

def get_series_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(Series) calls using Series.to_string.

    Supplying these parameters to Series.to_string is equivalent to calling
    ``repr(series)``. This is useful if you want to adjust the series repr output.

    When the ``display.max_rows`` option is 0, both ``max_rows`` and
    ``min_rows`` are set to the terminal height; otherwise they come from the
    ``display.max_rows`` and ``display.min_rows`` options.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    # only the height is needed; the terminal width is not used here
    _, height = get_terminal_size()
    max_rows_opt = get_option("display.max_rows")
    max_rows = height if max_rows_opt == 0 else max_rows_opt
    min_rows = height if max_rows_opt == 0 else get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }

413

414

415class DataFrameFormatter:

416 """

417 Class for processing dataframe formatting options and data.

418

419 Used by DataFrame.to_string, which backs DataFrame.__repr__.

420 """

421

422 __doc__ = __doc__ if __doc__ else ""

423 __doc__ += common_docstring + return_docstring

424

425 def __init__(

426 self,

427 frame: DataFrame,

428 columns: Axes | None = None,

429 col_space: ColspaceArgType | None = None,

430 header: bool | SequenceNotStr[str] = True,

431 index: bool = True,

432 na_rep: str = "NaN",

433 formatters: FormattersType | None = None,

434 justify: str | None = None,

435 float_format: FloatFormatType | None = None,

436 sparsify: bool | None = None,

437 index_names: bool = True,

438 max_rows: int | None = None,

439 min_rows: int | None = None,

440 max_cols: int | None = None,

441 show_dimensions: bool | str = False,

442 decimal: str = ".",

443 bold_rows: bool = False,

444 escape: bool = True,

445 ) -> None:

446 self.frame = frame

447 self.columns = self._initialize_columns(columns)

448 self.col_space = self._initialize_colspace(col_space)

449 self.header = header

450 self.index = index

451 self.na_rep = na_rep

452 self.formatters = self._initialize_formatters(formatters)

453 self.justify = self._initialize_justify(justify)

454 self.float_format = float_format

455 self.sparsify = self._initialize_sparsify(sparsify)

456 self.show_index_names = index_names

457 self.decimal = decimal

458 self.bold_rows = bold_rows

459 self.escape = escape

460 self.max_rows = max_rows

461 self.min_rows = min_rows

462 self.max_cols = max_cols

463 self.show_dimensions = show_dimensions

464

465 self.max_cols_fitted = self._calc_max_cols_fitted()

466 self.max_rows_fitted = self._calc_max_rows_fitted()

467

468 self.tr_frame = self.frame

469 self.truncate()

470 self.adj = printing.get_adjustment()

471

472 def get_strcols(self) -> list[list[str]]:

473 """

474 Render a DataFrame to a list of columns (as lists of strings).

475 """

476 strcols = self._get_strcols_without_index()

477

478 if self.index:

479 str_index = self._get_formatted_index(self.tr_frame)

480 strcols.insert(0, str_index)

481

482 return strcols

483

484 @property

485 def should_show_dimensions(self) -> bool:

486 return self.show_dimensions is True or (

487 self.show_dimensions == "truncate" and self.is_truncated

488 )

489

490 @property

491 def is_truncated(self) -> bool:

492 return bool(self.is_truncated_horizontally or self.is_truncated_vertically)

493

494 @property

495 def is_truncated_horizontally(self) -> bool:

496 return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))

497

498 @property

499 def is_truncated_vertically(self) -> bool:

500 return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))

501

502 @property

503 def dimensions_info(self) -> str:

504 return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"

505

506 @property

507 def has_index_names(self) -> bool:

508 return _has_names(self.frame.index)

509

510 @property

511 def has_column_names(self) -> bool:

512 return _has_names(self.frame.columns)

513

514 @property

515 def show_row_idx_names(self) -> bool:

516 return all((self.has_index_names, self.index, self.show_index_names))

517

518 @property

519 def show_col_idx_names(self) -> bool:

520 return all((self.has_column_names, self.show_index_names, self.header))

521

522 @property

523 def max_rows_displayed(self) -> int:

524 return min(self.max_rows or len(self.frame), len(self.frame))

525

526 def _initialize_sparsify(self, sparsify: bool | None) -> bool:

527 if sparsify is None:

528 return get_option("display.multi_sparse")

529 return sparsify

530

531 def _initialize_formatters(

532 self, formatters: FormattersType | None

533 ) -> FormattersType:

534 if formatters is None:

535 return {}

536 elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):

537 return formatters

538 else:

539 raise ValueError(

540 f"Formatters length({len(formatters)}) should match "

541 f"DataFrame number of columns({len(self.frame.columns)})"

542 )

543

544 def _initialize_justify(self, justify: str | None) -> str:

545 if justify is None:

546 return get_option("display.colheader_justify")

547 else:

548 return justify

549

550 def _initialize_columns(self, columns: Axes | None) -> Index:

551 if columns is not None:

552 cols = ensure_index(columns)

553 self.frame = self.frame[cols]

554 return cols

555 else:

556 return self.frame.columns

557

558 def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:

559 result: ColspaceType

560

561 if col_space is None:

562 result = {}

563 elif isinstance(col_space, (int, str)):

564 result = {"": col_space}

565 result.update({column: col_space for column in self.frame.columns})

566 elif isinstance(col_space, Mapping):

567 for column in col_space.keys():

568 if column not in self.frame.columns and column != "":

569 raise ValueError(

570 f"Col_space is defined for an unknown column: {column}"

571 )

572 result = col_space

573 else:

574 if len(self.frame.columns) != len(col_space):

575 raise ValueError(

576 f"Col_space length({len(col_space)}) should match "

577 f"DataFrame number of columns({len(self.frame.columns)})"

578 )

579 result = dict(zip(self.frame.columns, col_space))

580 return result

581

582 def _calc_max_cols_fitted(self) -> int | None:

583 """Number of columns fitting the screen."""

584 if not self._is_in_terminal():

585 return self.max_cols

586

587 width, _ = get_terminal_size()

588 if self._is_screen_narrow(width):

589 return width

590 else:

591 return self.max_cols

592

593 def _calc_max_rows_fitted(self) -> int | None:

594 """Number of rows with data fitting the screen."""

595 max_rows: int | None

596

597 if self._is_in_terminal():

598 _, height = get_terminal_size()

599 if self.max_rows == 0:

600 # rows available to fill with actual data

601 return height - self._get_number_of_auxiliary_rows()

602

603 if self._is_screen_short(height):

604 max_rows = height

605 else:

606 max_rows = self.max_rows

607 else:

608 max_rows = self.max_rows

609

610 return self._adjust_max_rows(max_rows)

611

612 def _adjust_max_rows(self, max_rows: int | None) -> int | None:

613 """Adjust max_rows using display logic.

614

615 See description here:

616 https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options

617

618 GH #37359

619 """

620 if max_rows:

621 if (len(self.frame) > max_rows) and self.min_rows:

622 # if truncated, set max_rows showed to min_rows

623 max_rows = min(self.min_rows, max_rows)

624 return max_rows

625

626 def _is_in_terminal(self) -> bool:

627 """Check if the output is to be shown in terminal."""

628 return bool(self.max_cols == 0 or self.max_rows == 0)

629

630 def _is_screen_narrow(self, max_width) -> bool:

631 return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)

632

633 def _is_screen_short(self, max_height) -> bool:

634 return bool(self.max_rows == 0 and len(self.frame) > max_height)

635

636 def _get_number_of_auxiliary_rows(self) -> int:

637 """Get number of rows occupied by prompt, dots and dimension info."""

638 dot_row = 1

639 prompt_row = 1

640 num_rows = dot_row + prompt_row

641

642 if self.show_dimensions:

643 num_rows += len(self.dimensions_info.splitlines())

644

645 if self.header:

646 num_rows += 1

647

648 return num_rows

649

650 def truncate(self) -> None:

651 """

652 Check whether the frame should be truncated. If so, slice the frame up.

653 """

654 if self.is_truncated_horizontally:

655 self._truncate_horizontally()

656

657 if self.is_truncated_vertically:

658 self._truncate_vertically()

659

660 def _truncate_horizontally(self) -> None:

661 """Remove columns, which are not to be displayed and adjust formatters.

662

663 Attributes affected:

664 - tr_frame

665 - formatters

666 - tr_col_num

667 """

668 assert self.max_cols_fitted is not None

669 col_num = self.max_cols_fitted // 2

670 if col_num >= 1:

671 left = self.tr_frame.iloc[:, :col_num]

672 right = self.tr_frame.iloc[:, -col_num:]

673 self.tr_frame = concat((left, right), axis=1)

674

675 # truncate formatter

676 if isinstance(self.formatters, (list, tuple)):

677 self.formatters = [

678 *self.formatters[:col_num],

679 *self.formatters[-col_num:],

680 ]

681 else:

682 col_num = cast(int, self.max_cols)

683 self.tr_frame = self.tr_frame.iloc[:, :col_num]

684 self.tr_col_num = col_num

685

686 def _truncate_vertically(self) -> None:

687 """Remove rows, which are not to be displayed.

688

689 Attributes affected:

690 - tr_frame

691 - tr_row_num

692 """

693 assert self.max_rows_fitted is not None

694 row_num = self.max_rows_fitted // 2

695 if row_num >= 1:

696 _len = len(self.tr_frame)

697 _slice = np.hstack([np.arange(row_num), np.arange(_len - row_num, _len)])

698 self.tr_frame = self.tr_frame.iloc[_slice]

699 else:

700 row_num = cast(int, self.max_rows)

701 self.tr_frame = self.tr_frame.iloc[:row_num, :]

702 self.tr_row_num = row_num

703

704 def _get_strcols_without_index(self) -> list[list[str]]:

705 strcols: list[list[str]] = []

706

707 if not is_list_like(self.header) and not self.header:

708 for i, c in enumerate(self.tr_frame):

709 fmt_values = self.format_col(i)

710 fmt_values = _make_fixed_width(

711 strings=fmt_values,

712 justify=self.justify,

713 minimum=int(self.col_space.get(c, 0)),

714 adj=self.adj,

715 )

716 strcols.append(fmt_values)

717 return strcols

718

719 if is_list_like(self.header):

720 # cast here since can't be bool if is_list_like

721 self.header = cast(list[str], self.header)

722 if len(self.header) != len(self.columns):

723 raise ValueError(

724 f"Writing {len(self.columns)} cols "

725 f"but got {len(self.header)} aliases"

726 )

727 str_columns = [[label] for label in self.header]

728 else:

729 str_columns = self._get_formatted_column_labels(self.tr_frame)

730

731 if self.show_row_idx_names:

732 for x in str_columns:

733 x.append("")

734

735 for i, c in enumerate(self.tr_frame):

736 cheader = str_columns[i]

737 header_colwidth = max(

738 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)

739 )

740 fmt_values = self.format_col(i)

741 fmt_values = _make_fixed_width(

742 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj

743 )

744

745 max_len = max(*(self.adj.len(x) for x in fmt_values), header_colwidth)

746 cheader = self.adj.justify(cheader, max_len, mode=self.justify)

747 strcols.append(cheader + fmt_values)

748

749 return strcols

750

751 def format_col(self, i: int) -> list[str]:

752 frame = self.tr_frame

753 formatter = self._get_formatter(i)

754 return format_array(

755 frame.iloc[:, i]._values,

756 formatter,

757 float_format=self.float_format,

758 na_rep=self.na_rep,

759 space=self.col_space.get(frame.columns[i]),

760 decimal=self.decimal,

761 leading_space=self.index,

762 )

763

764 def _get_formatter(self, i: str | int) -> Callable | None:

765 if isinstance(self.formatters, (list, tuple)):

766 if is_integer(i):

767 i = cast(int, i)

768 return self.formatters[i]

769 else:

770 return None

771 else:

772 if is_integer(i) and i not in self.columns:

773 i = self.columns[i]

774 return self.formatters.get(i, None)

775

776 def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:

777 from pandas.core.indexes.multi import sparsify_labels

778

779 columns = frame.columns

780

781 if isinstance(columns, MultiIndex):

782 fmt_columns = columns._format_multi(sparsify=False, include_names=False)

783 fmt_columns = list(zip(*fmt_columns))

784 dtypes = self.frame.dtypes._values

785

786 # if we have a Float level, they don't use leading space at all

787 restrict_formatting = any(level.is_floating for level in columns.levels)

788 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

789

790 def space_format(x, y):

791 if (

792 y not in self.formatters

793 and need_leadsp[x]

794 and not restrict_formatting

795 ):

796 return " " + y

797 return y

798

799 str_columns_tuple = list(

800 zip(*([space_format(x, y) for y in x] for x in fmt_columns))

801 )

802 if self.sparsify and len(str_columns_tuple):

803 str_columns_tuple = sparsify_labels(str_columns_tuple)

804

805 str_columns = [list(x) for x in zip(*str_columns_tuple)]

806 else:

807 fmt_columns = columns._format_flat(include_name=False)

808 dtypes = self.frame.dtypes

809 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

810 str_columns = [

811 [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]

812 for i, x in enumerate(fmt_columns)

813 ]

814 # self.str_columns = str_columns

815 return str_columns

816

817 def _get_formatted_index(self, frame: DataFrame) -> list[str]:

818 # Note: this is only used by to_string() and to_latex(), not by

819 # to_html(). so safe to cast col_space here.

820 col_space = {k: cast(int, v) for k, v in self.col_space.items()}

821 index = frame.index

822 columns = frame.columns

823 fmt = self._get_formatter("__index__")

824

825 if isinstance(index, MultiIndex):

826 fmt_index = index._format_multi(

827 sparsify=self.sparsify,

828 include_names=self.show_row_idx_names,

829 formatter=fmt,

830 )

831 else:

832 fmt_index = [

833 index._format_flat(include_name=self.show_row_idx_names, formatter=fmt)

834 ]

835

836 fmt_index = [

837 tuple(

838 _make_fixed_width(

839 list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj

840 )

841 )

842 for x in fmt_index

843 ]

844

845 adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

846

847 # empty space for columns

848 if self.show_col_idx_names:

849 col_header = [str(x) for x in self._get_column_name_list()]

850 else:

851 col_header = [""] * columns.nlevels

852

853 if self.header:

854 return col_header + adjoined

855 else:

856 return adjoined

857

858 def _get_column_name_list(self) -> list[Hashable]:

859 names: list[Hashable] = []

860 columns = self.frame.columns

861 if isinstance(columns, MultiIndex):

862 names.extend("" if name is None else name for name in columns.names)

863 else:

864 names.append("" if columns.name is None else columns.name)

865 return names

866

867

class DataFrameRenderer:
    """Turn a configured DataFrameFormatter into html, text or csv output.

    Called in pandas.core.generic.NDFrame:
        - to_csv
        - to_latex

    Called in pandas.core.frame.DataFrame:
        - to_html
        - to_string

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter with the formatting options.
    """

    def __init__(self, fmt: DataFrameFormatter) -> None:
        self.fmt = fmt

    def to_html(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        classes: str | list | tuple | None = None,
        notebook: bool = False,
        border: int | bool | None = None,
        table_id: str | None = None,
        render_links: bool = False,
    ) -> str | None:
        """
        Render a DataFrame to a html table.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            String, path object (implementing ``os.PathLike[str]``), or file-like
            object implementing a string ``write()`` function. If None, the result is
            returned as a string.
        encoding : str, default "utf-8"
            Set character encoding.
        classes : str or list-like
            classes to include in the `class` attribute of the opening
            ``<table>`` tag, in addition to the default "dataframe".
        notebook : {True, False}, optional, default False
            Whether the generated HTML is for IPython Notebook.
        border : int
            A ``border=border`` attribute is included in the opening
            ``<table>`` tag. Default ``pd.options.display.html.border``.
        table_id : str, optional
            A css id is included in the opening `<table>` tag if specified.
        render_links : bool, default False
            Convert URLs to HTML links.

        Returns
        -------
        str or None
            The rendered table if ``buf`` is None, otherwise None.
        """
        from pandas.io.formats.html import (
            HTMLFormatter,
            NotebookFormatter,
        )

        # the notebook flavour is selected purely by the ``notebook`` flag
        formatter_cls = HTMLFormatter
        if notebook:
            formatter_cls = NotebookFormatter

        rendered = formatter_cls(
            self.fmt,
            classes=classes,
            border=border,
            table_id=table_id,
            render_links=render_links,
        ).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_string(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        line_width: int | None = None,
    ) -> str | None:
        """
        Render a DataFrame to a console-friendly tabular output.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            String, path object (implementing ``os.PathLike[str]``), or file-like
            object implementing a string ``write()`` function. If None, the result is
            returned as a string.
        encoding: str, default "utf-8"
            Set character encoding.
        line_width : int, optional
            Width to wrap a line in characters.

        Returns
        -------
        str or None
            The rendered text if ``buf`` is None, otherwise None.
        """
        from pandas.io.formats.string import StringFormatter

        rendered = StringFormatter(self.fmt, line_width=line_width).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_csv(
        self,
        path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        sep: str = ",",
        columns: Sequence[Hashable] | None = None,
        index_label: IndexLabel | None = None,
        mode: str = "w",
        compression: CompressionOptions = "infer",
        quoting: int | None = None,
        quotechar: str = '"',
        lineterminator: str | None = None,
        chunksize: int | None = None,
        date_format: str | None = None,
        doublequote: bool = True,
        escapechar: str | None = None,
        errors: str = "strict",
        storage_options: StorageOptions | None = None,
    ) -> str | None:
        """
        Render dataframe as comma-separated file.

        All arguments are forwarded to ``CSVFormatter``. When ``path_or_buf``
        is None the csv text is collected in memory and returned; otherwise it
        is written to ``path_or_buf`` and None is returned.
        """
        from pandas.io.formats.csvs import CSVFormatter

        created_buffer = path_or_buf is None
        if created_buffer:
            path_or_buf = StringIO()

        CSVFormatter(
            path_or_buf=path_or_buf,
            lineterminator=lineterminator,
            sep=sep,
            encoding=encoding,
            errors=errors,
            compression=compression,
            quoting=quoting,
            cols=columns,
            index_label=index_label,
            mode=mode,
            chunksize=chunksize,
            quotechar=quotechar,
            date_format=date_format,
            doublequote=doublequote,
            escapechar=escapechar,
            storage_options=storage_options,
            formatter=self.fmt,
        ).save()

        if not created_buffer:
            return None

        # only the buffer created above is read back and closed here
        assert isinstance(path_or_buf, StringIO)
        content = path_or_buf.getvalue()
        path_or_buf.close()
        return content

1023

1024

1025def save_to_buffer(

1026 string: str,

1027 buf: FilePath | WriteBuffer[str] | None = None,

1028 encoding: str | None = None,

1029) -> str | None:

1030 """

1031 Perform serialization. Write to buf or return as string if buf is None.

1032 """

1033 with _get_buffer(buf, encoding=encoding) as fd:

1034 fd.write(string)

1035 if buf is None:

1036 # error: "WriteBuffer[str]" has no attribute "getvalue"

1037 return fd.getvalue() # type: ignore[attr-defined]

1038 return None

1039

1040

@contextmanager
def _get_buffer(
    buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]:
    """
    Context manager yielding something writable for ``buf``.

    A file name or path-like object is opened for writing and closed on exit;
    an object with a ``write`` attribute is yielded unchanged; None yields a
    fresh ``StringIO``.

    Raises
    ------
    ValueError
        If ``encoding`` is given but ``buf`` is not a file name.
    TypeError
        If ``buf`` is neither a file name nor has a ``write`` attribute.
    """
    target = StringIO() if buf is None else stringify_path(buf)
    is_file_name = isinstance(target, str)

    if encoding is None:
        encoding = "utf-8"
    elif not is_file_name:
        raise ValueError("buf is not a file name and encoding is specified.")

    if hasattr(target, "write"):
        # Incompatible types in "yield" (actual type "Union[str, WriteBuffer[str],
        # StringIO]", expected type "Union[WriteBuffer[str], StringIO]")
        yield target  # type: ignore[misc]
        return

    if not is_file_name:
        raise TypeError("buf is not a file name and it has no write method")

    check_parent_directory(str(target))
    # GH#30034 open instead of codecs.open prevents a file leak
    # if we have an invalid encoding argument.
    # newline="" is needed to roundtrip correctly on
    # windows test_to_latex_filename
    with open(target, "w", encoding=encoding, newline="") as handle:
        yield handle

1073

1074

1075# ----------------------------------------------------------------------

1076# Array formatters

1077

1078

def format_array(
    values: ArrayLike,
    formatter: Callable | None,
    float_format: FloatFormatType | None = None,
    na_rep: str = "NaN",
    digits: int | None = None,
    space: str | int | None = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: bool | None = True,
    quoting: int | None = None,
    fallback_formatter: Callable | None = None,
) -> list[str]:
    """
    Format an array for printing.

    A formatter class is picked from ``values.dtype``, instantiated with the
    resolved options and asked for its result.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    formatter
    float_format
        Falls back to the ``display.float_format`` option when None.
    na_rep
    digits
        Falls back to the ``display.precision`` option when None.
    space
        Falls back to 12 when None.
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._get_values_for_csv), we don't want the
        leading space since it should be left-aligned.
    quoting
    fallback_formatter

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # The order of the checks matters: datetime-like dtypes are tested before
    # the generic ExtensionDtype branch.
    fmt_klass: type[_GenericArrayFormatter]
    if lib.is_np_dtype(dtype, "M"):
        values = cast(DatetimeArray, values)
        fmt_klass = _Datetime64Formatter
    elif isinstance(dtype, DatetimeTZDtype):
        values = cast(DatetimeArray, values)
        fmt_klass = _Datetime64TZFormatter
    elif lib.is_np_dtype(dtype, "m"):
        values = cast(TimedeltaArray, values)
        fmt_klass = _Timedelta64Formatter
    elif isinstance(dtype, ExtensionDtype):
        fmt_klass = _ExtensionArrayFormatter
    elif lib.is_np_dtype(dtype, "fc"):
        fmt_klass = FloatArrayFormatter
    elif lib.is_np_dtype(dtype, "iu"):
        fmt_klass = _IntArrayFormatter
    else:
        fmt_klass = _GenericArrayFormatter

    space = 12 if space is None else space
    float_format = (
        get_option("display.float_format") if float_format is None else float_format
    )
    digits = get_option("display.precision") if digits is None else digits

    options = {
        "digits": digits,
        "na_rep": na_rep,
        "float_format": float_format,
        "formatter": formatter,
        "space": space,
        "justify": justify,
        "decimal": decimal,
        "leading_space": leading_space,
        "quoting": quoting,
        "fallback_formatter": fallback_formatter,
    }
    return fmt_klass(values, **options).get_result()

1162

1163

class _GenericArrayFormatter:
    """
    Base array formatter: turns the elements of ``values`` into strings.

    Subclasses override ``_format_strings``; ``get_result`` then passes the
    strings through ``_make_fixed_width`` using ``self.justify``.
    """

    def __init__(
        self,
        values: ArrayLike,
        digits: int = 7,
        formatter: Callable | None = None,
        na_rep: str = "NaN",
        space: str | int = 12,
        float_format: FloatFormatType | None = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: int | None = None,
        fixed_width: bool = True,
        leading_space: bool | None = True,
        fallback_formatter: Callable | None = None,
    ) -> None:
        # All options are stored verbatim; no validation happens here.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space
        self.fallback_formatter = fallback_formatter

    def get_result(self) -> list[str]:
        """Format every element and justify the strings via _make_fixed_width."""
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> list[str]:
        """
        Return one string per element of ``self.values``.

        Raises
        ------
        TypeError
            If ``self.values`` is not a numpy ndarray.
        """
        # Resolve the callable used for float elements: the instance setting,
        # then the "display.float_format" option, then a fixed-precision
        # format (with a space in the sign slot) whose trailing zeros are
        # trimmed.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")
                float_format = lambda x: _trim_zeros_single_float(
                    f"{x: .{precision:d}f}"
                )
        else:
            float_format = self.float_format

        # Resolve the callable used for everything else: explicit formatter,
        # then the fallback formatter, then pprint_thing with tab / CR / LF
        # escaped and quoting switched on unless quoting is None or QUOTE_NONE.
        if self.formatter is not None:
            formatter = self.formatter
        elif self.fallback_formatter is not None:
            formatter = self.fallback_formatter
        else:
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                printing.pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            # Scalar missing values get a fixed spelling that depends on the
            # kind of missing value; anything not matched below uses na_rep.
            if self.na_rep is not None and is_scalar(x) and isna(x):
                if x is None:
                    return "None"
                elif x is NA:
                    return str(NA)
                elif lib.is_float(x) and np.isinf(x):
                    # TODO(3.0): this will be unreachable when use_inf_as_na
                    #  deprecation is enforced
                    return str(x)
                elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)):
                    return "NaT"
                return self.na_rep
            elif isinstance(x, PandasObject):
                return str(x)
            elif isinstance(x, StringDtype):
                return repr(x)
            else:
                # object dtype
                return str(formatter(x))

        vals = self.values
        if not isinstance(vals, np.ndarray):
            raise TypeError(
                "ExtensionArray formatting should use _ExtensionArrayFormatter"
            )
        # Mask of entries for which is_float is true and that contain no
        # missing value.
        inferred = lib.map_infer(vals, is_float)
        is_float_type = (
            inferred
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )
        leading_space = self.leading_space
        if leading_space is None:
            # Unspecified: pad only when at least one entry is a float.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if (not is_float_type[i] or self.formatter is not None) and leading_space:
                # Non-float entry, or a user formatter is set: pad with a space.
                fmt_values.append(f" {_format(v)}")
            elif is_float_type[i]:
                # Float entry without a user formatter (or with padding off).
                fmt_values.append(float_format(v))
            else:
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values

1272

1273

class FloatArrayFormatter(_GenericArrayFormatter):
    """
    Formatter for float and complex arrays.

    ``get_result_as_array`` does the work; ``_format_strings`` returns the
    same values as a list.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is treated as the formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : optional
            Supersedes ``self.float_format`` when given; it is called as
            ``float_format(value=v)``.
        threshold : float, optional
            When given, values whose absolute value does not exceed it are
            formatted as 0.0.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # error: "str" not callable
                # error: Unexpected keyword argument "value" for "__call__" of
                # "EngFormatter"
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            def decimal_formatter(v):
                # Only the first "." is swapped for the configured separator.
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    # At or below the chop threshold: display as zero.
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            # Apply formatter to non-missing entries, na_rep to missing ones,
            # keeping the input shape.
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        def format_complex_with_na_rep(
            values: ArrayLike, formatter: Callable, na_rep: str
        ):
            # Real and imaginary parts can be missing independently, so they
            # are checked (and, where needed, formatted) separately.
            real_values = np.real(values).ravel()  # type: ignore[arg-type]
            imag_values = np.imag(values).ravel()  # type: ignore[arg-type]
            real_mask, imag_mask = isna(real_values), isna(imag_values)
            formatted_lst = []
            for val, real_val, imag_val, re_isna, im_isna in zip(
                values.ravel(),
                real_values,
                imag_values,
                real_mask,
                imag_mask,
            ):
                if not re_isna and not im_isna:
                    formatted_lst.append(formatter(val))
                elif not re_isna:  # xxx+nanj
                    formatted_lst.append(f"{formatter(real_val)}+{na_rep}j")
                elif not im_isna:  # nan[+/-]xxxj
                    # The imaginary part may either start with a "-" or a space
                    imag_formatted = formatter(imag_val).strip()
                    if imag_formatted.startswith("-"):
                        formatted_lst.append(f"{na_rep}{imag_formatted}j")
                    else:
                        formatted_lst.append(f"{na_rep}+{imag_formatted}j")
                else:  # nan+nanj
                    formatted_lst.append(f"{na_rep}+{na_rep}j")
            return np.array(formatted_lst).reshape(values.shape)

        # A user-supplied formatter bypasses everything below.
        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        # The chop threshold is only consulted in fixed-width mode.
        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            na_rep = " " + self.na_rep if self.justify == "left" else self.na_rep

            # different formatting strategies for complex and non-complex data
            # need to distinguish complex and float NaNs (GH #53762)
            values = self.values
            is_complex = is_complex_dtype(values)

            # separate the wheat from the chaff
            if is_complex:
                values = format_complex_with_na_rep(values, formatter, na_rep)
            else:
                values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # Trim trailing zeros so the strings share a common width.
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is None:
            if self.fixed_width:
                # The " " flag reserves the sign column for non-negative values.
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # A non-callable float_format is applied with the % operator.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to switch to exponent ("e") notation if some values are too
        # small and would appear as 0, or if some values are too big and take
        # too much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        abs_vals = np.abs(self.values)
        # this is pretty arbitrary for now
        # large values: more than 8 characters including decimal symbol
        # and first digit, hence > 1e6
        has_large_values = (abs_vals > 1e6).any()
        has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: reformat everything in exponent notation.
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> list[str]:
        """Return the result of ``get_result_as_array`` as a list."""
        return list(self.get_result_as_array())

1473

1474

class _IntArrayFormatter(_GenericArrayFormatter):
    """Formatter for integer arrays."""

    def _format_strings(self) -> list[str]:
        """
        Return one string per element of ``self.values``.

        ``self.formatter`` is used when it is truthy. Otherwise each value
        is rendered with the ``d`` presentation type; unless
        ``self.leading_space`` is exactly False, the ``" "`` sign flag is
        used so non-negative numbers get a leading blank.
        """
        # The previous implementation rendered an f-string and then called
        # ``.format(x=x)`` on the already-rendered text, which was a no-op.
        spec = "d" if self.leading_space is False else " d"

        def formatter_str(x) -> str:
            return format(x, spec)

        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]

1484

1485

class _Datetime64Formatter(_GenericArrayFormatter):
    """Formatter for datetime arrays that carry no time zone."""

    values: DatetimeArray

    def __init__(
        self,
        values: DatetimeArray,
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ) -> None:
        # Remaining keyword options are handled by the generic formatter.
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they do NOT have a TZ."""
        custom = self.formatter
        if custom is not None:
            return [custom(item) for item in self.values]

        native = self.values._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return native.tolist()

1511

1512

class _ExtensionArrayFormatter(_GenericArrayFormatter):
    """Formatter for ExtensionArrays; delegates to ``format_array``."""

    values: ExtensionArray

    def _format_strings(self) -> list[str]:
        """
        Convert the values to an object ndarray and format that.

        When no explicit formatter is set, the array's own
        ``_formatter(boxed=True)`` is handed over as the fallback formatter.
        """
        ext_values = self.values
        formatter = self.formatter
        fallback_formatter = (
            ext_values._formatter(boxed=True) if formatter is None else None
        )

        if isinstance(ext_values, Categorical):
            # Categorical is special for now, so that we can preserve tzinfo
            as_objects = ext_values._internal_get_values()
        else:
            as_objects = np.asarray(ext_values, dtype=object)

        return format_array(
            as_objects,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
            fallback_formatter=fallback_formatter,
        )

1544

1545

def format_percentiles(
    percentiles: (np.ndarray | Sequence[float]),
) -> list[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If the input is not numeric or any entry lies outside [0, 1]
        (NaN entries fail the comparison and are rejected as well).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """
    percentiles = np.asarray(percentiles)

    # It checks for np.nan as well
    within_unit_interval = (
        is_numeric_dtype(percentiles)
        and np.all(percentiles >= 0)
        and np.all(percentiles <= 1)
    )
    if not within_unit_interval:
        raise ValueError("percentiles should all be in the interval [0,1]")

    pcts = 100 * percentiles
    as_ints = pcts.round(get_precision(pcts)).astype(int)

    # Entries that are (numerically) whole percentages.
    is_whole = np.isclose(as_ints, pcts)
    if np.all(is_whole):
        return [text + "%" for text in as_ints.astype(str)]

    # Mixed case: whole entries print without decimals, the rest with the
    # precision derived from the distinct values.
    prec = get_precision(np.unique(pcts))
    out = np.empty_like(pcts, dtype=object)
    out[is_whole] = pcts[is_whole].round().astype(int).astype(str)
    out[~is_whole] = pcts[~is_whole].round(prec).astype(str)
    return [text + "%" for text in out]

1607

1608

def get_precision(array: np.ndarray | Sequence[float]) -> int:
    """
    Return the number of decimals needed to keep percentages distinct.

    The gaps between consecutive entries are considered, together with the
    gap from the first entry down to 0 (when it is above 0) and from the
    last entry up to 100 (when it is below 100). The precision is derived
    from the smallest non-zero gap and is never less than 1.

    Parameters
    ----------
    array : np.ndarray or sequence of float
        Percentages on the 0-100 scale; must be non-empty.

    Returns
    -------
    int
    """
    to_begin = array[0] if array[0] > 0 else None
    to_end = 100 - array[-1] if array[-1] < 100 else None
    gaps = np.abs(np.ediff1d(array, to_begin=to_begin, to_end=to_end))

    # Duplicate entries produce zero gaps. log10(0) is -inf, and casting the
    # resulting infinity to int is undefined (and emits RuntimeWarnings), so
    # only strictly positive gaps take part.
    positive_gaps = gaps[gaps > 0]
    if positive_gaps.size == 0:
        return 1

    prec = int(-np.floor(np.log10(positive_gaps.min())))
    return max(1, prec)

1617

1618

1619def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:

1620 if x is NaT:

1621 return nat_rep

1622

1623 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')

1624 # so it already uses string formatting rather than strftime (faster).

1625 return str(x)

1626

1627

1628def _format_datetime64_dateonly(

1629 x: NaTType | Timestamp,

1630 nat_rep: str = "NaT",

1631 date_format: str | None = None,

1632) -> str:

1633 if isinstance(x, NaTType):

1634 return nat_rep

1635

1636 if date_format:

1637 return x.strftime(date_format)

1638 else:

1639 # Timestamp._date_repr relies on string formatting (faster than strftime)

1640 return x._date_repr

1641

1642

def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """
    Return a formatter callable taking a datetime64 as input and providing
    a string as output.

    With ``is_dates_only`` the date-only formatter is used (and
    ``date_format`` is honoured); otherwise the full timestamp formatter,
    which ignores ``date_format``.
    """

    def _dates_only(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    def _full(x):
        return _format_datetime64(x, nat_rep=nat_rep)

    return _dates_only if is_dates_only else _full

1655

1656

class _Datetime64TZFormatter(_Datetime64Formatter):
    """Formatter for datetime arrays that carry a time zone."""

    values: DatetimeArray

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they have a TZ."""
        dates_only = self.values._is_dates_only
        boxed = self.values.astype(object)

        formatter = self.formatter
        if not formatter:
            # No usable custom formatter: build the default one.
            formatter = get_format_datetime64(
                dates_only, date_format=self.date_format
            )
        return [formatter(item) for item in boxed]

1670

1671

class _Timedelta64Formatter(_GenericArrayFormatter):
    """Formatter for timedelta arrays."""

    values: TimedeltaArray

    def __init__(
        self,
        values: TimedeltaArray,
        nat_rep: str = "NaT",
        **kwargs,
    ) -> None:
        # TODO: nat_rep is never passed, na_rep is.
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep

    def _format_strings(self) -> list[str]:
        """Format each value with the custom or the default timedelta formatter."""
        formatter = self.formatter
        if not formatter:
            formatter = get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=False
            )
        return [formatter(item) for item in self.values]

1690

1691

def get_format_timedelta64(
    values: TimedeltaArray,
    nat_rep: str | float = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.
    These will all have the same format argument.

    The format passed to ``Timedelta._repr_base`` is None when
    ``values._is_dates_only`` is true and ``"long"`` otherwise.

    If box, then show the return in quotes
    """
    repr_format = None if values._is_dates_only else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        delta = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = delta._repr_base(format=repr_format)
        return f"'{text}'" if box else text

    return _formatter

1724

1725

1726def _make_fixed_width(

1727 strings: list[str],

1728 justify: str = "right",

1729 minimum: int | None = None,

1730 adj: printing._TextAdjustment | None = None,

1731) -> list[str]:

1732 if len(strings) == 0 or justify == "all":

1733 return strings

1734

1735 if adj is None:

1736 adjustment = printing.get_adjustment()

1737 else:

1738 adjustment = adj

1739

1740 max_len = max(adjustment.len(x) for x in strings)

1741

1742 if minimum is not None:

1743 max_len = max(minimum, max_len)

1744

1745 conf_max = get_option("display.max_colwidth")

1746 if conf_max is not None and max_len > conf_max:

1747 max_len = conf_max

1748

1749 def just(x: str) -> str:

1750 if conf_max is not None:

1751 if (conf_max > 3) & (adjustment.len(x) > max_len):

1752 x = x[: max_len - 3] + "..."

1753 return x

1754

1755 strings = [just(x) for x in strings]

1756 result = adjustment.justify(strings, max_len, mode=justify)

1757 return result

1758

1759

def _trim_zeros_complex(str_complexes: ArrayLike, decimal: str = ".") -> list[str]:
    """
    Split each complex string into its real and imaginary part and run
    ``_trim_zeros_float`` over all parts together, then reassemble.
    """
    # Complex numbers are represented as "(-)xxx(+/-)xxxj".
    # Splitting on "j", "+" and "-" (keeping the separators) gives
    # [{"", "-"}, "xxx", "+/-", "xxx", "j", ""], so the imaginary part is the
    # 4th and 3rd last elements and the real part is everything before it.
    pieces = [re.split(r"([j+-])", text) for text in str_complexes]
    real_part = ["".join(chunks[:-4]) for chunks in pieces]
    imag_part = ["".join(chunks[-4:-2]) for chunks in pieces]

    # Trimming real and imaginary parts in one call aligns the lengths of the
    # real and imaginary parts of each number, as well as across all numbers
    # in the array.
    count = len(str_complexes)
    padded_parts = _trim_zeros_float(real_part + imag_part, decimal)
    if len(padded_parts) == 0:
        return []

    # One less than the longest part: the sign of the imaginary part is
    # emitted separately below.
    width = max(map(len, padded_parts)) - 1
    return [
        # real part (possibly NaN), sign, unsigned imaginary part (possibly nan)
        f"{real_pt}{imag_pt[0]}{imag_pt[1:]:>{width}}j"
        for real_pt, imag_pt in zip(padded_parts[:count], padded_parts[count:])
    ]

1791

1792

1793def _trim_zeros_single_float(str_float: str) -> str:

1794 """

1795 Trims trailing zeros after a decimal point,

1796 leaving just one if necessary.

1797 """

1798 str_float = str_float.rstrip("0")

1799 if str_float.endswith("."):

1800 str_float += "0"

1801

1802 return str_float

1803

1804

1805def _trim_zeros_float(

1806 str_floats: ArrayLike | list[str], decimal: str = "."

1807) -> list[str]:

1808 """

1809 Trims the maximum number of trailing zeros equally from

1810 all numbers containing decimals, leaving just one if

1811 necessary.

1812 """

1813 trimmed = str_floats

1814 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")

1815

1816 def is_number_with_decimal(x) -> bool:

1817 return re.match(number_regex, x) is not None

1818

1819 def should_trim(values: ArrayLike | list[str]) -> bool:

1820 """

1821 Determine if an array of strings should be trimmed.

1822

1823 Returns True if all numbers containing decimals (defined by the

1824 above regular expression) within the array end in a zero, otherwise

1825 returns False.

1826 """

1827 numbers = [x for x in values if is_number_with_decimal(x)]

1828 return len(numbers) > 0 and all(x.endswith("0") for x in numbers)

1829

1830 while should_trim(trimmed):

1831 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]

1832

1833 # leave one 0 after the decimal points if need be.

1834 result = [

1835 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x

1836 for x in trimmed

1837 ]

1838 return result

1839

1840

1841def _has_names(index: Index) -> bool:

1842 if isinstance(index, MultiIndex):

1843 return com.any_not_none(*index.names)

1844 else:

1845 return index.name is not None

1846

1847

class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        # accuracy: number of digits after the decimal point (None -> "g")
        # use_eng_prefix: SI letter instead of an "E+xx" / "E-xx" suffix
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        Parameters
        ----------
        num : numeric or str
            The value to represent: either a numeric value or a string that
            can be converted to a numeric value (as per the decimal.Decimal
            constructor).

        Returns
        -------
        str
            The engineering formatted string. NaN gives "NaN"; infinities
            give "inf" or "-inf" according to their sign.
        """
        dnum = Decimal(str(num))

        if Decimal.is_nan(dnum):
            return "NaN"

        if Decimal.is_infinite(dnum):
            # Keep the sign: negative infinity must not be shown as "inf".
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Largest multiple of 3 that does not exceed the decimal exponent.
        if dnum != 0:
            pow10 = Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = Decimal(0)

        # Clamp the exponent to the range covered by ENG_PREFIXES.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        elif int_pow10 < 0:
            prefix = f"E-{-int_pow10:02d}"
        else:
            prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        # The " " flag leaves a blank in the sign column of non-negative
        # mantissas.
        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted

1942

1943

def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Format float representation in DataFrame with SI notation.

    Installs an ``EngFormatter`` as the ``display.float_format`` option.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Whether to represent a value with SI prefixes.

    Returns
    -------
    None

    Examples
    --------
    >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6])
    >>> df
                  0
    0  1.000000e-09
    1  1.000000e-03
    2  1.000000e+00
    3  1.000000e+03
    4  1.000000e+06

    >>> pd.set_eng_float_format(accuracy=1)
    >>> df
             0
    0  1.0E-09
    1  1.0E-03
    2  1.0E+00
    3  1.0E+03
    4  1.0E+06

    >>> pd.set_eng_float_format(use_eng_prefix=True)
    >>> df
            0
    0  1.000n
    1  1.000m
    2   1.000
    3  1.000k
    4  1.000M

    >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    >>> df
          0
    0  1.0n
    1  1.0m
    2   1.0
    3  1.0k
    4  1.0M

    >>> pd.set_option("display.float_format", None)  # unset option
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)

2000

2001

def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which states that no new index starts on there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # A position may only be read as "continues the previous entry" while no
    # earlier level has started an entry there.
    may_continue = [True] * len(levels[0])

    result = []
    for level in levels:
        spans = {}
        start = 0
        for pos, key in enumerate(level):
            if may_continue[pos] and key == sentinel:
                continue
            may_continue[pos] = False
            # Close the span that began at ``start`` and open a new one.
            spans[start] = pos - start
            start = pos

        spans[start] = len(level) - start
        result.append(spans)

    return result

2043

2044

def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    The lines are joined with a newline character and written in a single
    ``write`` call; no trailing newline is added.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append.
    """
    # Every element is converted to str. Converting only when some element
    # already was a str left all-non-str input unconverted, so join() raised.
    buf.write("\n".join(str(line) for line in lines))