1"""
2Internal module for formatting output data in csv, html, xml,
3and latex files. This module also applies to display formatting.
4"""
5from __future__ import annotations
6
7from contextlib import contextmanager
8from csv import (
9 QUOTE_NONE,
10 QUOTE_NONNUMERIC,
11)
12from decimal import Decimal
13from functools import partial
14from io import StringIO
15import math
16import re
17from shutil import get_terminal_size
18from typing import (
19 IO,
20 TYPE_CHECKING,
21 Any,
22 Callable,
23 Final,
24 Generator,
25 Hashable,
26 Iterable,
27 List,
28 Mapping,
29 Sequence,
30 cast,
31)
32from unicodedata import east_asian_width
33
34import numpy as np
35
36from pandas._config.config import (
37 get_option,
38 set_option,
39)
40
41from pandas._libs import lib
42from pandas._libs.missing import NA
43from pandas._libs.tslibs import (
44 NaT,
45 Timedelta,
46 Timestamp,
47 get_unit_from_dtype,
48 iNaT,
49 periods_per_day,
50)
51from pandas._libs.tslibs.nattype import NaTType
52from pandas._typing import (
53 ArrayLike,
54 Axes,
55 ColspaceArgType,
56 ColspaceType,
57 CompressionOptions,
58 FilePath,
59 FloatFormatType,
60 FormattersType,
61 IndexLabel,
62 StorageOptions,
63 WriteBuffer,
64)
65
66from pandas.core.dtypes.common import (
67 is_categorical_dtype,
68 is_complex_dtype,
69 is_datetime64_dtype,
70 is_extension_array_dtype,
71 is_float,
72 is_float_dtype,
73 is_integer,
74 is_integer_dtype,
75 is_list_like,
76 is_numeric_dtype,
77 is_scalar,
78 is_timedelta64_dtype,
79)
80from pandas.core.dtypes.dtypes import DatetimeTZDtype
81from pandas.core.dtypes.missing import (
82 isna,
83 notna,
84)
85
86from pandas.core.arrays import (
87 Categorical,
88 DatetimeArray,
89 TimedeltaArray,
90)
91from pandas.core.arrays.string_ import StringDtype
92from pandas.core.base import PandasObject
93import pandas.core.common as com
94from pandas.core.construction import extract_array
95from pandas.core.indexes.api import (
96 Index,
97 MultiIndex,
98 PeriodIndex,
99 ensure_index,
100)
101from pandas.core.indexes.datetimes import DatetimeIndex
102from pandas.core.indexes.timedeltas import TimedeltaIndex
103from pandas.core.reshape.concat import concat
104
105from pandas.io.common import (
106 check_parent_directory,
107 stringify_path,
108)
109from pandas.io.formats import printing
110
111if TYPE_CHECKING:
112 from pandas import (
113 DataFrame,
114 Series,
115 )
116
117
118common_docstring: Final = """
119 Parameters
120 ----------
121 buf : str, Path or StringIO-like, optional, default None
122 Buffer to write to. If None, the output is returned as a string.
123 columns : sequence, optional, default None
124 The subset of columns to write. Writes all columns by default.
125 col_space : %(col_space_type)s, optional
126 %(col_space)s.
127 header : %(header_type)s, optional
128 %(header)s.
129 index : bool, optional, default True
130 Whether to print index (row) labels.
131 na_rep : str, optional, default 'NaN'
132 String representation of ``NaN`` to use.
133 formatters : list, tuple or dict of one-param. functions, optional
134 Formatter functions to apply to columns' elements by position or
135 name.
136 The result of each function must be a unicode string.
137 List/tuple must be of length equal to the number of columns.
138 float_format : one-parameter function, optional, default None
139 Formatter function to apply to columns' elements if they are
140 floats. This function must return a unicode string and will be
141 applied only to the non-``NaN`` elements, with ``NaN`` being
142 handled by ``na_rep``.
143
144 .. versionchanged:: 1.2.0
145
146 sparsify : bool, optional, default True
147 Set to False for a DataFrame with a hierarchical index to print
148 every multiindex key at each row.
149 index_names : bool, optional, default True
150 Prints the names of the indexes.
151 justify : str, default None
152 How to justify the column labels. If None uses the option from
153 the print configuration (controlled by set_option), 'right' out
154 of the box. Valid values are
155
156 * left
157 * right
158 * center
159 * justify
160 * justify-all
161 * start
162 * end
163 * inherit
164 * match-parent
165 * initial
166 * unset.
167 max_rows : int, optional
168 Maximum number of rows to display in the console.
169 max_cols : int, optional
170 Maximum number of columns to display in the console.
171 show_dimensions : bool, default False
172 Display DataFrame dimensions (number of rows by number of columns).
173 decimal : str, default '.'
174 Character recognized as decimal separator, e.g. ',' in Europe.
175 """
176
# Justification keywords; the same values are listed under ``justify`` in
# ``common_docstring``.
# NOTE(review): where these are validated is not visible in this part of the
# file -- confirm against the callers before changing the set.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)
190
191return_docstring: Final = """
192 Returns
193 -------
194 str or None
195 If buf is None, returns the result as a string. Otherwise returns
196 None.
197 """
198
199
class CategoricalFormatter:
    """
    Render a Categorical as ``[v1, v2, ...]`` followed by an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The values to render.
    buf : IO[str], optional
        Stored on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with ``Length: <n>``.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` as the missing-value representation.
    footer : bool, default True
        Whether ``to_string`` appends the footer.
    """

    def __init__(
        self,
        categorical: Categorical,
        buf: IO[str] | None = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ) -> None:
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Build the footer text.

        Returns
        -------
        str
            ``Length: <n>`` (only when ``self.length`` is truthy) followed,
            on a new line, by the categories description.
        """
        # The length entry is the first thing written, so no separator is
        # ever needed in front of it.
        footer = f"Length: {len(self.categorical)}" if self.length else ""

        level_info = self.categorical._repr_categories_info()

        # Levels are added in a newline
        if footer:
            footer += "\n"
        footer += level_info

        return footer

    def _get_formatted_values(self) -> list[str]:
        """Format the underlying values through ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return the string representation.

        Returns
        -------
        str
            For an empty categorical: the footer alone, or ``""`` when the
            footer is disabled. Otherwise the bracketed, comma-separated
            values, followed by the footer on the next line when enabled.
        """
        categorical = self.categorical

        if len(categorical) == 0:
            return self._get_footer() if self.footer else ""

        # format_array may pad its output; the padding is not wanted inside
        # the bracketed list.
        fmt_values = [value.strip() for value in self._get_formatted_values()]
        result = ["[" + ", ".join(fmt_values) + "]"]
        if self.footer:
            footer = self._get_footer()
            if footer:
                result.append(footer)

        return "\n".join(result)
262
263
class SeriesFormatter:
    """
    Build the text representation of a Series.

    The constructor stores the display options and immediately computes the
    (possibly truncated) series to render; ``to_string`` produces the text.
    """

    def __init__(
        self,
        series: Series,
        buf: IO[str] | None = None,
        length: bool | str = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: str | None = None,
        dtype: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> None:
        self.series = series
        self.buf = StringIO() if buf is None else buf
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        # Fall back to the display option when no explicit format is given.
        self.float_format = (
            get_option("display.float_format") if float_format is None else float_format
        )
        self.dtype = dtype
        self.adj = get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether the series must be shortened and compute the result.

        Sets ``tr_series`` (the rows to render), ``tr_row_num`` (number of
        leading rows kept, or None when nothing is cut) and
        ``is_truncated_vertically``.
        """
        self.tr_row_num: int | None

        full = self.series
        limit = self.max_rows
        # ``max_rows`` decides whether to truncate; ``min_rows`` (below) only
        # decides how many rows survive once truncation happens.
        truncated = limit and (len(full) > limit)

        if not truncated:
            self.tr_row_num = None
            self.tr_series = full
            self.is_truncated_vertically = truncated
            return

        limit = cast(int, limit)
        if self.min_rows:
            # min_rows is set (neither None nor 0): show the smaller of both
            limit = min(self.min_rows, limit)

        if limit == 1:
            kept = limit
            shown = full.iloc[:limit]
        else:
            kept = limit // 2
            shown = concat((full.iloc[:kept], full.iloc[-kept:]))

        self.tr_row_num = kept
        self.tr_series = shown
        self.is_truncated_vertically = truncated

    def _get_footer(self) -> str:
        """
        Assemble the footer: ``Freq``, ``Name``, ``Length`` and ``dtype``
        entries joined by ``", "``, plus the categories description on a new
        line for categorical data.
        """
        name = self.series.name
        entries = []

        if getattr(self.series.index, "freq", None) is not None:
            assert isinstance(
                self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
            )
            entries.append(f"Freq: {self.series.index.freqstr}")

        if self.name is not False and name is not None:
            series_name = printing.pprint_thing(name, escape_chars=("\t", "\r", "\n"))
            entries.append(f"Name: {series_name}")

        show_length = self.length is True or (
            self.length == "truncate" and self.is_truncated_vertically
        )
        if show_length:
            entries.append(f"Length: {len(self.series)}")

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append(f"dtype: {printing.pprint_thing(dtype_name)}")

        footer = ", ".join(entries)

        # Category information goes last and on its own line, mirroring the
        # footer produced for Categoricals.
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return str(footer)

    def _get_formatted_index(self) -> tuple[list[str], bool]:
        """
        Format the index of the truncated series.

        Returns
        -------
        tuple of (list of str, bool)
            The formatted index (requested with its name(s) included) and a
            flag telling whether the index actually has a name.
        """
        index = self.tr_series.index

        if isinstance(index, MultiIndex):
            have_header = any(index.names)
            return index.format(names=True), have_header

        have_header = index.name is not None
        return index.format(name=True), have_header

    def _get_formatted_values(self) -> list[str]:
        """Format the values of the truncated series via ``format_array``."""
        values = self.tr_series._values
        return format_array(
            values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
            leading_space=self.index,
        )

    def to_string(self) -> str:
        """Render the series (index, values, header and footer) as text."""
        series = self.tr_series
        footer = self._get_footer()

        if len(series) == 0:
            return f"{type(self.series).__name__}([], {footer})"

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.is_truncated_vertically:
            row_num = cast(int, self.tr_row_num)
            # Size the ellipsis after the last row shown before the gap.
            width = self.adj.len(fmt_values[row_num - 1])
            dots = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dots = self.adj.justify([dots], width, mode="center")[0]
            fmt_values.insert(row_num, dots)
            # fmt_index carries one extra leading entry (the name row), hence
            # the offset of one.
            fmt_index.insert(row_num + 1, "")

        columns = [fmt_index[1:], fmt_values] if self.index else [fmt_values]
        result = self.adj.adjoin(3, *columns)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return str("".join(result))
422
423
class TextAdjustment:
    """
    Text layout helper that measures strings by their character count.

    ``justify`` and ``adjoin`` delegate to ``printing.justify`` and
    ``printing.adjoin``.
    """

    def __init__(self) -> None:
        # Read once, when the helper is created.
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the number of characters in ``text``."""
        n_chars = len(text)
        return n_chars

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
        """Pad each string in ``texts`` to ``max_len`` via ``printing.justify``."""
        padded = printing.justify(texts, max_len, mode=mode)
        return padded

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Join ``lists`` side by side via ``printing.adjoin``, measuring and
        padding with this object's ``len`` and ``justify``.
        """
        measure, pad = self.len, self.justify
        return printing.adjoin(space, *lists, strlen=measure, justfunc=pad, **kwargs)
438
439
class EastAsianTextAdjustment(TextAdjustment):
    """
    Text layout helper that measures strings by display width, using the
    Unicode East Asian Width property of each character.
    """

    def __init__(self) -> None:
        super().__init__()
        # Width used for characters whose class is not in the table below;
        # switchable through an option.
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        total = 0
        for char in text:
            total += self._EAW_MAP.get(east_asian_width(char), self.ambiguous_width)
        return total

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> list[str]:
        """
        Pad each string so that its display width reaches ``max_len``.

        ``mode`` selects ``"left"`` or ``"center"`` alignment; any other
        value right-aligns.
        """

        def _target_chars(text):
            # str padding counts characters, so convert the wanted display
            # width into a character count for this particular string.
            return max_len - self.len(text) + len(text)

        if mode == "left":
            method = "ljust"
        elif mode == "center":
            method = "center"
        else:
            method = "rjust"
        return [getattr(x, method)(_target_chars(x)) for x in texts]
477
478
def get_adjustment() -> TextAdjustment:
    """
    Return the text-adjustment helper selected by the
    ``display.unicode.east_asian_width`` option.
    """
    if get_option("display.unicode.east_asian_width"):
        return EastAsianTextAdjustment()
    return TextAdjustment()
485
486
def get_dataframe_repr_params() -> dict[str, Any]:
    """Return the ``DataFrame.to_string`` keywords that reproduce ``repr``.

    Passing the returned mapping to ``DataFrame.to_string`` gives the same
    text as ``repr(DataFrame)``, which makes it a convenient starting point
    for a customised repr.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    # Only wrap to the console width when frame expansion is enabled.
    line_width = None
    if get_option("display.expand_frame_repr"):
        line_width, _ = console.get_console_size()

    option_names = (
        ("max_rows", "display.max_rows"),
        ("min_rows", "display.min_rows"),
        ("max_cols", "display.max_columns"),
        ("max_colwidth", "display.max_colwidth"),
        ("show_dimensions", "display.show_dimensions"),
    )
    params = {keyword: get_option(option) for keyword, option in option_names}
    params["line_width"] = line_width
    return params
518
519
def get_series_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(Series) calls using Series.to_string.

    Supplying these parameters to Series.to_string is equivalent to calling
    ``repr(series)``. This is useful if you want to adjust the series repr output.

    .. versionadded:: 1.4.0

    Returns
    -------
    dict
        Keys ``name``, ``dtype``, ``min_rows``, ``max_rows`` and ``length``.
        When ``display.max_rows`` is 0, both row limits are the terminal
        height; otherwise they are the ``display.max_rows`` and
        ``display.min_rows`` option values.

    Examples
    --------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    # Only the height matters here; the width is not used.
    _, height = get_terminal_size()

    # Look the option up once instead of once per use.
    max_rows_opt = get_option("display.max_rows")
    if max_rows_opt == 0:
        max_rows = min_rows = height
    else:
        max_rows = max_rows_opt
        min_rows = get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }
556
557
class DataFrameFormatter:
    """Class for processing dataframe formatting options and data."""

    # The shared parameter/return documentation is appended to the class
    # docstring; the fallback covers the case where ``__doc__`` is empty.
    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring

    def __init__(
        self,
        frame: DataFrame,
        columns: Sequence[Hashable] | None = None,
        col_space: ColspaceArgType | None = None,
        header: bool | Sequence[str] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: FormattersType | None = None,
        justify: str | None = None,
        float_format: FloatFormatType | None = None,
        sparsify: bool | None = None,
        index_names: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
        max_cols: int | None = None,
        show_dimensions: bool | str = False,
        decimal: str = ".",
        bold_rows: bool = False,
        escape: bool = True,
    ) -> None:
        self.frame = frame
        # May replace ``self.frame`` with the selected column subset.
        self.columns = self._initialize_columns(columns)
        self.col_space = self._initialize_colspace(col_space)
        self.header = header
        self.index = index
        self.na_rep = na_rep
        self.formatters = self._initialize_formatters(formatters)
        self.justify = self._initialize_justify(justify)
        self.float_format = float_format
        self.sparsify = self._initialize_sparsify(sparsify)
        self.show_index_names = index_names
        self.decimal = decimal
        self.bold_rows = bold_rows
        self.escape = escape
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.show_dimensions = show_dimensions

        self.max_cols_fitted = self._calc_max_cols_fitted()
        self.max_rows_fitted = self._calc_max_rows_fitted()

        # ``tr_frame`` is the frame that is actually rendered; ``truncate``
        # replaces it with a shortened copy when limits are exceeded.
        self.tr_frame = self.frame
        self.truncate()
        self.adj = get_adjustment()

    def get_strcols(self) -> list[list[str]]:
        """
        Render a DataFrame to a list of columns (as lists of strings).
        """
        strcols = self._get_strcols_without_index()

        if self.index:
            str_index = self._get_formatted_index(self.tr_frame)
            strcols.insert(0, str_index)

        return strcols

    @property
    def should_show_dimensions(self) -> bool:
        """True when dimensions are always shown, or shown because of truncation."""
        return self.show_dimensions is True or (
            self.show_dimensions == "truncate" and self.is_truncated
        )

    @property
    def is_truncated(self) -> bool:
        """True when the frame is cut in either direction."""
        return bool(self.is_truncated_horizontally or self.is_truncated_vertically)

    @property
    def is_truncated_horizontally(self) -> bool:
        """True when there are more columns than ``max_cols_fitted`` allows."""
        return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))

    @property
    def is_truncated_vertically(self) -> bool:
        """True when there are more rows than ``max_rows_fitted`` allows."""
        return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))

    @property
    def dimensions_info(self) -> str:
        """The ``[<rows> rows x <cols> columns]`` line, preceded by a blank line."""
        return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"

    @property
    def has_index_names(self) -> bool:
        return _has_names(self.frame.index)

    @property
    def has_column_names(self) -> bool:
        return _has_names(self.frame.columns)

    @property
    def show_row_idx_names(self) -> bool:
        """True when the index has names, is displayed, and names are enabled."""
        return all((self.has_index_names, self.index, self.show_index_names))

    @property
    def show_col_idx_names(self) -> bool:
        """True when the columns have names, names are enabled, and a header is shown."""
        return all((self.has_column_names, self.show_index_names, self.header))

    @property
    def max_rows_displayed(self) -> int:
        """``max_rows`` capped at the frame length (the frame length when unset)."""
        return min(self.max_rows or len(self.frame), len(self.frame))

    def _initialize_sparsify(self, sparsify: bool | None) -> bool:
        """Use the ``display.multi_sparse`` option when ``sparsify`` is None."""
        if sparsify is None:
            return get_option("display.multi_sparse")
        return sparsify

    def _initialize_formatters(
        self, formatters: FormattersType | None
    ) -> FormattersType:
        """
        Validate ``formatters``.

        Raises
        ------
        ValueError
            If ``formatters`` is not a dict and its length differs from the
            number of columns.
        """
        if formatters is None:
            return {}
        elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
            return formatters
        else:
            raise ValueError(
                f"Formatters length({len(formatters)}) should match "
                f"DataFrame number of columns({len(self.frame.columns)})"
            )

    def _initialize_justify(self, justify: str | None) -> str:
        """Use the ``display.colheader_justify`` option when ``justify`` is None."""
        if justify is None:
            return get_option("display.colheader_justify")
        else:
            return justify

    def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
        """
        Return the columns to render.

        When ``columns`` is given, ``self.frame`` is narrowed to that
        selection as a side effect.
        """
        if columns is not None:
            # GH 47231 - columns doesn't have to be `Sequence[str]`
            # Will fix in later PR
            cols = ensure_index(cast(Axes, columns))
            self.frame = self.frame[cols]
            return cols
        else:
            return self.frame.columns

    def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
        """
        Normalise ``col_space`` into a mapping keyed by column label.

        A scalar applies to every column and to the key ``""``; a mapping is
        validated and used as is; any other sequence is paired with the
        columns positionally.

        Raises
        ------
        ValueError
            If a mapping names an unknown column, or a sequence has the wrong
            length.
        """
        result: ColspaceType

        if col_space is None:
            result = {}
        elif isinstance(col_space, (int, str)):
            # "" is the key ``_get_formatted_index`` looks up for the index.
            result = {"": col_space}
            result.update({column: col_space for column in self.frame.columns})
        elif isinstance(col_space, Mapping):
            for column in col_space:
                if column not in self.frame.columns and column != "":
                    raise ValueError(
                        f"Col_space is defined for an unknown column: {column}"
                    )
            result = col_space
        else:
            if len(self.frame.columns) != len(col_space):
                raise ValueError(
                    f"Col_space length({len(col_space)}) should match "
                    f"DataFrame number of columns({len(self.frame.columns)})"
                )
            result = dict(zip(self.frame.columns, col_space))
        return result

    def _calc_max_cols_fitted(self) -> int | None:
        """Number of columns fitting the screen."""
        if not self._is_in_terminal():
            return self.max_cols

        width, _ = get_terminal_size()
        if self._is_screen_narrow(width):
            return width
        else:
            return self.max_cols

    def _calc_max_rows_fitted(self) -> int | None:
        """Number of rows with data fitting the screen."""
        max_rows: int | None

        if self._is_in_terminal():
            _, height = get_terminal_size()
            if self.max_rows == 0:
                # rows available to fill with actual data
                return height - self._get_number_of_auxillary_rows()

            if self._is_screen_short(height):
                max_rows = height
            else:
                max_rows = self.max_rows
        else:
            max_rows = self.max_rows

        return self._adjust_max_rows(max_rows)

    def _adjust_max_rows(self, max_rows: int | None) -> int | None:
        """Adjust max_rows using display logic.

        See description here:
        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options

        GH #37359
        """
        if max_rows:
            if (len(self.frame) > max_rows) and self.min_rows:
                # if truncated, set max_rows showed to min_rows
                max_rows = min(self.min_rows, max_rows)
        return max_rows

    def _is_in_terminal(self) -> bool:
        """Check if the output is to be shown in terminal."""
        return bool(self.max_cols == 0 or self.max_rows == 0)

    def _is_screen_narrow(self, max_width: int) -> bool:
        """True when ``max_cols`` is 0 and there are more columns than ``max_width``."""
        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)

    def _is_screen_short(self, max_height: int) -> bool:
        """True when ``max_rows`` is 0 and there are more rows than ``max_height``."""
        return bool(self.max_rows == 0 and len(self.frame) > max_height)

    def _get_number_of_auxillary_rows(self) -> int:
        """Get number of rows occupied by prompt, dots and dimension info."""
        dot_row = 1
        prompt_row = 1
        num_rows = dot_row + prompt_row

        if self.show_dimensions:
            num_rows += len(self.dimensions_info.splitlines())

        if self.header:
            num_rows += 1

        return num_rows

    def truncate(self) -> None:
        """
        Check whether the frame should be truncated. If so, slice the frame up.
        """
        if self.is_truncated_horizontally:
            self._truncate_horizontally()

        if self.is_truncated_vertically:
            self._truncate_vertically()

    def _truncate_horizontally(self) -> None:
        """Remove columns, which are not to be displayed and adjust formatters.

        Attributes affected:
            - tr_frame
            - formatters
            - tr_col_num
        """
        assert self.max_cols_fitted is not None
        col_num = self.max_cols_fitted // 2
        if col_num >= 1:
            # Keep the same number of columns from each end.
            left = self.tr_frame.iloc[:, :col_num]
            right = self.tr_frame.iloc[:, -col_num:]
            self.tr_frame = concat((left, right), axis=1)

            # truncate formatter
            if isinstance(self.formatters, (list, tuple)):
                self.formatters = [
                    *self.formatters[:col_num],
                    *self.formatters[-col_num:],
                ]
        else:
            col_num = cast(int, self.max_cols)
            self.tr_frame = self.tr_frame.iloc[:, :col_num]
        self.tr_col_num = col_num

    def _truncate_vertically(self) -> None:
        """Remove rows, which are not to be displayed.

        Attributes affected:
            - tr_frame
            - tr_row_num
        """
        assert self.max_rows_fitted is not None
        row_num = self.max_rows_fitted // 2
        if row_num >= 1:
            # Keep the same number of rows from the top and the bottom.
            head = self.tr_frame.iloc[:row_num, :]
            tail = self.tr_frame.iloc[-row_num:, :]
            self.tr_frame = concat((head, tail))
        else:
            row_num = cast(int, self.max_rows)
            self.tr_frame = self.tr_frame.iloc[:row_num, :]
        self.tr_row_num = row_num

    def _get_strcols_without_index(self) -> list[list[str]]:
        """
        Format every displayed column (header cells followed by values).

        Raises
        ------
        ValueError
            If ``header`` is list-like and its length differs from the number
            of columns.
        """
        strcols: list[list[str]] = []

        if not is_list_like(self.header) and not self.header:
            # No header requested: values only.
            for i, c in enumerate(self.tr_frame):
                fmt_values = self.format_col(i)
                fmt_values = _make_fixed_width(
                    strings=fmt_values,
                    justify=self.justify,
                    minimum=int(self.col_space.get(c, 0)),
                    adj=self.adj,
                )
                strcols.append(fmt_values)
            return strcols

        if is_list_like(self.header):
            # cast here since can't be bool if is_list_like
            self.header = cast(List[str], self.header)
            if len(self.header) != len(self.columns):
                raise ValueError(
                    f"Writing {len(self.columns)} cols "
                    f"but got {len(self.header)} aliases"
                )
            str_columns = [[label] for label in self.header]
        else:
            str_columns = self._get_formatted_column_labels(self.tr_frame)

        if self.show_row_idx_names:
            # Leave an empty header cell on the row holding the index names.
            for x in str_columns:
                x.append("")

        for i, c in enumerate(self.tr_frame):
            cheader = str_columns[i]
            header_colwidth = max(
                int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
            )
            fmt_values = self.format_col(i)
            fmt_values = _make_fixed_width(
                fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
            )

            # ``default=0`` keeps a column without any value rows from
            # raising ValueError; the header width then decides alone.
            max_len = max(
                max((self.adj.len(x) for x in fmt_values), default=0),
                header_colwidth,
            )
            cheader = self.adj.justify(cheader, max_len, mode=self.justify)
            strcols.append(cheader + fmt_values)

        return strcols

    def format_col(self, i: int) -> list[str]:
        """Format the values of the ``i``-th displayed column via ``format_array``."""
        frame = self.tr_frame
        formatter = self._get_formatter(i)
        return format_array(
            frame.iloc[:, i]._values,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            space=self.col_space.get(frame.columns[i]),
            decimal=self.decimal,
            leading_space=self.index,
        )

    def _get_formatter(self, i: str | int) -> Callable | None:
        """
        Look up the formatter for column ``i``.

        List/tuple formatters are addressed by integer position only; mapping
        formatters are addressed by label, with an integer that is not itself
        a column label translated to the label at that position.
        """
        if isinstance(self.formatters, (list, tuple)):
            if is_integer(i):
                i = cast(int, i)
                return self.formatters[i]
            else:
                return None
        else:
            if is_integer(i) and i not in self.columns:
                i = self.columns[i]
            return self.formatters.get(i, None)

    def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
        """
        Format the column labels of ``frame``, one list of header cells per
        column.

        Labels of numeric columns without a custom formatter get a leading
        space.
        """
        from pandas.core.indexes.multi import sparsify_labels

        columns = frame.columns

        if isinstance(columns, MultiIndex):
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = list(zip(*fmt_columns))
            # Take the dtypes from the frame whose labels are formatted: after
            # horizontal truncation ``self.frame`` has more columns than
            # ``frame`` and pairing them positionally would mix up dtypes.
            dtypes = frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            restrict_formatting = any(level.is_floating for level in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                if (
                    y not in self.formatters
                    and need_leadsp[x]
                    and not restrict_formatting
                ):
                    return " " + y
                return y

            str_columns = list(
                zip(*([space_format(x, y) for y in x] for x in fmt_columns))
            )
            if self.sparsify and len(str_columns):
                str_columns = sparsify_labels(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            # Same reasoning as above: dtypes must line up with ``frame``.
            dtypes = frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            str_columns = [
                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
                for i, x in enumerate(fmt_columns)
            ]
        return str_columns

    def _get_formatted_index(self, frame: DataFrame) -> list[str]:
        """
        Format the index of ``frame`` as one string per output row, preceded
        by the header rows when a header is shown.
        """
        # Note: this is only used by to_string() and to_latex(), not by
        # to_html(). so safe to cast col_space here.
        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
        index = frame.index
        columns = frame.columns
        fmt = self._get_formatter("__index__")

        if isinstance(index, MultiIndex):
            fmt_index = index.format(
                sparsify=self.sparsify,
                adjoin=False,
                names=self.show_row_idx_names,
                formatter=fmt,
            )
        else:
            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]

        fmt_index = [
            tuple(
                _make_fixed_width(
                    list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
                )
            )
            for x in fmt_index
        ]

        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

        # empty space for columns
        if self.show_col_idx_names:
            col_header = [str(x) for x in self._get_column_name_list()]
        else:
            col_header = [""] * columns.nlevels

        if self.header:
            return col_header + adjoined
        else:
            return adjoined

    def _get_column_name_list(self) -> list[Hashable]:
        """Return the column index name(s), with ``None`` replaced by ``""``."""
        names: list[Hashable] = []
        columns = self.frame.columns
        if isinstance(columns, MultiIndex):
            names.extend("" if name is None else name for name in columns.names)
        else:
            names.append("" if columns.name is None else columns.name)
        return names
1006
1007
1008class DataFrameRenderer:
1009 """Class for creating dataframe output in multiple formats.
1010
1011 Called in pandas.core.generic.NDFrame:
1012 - to_csv
1013 - to_latex
1014
1015 Called in pandas.core.frame.DataFrame:
1016 - to_html
1017 - to_string
1018
1019 Parameters
1020 ----------
1021 fmt : DataFrameFormatter
1022 Formatter with the formatting options.
1023 """
1024
    def __init__(self, fmt: DataFrameFormatter) -> None:
        # Keep the formatter; to_latex, to_html and to_string hand it to the
        # format-specific formatter class they create.
        self.fmt = fmt
1027
1028 def to_latex(
1029 self,
1030 buf: FilePath | WriteBuffer[str] | None = None,
1031 column_format: str | None = None,
1032 longtable: bool = False,
1033 encoding: str | None = None,
1034 multicolumn: bool = False,
1035 multicolumn_format: str | None = None,
1036 multirow: bool = False,
1037 caption: str | tuple[str, str] | None = None,
1038 label: str | None = None,
1039 position: str | None = None,
1040 ) -> str | None:
1041 """
1042 Render a DataFrame to a LaTeX tabular/longtable environment output.
1043 """
1044 from pandas.io.formats.latex import LatexFormatter
1045
1046 latex_formatter = LatexFormatter(
1047 self.fmt,
1048 longtable=longtable,
1049 column_format=column_format,
1050 multicolumn=multicolumn,
1051 multicolumn_format=multicolumn_format,
1052 multirow=multirow,
1053 caption=caption,
1054 label=label,
1055 position=position,
1056 )
1057 string = latex_formatter.to_string()
1058 return save_to_buffer(string, buf=buf, encoding=encoding)
1059
1060 def to_html(
1061 self,
1062 buf: FilePath | WriteBuffer[str] | None = None,
1063 encoding: str | None = None,
1064 classes: str | list | tuple | None = None,
1065 notebook: bool = False,
1066 border: int | bool | None = None,
1067 table_id: str | None = None,
1068 render_links: bool = False,
1069 ) -> str | None:
1070 """
1071 Render a DataFrame to a html table.
1072
1073 Parameters
1074 ----------
1075 buf : str, path object, file-like object, or None, default None
1076 String, path object (implementing ``os.PathLike[str]``), or file-like
1077 object implementing a string ``write()`` function. If None, the result is
1078 returned as a string.
1079 encoding : str, default “utf-8”
1080 Set character encoding.
1081 classes : str or list-like
1082 classes to include in the `class` attribute of the opening
1083 ``<table>`` tag, in addition to the default "dataframe".
1084 notebook : {True, False}, optional, default False
1085 Whether the generated HTML is for IPython Notebook.
1086 border : int
1087 A ``border=border`` attribute is included in the opening
1088 ``<table>`` tag. Default ``pd.options.display.html.border``.
1089 table_id : str, optional
1090 A css id is included in the opening `<table>` tag if specified.
1091 render_links : bool, default False
1092 Convert URLs to HTML links.
1093 """
1094 from pandas.io.formats.html import (
1095 HTMLFormatter,
1096 NotebookFormatter,
1097 )
1098
1099 Klass = NotebookFormatter if notebook else HTMLFormatter
1100
1101 html_formatter = Klass(
1102 self.fmt,
1103 classes=classes,
1104 border=border,
1105 table_id=table_id,
1106 render_links=render_links,
1107 )
1108 string = html_formatter.to_string()
1109 return save_to_buffer(string, buf=buf, encoding=encoding)
1110
1111 def to_string(
1112 self,
1113 buf: FilePath | WriteBuffer[str] | None = None,
1114 encoding: str | None = None,
1115 line_width: int | None = None,
1116 ) -> str | None:
1117 """
1118 Render a DataFrame to a console-friendly tabular output.
1119
1120 Parameters
1121 ----------
1122 buf : str, path object, file-like object, or None, default None
1123 String, path object (implementing ``os.PathLike[str]``), or file-like
1124 object implementing a string ``write()`` function. If None, the result is
1125 returned as a string.
1126 encoding: str, default “utf-8”
1127 Set character encoding.
1128 line_width : int, optional
1129 Width to wrap a line in characters.
1130 """
1131 from pandas.io.formats.string import StringFormatter
1132
1133 string_formatter = StringFormatter(self.fmt, line_width=line_width)
1134 string = string_formatter.to_string()
1135 return save_to_buffer(string, buf=buf, encoding=encoding)
1136
1137 def to_csv(
1138 self,
1139 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
1140 encoding: str | None = None,
1141 sep: str = ",",
1142 columns: Sequence[Hashable] | None = None,
1143 index_label: IndexLabel | None = None,
1144 mode: str = "w",
1145 compression: CompressionOptions = "infer",
1146 quoting: int | None = None,
1147 quotechar: str = '"',
1148 lineterminator: str | None = None,
1149 chunksize: int | None = None,
1150 date_format: str | None = None,
1151 doublequote: bool = True,
1152 escapechar: str | None = None,
1153 errors: str = "strict",
1154 storage_options: StorageOptions = None,
1155 ) -> str | None:
1156 """
1157 Render dataframe as comma-separated file.
1158 """
1159 from pandas.io.formats.csvs import CSVFormatter
1160
1161 if path_or_buf is None:
1162 created_buffer = True
1163 path_or_buf = StringIO()
1164 else:
1165 created_buffer = False
1166
1167 csv_formatter = CSVFormatter(
1168 path_or_buf=path_or_buf,
1169 lineterminator=lineterminator,
1170 sep=sep,
1171 encoding=encoding,
1172 errors=errors,
1173 compression=compression,
1174 quoting=quoting,
1175 cols=columns,
1176 index_label=index_label,
1177 mode=mode,
1178 chunksize=chunksize,
1179 quotechar=quotechar,
1180 date_format=date_format,
1181 doublequote=doublequote,
1182 escapechar=escapechar,
1183 storage_options=storage_options,
1184 formatter=self.fmt,
1185 )
1186 csv_formatter.save()
1187
1188 if created_buffer:
1189 assert isinstance(path_or_buf, StringIO)
1190 content = path_or_buf.getvalue()
1191 path_or_buf.close()
1192 return content
1193
1194 return None
1195
1196
def save_to_buffer(
    string: str,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
) -> str | None:
    """
    Write ``string`` to ``buf``, or return it when ``buf`` is None.

    The target is opened through ``get_buffer``. When no ``buf`` was given,
    the text written to the buffer yielded by ``get_buffer`` is read back and
    returned; otherwise the function returns None.
    """
    with get_buffer(buf, encoding=encoding) as handle:
        handle.write(string)
        # error: "WriteBuffer[str]" has no attribute "getvalue"
        result = (
            handle.getvalue()  # type: ignore[attr-defined]
            if buf is None
            else None
        )
    return result
1211
1212
@contextmanager
def get_buffer(
    buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]:
    """
    Context manager yielding a writable text handle for ``buf``.

    * ``None`` yields a fresh ``StringIO``.
    * An object with a ``write`` attribute is yielded unchanged.
    * A string (after ``stringify_path``) is opened for writing as a file and
      closed on exit.

    Raises
    ------
    ValueError
        If ``encoding`` is given but the target is not a file name.
    TypeError
        If the target is neither a file name nor has a ``write`` method.
    """
    target = StringIO() if buf is None else stringify_path(buf)

    if encoding is not None:
        # An explicit encoding only makes sense when we open the file ourselves.
        if not isinstance(target, str):
            raise ValueError("buf is not a file name and encoding is specified.")
    else:
        encoding = "utf-8"

    if hasattr(target, "write"):
        # Incompatible types in "yield" (actual type "Union[str, WriteBuffer[str],
        # StringIO]", expected type "Union[WriteBuffer[str], StringIO]")
        yield target  # type: ignore[misc]
        return

    if not isinstance(target, str):
        raise TypeError("buf is not a file name and it has no write method")

    check_parent_directory(str(target))
    # GH#30034 open instead of codecs.open prevents a file leak
    # if we have an invalid encoding argument.
    # newline="" is needed to roundtrip correctly on
    # windows test_to_latex_filename
    with open(target, "w", encoding=encoding, newline="") as handle:
        yield handle
1245
1246
1247# ----------------------------------------------------------------------
1248# Array formatters
1249
1250
def format_array(
    values: Any,
    formatter: Callable | None,
    float_format: FloatFormatType | None = None,
    na_rep: str = "NaN",
    digits: int | None = None,
    space: str | int | None = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: bool | None = True,
    quoting: int | None = None,
    fallback_formatter: Callable | None = None,
) -> list[str]:
    """
    Format an array for printing.

    A formatter class is picked from ``values.dtype`` and instantiated with
    the given options; its ``get_result()`` is returned.

    Parameters
    ----------
    values
        Array-like with a ``dtype`` attribute.
    formatter
        Optional callable applied to the elements.
    float_format
        Falls back to the ``display.float_format`` option when None.
    na_rep
    digits
        Falls back to the ``display.precision`` option when None.
    space
        Falls back to 12 when None.
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting
    fallback_formatter

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # The order of these checks matters: the first matching dtype test wins.
    formatter_cls: type[GenericArrayFormatter]
    if is_datetime64_dtype(dtype):
        formatter_cls = Datetime64Formatter
    elif isinstance(dtype, DatetimeTZDtype):
        formatter_cls = Datetime64TZFormatter
    elif is_timedelta64_dtype(dtype):
        formatter_cls = Timedelta64Formatter
    elif is_extension_array_dtype(dtype):
        formatter_cls = ExtensionArrayFormatter
    elif is_float_dtype(dtype) or is_complex_dtype(dtype):
        formatter_cls = FloatArrayFormatter
    elif is_integer_dtype(dtype):
        formatter_cls = IntArrayFormatter
    else:
        formatter_cls = GenericArrayFormatter

    # Fill in defaults for anything the caller left unspecified.
    space = 12 if space is None else space
    if float_format is None:
        float_format = get_option("display.float_format")
    if digits is None:
        digits = get_option("display.precision")

    return formatter_cls(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
        fallback_formatter=fallback_formatter,
    ).get_result()
1331
1332
class GenericArrayFormatter:
    """
    Base formatter that renders the elements of an array as strings.

    ``get_result`` calls ``_format_strings`` and then passes the strings
    through ``_make_fixed_width`` using ``justify``.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Callable | None = None,
        na_rep: str = "NaN",
        space: str | int = 12,
        float_format: FloatFormatType | None = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: int | None = None,
        fixed_width: bool = True,
        leading_space: bool | None = True,
        fallback_formatter: Callable | None = None,
    ) -> None:
        # Every option is stored as given; nothing is validated here.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space
        self.fallback_formatter = fallback_formatter

    def get_result(self) -> list[str]:
        """Return the formatted strings after ``_make_fixed_width``."""
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> list[str]:
        """
        Convert each element of ``self.values`` to a string.

        Elements detected as non-missing floats go through the float format;
        everything else goes through ``_format`` (missing-value handling,
        then ``formatter`` / ``fallback_formatter`` / ``pprint_thing``).

        Raises
        ------
        TypeError
            If ``extract_array`` does not yield a ``numpy.ndarray``.
        """
        # Resolve the float format: instance setting, then the display
        # option, then a precision-based default that trims trailing zeros.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")
                float_format = lambda x: _trim_zeros_single_float(
                    f"{x: .{precision:d}f}"
                )
        else:
            float_format = self.float_format

        # Resolve the element formatter: explicit formatter first, then the
        # fallback, then pretty-printing with control characters escaped.
        if self.formatter is not None:
            formatter = self.formatter
        elif self.fallback_formatter is not None:
            formatter = self.fallback_formatter
        else:
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                printing.pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            # Missing scalars: None, NA and NaT-likes keep their own spelling,
            # any other missing value is shown as ``na_rep``.
            if self.na_rep is not None and is_scalar(x) and isna(x):
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    elif x is NA:
                        return str(NA)
                    elif x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            elif isinstance(x, PandasObject):
                return str(x)
            elif isinstance(x, StringDtype):
                return repr(x)
            else:
                # object dtype
                return str(formatter(x))

        vals = extract_array(self.values, extract_numpy=True)
        if not isinstance(vals, np.ndarray):
            raise TypeError(
                "ExtensionArray formatting should use ExtensionArrayFormatter"
            )
        # Per-element mask: True where the element is a float and not missing.
        inferred = lib.map_infer(vals, is_float)
        is_float_type = (
            inferred
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )
        leading_space = self.leading_space
        if leading_space is None:
            # Unspecified: add the leading space only if some float is present.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            # Non-floats (or any value when a custom formatter is set) get an
            # explicit leading space when leading_space is truthy.
            if (not is_float_type[i] or self.formatter is not None) and leading_space:
                fmt_values.append(f" {_format(v)}")
            elif is_float_type[i]:
                fmt_values.append(float_format(v))
            else:
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values
1443
1444
class FloatArrayFormatter(GenericArrayFormatter):
    """
    Formatter selected by ``format_array`` for float and complex dtypes.
    """

    def __init__(self, *args, **kwargs) -> None:
        """
        Store the options, then normalise ``float_format``.

        When a ``float_format`` is given without a ``formatter``, fixed-width
        handling is turned off; a callable ``float_format`` is moved to
        ``self.formatter`` and ``self.float_format`` is cleared.
        """
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            self.fixed_width = False
            if callable(self.float_format):
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """
        Return a function that formats a single value.

        Parameters
        ----------
        float_format : callable, optional
            Called as ``float_format(value=v)``; falls back to
            ``self.float_format`` when None, and to ``str`` when that is
            falsy as well.
        threshold : float, optional
            When given, values whose absolute value is not greater than the
            threshold are formatted as ``0.0``.

        Returns
        -------
        Callable
            Maps a value to its string; missing values map to ``self.na_rep``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # error: "str" not callable
                # error: Unexpected keyword argument "value" for "__call__" of
                # "EngFormatter"
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            # Swap only the first "." for the configured decimal symbol.
            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    # At or below the threshold: display as zero.
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array

        With a custom ``formatter`` it is applied directly. Otherwise the
        values are formatted once with the default/selected float format and,
        in fixed-width mode only, a second time in exponent (``e``) notation
        if that first pass would hide small values or be too wide.
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            # Apply ``formatter`` element-wise, substituting ``na_rep`` for
            # missing entries, and keep the input's shape.
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        # The chop threshold option is only consulted in fixed-width mode.
        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # Trim trailing zeros; complex values are handled per part.
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is None:
            if self.fixed_width:
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # A non-callable float_format is applied with the ``%`` operator.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to switch to exponent notation if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more that 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> list[str]:
        """Return ``get_result_as_array()`` converted to a list."""
        return list(self.get_result_as_array())
1615
1616
class IntArrayFormatter(GenericArrayFormatter):
    """Formatter selected by ``format_array`` for integer dtypes."""

    def _format_strings(self) -> list[str]:
        """
        Format every element of ``self.values`` as an integer string.

        A user supplied ``self.formatter`` takes precedence. Otherwise the
        values are rendered with ``d`` formatting, with a sign-placeholder
        space in front unless ``leading_space`` is exactly ``False``.
        """
        # The f-string already produces the final text; no further
        # ``str.format`` pass is needed.
        if self.leading_space is False:
            formatter_str = lambda x: f"{x:d}"
        else:
            formatter_str = lambda x: f"{x: d}"
        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]
1626
1627
class Datetime64Formatter(GenericArrayFormatter):
    """Formatter selected by ``format_array`` for datetime64 dtypes."""

    def __init__(
        self,
        values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
        nat_rep: str = "NaT",
        date_format: str | None = None,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        values : ndarray, Series, DatetimeIndex or DatetimeArray
            The datetime values to format.
        nat_rep : str, default "NaT"
            Passed as ``na_rep`` to ``_format_native_types``.
        date_format : str, optional
            Passed as ``date_format`` to ``_format_native_types``.
        **kwargs
            Forwarded to ``GenericArrayFormatter.__init__``.
        """
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they do NOT have a TZ here."""
        values = self.values

        if not isinstance(values, DatetimeIndex):
            values = DatetimeIndex(values)

        # A user supplied callable is applied element-wise and wins over
        # the native formatting below.
        if self.formatter is not None and callable(self.formatter):
            return [self.formatter(x) for x in values]

        fmt_values = values._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return fmt_values.tolist()
1654
1655
class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter selected by ``format_array`` for extension array dtypes."""

    def _format_strings(self) -> list[str]:
        """
        Convert the extension array to a plain array and re-dispatch it
        through ``format_array``.

        When no explicit formatter was given, the array's own
        ``_formatter(boxed=True)`` is handed over as the fallback formatter.
        """
        ext_values = extract_array(self.values, extract_numpy=True)

        user_formatter = self.formatter
        if user_formatter is None:
            default_formatter = ext_values._formatter(boxed=True)
        else:
            default_formatter = None

        if isinstance(ext_values, Categorical):
            # Categorical is special for now, so that we can preserve tzinfo
            array = ext_values._internal_get_values()
        else:
            array = np.asarray(ext_values)

        return format_array(
            array,
            user_formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
            fallback_formatter=default_formatter,
        )
1685
1686
def format_percentiles(
    percentiles: (np.ndarray | Sequence[float]),
) -> list[str]:
    """
    Turn fractions in [0, 1] into rounded percentage labels.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If the input is not numeric or any entry lies outside [0, 1]
        (NaN included).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """
    fractions = np.asarray(percentiles)

    # Comparisons with NaN are False, so NaN entries are rejected as well.
    with np.errstate(invalid="ignore"):
        in_unit_interval = (
            is_numeric_dtype(fractions)
            and np.all(fractions >= 0)
            and np.all(fractions <= 1)
        )
        if not in_unit_interval:
            raise ValueError("percentiles should all be in the interval [0,1]")

    scaled = 100 * fractions
    nearest_int = scaled.round().astype(int)
    is_whole = np.isclose(nearest_int, scaled)

    if np.all(is_whole):
        return [label + "%" for label in nearest_int.astype(str)]

    distinct = np.unique(scaled)
    # Distances to 0 and 100 count as gaps too, unless already at the edge.
    gap_to_zero = distinct[0] if distinct[0] > 0 else None
    gap_to_hundred = 100 - distinct[-1] if distinct[-1] < 100 else None
    smallest_gap = np.min(
        np.ediff1d(distinct, to_begin=gap_to_zero, to_end=gap_to_hundred)
    )

    # Least precision that keeps percentiles unique after rounding
    prec = max(1, -np.floor(np.log10(smallest_gap)).astype(int))

    labels = np.empty_like(scaled, dtype=object)
    labels[is_whole] = scaled[is_whole].round().astype(int).astype(str)
    labels[~is_whole] = scaled[~is_whole].round(prec).astype(str)
    return [label + "%" for label in labels]
1755
1756
def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    """
    Return True if ``values`` carry no timezone and every non-NaT entry is
    an exact multiple of one day in the array's resolution.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    if values.tz is not None:
        return False

    i8values = values.asi8
    not_nat = i8values != iNaT
    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    reso = get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    per_day = periods_per_day(reso)

    # TODO: can we reuse is_date_array_normalized?  would need a skipna kwd
    has_time_part = np.logical_and(not_nat, i8values % per_day != 0)
    return bool(has_time_part.sum() == 0)
1780
1781
1782def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
1783 if x is NaT:
1784 return nat_rep
1785
1786 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')
1787 # so it already uses string formatting rather than strftime (faster).
1788 return str(x)
1789
1790
1791def _format_datetime64_dateonly(
1792 x: NaTType | Timestamp,
1793 nat_rep: str = "NaT",
1794 date_format: str | None = None,
1795) -> str:
1796 if isinstance(x, NaTType):
1797 return nat_rep
1798
1799 if date_format:
1800 return x.strftime(date_format)
1801 else:
1802 # Timestamp._date_repr relies on string formatting (faster than strftime)
1803 return x._date_repr
1804
1805
def get_format_datetime64(
    is_dates_only_: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """
    Return a one-argument callable that formats a datetime64 value as a
    string.

    With ``is_dates_only_`` the callable delegates to
    ``_format_datetime64_dateonly`` (honouring ``date_format``); otherwise
    it delegates to ``_format_datetime64`` and ``date_format`` is unused.
    """
    if not is_dates_only_:

        def _format_full(x):
            return _format_datetime64(x, nat_rep=nat_rep)

        return _format_full

    def _format_date(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    return _format_date
1818
1819
def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """
    Pick the date format string to use for ``values``.

    When the values are dates only (per ``is_dates_only``) and no
    ``date_format`` was supplied, ``"%Y-%m-%d"`` is returned; in every other
    case ``date_format`` is returned unchanged.
    """
    if isinstance(values, np.ndarray) and values.ndim > 1:
        # We don't actually care about the order of values, and DatetimeIndex
        #  only accepts 1D values
        values = values.ravel()

    if not is_dates_only(values):
        return date_format

    # Only dates and no timezone: provide a default format
    return date_format if date_format else "%Y-%m-%d"
1834
1835
class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter selected by ``format_array`` for ``DatetimeTZDtype``."""

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they have a TZ here."""
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)

        formatter = self.formatter
        if not formatter:
            formatter = get_format_datetime64(
                dates_only, date_format=self.date_format
            )

        return [formatter(x) for x in boxed]
1847
1848
class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter selected by ``format_array`` for timedelta64 dtypes."""

    def __init__(
        self,
        values: np.ndarray | TimedeltaIndex,
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        values : ndarray or TimedeltaIndex
            The timedelta values to format.
        nat_rep : str, default "NaT"
            Forwarded to ``get_format_timedelta64``.
        box : bool, default False
            Forwarded to ``get_format_timedelta64``.
        **kwargs
            Forwarded to ``GenericArrayFormatter.__init__``.
        """
        super().__init__(values, **kwargs)
        self.box = box
        self.nat_rep = nat_rep

    def _format_strings(self) -> list[str]:
        """Apply the explicit formatter, or a shared timedelta formatter."""
        formatter = self.formatter
        if not formatter:
            formatter = get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [formatter(x) for x in self.values]
1866
1867
def get_format_timedelta64(
    values: np.ndarray | TimedeltaIndex | TimedeltaArray,
    nat_rep: str | float = "NaT",
    box: bool = False,
) -> Callable:
    """
    Build one formatter shared by all timedeltas in ``values``.

    The representation is chosen once for the whole array: if every
    non-NaT value is a multiple of ``86400 * 10**9`` the default
    representation is used, otherwise the ``"long"`` one.

    If ``box`` is set, each formatted value is wrapped in single quotes.
    Missing values are rendered as ``nat_rep``.
    """
    i8values = values.view(np.int64)
    is_valid = i8values != iNaT

    nanos_per_day = 86400 * 10**9
    # error: Unsupported operand types for % ("ExtensionArray" and "int")
    has_sub_day_part = i8values % nanos_per_day != 0  # type: ignore[operator]
    # error: Argument 1 to "__call__" of "ufunc" has incompatible type
    # "Union[Any, ExtensionArray, ndarray]"; expected
    # "Union[Union[int, float, complex, str, bytes, generic],
    # Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    offending = np.logical_and(is_valid, has_sub_day_part)  # type: ignore[arg-type]
    whole_days_only = offending.sum() == 0

    repr_format = None if whole_days_only else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        td = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = td._repr_base(format=repr_format)
        return f"'{text}'" if box else text

    return _formatter
1913
1914
1915def _make_fixed_width(
1916 strings: list[str],
1917 justify: str = "right",
1918 minimum: int | None = None,
1919 adj: TextAdjustment | None = None,
1920) -> list[str]:
1921 if len(strings) == 0 or justify == "all":
1922 return strings
1923
1924 if adj is None:
1925 adjustment = get_adjustment()
1926 else:
1927 adjustment = adj
1928
1929 max_len = max(adjustment.len(x) for x in strings)
1930
1931 if minimum is not None:
1932 max_len = max(minimum, max_len)
1933
1934 conf_max = get_option("display.max_colwidth")
1935 if conf_max is not None and max_len > conf_max:
1936 max_len = conf_max
1937
1938 def just(x: str) -> str:
1939 if conf_max is not None:
1940 if (conf_max > 3) & (adjustment.len(x) > max_len):
1941 x = x[: max_len - 3] + "..."
1942 return x
1943
1944 strings = [just(x) for x in strings]
1945 result = adjustment.justify(strings, max_len, mode=justify)
1946 return result
1947
1948
def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:
    """
    Trim trailing zeros from formatted complex numbers.

    Each string is split on ``j``, ``+`` and ``-``, the pieces are passed to
    ``_trim_zeros_float``, and the results are re-joined. The strings are
    then zero-padded to the length of the longest one for alignment.
    """
    trimmed = []
    for text in str_complexes:
        pieces = re.split(r"([j+-])", text)
        trimmed.append("".join(_trim_zeros_float(pieces, decimal)))

    # pad strings to the length of the longest trimmed string for alignment
    lengths = [len(s) for s in trimmed]
    max_length = max(lengths)

    padded = []
    for s, k in zip(trimmed, lengths):
        half = (k - 1) // 2
        zeros = (max_length - k) // 2 * "0"
        padded.append(
            s[: -(half + 1)]  # real part
            + zeros
            + s[-(half + 1) : -half]  # + / -
            + s[-half:-1]  # imaginary part
            + zeros
            + s[-1]
        )
    return padded
1972
1973
1974def _trim_zeros_single_float(str_float: str) -> str:
1975 """
1976 Trims trailing zeros after a decimal point,
1977 leaving just one if necessary.
1978 """
1979 str_float = str_float.rstrip("0")
1980 if str_float.endswith("."):
1981 str_float += "0"
1982
1983 return str_float
1984
1985
1986def _trim_zeros_float(
1987 str_floats: np.ndarray | list[str], decimal: str = "."
1988) -> list[str]:
1989 """
1990 Trims the maximum number of trailing zeros equally from
1991 all numbers containing decimals, leaving just one if
1992 necessary.
1993 """
1994 trimmed = str_floats
1995 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")
1996
1997 def is_number_with_decimal(x) -> bool:
1998 return re.match(number_regex, x) is not None
1999
2000 def should_trim(values: np.ndarray | list[str]) -> bool:
2001 """
2002 Determine if an array of strings should be trimmed.
2003
2004 Returns True if all numbers containing decimals (defined by the
2005 above regular expression) within the array end in a zero, otherwise
2006 returns False.
2007 """
2008 numbers = [x for x in values if is_number_with_decimal(x)]
2009 return len(numbers) > 0 and all(x.endswith("0") for x in numbers)
2010
2011 while should_trim(trimmed):
2012 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]
2013
2014 # leave one 0 after the decimal points if need be.
2015 result = [
2016 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x
2017 for x in trimmed
2018 ]
2019 return result
2020
2021
2022def _has_names(index: Index) -> bool:
2023 if isinstance(index, MultiIndex):
2024 return com.any_not_none(*index.names)
2025 else:
2026 return index.name is not None
2027
2028
class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter

    Parameters
    ----------
    accuracy : int, optional
        Number of digits after the decimal point of the mantissa. When None
        the mantissa is rendered with the ``g`` format.
    use_eng_prefix : bool, default False
        Append an SI prefix letter (see ``ENG_PREFIXES``) instead of an
        ``E+XX`` / ``E-XX`` exponent.
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        Parameters
        ----------
        num : numeric or str
            The value to represent: either a numeric value or a string that
            can be converted to a numeric value (as per the
            ``decimal.Decimal`` constructor).

        Returns
        -------
        str
            Engineering formatted string. NaN gives ``"NaN"``; infinities
            give ``"inf"`` or ``"-inf"``.
        """
        dnum = Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Keep the sign so that -inf is not displayed as +inf.
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Largest multiple of 3 not exceeding log10 of the magnitude.
        if dnum != 0:
            pow10 = Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = Decimal(0)

        # Clamp the exponent to the range covered by ENG_PREFIXES.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        else:
            if int_pow10 < 0:
                prefix = f"E-{-int_pow10:02d}"
            else:
                prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted
2124
2125
def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Format float representation in DataFrame with SI notation.

    Installs an ``EngFormatter`` as the ``display.float_format`` option.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Whether to represent a value with SI prefixes.

    Returns
    -------
    None

    Examples
    --------
    >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6])
    >>> df
                  0
    0  1.000000e-09
    1  1.000000e-03
    2  1.000000e+00
    3  1.000000e+03
    4  1.000000e+06

    >>> pd.set_eng_float_format(accuracy=1)
    >>> df
             0
    0  1.0E-09
    1  1.0E-03
    2  1.0E+00
    3  1.0E+03
    4  1.0E+06

    >>> pd.set_eng_float_format(use_eng_prefix=True)
    >>> df
            0
    0  1.000n
    1  1.000m
    2   1.000
    3  1.000k
    4  1.000M

    >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    >>> df
          0
    0  1.0n
    1  1.0m
    2   1.0
    3  1.0k
    4  1.0M

    >>> pd.set_option("display.float_format", None)  # unset option
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)
2182
2183
def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    For each level, compute how many positions every label spans.

    Parameters
    ----------
    levels : list of lists
        The values of each level.
    sentinel : string, optional
        Value which states that no new label starts at that position.

    Returns
    -------
    list of dict
        One dict per level, mapping the position where a label starts to
        the number of positions it covers.
    """
    if len(levels) == 0:
        return []

    # A sentinel only continues a span while every level above it was
    # also a sentinel at that position.
    still_blank = [True] * len(levels[0])

    all_spans = []
    for level in levels:
        start = 0
        spans = {}
        for pos, key in enumerate(level):
            if still_blank[pos] and key == sentinel:
                continue
            still_blank[pos] = False
            spans[start] = pos - start
            start = pos

        spans[start] = len(level) - start
        all_spans.append(spans)

    return all_spans
2225
2226
def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Entries are converted with ``str`` before
        being joined with newlines; no trailing newline is written.
    """
    # Always convert: ``str.join`` rejects non-string items, so a list
    # without any ``str`` entry must be converted as well.
    buf.write("\n".join(str(line) for line in lines))