1from __future__ import annotations
2
3from abc import (
4 ABC,
5 abstractmethod,
6)
7import sys
8from textwrap import dedent
9from typing import (
10 TYPE_CHECKING,
11 Iterable,
12 Iterator,
13 Mapping,
14 Sequence,
15)
16
17from pandas._config import get_option
18
19from pandas._typing import (
20 Dtype,
21 WriteBuffer,
22)
23
24from pandas.io.formats import format as fmt
25from pandas.io.formats.printing import pprint_thing
26
27if TYPE_CHECKING:
28 from pandas import (
29 DataFrame,
30 Index,
31 Series,
32 )
33
34
35frame_max_cols_sub = dedent(
36 """\
37 max_cols : int, optional
38 When to switch from the verbose to the truncated output. If the
39 DataFrame has more than `max_cols` columns, the truncated output
40 is used. By default, the setting in
41 ``pandas.options.display.max_info_columns`` is used."""
42)
43
44
45show_counts_sub = dedent(
46 """\
47 show_counts : bool, optional
48 Whether to show the non-null counts. By default, this is shown
49 only if the DataFrame is smaller than
50 ``pandas.options.display.max_info_rows`` and
51 ``pandas.options.display.max_info_columns``. A value of True always
52 shows the counts, and False never shows the counts."""
53)
54
55
56frame_examples_sub = dedent(
57 """\
58 >>> int_values = [1, 2, 3, 4, 5]
59 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
60 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
61 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
62 ... "float_col": float_values})
63 >>> df
64 int_col text_col float_col
65 0 1 alpha 0.00
66 1 2 beta 0.25
67 2 3 gamma 0.50
68 3 4 delta 0.75
69 4 5 epsilon 1.00
70
71 Prints information of all columns:
72
73 >>> df.info(verbose=True)
74 <class 'pandas.core.frame.DataFrame'>
75 RangeIndex: 5 entries, 0 to 4
76 Data columns (total 3 columns):
77 # Column Non-Null Count Dtype
78 --- ------ -------------- -----
79 0 int_col 5 non-null int64
80 1 text_col 5 non-null object
81 2 float_col 5 non-null float64
82 dtypes: float64(1), int64(1), object(1)
83 memory usage: 248.0+ bytes
84
85 Prints a summary of columns count and its dtypes but not per column
86 information:
87
88 >>> df.info(verbose=False)
89 <class 'pandas.core.frame.DataFrame'>
90 RangeIndex: 5 entries, 0 to 4
91 Columns: 3 entries, int_col to float_col
92 dtypes: float64(1), int64(1), object(1)
93 memory usage: 248.0+ bytes
94
95 Pipe output of DataFrame.info to buffer instead of sys.stdout, get
96 buffer content and writes to a text file:
97
98 >>> import io
99 >>> buffer = io.StringIO()
100 >>> df.info(buf=buffer)
101 >>> s = buffer.getvalue()
102 >>> with open("df_info.txt", "w",
103 ... encoding="utf-8") as f: # doctest: +SKIP
104 ... f.write(s)
105 260
106
107 The `memory_usage` parameter allows deep introspection mode, specially
108 useful for big DataFrames and fine-tune memory optimization:
109
110 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
111 >>> df = pd.DataFrame({
112 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
113 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
114 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
115 ... })
116 >>> df.info()
117 <class 'pandas.core.frame.DataFrame'>
118 RangeIndex: 1000000 entries, 0 to 999999
119 Data columns (total 3 columns):
120 # Column Non-Null Count Dtype
121 --- ------ -------------- -----
122 0 column_1 1000000 non-null object
123 1 column_2 1000000 non-null object
124 2 column_3 1000000 non-null object
125 dtypes: object(3)
126 memory usage: 22.9+ MB
127
128 >>> df.info(memory_usage='deep')
129 <class 'pandas.core.frame.DataFrame'>
130 RangeIndex: 1000000 entries, 0 to 999999
131 Data columns (total 3 columns):
132 # Column Non-Null Count Dtype
133 --- ------ -------------- -----
134 0 column_1 1000000 non-null object
135 1 column_2 1000000 non-null object
136 2 column_3 1000000 non-null object
137 dtypes: object(3)
138 memory usage: 165.9 MB"""
139)
140
141
142frame_see_also_sub = dedent(
143 """\
144 DataFrame.describe: Generate descriptive statistics of DataFrame
145 columns.
146 DataFrame.memory_usage: Memory usage of DataFrame columns."""
147)
148
149
# Values for the ``{klass}``, ``{type_sub}``, ``{max_cols_sub}``,
# ``{show_counts_sub}``, ``{examples_sub}``, ``{see_also_sub}`` and
# ``{version_added_sub}`` placeholders of ``INFO_DOCSTRING`` (DataFrame variant).
# NOTE(review): the substitution itself happens outside this module view --
# presumably where ``DataFrame.info`` is defined; confirm against the caller.
frame_sub_kwargs = {
    "klass": "DataFrame",
    "type_sub": " and columns",
    "max_cols_sub": frame_max_cols_sub,
    "show_counts_sub": show_counts_sub,
    "examples_sub": frame_examples_sub,
    "see_also_sub": frame_see_also_sub,
    "version_added_sub": "",
}
159
160
161series_examples_sub = dedent(
162 """\
163 >>> int_values = [1, 2, 3, 4, 5]
164 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
165 >>> s = pd.Series(text_values, index=int_values)
166 >>> s.info()
167 <class 'pandas.core.series.Series'>
168 Index: 5 entries, 1 to 5
169 Series name: None
170 Non-Null Count Dtype
171 -------------- -----
172 5 non-null object
173 dtypes: object(1)
174 memory usage: 80.0+ bytes
175
176 Prints a summary excluding information about its values:
177
178 >>> s.info(verbose=False)
179 <class 'pandas.core.series.Series'>
180 Index: 5 entries, 1 to 5
181 dtypes: object(1)
182 memory usage: 80.0+ bytes
183
184 Pipe output of Series.info to buffer instead of sys.stdout, get
185 buffer content and writes to a text file:
186
187 >>> import io
188 >>> buffer = io.StringIO()
189 >>> s.info(buf=buffer)
190 >>> s = buffer.getvalue()
191 >>> with open("df_info.txt", "w",
192 ... encoding="utf-8") as f: # doctest: +SKIP
193 ... f.write(s)
194 260
195
196 The `memory_usage` parameter allows deep introspection mode, specially
197 useful for big Series and fine-tune memory optimization:
198
199 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
200 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
201 >>> s.info()
202 <class 'pandas.core.series.Series'>
203 RangeIndex: 1000000 entries, 0 to 999999
204 Series name: None
205 Non-Null Count Dtype
206 -------------- -----
207 1000000 non-null object
208 dtypes: object(1)
209 memory usage: 7.6+ MB
210
211 >>> s.info(memory_usage='deep')
212 <class 'pandas.core.series.Series'>
213 RangeIndex: 1000000 entries, 0 to 999999
214 Series name: None
215 Non-Null Count Dtype
216 -------------- -----
217 1000000 non-null object
218 dtypes: object(1)
219 memory usage: 55.3 MB"""
220)
221
222
223series_see_also_sub = dedent(
224 """\
225 Series.describe: Generate descriptive statistics of Series.
226 Series.memory_usage: Memory usage of Series."""
227)
228
229
# Values for the placeholders of ``INFO_DOCSTRING`` (Series variant).
# ``type_sub`` and ``max_cols_sub`` are empty strings here, and
# ``version_added_sub`` carries a ``versionadded`` directive.
# NOTE(review): the substitution itself happens outside this module view --
# presumably where ``Series.info`` is defined; confirm against the caller.
series_sub_kwargs = {
    "klass": "Series",
    "type_sub": "",
    "max_cols_sub": "",
    "show_counts_sub": show_counts_sub,
    "examples_sub": series_examples_sub,
    "see_also_sub": series_see_also_sub,
    "version_added_sub": "\n.. versionadded:: 1.4.0\n",
}
239
240
241INFO_DOCSTRING = dedent(
242 """
243 Print a concise summary of a {klass}.
244
245 This method prints information about a {klass} including
246 the index dtype{type_sub}, non-null values and memory usage.
247 {version_added_sub}\
248
249 Parameters
250 ----------
251 verbose : bool, optional
252 Whether to print the full summary. By default, the setting in
253 ``pandas.options.display.max_info_columns`` is followed.
254 buf : writable buffer, defaults to sys.stdout
255 Where to send the output. By default, the output is printed to
256 sys.stdout. Pass a writable buffer if you need to further process
257 the output.
258 {max_cols_sub}
259 memory_usage : bool, str, optional
260 Specifies whether total memory usage of the {klass}
261 elements (including the index) should be displayed. By default,
262 this follows the ``pandas.options.display.memory_usage`` setting.
263
264 True always show memory usage. False never shows memory usage.
265 A value of 'deep' is equivalent to "True with deep introspection".
266 Memory usage is shown in human-readable units (base-2
267 representation). Without deep introspection a memory estimation is
268 made based in column dtype and number of rows assuming values
269 consume the same memory amount for corresponding dtypes. With deep
270 memory introspection, a real memory usage calculation is performed
271 at the cost of computational resources. See the
272 :ref:`Frequently Asked Questions <df-memory-usage>` for more
273 details.
274 {show_counts_sub}
275
276 Returns
277 -------
278 None
279 This method prints a summary of a {klass} and returns None.
280
281 See Also
282 --------
283 {see_also_sub}
284
285 Examples
286 --------
287 {examples_sub}
288 """
289)
290
291
def _put_str(s: str | Dtype, space: int) -> str:
    """
    Render a value as text clipped and padded to a fixed width.

    Parameters
    ----------
    s : Union[str, Dtype]
        Value to render; it is converted with ``str`` first.
    space : int
        Width used both for clipping and for padding.

    Returns
    -------
    str
        The first ``space`` characters of the text, left-justified and
        padded with blanks up to ``space`` characters.

    Examples
    --------
    >>> pd.io.formats.info._put_str("panda", 6)
    'panda '
    >>> pd.io.formats.info._put_str("panda", 4)
    'pand'
    """
    text = str(s)
    clipped = text[:space]
    return clipped.ljust(space)
316
317
def _sizeof_fmt(num: float, size_qualifier: str) -> str:
    """
    Format a byte count as a human readable string.

    The value is divided by 1024 until it drops below 1024 or the
    largest unit ("PB") is reached.

    Parameters
    ----------
    num : float
        Size in bytes.
    size_qualifier : str
        Either empty, or '+' (if lower bound). It is inserted between
        the number and the unit.

    Returns
    -------
    str
        Size with one decimal place, the qualifier and the unit.

    Examples
    --------
    >>> _sizeof_fmt(23028, '')
    '22.5 KB'

    >>> _sizeof_fmt(23028, '+')
    '22.5+ KB'
    """
    size = num
    for unit in ("bytes", "KB", "MB", "GB", "TB"):
        if size < 1024.0:
            break
        size /= 1024.0
    else:
        # Still >= 1024 TB after every division: report in petabytes.
        unit = "PB"
    return f"{size:3.1f}{size_qualifier} {unit}"
347
348
def _initialize_memory_usage(
    memory_usage: bool | str | None = None,
) -> bool | str:
    """
    Resolve the effective memory usage setting.

    An explicit (non-None) argument is returned unchanged; ``None`` is
    replaced by the ``display.memory_usage`` option.
    """
    if memory_usage is not None:
        return memory_usage
    return get_option("display.memory_usage")
356
357
class BaseInfo(ABC):
    """
    Common interface of DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Either dataframe or series.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Sequence of non-null counts for all columns or column (if series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """
        Qualifier appended to the memory figure: "+" or an empty string.

        "+" is returned only when memory usage is enabled, is not "deep",
        and either an "object" dtype is present or the index reports its
        memory usage as qualified.
        """
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        if "object" in self.dtype_counts:
            return "+"
        if self.data.index._is_memory_usage_qualified():
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by subclasses."""
439
440
class DataFrameInfo(BaseInfo):
    """
    Info provider backed by a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Frame to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return len(self.ids)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as returned by ``DataFrame.count``."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Sum of ``DataFrame.memory_usage`` including the index."""
        per_column = self.data.memory_usage(
            index=True,
            deep=self.memory_usage == "deep",
        )
        return per_column.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a ``DataFrameInfoPrinter`` and write its output to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)
512
513
class SeriesInfo(BaseInfo):
    """
    Info provider backed by a Series.

    Parameters
    ----------
    data : Series
        Series to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series dtype."""
        return [self.data.dtypes]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts, computed on a one-column frame."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series non-null count."""
        return [self.data.count()]

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        return self.data.memory_usage(
            index=True,
            deep=self.memory_usage == "deep",
        )

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a ``SeriesInfoPrinter`` and write its output to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given (anything other than None).
        """
        if max_cols is not None:
            message = (
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
            raise ValueError(message)
        SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)
572
573
class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Derives from ``ABC`` so that ``@abstractmethod`` on
    ``_create_table_builder`` is actually enforced: the base class cannot
    be instantiated, and subclasses must provide the table builder.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Write the info lines into a buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Target of the output; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""
590
591
class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols must be resolved first: show_counts depends on it.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """
        Maximum info rows to be displayed.

        Read from ``display.max_info_rows``. ``get_option`` takes no default
        value, so a None option value is handled here explicitly and treated
        as "no limit" (one more than the number of rows), which keeps
        ``exceeds_info_rows`` from comparing an int with None.
        """
        max_rows = get_option("display.max_info_rows")
        if max_rows is None:
            return len(self.data) + 1
        return max_rows

    @property
    def exceeds_info_cols(self) -> bool:
        """Check if number of columns to be summarized exceeds maximum."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Check if number of rows to be summarized exceeds maximum."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """
        Resolve the column limit.

        An explicit ``max_cols`` wins; otherwise ``display.max_info_columns``
        is used, with a None option value treated as "no limit".
        """
        if max_cols is not None:
            return max_cols
        option_value = get_option("display.max_info_columns")
        if option_value is None:
            return self.col_count + 1
        return option_value

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Resolve whether non-null counts are shown.

        An explicit value wins; by default counts are shown only when
        neither the column nor the row limit is exceeded.
        """
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose:
            use_verbose = True
        elif self.verbose is False:  # specifically set to False, not necessarily None
            use_verbose = False
        else:
            # Verbosity not specified: fall back to the column limit.
            use_verbose = not self.exceeds_info_cols

        if use_verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return DataFrameTableBuilderNonVerbose(info=self.info)
671
672
class SeriesInfoPrinter(InfoPrinterAbstract):
    """Printer producing the info output of a series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, with None resolved to True."""
        return True if show_counts is None else show_counts

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.

        The verbose builder is used when ``verbose`` is truthy or None.
        """
        wants_verbose = self.verbose or self.verbose is None
        if not wants_verbose:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )
714
715
class TableBuilderAbstract(ABC):
    """
    Abstract builder for info table.

    Most properties simply forward to the attached ``info`` object; the
    ``add_*`` helpers append one line each to ``_lines``.
    """

    _lines: list[str]
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object being summarized, taken from ``info``."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes of each of the DataFrame's columns."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, taken from ``info``."""
        return self.info.non_null_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        return bool(self.info.memory_usage)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    def add_object_type_line(self) -> None:
        """Add line with the string form of the data's type."""
        type_repr = str(type(self.data))
        self._lines.append(type_repr)

    def add_index_range_line(self) -> None:
        """Add line with range of indices to the table."""
        index_summary = self.data.index._summary()
        self._lines.append(index_summary)

    def add_dtypes_line(self) -> None:
        """Add summary line with dtypes present, sorted by dtype name."""
        parts = []
        for name, count in sorted(self.dtype_counts.items()):
            parts.append(f"{name}({count:d})")
        self._lines.append(f"dtypes: {', '.join(parts)}")
770
771
class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Abstract builder for dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def get_lines(self) -> list[str]:
        """Reset the line buffer, fill it and return it."""
        self._lines = []
        has_columns = self.col_count != 0
        fill = self._fill_non_empty_info if has_columns else self._fill_empty_info
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        type_name = type(self.data).__name__
        self._lines.append(f"Empty {type_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")
821
822
class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Dataframe info table builder for non-verbose output.
    """

    def add_columns_summary_line(self) -> None:
        """Add the one-line summary of the columns index."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        always_present = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        )
        for add_line in always_present:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()
839
840
class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Mixin for verbose info output.

    Provides the header, separator and body lines of the verbose table.
    Classes using it supply ``headers``, the two row generators, and the
    ``strrows`` / ``gross_column_widths`` / ``with_counts`` attributes.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        header_widths = self.header_column_widths
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(header_widths, body_widths)
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so that each entry holds one column's cells.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        return [max(map(len, column)) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Generator function yielding rows content.

        Each element represents a row comprising a sequence of strings.
        """
        make_rows = (
            self._gen_rows_with_counts
            if self.with_counts
            else self._gen_rows_without_counts
        )
        return make_rows()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the line with the column titles, each fitted to its width."""
        cells = []
        for title, width in zip(self.headers, self.gross_column_widths):
            cells.append(_put_str(title, width))
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the dashed line; each dash run is as long as its title."""
        cells = []
        for title_width, width in zip(
            self.header_column_widths, self.gross_column_widths
        ):
            cells.append(_put_str("-" * title_width, width))
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one line per row of ``strrows``."""
        for row in self.strrows:
            cells = []
            for cell, width in zip(row, self.gross_column_widths):
                cells.append(_put_str(cell, width))
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from (pprint_thing(dtype) for dtype in self.dtypes)
932
933
class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Dataframe info table builder for verbose output.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    with_counts : bool
        Whether the table includes the non-null count column.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first: the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if not self.with_counts:
            return [" # ", "Column", "Dtype"]
        return [" # ", "Column", "Non-Null Count", "Dtype"]

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        always_present = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        )
        for add_lines in always_present:
            add_lines()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Add the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        line_numbers = self._gen_line_numbers()
        columns = self._gen_columns()
        counts = self._gen_non_null_counts()
        dtypes = self._gen_dtypes()
        yield from zip(line_numbers, columns, counts, dtypes)

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        line_numbers = self._gen_line_numbers()
        columns = self._gen_columns()
        dtypes = self._gen_dtypes()
        yield from zip(line_numbers, columns, dtypes)

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        for position in range(len(self.ids)):
            yield f" {position}"

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from (pprint_thing(label) for label in self.ids)
998
999
class SeriesTableBuilder(TableBuilderAbstract):
    """
    Abstract builder for series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def get_lines(self) -> list[str]:
        """Reset the line buffer, fill it and return it."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")
1030
1031
class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Series info table builder for non-verbose output.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        always_present = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        )
        for add_line in always_present:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()
1044
1045
class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    with_counts : bool
        Whether the table includes the non-null count column.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first: the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add line with the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Each row must be a sequence of cells. Yielding the bare dtype
        # string would make every character a separate cell, so only the
        # first character would be rendered under the single "Dtype" header.
        # ``zip`` over one iterable wraps each dtype in a 1-tuple.
        yield from zip(self._gen_dtypes())

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
1094
1095
def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
    """
    Count the columns of a frame per dtype name.

    Parameters
    ----------
    df : DataFrame
        Frame whose ``dtypes`` are counted.

    Returns
    -------
    Mapping[str, int]
        Number of columns for each dtype name.
    """
    columns_per_dtype = df.dtypes.value_counts()
    # groupby dtype.name to collect e.g. Categorical columns
    by_name = columns_per_dtype.groupby(lambda dtype: dtype.name)
    return by_name.sum()