Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/formats/info.py: 49%

1from __future__ import annotations

3from abc import (

4 ABC,

5 abstractmethod,

7import sys

8from textwrap import dedent

9from typing import TYPE_CHECKING

11from pandas._config import get_option

13from pandas.io.formats import format as fmt

14from pandas.io.formats.printing import pprint_thing

16if TYPE_CHECKING:

17 from collections.abc import (

18 Iterable,

19 Iterator,

20 Mapping,

21 Sequence,

22 )

24 from pandas._typing import (

25 Dtype,

26 WriteBuffer,

27 )

29 from pandas import (

30 DataFrame,

31 Index,

32 Series,

33 )

# Docstring fragment documenting the ``max_cols`` parameter. It is the
# "max_cols_sub" entry of ``frame_sub_kwargs`` and therefore fills the
# ``{max_cols_sub}`` placeholder of ``INFO_DOCSTRING`` for DataFrame.
36frame_max_cols_sub = dedent(

37 """\

38 max_cols : int, optional

39 When to switch from the verbose to the truncated output. If the

40 DataFrame has more than `max_cols` columns, the truncated output

41 is used. By default, the setting in

42 ``pandas.options.display.max_info_columns`` is used."""

43)

# Docstring fragment documenting the ``show_counts`` parameter. It is
# referenced as "show_counts_sub" by both ``frame_sub_kwargs`` and
# ``series_sub_kwargs``.
46show_counts_sub = dedent(

47 """\

48 show_counts : bool, optional

49 Whether to show the non-null counts. By default, this is shown

50 only if the DataFrame is smaller than

51 ``pandas.options.display.max_info_rows`` and

52 ``pandas.options.display.max_info_columns``. A value of True always

53 shows the counts, and False never shows the counts."""

54)

# Doctest-style usage examples for the DataFrame flavour of the info
# docstring ("examples_sub" entry of ``frame_sub_kwargs``).
# NOTE(review): the column alignment of the sample output below appears to
# have been collapsed in this copy of the file - confirm against the
# canonical source before relying on it.
57frame_examples_sub = dedent(

58 """\

59 >>> int_values = [1, 2, 3, 4, 5]

60 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

61 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]

62 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,

63 ... "float_col": float_values})

64 >>> df

65 int_col text_col float_col

66 0 1 alpha 0.00

67 1 2 beta 0.25

68 2 3 gamma 0.50

69 3 4 delta 0.75

70 4 5 epsilon 1.00

72 Prints information of all columns:

74 >>> df.info(verbose=True)

75 <class 'pandas.core.frame.DataFrame'>

76 RangeIndex: 5 entries, 0 to 4

77 Data columns (total 3 columns):

78 # Column Non-Null Count Dtype

79 --- ------ -------------- -----

80 0 int_col 5 non-null int64

81 1 text_col 5 non-null object

82 2 float_col 5 non-null float64

83 dtypes: float64(1), int64(1), object(1)

84 memory usage: 248.0+ bytes

86 Prints a summary of columns count and its dtypes but not per column

87 information:

89 >>> df.info(verbose=False)

90 <class 'pandas.core.frame.DataFrame'>

91 RangeIndex: 5 entries, 0 to 4

92 Columns: 3 entries, int_col to float_col

93 dtypes: float64(1), int64(1), object(1)

94 memory usage: 248.0+ bytes

96 Pipe output of DataFrame.info to buffer instead of sys.stdout, get

97 buffer content and writes to a text file:

99 >>> import io

100 >>> buffer = io.StringIO()

101 >>> df.info(buf=buffer)

102 >>> s = buffer.getvalue()

103 >>> with open("df_info.txt", "w",

104 ... encoding="utf-8") as f: # doctest: +SKIP

105 ... f.write(s)

106 260

107

108 The `memory_usage` parameter allows deep introspection mode, specially

109 useful for big DataFrames and fine-tune memory optimization:

110

111 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

112 >>> df = pd.DataFrame({

113 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),

114 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),

115 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)

116 ... })

117 >>> df.info()

118 <class 'pandas.core.frame.DataFrame'>

119 RangeIndex: 1000000 entries, 0 to 999999

120 Data columns (total 3 columns):

121 # Column Non-Null Count Dtype

122 --- ------ -------------- -----

123 0 column_1 1000000 non-null object

124 1 column_2 1000000 non-null object

125 2 column_3 1000000 non-null object

126 dtypes: object(3)

127 memory usage: 22.9+ MB

128

129 >>> df.info(memory_usage='deep')

130 <class 'pandas.core.frame.DataFrame'>

131 RangeIndex: 1000000 entries, 0 to 999999

132 Data columns (total 3 columns):

133 # Column Non-Null Count Dtype

134 --- ------ -------------- -----

135 0 column_1 1000000 non-null object

136 1 column_2 1000000 non-null object

137 2 column_3 1000000 non-null object

138 dtypes: object(3)

139 memory usage: 165.9 MB"""

140)

141

142

# "See Also" entries for the DataFrame flavour of the info docstring
# ("see_also_sub" entry of ``frame_sub_kwargs``).
143frame_see_also_sub = dedent(

144 """\

145 DataFrame.describe: Generate descriptive statistics of DataFrame

146 columns.

147 DataFrame.memory_usage: Memory usage of DataFrame columns."""

148)

149

150

# Substitution values for the DataFrame variant of ``INFO_DOCSTRING``; the
# keys match the ``{...}`` placeholders used in that template.
# NOTE(review): the code that performs the substitution is not in this
# file - presumably a docstring-substitution decorator on DataFrame.info;
# verify against the caller.
151frame_sub_kwargs = {

152 "klass": "DataFrame",

153 "type_sub": " and columns",

154 "max_cols_sub": frame_max_cols_sub,

155 "show_counts_sub": show_counts_sub,

156 "examples_sub": frame_examples_sub,

157 "see_also_sub": frame_see_also_sub,

158 "version_added_sub": "",

159}

160

161

# Doctest-style usage examples for the Series flavour of the info
# docstring ("examples_sub" entry of ``series_sub_kwargs``).
# NOTE(review): the column alignment of the sample output below appears to
# have been collapsed in this copy of the file - confirm against the
# canonical source before relying on it.
162series_examples_sub = dedent(

163 """\

164 >>> int_values = [1, 2, 3, 4, 5]

165 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

166 >>> s = pd.Series(text_values, index=int_values)

167 >>> s.info()

168 <class 'pandas.core.series.Series'>

169 Index: 5 entries, 1 to 5

170 Series name: None

171 Non-Null Count Dtype

172 -------------- -----

173 5 non-null object

174 dtypes: object(1)

175 memory usage: 80.0+ bytes

176

177 Prints a summary excluding information about its values:

178

179 >>> s.info(verbose=False)

180 <class 'pandas.core.series.Series'>

181 Index: 5 entries, 1 to 5

182 dtypes: object(1)

183 memory usage: 80.0+ bytes

184

185 Pipe output of Series.info to buffer instead of sys.stdout, get

186 buffer content and writes to a text file:

187

188 >>> import io

189 >>> buffer = io.StringIO()

190 >>> s.info(buf=buffer)

191 >>> s = buffer.getvalue()

192 >>> with open("df_info.txt", "w",

193 ... encoding="utf-8") as f: # doctest: +SKIP

194 ... f.write(s)

195 260

196

197 The `memory_usage` parameter allows deep introspection mode, specially

198 useful for big Series and fine-tune memory optimization:

199

200 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

201 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))

202 >>> s.info()

203 <class 'pandas.core.series.Series'>

204 RangeIndex: 1000000 entries, 0 to 999999

205 Series name: None

206 Non-Null Count Dtype

207 -------------- -----

208 1000000 non-null object

209 dtypes: object(1)

210 memory usage: 7.6+ MB

211

212 >>> s.info(memory_usage='deep')

213 <class 'pandas.core.series.Series'>

214 RangeIndex: 1000000 entries, 0 to 999999

215 Series name: None

216 Non-Null Count Dtype

217 -------------- -----

218 1000000 non-null object

219 dtypes: object(1)

220 memory usage: 55.3 MB"""

221)

222

223

# "See Also" entries for the Series flavour of the info docstring
# ("see_also_sub" entry of ``series_sub_kwargs``).
224series_see_also_sub = dedent(

225 """\

226 Series.describe: Generate descriptive statistics of Series.

227 Series.memory_usage: Memory usage of Series."""

228)

229

230

# Substitution values for the Series variant of ``INFO_DOCSTRING``; the
# keys match the ``{...}`` placeholders used in that template. Unlike the
# DataFrame variant, "type_sub" and "max_cols_sub" are empty and a
# ``versionadded`` directive is supplied.
231series_sub_kwargs = {

232 "klass": "Series",

233 "type_sub": "",

234 "max_cols_sub": "",

235 "show_counts_sub": show_counts_sub,

236 "examples_sub": series_examples_sub,

237 "see_also_sub": series_see_also_sub,

238 "version_added_sub": "\n.. versionadded:: 1.4.0\n",

239}

240

241

# Docstring template shared by the DataFrame and Series ``info`` methods.
# The ``{klass}``, ``{type_sub}``, ``{version_added_sub}``,
# ``{max_cols_sub}``, ``{show_counts_sub}``, ``{see_also_sub}`` and
# ``{examples_sub}`` placeholders correspond to the keys of
# ``frame_sub_kwargs`` / ``series_sub_kwargs``.
242INFO_DOCSTRING = dedent(

243 """

244 Print a concise summary of a {klass}.

245

246 This method prints information about a {klass} including

247 the index dtype{type_sub}, non-null values and memory usage.

248 {version_added_sub}\

249

250 Parameters

251 ----------

252 verbose : bool, optional

253 Whether to print the full summary. By default, the setting in

254 ``pandas.options.display.max_info_columns`` is followed.

255 buf : writable buffer, defaults to sys.stdout

256 Where to send the output. By default, the output is printed to

257 sys.stdout. Pass a writable buffer if you need to further process

258 the output.

259 {max_cols_sub}

260 memory_usage : bool, str, optional

261 Specifies whether total memory usage of the {klass}

262 elements (including the index) should be displayed. By default,

263 this follows the ``pandas.options.display.memory_usage`` setting.

264

265 True always show memory usage. False never shows memory usage.

266 A value of 'deep' is equivalent to "True with deep introspection".

267 Memory usage is shown in human-readable units (base-2

268 representation). Without deep introspection a memory estimation is

269 made based in column dtype and number of rows assuming values

270 consume the same memory amount for corresponding dtypes. With deep

271 memory introspection, a real memory usage calculation is performed

272 at the cost of computational resources. See the

273 :ref:`Frequently Asked Questions <df-memory-usage>` for more

274 details.

275 {show_counts_sub}

276

277 Returns

278 -------

279 None

280 This method prints a summary of a {klass} and returns None.

281

282 See Also

283 --------

284 {see_also_sub}

285

286 Examples

287 --------

288 {examples_sub}

289 """

290)

291

292

293def _put_str(s: str | Dtype, space: int) -> str:

294 """

295 Make string of specified length, padding to the right if necessary.

296

297 Parameters

298 ----------

299 s : Union[str, Dtype]

300 String to be formatted.

301 space : int

302 Length to force string to be of.

303

304 Returns

305 -------

306 str

307 String coerced to given length.

308

309 Examples

310 --------

311 >>> pd.io.formats.info._put_str("panda", 6)

312 'panda '

313 >>> pd.io.formats.info._put_str("panda", 4)

314 'pand'

315 """

316 return str(s)[:space].ljust(space)

317

318

319def _sizeof_fmt(num: float, size_qualifier: str) -> str:

320 """

321 Return size in human readable format.

322

323 Parameters

324 ----------

325 num : int

326 Size in bytes.

327 size_qualifier : str

328 Either empty, or '+' (if lower bound).

329

330 Returns

331 -------

332 str

333 Size in human readable format.

334

335 Examples

336 --------

337 >>> _sizeof_fmt(23028, '')

338 '22.5 KB'

339

340 >>> _sizeof_fmt(23028, '+')

341 '22.5+ KB'

342 """

343 for x in ["bytes", "KB", "MB", "GB", "TB"]:

344 if num < 1024.0:

345 return f"{num:3.1f}{size_qualifier} {x}"

346 num /= 1024.0

347 return f"{num:3.1f}{size_qualifier} PB"

348

349

350def _initialize_memory_usage(

351 memory_usage: bool | str | None = None,

352) -> bool | str:

353 """Get memory usage based on inputs and display options."""

354 if memory_usage is None:

355 memory_usage = get_option("display.memory_usage")

356 return memory_usage

357

358

359class _BaseInfo(ABC):

360 """

361 Base class for DataFrameInfo and SeriesInfo.

362

363 Parameters

364 ----------

365 data : DataFrame or Series

366 Either dataframe or series.

367 memory_usage : bool or str, optional

368 If "deep", introspect the data deeply by interrogating object dtypes

369 for system-level memory consumption, and include it in the returned

370 values.

371 """

372

373 data: DataFrame | Series

374 memory_usage: bool | str

375

376 @property

377 @abstractmethod

378 def dtypes(self) -> Iterable[Dtype]:

379 """

380 Dtypes.

381

382 Returns

383 -------

384 dtypes : sequence

385 Dtype of each of the DataFrame's columns (or one series column).

386 """

387

388 @property

389 @abstractmethod

390 def dtype_counts(self) -> Mapping[str, int]:

391 """Mapping dtype - number of counts."""

392

393 @property

394 @abstractmethod

395 def non_null_counts(self) -> Sequence[int]:

396 """Sequence of non-null counts for all columns or column (if series)."""

397

398 @property

399 @abstractmethod

400 def memory_usage_bytes(self) -> int:

401 """

402 Memory usage in bytes.

403

404 Returns

405 -------

406 memory_usage_bytes : int

407 Object's total memory usage in bytes.

408 """

409

410 @property

411 def memory_usage_string(self) -> str:

412 """Memory usage in a form of human readable string."""

413 return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n"

414

415 @property

416 def size_qualifier(self) -> str:

417 size_qualifier = ""

418 if self.memory_usage:

419 if self.memory_usage != "deep":

420 # size_qualifier is just a best effort; not guaranteed to catch

421 # all cases (e.g., it misses categorical data even with object

422 # categories)

423 if (

424 "object" in self.dtype_counts

425 or self.data.index._is_memory_usage_qualified()

426 ):

427 size_qualifier = "+"

428 return size_qualifier

429

430 @abstractmethod

431 def render(

432 self,

433 *,

434 buf: WriteBuffer[str] | None,

435 max_cols: int | None,

436 verbose: bool | None,

437 show_counts: bool | None,

438 ) -> None:

439 pass

440

441

442class DataFrameInfo(_BaseInfo):

443 """

444 Class storing dataframe-specific info.

445 """

446

447 def __init__(

448 self,

449 data: DataFrame,

450 memory_usage: bool | str | None = None,

451 ) -> None:

452 self.data: DataFrame = data

453 self.memory_usage = _initialize_memory_usage(memory_usage)

454

455 @property

456 def dtype_counts(self) -> Mapping[str, int]:

457 return _get_dataframe_dtype_counts(self.data)

458

459 @property

460 def dtypes(self) -> Iterable[Dtype]:

461 """

462 Dtypes.

463

464 Returns

465 -------

466 dtypes

467 Dtype of each of the DataFrame's columns.

468 """

469 return self.data.dtypes

470

471 @property

472 def ids(self) -> Index:

473 """

474 Column names.

475

476 Returns

477 -------

478 ids : Index

479 DataFrame's column names.

480 """

481 return self.data.columns

482

483 @property

484 def col_count(self) -> int:

485 """Number of columns to be summarized."""

486 return len(self.ids)

487

488 @property

489 def non_null_counts(self) -> Sequence[int]:

490 """Sequence of non-null counts for all columns or column (if series)."""

491 return self.data.count()

492

493 @property

494 def memory_usage_bytes(self) -> int:

495 deep = self.memory_usage == "deep"

496 return self.data.memory_usage(index=True, deep=deep).sum()

497

498 def render(

499 self,

500 *,

501 buf: WriteBuffer[str] | None,

502 max_cols: int | None,

503 verbose: bool | None,

504 show_counts: bool | None,

505 ) -> None:

506 printer = _DataFrameInfoPrinter(

507 info=self,

508 max_cols=max_cols,

509 verbose=verbose,

510 show_counts=show_counts,

511 )

512 printer.to_buffer(buf)

513

514

515class SeriesInfo(_BaseInfo):

516 """

517 Class storing series-specific info.

518 """

519

520 def __init__(

521 self,

522 data: Series,

523 memory_usage: bool | str | None = None,

524 ) -> None:

525 self.data: Series = data

526 self.memory_usage = _initialize_memory_usage(memory_usage)

527

528 def render(

529 self,

530 *,

531 buf: WriteBuffer[str] | None = None,

532 max_cols: int | None = None,

533 verbose: bool | None = None,

534 show_counts: bool | None = None,

535 ) -> None:

536 if max_cols is not None:

537 raise ValueError(

538 "Argument `max_cols` can only be passed "

539 "in DataFrame.info, not Series.info"

540 )

541 printer = _SeriesInfoPrinter(

542 info=self,

543 verbose=verbose,

544 show_counts=show_counts,

545 )

546 printer.to_buffer(buf)

547

548 @property

549 def non_null_counts(self) -> Sequence[int]:

550 return [self.data.count()]

551

552 @property

553 def dtypes(self) -> Iterable[Dtype]:

554 return [self.data.dtypes]

555

556 @property

557 def dtype_counts(self) -> Mapping[str, int]:

558 from pandas.core.frame import DataFrame

559

560 return _get_dataframe_dtype_counts(DataFrame(self.data))

561

562 @property

563 def memory_usage_bytes(self) -> int:

564 """Memory usage in bytes.

565

566 Returns

567 -------

568 memory_usage_bytes : int

569 Object's total memory usage in bytes.

570 """

571 deep = self.memory_usage == "deep"

572 return self.data.memory_usage(index=True, deep=deep)

573

574

575class _InfoPrinterAbstract:

576 """

577 Class for printing dataframe or series info.

578 """

579

580 def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:

581 """Save dataframe info into buffer."""

582 table_builder = self._create_table_builder()

583 lines = table_builder.get_lines()

584 if buf is None: # pragma: no cover

585 buf = sys.stdout

586 fmt.buffer_put_lines(buf, lines)

587

588 @abstractmethod

589 def _create_table_builder(self) -> _TableBuilderAbstract:

590 """Create instance of table builder."""

591

592

593class _DataFrameInfoPrinter(_InfoPrinterAbstract):

594 """

595 Class for printing dataframe info.

596

597 Parameters

598 ----------

599 info : DataFrameInfo

600 Instance of DataFrameInfo.

601 max_cols : int, optional

602 When to switch from the verbose to the truncated output.

603 verbose : bool, optional

604 Whether to print the full summary.

605 show_counts : bool, optional

606 Whether to show the non-null counts.

607 """

608

609 def __init__(

610 self,

611 info: DataFrameInfo,

612 max_cols: int | None = None,

613 verbose: bool | None = None,

614 show_counts: bool | None = None,

615 ) -> None:

616 self.info = info

617 self.data = info.data

618 self.verbose = verbose

619 self.max_cols = self._initialize_max_cols(max_cols)

620 self.show_counts = self._initialize_show_counts(show_counts)

621

622 @property

623 def max_rows(self) -> int:

624 """Maximum info rows to be displayed."""

625 return get_option("display.max_info_rows", len(self.data) + 1)

626

627 @property

628 def exceeds_info_cols(self) -> bool:

629 """Check if number of columns to be summarized does not exceed maximum."""

630 return bool(self.col_count > self.max_cols)

631

632 @property

633 def exceeds_info_rows(self) -> bool:

634 """Check if number of rows to be summarized does not exceed maximum."""

635 return bool(len(self.data) > self.max_rows)

636

637 @property

638 def col_count(self) -> int:

639 """Number of columns to be summarized."""

640 return self.info.col_count

641

642 def _initialize_max_cols(self, max_cols: int | None) -> int:

643 if max_cols is None:

644 return get_option("display.max_info_columns", self.col_count + 1)

645 return max_cols

646

647 def _initialize_show_counts(self, show_counts: bool | None) -> bool:

648 if show_counts is None:

649 return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)

650 else:

651 return show_counts

652

653 def _create_table_builder(self) -> _DataFrameTableBuilder:

654 """

655 Create instance of table builder based on verbosity and display settings.

656 """

657 if self.verbose:

658 return _DataFrameTableBuilderVerbose(

659 info=self.info,

660 with_counts=self.show_counts,

661 )

662 elif self.verbose is False: # specifically set to False, not necessarily None

663 return _DataFrameTableBuilderNonVerbose(info=self.info)

664 elif self.exceeds_info_cols:

665 return _DataFrameTableBuilderNonVerbose(info=self.info)

666 else:

667 return _DataFrameTableBuilderVerbose(

668 info=self.info,

669 with_counts=self.show_counts,

670 )

671

672

673class _SeriesInfoPrinter(_InfoPrinterAbstract):

674 """Class for printing series info.

675

676 Parameters

677 ----------

678 info : SeriesInfo

679 Instance of SeriesInfo.

680 verbose : bool, optional

681 Whether to print the full summary.

682 show_counts : bool, optional

683 Whether to show the non-null counts.

684 """

685

686 def __init__(

687 self,

688 info: SeriesInfo,

689 verbose: bool | None = None,

690 show_counts: bool | None = None,

691 ) -> None:

692 self.info = info

693 self.data = info.data

694 self.verbose = verbose

695 self.show_counts = self._initialize_show_counts(show_counts)

696

697 def _create_table_builder(self) -> _SeriesTableBuilder:

698 """

699 Create instance of table builder based on verbosity.

700 """

701 if self.verbose or self.verbose is None:

702 return _SeriesTableBuilderVerbose(

703 info=self.info,

704 with_counts=self.show_counts,

705 )

706 else:

707 return _SeriesTableBuilderNonVerbose(info=self.info)

708

709 def _initialize_show_counts(self, show_counts: bool | None) -> bool:

710 if show_counts is None:

711 return True

712 else:

713 return show_counts

714

715

716class _TableBuilderAbstract(ABC):

717 """

718 Abstract builder for info table.

719 """

720

721 _lines: list[str]

722 info: _BaseInfo

723

724 @abstractmethod

725 def get_lines(self) -> list[str]:

726 """Product in a form of list of lines (strings)."""

727

728 @property

729 def data(self) -> DataFrame | Series:

730 return self.info.data

731

732 @property

733 def dtypes(self) -> Iterable[Dtype]:

734 """Dtypes of each of the DataFrame's columns."""

735 return self.info.dtypes

736

737 @property

738 def dtype_counts(self) -> Mapping[str, int]:

739 """Mapping dtype - number of counts."""

740 return self.info.dtype_counts

741

742 @property

743 def display_memory_usage(self) -> bool:

744 """Whether to display memory usage."""

745 return bool(self.info.memory_usage)

746

747 @property

748 def memory_usage_string(self) -> str:

749 """Memory usage string with proper size qualifier."""

750 return self.info.memory_usage_string

751

752 @property

753 def non_null_counts(self) -> Sequence[int]:

754 return self.info.non_null_counts

755

756 def add_object_type_line(self) -> None:

757 """Add line with string representation of dataframe to the table."""

758 self._lines.append(str(type(self.data)))

759

760 def add_index_range_line(self) -> None:

761 """Add line with range of indices to the table."""

762 self._lines.append(self.data.index._summary())

763

764 def add_dtypes_line(self) -> None:

765 """Add summary line with dtypes present in dataframe."""

766 collected_dtypes = [

767 f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items())

768 ]

769 self._lines.append(f"dtypes: {', '.join(collected_dtypes)}")

770

771

772class _DataFrameTableBuilder(_TableBuilderAbstract):

773 """

774 Abstract builder for dataframe info table.

775

776 Parameters

777 ----------

778 info : DataFrameInfo.

779 Instance of DataFrameInfo.

780 """

781

782 def __init__(self, *, info: DataFrameInfo) -> None:

783 self.info: DataFrameInfo = info

784

785 def get_lines(self) -> list[str]:

786 self._lines = []

787 if self.col_count == 0:

788 self._fill_empty_info()

789 else:

790 self._fill_non_empty_info()

791 return self._lines

792

793 def _fill_empty_info(self) -> None:

794 """Add lines to the info table, pertaining to empty dataframe."""

795 self.add_object_type_line()

796 self.add_index_range_line()

797 self._lines.append(f"Empty {type(self.data).__name__}\n")

798

799 @abstractmethod

800 def _fill_non_empty_info(self) -> None:

801 """Add lines to the info table, pertaining to non-empty dataframe."""

802

803 @property

804 def data(self) -> DataFrame:

805 """DataFrame."""

806 return self.info.data

807

808 @property

809 def ids(self) -> Index:

810 """Dataframe columns."""

811 return self.info.ids

812

813 @property

814 def col_count(self) -> int:

815 """Number of dataframe columns to be summarized."""

816 return self.info.col_count

817

818 def add_memory_usage_line(self) -> None:

819 """Add line containing memory usage."""

820 self._lines.append(f"memory usage: {self.memory_usage_string}")

821

822

823class _DataFrameTableBuilderNonVerbose(_DataFrameTableBuilder):

824 """

825 Dataframe info table builder for non-verbose output.

826 """

827

828 def _fill_non_empty_info(self) -> None:

829 """Add lines to the info table, pertaining to non-empty dataframe."""

830 self.add_object_type_line()

831 self.add_index_range_line()

832 self.add_columns_summary_line()

833 self.add_dtypes_line()

834 if self.display_memory_usage:

835 self.add_memory_usage_line()

836

837 def add_columns_summary_line(self) -> None:

838 self._lines.append(self.ids._summary(name="Columns"))

839

840

841class _TableBuilderVerboseMixin(_TableBuilderAbstract):

842 """

843 Mixin for verbose info output.

844 """

845

846 SPACING: str = " " * 2

847 strrows: Sequence[Sequence[str]]

848 gross_column_widths: Sequence[int]

849 with_counts: bool

850

851 @property

852 @abstractmethod

853 def headers(self) -> Sequence[str]:

854 """Headers names of the columns in verbose table."""

855

856 @property

857 def header_column_widths(self) -> Sequence[int]:

858 """Widths of header columns (only titles)."""

859 return [len(col) for col in self.headers]

860

861 def _get_gross_column_widths(self) -> Sequence[int]:

862 """Get widths of columns containing both headers and actual content."""

863 body_column_widths = self._get_body_column_widths()

864 return [

865 max(*widths)

866 for widths in zip(self.header_column_widths, body_column_widths)

867 ]

868

869 def _get_body_column_widths(self) -> Sequence[int]:

870 """Get widths of table content columns."""

871 strcols: Sequence[Sequence[str]] = list(zip(*self.strrows))

872 return [max(len(x) for x in col) for col in strcols]

873

874 def _gen_rows(self) -> Iterator[Sequence[str]]:

875 """

876 Generator function yielding rows content.

877

878 Each element represents a row comprising a sequence of strings.

879 """

880 if self.with_counts:

881 return self._gen_rows_with_counts()

882 else:

883 return self._gen_rows_without_counts()

884

885 @abstractmethod

886 def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:

887 """Iterator with string representation of body data with counts."""

888

889 @abstractmethod

890 def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:

891 """Iterator with string representation of body data without counts."""

892

893 def add_header_line(self) -> None:

894 header_line = self.SPACING.join(

895 [

896 _put_str(header, col_width)

897 for header, col_width in zip(self.headers, self.gross_column_widths)

898 ]

899 )

900 self._lines.append(header_line)

901

902 def add_separator_line(self) -> None:

903 separator_line = self.SPACING.join(

904 [

905 _put_str("-" * header_colwidth, gross_colwidth)

906 for header_colwidth, gross_colwidth in zip(

907 self.header_column_widths, self.gross_column_widths

908 )

909 ]

910 )

911 self._lines.append(separator_line)

912

913 def add_body_lines(self) -> None:

914 for row in self.strrows:

915 body_line = self.SPACING.join(

916 [

917 _put_str(col, gross_colwidth)

918 for col, gross_colwidth in zip(row, self.gross_column_widths)

919 ]

920 )

921 self._lines.append(body_line)

922

923 def _gen_non_null_counts(self) -> Iterator[str]:

924 """Iterator with string representation of non-null counts."""

925 for count in self.non_null_counts:

926 yield f"{count} non-null"

927

928 def _gen_dtypes(self) -> Iterator[str]:

929 """Iterator with string representation of column dtypes."""

930 for dtype in self.dtypes:

931 yield pprint_thing(dtype)

932

933

934class _DataFrameTableBuilderVerbose(_DataFrameTableBuilder, _TableBuilderVerboseMixin):

935 """

936 Dataframe info table builder for verbose output.

937 """

938

939 def __init__(

940 self,

941 *,

942 info: DataFrameInfo,

943 with_counts: bool,

944 ) -> None:

945 self.info = info

946 self.with_counts = with_counts

947 self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())

948 self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

949

950 def _fill_non_empty_info(self) -> None:

951 """Add lines to the info table, pertaining to non-empty dataframe."""

952 self.add_object_type_line()

953 self.add_index_range_line()

954 self.add_columns_summary_line()

955 self.add_header_line()

956 self.add_separator_line()

957 self.add_body_lines()

958 self.add_dtypes_line()

959 if self.display_memory_usage:

960 self.add_memory_usage_line()

961

962 @property

963 def headers(self) -> Sequence[str]:

964 """Headers names of the columns in verbose table."""

965 if self.with_counts:

966 return [" # ", "Column", "Non-Null Count", "Dtype"]

967 return [" # ", "Column", "Dtype"]

968

969 def add_columns_summary_line(self) -> None:

970 self._lines.append(f"Data columns (total {self.col_count} columns):")

971

972 def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:

973 """Iterator with string representation of body data without counts."""

974 yield from zip(

975 self._gen_line_numbers(),

976 self._gen_columns(),

977 self._gen_dtypes(),

978 )

979

980 def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:

981 """Iterator with string representation of body data with counts."""

982 yield from zip(

983 self._gen_line_numbers(),

984 self._gen_columns(),

985 self._gen_non_null_counts(),

986 self._gen_dtypes(),

987 )

988

989 def _gen_line_numbers(self) -> Iterator[str]:

990 """Iterator with string representation of column numbers."""

991 for i, _ in enumerate(self.ids):

992 yield f" {i}"

993

994 def _gen_columns(self) -> Iterator[str]:

995 """Iterator with string representation of column names."""

996 for col in self.ids:

997 yield pprint_thing(col)

998

999

1000class _SeriesTableBuilder(_TableBuilderAbstract):

1001 """

1002 Abstract builder for series info table.

1003

1004 Parameters

1005 ----------

1006 info : SeriesInfo.

1007 Instance of SeriesInfo.

1008 """

1009

1010 def __init__(self, *, info: SeriesInfo) -> None:

1011 self.info: SeriesInfo = info

1012

1013 def get_lines(self) -> list[str]:

1014 self._lines = []

1015 self._fill_non_empty_info()

1016 return self._lines

1017

1018 @property

1019 def data(self) -> Series:

1020 """Series."""

1021 return self.info.data

1022

1023 def add_memory_usage_line(self) -> None:

1024 """Add line containing memory usage."""

1025 self._lines.append(f"memory usage: {self.memory_usage_string}")

1026

1027 @abstractmethod

1028 def _fill_non_empty_info(self) -> None:

1029 """Add lines to the info table, pertaining to non-empty series."""

1030

1031

1032class _SeriesTableBuilderNonVerbose(_SeriesTableBuilder):

1033 """

1034 Series info table builder for non-verbose output.

1035 """

1036

1037 def _fill_non_empty_info(self) -> None:

1038 """Add lines to the info table, pertaining to non-empty series."""

1039 self.add_object_type_line()

1040 self.add_index_range_line()

1041 self.add_dtypes_line()

1042 if self.display_memory_usage:

1043 self.add_memory_usage_line()

1044

1045

1046class _SeriesTableBuilderVerbose(_SeriesTableBuilder, _TableBuilderVerboseMixin):

1047 """

1048 Series info table builder for verbose output.

1049 """

1050

1051 def __init__(

1052 self,

1053 *,

1054 info: SeriesInfo,

1055 with_counts: bool,

1056 ) -> None:

1057 self.info = info

1058 self.with_counts = with_counts

1059 self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())

1060 self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

1061

1062 def _fill_non_empty_info(self) -> None:

1063 """Add lines to the info table, pertaining to non-empty series."""

1064 self.add_object_type_line()

1065 self.add_index_range_line()

1066 self.add_series_name_line()

1067 self.add_header_line()

1068 self.add_separator_line()

1069 self.add_body_lines()

1070 self.add_dtypes_line()

1071 if self.display_memory_usage:

1072 self.add_memory_usage_line()

1073

1074 def add_series_name_line(self) -> None:

1075 self._lines.append(f"Series name: {self.data.name}")

1076

1077 @property

1078 def headers(self) -> Sequence[str]:

1079 """Headers names of the columns in verbose table."""

1080 if self.with_counts:

1081 return ["Non-Null Count", "Dtype"]

1082 return ["Dtype"]

1083

1084 def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:

1085 """Iterator with string representation of body data without counts."""

1086 yield from self._gen_dtypes()

1087

1088 def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:

1089 """Iterator with string representation of body data with counts."""

1090 yield from zip(

1091 self._gen_non_null_counts(),

1092 self._gen_dtypes(),

1093 )

1094

1095

1096def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:

1097 """

1098 Create mapping between datatypes and their number of occurrences.

1099 """

1100 # groupby dtype.name to collect e.g. Categorical columns

1101 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()