Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/formats/info.py: 50%

1from __future__ import annotations

3from abc import (

4 ABC,

5 abstractmethod,

7import sys

8from textwrap import dedent

9from typing import (

10 TYPE_CHECKING,

11 Iterable,

12 Iterator,

13 Mapping,

14 Sequence,

15)

17from pandas._config import get_option

19from pandas._typing import (

20 Dtype,

21 WriteBuffer,

22)

24from pandas.io.formats import format as fmt

25from pandas.io.formats.printing import pprint_thing

27if TYPE_CHECKING:

28 from pandas import (

29 DataFrame,

30 Index,

31 Series,

32 )

# Docstring fragment describing the ``max_cols`` parameter; it is supplied as
# the "max_cols_sub" value of ``frame_sub_kwargs``.
35frame_max_cols_sub = dedent(

36 """\

37 max_cols : int, optional

38 When to switch from the verbose to the truncated output. If the

39 DataFrame has more than `max_cols` columns, the truncated output

40 is used. By default, the setting in

41 ``pandas.options.display.max_info_columns`` is used."""

42)

# Docstring fragment describing the ``show_counts`` parameter; it is supplied
# as the "show_counts_sub" value of both ``frame_sub_kwargs`` and
# ``series_sub_kwargs``.
45show_counts_sub = dedent(

46 """\

47 show_counts : bool, optional

48 Whether to show the non-null counts. By default, this is shown

49 only if the DataFrame is smaller than

50 ``pandas.options.display.max_info_rows`` and

51 ``pandas.options.display.max_info_columns``. A value of True always

52 shows the counts, and False never shows the counts."""

53)

# Doctest-style usage examples for ``DataFrame.info``; supplied as the
# "examples_sub" value of ``frame_sub_kwargs``.
56frame_examples_sub = dedent(

57 """\

58 >>> int_values = [1, 2, 3, 4, 5]

59 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

60 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]

61 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,

62 ... "float_col": float_values})

63 >>> df

64 int_col text_col float_col

65 0 1 alpha 0.00

66 1 2 beta 0.25

67 2 3 gamma 0.50

68 3 4 delta 0.75

69 4 5 epsilon 1.00

71 Prints information of all columns:

73 >>> df.info(verbose=True)

74 <class 'pandas.core.frame.DataFrame'>

75 RangeIndex: 5 entries, 0 to 4

76 Data columns (total 3 columns):

77 # Column Non-Null Count Dtype

78 --- ------ -------------- -----

79 0 int_col 5 non-null int64

80 1 text_col 5 non-null object

81 2 float_col 5 non-null float64

82 dtypes: float64(1), int64(1), object(1)

83 memory usage: 248.0+ bytes

85 Prints a summary of columns count and its dtypes but not per column

86 information:

88 >>> df.info(verbose=False)

89 <class 'pandas.core.frame.DataFrame'>

90 RangeIndex: 5 entries, 0 to 4

91 Columns: 3 entries, int_col to float_col

92 dtypes: float64(1), int64(1), object(1)

93 memory usage: 248.0+ bytes

95 Pipe output of DataFrame.info to buffer instead of sys.stdout, get

96 buffer content and writes to a text file:

98 >>> import io

99 >>> buffer = io.StringIO()

100 >>> df.info(buf=buffer)

101 >>> s = buffer.getvalue()

102 >>> with open("df_info.txt", "w",

103 ... encoding="utf-8") as f: # doctest: +SKIP

104 ... f.write(s)

105 260

106

107 The `memory_usage` parameter allows deep introspection mode, specially

108 useful for big DataFrames and fine-tune memory optimization:

109

110 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

111 >>> df = pd.DataFrame({

112 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),

113 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),

114 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)

115 ... })

116 >>> df.info()

117 <class 'pandas.core.frame.DataFrame'>

118 RangeIndex: 1000000 entries, 0 to 999999

119 Data columns (total 3 columns):

120 # Column Non-Null Count Dtype

121 --- ------ -------------- -----

122 0 column_1 1000000 non-null object

123 1 column_2 1000000 non-null object

124 2 column_3 1000000 non-null object

125 dtypes: object(3)

126 memory usage: 22.9+ MB

127

128 >>> df.info(memory_usage='deep')

129 <class 'pandas.core.frame.DataFrame'>

130 RangeIndex: 1000000 entries, 0 to 999999

131 Data columns (total 3 columns):

132 # Column Non-Null Count Dtype

133 --- ------ -------------- -----

134 0 column_1 1000000 non-null object

135 1 column_2 1000000 non-null object

136 2 column_3 1000000 non-null object

137 dtypes: object(3)

138 memory usage: 165.9 MB"""

139)

140

141

# "See Also" entries for the DataFrame flavour of the info docstring; supplied
# as the "see_also_sub" value of ``frame_sub_kwargs``.
142frame_see_also_sub = dedent(

143 """\

144 DataFrame.describe: Generate descriptive statistics of DataFrame

145 columns.

146 DataFrame.memory_usage: Memory usage of DataFrame columns."""

147)

148

149

# Values for the named placeholders of INFO_DOCSTRING when it documents the
# DataFrame flavour of ``info``.
frame_sub_kwargs = dict(
    klass="DataFrame",
    type_sub=" and columns",
    max_cols_sub=frame_max_cols_sub,
    show_counts_sub=show_counts_sub,
    examples_sub=frame_examples_sub,
    see_also_sub=frame_see_also_sub,
    version_added_sub="",
)

159

160

# Doctest-style usage examples for ``Series.info``; supplied as the
# "examples_sub" value of ``series_sub_kwargs``.
161series_examples_sub = dedent(

162 """\

163 >>> int_values = [1, 2, 3, 4, 5]

164 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

165 >>> s = pd.Series(text_values, index=int_values)

166 >>> s.info()

167 <class 'pandas.core.series.Series'>

168 Index: 5 entries, 1 to 5

169 Series name: None

170 Non-Null Count Dtype

171 -------------- -----

172 5 non-null object

173 dtypes: object(1)

174 memory usage: 80.0+ bytes

175

176 Prints a summary excluding information about its values:

177

178 >>> s.info(verbose=False)

179 <class 'pandas.core.series.Series'>

180 Index: 5 entries, 1 to 5

181 dtypes: object(1)

182 memory usage: 80.0+ bytes

183

184 Pipe output of Series.info to buffer instead of sys.stdout, get

185 buffer content and writes to a text file:

186

187 >>> import io

188 >>> buffer = io.StringIO()

189 >>> s.info(buf=buffer)

190 >>> s = buffer.getvalue()

191 >>> with open("df_info.txt", "w",

192 ... encoding="utf-8") as f: # doctest: +SKIP

193 ... f.write(s)

194 260

195

196 The `memory_usage` parameter allows deep introspection mode, specially

197 useful for big Series and fine-tune memory optimization:

198

199 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

200 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))

201 >>> s.info()

202 <class 'pandas.core.series.Series'>

203 RangeIndex: 1000000 entries, 0 to 999999

204 Series name: None

205 Non-Null Count Dtype

206 -------------- -----

207 1000000 non-null object

208 dtypes: object(1)

209 memory usage: 7.6+ MB

210

211 >>> s.info(memory_usage='deep')

212 <class 'pandas.core.series.Series'>

213 RangeIndex: 1000000 entries, 0 to 999999

214 Series name: None

215 Non-Null Count Dtype

216 -------------- -----

217 1000000 non-null object

218 dtypes: object(1)

219 memory usage: 55.3 MB"""

220)

221

222

# "See Also" entries for the Series flavour of the info docstring; supplied as
# the "see_also_sub" value of ``series_sub_kwargs``.
223series_see_also_sub = dedent(

224 """\

225 Series.describe: Generate descriptive statistics of Series.

226 Series.memory_usage: Memory usage of Series."""

227)

228

229

# Values for the named placeholders of INFO_DOCSTRING when it documents the
# Series flavour of ``info`` (no ``max_cols`` section for Series).
series_sub_kwargs = dict(
    klass="Series",
    type_sub="",
    max_cols_sub="",
    show_counts_sub=show_counts_sub,
    examples_sub=series_examples_sub,
    see_also_sub=series_see_also_sub,
    version_added_sub="\n.. versionadded:: 1.4.0\n",
)

239

240

# Template for the ``info`` docstring. Its named placeholders ({klass},
# {type_sub}, {version_added_sub}, {max_cols_sub}, {show_counts_sub},
# {see_also_sub}, {examples_sub}) match the keys of ``frame_sub_kwargs`` and
# ``series_sub_kwargs``.
# NOTE(review): the substitution itself happens outside this file's visible
# code — confirm against the callers.
241INFO_DOCSTRING = dedent(

242 """

243 Print a concise summary of a {klass}.

244

245 This method prints information about a {klass} including

246 the index dtype{type_sub}, non-null values and memory usage.

247 {version_added_sub}\

248

249 Parameters

250 ----------

251 verbose : bool, optional

252 Whether to print the full summary. By default, the setting in

253 ``pandas.options.display.max_info_columns`` is followed.

254 buf : writable buffer, defaults to sys.stdout

255 Where to send the output. By default, the output is printed to

256 sys.stdout. Pass a writable buffer if you need to further process

257 the output.

258 {max_cols_sub}

259 memory_usage : bool, str, optional

260 Specifies whether total memory usage of the {klass}

261 elements (including the index) should be displayed. By default,

262 this follows the ``pandas.options.display.memory_usage`` setting.

263

264 True always show memory usage. False never shows memory usage.

265 A value of 'deep' is equivalent to "True with deep introspection".

266 Memory usage is shown in human-readable units (base-2

267 representation). Without deep introspection a memory estimation is

268 made based in column dtype and number of rows assuming values

269 consume the same memory amount for corresponding dtypes. With deep

270 memory introspection, a real memory usage calculation is performed

271 at the cost of computational resources. See the

272 :ref:`Frequently Asked Questions <df-memory-usage>` for more

273 details.

274 {show_counts_sub}

275

276 Returns

277 -------

278 None

279 This method prints a summary of a {klass} and returns None.

280

281 See Also

282 --------

283 {see_also_sub}

284

285 Examples

286 --------

287 {examples_sub}

288 """

289)

290

291

292def _put_str(s: str | Dtype, space: int) -> str:

293 """

294 Make string of specified length, padding to the right if necessary.

295

296 Parameters

297 ----------

298 s : Union[str, Dtype]

299 String to be formatted.

300 space : int

301 Length to force string to be of.

302

303 Returns

304 -------

305 str

306 String coerced to given length.

307

308 Examples

309 --------

310 >>> pd.io.formats.info._put_str("panda", 6)

311 'panda '

312 >>> pd.io.formats.info._put_str("panda", 4)

313 'pand'

314 """

315 return str(s)[:space].ljust(space)

316

317

318def _sizeof_fmt(num: float, size_qualifier: str) -> str:

319 """

320 Return size in human readable format.

321

322 Parameters

323 ----------

324 num : int

325 Size in bytes.

326 size_qualifier : str

327 Either empty, or '+' (if lower bound).

328

329 Returns

330 -------

331 str

332 Size in human readable format.

333

334 Examples

335 --------

336 >>> _sizeof_fmt(23028, '')

337 '22.5 KB'

338

339 >>> _sizeof_fmt(23028, '+')

340 '22.5+ KB'

341 """

342 for x in ["bytes", "KB", "MB", "GB", "TB"]:

343 if num < 1024.0:

344 return f"{num:3.1f}{size_qualifier} {x}"

345 num /= 1024.0

346 return f"{num:3.1f}{size_qualifier} PB"

347

348

349def _initialize_memory_usage(

350 memory_usage: bool | str | None = None,

351) -> bool | str:

352 """Get memory usage based on inputs and display options."""

353 if memory_usage is None:

354 memory_usage = get_option("display.memory_usage")

355 return memory_usage

356

357

class BaseInfo(ABC):
    """
    Common interface of DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Object that is being summarized.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of occurrences."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, one per column (a single entry for a series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """Return "+" when the reported size is only a lower bound, else ""."""
        # No qualifier when memory usage is off or was measured deeply.
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        if "object" in self.dtype_counts:
            return "+"
        if self.data.index._is_memory_usage_qualified():
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by subclasses."""

439

440

class DataFrameInfo(BaseInfo):
    """
    Summary information gathered from a DataFrame.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of columns having it."""
        frame = self.data
        return _get_dataframe_dtype_counts(frame)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        frame = self.data
        return frame.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        frame = self.data
        return frame.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        columns = self.ids
        return len(columns)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Result of ``count()`` on the frame (non-null entries per column)."""
        frame = self.data
        return frame.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Sum of the frame's ``memory_usage`` including the index."""
        introspect = self.memory_usage == "deep"
        per_item = self.data.memory_usage(index=True, deep=introspect)
        return per_item.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a DataFrameInfoPrinter for this info and write it to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

512

513

class SeriesInfo(BaseInfo):
    """
    Summary information gathered from a Series.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a SeriesInfoPrinter for this info and write it to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it only applies to DataFrame.info.
        """
        if max_cols is None:
            SeriesInfoPrinter(
                info=self,
                verbose=verbose,
                show_counts=show_counts,
            ).to_buffer(buf)
            return
        raise ValueError(
            "Argument `max_cols` can only be passed "
            "in DataFrame.info, not Series.info"
        )

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' ``count()``."""
        total = self.data.count()
        return [total]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        dtype = self.data.dtypes
        return [dtype]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts, computed by wrapping the series in a DataFrame."""
        # Imported here rather than at module level, as in the original code.
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        introspect = self.memory_usage == "deep"
        return self.data.memory_usage(index=True, deep=introspect)

572

573

class InfoPrinterAbstract(ABC):
    """
    Base class for printing dataframe or series info.

    Subclasses supply the table builder through ``_create_table_builder``.
    The class derives from ``ABC`` so that the ``@abstractmethod`` marker is
    actually enforced: without an ABC metaclass the decorator has no effect
    and an incomplete printer could be instantiated.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Write the info lines into a buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination of the output; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""

590

591

class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols must be resolved first: the show_counts default reads it.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """Maximum info rows to be displayed (``display.max_info_rows``)."""
        # Only the option name is passed: get_option's second parameter is
        # ``silent``, not a fallback value, so the former extra argument
        # never acted as a default.
        return get_option("display.max_info_rows")

    @property
    def exceeds_info_cols(self) -> bool:
        """Check if number of columns to be summarized exceeds the maximum."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Check if number of rows to be summarized exceeds the maximum."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Return ``max_cols``, or ``display.max_info_columns`` when None."""
        if max_cols is None:
            # See max_rows: get_option takes no fallback argument.
            return get_option("display.max_info_columns")
        return max_cols

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Return ``show_counts``, or decide from the frame size when None.

        With no explicit value, counts are shown only if neither the column
        limit nor the row limit is exceeded.
        """
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose:
            want_verbose = True
        elif self.verbose is False:  # specifically set to False, not necessarily None
            want_verbose = False
        else:
            # Verbosity not specified: fall back on the column limit.
            want_verbose = not self.exceeds_info_cols

        if want_verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return DataFrameTableBuilderNonVerbose(info=self.info)

671

672

class SeriesInfoPrinter(InfoPrinterAbstract):
    """Printer producing the info output of a series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.
        """
        # Only an explicit falsy value selects the short form; None (not
        # specified) means verbose.
        explicitly_terse = self.verbose is not None and not self.verbose
        if explicitly_terse:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, defaulting to True when it is None."""
        return True if show_counts is None else show_counts

714

715

class TableBuilderAbstract(ABC):
    """
    Base class of the builders that assemble the info table line by line.
    """

    _lines: list[str]
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object held by the wrapped info instance."""
        info = self.info
        return info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes as reported by the wrapped info instance."""
        info = self.info
        return info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        info = self.info
        return info.dtype_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        setting = self.info.memory_usage
        return bool(setting)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        info = self.info
        return info.memory_usage_string

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as reported by the wrapped info instance."""
        info = self.info
        return info.non_null_counts

    def add_object_type_line(self) -> None:
        """Add a line with the string form of the data's type to the table."""
        kind = type(self.data)
        self._lines.append(str(kind))

    def add_index_range_line(self) -> None:
        """Add line with range of indices to the table."""
        index = self.data.index
        self._lines.append(index._summary())

    def add_dtypes_line(self) -> None:
        """Add a summary line listing each dtype with its count, sorted."""
        parts = []
        for name, count in sorted(self.dtype_counts.items()):
            parts.append(f"{name}({count:d})")
        self._lines.append(f"dtypes: {', '.join(parts)}")

770

771

class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Base builder of the info table of a dataframe.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        self._lines = []
        # A frame without columns gets the short "Empty ..." report.
        fill = (
            self._fill_empty_info
            if self.col_count == 0
            else self._fill_non_empty_info
        )
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        type_name = type(self.data).__name__
        self._lines.append(f"Empty {type_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        info = self.info
        return info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        info = self.info
        return info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        info = self.info
        return info.col_count

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

821

822

class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Builder of the short (non-verbose) dataframe info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        ):
            add_line()
        # Memory usage comes last and only when it is enabled.
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Add the one-line summary of the columns index."""
        summary = self.ids._summary(name="Columns")
        self._lines.append(summary)

839

840

class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Shared pieces of the verbose table builders: header, separator and body.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        paired = zip(self.header_column_widths, body_widths)
        return [max(header_width, body_width) for header_width, body_width in paired]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so that every tuple holds one column's cells.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        return [max(len(cell) for cell in column) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the iterator over the body rows.

        Each element represents a row comprising a sequence of strings.
        """
        if not self.with_counts:
            return self._gen_rows_without_counts()
        return self._gen_rows_with_counts()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the line holding the column titles, padded to column width."""
        cells = []
        for title, width in zip(self.headers, self.gross_column_widths):
            cells.append(_put_str(title, width))
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the dashed line; each dash run is as long as its title."""
        cells = []
        for title_width, width in zip(
            self.header_column_widths, self.gross_column_widths
        ):
            cells.append(_put_str("-" * title_width, width))
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one padded line per body row."""
        for row in self.strrows:
            cells = []
            for cell, width in zip(row, self.gross_column_widths):
                cells.append(_put_str(cell, width))
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from (pprint_thing(dtype) for dtype in self.dtypes)

932

933

class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Builder of the verbose (per-column) dataframe info table.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        ):
            add_line()
        # Memory usage comes last and only when it is enabled.
        if self.display_memory_usage:
            self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        titles = [" # ", "Column"]
        if self.with_counts:
            titles.append("Non-Null Count")
        titles.append("Dtype")
        return titles

    def add_columns_summary_line(self) -> None:
        """Add the line announcing the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        cells = (
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_dtypes(),
        )
        yield from zip(*cells)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        cells = (
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
        yield from zip(*cells)

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {position}" for position, _ in enumerate(self.ids))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from (pprint_thing(label) for label in self.ids)

998

999

class SeriesTableBuilder(TableBuilderAbstract):
    """
    Base builder of the info table of a series.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        # Unlike the dataframe builder there is no separate empty case here.
        fresh: list[str] = []
        self._lines = fresh
        self._fill_non_empty_info()
        return self._lines

    @property
    def data(self) -> Series:
        """Series."""
        info = self.info
        return info.data

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

1030

1031

class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Builder of the short (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        ):
            add_line()
        # Memory usage comes last and only when it is enabled.
        if self.display_memory_usage:
            self.add_memory_usage_line()

1044

1045

class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add the line showing the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Every row must be a sequence of cells. Yielding the bare dtype
        # string made the row iterate character by character, so the table
        # showed only the first letter of the dtype (e.g. "o" for "object").
        for dtype in self._gen_dtypes():
            yield (dtype,)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )

1094

1095

1096def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:

1097 """

1098 Create mapping between datatypes and their number of occurrences.

1099 """

1100 # groupby dtype.name to collect e.g. Categorical columns

1101 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()