Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/formats/info.py: 49%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

357 statements  

1from __future__ import annotations 

2 

3from abc import ( 

4 ABC, 

5 abstractmethod, 

6) 

7import sys 

8from textwrap import dedent 

9from typing import TYPE_CHECKING 

10 

11from pandas._config import get_option 

12 

13from pandas.io.formats import format as fmt 

14from pandas.io.formats.printing import pprint_thing 

15 

16if TYPE_CHECKING: 

17 from collections.abc import ( 

18 Iterable, 

19 Iterator, 

20 Mapping, 

21 Sequence, 

22 ) 

23 

24 from pandas._typing import ( 

25 Dtype, 

26 WriteBuffer, 

27 ) 

28 

29 from pandas import ( 

30 DataFrame, 

31 Index, 

32 Series, 

33 ) 

34 

35 

36frame_max_cols_sub = dedent( 

37 """\ 

38 max_cols : int, optional 

39 When to switch from the verbose to the truncated output. If the 

40 DataFrame has more than `max_cols` columns, the truncated output 

41 is used. By default, the setting in 

42 ``pandas.options.display.max_info_columns`` is used.""" 

43) 

44 

45 

46show_counts_sub = dedent( 

47 """\ 

48 show_counts : bool, optional 

49 Whether to show the non-null counts. By default, this is shown 

50 only if the DataFrame is smaller than 

51 ``pandas.options.display.max_info_rows`` and 

52 ``pandas.options.display.max_info_columns``. A value of True always 

53 shows the counts, and False never shows the counts.""" 

54) 

55 

56 

57frame_examples_sub = dedent( 

58 """\ 

59 >>> int_values = [1, 2, 3, 4, 5] 

60 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] 

61 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] 

62 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, 

63 ... "float_col": float_values}) 

64 >>> df 

65 int_col text_col float_col 

66 0 1 alpha 0.00 

67 1 2 beta 0.25 

68 2 3 gamma 0.50 

69 3 4 delta 0.75 

70 4 5 epsilon 1.00 

71 

72 Prints information of all columns: 

73 

74 >>> df.info(verbose=True) 

75 <class 'pandas.core.frame.DataFrame'> 

76 RangeIndex: 5 entries, 0 to 4 

77 Data columns (total 3 columns): 

78 # Column Non-Null Count Dtype 

79 --- ------ -------------- ----- 

80 0 int_col 5 non-null int64 

81 1 text_col 5 non-null object 

82 2 float_col 5 non-null float64 

83 dtypes: float64(1), int64(1), object(1) 

84 memory usage: 248.0+ bytes 

85 

86 Prints a summary of columns count and its dtypes but not per column 

87 information: 

88 

89 >>> df.info(verbose=False) 

90 <class 'pandas.core.frame.DataFrame'> 

91 RangeIndex: 5 entries, 0 to 4 

92 Columns: 3 entries, int_col to float_col 

93 dtypes: float64(1), int64(1), object(1) 

94 memory usage: 248.0+ bytes 

95 

96 Pipe output of DataFrame.info to buffer instead of sys.stdout, get 

97 buffer content and writes to a text file: 

98 

99 >>> import io 

100 >>> buffer = io.StringIO() 

101 >>> df.info(buf=buffer) 

102 >>> s = buffer.getvalue() 

103 >>> with open("df_info.txt", "w", 

104 ... encoding="utf-8") as f: # doctest: +SKIP 

105 ... f.write(s) 

106 260 

107 

108 The `memory_usage` parameter allows deep introspection mode, specially 

109 useful for big DataFrames and fine-tune memory optimization: 

110 

111 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) 

112 >>> df = pd.DataFrame({ 

113 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), 

114 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), 

115 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) 

116 ... }) 

117 >>> df.info() 

118 <class 'pandas.core.frame.DataFrame'> 

119 RangeIndex: 1000000 entries, 0 to 999999 

120 Data columns (total 3 columns): 

121 # Column Non-Null Count Dtype 

122 --- ------ -------------- ----- 

123 0 column_1 1000000 non-null object 

124 1 column_2 1000000 non-null object 

125 2 column_3 1000000 non-null object 

126 dtypes: object(3) 

127 memory usage: 22.9+ MB 

128 

129 >>> df.info(memory_usage='deep') 

130 <class 'pandas.core.frame.DataFrame'> 

131 RangeIndex: 1000000 entries, 0 to 999999 

132 Data columns (total 3 columns): 

133 # Column Non-Null Count Dtype 

134 --- ------ -------------- ----- 

135 0 column_1 1000000 non-null object 

136 1 column_2 1000000 non-null object 

137 2 column_3 1000000 non-null object 

138 dtypes: object(3) 

139 memory usage: 165.9 MB""" 

140) 

141 

142 

143frame_see_also_sub = dedent( 

144 """\ 

145 DataFrame.describe: Generate descriptive statistics of DataFrame 

146 columns. 

147 DataFrame.memory_usage: Memory usage of DataFrame columns.""" 

148) 

149 

150 

# Substitution values for the DataFrame flavour of the info docstring.
# The keys match the ``{...}`` placeholders used in ``INFO_DOCSTRING``.
frame_sub_kwargs = {
    "klass": "DataFrame",
    "type_sub": " and columns",
    "max_cols_sub": frame_max_cols_sub,
    "show_counts_sub": show_counts_sub,
    "examples_sub": frame_examples_sub,
    "see_also_sub": frame_see_also_sub,
    "version_added_sub": "",
}

160 

161 

162series_examples_sub = dedent( 

163 """\ 

164 >>> int_values = [1, 2, 3, 4, 5] 

165 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] 

166 >>> s = pd.Series(text_values, index=int_values) 

167 >>> s.info() 

168 <class 'pandas.core.series.Series'> 

169 Index: 5 entries, 1 to 5 

170 Series name: None 

171 Non-Null Count Dtype 

172 -------------- ----- 

173 5 non-null object 

174 dtypes: object(1) 

175 memory usage: 80.0+ bytes 

176 

177 Prints a summary excluding information about its values: 

178 

179 >>> s.info(verbose=False) 

180 <class 'pandas.core.series.Series'> 

181 Index: 5 entries, 1 to 5 

182 dtypes: object(1) 

183 memory usage: 80.0+ bytes 

184 

185 Pipe output of Series.info to buffer instead of sys.stdout, get 

186 buffer content and writes to a text file: 

187 

188 >>> import io 

189 >>> buffer = io.StringIO() 

190 >>> s.info(buf=buffer) 

191 >>> s = buffer.getvalue() 

192 >>> with open("df_info.txt", "w", 

193 ... encoding="utf-8") as f: # doctest: +SKIP 

194 ... f.write(s) 

195 260 

196 

197 The `memory_usage` parameter allows deep introspection mode, specially 

198 useful for big Series and fine-tune memory optimization: 

199 

200 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) 

201 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6)) 

202 >>> s.info() 

203 <class 'pandas.core.series.Series'> 

204 RangeIndex: 1000000 entries, 0 to 999999 

205 Series name: None 

206 Non-Null Count Dtype 

207 -------------- ----- 

208 1000000 non-null object 

209 dtypes: object(1) 

210 memory usage: 7.6+ MB 

211 

212 >>> s.info(memory_usage='deep') 

213 <class 'pandas.core.series.Series'> 

214 RangeIndex: 1000000 entries, 0 to 999999 

215 Series name: None 

216 Non-Null Count Dtype 

217 -------------- ----- 

218 1000000 non-null object 

219 dtypes: object(1) 

220 memory usage: 55.3 MB""" 

221) 

222 

223 

224series_see_also_sub = dedent( 

225 """\ 

226 Series.describe: Generate descriptive statistics of Series. 

227 Series.memory_usage: Memory usage of Series.""" 

228) 

229 

230 

# Substitution values for the Series flavour of the info docstring.
# The keys match the ``{...}`` placeholders used in ``INFO_DOCSTRING``;
# ``max_cols_sub`` is empty, so no ``max_cols`` entry is rendered for Series.
series_sub_kwargs = {
    "klass": "Series",
    "type_sub": "",
    "max_cols_sub": "",
    "show_counts_sub": show_counts_sub,
    "examples_sub": series_examples_sub,
    "see_also_sub": series_see_also_sub,
    "version_added_sub": "\n.. versionadded:: 1.4.0\n",
}

240 

241 

242INFO_DOCSTRING = dedent( 

243 """ 

244 Print a concise summary of a {klass}. 

245 

246 This method prints information about a {klass} including 

247 the index dtype{type_sub}, non-null values and memory usage. 

248 {version_added_sub}\ 

249 

250 Parameters 

251 ---------- 

252 verbose : bool, optional 

253 Whether to print the full summary. By default, the setting in 

254 ``pandas.options.display.max_info_columns`` is followed. 

255 buf : writable buffer, defaults to sys.stdout 

256 Where to send the output. By default, the output is printed to 

257 sys.stdout. Pass a writable buffer if you need to further process 

258 the output. 

259 {max_cols_sub} 

260 memory_usage : bool, str, optional 

261 Specifies whether total memory usage of the {klass} 

262 elements (including the index) should be displayed. By default, 

263 this follows the ``pandas.options.display.memory_usage`` setting. 

264 

265 True always show memory usage. False never shows memory usage. 

266 A value of 'deep' is equivalent to "True with deep introspection". 

267 Memory usage is shown in human-readable units (base-2 

268 representation). Without deep introspection a memory estimation is 

269 made based in column dtype and number of rows assuming values 

270 consume the same memory amount for corresponding dtypes. With deep 

271 memory introspection, a real memory usage calculation is performed 

272 at the cost of computational resources. See the 

273 :ref:`Frequently Asked Questions <df-memory-usage>` for more 

274 details. 

275 {show_counts_sub} 

276 

277 Returns 

278 ------- 

279 None 

280 This method prints a summary of a {klass} and returns None. 

281 

282 See Also 

283 -------- 

284 {see_also_sub} 

285 

286 Examples 

287 -------- 

288 {examples_sub} 

289 """ 

290) 

291 

292 

293def _put_str(s: str | Dtype, space: int) -> str: 

294 """ 

295 Make string of specified length, padding to the right if necessary. 

296 

297 Parameters 

298 ---------- 

299 s : Union[str, Dtype] 

300 String to be formatted. 

301 space : int 

302 Length to force string to be of. 

303 

304 Returns 

305 ------- 

306 str 

307 String coerced to given length. 

308 

309 Examples 

310 -------- 

311 >>> pd.io.formats.info._put_str("panda", 6) 

312 'panda ' 

313 >>> pd.io.formats.info._put_str("panda", 4) 

314 'pand' 

315 """ 

316 return str(s)[:space].ljust(space) 

317 

318 

319def _sizeof_fmt(num: float, size_qualifier: str) -> str: 

320 """ 

321 Return size in human readable format. 

322 

323 Parameters 

324 ---------- 

325 num : int 

326 Size in bytes. 

327 size_qualifier : str 

328 Either empty, or '+' (if lower bound). 

329 

330 Returns 

331 ------- 

332 str 

333 Size in human readable format. 

334 

335 Examples 

336 -------- 

337 >>> _sizeof_fmt(23028, '') 

338 '22.5 KB' 

339 

340 >>> _sizeof_fmt(23028, '+') 

341 '22.5+ KB' 

342 """ 

343 for x in ["bytes", "KB", "MB", "GB", "TB"]: 

344 if num < 1024.0: 

345 return f"{num:3.1f}{size_qualifier} {x}" 

346 num /= 1024.0 

347 return f"{num:3.1f}{size_qualifier} PB" 

348 

349 

350def _initialize_memory_usage( 

351 memory_usage: bool | str | None = None, 

352) -> bool | str: 

353 """Get memory usage based on inputs and display options.""" 

354 if memory_usage is None: 

355 memory_usage = get_option("display.memory_usage") 

356 return memory_usage 

357 

358 

359class _BaseInfo(ABC): 

360 """ 

361 Base class for DataFrameInfo and SeriesInfo. 

362 

363 Parameters 

364 ---------- 

365 data : DataFrame or Series 

366 Either dataframe or series. 

367 memory_usage : bool or str, optional 

368 If "deep", introspect the data deeply by interrogating object dtypes 

369 for system-level memory consumption, and include it in the returned 

370 values. 

371 """ 

372 

373 data: DataFrame | Series 

374 memory_usage: bool | str 

375 

376 @property 

377 @abstractmethod 

378 def dtypes(self) -> Iterable[Dtype]: 

379 """ 

380 Dtypes. 

381 

382 Returns 

383 ------- 

384 dtypes : sequence 

385 Dtype of each of the DataFrame's columns (or one series column). 

386 """ 

387 

388 @property 

389 @abstractmethod 

390 def dtype_counts(self) -> Mapping[str, int]: 

391 """Mapping dtype - number of counts.""" 

392 

393 @property 

394 @abstractmethod 

395 def non_null_counts(self) -> Sequence[int]: 

396 """Sequence of non-null counts for all columns or column (if series).""" 

397 

398 @property 

399 @abstractmethod 

400 def memory_usage_bytes(self) -> int: 

401 """ 

402 Memory usage in bytes. 

403 

404 Returns 

405 ------- 

406 memory_usage_bytes : int 

407 Object's total memory usage in bytes. 

408 """ 

409 

410 @property 

411 def memory_usage_string(self) -> str: 

412 """Memory usage in a form of human readable string.""" 

413 return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n" 

414 

415 @property 

416 def size_qualifier(self) -> str: 

417 size_qualifier = "" 

418 if self.memory_usage: 

419 if self.memory_usage != "deep": 

420 # size_qualifier is just a best effort; not guaranteed to catch 

421 # all cases (e.g., it misses categorical data even with object 

422 # categories) 

423 if ( 

424 "object" in self.dtype_counts 

425 or self.data.index._is_memory_usage_qualified() 

426 ): 

427 size_qualifier = "+" 

428 return size_qualifier 

429 

430 @abstractmethod 

431 def render( 

432 self, 

433 *, 

434 buf: WriteBuffer[str] | None, 

435 max_cols: int | None, 

436 verbose: bool | None, 

437 show_counts: bool | None, 

438 ) -> None: 

439 pass 

440 

441 

class DataFrameInfo(_BaseInfo):
    """
    Info provider for a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Frame being summarized.
    memory_usage : bool or str, optional
        Memory usage setting; when None it is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of columns having it."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        frame = self.data
        return frame.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        frame = self.data
        return frame.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        columns = self.ids
        return len(columns)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Result of ``count()`` on the frame: non-null count per column."""
        frame = self.data
        return frame.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Sum of the per-column memory usage, index included."""
        use_deep = self.memory_usage == "deep"
        per_column = self.data.memory_usage(index=True, deep=use_deep)
        return per_column.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a dataframe info printer and write its output to ``buf``."""
        _DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

513 

514 

class SeriesInfo(_BaseInfo):
    """
    Info provider for a Series.

    Parameters
    ----------
    data : Series
        Series being summarized.
    memory_usage : bool or str, optional
        Memory usage setting; when None it is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a series info printer and write its output to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it is only meaningful for DataFrame.info.
        """
        if max_cols is not None:
            raise ValueError(
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
        _SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        count = self.data.count()
        return [count]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        dtype = self.data.dtypes
        return [dtype]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts, computed by wrapping the series in a DataFrame."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        use_deep = self.memory_usage == "deep"
        return self.data.memory_usage(index=True, deep=use_deep)

573 

574 

575class _InfoPrinterAbstract: 

576 """ 

577 Class for printing dataframe or series info. 

578 """ 

579 

580 def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None: 

581 """Save dataframe info into buffer.""" 

582 table_builder = self._create_table_builder() 

583 lines = table_builder.get_lines() 

584 if buf is None: # pragma: no cover 

585 buf = sys.stdout 

586 fmt.buffer_put_lines(buf, lines) 

587 

588 @abstractmethod 

589 def _create_table_builder(self) -> _TableBuilderAbstract: 

590 """Create instance of table builder.""" 

591 

592 

class _DataFrameInfoPrinter(_InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # ``show_counts`` depends on ``max_cols``, so resolve ``max_cols`` first.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """
        Largest number of rows for which non-null counts are shown by default.

        Read from ``display.max_info_rows``. ``get_option`` has no "default"
        argument, so the fallback is applied here explicitly: a None option
        value is treated as "no limit" by returning a bound the frame cannot
        exceed, instead of letting None reach the ``>`` comparison.
        """
        limit = get_option("display.max_info_rows")
        if limit is None:
            return len(self.data) + 1
        return limit

    @property
    def exceeds_info_cols(self) -> bool:
        """Whether the number of columns is greater than ``max_cols``."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Whether the number of rows is greater than ``max_rows``."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """
        Resolve ``max_cols``, deferring to ``display.max_info_columns``.

        An explicit ``max_cols`` wins. Otherwise the option value is used; a
        None option value falls back to a bound the frame cannot exceed.
        """
        if max_cols is not None:
            return max_cols
        limit = get_option("display.max_info_columns")
        if limit is None:
            return self.col_count + 1
        return limit

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Resolve ``show_counts``.

        When not given, counts are shown only if the frame exceeds neither the
        column limit nor the row limit.
        """
        if show_counts is not None:
            return show_counts
        return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)

    def _create_table_builder(self) -> _DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.

        A truthy ``verbose`` forces the verbose table and an explicit False
        forces the summary. Any other value (notably None) selects the summary
        only when the frame has more columns than ``max_cols``.
        """
        if self.verbose:
            use_verbose = True
        elif self.verbose is False or self.exceeds_info_cols:
            use_verbose = False
        else:
            use_verbose = True

        if use_verbose:
            return _DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return _DataFrameTableBuilderNonVerbose(info=self.info)

671 

672 

class _SeriesInfoPrinter(_InfoPrinterAbstract):
    """Printer producing the info output of a series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> _SeriesTableBuilder:
        """
        Pick the table builder matching the requested verbosity.

        The summary builder is used only for a falsy ``verbose`` other than
        None; None and truthy values select the verbose builder.
        """
        wants_summary = self.verbose is not None and not self.verbose
        if wants_summary:
            return _SeriesTableBuilderNonVerbose(info=self.info)
        return _SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, treating None as True."""
        return True if show_counts is None else show_counts

714 

715 

716class _TableBuilderAbstract(ABC): 

717 """ 

718 Abstract builder for info table. 

719 """ 

720 

721 _lines: list[str] 

722 info: _BaseInfo 

723 

724 @abstractmethod 

725 def get_lines(self) -> list[str]: 

726 """Product in a form of list of lines (strings).""" 

727 

728 @property 

729 def data(self) -> DataFrame | Series: 

730 return self.info.data 

731 

732 @property 

733 def dtypes(self) -> Iterable[Dtype]: 

734 """Dtypes of each of the DataFrame's columns.""" 

735 return self.info.dtypes 

736 

737 @property 

738 def dtype_counts(self) -> Mapping[str, int]: 

739 """Mapping dtype - number of counts.""" 

740 return self.info.dtype_counts 

741 

742 @property 

743 def display_memory_usage(self) -> bool: 

744 """Whether to display memory usage.""" 

745 return bool(self.info.memory_usage) 

746 

747 @property 

748 def memory_usage_string(self) -> str: 

749 """Memory usage string with proper size qualifier.""" 

750 return self.info.memory_usage_string 

751 

752 @property 

753 def non_null_counts(self) -> Sequence[int]: 

754 return self.info.non_null_counts 

755 

756 def add_object_type_line(self) -> None: 

757 """Add line with string representation of dataframe to the table.""" 

758 self._lines.append(str(type(self.data))) 

759 

760 def add_index_range_line(self) -> None: 

761 """Add line with range of indices to the table.""" 

762 self._lines.append(self.data.index._summary()) 

763 

764 def add_dtypes_line(self) -> None: 

765 """Add summary line with dtypes present in dataframe.""" 

766 collected_dtypes = [ 

767 f"{key}({val:d})" for key, val in sorted(self.dtype_counts.items()) 

768 ] 

769 self._lines.append(f"dtypes: {', '.join(collected_dtypes)}") 

770 

771 

class _DataFrameTableBuilder(_TableBuilderAbstract):
    """
    Abstract builder for the info table of a dataframe.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        self._lines = []
        # A frame without columns gets the short "empty" layout.
        if self.col_count == 0:
            fill = self._fill_empty_info
        else:
            fill = self._fill_non_empty_info
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add the lines describing a dataframe that has no columns."""
        self.add_object_type_line()
        self.add_index_range_line()
        type_name = type(self.data).__name__
        self._lines.append(f"Empty {type_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame being summarized."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def add_memory_usage_line(self) -> None:
        """Append the ``memory usage: ...`` line."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

821 

822 

class _DataFrameTableBuilderNonVerbose(_DataFrameTableBuilder):
    """
    Builder for the summary (non-verbose) dataframe info table.
    """

    def add_columns_summary_line(self) -> None:
        """Append the summary of the column index, labelled "Columns"."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)

    def _fill_non_empty_info(self) -> None:
        """Add the lines of the summary layout for a non-empty dataframe."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()

839 

840 

class _TableBuilderVerboseMixin(_TableBuilderAbstract):
    """
    Mixin providing the tabular part of the verbose info output.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Length of each header title."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Per column, the larger of the header width and the body width."""
        body_widths = self._get_body_column_widths()
        paired = zip(self.header_column_widths, body_widths)
        return [max(header_width, body_width) for header_width, body_width in paired]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Width of the longest cell in each body column."""
        # Transpose the rows so that each tuple holds the cells of one column.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        return [max(map(len, column)) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the iterator over the body rows.

        Each element represents a row comprising a sequence of strings. The
        variant with or without counts is chosen by ``with_counts``.
        """
        make_rows = (
            self._gen_rows_with_counts
            if self.with_counts
            else self._gen_rows_without_counts
        )
        return make_rows()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Append the header titles, each sized to its column width."""
        cells = [
            _put_str(title, width)
            for title, width in zip(self.headers, self.gross_column_widths)
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Append dashes as long as each header title, sized to the column."""
        widths = zip(self.header_column_widths, self.gross_column_widths)
        cells = [
            _put_str("-" * header_width, gross_width)
            for header_width, gross_width in widths
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Append one line per body row, each cell sized to its column."""
        for row in self.strrows:
            cells = [
                _put_str(cell, width)
                for cell, width in zip(row, self.gross_column_widths)
            ]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from map(pprint_thing, self.dtypes)

932 

933 

class _DataFrameTableBuilderVerbose(_DataFrameTableBuilder, _TableBuilderVerboseMixin):
    """
    Builder for the verbose (per-column) dataframe info table.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add the lines of the verbose layout for a non-empty dataframe."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Header titles; "Non-Null Count" is present only with counts."""
        titles = [" # ", "Column"]
        if self.with_counts:
            titles.append("Non-Null Count")
        titles.append("Dtype")
        return titles

    def add_columns_summary_line(self) -> None:
        """Append the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Yield (line number, column name, dtype) rows."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_dtypes(),
        )
        for row in rows:
            yield row

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Yield (line number, column name, non-null count, dtype) rows."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
        for row in rows:
            yield row

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {number}" for number, _ in enumerate(self.ids))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from map(pprint_thing, self.ids)

998 

999 

class _SeriesTableBuilder(_TableBuilderAbstract):
    """
    Abstract builder for the info table of a series.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    @property
    def data(self) -> Series:
        """Series being summarized."""
        return self.info.data

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

    def add_memory_usage_line(self) -> None:
        """Append the ``memory usage: ...`` line."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

1030 

1031 

class _SeriesTableBuilderNonVerbose(_SeriesTableBuilder):
    """
    Builder for the summary (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add the lines of the summary layout for a series."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()

1044 

1045 

class _SeriesTableBuilderVerbose(_SeriesTableBuilder, _TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add line with the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Every row has to be a sequence of cells. Yielding the bare dtype
        # string would make the string itself act as the row, so its
        # characters would be taken as cells and only the first character
        # would be printed. ``zip`` over a single iterable wraps each dtype
        # in a one-element tuple.
        yield from zip(self._gen_dtypes())

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )

1094 

1095 

1096def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: 

1097 """ 

1098 Create mapping between datatypes and their number of occurrences. 

1099 """ 

1100 # groupby dtype.name to collect e.g. Categorical columns 

1101 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()