Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/formats/info.py: 50%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

356 statements  

1from __future__ import annotations 

2 

3from abc import ( 

4 ABC, 

5 abstractmethod, 

6) 

7import sys 

8from textwrap import dedent 

9from typing import ( 

10 TYPE_CHECKING, 

11 Iterable, 

12 Iterator, 

13 Mapping, 

14 Sequence, 

15) 

16 

17from pandas._config import get_option 

18 

19from pandas._typing import ( 

20 Dtype, 

21 WriteBuffer, 

22) 

23 

24from pandas.io.formats import format as fmt 

25from pandas.io.formats.printing import pprint_thing 

26 

27if TYPE_CHECKING: 

28 from pandas import ( 

29 DataFrame, 

30 Index, 

31 Series, 

32 ) 

33 

34 

35frame_max_cols_sub = dedent( 

36 """\ 

37 max_cols : int, optional 

38 When to switch from the verbose to the truncated output. If the 

39 DataFrame has more than `max_cols` columns, the truncated output 

40 is used. By default, the setting in 

41 ``pandas.options.display.max_info_columns`` is used.""" 

42) 

43 

44 

45show_counts_sub = dedent( 

46 """\ 

47 show_counts : bool, optional 

48 Whether to show the non-null counts. By default, this is shown 

49 only if the DataFrame is smaller than 

50 ``pandas.options.display.max_info_rows`` and 

51 ``pandas.options.display.max_info_columns``. A value of True always 

52 shows the counts, and False never shows the counts.""" 

53) 

54 

55 

56frame_examples_sub = dedent( 

57 """\ 

58 >>> int_values = [1, 2, 3, 4, 5] 

59 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] 

60 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] 

61 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, 

62 ... "float_col": float_values}) 

63 >>> df 

64 int_col text_col float_col 

65 0 1 alpha 0.00 

66 1 2 beta 0.25 

67 2 3 gamma 0.50 

68 3 4 delta 0.75 

69 4 5 epsilon 1.00 

70 

71 Prints information of all columns: 

72 

73 >>> df.info(verbose=True) 

74 <class 'pandas.core.frame.DataFrame'> 

75 RangeIndex: 5 entries, 0 to 4 

76 Data columns (total 3 columns): 

77 # Column Non-Null Count Dtype 

78 --- ------ -------------- ----- 

79 0 int_col 5 non-null int64 

80 1 text_col 5 non-null object 

81 2 float_col 5 non-null float64 

82 dtypes: float64(1), int64(1), object(1) 

83 memory usage: 248.0+ bytes 

84 

85 Prints a summary of columns count and its dtypes but not per column 

86 information: 

87 

88 >>> df.info(verbose=False) 

89 <class 'pandas.core.frame.DataFrame'> 

90 RangeIndex: 5 entries, 0 to 4 

91 Columns: 3 entries, int_col to float_col 

92 dtypes: float64(1), int64(1), object(1) 

93 memory usage: 248.0+ bytes 

94 

95 Pipe output of DataFrame.info to buffer instead of sys.stdout, get 

96 buffer content and writes to a text file: 

97 

98 >>> import io 

99 >>> buffer = io.StringIO() 

100 >>> df.info(buf=buffer) 

101 >>> s = buffer.getvalue() 

102 >>> with open("df_info.txt", "w", 

103 ... encoding="utf-8") as f: # doctest: +SKIP 

104 ... f.write(s) 

105 260 

106 

107 The `memory_usage` parameter allows deep introspection mode, specially 

108 useful for big DataFrames and fine-tune memory optimization: 

109 

110 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) 

111 >>> df = pd.DataFrame({ 

112 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), 

113 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), 

114 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) 

115 ... }) 

116 >>> df.info() 

117 <class 'pandas.core.frame.DataFrame'> 

118 RangeIndex: 1000000 entries, 0 to 999999 

119 Data columns (total 3 columns): 

120 # Column Non-Null Count Dtype 

121 --- ------ -------------- ----- 

122 0 column_1 1000000 non-null object 

123 1 column_2 1000000 non-null object 

124 2 column_3 1000000 non-null object 

125 dtypes: object(3) 

126 memory usage: 22.9+ MB 

127 

128 >>> df.info(memory_usage='deep') 

129 <class 'pandas.core.frame.DataFrame'> 

130 RangeIndex: 1000000 entries, 0 to 999999 

131 Data columns (total 3 columns): 

132 # Column Non-Null Count Dtype 

133 --- ------ -------------- ----- 

134 0 column_1 1000000 non-null object 

135 1 column_2 1000000 non-null object 

136 2 column_3 1000000 non-null object 

137 dtypes: object(3) 

138 memory usage: 165.9 MB""" 

139) 

140 

141 

142frame_see_also_sub = dedent( 

143 """\ 

144 DataFrame.describe: Generate descriptive statistics of DataFrame 

145 columns. 

146 DataFrame.memory_usage: Memory usage of DataFrame columns.""" 

147) 

148 

149 

# Values for the ``{...}`` placeholders of INFO_DOCSTRING, DataFrame flavour.
frame_sub_kwargs = dict(
    klass="DataFrame",
    type_sub=" and columns",
    max_cols_sub=frame_max_cols_sub,
    show_counts_sub=show_counts_sub,
    examples_sub=frame_examples_sub,
    see_also_sub=frame_see_also_sub,
    version_added_sub="",
)

159 

160 

161series_examples_sub = dedent( 

162 """\ 

163 >>> int_values = [1, 2, 3, 4, 5] 

164 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] 

165 >>> s = pd.Series(text_values, index=int_values) 

166 >>> s.info() 

167 <class 'pandas.core.series.Series'> 

168 Index: 5 entries, 1 to 5 

169 Series name: None 

170 Non-Null Count Dtype 

171 -------------- ----- 

172 5 non-null object 

173 dtypes: object(1) 

174 memory usage: 80.0+ bytes 

175 

176 Prints a summary excluding information about its values: 

177 

178 >>> s.info(verbose=False) 

179 <class 'pandas.core.series.Series'> 

180 Index: 5 entries, 1 to 5 

181 dtypes: object(1) 

182 memory usage: 80.0+ bytes 

183 

184 Pipe output of Series.info to buffer instead of sys.stdout, get 

185 buffer content and writes to a text file: 

186 

187 >>> import io 

188 >>> buffer = io.StringIO() 

189 >>> s.info(buf=buffer) 

190 >>> s = buffer.getvalue() 

191 >>> with open("df_info.txt", "w", 

192 ... encoding="utf-8") as f: # doctest: +SKIP 

193 ... f.write(s) 

194 260 

195 

196 The `memory_usage` parameter allows deep introspection mode, specially 

197 useful for big Series and fine-tune memory optimization: 

198 

199 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) 

200 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6)) 

201 >>> s.info() 

202 <class 'pandas.core.series.Series'> 

203 RangeIndex: 1000000 entries, 0 to 999999 

204 Series name: None 

205 Non-Null Count Dtype 

206 -------------- ----- 

207 1000000 non-null object 

208 dtypes: object(1) 

209 memory usage: 7.6+ MB 

210 

211 >>> s.info(memory_usage='deep') 

212 <class 'pandas.core.series.Series'> 

213 RangeIndex: 1000000 entries, 0 to 999999 

214 Series name: None 

215 Non-Null Count Dtype 

216 -------------- ----- 

217 1000000 non-null object 

218 dtypes: object(1) 

219 memory usage: 55.3 MB""" 

220) 

221 

222 

223series_see_also_sub = dedent( 

224 """\ 

225 Series.describe: Generate descriptive statistics of Series. 

226 Series.memory_usage: Memory usage of Series.""" 

227) 

228 

229 

# Values for the ``{...}`` placeholders of INFO_DOCSTRING, Series flavour.
# ``max_cols_sub`` is empty here, so no ``max_cols`` text is substituted.
series_sub_kwargs = dict(
    klass="Series",
    type_sub="",
    max_cols_sub="",
    show_counts_sub=show_counts_sub,
    examples_sub=series_examples_sub,
    see_also_sub=series_see_also_sub,
    version_added_sub="\n.. versionadded:: 1.4.0\n",
)

239 

240 

241INFO_DOCSTRING = dedent( 

242 """ 

243 Print a concise summary of a {klass}. 

244 

245 This method prints information about a {klass} including 

246 the index dtype{type_sub}, non-null values and memory usage. 

247 {version_added_sub}\ 

248 

249 Parameters 

250 ---------- 

251 verbose : bool, optional 

252 Whether to print the full summary. By default, the setting in 

253 ``pandas.options.display.max_info_columns`` is followed. 

254 buf : writable buffer, defaults to sys.stdout 

255 Where to send the output. By default, the output is printed to 

256 sys.stdout. Pass a writable buffer if you need to further process 

257 the output. 

258 {max_cols_sub} 

259 memory_usage : bool, str, optional 

260 Specifies whether total memory usage of the {klass} 

261 elements (including the index) should be displayed. By default, 

262 this follows the ``pandas.options.display.memory_usage`` setting. 

263 

264 True always show memory usage. False never shows memory usage. 

265 A value of 'deep' is equivalent to "True with deep introspection". 

266 Memory usage is shown in human-readable units (base-2 

267 representation). Without deep introspection a memory estimation is 

268 made based in column dtype and number of rows assuming values 

269 consume the same memory amount for corresponding dtypes. With deep 

270 memory introspection, a real memory usage calculation is performed 

271 at the cost of computational resources. See the 

272 :ref:`Frequently Asked Questions <df-memory-usage>` for more 

273 details. 

274 {show_counts_sub} 

275 

276 Returns 

277 ------- 

278 None 

279 This method prints a summary of a {klass} and returns None. 

280 

281 See Also 

282 -------- 

283 {see_also_sub} 

284 

285 Examples 

286 -------- 

287 {examples_sub} 

288 """ 

289) 

290 

291 

292def _put_str(s: str | Dtype, space: int) -> str: 

293 """ 

294 Make string of specified length, padding to the right if necessary. 

295 

296 Parameters 

297 ---------- 

298 s : Union[str, Dtype] 

299 String to be formatted. 

300 space : int 

301 Length to force string to be of. 

302 

303 Returns 

304 ------- 

305 str 

306 String coerced to given length. 

307 

308 Examples 

309 -------- 

310 >>> pd.io.formats.info._put_str("panda", 6) 

311 'panda ' 

312 >>> pd.io.formats.info._put_str("panda", 4) 

313 'pand' 

314 """ 

315 return str(s)[:space].ljust(space) 

316 

317 

318def _sizeof_fmt(num: float, size_qualifier: str) -> str: 

319 """ 

320 Return size in human readable format. 

321 

322 Parameters 

323 ---------- 

324 num : int 

325 Size in bytes. 

326 size_qualifier : str 

327 Either empty, or '+' (if lower bound). 

328 

329 Returns 

330 ------- 

331 str 

332 Size in human readable format. 

333 

334 Examples 

335 -------- 

336 >>> _sizeof_fmt(23028, '') 

337 '22.5 KB' 

338 

339 >>> _sizeof_fmt(23028, '+') 

340 '22.5+ KB' 

341 """ 

342 for x in ["bytes", "KB", "MB", "GB", "TB"]: 

343 if num < 1024.0: 

344 return f"{num:3.1f}{size_qualifier} {x}" 

345 num /= 1024.0 

346 return f"{num:3.1f}{size_qualifier} PB" 

347 

348 

349def _initialize_memory_usage( 

350 memory_usage: bool | str | None = None, 

351) -> bool | str: 

352 """Get memory usage based on inputs and display options.""" 

353 if memory_usage is None: 

354 memory_usage = get_option("display.memory_usage") 

355 return memory_usage 

356 

357 

class BaseInfo(ABC):
    """
    Abstract interface shared by DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Either dataframe or series.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    # Object being summarized.
    data: DataFrame | Series
    # Effective memory-usage setting (a bool or the string "deep").
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of occurrences."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, one per column (a single entry for a series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human-readable memory usage, terminated by a newline."""
        n_bytes = self.memory_usage_bytes
        qualifier = self.size_qualifier
        formatted = _sizeof_fmt(n_bytes, qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """
        Suffix marking the reported size as a lower bound.

        Returns "+" when memory usage is enabled without deep introspection
        and either an "object" dtype is present or the index reports itself
        as memory-usage qualified; otherwise returns an empty string.
        """
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        if (
            "object" in self.dtype_counts
            or self.data.index._is_memory_usage_qualified()
        ):
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by concrete classes."""
        pass

439 

440 

class DataFrameInfo(BaseInfo):
    """
    Info provider backed by a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Frame to summarize.
    memory_usage : bool or str, optional
        Memory-usage setting; ``None`` is resolved through
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        columns = self.ids
        return len(columns)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of columns of that dtype."""
        frame = self.data
        return _get_dataframe_dtype_counts(frame)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts of the frame, as returned by ``DataFrame.count``."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Summed memory usage of the index and all columns, in bytes."""
        per_item = self.data.memory_usage(
            index=True, deep=self.memory_usage == "deep"
        )
        return per_item.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a DataFrameInfoPrinter for this object and write to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

512 

513 

class SeriesInfo(BaseInfo):
    """
    Info provider backed by a Series.

    Parameters
    ----------
    data : Series
        Series to summarize.
    memory_usage : bool or str, optional
        Memory-usage setting; ``None`` is resolved through
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series dtype."""
        return [self.data.dtypes]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts, computed by wrapping the series in a DataFrame."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        return [self.data.count()]

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        return self.data.memory_usage(index=True, deep=self.memory_usage == "deep")

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a SeriesInfoPrinter for this object and write to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it is only accepted by DataFrame.info.
        """
        if max_cols is None:
            SeriesInfoPrinter(
                info=self,
                verbose=verbose,
                show_counts=show_counts,
            ).to_buffer(buf)
            return
        raise ValueError(
            "Argument `max_cols` can only be passed "
            "in DataFrame.info, not Series.info"
        )

572 

573 

class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Derives from ``ABC`` so that the ``@abstractmethod`` marker on
    ``_create_table_builder`` is enforced: the base class itself, and any
    subclass that does not implement the method, cannot be instantiated.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Save dataframe info into buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination for the lines; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""

590 

591 

class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols must be resolved first: the show_counts default reads it
        # through ``exceeds_info_cols``.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """
        Maximum info rows to be displayed.

        Read from ``display.max_info_rows``. ``get_option`` has no
        default-value argument (a second positional argument is taken as
        ``silent``), so the fallback is applied here: when the option
        yields None, a value one larger than the number of rows is
        returned so the row limit is never exceeded.
        """
        max_rows = get_option("display.max_info_rows")
        if max_rows is None:
            return len(self.data) + 1
        return max_rows

    @property
    def exceeds_info_cols(self) -> bool:
        """Whether the number of columns to be summarized exceeds the maximum."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Whether the number of rows to be summarized exceeds the maximum."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """
        Resolve the column limit.

        An explicit ``max_cols`` wins; otherwise ``display.max_info_columns``
        is used. If the option yields None, ``col_count + 1`` is returned so
        the column limit is never exceeded.
        """
        if max_cols is not None:
            return max_cols
        option_value = get_option("display.max_info_columns")
        if option_value is None:
            return self.col_count + 1
        return option_value

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Resolve whether non-null counts are shown.

        When not given explicitly, counts are shown only if neither the
        column limit nor the row limit is exceeded.
        """
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        verbose = self.verbose
        if not verbose and verbose is not False:
            # Not explicitly switched off (e.g. None): fall back to the
            # column limit to decide between verbose and truncated output.
            verbose = not self.exceeds_info_cols
        if verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return DataFrameTableBuilderNonVerbose(info=self.info)

671 

672 

class SeriesInfoPrinter(InfoPrinterAbstract):
    """Printer that renders the info table of a Series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts`` unchanged, or True when it is None."""
        return True if show_counts is None else show_counts

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Pick the table builder matching the verbosity setting.

        The non-verbose builder is used only for a falsy ``verbose`` other
        than None; None and truthy values select the verbose builder.
        """
        explicitly_quiet = self.verbose is not None and not self.verbose
        if explicitly_quiet:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

714 

715 

class TableBuilderAbstract(ABC):
    """
    Common base of the info table builders.

    Exposes the values of ``self.info`` as properties and provides the
    line-producing helpers that are shared by all builders.
    """

    # Lines accumulated for the table.
    _lines: list[str]
    # Source of every value shown in the table.
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object held by ``self.info``."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes as reported by ``self.info``."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as reported by ``self.info``."""
        return self.info.non_null_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        setting = self.info.memory_usage
        return bool(setting)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    def add_object_type_line(self) -> None:
        """Append the string form of the data object's type."""
        type_line = str(type(self.data))
        self._lines.append(type_line)

    def add_index_range_line(self) -> None:
        """Append the summary line produced by the index."""
        index_line = self.data.index._summary()
        self._lines.append(index_line)

    def add_dtypes_line(self) -> None:
        """Append the ``dtypes: name(count), ...`` line, sorted by item."""
        ordered_counts = sorted(self.dtype_counts.items())
        joined = ", ".join(f"{name}({number:d})" for name, number in ordered_counts)
        self._lines.append(f"dtypes: {joined}")

770 

771 

class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Base builder for the dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def get_lines(self) -> list[str]:
        """Rebuild the table from scratch and return its lines."""
        self._lines = []
        # A frame without columns gets the short "Empty ..." form.
        fill = (
            self._fill_empty_info if self.col_count == 0 else self._fill_non_empty_info
        )
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add the lines describing a dataframe without columns."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    def add_memory_usage_line(self) -> None:
        """Append the ``memory usage: ...`` line."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

821 

822 

class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Builder for the truncated (non-verbose) dataframe info table.
    """

    def add_columns_summary_line(self) -> None:
        """Append the summary of the columns index, labelled "Columns"."""
        summary = self.ids._summary(name="Columns")
        self._lines.append(summary)

    def _fill_non_empty_info(self) -> None:
        """Add type, index, columns summary, dtypes and optional memory lines."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        ):
            add_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

839 

840 

class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Shared machinery for the verbose (tabular) info output.
    """

    # Separator placed between adjacent table columns.
    SPACING: str = "  "
    # Body rows, each a sequence of already-formatted cell strings.
    strrows: Sequence[Sequence[str]]
    # Final width of every column (header and body taken together).
    gross_column_widths: Sequence[int]
    # Whether the non-null count column is part of the table.
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return list(map(len, self.headers))

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Per column, the larger of the header width and the body width."""
        body_widths = self._get_body_column_widths()
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(
                self.header_column_widths, body_widths
            )
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Width of the longest cell in every body column."""
        widths = []
        # Transpose the rows so that each item is one table column.
        for column in zip(*self.strrows):
            widths.append(max(map(len, column)))
        return widths

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the row iterator matching ``with_counts``.

        Each element represents a row comprising a sequence of strings.
        """
        return (
            self._gen_rows_with_counts()
            if self.with_counts
            else self._gen_rows_without_counts()
        )

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Append the header line, each title padded to its column width."""
        cells = (
            _put_str(title, width)
            for title, width in zip(self.headers, self.gross_column_widths)
        )
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Append the dashed line; dashes span only the header title width."""
        cells = (
            _put_str("-" * title_width, width)
            for title_width, width in zip(
                self.header_column_widths, self.gross_column_widths
            )
        )
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Append one line per body row, cells padded to the column widths."""
        for cells in self.strrows:
            padded = (
                _put_str(cell, width)
                for cell, width in zip(cells, self.gross_column_widths)
            )
            self._lines.append(self.SPACING.join(padded))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{number} non-null" for number in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from (pprint_thing(item) for item in self.dtypes)

932 

933 

class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Builder for the verbose (per-column) dataframe info table.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        names = [" # ", "Column", "Dtype"]
        if self.with_counts:
            names.insert(2, "Non-Null Count")
        return names

    def _fill_non_empty_info(self) -> None:
        """Add type, index, column table, dtypes and optional memory lines."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        ):
            add_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Append the ``Data columns (total N columns):`` line."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        counts = self._gen_non_null_counts()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, counts, dtypes)

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, dtypes)

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        for position in range(len(self.ids)):
            yield f" {position}"

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from (pprint_thing(label) for label in self.ids)

998 

999 

class SeriesTableBuilder(TableBuilderAbstract):
    """
    Base builder for the series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def get_lines(self) -> list[str]:
        """Rebuild the table from scratch and return its lines."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

    def add_memory_usage_line(self) -> None:
        """Append the ``memory usage: ...`` line."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

1030 

1031 

class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Builder for the short (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add type, index, dtypes and optional memory-usage lines."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        ):
            add_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

1044 

1045 

class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Append the ``Series name: ...`` line."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Each row must be a sequence of cells. Yielding the bare dtype
        # string would make every character a separate cell, so only the
        # first character would be rendered under the single "Dtype" header.
        # zip() with one iterable wraps each dtype string in a 1-tuple.
        yield from zip(self._gen_dtypes())

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )

1094 

1095 

1096def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: 

1097 """ 

1098 Create mapping between datatypes and their number of occurrences. 

1099 """ 

1100 # groupby dtype.name to collect e.g. Categorical columns 

1101 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()