Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/plotting/_core.py: 28%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

194 statements  

1from __future__ import annotations 

2 

3import importlib 

4from typing import ( 

5 TYPE_CHECKING, 

6 Callable, 

7 Literal, 

8) 

9 

10from pandas._config import get_option 

11 

12from pandas.util._decorators import ( 

13 Appender, 

14 Substitution, 

15) 

16 

17from pandas.core.dtypes.common import ( 

18 is_integer, 

19 is_list_like, 

20) 

21from pandas.core.dtypes.generic import ( 

22 ABCDataFrame, 

23 ABCSeries, 

24) 

25 

26from pandas.core.base import PandasObject 

27 

28if TYPE_CHECKING: 

29 from collections.abc import ( 

30 Hashable, 

31 Sequence, 

32 ) 

33 import types 

34 

35 from matplotlib.axes import Axes 

36 import numpy as np 

37 

38 from pandas._typing import IndexLabel 

39 

40 from pandas import ( 

41 DataFrame, 

42 Series, 

43 ) 

44 from pandas.core.groupby.generic import DataFrameGroupBy 

45 

46 

def hist_series(
    self: Series,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Plot a histogram of the Series values through the plotting backend.

    Parameters
    ----------
    by : object, optional
        When given, separate histograms are formed for each group.
    ax : matplotlib axis object
        Axis to draw on; gca() is used when omitted.
    grid : bool, default True
        Toggle the axis grid lines.
    xlabelsize : int, default None
        Size applied to the x-axis labels, when specified.
    xrot : float, default None
        Rotation applied to the x-axis labels.
    ylabelsize : int, default None
        Size applied to the y-axis labels, when specified.
    yrot : float, default None
        Rotation applied to the y-axis labels.
    figsize : tuple, default None
        Size of the figure, in inches.
    bins : int or sequence, default 10
        How the histogram is binned. An integer requests that many bins
        (bins + 1 bin edges are calculated and returned). A sequence
        supplies the bin edges themselves, from the left edge of the first
        bin to the right edge of the last one, and is returned unmodified.
    backend : str, default None
        Plotting backend to use for this call (for instance 'matplotlib')
        in place of the one configured in the ``plotting.backend`` option.
        To select the backend for the whole session, set
        ``pd.options.plotting.backend`` instead.
    legend : bool, default False
        Toggle the legend.

    **kwargs
        Forwarded to the backend's plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        The histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.

    Examples
    --------
    For Series:

    .. plot::
        :context: close-figs

        >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
        >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
        >>> hist = ser.hist()

    For Groupby:

    .. plot::
        :context: close-figs

        >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
        >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
        >>> hist = ser.groupby(level=0).hist()
    """
    # Resolve the backend first; ``backend`` itself is consumed here and is
    # not part of what gets forwarded.
    draw_hist = _get_plot_backend(backend).hist_series
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
        **kwargs,
    }
    return draw_hist(self, **hist_options)

143 

144 

def hist_frame(
    data: DataFrame,
    column: IndexLabel | None = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw one histogram per column of a DataFrame.

    A `histogram`_ represents the distribution of data. Each series in the
    DataFrame is passed to :meth:`matplotlib.pyplot.hist`, which yields one
    histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object that holds the data.
    column : str or sequence, optional
        Restricts the data to this subset of columns when given.
    by : object, optional
        When given, separate histograms are formed for each group.
    grid : bool, default True
        Toggle the axis grid lines.
    xlabelsize : int, default None
        Size applied to the x-axis labels, when specified.
    xrot : float, default None
        Rotation of the x axis labels; 90, for example, shows the x labels
        rotated 90 degrees clockwise.
    ylabelsize : int, default None
        Size applied to the y-axis labels, when specified.
    yrot : float, default None
        Rotation of the y axis labels; 90, for example, shows the y labels
        rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        Axes on which the histogram is plotted.
    sharex : bool, default False
        Share the x axis among subplots and hide some of the x axis labels.
        Be aware that combining a passed-in ax with sharex=True alters the
        x axis labels of every subplot in the figure.
    sharey : bool, default False
        Share the y axis among subplots and hide some of the y axis labels.
    figsize : tuple, optional
        Size, in inches, of the figure to create. The value from
        `matplotlib.rcParams` is used by default.
    layout : tuple, optional
        (rows, columns) arrangement of the histograms.
    bins : int or sequence, default 10
        How the histograms are binned. An integer requests that many bins
        (bins + 1 bin edges are calculated and returned). A sequence
        supplies the bin edges themselves, from the left edge of the first
        bin to the right edge of the last one, and is returned unmodified.

    backend : str, default None
        Plotting backend to use for this call (for instance 'matplotlib')
        in place of the one configured in the ``plotting.backend`` option.
        To select the backend for the whole session, set
        ``pd.options.plotting.backend`` instead.

    legend : bool, default False
        Toggle the legend.

    **kwargs
        Every other plotting keyword argument, handed on to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...         'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
        >>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
        >>> df = pd.DataFrame(data, index=index)
        >>> hist = df.hist(bins=3)
    """
    # Resolve the backend first; ``backend`` itself is consumed here and is
    # not part of what gets forwarded.
    draw_hist = _get_plot_backend(backend).hist_frame
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
        **kwargs,
    }
    return draw_hist(data, **hist_options)

269 

270 

# Shared docstring template for the boxplot entry points; it is attached to
# ``boxplot`` and ``boxplot_frame`` through ``Appender``. The ``%(data)s`` and
# ``%(backend)s`` placeholders are filled in by ``Substitution``. The
# backslash after each placeholder joins it to the following line, so an
# empty substitution does not leave a stray blank line behind.
_boxplot_doc = """
Make a box plot from DataFrame columns.

Make a box-and-whisker plot from DataFrame columns, optionally grouped
by some other columns. A box plot is a method for graphically depicting
groups of numerical data through their quartiles.
The box extends from the Q1 to Q3 quartile values of the data,
with a line at the median (Q2). The whiskers extend from the edges
of box to show the range of the data. By default, they extend no more than
`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
data point within that interval. Outliers are plotted as separate dots.

For further details see
Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

Parameters
----------
%(data)s\
column : str or list of str, optional
    Column name or list of names, or vector.
    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
by : str or array-like, optional
    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
    One box-plot will be done per value of columns in `by`.
ax : object of class matplotlib.axes.Axes, optional
    The matplotlib axes to be used by boxplot.
fontsize : float or str
    Tick label font size in points or as a string (e.g., `large`).
rot : float, default 0
    The rotation angle of labels (in degrees)
    with respect to the screen coordinate system.
grid : bool, default True
    Setting this to True will show the grid.
figsize : A tuple (width, height) in inches
    The size of the figure to create in matplotlib.
layout : tuple (rows, columns), optional
    For example, (3, 5) will display the subplots
    using 3 rows and 5 columns, starting from the top-left.
return_type : {'axes', 'dict', 'both'} or None, default 'axes'
    The kind of object to return. The default is ``axes``.

    * 'axes' returns the matplotlib axes the boxplot is drawn on.
    * 'dict' returns a dictionary whose values are the matplotlib
      Lines of the boxplot.
    * 'both' returns a namedtuple with the axes and dict.
    * when grouping with ``by``, a Series mapping columns to
      ``return_type`` is returned.

      If ``return_type`` is `None`, a NumPy array
      of axes with the same shape as ``layout`` is returned.
%(backend)s\

**kwargs
    All other plotting keyword arguments to be passed to
    :func:`matplotlib.pyplot.boxplot`.

Returns
-------
result
    See Notes.

See Also
--------
pandas.Series.plot.hist: Make a histogram.
matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

Notes
-----
The return type depends on the `return_type` parameter:

* 'axes' : object of class matplotlib.axes.Axes
* 'dict' : dict of matplotlib.lines.Line2D objects
* 'both' : a namedtuple with structure (ax, lines)

For data grouped with ``by``, return a Series of the above or a numpy
array:

* :class:`~pandas.Series`
* :class:`~numpy.array` (for ``return_type = None``)

Use ``return_type='dict'`` when you want to tweak the appearance
of the lines after plotting. In this case a dict containing the Lines
making up the boxes, caps, fliers, medians, and whiskers is returned.

Examples
--------

Boxplots can be created for every column in the dataframe
by ``df.boxplot()`` or indicating the columns to be used:

.. plot::
    :context: close-figs

    >>> np.random.seed(1234)
    >>> df = pd.DataFrame(np.random.randn(10, 4),
    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP

Boxplots of variables distributions grouped by the values of a third
variable can be created using the option ``by``. For instance:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 2),
    ...                   columns=['Col1', 'Col2'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> boxplot = df.boxplot(by='X')

A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
in order to group the data by combination of the variables in the x-axis:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 3),
    ...                   columns=['Col1', 'Col2', 'Col3'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
    ...                      'B', 'A', 'B', 'A', 'B'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

The layout of boxplot can be adjusted giving a tuple to ``layout``:

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      layout=(2, 1))

Additional formatting can be done to the boxplot, like suppressing the grid
(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
or changing the fontsize (i.e. ``fontsize=15``):

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP

The parameter ``return_type`` can be used to select the type of element
returned by `boxplot`. When ``return_type='axes'`` is selected,
the matplotlib axes on which the boxplot is drawn are returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
    >>> type(boxplot)
    <class 'matplotlib.axes._axes.Axes'>

When grouping with ``by``, a Series mapping columns to ``return_type``
is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type='axes')
    >>> type(boxplot)
    <class 'pandas.core.series.Series'>

If ``return_type`` is `None`, a NumPy array of axes with the same shape
as ``layout`` is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type=None)
    >>> type(boxplot)
    <class 'numpy.ndarray'>
"""

436 

# Parameter entry describing ``backend``. ``boxplot_frame`` substitutes it
# into the ``%(backend)s`` slot of ``_boxplot_doc``; the leading backslash
# keeps the text from starting with an empty line.
_backend_doc = """\
backend : str, default None
    Backend to use instead of the backend specified in the option
    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
    specify the ``plotting.backend`` for the whole session, set
    ``pd.options.plotting.backend``.
"""

444 

445 

# Docstring fragment (Parameters / Returns) containing a ``%(kind)s``
# placeholder for the plot kind being described. Its consumers are not part
# of the code visible here -- presumably the bar/line plotting methods;
# verify against their decorators.
_bar_or_line_doc = """
        Parameters
        ----------
        x : label or position, optional
            Allows plotting of one column versus another. If not specified,
            the index of the DataFrame is used.
        y : label or position, optional
            Allows plotting of one column versus another. If not specified,
            all numerical columns are used.
        color : str, array-like, or dict, optional
            The color for each of the DataFrame's columns. Possible values are:

            - A single color string referred to by name, RGB or RGBA code,
                for instance 'red' or '#a98d19'.

            - A sequence of color strings referred to by name, RGB or RGBA
                code, which will be used for each column recursively. For
                instance ['green','yellow'] each column's %(kind)s will be filled in
                green or yellow, alternatively. If there is only a single column to
                be plotted, then only the first color from the color list will be
                used.

            - A dict of the form {column name : color}, so that each column will be
                colored accordingly. For example, if your columns are called `a` and
                `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
                column `a` in green and %(kind)ss for column `b` in red.

        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or np.ndarray of them
            An ndarray is returned with one :class:`matplotlib.axes.Axes`
            per column when ``subplots=True``.
"""

483 

484 

@Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # Deliberately no docstring literal: the decorators above build the
    # documentation from ``_boxplot_doc`` (with an empty ``backend`` entry),
    # and a literal here would be merged into that text.
    # NOTE(review): confirm that the indentation inside the ``data``
    # substitution text matches the other parameter descriptions in
    # ``_boxplot_doc``.

    # This entry point has no ``backend`` parameter; it always asks for the
    # "matplotlib" backend.
    draw_boxplot = _get_plot_backend("matplotlib").boxplot
    boxplot_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
        **kwargs,
    }
    return draw_boxplot(data, **boxplot_options)

514 

515 

@Substitution(data="", backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self: DataFrame,
    column=None,
    by=None,
    ax=None,
    fontsize: int | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # Deliberately no docstring literal: the decorators above build the
    # documentation from ``_boxplot_doc`` (empty ``data`` entry, ``backend``
    # entry taken from ``_backend_doc``), and a literal here would be merged
    # into that text.

    # ``backend`` only selects the plotting module; it is not forwarded.
    draw_boxplot = _get_plot_backend(backend).boxplot_frame
    boxplot_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
        **kwargs,
    }
    return draw_boxplot(self, **boxplot_options)

546 

547 

def boxplot_frame_groupby(
    grouped: DataFrameGroupBy,
    subplots: bool = True,
    column=None,
    fontsize: int | None = None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize: tuple[float, float] | None = None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Draw box plots for the groups of a DataFrameGroupBy.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Any valid input to groupby is accepted.
    fontsize : float or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        Arrangement of the plot as (rows, columns).
    sharex : bool, default False
        Whether the subplots share their x-axes.
    sharey : bool, default True
        Whether the subplots share their y-axes.
    backend : str, default None
        Plotting backend to use for this call (for instance 'matplotlib')
        in place of the one configured in the ``plotting.backend`` option.
        To select the backend for the whole session, set
        ``pd.options.plotting.backend`` instead.
    **kwargs
        Every other plotting keyword argument, handed on to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index), 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP
    """
    # ``backend`` only selects the plotting module; it is not forwarded.
    draw_boxplot = _get_plot_backend(backend).boxplot_frame_groupby
    boxplot_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
        **kwargs,
    }
    return draw_boxplot(grouped, **boxplot_options)

637 

638 

639class PlotAccessor(PandasObject): 

640 """ 

641 Make plots of Series or DataFrame. 

642 

643 Uses the backend specified by the 

644 option ``plotting.backend``. By default, matplotlib is used. 

645 

646 Parameters 

647 ---------- 

648 data : Series or DataFrame 

649 The object for which the method is called. 

650 x : label or position, default None 

651 Only used if data is a DataFrame. 

652 y : label, position or list of label, positions, default None 

653 Allows plotting of one column versus another. Only used if data is a 

654 DataFrame. 

655 kind : str 

656 The kind of plot to produce: 

657 

658 - 'line' : line plot (default) 

659 - 'bar' : vertical bar plot 

660 - 'barh' : horizontal bar plot 

661 - 'hist' : histogram 

662 - 'box' : boxplot 

663 - 'kde' : Kernel Density Estimation plot 

664 - 'density' : same as 'kde' 

665 - 'area' : area plot 

666 - 'pie' : pie plot 

667 - 'scatter' : scatter plot (DataFrame only) 

668 - 'hexbin' : hexbin plot (DataFrame only) 

669 ax : matplotlib axes object, default None 

670 An axes of the current figure. 

671 subplots : bool or sequence of iterables, default False 

672 Whether to group columns into subplots: 

673 

674 - ``False`` : No subplots will be used 

675 - ``True`` : Make separate subplots for each column. 

676 - sequence of iterables of column labels: Create a subplot for each 

677 group of columns. For example `[('a', 'c'), ('b', 'd')]` will 

678 create 2 subplots: one with columns 'a' and 'c', and one 

679 with columns 'b' and 'd'. Remaining columns that aren't specified 

680 will be plotted in additional subplots (one per column). 

681 

682 .. versionadded:: 1.5.0 

683 

684 sharex : bool, default True if ax is None else False 

685 In case ``subplots=True``, share x axis and set some x axis labels 

686 to invisible; defaults to True if ax is None otherwise False if 

687 an ax is passed in; Be aware, that passing in both an ax and 

688 ``sharex=True`` will alter all x axis labels for all axis in a figure. 

689 sharey : bool, default False 

690 In case ``subplots=True``, share y axis and set some y axis labels to invisible. 

691 layout : tuple, optional 

692 (rows, columns) for the layout of subplots. 

693 figsize : a tuple (width, height) in inches 

694 Size of a figure object. 

695 use_index : bool, default True 

696 Use index as ticks for x axis. 

697 title : str or list 

698 Title to use for the plot. If a string is passed, print the string 

699 at the top of the figure. If a list is passed and `subplots` is 

700 True, print each item in the list above the corresponding subplot. 

701 grid : bool, default None (matlab style default) 

702 Axis grid lines. 

703 legend : bool or {'reverse'} 

704 Place legend on axis subplots. 

705 style : list or dict 

706 The matplotlib line style per column. 

707 logx : bool or 'sym', default False 

708 Use log scaling or symlog scaling on x axis. 

709 

710 logy : bool or 'sym' default False 

711 Use log scaling or symlog scaling on y axis. 

712 

713 loglog : bool or 'sym', default False 

714 Use log scaling or symlog scaling on both x and y axes. 

715 

716 xticks : sequence 

717 Values to use for the xticks. 

718 yticks : sequence 

719 Values to use for the yticks. 

720 xlim : 2-tuple/list 

721 Set the x limits of the current axes. 

722 ylim : 2-tuple/list 

723 Set the y limits of the current axes. 

724 xlabel : label, optional 

725 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the 

726 x-column name for planar plots. 

727 

728 .. versionchanged:: 2.0.0 

729 

730 Now applicable to histograms. 

731 

732 ylabel : label, optional 

733 Name to use for the ylabel on y-axis. Default will show no ylabel, or the 

734 y-column name for planar plots. 

735 

736 .. versionchanged:: 2.0.0 

737 

738 Now applicable to histograms. 

739 

740 rot : float, default None 

741 Rotation for ticks (xticks for vertical, yticks for horizontal 

742 plots). 

743 fontsize : float, default None 

744 Font size for xticks and yticks. 

745 colormap : str or matplotlib colormap object, default None 

746 Colormap to select colors from. If string, load colormap with that 

747 name from matplotlib. 

748 colorbar : bool, optional 

749 If True, plot colorbar (only relevant for 'scatter' and 'hexbin' 

750 plots). 

751 position : float 

752 Specify relative alignments for bar plot layout. 

753 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

754 (center). 

755 table : bool, Series or DataFrame, default False 

756 If True, draw a table using the data in the DataFrame and the data 

757 will be transposed to meet matplotlib's default layout. 

758 If a Series or DataFrame is passed, use passed data to draw a 

759 table. 

760 yerr : DataFrame, Series, array-like, dict and str 

761 See :ref:`Plotting with Error Bars <visualization.errorbars>` for 

762 detail. 

763 xerr : DataFrame, Series, array-like, dict and str 

764 Equivalent to yerr. 

765 stacked : bool, default False in line and bar plots, and True in area plot 

766 If True, create stacked plot. 

767 secondary_y : bool or sequence, default False 

768 Whether to plot on the secondary y-axis if a list/tuple, which 

769 columns to plot on secondary y-axis. 

770 mark_right : bool, default True 

771 When using a secondary_y axis, automatically mark the column 

772 labels with "(right)" in the legend. 

773 include_bool : bool, default is False 

774 If True, boolean values can be plotted. 

775 backend : str, default None 

776 Backend to use instead of the backend specified in the option 

777 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to 

778 specify the ``plotting.backend`` for the whole session, set 

779 ``pd.options.plotting.backend``. 

780 **kwargs 

781 Options to pass to matplotlib plotting method. 

782 

783 Returns 

784 ------- 

785 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

786 If the backend is not the default matplotlib one, the return value 

787 will be the object returned by the backend. 

788 

789 Notes 

790 ----- 

791 - See matplotlib documentation online for more on this subject 

792 - If `kind` = 'bar' or 'barh', you can specify relative alignments 

793 for bar plot layout by `position` keyword. 

794 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

795 (center) 

796 

797 Examples 

798 -------- 

799 For Series: 

800 

801 .. plot:: 

802 :context: close-figs 

803 

804 >>> ser = pd.Series([1, 2, 3, 3]) 

805 >>> plot = ser.plot(kind='hist', title="My plot") 

806 

807 For DataFrame: 

808 

809 .. plot:: 

810 :context: close-figs 

811 

812 >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3], 

813 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}, 

814 ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse']) 

815 >>> plot = df.plot(title="DataFrame Plot") 

816 

817 For SeriesGroupBy: 

818 

819 .. plot:: 

820 :context: close-figs 

821 

822 >>> lst = [-1, -2, -3, 1, 2, 3] 

823 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) 

824 >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot") 

825 

826 For DataFrameGroupBy: 

827 

828 .. plot:: 

829 :context: close-figs 

830 

831 >>> df = pd.DataFrame({"col1" : [1, 2, 3, 4], 

832 ... "col2" : ["A", "B", "A", "B"]}) 

833 >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") 

834 """ 

835 

836 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") 

837 _series_kinds = ("pie",) 

838 _dataframe_kinds = ("scatter", "hexbin") 

839 _kind_aliases = {"density": "kde"} 

840 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds 

841 

    def __init__(self, data: Series | DataFrame) -> None:
        """Keep ``data`` as ``_parent``, the object that ``__call__`` plots."""
        self._parent = data

844 

845 @staticmethod 

846 def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs): 

847 """ 

848 This function makes calls to this accessor `__call__` method compatible 

849 with the previous `SeriesPlotMethods.__call__` and 

850 `DataFramePlotMethods.__call__`. Those had slightly different 

851 signatures, since `DataFramePlotMethods` accepted `x` and `y` 

852 parameters. 

853 """ 

854 if isinstance(data, ABCSeries): 

855 arg_def = [ 

856 ("kind", "line"), 

857 ("ax", None), 

858 ("figsize", None), 

859 ("use_index", True), 

860 ("title", None), 

861 ("grid", None), 

862 ("legend", False), 

863 ("style", None), 

864 ("logx", False), 

865 ("logy", False), 

866 ("loglog", False), 

867 ("xticks", None), 

868 ("yticks", None), 

869 ("xlim", None), 

870 ("ylim", None), 

871 ("rot", None), 

872 ("fontsize", None), 

873 ("colormap", None), 

874 ("table", False), 

875 ("yerr", None), 

876 ("xerr", None), 

877 ("label", None), 

878 ("secondary_y", False), 

879 ("xlabel", None), 

880 ("ylabel", None), 

881 ] 

882 elif isinstance(data, ABCDataFrame): 

883 arg_def = [ 

884 ("x", None), 

885 ("y", None), 

886 ("kind", "line"), 

887 ("ax", None), 

888 ("subplots", False), 

889 ("sharex", None), 

890 ("sharey", False), 

891 ("layout", None), 

892 ("figsize", None), 

893 ("use_index", True), 

894 ("title", None), 

895 ("grid", None), 

896 ("legend", True), 

897 ("style", None), 

898 ("logx", False), 

899 ("logy", False), 

900 ("loglog", False), 

901 ("xticks", None), 

902 ("yticks", None), 

903 ("xlim", None), 

904 ("ylim", None), 

905 ("rot", None), 

906 ("fontsize", None), 

907 ("colormap", None), 

908 ("table", False), 

909 ("yerr", None), 

910 ("xerr", None), 

911 ("secondary_y", False), 

912 ("xlabel", None), 

913 ("ylabel", None), 

914 ] 

915 else: 

916 raise TypeError( 

917 f"Called plot accessor for type {type(data).__name__}, " 

918 "expected Series or DataFrame" 

919 ) 

920 

921 if args and isinstance(data, ABCSeries): 

922 positional_args = str(args)[1:-1] 

923 keyword_args = ", ".join( 

924 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] 

925 ) 

926 msg = ( 

927 "`Series.plot()` should not be called with positional " 

928 "arguments, only keyword arguments. The order of " 

929 "positional arguments will change in the future. " 

930 f"Use `Series.plot({keyword_args})` instead of " 

931 f"`Series.plot({positional_args})`." 

932 ) 

933 raise TypeError(msg) 

934 

935 pos_args = {name: value for (name, _), value in zip(arg_def, args)} 

936 if backend_name == "pandas.plotting._matplotlib": 

937 kwargs = dict(arg_def, **pos_args, **kwargs) 

938 else: 

939 kwargs = dict(pos_args, **kwargs) 

940 

941 x = kwargs.pop("x", None) 

942 y = kwargs.pop("y", None) 

943 kind = kwargs.pop("kind", "line") 

944 return x, y, kind, kwargs 

945 

946 def __call__(self, *args, **kwargs): 

947 plot_backend = _get_plot_backend(kwargs.pop("backend", None)) 

948 

949 x, y, kind, kwargs = self._get_call_args( 

950 plot_backend.__name__, self._parent, args, kwargs 

951 ) 

952 

953 kind = self._kind_aliases.get(kind, kind) 

954 

955 # when using another backend, get out of the way 

956 if plot_backend.__name__ != "pandas.plotting._matplotlib": 

957 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs) 

958 

959 if kind not in self._all_kinds: 

960 raise ValueError( 

961 f"{kind} is not a valid plot kind " 

962 f"Valid plot kinds: {self._all_kinds}" 

963 ) 

964 

965 # The original data structured can be transformed before passed to the 

966 # backend. For example, for DataFrame is common to set the index as the 

967 # `x` parameter, and return a Series with the parameter `y` as values. 

968 data = self._parent.copy() 

969 

970 if isinstance(data, ABCSeries): 

971 kwargs["reuse_plot"] = True 

972 

973 if kind in self._dataframe_kinds: 

974 if isinstance(data, ABCDataFrame): 

975 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) 

976 else: 

977 raise ValueError(f"plot kind {kind} can only be used for data frames") 

978 elif kind in self._series_kinds: 

979 if isinstance(data, ABCDataFrame): 

980 if y is None and kwargs.get("subplots") is False: 

981 raise ValueError( 

982 f"{kind} requires either y column or 'subplots=True'" 

983 ) 

984 if y is not None: 

985 if is_integer(y) and not data.columns._holds_integer(): 

986 y = data.columns[y] 

987 # converted to series actually. copy to not modify 

988 data = data[y].copy() 

989 data.index.name = y 

990 elif isinstance(data, ABCDataFrame): 

991 data_cols = data.columns 

992 if x is not None: 

993 if is_integer(x) and not data.columns._holds_integer(): 

994 x = data_cols[x] 

995 elif not isinstance(data[x], ABCSeries): 

996 raise ValueError("x must be a label or position") 

997 data = data.set_index(x) 

998 if y is not None: 

999 # check if we have y as int or list of ints 

1000 int_ylist = is_list_like(y) and all(is_integer(c) for c in y) 

1001 int_y_arg = is_integer(y) or int_ylist 

1002 if int_y_arg and not data.columns._holds_integer(): 

1003 y = data_cols[y] 

1004 

1005 label_kw = kwargs["label"] if "label" in kwargs else False 

1006 for kw in ["xerr", "yerr"]: 

1007 if kw in kwargs and ( 

1008 isinstance(kwargs[kw], str) or is_integer(kwargs[kw]) 

1009 ): 

1010 try: 

1011 kwargs[kw] = data[kwargs[kw]] 

1012 except (IndexError, KeyError, TypeError): 

1013 pass 

1014 

1015 # don't overwrite 

1016 data = data[y].copy() 

1017 

1018 if isinstance(data, ABCSeries): 

1019 label_name = label_kw or y 

1020 data.name = label_name 

1021 else: 

1022 match = is_list_like(label_kw) and len(label_kw) == len(y) 

1023 if label_kw and not match: 

1024 raise ValueError( 

1025 "label should be list-like and same length as y" 

1026 ) 

1027 label_name = label_kw or data.columns 

1028 data.columns = label_name 

1029 

1030 return plot_backend.plot(data, kind=kind, **kwargs) 

1031 

1032 __call__.__doc__ = __doc__ 

1033 

1034 @Appender( 

1035 """ 

1036 See Also 

1037 -------- 

1038 matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. 

1039 

1040 Examples 

1041 -------- 

1042 

1043 .. plot:: 

1044 :context: close-figs 

1045 

1046 >>> s = pd.Series([1, 3, 2]) 

1047 >>> s.plot.line() # doctest: +SKIP 

1048 

1049 .. plot:: 

1050 :context: close-figs 

1051 

1052 The following example shows the populations for some animals 

1053 over the years. 

1054 

1055 >>> df = pd.DataFrame({ 

1056 ... 'pig': [20, 18, 489, 675, 1776], 

1057 ... 'horse': [4, 25, 281, 600, 1900] 

1058 ... }, index=[1990, 1997, 2003, 2009, 2014]) 

1059 >>> lines = df.plot.line() 

1060 

1061 .. plot:: 

1062 :context: close-figs 

1063 

1064 An example with subplots, so an array of axes is returned. 

1065 

1066 >>> axes = df.plot.line(subplots=True) 

1067 >>> type(axes) 

1068 <class 'numpy.ndarray'> 

1069 

1070 .. plot:: 

1071 :context: close-figs 

1072 

1073 Let's repeat the same example, but specifying colors for 

1074 each column (in this case, for each animal). 

1075 

1076 >>> axes = df.plot.line( 

1077 ... subplots=True, color={"pig": "pink", "horse": "#742802"} 

1078 ... ) 

1079 

1080 .. plot:: 

1081 :context: close-figs 

1082 

1083 The following example shows the relationship between both 

1084 populations. 

1085 

1086 >>> lines = df.plot.line(x='pig', y='horse') 

1087 """ 

1088 ) 

1089 @Substitution(kind="line") 

1090 @Appender(_bar_or_line_doc) 

1091 def line( 

1092 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs 

1093 ) -> PlotAccessor: 

1094 """ 

1095 Plot Series or DataFrame as lines. 

1096 

1097 This function is useful to plot lines using DataFrame's values 

1098 as coordinates. 

1099 """ 

1100 return self(kind="line", x=x, y=y, **kwargs) 

1101 

1102 @Appender( 

1103 """ 

1104 See Also 

1105 -------- 

1106 DataFrame.plot.barh : Horizontal bar plot. 

1107 DataFrame.plot : Make plots of a DataFrame. 

1108 matplotlib.pyplot.bar : Make a bar plot with matplotlib. 

1109 

1110 Examples 

1111 -------- 

1112 Basic plot. 

1113 

1114 .. plot:: 

1115 :context: close-figs 

1116 

1117 >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) 

1118 >>> ax = df.plot.bar(x='lab', y='val', rot=0) 

1119 

1120 Plot a whole dataframe to a bar plot. Each column is assigned a 

1121 distinct color, and each row is nested in a group along the 

1122 horizontal axis. 

1123 

1124 .. plot:: 

1125 :context: close-figs 

1126 

1127 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] 

1128 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] 

1129 >>> index = ['snail', 'pig', 'elephant', 

1130 ... 'rabbit', 'giraffe', 'coyote', 'horse'] 

1131 >>> df = pd.DataFrame({'speed': speed, 

1132 ... 'lifespan': lifespan}, index=index) 

1133 >>> ax = df.plot.bar(rot=0) 

1134 

1135 Plot stacked bar charts for the DataFrame 

1136 

1137 .. plot:: 

1138 :context: close-figs 

1139 

1140 >>> ax = df.plot.bar(stacked=True) 

1141 

1142 Instead of nesting, the figure can be split by column with 

1143 ``subplots=True``. In this case, a :class:`numpy.ndarray` of 

1144 :class:`matplotlib.axes.Axes` are returned. 

1145 

1146 .. plot:: 

1147 :context: close-figs 

1148 

1149 >>> axes = df.plot.bar(rot=0, subplots=True) 

1150 >>> axes[1].legend(loc=2) # doctest: +SKIP 

1151 

1152 If you don't like the default colours, you can specify how you'd 

1153 like each column to be colored. 

1154 

1155 .. plot:: 

1156 :context: close-figs 

1157 

1158 >>> axes = df.plot.bar( 

1159 ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"} 

1160 ... ) 

1161 >>> axes[1].legend(loc=2) # doctest: +SKIP 

1162 

1163 Plot a single column. 

1164 

1165 .. plot:: 

1166 :context: close-figs 

1167 

1168 >>> ax = df.plot.bar(y='speed', rot=0) 

1169 

1170 Plot only selected categories for the DataFrame. 

1171 

1172 .. plot:: 

1173 :context: close-figs 

1174 

1175 >>> ax = df.plot.bar(x='lifespan', rot=0) 

1176 """ 

1177 ) 

1178 @Substitution(kind="bar") 

1179 @Appender(_bar_or_line_doc) 

1180 def bar( # pylint: disable=disallowed-name 

1181 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs 

1182 ) -> PlotAccessor: 

1183 """ 

1184 Vertical bar plot. 

1185 

1186 A bar plot is a plot that presents categorical data with 

1187 rectangular bars with lengths proportional to the values that they 

1188 represent. A bar plot shows comparisons among discrete categories. One 

1189 axis of the plot shows the specific categories being compared, and the 

1190 other axis represents a measured value. 

1191 """ 

1192 return self(kind="bar", x=x, y=y, **kwargs) 

1193 

1194 @Appender( 

1195 """ 

1196 See Also 

1197 -------- 

1198 DataFrame.plot.bar: Vertical bar plot. 

1199 DataFrame.plot : Make plots of DataFrame using matplotlib. 

1200 matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib. 

1201 

1202 Examples 

1203 -------- 

1204 Basic example 

1205 

1206 .. plot:: 

1207 :context: close-figs 

1208 

1209 >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]}) 

1210 >>> ax = df.plot.barh(x='lab', y='val') 

1211 

1212 Plot a whole DataFrame to a horizontal bar plot 

1213 

1214 .. plot:: 

1215 :context: close-figs 

1216 

1217 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] 

1218 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] 

1219 >>> index = ['snail', 'pig', 'elephant', 

1220 ... 'rabbit', 'giraffe', 'coyote', 'horse'] 

1221 >>> df = pd.DataFrame({'speed': speed, 

1222 ... 'lifespan': lifespan}, index=index) 

1223 >>> ax = df.plot.barh() 

1224 

1225 Plot stacked barh charts for the DataFrame 

1226 

1227 .. plot:: 

1228 :context: close-figs 

1229 

1230 >>> ax = df.plot.barh(stacked=True) 

1231 

1232 We can specify colors for each column 

1233 

1234 .. plot:: 

1235 :context: close-figs 

1236 

1237 >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"}) 

1238 

1239 Plot a column of the DataFrame to a horizontal bar plot 

1240 

1241 .. plot:: 

1242 :context: close-figs 

1243 

1244 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] 

1245 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] 

1246 >>> index = ['snail', 'pig', 'elephant', 

1247 ... 'rabbit', 'giraffe', 'coyote', 'horse'] 

1248 >>> df = pd.DataFrame({'speed': speed, 

1249 ... 'lifespan': lifespan}, index=index) 

1250 >>> ax = df.plot.barh(y='speed') 

1251 

1252 Plot DataFrame versus the desired column 

1253 

1254 .. plot:: 

1255 :context: close-figs 

1256 

1257 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] 

1258 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] 

1259 >>> index = ['snail', 'pig', 'elephant', 

1260 ... 'rabbit', 'giraffe', 'coyote', 'horse'] 

1261 >>> df = pd.DataFrame({'speed': speed, 

1262 ... 'lifespan': lifespan}, index=index) 

1263 >>> ax = df.plot.barh(x='lifespan') 

1264 """ 

1265 ) 

1266 @Substitution(kind="bar") 

1267 @Appender(_bar_or_line_doc) 

1268 def barh( 

1269 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs 

1270 ) -> PlotAccessor: 

1271 """ 

1272 Make a horizontal bar plot. 

1273 

1274 A horizontal bar plot is a plot that presents quantitative data with 

1275 rectangular bars with lengths proportional to the values that they 

1276 represent. A bar plot shows comparisons among discrete categories. One 

1277 axis of the plot shows the specific categories being compared, and the 

1278 other axis represents a measured value. 

1279 """ 

1280 return self(kind="barh", x=x, y=y, **kwargs) 

1281 

1282 def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor: 

1283 r""" 

1284 Make a box plot of the DataFrame columns. 

1285 

1286 A box plot is a method for graphically depicting groups of numerical 

1287 data through their quartiles. 

1288 The box extends from the Q1 to Q3 quartile values of the data, 

1289 with a line at the median (Q2). The whiskers extend from the edges 

1290 of box to show the range of the data. The position of the whiskers 

1291 is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the 

1292 box. Outlier points are those past the end of the whiskers. 

1293 

1294 For further details see Wikipedia's 

1295 entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__. 

1296 

1297 A consideration when using this chart is that the box and the whiskers 

1298 can overlap, which is very common when plotting small sets of data. 

1299 

1300 Parameters 

1301 ---------- 

1302 by : str or sequence 

1303 Column in the DataFrame to group by. 

1304 

1305 .. versionchanged:: 1.4.0 

1306 

1307 Previously, `by` is silently ignore and makes no groupings 

1308 

1309 **kwargs 

1310 Additional keywords are documented in 

1311 :meth:`DataFrame.plot`. 

1312 

1313 Returns 

1314 ------- 

1315 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

1316 

1317 See Also 

1318 -------- 

1319 DataFrame.boxplot: Another method to draw a box plot. 

1320 Series.plot.box: Draw a box plot from a Series object. 

1321 matplotlib.pyplot.boxplot: Draw a box plot in matplotlib. 

1322 

1323 Examples 

1324 -------- 

1325 Draw a box plot from a DataFrame with four columns of randomly 

1326 generated data. 

1327 

1328 .. plot:: 

1329 :context: close-figs 

1330 

1331 >>> data = np.random.randn(25, 4) 

1332 >>> df = pd.DataFrame(data, columns=list('ABCD')) 

1333 >>> ax = df.plot.box() 

1334 

1335 You can also generate groupings if you specify the `by` parameter (which 

1336 can take a column name, or a list or tuple of column names): 

1337 

1338 .. versionchanged:: 1.4.0 

1339 

1340 .. plot:: 

1341 :context: close-figs 

1342 

1343 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] 

1344 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) 

1345 >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8)) 

1346 """ 

1347 return self(kind="box", by=by, **kwargs) 

1348 

1349 def hist( 

1350 self, by: IndexLabel | None = None, bins: int = 10, **kwargs 

1351 ) -> PlotAccessor: 

1352 """ 

1353 Draw one histogram of the DataFrame's columns. 

1354 

1355 A histogram is a representation of the distribution of data. 

1356 This function groups the values of all given Series in the DataFrame 

1357 into bins and draws all bins in one :class:`matplotlib.axes.Axes`. 

1358 This is useful when the DataFrame's Series are in a similar scale. 

1359 

1360 Parameters 

1361 ---------- 

1362 by : str or sequence, optional 

1363 Column in the DataFrame to group by. 

1364 

1365 .. versionchanged:: 1.4.0 

1366 

1367 Previously, `by` is silently ignore and makes no groupings 

1368 

1369 bins : int, default 10 

1370 Number of histogram bins to be used. 

1371 **kwargs 

1372 Additional keyword arguments are documented in 

1373 :meth:`DataFrame.plot`. 

1374 

1375 Returns 

1376 ------- 

1377 class:`matplotlib.AxesSubplot` 

1378 Return a histogram plot. 

1379 

1380 See Also 

1381 -------- 

1382 DataFrame.hist : Draw histograms per DataFrame's Series. 

1383 Series.hist : Draw a histogram with Series' data. 

1384 

1385 Examples 

1386 -------- 

1387 When we roll a die 6000 times, we expect to get each value around 1000 

1388 times. But when we roll two dice and sum the result, the distribution 

1389 is going to be quite different. A histogram illustrates those 

1390 distributions. 

1391 

1392 .. plot:: 

1393 :context: close-figs 

1394 

1395 >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one']) 

1396 >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) 

1397 >>> ax = df.plot.hist(bins=12, alpha=0.5) 

1398 

1399 A grouped histogram can be generated by providing the parameter `by` (which 

1400 can be a column name, or a list of column names): 

1401 

1402 .. plot:: 

1403 :context: close-figs 

1404 

1405 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85] 

1406 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) 

1407 >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8)) 

1408 """ 

1409 return self(kind="hist", by=by, bins=bins, **kwargs) 

1410 

1411 def kde( 

1412 self, 

1413 bw_method: Literal["scott", "silverman"] | float | Callable | None = None, 

1414 ind: np.ndarray | int | None = None, 

1415 **kwargs, 

1416 ) -> PlotAccessor: 

1417 """ 

1418 Generate Kernel Density Estimate plot using Gaussian kernels. 

1419 

1420 In statistics, `kernel density estimation`_ (KDE) is a non-parametric 

1421 way to estimate the probability density function (PDF) of a random 

1422 variable. This function uses Gaussian kernels and includes automatic 

1423 bandwidth determination. 

1424 

1425 .. _kernel density estimation: 

1426 https://en.wikipedia.org/wiki/Kernel_density_estimation 

1427 

1428 Parameters 

1429 ---------- 

1430 bw_method : str, scalar or callable, optional 

1431 The method used to calculate the estimator bandwidth. This can be 

1432 'scott', 'silverman', a scalar constant or a callable. 

1433 If None (default), 'scott' is used. 

1434 See :class:`scipy.stats.gaussian_kde` for more information. 

1435 ind : NumPy array or int, optional 

1436 Evaluation points for the estimated PDF. If None (default), 

1437 1000 equally spaced points are used. If `ind` is a NumPy array, the 

1438 KDE is evaluated at the points passed. If `ind` is an integer, 

1439 `ind` number of equally spaced points are used. 

1440 **kwargs 

1441 Additional keyword arguments are documented in 

1442 :meth:`DataFrame.plot`. 

1443 

1444 Returns 

1445 ------- 

1446 matplotlib.axes.Axes or numpy.ndarray of them 

1447 

1448 See Also 

1449 -------- 

1450 scipy.stats.gaussian_kde : Representation of a kernel-density 

1451 estimate using Gaussian kernels. This is the function used 

1452 internally to estimate the PDF. 

1453 

1454 Examples 

1455 -------- 

1456 Given a Series of points randomly sampled from an unknown 

1457 distribution, estimate its PDF using KDE with automatic 

1458 bandwidth determination and plot the results, evaluating them at 

1459 1000 equally spaced points (default): 

1460 

1461 .. plot:: 

1462 :context: close-figs 

1463 

1464 >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) 

1465 >>> ax = s.plot.kde() 

1466 

1467 A scalar bandwidth can be specified. Using a small bandwidth value can 

1468 lead to over-fitting, while using a large bandwidth value may result 

1469 in under-fitting: 

1470 

1471 .. plot:: 

1472 :context: close-figs 

1473 

1474 >>> ax = s.plot.kde(bw_method=0.3) 

1475 

1476 .. plot:: 

1477 :context: close-figs 

1478 

1479 >>> ax = s.plot.kde(bw_method=3) 

1480 

1481 Finally, the `ind` parameter determines the evaluation points for the 

1482 plot of the estimated PDF: 

1483 

1484 .. plot:: 

1485 :context: close-figs 

1486 

1487 >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) 

1488 

1489 For DataFrame, it works in the same way: 

1490 

1491 .. plot:: 

1492 :context: close-figs 

1493 

1494 >>> df = pd.DataFrame({ 

1495 ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], 

1496 ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], 

1497 ... }) 

1498 >>> ax = df.plot.kde() 

1499 

1500 A scalar bandwidth can be specified. Using a small bandwidth value can 

1501 lead to over-fitting, while using a large bandwidth value may result 

1502 in under-fitting: 

1503 

1504 .. plot:: 

1505 :context: close-figs 

1506 

1507 >>> ax = df.plot.kde(bw_method=0.3) 

1508 

1509 .. plot:: 

1510 :context: close-figs 

1511 

1512 >>> ax = df.plot.kde(bw_method=3) 

1513 

1514 Finally, the `ind` parameter determines the evaluation points for the 

1515 plot of the estimated PDF: 

1516 

1517 .. plot:: 

1518 :context: close-figs 

1519 

1520 >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6]) 

1521 """ 

1522 return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) 

1523 

1524 density = kde 

1525 

1526 def area( 

1527 self, 

1528 x: Hashable | None = None, 

1529 y: Hashable | None = None, 

1530 stacked: bool = True, 

1531 **kwargs, 

1532 ) -> PlotAccessor: 

1533 """ 

1534 Draw a stacked area plot. 

1535 

1536 An area plot displays quantitative data visually. 

1537 This function wraps the matplotlib area function. 

1538 

1539 Parameters 

1540 ---------- 

1541 x : label or position, optional 

1542 Coordinates for the X axis. By default uses the index. 

1543 y : label or position, optional 

1544 Column to plot. By default uses all columns. 

1545 stacked : bool, default True 

1546 Area plots are stacked by default. Set to False to create a 

1547 unstacked plot. 

1548 **kwargs 

1549 Additional keyword arguments are documented in 

1550 :meth:`DataFrame.plot`. 

1551 

1552 Returns 

1553 ------- 

1554 matplotlib.axes.Axes or numpy.ndarray 

1555 Area plot, or array of area plots if subplots is True. 

1556 

1557 See Also 

1558 -------- 

1559 DataFrame.plot : Make plots of DataFrame using matplotlib / pylab. 

1560 

1561 Examples 

1562 -------- 

1563 Draw an area plot based on basic business metrics: 

1564 

1565 .. plot:: 

1566 :context: close-figs 

1567 

1568 >>> df = pd.DataFrame({ 

1569 ... 'sales': [3, 2, 3, 9, 10, 6], 

1570 ... 'signups': [5, 5, 6, 12, 14, 13], 

1571 ... 'visits': [20, 42, 28, 62, 81, 50], 

1572 ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', 

1573 ... freq='ME')) 

1574 >>> ax = df.plot.area() 

1575 

1576 Area plots are stacked by default. To produce an unstacked plot, 

1577 pass ``stacked=False``: 

1578 

1579 .. plot:: 

1580 :context: close-figs 

1581 

1582 >>> ax = df.plot.area(stacked=False) 

1583 

1584 Draw an area plot for a single column: 

1585 

1586 .. plot:: 

1587 :context: close-figs 

1588 

1589 >>> ax = df.plot.area(y='sales') 

1590 

1591 Draw with a different `x`: 

1592 

1593 .. plot:: 

1594 :context: close-figs 

1595 

1596 >>> df = pd.DataFrame({ 

1597 ... 'sales': [3, 2, 3], 

1598 ... 'visits': [20, 42, 28], 

1599 ... 'day': [1, 2, 3], 

1600 ... }) 

1601 >>> ax = df.plot.area(x='day') 

1602 """ 

1603 return self(kind="area", x=x, y=y, stacked=stacked, **kwargs) 

1604 

1605 def pie(self, **kwargs) -> PlotAccessor: 

1606 """ 

1607 Generate a pie plot. 

1608 

1609 A pie plot is a proportional representation of the numerical data in a 

1610 column. This function wraps :meth:`matplotlib.pyplot.pie` for the 

1611 specified column. If no column reference is passed and 

1612 ``subplots=True`` a pie plot is drawn for each numerical column 

1613 independently. 

1614 

1615 Parameters 

1616 ---------- 

1617 y : int or label, optional 

1618 Label or position of the column to plot. 

1619 If not provided, ``subplots=True`` argument must be passed. 

1620 **kwargs 

1621 Keyword arguments to pass on to :meth:`DataFrame.plot`. 

1622 

1623 Returns 

1624 ------- 

1625 matplotlib.axes.Axes or np.ndarray of them 

1626 A NumPy array is returned when `subplots` is True. 

1627 

1628 See Also 

1629 -------- 

1630 Series.plot.pie : Generate a pie plot for a Series. 

1631 DataFrame.plot : Make plots of a DataFrame. 

1632 

1633 Examples 

1634 -------- 

1635 In the example below we have a DataFrame with the information about 

1636 planet's mass and radius. We pass the 'mass' column to the 

1637 pie function to get a pie plot. 

1638 

1639 .. plot:: 

1640 :context: close-figs 

1641 

1642 >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], 

1643 ... 'radius': [2439.7, 6051.8, 6378.1]}, 

1644 ... index=['Mercury', 'Venus', 'Earth']) 

1645 >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) 

1646 

1647 .. plot:: 

1648 :context: close-figs 

1649 

1650 >>> plot = df.plot.pie(subplots=True, figsize=(11, 6)) 

1651 """ 

1652 if ( 

1653 isinstance(self._parent, ABCDataFrame) 

1654 and kwargs.get("y", None) is None 

1655 and not kwargs.get("subplots", False) 

1656 ): 

1657 raise ValueError("pie requires either y column or 'subplots=True'") 

1658 return self(kind="pie", **kwargs) 

1659 

1660 def scatter( 

1661 self, 

1662 x: Hashable, 

1663 y: Hashable, 

1664 s: Hashable | Sequence[Hashable] | None = None, 

1665 c: Hashable | Sequence[Hashable] | None = None, 

1666 **kwargs, 

1667 ) -> PlotAccessor: 

1668 """ 

1669 Create a scatter plot with varying marker point size and color. 

1670 

1671 The coordinates of each point are defined by two dataframe columns and 

1672 filled circles are used to represent each point. This kind of plot is 

1673 useful to see complex correlations between two variables. Points could 

1674 be for instance natural 2D coordinates like longitude and latitude in 

1675 a map or, in general, any pair of metrics that can be plotted against 

1676 each other. 

1677 

1678 Parameters 

1679 ---------- 

1680 x : int or str 

1681 The column name or column position to be used as horizontal 

1682 coordinates for each point. 

1683 y : int or str 

1684 The column name or column position to be used as vertical 

1685 coordinates for each point. 

1686 s : str, scalar or array-like, optional 

1687 The size of each point. Possible values are: 

1688 

1689 - A string with the name of the column to be used for marker's size. 

1690 

1691 - A single scalar so all points have the same size. 

1692 

1693 - A sequence of scalars, which will be used for each point's size 

1694 recursively. For instance, when passing [2,14] all points size 

1695 will be either 2 or 14, alternatively. 

1696 

1697 c : str, int or array-like, optional 

1698 The color of each point. Possible values are: 

1699 

1700 - A single color string referred to by name, RGB or RGBA code, 

1701 for instance 'red' or '#a98d19'. 

1702 

1703 - A sequence of color strings referred to by name, RGB or RGBA 

1704 code, which will be used for each point's color recursively. For 

1705 instance ['green','yellow'] all points will be filled in green or 

1706 yellow, alternatively. 

1707 

1708 - A column name or position whose values will be used to color the 

1709 marker points according to a colormap. 

1710 

1711 **kwargs 

1712 Keyword arguments to pass on to :meth:`DataFrame.plot`. 

1713 

1714 Returns 

1715 ------- 

1716 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

1717 

1718 See Also 

1719 -------- 

1720 matplotlib.pyplot.scatter : Scatter plot using multiple input data 

1721 formats. 

1722 

1723 Examples 

1724 -------- 

1725 Let's see how to draw a scatter plot using coordinates from the values 

1726 in a DataFrame's columns. 

1727 

1728 .. plot:: 

1729 :context: close-figs 

1730 

1731 >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], 

1732 ... [6.4, 3.2, 1], [5.9, 3.0, 2]], 

1733 ... columns=['length', 'width', 'species']) 

1734 >>> ax1 = df.plot.scatter(x='length', 

1735 ... y='width', 

1736 ... c='DarkBlue') 

1737 

1738 And now with the color determined by a column as well. 

1739 

1740 .. plot:: 

1741 :context: close-figs 

1742 

1743 >>> ax2 = df.plot.scatter(x='length', 

1744 ... y='width', 

1745 ... c='species', 

1746 ... colormap='viridis') 

1747 """ 

1748 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) 

1749 

1750 def hexbin( 

1751 self, 

1752 x: Hashable, 

1753 y: Hashable, 

1754 C: Hashable | None = None, 

1755 reduce_C_function: Callable | None = None, 

1756 gridsize: int | tuple[int, int] | None = None, 

1757 **kwargs, 

1758 ) -> PlotAccessor: 

1759 """ 

1760 Generate a hexagonal binning plot. 

1761 

1762 Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` 

1763 (the default), this is a histogram of the number of occurrences 

1764 of the observations at ``(x[i], y[i])``. 

1765 

1766 If `C` is specified, specifies values at given coordinates 

1767 ``(x[i], y[i])``. These values are accumulated for each hexagonal 

1768 bin and then reduced according to `reduce_C_function`, 

1769 having as default the NumPy's mean function (:meth:`numpy.mean`). 

1770 (If `C` is specified, it must also be a 1-D sequence 

1771 of the same length as `x` and `y`, or a column label.) 

1772 

1773 Parameters 

1774 ---------- 

1775 x : int or str 

1776 The column label or position for x points. 

1777 y : int or str 

1778 The column label or position for y points. 

1779 C : int or str, optional 

1780 The column label or position for the value of `(x, y)` point. 

1781 reduce_C_function : callable, default `np.mean` 

1782 Function of one argument that reduces all the values in a bin to 

1783 a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). 

1784 gridsize : int or tuple of (int, int), default 100 

1785 The number of hexagons in the x-direction. 

1786 The corresponding number of hexagons in the y-direction is 

1787 chosen in a way that the hexagons are approximately regular. 

1788 Alternatively, gridsize can be a tuple with two elements 

1789 specifying the number of hexagons in the x-direction and the 

1790 y-direction. 

1791 **kwargs 

1792 Additional keyword arguments are documented in 

1793 :meth:`DataFrame.plot`. 

1794 

1795 Returns 

1796 ------- 

1797 matplotlib.AxesSubplot 

1798 The matplotlib ``Axes`` on which the hexbin is plotted. 

1799 

1800 See Also 

1801 -------- 

1802 DataFrame.plot : Make plots of a DataFrame. 

1803 matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, 

1804 the matplotlib function that is used under the hood. 

1805 

1806 Examples 

1807 -------- 

1808 The following examples are generated with random data from 

1809 a normal distribution. 

1810 

1811 .. plot:: 

1812 :context: close-figs 

1813 

1814 >>> n = 10000 

1815 >>> df = pd.DataFrame({'x': np.random.randn(n), 

1816 ... 'y': np.random.randn(n)}) 

1817 >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) 

1818 

1819 The next example uses `C` and `np.sum` as `reduce_C_function`. 

1820 Note that `'observations'` values ranges from 1 to 5 but the result 

1821 plot shows values up to more than 25. This is because of the 

1822 `reduce_C_function`. 

1823 

1824 .. plot:: 

1825 :context: close-figs 

1826 

1827 >>> n = 500 

1828 >>> df = pd.DataFrame({ 

1829 ... 'coord_x': np.random.uniform(-3, 3, size=n), 

1830 ... 'coord_y': np.random.uniform(30, 50, size=n), 

1831 ... 'observations': np.random.randint(1,5, size=n) 

1832 ... }) 

1833 >>> ax = df.plot.hexbin(x='coord_x', 

1834 ... y='coord_y', 

1835 ... C='observations', 

1836 ... reduce_C_function=np.sum, 

1837 ... gridsize=10, 

1838 ... cmap="viridis") 

1839 """ 

1840 if reduce_C_function is not None: 

1841 kwargs["reduce_C_function"] = reduce_C_function 

1842 if gridsize is not None: 

1843 kwargs["gridsize"] = gridsize 

1844 

1845 return self(kind="hexbin", x=x, y=y, C=C, **kwargs) 

1846 

1847 

# Cache of already imported plotting backend modules, keyed by the backend
# string that was requested; filled lazily by ``_get_plot_backend``.
_backends: dict[str, types.ModuleType] = {}

1849 

1850 

def _load_backend(backend: str) -> types.ModuleType:
    """
    Load a pandas plotting backend.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If ``backend`` is "matplotlib" and the matplotlib-based backend
        cannot be imported.
    ValueError
        If no entry point or importable module matches ``backend``, or the
        one found does not expose a ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group = "pandas_plotting_backends"
    registered = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(registered, "select"):
        candidates = registered.select(group=group)
    else:
        # Argument 2 to "get" of "dict" has incompatible type "Tuple[]";
        # expected "EntryPoints"  [arg-type]
        candidates = registered.get(group, ())  # type: ignore[arg-type]

    for candidate in candidates:
        if candidate.name == backend:
            module = candidate.load()
            located = True
            break
    else:
        # No registered entry point matched: fall back to treating the
        # identifier as a plain module name.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # The ValueError below reports the failure.
            located = False
        else:
            located = True

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if located and hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )

1918 

1919 

def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    The plotting system of pandas uses matplotlib by default, but the idea here
    is that it can also work with other third-party backends. This function
    returns the module which provides a top-level `.plot` method that will
    actually do the plotting. The backend is specified from a string, which
    either comes from the keyword argument `backend`, or, if not specified, from
    the option `pandas.options.plotting.backend`. All the rest of the code in
    this file uses the backend specified there for the plotting.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    backend_str: str = backend or get_option("plotting.backend")

    # Serve from the module-level cache when this backend was loaded before.
    try:
        return _backends[backend_str]
    except KeyError:
        pass

    loaded = _load_backend(backend_str)
    _backends[backend_str] = loaded
    return loaded