Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/plotting/_core.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

191 statements  

1from __future__ import annotations 

2 

3import importlib 

4import types 

5from typing import ( 

6 TYPE_CHECKING, 

7 Sequence, 

8) 

9 

10from pandas._config import get_option 

11 

12from pandas._typing import IndexLabel 

13from pandas.util._decorators import ( 

14 Appender, 

15 Substitution, 

16) 

17 

18from pandas.core.dtypes.common import ( 

19 is_integer, 

20 is_list_like, 

21) 

22from pandas.core.dtypes.generic import ( 

23 ABCDataFrame, 

24 ABCSeries, 

25) 

26 

27from pandas.core.base import PandasObject 

28 

29if TYPE_CHECKING: 

30 from matplotlib.axes import Axes 

31 

32 from pandas import DataFrame 

33 

34 

def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw a histogram of the Series through the selected plotting backend.

    Parameters
    ----------
    by : object, optional
        Grouping key. When given, a separate histogram is formed per group.
    ax : matplotlib axis object
        Axes to draw on. If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches by default.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given,
        bins + 1 bin edges are calculated and returned. If bins is a
        sequence, it gives the bin edges, including the left edge of the
        first bin and the right edge of the last bin. In this case, bins is
        returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` only selects the implementing module; it is not forwarded.
    options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return _get_plot_backend(backend).hist_series(self, **options, **kwargs)

113 

114 

def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share x axis and set some x axis labels to
        invisible.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.

    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the implementing module; it is not forwarded.
    options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return _get_plot_backend(backend).hist_frame(data, **options, **kwargs)

241 

242 

243_boxplot_doc = """ 

244Make a box plot from DataFrame columns. 

245 

246Make a box-and-whisker plot from DataFrame columns, optionally grouped 

247by some other columns. A box plot is a method for graphically depicting 

248groups of numerical data through their quartiles. 

249The box extends from the Q1 to Q3 quartile values of the data, 

250with a line at the median (Q2). The whiskers extend from the edges 

251of box to show the range of the data. By default, they extend no more than 

252`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest 

253data point within that interval. Outliers are plotted as separate dots. 

254 

255For further details see 

256Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_. 

257 

258Parameters 

259---------- 

260%(data)s\ 

261column : str or list of str, optional 

262 Column name or list of names, or vector. 

263 Can be any valid input to :meth:`pandas.DataFrame.groupby`. 

264by : str or array-like, optional 

265 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. 

266 One box-plot will be done per value of columns in `by`. 

267ax : object of class matplotlib.axes.Axes, optional 

268 The matplotlib axes to be used by boxplot. 

269fontsize : float or str 

270 Tick label font size in points or as a string (e.g., `large`). 

271rot : float, default 0 

272 The rotation angle of labels (in degrees) 

273 with respect to the screen coordinate system. 

274grid : bool, default True 

275 Setting this to True will show the grid. 

276figsize : A tuple (width, height) in inches 

277 The size of the figure to create in matplotlib. 

278layout : tuple (rows, columns), optional 

279 For example, (3, 5) will display the subplots 

280 using 3 rows and 5 columns, starting from the top-left. 

281return_type : {'axes', 'dict', 'both'} or None, default 'axes' 

282 The kind of object to return. The default is ``axes``. 

283 

284 * 'axes' returns the matplotlib axes the boxplot is drawn on. 

285 * 'dict' returns a dictionary whose values are the matplotlib 

286 Lines of the boxplot. 

287 * 'both' returns a namedtuple with the axes and dict. 

288 * when grouping with ``by``, a Series mapping columns to 

289 ``return_type`` is returned. 

290 

291 If ``return_type`` is `None`, a NumPy array 

292 of axes with the same shape as ``layout`` is returned. 

293%(backend)s\ 

294 

295**kwargs 

296 All other plotting keyword arguments to be passed to 

297 :func:`matplotlib.pyplot.boxplot`. 

298 

299Returns 

300------- 

301result 

302 See Notes. 

303 

304See Also 

305-------- 

306pandas.Series.plot.hist: Make a histogram. 

307matplotlib.pyplot.boxplot : Matplotlib equivalent plot. 

308 

309Notes 

310----- 

311The return type depends on the `return_type` parameter: 

312 

313* 'axes' : object of class matplotlib.axes.Axes 

314* 'dict' : dict of matplotlib.lines.Line2D objects 

315* 'both' : a namedtuple with structure (ax, lines) 

316 

317For data grouped with ``by``, return a Series of the above or a numpy 

318array: 

319 

320* :class:`~pandas.Series` 

321* :class:`~numpy.array` (for ``return_type = None``) 

322 

323Use ``return_type='dict'`` when you want to tweak the appearance 

324of the lines after plotting. In this case a dict containing the Lines 

325making up the boxes, caps, fliers, medians, and whiskers is returned. 

326 

327Examples 

328-------- 

329 

330Boxplots can be created for every column in the dataframe 

331by ``df.boxplot()`` or indicating the columns to be used: 

332 

333.. plot:: 

334 :context: close-figs 

335 

336 >>> np.random.seed(1234) 

337 >>> df = pd.DataFrame(np.random.randn(10, 4), 

338 ... columns=['Col1', 'Col2', 'Col3', 'Col4']) 

339 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP 

340 

341Boxplots of variables distributions grouped by the values of a third 

342variable can be created using the option ``by``. For instance: 

343 

344.. plot:: 

345 :context: close-figs 

346 

347 >>> df = pd.DataFrame(np.random.randn(10, 2), 

348 ... columns=['Col1', 'Col2']) 

349 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 

350 ... 'B', 'B', 'B', 'B', 'B']) 

351 >>> boxplot = df.boxplot(by='X') 

352 

353A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot 

354in order to group the data by combination of the variables in the x-axis: 

355 

356.. plot:: 

357 :context: close-figs 

358 

359 >>> df = pd.DataFrame(np.random.randn(10, 3), 

360 ... columns=['Col1', 'Col2', 'Col3']) 

361 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 

362 ... 'B', 'B', 'B', 'B', 'B']) 

363 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', 

364 ... 'B', 'A', 'B', 'A', 'B']) 

365 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) 

366 

367The layout of boxplot can be adjusted giving a tuple to ``layout``: 

368 

369.. plot:: 

370 :context: close-figs 

371 

372 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', 

373 ... layout=(2, 1)) 

374 

375Additional formatting can be done to the boxplot, like suppressing the grid 

376(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) 

377or changing the fontsize (i.e. ``fontsize=15``): 

378 

379.. plot:: 

380 :context: close-figs 

381 

382 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP 

383 

384The parameter ``return_type`` can be used to select the type of element 

385returned by `boxplot`. When ``return_type='axes'`` is selected, 

386the matplotlib axes on which the boxplot is drawn are returned: 

387 

388 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes') 

389 >>> type(boxplot) 

390 <class 'matplotlib.axes._subplots.AxesSubplot'> 

391 

392When grouping with ``by``, a Series mapping columns to ``return_type`` 

393is returned: 

394 

395 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', 

396 ... return_type='axes') 

397 >>> type(boxplot) 

398 <class 'pandas.core.series.Series'> 

399 

400If ``return_type`` is `None`, a NumPy array of axes with the same shape 

401as ``layout`` is returned: 

402 

403 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', 

404 ... return_type=None) 

405 >>> type(boxplot) 

406 <class 'numpy.ndarray'> 

407""" 

408 

409_backend_doc = """\ 

410backend : str, default None 

411 Backend to use instead of the backend specified in the option 

412 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to 

413 specify the ``plotting.backend`` for the whole session, set 

414 ``pd.options.plotting.backend``. 

415""" 

416 

417 

418_bar_or_line_doc = """ 

419 Parameters 

420 ---------- 

421 x : label or position, optional 

422 Allows plotting of one column versus another. If not specified, 

423 the index of the DataFrame is used. 

424 y : label or position, optional 

425 Allows plotting of one column versus another. If not specified, 

426 all numerical columns are used. 

427 color : str, array-like, or dict, optional 

428 The color for each of the DataFrame's columns. Possible values are: 

429 

430 - A single color string referred to by name, RGB or RGBA code, 

431 for instance 'red' or '#a98d19'. 

432 

433 - A sequence of color strings referred to by name, RGB or RGBA 

434 code, which will be used for each column recursively. For 

435 instance ['green','yellow'] each column's %(kind)s will be filled in 

436 green or yellow, alternatively. If there is only a single column to 

437 be plotted, then only the first color from the color list will be 

438 used. 

439 

440 - A dict of the form {column name : color}, so that each column will be 

441 colored accordingly. For example, if your columns are called `a` and 

442 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for 

443 column `a` in green and %(kind)ss for column `b` in red. 

444 

445 .. versionadded:: 1.1.0 

446 

447 **kwargs 

448 Additional keyword arguments are documented in 

449 :meth:`DataFrame.plot`. 

450 

451 Returns 

452 ------- 

453 matplotlib.axes.Axes or np.ndarray of them 

454 An ndarray is returned with one :class:`matplotlib.axes.Axes` 

455 per column when ``subplots=True``. 

456""" 

457 

458 

@Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # Documentation comes from the decorators above (``_boxplot_doc`` with its
    # ``data``/``backend`` placeholders filled in), so no inline docstring is
    # defined here.
    forwarded = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    # This entry point has no ``backend`` parameter; it always asks for the
    # "matplotlib" backend.
    matplotlib_backend = _get_plot_backend("matplotlib")
    return matplotlib_backend.boxplot(data, **forwarded, **kwargs)

488 

489 

@Substitution(data="", backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # Documentation comes from the decorators above (``_boxplot_doc`` with the
    # ``backend`` placeholder filled from ``_backend_doc``), so no inline
    # docstring is defined here.
    forwarded = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    # ``backend`` only selects the implementing module; it is not forwarded.
    return _get_plot_backend(backend).boxplot_frame(self, **forwarded, **kwargs)

520 

521 

def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : float or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` only selects the implementing module; it is not forwarded.
    forwarded = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    backend_module = _get_plot_backend(backend)
    return backend_module.boxplot_frame_groupby(grouped, **forwarded, **kwargs)

611 

612 

613class PlotAccessor(PandasObject): 

614 """ 

615 Make plots of Series or DataFrame. 

616 

617 Uses the backend specified by the 

618 option ``plotting.backend``. By default, matplotlib is used. 

619 

620 Parameters 

621 ---------- 

622 data : Series or DataFrame 

623 The object for which the method is called. 

624 x : label or position, default None 

625 Only used if data is a DataFrame. 

626 y : label, position or list of label, positions, default None 

627 Allows plotting of one column versus another. Only used if data is a 

628 DataFrame. 

629 kind : str 

630 The kind of plot to produce: 

631 

632 - 'line' : line plot (default) 

633 - 'bar' : vertical bar plot 

634 - 'barh' : horizontal bar plot 

635 - 'hist' : histogram 

636 - 'box' : boxplot 

637 - 'kde' : Kernel Density Estimation plot 

638 - 'density' : same as 'kde' 

639 - 'area' : area plot 

640 - 'pie' : pie plot 

641 - 'scatter' : scatter plot (DataFrame only) 

642 - 'hexbin' : hexbin plot (DataFrame only) 

643 ax : matplotlib axes object, default None 

644 An axes of the current figure. 

645 subplots : bool or sequence of iterables, default False 

646 Whether to group columns into subplots: 

647 

648 - ``False`` : No subplots will be used 

649 - ``True`` : Make separate subplots for each column. 

650 - sequence of iterables of column labels: Create a subplot for each 

651 group of columns. For example `[('a', 'c'), ('b', 'd')]` will 

652 create 2 subplots: one with columns 'a' and 'c', and one 

653 with columns 'b' and 'd'. Remaining columns that aren't specified 

654 will be plotted in additional subplots (one per column). 

655 

656 .. versionadded:: 1.5.0 

657 

658 sharex : bool, default True if ax is None else False 

659 In case ``subplots=True``, share x axis and set some x axis labels 

660 to invisible; defaults to True if ax is None otherwise False if 

661 an ax is passed in. Be aware that passing in both an ax and

662 ``sharex=True`` will alter all x axis labels for all axes in a figure.

663 sharey : bool, default False 

664 In case ``subplots=True``, share y axis and set some y axis labels to invisible. 

665 layout : tuple, optional 

666 (rows, columns) for the layout of subplots. 

667 figsize : a tuple (width, height) in inches 

668 Size of a figure object. 

669 use_index : bool, default True 

670 Use index as ticks for x axis. 

671 title : str or list 

672 Title to use for the plot. If a string is passed, print the string 

673 at the top of the figure. If a list is passed and `subplots` is 

674 True, print each item in the list above the corresponding subplot. 

675 grid : bool, default None (matlab style default) 

676 Axis grid lines. 

677 legend : bool or {'reverse'} 

678 Place legend on axis subplots. 

679 style : list or dict 

680 The matplotlib line style per column. 

681 logx : bool or 'sym', default False 

682 Use log scaling or symlog scaling on x axis. 

683 

684 logy : bool or 'sym', default False

685 Use log scaling or symlog scaling on y axis. 

686 

687 loglog : bool or 'sym', default False 

688 Use log scaling or symlog scaling on both x and y axes. 

689 

690 xticks : sequence 

691 Values to use for the xticks. 

692 yticks : sequence 

693 Values to use for the yticks. 

694 xlim : 2-tuple/list 

695 Set the x limits of the current axes. 

696 ylim : 2-tuple/list 

697 Set the y limits of the current axes. 

698 xlabel : label, optional 

699 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the 

700 x-column name for planar plots. 

701 

702 .. versionadded:: 1.1.0 

703 

704 .. versionchanged:: 1.2.0 

705 

706 Now applicable to planar plots (`scatter`, `hexbin`). 

707 

708 .. versionchanged:: 2.0.0 

709 

710 Now applicable to histograms. 

711 

712 ylabel : label, optional 

713 Name to use for the ylabel on y-axis. Default will show no ylabel, or the 

714 y-column name for planar plots. 

715 

716 .. versionadded:: 1.1.0 

717 

718 .. versionchanged:: 1.2.0 

719 

720 Now applicable to planar plots (`scatter`, `hexbin`). 

721 

722 .. versionchanged:: 2.0.0 

723 

724 Now applicable to histograms. 

725 

726 rot : float, default None 

727 Rotation for ticks (xticks for vertical, yticks for horizontal 

728 plots). 

729 fontsize : float, default None 

730 Font size for xticks and yticks. 

731 colormap : str or matplotlib colormap object, default None 

732 Colormap to select colors from. If string, load colormap with that 

733 name from matplotlib. 

734 colorbar : bool, optional 

735 If True, plot colorbar (only relevant for 'scatter' and 'hexbin' 

736 plots). 

737 position : float 

738 Specify relative alignments for bar plot layout. 

739 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

740 (center). 

741 table : bool, Series or DataFrame, default False 

742 If True, draw a table using the data in the DataFrame and the data 

743 will be transposed to meet matplotlib's default layout. 

744 If a Series or DataFrame is passed, use passed data to draw a 

745 table. 

746 yerr : DataFrame, Series, array-like, dict and str 

747 See :ref:`Plotting with Error Bars <visualization.errorbars>` for 

748 detail. 

749 xerr : DataFrame, Series, array-like, dict and str 

750 Equivalent to yerr. 

751 stacked : bool, default False in line and bar plots, and True in area plot 

752 If True, create stacked plot. 

753 secondary_y : bool or sequence, default False 

754 Whether to plot on the secondary y-axis if a list/tuple, which 

755 columns to plot on secondary y-axis. 

756 mark_right : bool, default True 

757 When using a secondary_y axis, automatically mark the column 

758 labels with "(right)" in the legend. 

759 include_bool : bool, default is False 

760 If True, boolean values can be plotted. 

761 backend : str, default None 

762 Backend to use instead of the backend specified in the option 

763 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to 

764 specify the ``plotting.backend`` for the whole session, set 

765 ``pd.options.plotting.backend``. 

766 **kwargs 

767 Options to pass to matplotlib plotting method. 

768 

769 Returns 

770 ------- 

771 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

772 If the backend is not the default matplotlib one, the return value 

773 will be the object returned by the backend. 

774 

775 Notes 

776 ----- 

777 - See matplotlib documentation online for more on this subject 

778 - If `kind` = 'bar' or 'barh', you can specify relative alignments 

779 for bar plot layout by `position` keyword. 

780 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

781 (center) 

782 """ 

783 

784 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") 

785 _series_kinds = ("pie",) 

786 _dataframe_kinds = ("scatter", "hexbin") 

787 _kind_aliases = {"density": "kde"} 

788 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds 

789 

790 def __init__(self, data) -> None: 

791 self._parent = data 

792 

793 @staticmethod 

794 def _get_call_args(backend_name, data, args, kwargs): 

795 """ 

796 This function makes calls to this accessor `__call__` method compatible 

797 with the previous `SeriesPlotMethods.__call__` and 

798 `DataFramePlotMethods.__call__`. Those had slightly different 

799 signatures, since `DataFramePlotMethods` accepted `x` and `y` 

800 parameters. 

801 """ 

802 if isinstance(data, ABCSeries): 

803 arg_def = [ 

804 ("kind", "line"), 

805 ("ax", None), 

806 ("figsize", None), 

807 ("use_index", True), 

808 ("title", None), 

809 ("grid", None), 

810 ("legend", False), 

811 ("style", None), 

812 ("logx", False), 

813 ("logy", False), 

814 ("loglog", False), 

815 ("xticks", None), 

816 ("yticks", None), 

817 ("xlim", None), 

818 ("ylim", None), 

819 ("rot", None), 

820 ("fontsize", None), 

821 ("colormap", None), 

822 ("table", False), 

823 ("yerr", None), 

824 ("xerr", None), 

825 ("label", None), 

826 ("secondary_y", False), 

827 ("xlabel", None), 

828 ("ylabel", None), 

829 ] 

830 elif isinstance(data, ABCDataFrame): 

831 arg_def = [ 

832 ("x", None), 

833 ("y", None), 

834 ("kind", "line"), 

835 ("ax", None), 

836 ("subplots", False), 

837 ("sharex", None), 

838 ("sharey", False), 

839 ("layout", None), 

840 ("figsize", None), 

841 ("use_index", True), 

842 ("title", None), 

843 ("grid", None), 

844 ("legend", True), 

845 ("style", None), 

846 ("logx", False), 

847 ("logy", False), 

848 ("loglog", False), 

849 ("xticks", None), 

850 ("yticks", None), 

851 ("xlim", None), 

852 ("ylim", None), 

853 ("rot", None), 

854 ("fontsize", None), 

855 ("colormap", None), 

856 ("table", False), 

857 ("yerr", None), 

858 ("xerr", None), 

859 ("secondary_y", False), 

860 ("xlabel", None), 

861 ("ylabel", None), 

862 ] 

863 else: 

864 raise TypeError( 

865 f"Called plot accessor for type {type(data).__name__}, " 

866 "expected Series or DataFrame" 

867 ) 

868 

869 if args and isinstance(data, ABCSeries): 

870 positional_args = str(args)[1:-1] 

871 keyword_args = ", ".join( 

872 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] 

873 ) 

874 msg = ( 

875 "`Series.plot()` should not be called with positional " 

876 "arguments, only keyword arguments. The order of " 

877 "positional arguments will change in the future. " 

878 f"Use `Series.plot({keyword_args})` instead of " 

879 f"`Series.plot({positional_args})`." 

880 ) 

881 raise TypeError(msg) 

882 

883 pos_args = {name: value for (name, _), value in zip(arg_def, args)} 

884 if backend_name == "pandas.plotting._matplotlib": 

885 kwargs = dict(arg_def, **pos_args, **kwargs) 

886 else: 

887 kwargs = dict(pos_args, **kwargs) 

888 

889 x = kwargs.pop("x", None) 

890 y = kwargs.pop("y", None) 

891 kind = kwargs.pop("kind", "line") 

892 return x, y, kind, kwargs 

893 

def __call__(self, *args, **kwargs):
    # Entry point used when the accessor itself is called: resolve the
    # plotting backend, normalise the call arguments, reshape the data for the
    # requested plot kind and hand everything to the backend's ``plot``.
    # No docstring is written here on purpose: it is assigned right after the
    # definition.
    backend_module = _get_plot_backend(kwargs.pop("backend", None))
    backend_name = backend_module.__name__

    x, y, kind, kwargs = self._get_call_args(
        backend_name, self._parent, args, kwargs
    )
    kind = self._kind_aliases.get(kind, kind)

    # when using another backend, get out of the way
    if backend_name != "pandas.plotting._matplotlib":
        return backend_module.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

    if kind not in self._all_kinds:
        raise ValueError(f"{kind} is not a valid plot kind")

    # The original data structured can be transformed before passed to the
    # backend. For example, for DataFrame is common to set the index as the
    # `x` parameter, and return a Series with the parameter `y` as values.
    data = self._parent.copy()
    parent_is_frame = isinstance(data, ABCDataFrame)

    if isinstance(data, ABCSeries):
        kwargs["reuse_plot"] = True

    if kind in self._dataframe_kinds:
        # These kinds receive x and y untouched, but only make sense for frames.
        if not parent_is_frame:
            raise ValueError(f"plot kind {kind} can only be used for data frames")
        return backend_module.plot(data, x=x, y=y, kind=kind, **kwargs)

    if kind in self._series_kinds:
        if parent_is_frame:
            if y is None and kwargs.get("subplots") is False:
                raise ValueError(
                    f"{kind} requires either y column or 'subplots=True'"
                )
            if y is not None:
                # An integer y is a position unless the columns hold integers.
                if is_integer(y) and not data.columns._holds_integer():
                    y = data.columns[y]
                # converted to series actually. copy to not modify
                data = data[y].copy()
                data.index.name = y
    elif parent_is_frame:
        original_columns = data.columns

        if x is not None:
            if is_integer(x) and not data.columns._holds_integer():
                x = original_columns[x]
            elif not isinstance(data[x], ABCSeries):
                raise ValueError("x must be a label or position")
            data = data.set_index(x)

        if y is not None:
            # check if we have y as int or list of ints
            y_is_positional = is_integer(y) or (
                is_list_like(y) and all(is_integer(item) for item in y)
            )
            if y_is_positional and not data.columns._holds_integer():
                y = original_columns[y]

            label_kw = kwargs.get("label", False)

            # An error-bar argument given as a column label/position is
            # replaced by that column; a failed lookup keeps the value as is.
            for err_key in ("xerr", "yerr"):
                err_value = kwargs.get(err_key)
                if isinstance(err_value, str) or is_integer(err_value):
                    try:
                        kwargs[err_key] = data[err_value]
                    except (IndexError, KeyError, TypeError):
                        pass

            # don't overwrite
            data = data[y].copy()

            if isinstance(data, ABCSeries):
                data.name = label_kw or y
            else:
                lengths_agree = is_list_like(label_kw) and len(label_kw) == len(y)
                if label_kw and not lengths_agree:
                    raise ValueError(
                        "label should be list-like and same length as y"
                    )
                data.columns = label_kw or data.columns

    return backend_module.plot(data, kind=kind, **kwargs)

976 

# ``__call__`` is defined without a docstring; give it the ``__doc__`` that is
# in scope here (presumably the enclosing class docstring -- the class header
# is outside this chunk, confirm).
__call__.__doc__ = __doc__

978 

@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> ax = s.plot.line()

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...    'pig': [20, 18, 489, 675, 1776],
        ...    'horse': [4, 25, 281, 600, 1900]
        ...    }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
        :context: close-figs

        An example with subplots, so an array of axes is returned.

        >>> axes = df.plot.line(subplots=True)
        >>> type(axes)
        <class 'numpy.ndarray'>

    .. plot::
        :context: close-figs

        Let's repeat the same example, but specifying colors for
        each column (in this case, for each animal).

        >>> axes = df.plot.line(
        ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
        ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    This function is useful to plot lines using DataFrame's values
    as coordinates.
    """
    # The parameter/return sections and the examples are appended to the
    # docstring by the decorators above; the first example assigns its result
    # (like the others) instead of echoing an axes repr, because the repr
    # shown before carried a ``ylabel='Density'`` that a line plot does not set.
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed.
    return self(kind="line", x=x, y=y, **kwargs)

1045 

@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(  # pylint: disable=disallowed-name
    self, x=None, y=None, **kwargs
) -> PlotAccessor:
    """
    Vertical bar plot.

    A bar plot is a plot that presents categorical data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.
    """
    # The remaining docstring sections are appended by the decorators above.
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed.
    # NOTE(review): annotated ``-> PlotAccessor`` but returns whatever
    # ``self(...)`` returns -- confirm the annotation.
    return self(kind="bar", x=x, y=y, **kwargs)

1137 

@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Make a horizontal bar plot.

    A horizontal bar plot is a plot that presents quantitative data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.
    """
    # The remaining docstring sections are appended by the decorators above.
    # NOTE(review): the template is substituted with kind="bar" while the plot
    # kind forwarded below is "barh" -- presumably intentional wording; confirm.
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed.
    return self(kind="barh", x=x, y=y, **kwargs)

1223 

def box(self, by=None, **kwargs) -> PlotAccessor:
    r"""
    Make a box plot of the DataFrame columns.

    A box plot is a method for graphically depicting groups of numerical
    data through their quartiles.
    The box extends from the Q1 to Q3 quartile values of the data,
    with a line at the median (Q2). The whiskers extend from the edges
    of box to show the range of the data. The position of the whiskers
    is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
    box. Outlier points are those past the end of the whiskers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    A consideration when using this chart is that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no grouping was made.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    You can also generate groupings if you specify the `by` parameter (which
    can take a column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed; ``by``
    # is passed through as an ordinary keyword argument.
    return self(kind="box", by=by, **kwargs)

1290 

def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no grouping was made.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    A grouped histogram can be generated by providing the parameter `by` (which
    can be a column name, or a list of column names):

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed; ``by``
    # and ``bins`` are passed through as ordinary keyword arguments.
    return self(kind="hist", by=by, bins=bins, **kwargs)

1352 

def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed;
    # ``bw_method`` and ``ind`` are always passed on, even when None.
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

1460 

# ``density`` is an alias: both names are bound to the same ``kde`` function.
density = kde

1462 

def area(self, x=None, y=None, stacked: bool = True, **kwargs) -> PlotAccessor:
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed;
    # ``stacked`` is passed through as an ordinary keyword argument.
    return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)

1535 

def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the accessor wraps a DataFrame and neither a ``y`` column nor a
        truthy ``subplots`` is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    # Only a DataFrame needs the column/subplots check; anything else goes
    # straight through to ``__call__``.
    if isinstance(self._parent, ABCDataFrame):
        no_column_given = kwargs.get("y") is None
        if no_column_given and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")
    return self(kind="pie", **kwargs)

1590 

def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

          .. versionchanged:: 1.1.0

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Thin wrapper: forwards to ``__call__`` with the plot kind fixed; ``s``
    # and ``c`` are always passed on, even when None.
    return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)

1675 

def hexbin(
    self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
        Only forwarded when it is not None.
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction. Only forwarded when it is not None.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Add the optional tuning arguments to the forwarded keywords only when
    # the caller supplied them; ``C`` is always forwarded.
    for option, value in (
        ("reduce_C_function", reduce_C_function),
        ("gridsize", gridsize),
    ):
        if value is not None:
            kwargs[option] = value

    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)

1766 

1767 

# Cache of already-loaded plotting backend modules, keyed by the backend string
# that was requested; filled in by ``_get_plot_backend``.
_backends: dict[str, types.ModuleType] = {}

1769 

1770 

1771def _load_backend(backend: str) -> types.ModuleType: 

1772 """ 

1773 Load a pandas plotting backend. 

1774 

1775 Parameters 

1776 ---------- 

1777 backend : str 

1778 The identifier for the backend. Either an entrypoint item registered 

1779 with importlib.metadata, "matplotlib", or a module name. 

1780 

1781 Returns 

1782 ------- 

1783 types.ModuleType 

1784 The imported backend. 

1785 """ 

1786 from importlib.metadata import entry_points 

1787 

1788 if backend == "matplotlib": 

1789 # Because matplotlib is an optional dependency and first-party backend, 

1790 # we need to attempt an import here to raise an ImportError if needed. 

1791 try: 

1792 module = importlib.import_module("pandas.plotting._matplotlib") 

1793 except ImportError: 

1794 raise ImportError( 

1795 "matplotlib is required for plotting when the " 

1796 'default backend "matplotlib" is selected.' 

1797 ) from None 

1798 return module 

1799 

1800 found_backend = False 

1801 

1802 eps = entry_points() 

1803 key = "pandas_plotting_backends" 

1804 # entry_points lost dict API ~ PY 3.10 

1805 # https://github.com/python/importlib_metadata/issues/298 

1806 if hasattr(eps, "select"): 

1807 entry = eps.select(group=key) # pyright: ignore[reportGeneralTypeIssues] 

1808 else: 

1809 entry = eps.get(key, ()) 

1810 for entry_point in entry: 

1811 found_backend = entry_point.name == backend 

1812 if found_backend: 

1813 module = entry_point.load() 

1814 break 

1815 

1816 if not found_backend: 

1817 # Fall back to unregistered, module name approach. 

1818 try: 

1819 module = importlib.import_module(backend) 

1820 found_backend = True 

1821 except ImportError: 

1822 # We re-raise later on. 

1823 pass 

1824 

1825 if found_backend: 

1826 if hasattr(module, "plot"): 

1827 # Validate that the interface is implemented when the option is set, 

1828 # rather than at plot time. 

1829 return module 

1830 

1831 raise ValueError( 

1832 f"Could not find plotting backend '{backend}'. Ensure that you've " 

1833 f"installed the package providing the '{backend}' entrypoint, or that " 

1834 "the package has a top-level `.plot` method." 

1835 ) 

1836 

1837 

def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    pandas plots with matplotlib by default, but other third-party backends
    can be plugged in. This function returns the module that provides the
    top-level `.plot` method doing the actual plotting. The backend name comes
    from the `backend` argument or, when that is not given, from the option
    `pandas.options.plotting.backend`.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Parameters
    ----------
    backend : str, optional
        Name of the backend; a falsy value means "use the configured option".

    Returns
    -------
    module
        The backend module, taken from the cache when it was loaded before.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    backend_str: str = backend or get_option("plotting.backend")

    # Load on first use only; later calls are served from the module cache.
    if backend_str not in _backends:
        _backends[backend_str] = _load_backend(backend_str)
    return _backends[backend_str]