Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/plotting/

1from __future__ import annotations

3import importlib

4from typing import (

5 TYPE_CHECKING,

6 Callable,

7 Literal,

10from pandas._config import get_option

12from pandas.util._decorators import (

13 Appender,

14 Substitution,

15)

17from pandas.core.dtypes.common import (

18 is_integer,

19 is_list_like,

20)

21from pandas.core.dtypes.generic import (

22 ABCDataFrame,

23 ABCSeries,

24)

26from pandas.core.base import PandasObject

28if TYPE_CHECKING:

29 from collections.abc import (

30 Hashable,

31 Sequence,

32 )

33 import types

35 from matplotlib.axes import Axes

36 import numpy as np

38 from pandas._typing import IndexLabel

40 from pandas import (

41 DataFrame,

42 Series,

43 )

44 from pandas.core.groupby.generic import DataFrameGroupBy

def hist_series(
    self: Series,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw a histogram of the Series through the selected plotting backend.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object
        If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches by default.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    legend : bool, default False
        Whether to show the legend.

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.

    Examples
    --------
    For Series:

    .. plot::
        :context: close-figs

        >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
        >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
        >>> hist = ser.hist()

    For Groupby:

    .. plot::
        :context: close-figs

        >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
        >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
        >>> hist = ser.groupby(level=0).hist()
    """
    # ``backend`` only selects the implementation module; it is deliberately
    # absent from the options forwarded to that module.
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return _get_plot_backend(backend).hist_series(self, **hist_options, **kwargs)

143

144

def hist_frame(
    data: DataFrame,
    column: IndexLabel | None = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share x axis and set some x axis labels to
        invisible.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.

    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

    legend : bool, default False
        Whether to show the legend.

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...         'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
        >>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
        >>> df = pd.DataFrame(data, index=index)
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` picks the implementation module and is not forwarded to it;
    # everything else is handed over by keyword.
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return _get_plot_backend(backend).hist_frame(data, **hist_options, **kwargs)

269

270

271_boxplot_doc = """

272Make a box plot from DataFrame columns.

273

274Make a box-and-whisker plot from DataFrame columns, optionally grouped

275by some other columns. A box plot is a method for graphically depicting

276groups of numerical data through their quartiles.

277The box extends from the Q1 to Q3 quartile values of the data,

278with a line at the median (Q2). The whiskers extend from the edges

279of box to show the range of the data. By default, they extend no more than

280`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest

281data point within that interval. Outliers are plotted as separate dots.

282

283For further details see

284Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

285

286Parameters

287----------

288%(data)s\

289column : str or list of str, optional

290 Column name or list of names, or vector.

291 Can be any valid input to :meth:`pandas.DataFrame.groupby`.

292by : str or array-like, optional

293 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.

294 One box-plot will be done per value of columns in `by`.

295ax : object of class matplotlib.axes.Axes, optional

296 The matplotlib axes to be used by boxplot.

297fontsize : float or str

298 Tick label font size in points or as a string (e.g., `large`).

299rot : float, default 0

300 The rotation angle of labels (in degrees)

301 with respect to the screen coordinate system.

302grid : bool, default True

303 Setting this to True will show the grid.

304figsize : A tuple (width, height) in inches

305 The size of the figure to create in matplotlib.

306layout : tuple (rows, columns), optional

307 For example, (3, 5) will display the subplots

308 using 3 rows and 5 columns, starting from the top-left.

309return_type : {'axes', 'dict', 'both'} or None, default 'axes'

310 The kind of object to return. The default is ``axes``.

311

312 * 'axes' returns the matplotlib axes the boxplot is drawn on.

313 * 'dict' returns a dictionary whose values are the matplotlib

314 Lines of the boxplot.

315 * 'both' returns a namedtuple with the axes and dict.

316 * when grouping with ``by``, a Series mapping columns to

317 ``return_type`` is returned.

318

319 If ``return_type`` is `None`, a NumPy array

320 of axes with the same shape as ``layout`` is returned.

321%(backend)s\

322

323**kwargs

324 All other plotting keyword arguments to be passed to

325 :func:`matplotlib.pyplot.boxplot`.

326

327Returns

328-------

329result

330 See Notes.

331

332See Also

333--------

334pandas.Series.plot.hist: Make a histogram.

335matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

336

337Notes

338-----

339The return type depends on the `return_type` parameter:

340

341* 'axes' : object of class matplotlib.axes.Axes

342* 'dict' : dict of matplotlib.lines.Line2D objects

343* 'both' : a namedtuple with structure (ax, lines)

344

345For data grouped with ``by``, return a Series of the above or a numpy

346array:

347

348* :class:`~pandas.Series`

349* :class:`~numpy.array` (for ``return_type = None``)

350

351Use ``return_type='dict'`` when you want to tweak the appearance

352of the lines after plotting. In this case a dict containing the Lines

353making up the boxes, caps, fliers, medians, and whiskers is returned.

354

355Examples

356--------

357

358Boxplots can be created for every column in the dataframe

359by ``df.boxplot()`` or indicating the columns to be used:

360

361.. plot::

362 :context: close-figs

363

364 >>> np.random.seed(1234)

365 >>> df = pd.DataFrame(np.random.randn(10, 4),

366 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])

367 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP

368

369Boxplots of variables distributions grouped by the values of a third

370variable can be created using the option ``by``. For instance:

371

372.. plot::

373 :context: close-figs

374

375 >>> df = pd.DataFrame(np.random.randn(10, 2),

376 ... columns=['Col1', 'Col2'])

377 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',

378 ... 'B', 'B', 'B', 'B', 'B'])

379 >>> boxplot = df.boxplot(by='X')

380

381A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot

382in order to group the data by combination of the variables in the x-axis:

383

384.. plot::

385 :context: close-figs

386

387 >>> df = pd.DataFrame(np.random.randn(10, 3),

388 ... columns=['Col1', 'Col2', 'Col3'])

389 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',

390 ... 'B', 'B', 'B', 'B', 'B'])

391 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',

392 ... 'B', 'A', 'B', 'A', 'B'])

393 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

394

395The layout of boxplot can be adjusted giving a tuple to ``layout``:

396

397.. plot::

398 :context: close-figs

399

400 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

401 ... layout=(2, 1))

402

403Additional formatting can be done to the boxplot, like suppressing the grid

404(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)

405or changing the fontsize (i.e. ``fontsize=15``):

406

407.. plot::

408 :context: close-figs

409

410 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP

411

412The parameter ``return_type`` can be used to select the type of element

413returned by `boxplot`. When ``return_type='axes'`` is selected,

414the matplotlib axes on which the boxplot is drawn are returned:

415

416 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')

417 >>> type(boxplot)

418 <class 'matplotlib.axes._axes.Axes'>

419

420When grouping with ``by``, a Series mapping columns to ``return_type``

421is returned:

422

423 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

424 ... return_type='axes')

425 >>> type(boxplot)

426 <class 'pandas.core.series.Series'>

427

428If ``return_type`` is `None`, a NumPy array of axes with the same shape

429as ``layout`` is returned:

430

431 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

432 ... return_type=None)

433 >>> type(boxplot)

434 <class 'numpy.ndarray'>

435"""

436

437_backend_doc = """\

438backend : str, default None

439 Backend to use instead of the backend specified in the option

440 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to

441 specify the ``plotting.backend`` for the whole session, set

442 ``pd.options.plotting.backend``.

443"""

444

445

446_bar_or_line_doc = """

447 Parameters

448 ----------

449 x : label or position, optional

450 Allows plotting of one column versus another. If not specified,

451 the index of the DataFrame is used.

452 y : label or position, optional

453 Allows plotting of one column versus another. If not specified,

454 all numerical columns are used.

455 color : str, array-like, or dict, optional

456 The color for each of the DataFrame's columns. Possible values are:

457

458 - A single color string referred to by name, RGB or RGBA code,

459 for instance 'red' or '#a98d19'.

460

461 - A sequence of color strings referred to by name, RGB or RGBA

462 code, which will be used for each column recursively. For

463 instance ['green','yellow'] each column's %(kind)s will be filled in

464 green or yellow, alternatively. If there is only a single column to

465 be plotted, then only the first color from the color list will be

466 used.

467

468 - A dict of the form {column name : color}, so that each column will be

469 colored accordingly. For example, if your columns are called `a` and

470 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for

471 column `a` in green and %(kind)ss for column `b` in red.

472

473 **kwargs

474 Additional keyword arguments are documented in

475 :meth:`DataFrame.plot`.

476

477 Returns

478 -------

479 matplotlib.axes.Axes or np.ndarray of them

480 An ndarray is returned with one :class:`matplotlib.axes.Axes`

481 per column when ``subplots=True``.

482"""

483

484

485@Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")

486@Appender(_boxplot_doc)

def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # No docstring is written here on purpose: the decorators applied to this
    # function build it from the shared ``_boxplot_doc`` template.
    #
    # This entry point exposes no ``backend`` parameter, so the backend is
    # always resolved by the literal name "matplotlib".
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return _get_plot_backend("matplotlib").boxplot(data, **box_options, **kwargs)

514

515

516@Substitution(data="", backend=_backend_doc)

517@Appender(_boxplot_doc)

def boxplot_frame(
    self: DataFrame,
    column=None,
    by=None,
    ax=None,
    fontsize: int | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # No docstring is written here on purpose: the decorators applied to this
    # function build it from the shared ``_boxplot_doc`` template.
    #
    # ``backend`` selects the implementation module and is not forwarded to it.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return _get_plot_backend(backend).boxplot_frame(self, **box_options, **kwargs)

546

547

def boxplot_frame_groupby(
    grouped: DataFrameGroupBy,
    subplots: bool = True,
    column=None,
    fontsize: int | None = None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize: tuple[float, float] | None = None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : float or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case ``subplots=False``

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index), 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` selects the implementation module and is not forwarded to it.
    box_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    return _get_plot_backend(backend).boxplot_frame_groupby(
        grouped, **box_options, **kwargs
    )

637

638

639class PlotAccessor(PandasObject):

640 """

641 Make plots of Series or DataFrame.

642

643 Uses the backend specified by the

644 option ``plotting.backend``. By default, matplotlib is used.

645

646 Parameters

647 ----------

648 data : Series or DataFrame

649 The object for which the method is called.

650 x : label or position, default None

651 Only used if data is a DataFrame.

652 y : label, position or list of label, positions, default None

653 Allows plotting of one column versus another. Only used if data is a

654 DataFrame.

655 kind : str

656 The kind of plot to produce:

657

658 - 'line' : line plot (default)

659 - 'bar' : vertical bar plot

660 - 'barh' : horizontal bar plot

661 - 'hist' : histogram

662 - 'box' : boxplot

663 - 'kde' : Kernel Density Estimation plot

664 - 'density' : same as 'kde'

665 - 'area' : area plot

666 - 'pie' : pie plot

667 - 'scatter' : scatter plot (DataFrame only)

668 - 'hexbin' : hexbin plot (DataFrame only)

669 ax : matplotlib axes object, default None

670 An axes of the current figure.

671 subplots : bool or sequence of iterables, default False

672 Whether to group columns into subplots:

673

674 - ``False`` : No subplots will be used

675 - ``True`` : Make separate subplots for each column.

676 - sequence of iterables of column labels: Create a subplot for each

677 group of columns. For example `[('a', 'c'), ('b', 'd')]` will

678 create 2 subplots: one with columns 'a' and 'c', and one

679 with columns 'b' and 'd'. Remaining columns that aren't specified

680 will be plotted in additional subplots (one per column).

681

682 .. versionadded:: 1.5.0

683

684 sharex : bool, default True if ax is None else False

685 In case ``subplots=True``, share x axis and set some x axis labels

686 to invisible; defaults to True if ax is None otherwise False if

687 an ax is passed in; Be aware, that passing in both an ax and

688 ``sharex=True`` will alter all x axis labels for all axis in a figure.

689 sharey : bool, default False

690 In case ``subplots=True``, share y axis and set some y axis labels to invisible.

691 layout : tuple, optional

692 (rows, columns) for the layout of subplots.

693 figsize : a tuple (width, height) in inches

694 Size of a figure object.

695 use_index : bool, default True

696 Use index as ticks for x axis.

697 title : str or list

698 Title to use for the plot. If a string is passed, print the string

699 at the top of the figure. If a list is passed and `subplots` is

700 True, print each item in the list above the corresponding subplot.

701 grid : bool, default None (matlab style default)

702 Axis grid lines.

703 legend : bool or {'reverse'}

704 Place legend on axis subplots.

705 style : list or dict

706 The matplotlib line style per column.

707 logx : bool or 'sym', default False

708 Use log scaling or symlog scaling on x axis.

709

710 logy : bool or 'sym' default False

711 Use log scaling or symlog scaling on y axis.

712

713 loglog : bool or 'sym', default False

714 Use log scaling or symlog scaling on both x and y axes.

715

716 xticks : sequence

717 Values to use for the xticks.

718 yticks : sequence

719 Values to use for the yticks.

720 xlim : 2-tuple/list

721 Set the x limits of the current axes.

722 ylim : 2-tuple/list

723 Set the y limits of the current axes.

724 xlabel : label, optional

725 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the

726 x-column name for planar plots.

727

728 .. versionchanged:: 2.0.0

729

730 Now applicable to histograms.

731

732 ylabel : label, optional

733 Name to use for the ylabel on y-axis. Default will show no ylabel, or the

734 y-column name for planar plots.

735

736 .. versionchanged:: 2.0.0

737

738 Now applicable to histograms.

739

740 rot : float, default None

741 Rotation for ticks (xticks for vertical, yticks for horizontal

742 plots).

743 fontsize : float, default None

744 Font size for xticks and yticks.

745 colormap : str or matplotlib colormap object, default None

746 Colormap to select colors from. If string, load colormap with that

747 name from matplotlib.

748 colorbar : bool, optional

749 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'

750 plots).

751 position : float

752 Specify relative alignments for bar plot layout.

753 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

754 (center).

755 table : bool, Series or DataFrame, default False

756 If True, draw a table using the data in the DataFrame and the data

757 will be transposed to meet matplotlib's default layout.

758 If a Series or DataFrame is passed, use passed data to draw a

759 table.

760 yerr : DataFrame, Series, array-like, dict and str

761 See :ref:`Plotting with Error Bars <visualization.errorbars>` for

762 detail.

763 xerr : DataFrame, Series, array-like, dict and str

764 Equivalent to yerr.

765 stacked : bool, default False in line and bar plots, and True in area plot

766 If True, create stacked plot.

767 secondary_y : bool or sequence, default False

768 Whether to plot on the secondary y-axis if a list/tuple, which

769 columns to plot on secondary y-axis.

770 mark_right : bool, default True

771 When using a secondary_y axis, automatically mark the column

772 labels with "(right)" in the legend.

773 include_bool : bool, default is False

774 If True, boolean values can be plotted.

775 backend : str, default None

776 Backend to use instead of the backend specified in the option

777 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to

778 specify the ``plotting.backend`` for the whole session, set

779 ``pd.options.plotting.backend``.

780 **kwargs

781 Options to pass to matplotlib plotting method.

782

783 Returns

784 -------

785 :class:`matplotlib.axes.Axes` or numpy.ndarray of them

786 If the backend is not the default matplotlib one, the return value

787 will be the object returned by the backend.

788

789 Notes

790 -----

791 - See matplotlib documentation online for more on this subject

792 - If `kind` = 'bar' or 'barh', you can specify relative alignments

793 for bar plot layout by `position` keyword.

794 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

795 (center)

796

797 Examples

798 --------

799 For Series:

800

801 .. plot::

802 :context: close-figs

803

804 >>> ser = pd.Series([1, 2, 3, 3])

805 >>> plot = ser.plot(kind='hist', title="My plot")

806

807 For DataFrame:

808

809 .. plot::

810 :context: close-figs

811

812 >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],

813 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},

814 ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])

815 >>> plot = df.plot(title="DataFrame Plot")

816

817 For SeriesGroupBy:

818

819 .. plot::

820 :context: close-figs

821

822 >>> lst = [-1, -2, -3, 1, 2, 3]

823 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)

824 >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot")

825

826 For DataFrameGroupBy:

827

828 .. plot::

829 :context: close-figs

830

831 >>> df = pd.DataFrame({"col1" : [1, 2, 3, 4],

832 ... "col2" : ["A", "B", "A", "B"]})

833 >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot")

834 """

835

836 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")

837 _series_kinds = ("pie",)

838 _dataframe_kinds = ("scatter", "hexbin")

839 _kind_aliases = {"density": "kde"}

840 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds

841

    def __init__(self, data: Series | DataFrame) -> None:
        # Keep a reference to the object the accessor was created for;
        # ``__call__`` reads it back as ``self._parent``.
        self._parent = data

844

845 @staticmethod

846 def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs):

847 """

848 This function makes calls to this accessor `__call__` method compatible

849 with the previous `SeriesPlotMethods.__call__` and

850 `DataFramePlotMethods.__call__`. Those had slightly different

851 signatures, since `DataFramePlotMethods` accepted `x` and `y`

852 parameters.

853 """

854 if isinstance(data, ABCSeries):

855 arg_def = [

856 ("kind", "line"),

857 ("ax", None),

858 ("figsize", None),

859 ("use_index", True),

860 ("title", None),

861 ("grid", None),

862 ("legend", False),

863 ("style", None),

864 ("logx", False),

865 ("logy", False),

866 ("loglog", False),

867 ("xticks", None),

868 ("yticks", None),

869 ("xlim", None),

870 ("ylim", None),

871 ("rot", None),

872 ("fontsize", None),

873 ("colormap", None),

874 ("table", False),

875 ("yerr", None),

876 ("xerr", None),

877 ("label", None),

878 ("secondary_y", False),

879 ("xlabel", None),

880 ("ylabel", None),

881 ]

882 elif isinstance(data, ABCDataFrame):

883 arg_def = [

884 ("x", None),

885 ("y", None),

886 ("kind", "line"),

887 ("ax", None),

888 ("subplots", False),

889 ("sharex", None),

890 ("sharey", False),

891 ("layout", None),

892 ("figsize", None),

893 ("use_index", True),

894 ("title", None),

895 ("grid", None),

896 ("legend", True),

897 ("style", None),

898 ("logx", False),

899 ("logy", False),

900 ("loglog", False),

901 ("xticks", None),

902 ("yticks", None),

903 ("xlim", None),

904 ("ylim", None),

905 ("rot", None),

906 ("fontsize", None),

907 ("colormap", None),

908 ("table", False),

909 ("yerr", None),

910 ("xerr", None),

911 ("secondary_y", False),

912 ("xlabel", None),

913 ("ylabel", None),

914 ]

915 else:

916 raise TypeError(

917 f"Called plot accessor for type {type(data).__name__}, "

918 "expected Series or DataFrame"

919 )

920

921 if args and isinstance(data, ABCSeries):

922 positional_args = str(args)[1:-1]

923 keyword_args = ", ".join(

924 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]

925 )

926 msg = (

927 "`Series.plot()` should not be called with positional "

928 "arguments, only keyword arguments. The order of "

929 "positional arguments will change in the future. "

930 f"Use `Series.plot({keyword_args})` instead of "

931 f"`Series.plot({positional_args})`."

932 )

933 raise TypeError(msg)

934

935 pos_args = {name: value for (name, _), value in zip(arg_def, args)}

936 if backend_name == "pandas.plotting._matplotlib":

937 kwargs = dict(arg_def, **pos_args, **kwargs)

938 else:

939 kwargs = dict(pos_args, **kwargs)

940

941 x = kwargs.pop("x", None)

942 y = kwargs.pop("y", None)

943 kind = kwargs.pop("kind", "line")

944 return x, y, kind, kwargs

945

    def __call__(self, *args, **kwargs):
        # Entry point of the accessor: resolve the backend, normalize the call
        # arguments, reshape the parent object where needed and delegate to the
        # backend's ``plot`` function.
        plot_backend = _get_plot_backend(kwargs.pop("backend", None))

        x, y, kind, kwargs = self._get_call_args(
            plot_backend.__name__, self._parent, args, kwargs
        )

        # Translate aliases (see ``_kind_aliases``) to their canonical kind.
        kind = self._kind_aliases.get(kind, kind)

        # when using another backend, get out of the way
        if plot_backend.__name__ != "pandas.plotting._matplotlib":
            return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

        if kind not in self._all_kinds:
            raise ValueError(
                f"{kind} is not a valid plot kind "
                f"Valid plot kinds: {self._all_kinds}"
            )

        # The original data structure can be transformed before it is passed
        # to the backend. For example, for a DataFrame it is common to set the
        # index from the `x` parameter, and return a Series with the parameter
        # `y` as values.
        data = self._parent.copy()

        if isinstance(data, ABCSeries):
            kwargs["reuse_plot"] = True

        if kind in self._dataframe_kinds:
            # DataFrame-only kinds receive x and y untouched.
            if isinstance(data, ABCDataFrame):
                return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
            else:
                raise ValueError(f"plot kind {kind} can only be used for data frames")
        elif kind in self._series_kinds:
            if isinstance(data, ABCDataFrame):
                if y is None and kwargs.get("subplots") is False:
                    raise ValueError(
                        f"{kind} requires either y column or 'subplots=True'"
                    )
                if y is not None:
                    # An integer y is treated as a position unless the column
                    # labels themselves are integers.
                    if is_integer(y) and not data.columns._holds_integer():
                        y = data.columns[y]
                    # converted to series actually. copy to not modify
                    data = data[y].copy()
                    data.index.name = y
        elif isinstance(data, ABCDataFrame):
            # Captured before ``set_index`` below can drop the x column, so
            # positional x/y refer to the original column order.
            data_cols = data.columns
            if x is not None:
                if is_integer(x) and not data.columns._holds_integer():
                    x = data_cols[x]
                elif not isinstance(data[x], ABCSeries):
                    raise ValueError("x must be a label or position")
                data = data.set_index(x)
            if y is not None:
                # check if we have y as int or list of ints
                int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
                int_y_arg = is_integer(y) or int_ylist
                if int_y_arg and not data.columns._holds_integer():
                    y = data_cols[y]

                label_kw = kwargs["label"] if "label" in kwargs else False
                for kw in ["xerr", "yerr"]:
                    if kw in kwargs and (
                        isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
                    ):
                        # Best effort: replace a column reference with the
                        # column itself; if the lookup fails the original
                        # value is passed through unchanged.
                        try:
                            kwargs[kw] = data[kwargs[kw]]
                        except (IndexError, KeyError, TypeError):
                            pass

                # don't overwrite
                data = data[y].copy()

                if isinstance(data, ABCSeries):
                    # Single column selected: name it after the label, or y.
                    label_name = label_kw or y
                    data.name = label_name
                else:
                    # Several columns selected: a label, if given, must
                    # provide one entry per y column.
                    match = is_list_like(label_kw) and len(label_kw) == len(y)
                    if label_kw and not match:
                        raise ValueError(
                            "label should be list-like and same length as y"
                        )
                    label_name = label_kw or data.columns
                    data.columns = label_name

        return plot_backend.plot(data, kind=kind, **kwargs)

1031

1032 __call__.__doc__ = __doc__

1033

@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> s.plot.line()  # doctest: +SKIP

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...     'pig': [20, 18, 489, 675, 1776],
        ...     'horse': [4, 25, 281, 600, 1900]
        ... }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
        :context: close-figs

        An example with subplots, so an array of axes is returned.

        >>> axes = df.plot.line(subplots=True)
        >>> type(axes)
        <class 'numpy.ndarray'>

    .. plot::
        :context: close-figs

        Let's repeat the same example, but specifying colors for
        each column (in this case, for each animal).

        >>> axes = df.plot.line(
        ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
        ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(
    self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    The values of the DataFrame are used as the coordinates of the
    lines that are drawn.
    """
    # Thin wrapper: fix the plot kind and let ``__call__`` do the work.
    return self(kind="line", x=x, y=y, **kwargs)

1101

@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(  # pylint: disable=disallowed-name
    self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
) -> PlotAccessor:
    """
    Vertical bar plot.

    A bar plot presents categorical data with rectangular bars whose
    lengths are proportional to the values they represent, which makes
    it suited to comparing discrete categories. One axis of the plot
    shows the specific categories being compared, and the other axis
    represents a measured value.
    """
    # Thin wrapper: fix the plot kind and let ``__call__`` do the work.
    return self(kind="bar", x=x, y=y, **kwargs)

1193

@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(
    self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
) -> PlotAccessor:
    """
    Make a horizontal bar plot.

    A horizontal bar plot presents quantitative data with rectangular
    bars whose lengths are proportional to the values they represent,
    which makes it suited to comparing discrete categories. One axis of
    the plot shows the specific categories being compared, and the other
    axis represents a measured value.
    """
    # Thin wrapper: fix the plot kind and let ``__call__`` do the work.
    return self(kind="barh", x=x, y=y, **kwargs)

1281

def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor:
    r"""
    Make a box plot of the DataFrame columns.

    A box plot is a method for graphically depicting groups of numerical
    data through their quartiles.
    The box extends from the Q1 to Q3 quartile values of the data,
    with a line at the median (Q2). The whiskers extend from the edges
    of box to show the range of the data. The position of the whiskers
    is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
    box. Outlier points are those past the end of the whiskers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    A consideration when using this chart is that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings
           were made.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    You can also generate groupings if you specify the `by` parameter (which
    can take a column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # ``by`` is always forwarded, even when it is None.
    return self(kind="box", by=by, **kwargs)

1348

def hist(
    self, by: IndexLabel | None = None, bins: int = 10, **kwargs
) -> PlotAccessor:
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings
           were made.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    A grouped histogram can be generated by providing the parameter `by` (which
    can be a column name, or a list of column names):

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # ``by`` and ``bins`` are always forwarded, defaults included.
    return self(kind="hist", by=by, bins=bins, **kwargs)

1410

def kde(
    self,
    bw_method: Literal["scott", "silverman"] | float | Callable | None = None,
    ind: np.ndarray | int | None = None,
    **kwargs,
) -> PlotAccessor:
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Both tuning parameters are forwarded even when left at None.
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

# ``density`` is a second name for the very same function object.
density = kde

1525

def area(
    self,
    x: Hashable | None = None,
    y: Hashable | None = None,
    stacked: bool = True,
    **kwargs,
) -> PlotAccessor:
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='ME'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # ``stacked`` is passed explicitly so the default of True reaches
    # ``__call__`` even when the caller did not mention it.
    return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)

1604

def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    parent_is_frame = isinstance(self._parent, ABCDataFrame)
    y_missing = kwargs.get("y", None) is None
    # ``subplots`` is only inspected once the first two conditions hold,
    # so its truthiness is never evaluated for other parents.
    if parent_is_frame and y_missing and not kwargs.get("subplots", False):
        raise ValueError("pie requires either y column or 'subplots=True'")
    return self(kind="pie", **kwargs)

1659

def scatter(
    self,
    x: Hashable,
    y: Hashable,
    s: Hashable | Sequence[Hashable] | None = None,
    c: Hashable | Sequence[Hashable] | None = None,
    **kwargs,
) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # ``s`` and ``c`` are forwarded even when None.
    return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)

1749

def hexbin(
    self,
    x: Hashable,
    y: Hashable,
    C: Hashable | None = None,
    reduce_C_function: Callable | None = None,
    gridsize: int | tuple[int, int] | None = None,
    **kwargs,
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Unlike ``C``, these two are only forwarded when explicitly given, so
    # that the plotting code downstream can apply its own defaults.
    optional_settings = {
        "reduce_C_function": reduce_C_function,
        "gridsize": gridsize,
    }
    for option, value in optional_settings.items():
        if value is not None:
            kwargs[option] = value

    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)

1846

1847

# Cache of already-imported plotting backend modules, keyed by the backend
# string that was requested; filled in by ``_get_plot_backend``.
_backends: dict[str, types.ModuleType] = {}

1849

1850

def _load_backend(backend: str) -> types.ModuleType:
    """
    Load a pandas plotting backend.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If ``backend`` is "matplotlib" and the bundled matplotlib backend
        cannot be imported.
    ValueError
        If no entry point or module named ``backend`` can be loaded, or if
        the loaded object has no top-level ``plot`` attribute. When the
        module-name fallback failed to import, that ``ImportError`` is
        attached as ``__cause__`` so the real reason is not lost.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            module = importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None
        return module

    found_backend = False
    # Remembered so a failing import of an existing-but-broken backend module
    # is reported alongside the generic "could not find" error below.
    import_error: ImportError | None = None

    eps = entry_points()
    key = "pandas_plotting_backends"
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(eps, "select"):
        entry = eps.select(group=key)
    else:
        # Argument 2 to "get" of "dict" has incompatible type "Tuple[]";
        # expected "EntryPoints"  [arg-type]
        entry = eps.get(key, ())  # type: ignore[arg-type]
    for entry_point in entry:
        found_backend = entry_point.name == backend
        if found_backend:
            module = entry_point.load()
            break

    if not found_backend:
        # Fall back to unregistered, module name approach.
        try:
            module = importlib.import_module(backend)
            found_backend = True
        except ImportError as err:
            # We re-raise later on, as a ValueError chained to this error.
            import_error = err

    if found_backend:
        if hasattr(module, "plot"):
            # Validate that the interface is implemented when the option is set,
            # rather than at plot time.
            return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    ) from import_error

1918

1919

def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    The plotting system of pandas uses matplotlib by default, but it can also
    work with other third-party backends. This function returns the module
    which provides a top-level `.plot` method that will actually do the
    plotting. The backend is specified from a string, which either comes from
    the keyword argument `backend`, or, if not specified (or falsy), from the
    option `pandas.options.plotting.backend`.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Parameters
    ----------
    backend : str, optional
        Name of the backend to load. Falls back to the ``plotting.backend``
        option when omitted.

    Returns
    -------
    module
        The backend module, as returned by ``_load_backend``.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    backend_str: str = backend or get_option("plotting.backend")

    # Serve repeated requests from the module-level cache.
    try:
        return _backends[backend_str]
    except KeyError:
        pass

    loaded = _load_backend(backend_str)
    _backends[backend_str] = loaded
    return loaded

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/plotting/_core.py: 28%

194 statements