Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/plotting/

1from __future__ import annotations

3import importlib

4import types

5from typing import (

6 TYPE_CHECKING,

7 Sequence,

10from pandas._config import get_option

12from pandas._typing import IndexLabel

13from pandas.util._decorators import (

14 Appender,

15 Substitution,

16)

18from pandas.core.dtypes.common import (

19 is_integer,

20 is_list_like,

21)

22from pandas.core.dtypes.generic import (

23 ABCDataFrame,

24 ABCSeries,

25)

27from pandas.core.base import PandasObject

29if TYPE_CHECKING:

30 from matplotlib.axes import Axes

32 from pandas import DataFrame

def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw a histogram of the Series through the selected plotting backend.

    Parameters
    ----------
    by : object, optional
        Grouping key; when given, histograms are formed for separate groups.
    ax : matplotlib axis object
        Axis to draw on. If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        When specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        When specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches.
    bins : int or sequence, default 10
        Number of histogram bins to be used. For an integer, bins + 1 bin
        edges are calculated and returned. For a sequence, the values are
        the bin edges, including the left edge of the first bin and the
        right edge of the last bin; the sequence is returned unmodified.
    backend : str, default None
        Backend to use instead of the one named by the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session, set ``pd.options.plotting.backend``.
    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        Passed on to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` only selects the implementation; it is not forwarded to it.
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return _get_plot_backend(backend).hist_series(self, **hist_options, **kwargs)

113

114

def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    :meth:`matplotlib.pyplot.hist` is called on each series in the
    DataFrame, which yields one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        When passed, limits the data to this subset of columns.
    by : object, optional
        When passed, histograms are formed for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        When specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        When specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default True if ax is None else False
        In case subplots=True, share x axis and set some x axis labels to
        invisible; defaults to True if ax is None otherwise False if an ax
        is passed in.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. For an integer, bins + 1 bin
        edges are calculated and returned. For a sequence, the values are
        the bin edges, including the left edge of the first bin and the
        right edge of the last bin; the sequence is returned unmodified.

    backend : str, default None
        Backend to use instead of the one named by the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session, set ``pd.options.plotting.backend``.

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the implementation; it is not forwarded to it.
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return _get_plot_backend(backend).hist_frame(data, **hist_options, **kwargs)

241

242

# Shared numpydoc template for the boxplot entry points. The ``%(data)s`` and
# ``%(backend)s`` placeholders are filled in per function by ``Substitution``
# and the result is attached to the function by ``Appender``.
_boxplot_doc = """
Make a box plot from DataFrame columns.

Make a box-and-whisker plot from DataFrame columns, optionally grouped
by some other columns. A box plot is a method for graphically depicting
groups of numerical data through their quartiles.
The box extends from the Q1 to Q3 quartile values of the data,
with a line at the median (Q2). The whiskers extend from the edges
of box to show the range of the data. By default, they extend no more than
`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
data point within that interval. Outliers are plotted as separate dots.

For further details see
Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

Parameters
----------
%(data)s\
column : str or list of str, optional
    Column name or list of names, or vector.
    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
by : str or array-like, optional
    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
    One box-plot will be done per value of columns in `by`.
ax : object of class matplotlib.axes.Axes, optional
    The matplotlib axes to be used by boxplot.
fontsize : float or str
    Tick label font size in points or as a string (e.g., `large`).
rot : float, default 0
    The rotation angle of labels (in degrees)
    with respect to the screen coordinate system.
grid : bool, default True
    Setting this to True will show the grid.
figsize : A tuple (width, height) in inches
    The size of the figure to create in matplotlib.
layout : tuple (rows, columns), optional
    For example, (3, 5) will display the subplots
    using 3 rows and 5 columns, starting from the top-left.
return_type : {'axes', 'dict', 'both'} or None, default 'axes'
    The kind of object to return. The default is ``axes``.

    * 'axes' returns the matplotlib axes the boxplot is drawn on.
    * 'dict' returns a dictionary whose values are the matplotlib
      Lines of the boxplot.
    * 'both' returns a namedtuple with the axes and dict.
    * when grouping with ``by``, a Series mapping columns to
      ``return_type`` is returned.

      If ``return_type`` is `None`, a NumPy array
      of axes with the same shape as ``layout`` is returned.
%(backend)s\

**kwargs
    All other plotting keyword arguments to be passed to
    :func:`matplotlib.pyplot.boxplot`.

Returns
-------
result
    See Notes.

See Also
--------
pandas.Series.plot.hist: Make a histogram.
matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

Notes
-----
The return type depends on the `return_type` parameter:

* 'axes' : object of class matplotlib.axes.Axes
* 'dict' : dict of matplotlib.lines.Line2D objects
* 'both' : a namedtuple with structure (ax, lines)

For data grouped with ``by``, return a Series of the above or a numpy
array:

* :class:`~pandas.Series`
* :class:`~numpy.array` (for ``return_type = None``)

Use ``return_type='dict'`` when you want to tweak the appearance
of the lines after plotting. In this case a dict containing the Lines
making up the boxes, caps, fliers, medians, and whiskers is returned.

Examples
--------

Boxplots can be created for every column in the dataframe
by ``df.boxplot()`` or indicating the columns to be used:

.. plot::
    :context: close-figs

    >>> np.random.seed(1234)
    >>> df = pd.DataFrame(np.random.randn(10, 4),
    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])  # doctest: +SKIP

Boxplots of variables distributions grouped by the values of a third
variable can be created using the option ``by``. For instance:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 2),
    ...                   columns=['Col1', 'Col2'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> boxplot = df.boxplot(by='X')

A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
in order to group the data by combination of the variables in the x-axis:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 3),
    ...                   columns=['Col1', 'Col2', 'Col3'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
    ...                      'B', 'A', 'B', 'A', 'B'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

The layout of boxplot can be adjusted giving a tuple to ``layout``:

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      layout=(2, 1))

Additional formatting can be done to the boxplot, like suppressing the grid
(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
or changing the fontsize (i.e. ``fontsize=15``):

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)  # doctest: +SKIP

The parameter ``return_type`` can be used to select the type of element
returned by `boxplot`. When ``return_type='axes'`` is selected,
the matplotlib axes on which the boxplot is drawn are returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
    >>> type(boxplot)
    <class 'matplotlib.axes._subplots.AxesSubplot'>

When grouping with ``by``, a Series mapping columns to ``return_type``
is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type='axes')
    >>> type(boxplot)
    <class 'pandas.core.series.Series'>

If ``return_type`` is `None`, a NumPy array of axes with the same shape
as ``layout`` is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type=None)
    >>> type(boxplot)
    <class 'numpy.ndarray'>
"""

408

# Parameter entry for ``backend``; ``boxplot_frame`` substitutes it for the
# ``%(backend)s`` placeholder of ``_boxplot_doc``.
_backend_doc = """\
backend : str, default None
    Backend to use instead of the backend specified in the option
    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
    specify the ``plotting.backend`` for the whole session, set
    ``pd.options.plotting.backend``.
"""

416

417

# Docstring fragment appended to plot methods such as ``PlotAccessor.line``;
# ``%(kind)s`` is filled in by ``Substitution``. The body is indented to line
# up with method docstrings because ``Appender`` joins it onto them.
_bar_or_line_doc = """
        Parameters
        ----------
        x : label or position, optional
            Allows plotting of one column versus another. If not specified,
            the index of the DataFrame is used.
        y : label or position, optional
            Allows plotting of one column versus another. If not specified,
            all numerical columns are used.
        color : str, array-like, or dict, optional
            The color for each of the DataFrame's columns. Possible values are:

            - A single color string referred to by name, RGB or RGBA code,
                for instance 'red' or '#a98d19'.

            - A sequence of color strings referred to by name, RGB or RGBA
                code, which will be used for each column recursively. For
                instance ['green','yellow'] each column's %(kind)s will be filled in
                green or yellow, alternatively. If there is only a single column to
                be plotted, then only the first color from the color list will be
                used.

            - A dict of the form {column name : color}, so that each column will be
                colored accordingly. For example, if your columns are called `a` and
                `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
                column `a` in green and %(kind)ss for column `b` in red.

            .. versionadded:: 1.1.0

        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or np.ndarray of them
            An ndarray is returned with one :class:`matplotlib.axes.Axes`
            per column when ``subplots=True``.
"""

457

458

@Substitution(data="data : DataFrame\n    The data to visualize.\n", backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # The docstring comes entirely from ``_boxplot_doc`` via the decorators
    # above, so none is written here (``Appender`` would concatenate it).
    # This module-level entry point has no ``backend`` parameter: it always
    # asks for the "matplotlib" backend.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return _get_plot_backend("matplotlib").boxplot(data, **box_options, **kwargs)

488

489

@Substitution(data="", backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # The docstring comes entirely from ``_boxplot_doc`` via the decorators
    # above, so none is written here (``Appender`` would concatenate it).
    # ``backend`` only selects the implementation; it is not forwarded to it.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return _get_plot_backend(backend).boxplot_frame(self, **box_options, **kwargs)

520

521

def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : float or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the one named by the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session, set ``pd.options.plotting.backend``.
    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` only selects the implementation; it is not forwarded to it.
    box_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    implementation = _get_plot_backend(backend)
    return implementation.boxplot_frame_groupby(grouped, **box_options, **kwargs)

611

612

613class PlotAccessor(PandasObject):

614 """

615 Make plots of Series or DataFrame.

616

617 Uses the backend specified by the

618 option ``plotting.backend``. By default, matplotlib is used.

619

620 Parameters

621 ----------

622 data : Series or DataFrame

623 The object for which the method is called.

624 x : label or position, default None

625 Only used if data is a DataFrame.

626 y : label, position or list of label, positions, default None

627 Allows plotting of one column versus another. Only used if data is a

628 DataFrame.

629 kind : str

630 The kind of plot to produce:

631

632 - 'line' : line plot (default)

633 - 'bar' : vertical bar plot

634 - 'barh' : horizontal bar plot

635 - 'hist' : histogram

636 - 'box' : boxplot

637 - 'kde' : Kernel Density Estimation plot

638 - 'density' : same as 'kde'

639 - 'area' : area plot

640 - 'pie' : pie plot

641 - 'scatter' : scatter plot (DataFrame only)

642 - 'hexbin' : hexbin plot (DataFrame only)

643 ax : matplotlib axes object, default None

644 An axes of the current figure.

645 subplots : bool or sequence of iterables, default False

646 Whether to group columns into subplots:

647

648 - ``False`` : No subplots will be used

649 - ``True`` : Make separate subplots for each column.

650 - sequence of iterables of column labels: Create a subplot for each

651 group of columns. For example `[('a', 'c'), ('b', 'd')]` will

652 create 2 subplots: one with columns 'a' and 'c', and one

653 with columns 'b' and 'd'. Remaining columns that aren't specified

654 will be plotted in additional subplots (one per column).

655

656 .. versionadded:: 1.5.0

657

658 sharex : bool, default True if ax is None else False

659 In case ``subplots=True``, share x axis and set some x axis labels

660 to invisible; defaults to True if ax is None otherwise False if

661 an ax is passed in; Be aware, that passing in both an ax and

662 ``sharex=True`` will alter all x axis labels for all axis in a figure.

663 sharey : bool, default False

664 In case ``subplots=True``, share y axis and set some y axis labels to invisible.

665 layout : tuple, optional

666 (rows, columns) for the layout of subplots.

667 figsize : a tuple (width, height) in inches

668 Size of a figure object.

669 use_index : bool, default True

670 Use index as ticks for x axis.

671 title : str or list

672 Title to use for the plot. If a string is passed, print the string

673 at the top of the figure. If a list is passed and `subplots` is

674 True, print each item in the list above the corresponding subplot.

675 grid : bool, default None (matlab style default)

676 Axis grid lines.

677 legend : bool or {'reverse'}

678 Place legend on axis subplots.

679 style : list or dict

680 The matplotlib line style per column.

681 logx : bool or 'sym', default False

682 Use log scaling or symlog scaling on x axis.

683

684 logy : bool or 'sym' default False

685 Use log scaling or symlog scaling on y axis.

686

687 loglog : bool or 'sym', default False

688 Use log scaling or symlog scaling on both x and y axes.

689

690 xticks : sequence

691 Values to use for the xticks.

692 yticks : sequence

693 Values to use for the yticks.

694 xlim : 2-tuple/list

695 Set the x limits of the current axes.

696 ylim : 2-tuple/list

697 Set the y limits of the current axes.

698 xlabel : label, optional

699 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the

700 x-column name for planar plots.

701

702 .. versionadded:: 1.1.0

703

704 .. versionchanged:: 1.2.0

705

706 Now applicable to planar plots (`scatter`, `hexbin`).

707

708 .. versionchanged:: 2.0.0

709

710 Now applicable to histograms.

711

712 ylabel : label, optional

713 Name to use for the ylabel on y-axis. Default will show no ylabel, or the

714 y-column name for planar plots.

715

716 .. versionadded:: 1.1.0

717

718 .. versionchanged:: 1.2.0

719

720 Now applicable to planar plots (`scatter`, `hexbin`).

721

722 .. versionchanged:: 2.0.0

723

724 Now applicable to histograms.

725

726 rot : float, default None

727 Rotation for ticks (xticks for vertical, yticks for horizontal

728 plots).

729 fontsize : float, default None

730 Font size for xticks and yticks.

731 colormap : str or matplotlib colormap object, default None

732 Colormap to select colors from. If string, load colormap with that

733 name from matplotlib.

734 colorbar : bool, optional

735 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'

736 plots).

737 position : float

738 Specify relative alignments for bar plot layout.

739 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

740 (center).

741 table : bool, Series or DataFrame, default False

742 If True, draw a table using the data in the DataFrame and the data

743 will be transposed to meet matplotlib's default layout.

744 If a Series or DataFrame is passed, use passed data to draw a

745 table.

746 yerr : DataFrame, Series, array-like, dict and str

747 See :ref:`Plotting with Error Bars <visualization.errorbars>` for

748 detail.

749 xerr : DataFrame, Series, array-like, dict and str

750 Equivalent to yerr.

751 stacked : bool, default False in line and bar plots, and True in area plot

752 If True, create stacked plot.

753 secondary_y : bool or sequence, default False

754 Whether to plot on the secondary y-axis if a list/tuple, which

755 columns to plot on secondary y-axis.

756 mark_right : bool, default True

757 When using a secondary_y axis, automatically mark the column

758 labels with "(right)" in the legend.

759 include_bool : bool, default is False

760 If True, boolean values can be plotted.

761 backend : str, default None

762 Backend to use instead of the backend specified in the option

763 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to

764 specify the ``plotting.backend`` for the whole session, set

765 ``pd.options.plotting.backend``.

766 **kwargs

767 Options to pass to matplotlib plotting method.

768

769 Returns

770 -------

771 :class:`matplotlib.axes.Axes` or numpy.ndarray of them

772 If the backend is not the default matplotlib one, the return value

773 will be the object returned by the backend.

774

775 Notes

776 -----

777 - See matplotlib documentation online for more on this subject

778 - If `kind` = 'bar' or 'barh', you can specify relative alignments

779 for bar plot layout by `position` keyword.

780 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

781 (center)

782 """

783

784 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")

785 _series_kinds = ("pie",)

786 _dataframe_kinds = ("scatter", "hexbin")

787 _kind_aliases = {"density": "kde"}

788 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds

789

    def __init__(self, data) -> None:
        # Keep a reference to the object the accessor was created for;
        # ``__call__`` reads it back as ``self._parent``.
        self._parent = data

792

793 @staticmethod

794 def _get_call_args(backend_name, data, args, kwargs):

795 """

796 This function makes calls to this accessor `__call__` method compatible

797 with the previous `SeriesPlotMethods.__call__` and

798 `DataFramePlotMethods.__call__`. Those had slightly different

799 signatures, since `DataFramePlotMethods` accepted `x` and `y`

800 parameters.

801 """

802 if isinstance(data, ABCSeries):

803 arg_def = [

804 ("kind", "line"),

805 ("ax", None),

806 ("figsize", None),

807 ("use_index", True),

808 ("title", None),

809 ("grid", None),

810 ("legend", False),

811 ("style", None),

812 ("logx", False),

813 ("logy", False),

814 ("loglog", False),

815 ("xticks", None),

816 ("yticks", None),

817 ("xlim", None),

818 ("ylim", None),

819 ("rot", None),

820 ("fontsize", None),

821 ("colormap", None),

822 ("table", False),

823 ("yerr", None),

824 ("xerr", None),

825 ("label", None),

826 ("secondary_y", False),

827 ("xlabel", None),

828 ("ylabel", None),

829 ]

830 elif isinstance(data, ABCDataFrame):

831 arg_def = [

832 ("x", None),

833 ("y", None),

834 ("kind", "line"),

835 ("ax", None),

836 ("subplots", False),

837 ("sharex", None),

838 ("sharey", False),

839 ("layout", None),

840 ("figsize", None),

841 ("use_index", True),

842 ("title", None),

843 ("grid", None),

844 ("legend", True),

845 ("style", None),

846 ("logx", False),

847 ("logy", False),

848 ("loglog", False),

849 ("xticks", None),

850 ("yticks", None),

851 ("xlim", None),

852 ("ylim", None),

853 ("rot", None),

854 ("fontsize", None),

855 ("colormap", None),

856 ("table", False),

857 ("yerr", None),

858 ("xerr", None),

859 ("secondary_y", False),

860 ("xlabel", None),

861 ("ylabel", None),

862 ]

863 else:

864 raise TypeError(

865 f"Called plot accessor for type {type(data).__name__}, "

866 "expected Series or DataFrame"

867 )

868

869 if args and isinstance(data, ABCSeries):

870 positional_args = str(args)[1:-1]

871 keyword_args = ", ".join(

872 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]

873 )

874 msg = (

875 "`Series.plot()` should not be called with positional "

876 "arguments, only keyword arguments. The order of "

877 "positional arguments will change in the future. "

878 f"Use `Series.plot({keyword_args})` instead of "

879 f"`Series.plot({positional_args})`."

880 )

881 raise TypeError(msg)

882

883 pos_args = {name: value for (name, _), value in zip(arg_def, args)}

884 if backend_name == "pandas.plotting._matplotlib":

885 kwargs = dict(arg_def, **pos_args, **kwargs)

886 else:

887 kwargs = dict(pos_args, **kwargs)

888

889 x = kwargs.pop("x", None)

890 y = kwargs.pop("y", None)

891 kind = kwargs.pop("kind", "line")

892 return x, y, kind, kwargs

893

894 def __call__(self, *args, **kwargs):

895 plot_backend = _get_plot_backend(kwargs.pop("backend", None))

896

897 x, y, kind, kwargs = self._get_call_args(

898 plot_backend.__name__, self._parent, args, kwargs

899 )

900

901 kind = self._kind_aliases.get(kind, kind)

902

903 # when using another backend, get out of the way

904 if plot_backend.__name__ != "pandas.plotting._matplotlib":

905 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

906

907 if kind not in self._all_kinds:

908 raise ValueError(f"{kind} is not a valid plot kind")

909

910 # The original data structured can be transformed before passed to the

911 # backend. For example, for DataFrame is common to set the index as the

912 # `x` parameter, and return a Series with the parameter `y` as values.

913 data = self._parent.copy()

914

915 if isinstance(data, ABCSeries):

916 kwargs["reuse_plot"] = True

917

918 if kind in self._dataframe_kinds:

919 if isinstance(data, ABCDataFrame):

920 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)

921 else:

922 raise ValueError(f"plot kind {kind} can only be used for data frames")

923 elif kind in self._series_kinds:

924 if isinstance(data, ABCDataFrame):

925 if y is None and kwargs.get("subplots") is False:

926 raise ValueError(

927 f"{kind} requires either y column or 'subplots=True'"

928 )

929 if y is not None:

930 if is_integer(y) and not data.columns._holds_integer():

931 y = data.columns[y]

932 # converted to series actually. copy to not modify

933 data = data[y].copy()

934 data.index.name = y

935 elif isinstance(data, ABCDataFrame):

936 data_cols = data.columns

937 if x is not None:

938 if is_integer(x) and not data.columns._holds_integer():

939 x = data_cols[x]

940 elif not isinstance(data[x], ABCSeries):

941 raise ValueError("x must be a label or position")

942 data = data.set_index(x)

943 if y is not None:

944 # check if we have y as int or list of ints

945 int_ylist = is_list_like(y) and all(is_integer(c) for c in y)

946 int_y_arg = is_integer(y) or int_ylist

947 if int_y_arg and not data.columns._holds_integer():

948 y = data_cols[y]

949

950 label_kw = kwargs["label"] if "label" in kwargs else False

951 for kw in ["xerr", "yerr"]:

952 if kw in kwargs and (

953 isinstance(kwargs[kw], str) or is_integer(kwargs[kw])

954 ):

955 try:

956 kwargs[kw] = data[kwargs[kw]]

957 except (IndexError, KeyError, TypeError):

958 pass

959

960 # don't overwrite

961 data = data[y].copy()

962

963 if isinstance(data, ABCSeries):

964 label_name = label_kw or y

965 data.name = label_name

966 else:

967 match = is_list_like(label_kw) and len(label_kw) == len(y)

968 if label_kw and not match:

969 raise ValueError(

970 "label should be list-like and same length as y"

971 )

972 label_name = label_kw or data.columns

973 data.columns = label_name

974

975 return plot_backend.plot(data, kind=kind, **kwargs)

976

# Make ``__call__`` carry the same documentation as the ``__doc__`` name
# visible in the enclosing scope.
# NOTE(review): presumably that is the enclosing class docstring; the class
# header is outside this view -- confirm.
__call__.__doc__ = __doc__

978

@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> ax = s.plot.line()

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...    'pig': [20, 18, 489, 675, 1776],
        ...    'horse': [4, 25, 281, 600, 1900]
        ...    }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
       :context: close-figs

       An example with subplots, so an array of axes is returned.

       >>> axes = df.plot.line(subplots=True)
       >>> type(axes)
       <class 'numpy.ndarray'>

    .. plot::
       :context: close-figs

       Let's repeat the same example, but specifying colors for
       each column (in this case, for each animal).

       >>> axes = df.plot.line(
       ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
       ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    This function is useful to plot lines using DataFrame's values
    as coordinates.
    """
    # The decorators above assemble the final docstring: the shared
    # ``_bar_or_line_doc`` text is appended first, ``kind`` is substituted
    # with "line", and the See Also / Examples text is appended last.
    # The Series example assigns the result instead of echoing its repr, so
    # the documented output does not depend on leftover axes state.
    return self(kind="line", x=x, y=y, **kwargs)

1045

@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(  # pylint: disable=disallowed-name
    self, x=None, y=None, **kwargs
) -> PlotAccessor:
    """
    Draw a vertical bar plot.

    Categorical data is shown as rectangular bars whose lengths are
    proportional to the values they stand for, which makes it easy to
    compare discrete categories. The categories being compared lie along
    one axis of the plot and the measured values along the other.
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="bar", x=x, y=y, **kwargs)
    return plotted

1137

@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Draw a horizontal bar plot.

    Quantitative data is shown as rectangular bars whose lengths are
    proportional to the values they stand for, which makes it easy to
    compare discrete categories. The categories being compared lie along
    one axis of the plot and the measured values along the other.
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="barh", x=x, y=y, **kwargs)
    return plotted

1223

def box(self, by=None, **kwargs) -> PlotAccessor:
    r"""
    Draw a box-and-whisker plot of the DataFrame columns.

    Box plots summarise groups of numerical data through their quartiles.
    The box spans the Q1 to Q3 quartile values and carries a line at the
    median (Q2). Whiskers reach out from the box edges to show the range
    of the data; by default they are placed 1.5*IQR (IQR = Q3 - Q1) from
    the edges of the box. Points lying past the end of the whiskers are
    outliers.

    More background is available in Wikipedia's entry for
    `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    Keep in mind that the box and the whiskers can overlap, which happens
    very often when only a small amount of data is plotted.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no grouping was made.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    Groupings are produced when the `by` parameter is given (it accepts a
    column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="box", by=by, **kwargs)
    return plotted

1290

def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
    """
    Draw a single histogram covering the DataFrame's columns.

    A histogram represents the distribution of data. The values of every
    Series in the DataFrame are grouped into bins, and all bins are drawn
    in one :class:`matplotlib.axes.Axes`. This works well when the
    DataFrame's Series share a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no grouping was made.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    Rolling a die 6000 times should produce each value roughly 1000
    times. Rolling two dice and summing the result gives a quite
    different distribution. A histogram illustrates both distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    Passing `by` (a column name, or a list of column names) produces a
    grouped histogram:

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="hist", by=by, bins=bins, **kwargs)
    return plotted

1352

def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
    """
    Plot a Kernel Density Estimate computed with Gaussian kernels.

    `Kernel density estimation`_ (KDE) is a non-parametric statistical
    technique for estimating the probability density function (PDF) of a
    random variable. Gaussian kernels are used here, and the bandwidth is
    determined automatically.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        How the estimator bandwidth is calculated: 'scott', 'silverman',
        a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Starting from a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic bandwidth
    determination and plot the result, evaluated at 1000 equally spaced
    points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth may be given. A small bandwidth value can lead to
    over-fitting, whereas a large bandwidth value may result in
    under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter sets the evaluation points for the plot
    of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    A DataFrame is handled in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth may be given. A small bandwidth value can lead to
    over-fitting, whereas a large bandwidth value may result in
    under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter sets the evaluation points for the plot
    of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
    return plotted


# ``density`` is a second name bound to the very same function object.
density = kde

1462

def area(self, x=None, y=None, stacked: bool = True, **kwargs) -> PlotAccessor:
    """
    Draw a stacked area plot.

    Area plots give a visual display of quantitative data.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Whether the areas are stacked, which they are by default. Set to
        False to create an unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    An area plot built from basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Stacking is the default for area plots. Pass ``stacked=False`` to get
    an unstacked plot:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    An area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    The same with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
    return plotted

1535

def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    Pie plots represent the numerical data in a column proportionally.
    This function wraps :meth:`matplotlib.pyplot.pie` for the specified
    column. When no column reference is passed and ``subplots=True``, a
    pie plot is drawn for each numerical column independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column nor
        a truthy ``subplots`` argument is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    The DataFrame below holds information about the mass and radius of
    some planets. Passing the 'mass' column to the pie function produces
    a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    if isinstance(self._parent, ABCDataFrame):
        # Only a DataFrame parent needs to say what to draw. ``subplots`` is
        # looked at only after ``y`` is known to be missing.
        y_missing = kwargs.get("y") is None
        if y_missing and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")

    plotted = self(kind="pie", **kwargs)
    return plotted

1590

def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    Two dataframe columns define the coordinates of each point, and each
    point is drawn as a filled circle. Plots of this kind help reveal
    complex correlations between two variables. The points could be, for
    instance, natural 2D coordinates such as longitude and latitude in a
    map or, more generally, any pair of metrics that can be plotted
    against each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, applied to the points in turn. For
          instance, when passing [2,14] the point sizes alternate
          between 2 and 14.

          .. versionchanged:: 1.1.0

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, applied to the points in turn. For instance, with
          ['green','yellow'] the points are filled alternately in green
          and yellow.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    A scatter plot whose coordinates come from the values in a
    DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    The same plot with the color taken from a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Delegate to the accessor's call interface with the plot kind fixed.
    plotted = self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
    return plotted

1675

def hexbin(
    self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Plot `x` versus `y` using hexagonal bins. When `C` is `None` (the
    default), the result is a histogram of the number of occurrences of
    the observations at ``(x[i], y[i])``.

    When `C` is specified, it gives values at the coordinates
    ``(x[i], y[i])``. Those values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`, which
    defaults to NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The examples below use random data drawn from a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Although the `'observations'` values range from 1 to 5, the resulting
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Forward the two optional settings only when the caller supplied them,
    # so that an omitted value is not passed on as an explicit ``None``.
    optional = {"reduce_C_function": reduce_C_function, "gridsize": gridsize}
    for option_name, option_value in optional.items():
        if option_value is not None:
            kwargs[option_name] = option_value

    plotted = self(kind="hexbin", x=x, y=y, C=C, **kwargs)
    return plotted

1766

1767

# Cache of plotting backend modules that have already been loaded, keyed by
# the backend name string; ``_get_plot_backend`` reads and fills it.
_backends: dict[str, types.ModuleType] = {}

1769

1770

def _load_backend(backend: str) -> types.ModuleType:
    """
    Import and return the module implementing a pandas plotting backend.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either "matplotlib", the name of an
        entry point registered in the ``pandas_plotting_backends`` group, or
        an importable module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If "matplotlib" is requested and ``pandas.plotting._matplotlib``
        cannot be imported.
    ValueError
        If nothing can be loaded for `backend`, or what was loaded has no
        ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group = "pandas_plotting_backends"
    registered = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(registered, "select"):
        candidates = registered.select(group=group)  # pyright: ignore[reportGeneralTypeIssues]
    else:
        candidates = registered.get(group, ())

    # ``None`` stands for "nothing loaded"; it has no ``plot`` attribute, so
    # it falls through to the ValueError below.
    module = None
    chosen = next((ep for ep in candidates if ep.name == backend), None)
    if chosen is not None:
        module = chosen.load()
    else:
        # Fall back to unregistered, module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # Reported as a ValueError below.
            module = None

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )

1836

1837

def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend module to use (e.g. `pandas.plotting._matplotlib`).

    pandas plots with matplotlib by default, but other third-party backends
    can be used as well. This function returns the module that provides the
    top-level `.plot` method doing the actual plotting. The backend is named
    by a string taken from the keyword argument `backend` or, when that is
    not given, from the option `pandas.options.plotting.backend`. The rest
    of the code in this file plots through the backend returned here.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    backend_str: str = backend or get_option("plotting.backend")

    try:
        # Backend already loaded earlier: hand back the cached module.
        return _backends[backend_str]
    except KeyError:
        pass

    # First request for this name: load it and remember it for later calls.
    loaded = _load_backend(backend_str)
    _backends[backend_str] = loaded
    return loaded

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/plotting/_core.py: 30%

191 statements