1from __future__ import annotations
2
3import importlib
4import types
5from typing import (
6 TYPE_CHECKING,
7 Sequence,
8)
9
10from pandas._config import get_option
11
12from pandas._typing import IndexLabel
13from pandas.util._decorators import (
14 Appender,
15 Substitution,
16)
17
18from pandas.core.dtypes.common import (
19 is_integer,
20 is_list_like,
21)
22from pandas.core.dtypes.generic import (
23 ABCDataFrame,
24 ABCSeries,
25)
26
27from pandas.core.base import PandasObject
28
29if TYPE_CHECKING:
30 from matplotlib.axes import Axes
31
32 from pandas import DataFrame
33
34
def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw histogram of the input series using matplotlib.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object
        If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches by default.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    backend_module = _get_plot_backend(backend)
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return backend_module.hist_series(self, **hist_options, **kwargs)
113
114
def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share x axis and set some x axis labels to
        invisible.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.

    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    backend_module = _get_plot_backend(backend)
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return backend_module.hist_frame(data, **hist_options, **kwargs)
241
242
243_boxplot_doc = """
244Make a box plot from DataFrame columns.
245
246Make a box-and-whisker plot from DataFrame columns, optionally grouped
247by some other columns. A box plot is a method for graphically depicting
248groups of numerical data through their quartiles.
249The box extends from the Q1 to Q3 quartile values of the data,
250with a line at the median (Q2). The whiskers extend from the edges
251of box to show the range of the data. By default, they extend no more than
252`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
253data point within that interval. Outliers are plotted as separate dots.
254
255For further details see
256Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
257
258Parameters
259----------
260%(data)s\
261column : str or list of str, optional
262 Column name or list of names, or vector.
263 Can be any valid input to :meth:`pandas.DataFrame.groupby`.
264by : str or array-like, optional
265 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
266 One box-plot will be done per value of columns in `by`.
267ax : object of class matplotlib.axes.Axes, optional
268 The matplotlib axes to be used by boxplot.
269fontsize : float or str
270 Tick label font size in points or as a string (e.g., `large`).
271rot : float, default 0
272 The rotation angle of labels (in degrees)
273 with respect to the screen coordinate system.
274grid : bool, default True
275 Setting this to True will show the grid.
276figsize : A tuple (width, height) in inches
277 The size of the figure to create in matplotlib.
278layout : tuple (rows, columns), optional
279 For example, (3, 5) will display the subplots
280 using 3 rows and 5 columns, starting from the top-left.
281return_type : {'axes', 'dict', 'both'} or None, default 'axes'
282 The kind of object to return. The default is ``axes``.
283
284 * 'axes' returns the matplotlib axes the boxplot is drawn on.
285 * 'dict' returns a dictionary whose values are the matplotlib
286 Lines of the boxplot.
287 * 'both' returns a namedtuple with the axes and dict.
288 * when grouping with ``by``, a Series mapping columns to
289 ``return_type`` is returned.
290
291 If ``return_type`` is `None`, a NumPy array
292 of axes with the same shape as ``layout`` is returned.
293%(backend)s\
294
295**kwargs
296 All other plotting keyword arguments to be passed to
297 :func:`matplotlib.pyplot.boxplot`.
298
299Returns
300-------
301result
302 See Notes.
303
304See Also
305--------
306pandas.Series.plot.hist: Make a histogram.
307matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
308
309Notes
310-----
311The return type depends on the `return_type` parameter:
312
313* 'axes' : object of class matplotlib.axes.Axes
314* 'dict' : dict of matplotlib.lines.Line2D objects
315* 'both' : a namedtuple with structure (ax, lines)
316
317For data grouped with ``by``, return a Series of the above or a numpy
318array:
319
320* :class:`~pandas.Series`
321* :class:`~numpy.array` (for ``return_type = None``)
322
323Use ``return_type='dict'`` when you want to tweak the appearance
324of the lines after plotting. In this case a dict containing the Lines
325making up the boxes, caps, fliers, medians, and whiskers is returned.
326
327Examples
328--------
329
330Boxplots can be created for every column in the dataframe
331by ``df.boxplot()`` or indicating the columns to be used:
332
333.. plot::
334 :context: close-figs
335
336 >>> np.random.seed(1234)
337 >>> df = pd.DataFrame(np.random.randn(10, 4),
338 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
339 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP
340
Boxplots of variable distributions grouped by the values of a third
342variable can be created using the option ``by``. For instance:
343
344.. plot::
345 :context: close-figs
346
347 >>> df = pd.DataFrame(np.random.randn(10, 2),
348 ... columns=['Col1', 'Col2'])
349 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
350 ... 'B', 'B', 'B', 'B', 'B'])
351 >>> boxplot = df.boxplot(by='X')
352
353A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
354in order to group the data by combination of the variables in the x-axis:
355
356.. plot::
357 :context: close-figs
358
359 >>> df = pd.DataFrame(np.random.randn(10, 3),
360 ... columns=['Col1', 'Col2', 'Col3'])
361 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
362 ... 'B', 'B', 'B', 'B', 'B'])
363 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
364 ... 'B', 'A', 'B', 'A', 'B'])
365 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
366
367The layout of boxplot can be adjusted giving a tuple to ``layout``:
368
369.. plot::
370 :context: close-figs
371
372 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
373 ... layout=(2, 1))
374
375Additional formatting can be done to the boxplot, like suppressing the grid
376(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
377or changing the fontsize (i.e. ``fontsize=15``):
378
379.. plot::
380 :context: close-figs
381
382 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP
383
384The parameter ``return_type`` can be used to select the type of element
385returned by `boxplot`. When ``return_type='axes'`` is selected,
386the matplotlib axes on which the boxplot is drawn are returned:
387
388 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
389 >>> type(boxplot)
390 <class 'matplotlib.axes._subplots.AxesSubplot'>
391
392When grouping with ``by``, a Series mapping columns to ``return_type``
393is returned:
394
395 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
396 ... return_type='axes')
397 >>> type(boxplot)
398 <class 'pandas.core.series.Series'>
399
400If ``return_type`` is `None`, a NumPy array of axes with the same shape
401as ``layout`` is returned:
402
403 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
404 ... return_type=None)
405 >>> type(boxplot)
406 <class 'numpy.ndarray'>
407"""
408
409_backend_doc = """\
410backend : str, default None
411 Backend to use instead of the backend specified in the option
412 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
413 specify the ``plotting.backend`` for the whole session, set
414 ``pd.options.plotting.backend``.
415"""
416
417
418_bar_or_line_doc = """
419 Parameters
420 ----------
421 x : label or position, optional
422 Allows plotting of one column versus another. If not specified,
423 the index of the DataFrame is used.
424 y : label or position, optional
425 Allows plotting of one column versus another. If not specified,
426 all numerical columns are used.
427 color : str, array-like, or dict, optional
428 The color for each of the DataFrame's columns. Possible values are:
429
430 - A single color string referred to by name, RGB or RGBA code,
431 for instance 'red' or '#a98d19'.
432
433 - A sequence of color strings referred to by name, RGB or RGBA
434 code, which will be used for each column recursively. For
435 instance ['green','yellow'] each column's %(kind)s will be filled in
436 green or yellow, alternatively. If there is only a single column to
437 be plotted, then only the first color from the color list will be
438 used.
439
440 - A dict of the form {column name : color}, so that each column will be
441 colored accordingly. For example, if your columns are called `a` and
442 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
443 column `a` in green and %(kind)ss for column `b` in red.
444
445 .. versionadded:: 1.1.0
446
447 **kwargs
448 Additional keyword arguments are documented in
449 :meth:`DataFrame.plot`.
450
451 Returns
452 -------
453 matplotlib.axes.Axes or np.ndarray of them
454 An ndarray is returned with one :class:`matplotlib.axes.Axes`
455 per column when ``subplots=True``.
456"""
457
458
@Substitution(data="data : DataFrame\n    The data to visualize.\n", backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # No docstring here on purpose: the decorators above attach one built from
    # the ``_boxplot_doc`` template.
    # This module-level entry point has no ``backend`` parameter and always
    # asks for the "matplotlib" backend.
    backend_module = _get_plot_backend("matplotlib")
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return backend_module.boxplot(data, **box_options, **kwargs)
488
489
@Substitution(data="", backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # No docstring here on purpose: the decorators above attach one built from
    # the ``_boxplot_doc`` template.
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    backend_module = _get_plot_backend(backend)
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return backend_module.boxplot_frame(self, **box_options, **kwargs)
520
521
def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : float or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.
    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    backend_module = _get_plot_backend(backend)
    box_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    return backend_module.boxplot_frame_groupby(grouped, **box_options, **kwargs)
611
612
613class PlotAccessor(PandasObject):
614 """
615 Make plots of Series or DataFrame.
616
617 Uses the backend specified by the
618 option ``plotting.backend``. By default, matplotlib is used.
619
620 Parameters
621 ----------
622 data : Series or DataFrame
623 The object for which the method is called.
624 x : label or position, default None
625 Only used if data is a DataFrame.
626 y : label, position or list of label, positions, default None
627 Allows plotting of one column versus another. Only used if data is a
628 DataFrame.
629 kind : str
630 The kind of plot to produce:
631
632 - 'line' : line plot (default)
633 - 'bar' : vertical bar plot
634 - 'barh' : horizontal bar plot
635 - 'hist' : histogram
636 - 'box' : boxplot
637 - 'kde' : Kernel Density Estimation plot
638 - 'density' : same as 'kde'
639 - 'area' : area plot
640 - 'pie' : pie plot
641 - 'scatter' : scatter plot (DataFrame only)
642 - 'hexbin' : hexbin plot (DataFrame only)
643 ax : matplotlib axes object, default None
644 An axes of the current figure.
645 subplots : bool or sequence of iterables, default False
646 Whether to group columns into subplots:
647
648 - ``False`` : No subplots will be used
649 - ``True`` : Make separate subplots for each column.
650 - sequence of iterables of column labels: Create a subplot for each
651 group of columns. For example `[('a', 'c'), ('b', 'd')]` will
652 create 2 subplots: one with columns 'a' and 'c', and one
653 with columns 'b' and 'd'. Remaining columns that aren't specified
654 will be plotted in additional subplots (one per column).
655
656 .. versionadded:: 1.5.0
657
658 sharex : bool, default True if ax is None else False
659 In case ``subplots=True``, share x axis and set some x axis labels
660 to invisible; defaults to True if ax is None otherwise False if
661 an ax is passed in; Be aware, that passing in both an ax and
        ``sharex=True`` will alter all x axis labels for all axes in a figure.
663 sharey : bool, default False
664 In case ``subplots=True``, share y axis and set some y axis labels to invisible.
665 layout : tuple, optional
666 (rows, columns) for the layout of subplots.
667 figsize : a tuple (width, height) in inches
668 Size of a figure object.
669 use_index : bool, default True
670 Use index as ticks for x axis.
671 title : str or list
672 Title to use for the plot. If a string is passed, print the string
673 at the top of the figure. If a list is passed and `subplots` is
674 True, print each item in the list above the corresponding subplot.
675 grid : bool, default None (matlab style default)
676 Axis grid lines.
677 legend : bool or {'reverse'}
678 Place legend on axis subplots.
679 style : list or dict
680 The matplotlib line style per column.
681 logx : bool or 'sym', default False
682 Use log scaling or symlog scaling on x axis.
683
    logy : bool or 'sym', default False
685 Use log scaling or symlog scaling on y axis.
686
687 loglog : bool or 'sym', default False
688 Use log scaling or symlog scaling on both x and y axes.
689
690 xticks : sequence
691 Values to use for the xticks.
692 yticks : sequence
693 Values to use for the yticks.
694 xlim : 2-tuple/list
695 Set the x limits of the current axes.
696 ylim : 2-tuple/list
697 Set the y limits of the current axes.
698 xlabel : label, optional
699 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
700 x-column name for planar plots.
701
702 .. versionadded:: 1.1.0
703
704 .. versionchanged:: 1.2.0
705
706 Now applicable to planar plots (`scatter`, `hexbin`).
707
708 .. versionchanged:: 2.0.0
709
710 Now applicable to histograms.
711
712 ylabel : label, optional
713 Name to use for the ylabel on y-axis. Default will show no ylabel, or the
714 y-column name for planar plots.
715
716 .. versionadded:: 1.1.0
717
718 .. versionchanged:: 1.2.0
719
720 Now applicable to planar plots (`scatter`, `hexbin`).
721
722 .. versionchanged:: 2.0.0
723
724 Now applicable to histograms.
725
726 rot : float, default None
727 Rotation for ticks (xticks for vertical, yticks for horizontal
728 plots).
729 fontsize : float, default None
730 Font size for xticks and yticks.
731 colormap : str or matplotlib colormap object, default None
732 Colormap to select colors from. If string, load colormap with that
733 name from matplotlib.
734 colorbar : bool, optional
735 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
736 plots).
737 position : float
738 Specify relative alignments for bar plot layout.
739 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
740 (center).
741 table : bool, Series or DataFrame, default False
742 If True, draw a table using the data in the DataFrame and the data
743 will be transposed to meet matplotlib's default layout.
744 If a Series or DataFrame is passed, use passed data to draw a
745 table.
746 yerr : DataFrame, Series, array-like, dict and str
747 See :ref:`Plotting with Error Bars <visualization.errorbars>` for
748 detail.
749 xerr : DataFrame, Series, array-like, dict and str
750 Equivalent to yerr.
751 stacked : bool, default False in line and bar plots, and True in area plot
752 If True, create stacked plot.
753 secondary_y : bool or sequence, default False
754 Whether to plot on the secondary y-axis if a list/tuple, which
755 columns to plot on secondary y-axis.
756 mark_right : bool, default True
757 When using a secondary_y axis, automatically mark the column
758 labels with "(right)" in the legend.
759 include_bool : bool, default is False
760 If True, boolean values can be plotted.
761 backend : str, default None
762 Backend to use instead of the backend specified in the option
763 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
764 specify the ``plotting.backend`` for the whole session, set
765 ``pd.options.plotting.backend``.
766 **kwargs
767 Options to pass to matplotlib plotting method.
768
769 Returns
770 -------
771 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
772 If the backend is not the default matplotlib one, the return value
773 will be the object returned by the backend.
774
775 Notes
776 -----
777 - See matplotlib documentation online for more on this subject
778 - If `kind` = 'bar' or 'barh', you can specify relative alignments
779 for bar plot layout by `position` keyword.
780 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
781 (center)
782 """
783
784 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")
785 _series_kinds = ("pie",)
786 _dataframe_kinds = ("scatter", "hexbin")
787 _kind_aliases = {"density": "kde"}
788 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds
789
    def __init__(self, data) -> None:
        # Keep a reference to the object being plotted; ``__call__`` reads it
        # back through ``self._parent``.
        self._parent = data
792
793 @staticmethod
794 def _get_call_args(backend_name, data, args, kwargs):
795 """
796 This function makes calls to this accessor `__call__` method compatible
797 with the previous `SeriesPlotMethods.__call__` and
798 `DataFramePlotMethods.__call__`. Those had slightly different
799 signatures, since `DataFramePlotMethods` accepted `x` and `y`
800 parameters.
801 """
802 if isinstance(data, ABCSeries):
803 arg_def = [
804 ("kind", "line"),
805 ("ax", None),
806 ("figsize", None),
807 ("use_index", True),
808 ("title", None),
809 ("grid", None),
810 ("legend", False),
811 ("style", None),
812 ("logx", False),
813 ("logy", False),
814 ("loglog", False),
815 ("xticks", None),
816 ("yticks", None),
817 ("xlim", None),
818 ("ylim", None),
819 ("rot", None),
820 ("fontsize", None),
821 ("colormap", None),
822 ("table", False),
823 ("yerr", None),
824 ("xerr", None),
825 ("label", None),
826 ("secondary_y", False),
827 ("xlabel", None),
828 ("ylabel", None),
829 ]
830 elif isinstance(data, ABCDataFrame):
831 arg_def = [
832 ("x", None),
833 ("y", None),
834 ("kind", "line"),
835 ("ax", None),
836 ("subplots", False),
837 ("sharex", None),
838 ("sharey", False),
839 ("layout", None),
840 ("figsize", None),
841 ("use_index", True),
842 ("title", None),
843 ("grid", None),
844 ("legend", True),
845 ("style", None),
846 ("logx", False),
847 ("logy", False),
848 ("loglog", False),
849 ("xticks", None),
850 ("yticks", None),
851 ("xlim", None),
852 ("ylim", None),
853 ("rot", None),
854 ("fontsize", None),
855 ("colormap", None),
856 ("table", False),
857 ("yerr", None),
858 ("xerr", None),
859 ("secondary_y", False),
860 ("xlabel", None),
861 ("ylabel", None),
862 ]
863 else:
864 raise TypeError(
865 f"Called plot accessor for type {type(data).__name__}, "
866 "expected Series or DataFrame"
867 )
868
869 if args and isinstance(data, ABCSeries):
870 positional_args = str(args)[1:-1]
871 keyword_args = ", ".join(
872 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]
873 )
874 msg = (
875 "`Series.plot()` should not be called with positional "
876 "arguments, only keyword arguments. The order of "
877 "positional arguments will change in the future. "
878 f"Use `Series.plot({keyword_args})` instead of "
879 f"`Series.plot({positional_args})`."
880 )
881 raise TypeError(msg)
882
883 pos_args = {name: value for (name, _), value in zip(arg_def, args)}
884 if backend_name == "pandas.plotting._matplotlib":
885 kwargs = dict(arg_def, **pos_args, **kwargs)
886 else:
887 kwargs = dict(pos_args, **kwargs)
888
889 x = kwargs.pop("x", None)
890 y = kwargs.pop("y", None)
891 kind = kwargs.pop("kind", "line")
892 return x, y, kind, kwargs
893
894 def __call__(self, *args, **kwargs):
895 plot_backend = _get_plot_backend(kwargs.pop("backend", None))
896
897 x, y, kind, kwargs = self._get_call_args(
898 plot_backend.__name__, self._parent, args, kwargs
899 )
900
901 kind = self._kind_aliases.get(kind, kind)
902
903 # when using another backend, get out of the way
904 if plot_backend.__name__ != "pandas.plotting._matplotlib":
905 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)
906
907 if kind not in self._all_kinds:
908 raise ValueError(f"{kind} is not a valid plot kind")
909
910 # The original data structured can be transformed before passed to the
911 # backend. For example, for DataFrame is common to set the index as the
912 # `x` parameter, and return a Series with the parameter `y` as values.
913 data = self._parent.copy()
914
915 if isinstance(data, ABCSeries):
916 kwargs["reuse_plot"] = True
917
918 if kind in self._dataframe_kinds:
919 if isinstance(data, ABCDataFrame):
920 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
921 else:
922 raise ValueError(f"plot kind {kind} can only be used for data frames")
923 elif kind in self._series_kinds:
924 if isinstance(data, ABCDataFrame):
925 if y is None and kwargs.get("subplots") is False:
926 raise ValueError(
927 f"{kind} requires either y column or 'subplots=True'"
928 )
929 if y is not None:
930 if is_integer(y) and not data.columns._holds_integer():
931 y = data.columns[y]
932 # converted to series actually. copy to not modify
933 data = data[y].copy()
934 data.index.name = y
935 elif isinstance(data, ABCDataFrame):
936 data_cols = data.columns
937 if x is not None:
938 if is_integer(x) and not data.columns._holds_integer():
939 x = data_cols[x]
940 elif not isinstance(data[x], ABCSeries):
941 raise ValueError("x must be a label or position")
942 data = data.set_index(x)
943 if y is not None:
944 # check if we have y as int or list of ints
945 int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
946 int_y_arg = is_integer(y) or int_ylist
947 if int_y_arg and not data.columns._holds_integer():
948 y = data_cols[y]
949
950 label_kw = kwargs["label"] if "label" in kwargs else False
951 for kw in ["xerr", "yerr"]:
952 if kw in kwargs and (
953 isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
954 ):
955 try:
956 kwargs[kw] = data[kwargs[kw]]
957 except (IndexError, KeyError, TypeError):
958 pass
959
960 # don't overwrite
961 data = data[y].copy()
962
963 if isinstance(data, ABCSeries):
964 label_name = label_kw or y
965 data.name = label_name
966 else:
967 match = is_list_like(label_kw) and len(label_kw) == len(y)
968 if label_kw and not match:
969 raise ValueError(
970 "label should be list-like and same length as y"
971 )
972 label_name = label_kw or data.columns
973 data.columns = label_name
974
975 return plot_backend.plot(data, kind=kind, **kwargs)
976
977 __call__.__doc__ = __doc__
978
    # The three decorators below assemble the final docstring from the
    # function's own docstring, the shared ``_bar_or_line_doc`` template
    # (its ``%(kind)s`` placeholders filled via ``Substitution(kind="line")``)
    # and the "See Also"/"Examples" text passed to the outer ``Appender``.
    # NOTE(review): the ``ylabel='Density'`` output shown for ``s.plot.line()``
    # looks like it came from a density example -- confirm the expected repr.
    @Appender(
        """
        See Also
        --------
        matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

        Examples
        --------

        .. plot::
            :context: close-figs

            >>> s = pd.Series([1, 3, 2])
            >>> s.plot.line()
            <AxesSubplot: ylabel='Density'>

        .. plot::
            :context: close-figs

            The following example shows the populations for some animals
            over the years.

            >>> df = pd.DataFrame({
            ...    'pig': [20, 18, 489, 675, 1776],
            ...    'horse': [4, 25, 281, 600, 1900]
            ...    }, index=[1990, 1997, 2003, 2009, 2014])
            >>> lines = df.plot.line()

        .. plot::
           :context: close-figs

           An example with subplots, so an array of axes is returned.

           >>> axes = df.plot.line(subplots=True)
           >>> type(axes)
           <class 'numpy.ndarray'>

        .. plot::
           :context: close-figs

           Let's repeat the same example, but specifying colors for
           each column (in this case, for each animal).

           >>> axes = df.plot.line(
           ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
           ... )

        .. plot::
            :context: close-figs

            The following example shows the relationship between both
            populations.

            >>> lines = df.plot.line(x='pig', y='horse')
        """
    )
    @Substitution(kind="line")
    @Appender(_bar_or_line_doc)
    def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
        """
        Plot Series or DataFrame as lines.

        This function is useful to plot lines using DataFrame's values
        as coordinates.
        """
        # Delegate to ``__call__`` with the kind fixed to "line"; passing
        # ``kind`` again through ``kwargs`` raises TypeError (duplicate
        # keyword).
        # NOTE(review): ``__call__`` returns whatever the backend's ``plot``
        # returns, so the ``PlotAccessor`` return annotation looks inaccurate
        # -- confirm before relying on it.
        return self(kind="line", x=x, y=y, **kwargs)
1045
1046 @Appender(
1047 """
1048 See Also
1049 --------
1050 DataFrame.plot.barh : Horizontal bar plot.
1051 DataFrame.plot : Make plots of a DataFrame.
1052 matplotlib.pyplot.bar : Make a bar plot with matplotlib.
1053
1054 Examples
1055 --------
1056 Basic plot.
1057
1058 .. plot::
1059 :context: close-figs
1060
1061 >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
1062 >>> ax = df.plot.bar(x='lab', y='val', rot=0)
1063
1064 Plot a whole dataframe to a bar plot. Each column is assigned a
1065 distinct color, and each row is nested in a group along the
1066 horizontal axis.
1067
1068 .. plot::
1069 :context: close-figs
1070
1071 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1072 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1073 >>> index = ['snail', 'pig', 'elephant',
1074 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1075 >>> df = pd.DataFrame({'speed': speed,
1076 ... 'lifespan': lifespan}, index=index)
1077 >>> ax = df.plot.bar(rot=0)
1078
1079 Plot stacked bar charts for the DataFrame
1080
1081 .. plot::
1082 :context: close-figs
1083
1084 >>> ax = df.plot.bar(stacked=True)
1085
1086 Instead of nesting, the figure can be split by column with
1087 ``subplots=True``. In this case, a :class:`numpy.ndarray` of
1088 :class:`matplotlib.axes.Axes` are returned.
1089
1090 .. plot::
1091 :context: close-figs
1092
1093 >>> axes = df.plot.bar(rot=0, subplots=True)
1094 >>> axes[1].legend(loc=2) # doctest: +SKIP
1095
1096 If you don't like the default colours, you can specify how you'd
1097 like each column to be colored.
1098
1099 .. plot::
1100 :context: close-figs
1101
1102 >>> axes = df.plot.bar(
1103 ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
1104 ... )
1105 >>> axes[1].legend(loc=2) # doctest: +SKIP
1106
1107 Plot a single column.
1108
1109 .. plot::
1110 :context: close-figs
1111
1112 >>> ax = df.plot.bar(y='speed', rot=0)
1113
1114 Plot only selected categories for the DataFrame.
1115
1116 .. plot::
1117 :context: close-figs
1118
1119 >>> ax = df.plot.bar(x='lifespan', rot=0)
1120 """
1121 )
1122 @Substitution(kind="bar")
1123 @Appender(_bar_or_line_doc)
    def bar(  # pylint: disable=disallowed-name
        self, x=None, y=None, **kwargs
    ) -> PlotAccessor:
        """
        Vertical bar plot.

        A bar plot is a plot that presents categorical data with
        rectangular bars with lengths proportional to the values that they
        represent. A bar plot shows comparisons among discrete categories. One
        axis of the plot shows the specific categories being compared, and the
        other axis represents a measured value.
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "bar".
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="bar", x=x, y=y, **kwargs)
1137
1138 @Appender(
1139 """
1140 See Also
1141 --------
1142 DataFrame.plot.bar: Vertical bar plot.
1143 DataFrame.plot : Make plots of DataFrame using matplotlib.
1144 matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.
1145
1146 Examples
1147 --------
1148 Basic example
1149
1150 .. plot::
1151 :context: close-figs
1152
1153 >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
1154 >>> ax = df.plot.barh(x='lab', y='val')
1155
1156 Plot a whole DataFrame to a horizontal bar plot
1157
1158 .. plot::
1159 :context: close-figs
1160
1161 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1162 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1163 >>> index = ['snail', 'pig', 'elephant',
1164 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1165 >>> df = pd.DataFrame({'speed': speed,
1166 ... 'lifespan': lifespan}, index=index)
1167 >>> ax = df.plot.barh()
1168
1169 Plot stacked barh charts for the DataFrame
1170
1171 .. plot::
1172 :context: close-figs
1173
1174 >>> ax = df.plot.barh(stacked=True)
1175
1176 We can specify colors for each column
1177
1178 .. plot::
1179 :context: close-figs
1180
1181 >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})
1182
1183 Plot a column of the DataFrame to a horizontal bar plot
1184
1185 .. plot::
1186 :context: close-figs
1187
1188 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1189 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1190 >>> index = ['snail', 'pig', 'elephant',
1191 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1192 >>> df = pd.DataFrame({'speed': speed,
1193 ... 'lifespan': lifespan}, index=index)
1194 >>> ax = df.plot.barh(y='speed')
1195
1196 Plot DataFrame versus the desired column
1197
1198 .. plot::
1199 :context: close-figs
1200
1201 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1202 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1203 >>> index = ['snail', 'pig', 'elephant',
1204 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1205 >>> df = pd.DataFrame({'speed': speed,
1206 ... 'lifespan': lifespan}, index=index)
1207 >>> ax = df.plot.barh(x='lifespan')
1208 """
1209 )
1210 @Substitution(kind="bar")
1211 @Appender(_bar_or_line_doc)
    def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
        """
        Make a horizontal bar plot.

        A horizontal bar plot is a plot that presents quantitative data with
        rectangular bars with lengths proportional to the values that they
        represent. A bar plot shows comparisons among discrete categories. One
        axis of the plot shows the specific categories being compared, and the
        other axis represents a measured value.
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "barh".
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="barh", x=x, y=y, **kwargs)
1223
    def box(self, by=None, **kwargs) -> PlotAccessor:
        r"""
        Make a box plot of the DataFrame columns.

        A box plot is a method for graphically depicting groups of numerical
        data through their quartiles.
        The box extends from the Q1 to Q3 quartile values of the data,
        with a line at the median (Q2). The whiskers extend from the edges
        of box to show the range of the data. The position of the whiskers
        is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
        box. Outlier points are those past the end of the whiskers.

        For further details see Wikipedia's
        entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

        A consideration when using this chart is that the box and the whiskers
        can overlap, which is very common when plotting small sets of data.

        Parameters
        ----------
        by : str or sequence, optional
            Column in the DataFrame to group by.

            .. versionchanged:: 1.4.0

               Previously, `by` was silently ignored and no grouping was made.

        **kwargs
            Additional keywords are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        :class:`matplotlib.axes.Axes` or numpy.ndarray of them

        See Also
        --------
        DataFrame.boxplot: Another method to draw a box plot.
        Series.plot.box: Draw a box plot from a Series object.
        matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

        Examples
        --------
        Draw a box plot from a DataFrame with four columns of randomly
        generated data.

        .. plot::
            :context: close-figs

            >>> data = np.random.randn(25, 4)
            >>> df = pd.DataFrame(data, columns=list('ABCD'))
            >>> ax = df.plot.box()

        You can also generate groupings if you specify the `by` parameter (which
        can take a column name, or a list or tuple of column names):

        .. versionchanged:: 1.4.0

        .. plot::
            :context: close-figs

            >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
            >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
            >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "box".
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="box", by=by, **kwargs)
1290
    def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
        """
        Draw one histogram of the DataFrame's columns.

        A histogram is a representation of the distribution of data.
        This function groups the values of all given Series in the DataFrame
        into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
        This is useful when the DataFrame's Series are in a similar scale.

        Parameters
        ----------
        by : str or sequence, optional
            Column in the DataFrame to group by.

            .. versionchanged:: 1.4.0

               Previously, `by` was silently ignored and no grouping was made.

        bins : int, default 10
            Number of histogram bins to be used.
        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        :class:`matplotlib.axes.Axes`
            Return a histogram plot.

        See Also
        --------
        DataFrame.hist : Draw histograms per DataFrame's Series.
        Series.hist : Draw a histogram with Series' data.

        Examples
        --------
        When we roll a die 6000 times, we expect to get each value around 1000
        times. But when we roll two dice and sum the result, the distribution
        is going to be quite different. A histogram illustrates those
        distributions.

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame(
            ...     np.random.randint(1, 7, 6000),
            ...     columns = ['one'])
            >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
            >>> ax = df.plot.hist(bins=12, alpha=0.5)

        A grouped histogram can be generated by providing the parameter `by` (which
        can be a column name, or a list of column names):

        .. plot::
            :context: close-figs

            >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
            >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
            >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "hist",
        # forwarding ``by`` and ``bins`` as keywords.
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="hist", by=by, bins=bins, **kwargs)
1352
    def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
        """
        Generate Kernel Density Estimate plot using Gaussian kernels.

        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
        way to estimate the probability density function (PDF) of a random
        variable. This function uses Gaussian kernels and includes automatic
        bandwidth determination.

        .. _kernel density estimation:
            https://en.wikipedia.org/wiki/Kernel_density_estimation

        Parameters
        ----------
        bw_method : str, scalar or callable, optional
            The method used to calculate the estimator bandwidth. This can be
            'scott', 'silverman', a scalar constant or a callable.
            If None (default), 'scott' is used.
            See :class:`scipy.stats.gaussian_kde` for more information.
        ind : NumPy array or int, optional
            Evaluation points for the estimated PDF. If None (default),
            1000 equally spaced points are used. If `ind` is a NumPy array, the
            KDE is evaluated at the points passed. If `ind` is an integer,
            `ind` number of equally spaced points are used.
        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or numpy.ndarray of them

        See Also
        --------
        scipy.stats.gaussian_kde : Representation of a kernel-density
            estimate using Gaussian kernels. This is the function used
            internally to estimate the PDF.

        Examples
        --------
        Given a Series of points randomly sampled from an unknown
        distribution, estimate its PDF using KDE with automatic
        bandwidth determination and plot the results, evaluating them at
        1000 equally spaced points (default):

        .. plot::
            :context: close-figs

            >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
            >>> ax = s.plot.kde()

        A scalar bandwidth can be specified. Using a small bandwidth value can
        lead to over-fitting, while using a large bandwidth value may result
        in under-fitting:

        .. plot::
            :context: close-figs

            >>> ax = s.plot.kde(bw_method=0.3)

        .. plot::
            :context: close-figs

            >>> ax = s.plot.kde(bw_method=3)

        Finally, the `ind` parameter determines the evaluation points for the
        plot of the estimated PDF:

        .. plot::
            :context: close-figs

            >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

        For DataFrame, it works in the same way:

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame({
            ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
            ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
            ... })
            >>> ax = df.plot.kde()

        A scalar bandwidth can be specified. Using a small bandwidth value can
        lead to over-fitting, while using a large bandwidth value may result
        in under-fitting:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(bw_method=0.3)

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(bw_method=3)

        Finally, the `ind` parameter determines the evaluation points for the
        plot of the estimated PDF:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "kde",
        # forwarding ``bw_method`` and ``ind`` as keywords (``None`` included).
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

    # ``density`` is an alias: both names refer to the same function object.
    density = kde
1462
    def area(self, x=None, y=None, stacked: bool = True, **kwargs) -> PlotAccessor:
        """
        Draw a stacked area plot.

        An area plot displays quantitative data visually.
        This function wraps the matplotlib area function.

        Parameters
        ----------
        x : label or position, optional
            Coordinates for the X axis. By default uses the index.
        y : label or position, optional
            Column to plot. By default uses all columns.
        stacked : bool, default True
            Area plots are stacked by default. Set to False to create an
            unstacked plot.
        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or numpy.ndarray
            Area plot, or array of area plots if subplots is True.

        See Also
        --------
        DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

        Examples
        --------
        Draw an area plot based on basic business metrics:

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame({
            ...     'sales': [3, 2, 3, 9, 10, 6],
            ...     'signups': [5, 5, 6, 12, 14, 13],
            ...     'visits': [20, 42, 28, 62, 81, 50],
            ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
            ...                        freq='M'))
            >>> ax = df.plot.area()

        Area plots are stacked by default. To produce an unstacked plot,
        pass ``stacked=False``:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.area(stacked=False)

        Draw an area plot for a single column:

        .. plot::
            :context: close-figs

            >>> ax = df.plot.area(y='sales')

        Draw with a different `x`:

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame({
            ...     'sales': [3, 2, 3],
            ...     'visits': [20, 42, 28],
            ...     'day': [1, 2, 3],
            ... })
            >>> ax = df.plot.area(x='day')
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to "area";
        # ``stacked`` is always forwarded explicitly (default True).
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
1535
1536 def pie(self, **kwargs) -> PlotAccessor:
1537 """
1538 Generate a pie plot.
1539
1540 A pie plot is a proportional representation of the numerical data in a
1541 column. This function wraps :meth:`matplotlib.pyplot.pie` for the
1542 specified column. If no column reference is passed and
1543 ``subplots=True`` a pie plot is drawn for each numerical column
1544 independently.
1545
1546 Parameters
1547 ----------
1548 y : int or label, optional
1549 Label or position of the column to plot.
1550 If not provided, ``subplots=True`` argument must be passed.
1551 **kwargs
1552 Keyword arguments to pass on to :meth:`DataFrame.plot`.
1553
1554 Returns
1555 -------
1556 matplotlib.axes.Axes or np.ndarray of them
1557 A NumPy array is returned when `subplots` is True.
1558
1559 See Also
1560 --------
1561 Series.plot.pie : Generate a pie plot for a Series.
1562 DataFrame.plot : Make plots of a DataFrame.
1563
1564 Examples
1565 --------
1566 In the example below we have a DataFrame with the information about
1567 planet's mass and radius. We pass the 'mass' column to the
1568 pie function to get a pie plot.
1569
1570 .. plot::
1571 :context: close-figs
1572
1573 >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
1574 ... 'radius': [2439.7, 6051.8, 6378.1]},
1575 ... index=['Mercury', 'Venus', 'Earth'])
1576 >>> plot = df.plot.pie(y='mass', figsize=(5, 5))
1577
1578 .. plot::
1579 :context: close-figs
1580
1581 >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
1582 """
1583 if (
1584 isinstance(self._parent, ABCDataFrame)
1585 and kwargs.get("y", None) is None
1586 and not kwargs.get("subplots", False)
1587 ):
1588 raise ValueError("pie requires either y column or 'subplots=True'")
1589 return self(kind="pie", **kwargs)
1590
    def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
        """
        Create a scatter plot with varying marker point size and color.

        The coordinates of each point are defined by two dataframe columns and
        filled circles are used to represent each point. This kind of plot is
        useful to see complex correlations between two variables. Points could
        be for instance natural 2D coordinates like longitude and latitude in
        a map or, in general, any pair of metrics that can be plotted against
        each other.

        Parameters
        ----------
        x : int or str
            The column name or column position to be used as horizontal
            coordinates for each point.
        y : int or str
            The column name or column position to be used as vertical
            coordinates for each point.
        s : str, scalar or array-like, optional
            The size of each point. Possible values are:

            - A string with the name of the column to be used for marker's size.

            - A single scalar so all points have the same size.

            - A sequence of scalars, which will be used for each point's size
              recursively. For instance, when passing [2,14] all points size
              will be either 2 or 14, alternatively.

              .. versionchanged:: 1.1.0

        c : str, int or array-like, optional
            The color of each point. Possible values are:

            - A single color string referred to by name, RGB or RGBA code,
              for instance 'red' or '#a98d19'.

            - A sequence of color strings referred to by name, RGB or RGBA
              code, which will be used for each point's color recursively. For
              instance ['green','yellow'] all points will be filled in green or
              yellow, alternatively.

            - A column name or position whose values will be used to color the
              marker points according to a colormap.

        **kwargs
            Keyword arguments to pass on to :meth:`DataFrame.plot`.

        Returns
        -------
        :class:`matplotlib.axes.Axes` or numpy.ndarray of them

        See Also
        --------
        matplotlib.pyplot.scatter : Scatter plot using multiple input data
            formats.

        Examples
        --------
        Let's see how to draw a scatter plot using coordinates from the values
        in a DataFrame's columns.

        .. plot::
            :context: close-figs

            >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
            ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
            ...                   columns=['length', 'width', 'species'])
            >>> ax1 = df.plot.scatter(x='length',
            ...                       y='width',
            ...                       c='DarkBlue')

        And now with the color determined by a column as well.

        .. plot::
            :context: close-figs

            >>> ax2 = df.plot.scatter(x='length',
            ...                       y='width',
            ...                       c='species',
            ...                       colormap='viridis')
        """
        # Thin wrapper: delegate to ``__call__`` with ``kind`` fixed to
        # "scatter"; ``s`` and ``c`` are forwarded even when left as None.
        # NOTE(review): the result is whatever ``self(...)`` returns
        # (presumably the backend's plot output), not a ``PlotAccessor`` — the
        # return annotation looks inaccurate; confirm before relying on it.
        return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1675
1676 def hexbin(
1677 self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
1678 ) -> PlotAccessor:
1679 """
1680 Generate a hexagonal binning plot.
1681
1682 Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
1683 (the default), this is a histogram of the number of occurrences
1684 of the observations at ``(x[i], y[i])``.
1685
1686 If `C` is specified, specifies values at given coordinates
1687 ``(x[i], y[i])``. These values are accumulated for each hexagonal
1688 bin and then reduced according to `reduce_C_function`,
1689 having as default the NumPy's mean function (:meth:`numpy.mean`).
1690 (If `C` is specified, it must also be a 1-D sequence
1691 of the same length as `x` and `y`, or a column label.)
1692
1693 Parameters
1694 ----------
1695 x : int or str
1696 The column label or position for x points.
1697 y : int or str
1698 The column label or position for y points.
1699 C : int or str, optional
1700 The column label or position for the value of `(x, y)` point.
1701 reduce_C_function : callable, default `np.mean`
1702 Function of one argument that reduces all the values in a bin to
1703 a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
1704 gridsize : int or tuple of (int, int), default 100
1705 The number of hexagons in the x-direction.
1706 The corresponding number of hexagons in the y-direction is
1707 chosen in a way that the hexagons are approximately regular.
1708 Alternatively, gridsize can be a tuple with two elements
1709 specifying the number of hexagons in the x-direction and the
1710 y-direction.
1711 **kwargs
1712 Additional keyword arguments are documented in
1713 :meth:`DataFrame.plot`.
1714
1715 Returns
1716 -------
1717 matplotlib.AxesSubplot
1718 The matplotlib ``Axes`` on which the hexbin is plotted.
1719
1720 See Also
1721 --------
1722 DataFrame.plot : Make plots of a DataFrame.
1723 matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
1724 the matplotlib function that is used under the hood.
1725
1726 Examples
1727 --------
1728 The following examples are generated with random data from
1729 a normal distribution.
1730
1731 .. plot::
1732 :context: close-figs
1733
1734 >>> n = 10000
1735 >>> df = pd.DataFrame({'x': np.random.randn(n),
1736 ... 'y': np.random.randn(n)})
1737 >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)
1738
1739 The next example uses `C` and `np.sum` as `reduce_C_function`.
1740 Note that `'observations'` values ranges from 1 to 5 but the result
1741 plot shows values up to more than 25. This is because of the
1742 `reduce_C_function`.
1743
1744 .. plot::
1745 :context: close-figs
1746
1747 >>> n = 500
1748 >>> df = pd.DataFrame({
1749 ... 'coord_x': np.random.uniform(-3, 3, size=n),
1750 ... 'coord_y': np.random.uniform(30, 50, size=n),
1751 ... 'observations': np.random.randint(1,5, size=n)
1752 ... })
1753 >>> ax = df.plot.hexbin(x='coord_x',
1754 ... y='coord_y',
1755 ... C='observations',
1756 ... reduce_C_function=np.sum,
1757 ... gridsize=10,
1758 ... cmap="viridis")
1759 """
1760 if reduce_C_function is not None:
1761 kwargs["reduce_C_function"] = reduce_C_function
1762 if gridsize is not None:
1763 kwargs["gridsize"] = gridsize
1764
1765 return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
1766
1767
# Cache of plotting backend modules that have already been loaded, keyed by
# the backend name they were requested under (filled by `_get_plot_backend`).
_backends: dict[str, types.ModuleType] = {}
1769
1770
def _load_backend(backend: str) -> types.ModuleType:
    """
    Load a pandas plotting backend.

    The special name "matplotlib" maps to the first-party
    ``pandas.plotting._matplotlib`` module. Any other name is first looked up
    among the entry points registered in the ``pandas_plotting_backends``
    group and, if no entry point has that name, imported as a plain module.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If "matplotlib" is requested but the first-party backend module
        cannot be imported.
    ValueError
        If no entry point or importable module matches ``backend``, or the
        object that was found has no top-level ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group_name = "pandas_plotting_backends"
    registered = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(registered, "select"):
        candidates = registered.select(  # pyright: ignore[reportGeneralTypeIssues]
            group=group_name
        )
    else:
        candidates = registered.get(group_name, ())

    # ``None`` means "nothing usable found"; it has no ``plot`` attribute, so
    # it falls through to the ValueError below just like a failed lookup.
    module = None
    for candidate in candidates:
        if candidate.name == backend:
            module = candidate.load()
            break
    else:
        # No entry point matched: fall back to the unregistered,
        # module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # Reported through the ValueError below.
            pass

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )
1836
1837
def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    pandas plots with matplotlib by default but can delegate to third-party
    backends as well. This function resolves a backend name to the module
    that provides the top-level `.plot` method doing the actual plotting.
    The name is taken from the `backend` argument or, when that is falsy,
    from the option `pandas.options.plotting.backend`.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Parameters
    ----------
    backend : str, optional
        Name of the backend to use; falls back to the ``plotting.backend``
        option when not given.

    Returns
    -------
    module
        The backend module, as returned by `_load_backend`.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    name: str = backend or get_option("plotting.backend")

    # Load each backend at most once; later calls are served from the cache.
    # Nothing is stored if loading raises.
    if name not in _backends:
        _backends[name] = _load_backend(name)
    return _backends[name]