1from __future__ import annotations
2
3import importlib
4from typing import (
5 TYPE_CHECKING,
6 Callable,
7 Literal,
8)
9
10from pandas._config import get_option
11
12from pandas.util._decorators import (
13 Appender,
14 Substitution,
15)
16
17from pandas.core.dtypes.common import (
18 is_integer,
19 is_list_like,
20)
21from pandas.core.dtypes.generic import (
22 ABCDataFrame,
23 ABCSeries,
24)
25
26from pandas.core.base import PandasObject
27
28if TYPE_CHECKING:
29 from collections.abc import (
30 Hashable,
31 Sequence,
32 )
33 import types
34
35 from matplotlib.axes import Axes
36 import numpy as np
37
38 from pandas._typing import IndexLabel
39
40 from pandas import (
41 DataFrame,
42 Series,
43 )
44 from pandas.core.groupby.generic import DataFrameGroupBy
45
46
47def hist_series(
48 self: Series,
49 by=None,
50 ax=None,
51 grid: bool = True,
52 xlabelsize: int | None = None,
53 xrot: float | None = None,
54 ylabelsize: int | None = None,
55 yrot: float | None = None,
56 figsize: tuple[int, int] | None = None,
57 bins: int | Sequence[int] = 10,
58 backend: str | None = None,
59 legend: bool = False,
60 **kwargs,
61):
62 """
63 Draw histogram of the input series using matplotlib.
64
65 Parameters
66 ----------
67 by : object, optional
68 If passed, then used to form histograms for separate groups.
69 ax : matplotlib axis object
70 If not passed, uses gca().
71 grid : bool, default True
72 Whether to show axis grid lines.
73 xlabelsize : int, default None
74 If specified changes the x-axis label size.
75 xrot : float, default None
76 Rotation of x axis labels.
77 ylabelsize : int, default None
78 If specified changes the y-axis label size.
79 yrot : float, default None
80 Rotation of y axis labels.
81 figsize : tuple, default None
82 Figure size in inches by default.
83 bins : int or sequence, default 10
84 Number of histogram bins to be used. If an integer is given, bins + 1
85 bin edges are calculated and returned. If bins is a sequence, gives
86 bin edges, including left edge of first bin and right edge of last
87 bin. In this case, bins is returned unmodified.
88 backend : str, default None
89 Backend to use instead of the backend specified in the option
90 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
91 specify the ``plotting.backend`` for the whole session, set
92 ``pd.options.plotting.backend``.
93 legend : bool, default False
94 Whether to show the legend.
95
96 **kwargs
97 To be passed to the actual plotting function.
98
99 Returns
100 -------
101 matplotlib.AxesSubplot
102 A histogram plot.
103
104 See Also
105 --------
106 matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
107
108 Examples
109 --------
110 For Series:
111
112 .. plot::
113 :context: close-figs
114
115 >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
116 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
117 >>> hist = ser.hist()
118
119 For Groupby:
120
121 .. plot::
122 :context: close-figs
123
124 >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
125 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
126 >>> hist = ser.groupby(level=0).hist()
127 """
128 plot_backend = _get_plot_backend(backend)
129 return plot_backend.hist_series(
130 self,
131 by=by,
132 ax=ax,
133 grid=grid,
134 xlabelsize=xlabelsize,
135 xrot=xrot,
136 ylabelsize=ylabelsize,
137 yrot=yrot,
138 figsize=figsize,
139 bins=bins,
140 legend=legend,
141 **kwargs,
142 )
143
144
145def hist_frame(
146 data: DataFrame,
147 column: IndexLabel | None = None,
148 by=None,
149 grid: bool = True,
150 xlabelsize: int | None = None,
151 xrot: float | None = None,
152 ylabelsize: int | None = None,
153 yrot: float | None = None,
154 ax=None,
155 sharex: bool = False,
156 sharey: bool = False,
157 figsize: tuple[int, int] | None = None,
158 layout: tuple[int, int] | None = None,
159 bins: int | Sequence[int] = 10,
160 backend: str | None = None,
161 legend: bool = False,
162 **kwargs,
163):
164 """
165 Make a histogram of the DataFrame's columns.
166
167 A `histogram`_ is a representation of the distribution of data.
168 This function calls :meth:`matplotlib.pyplot.hist`, on each series in
169 the DataFrame, resulting in one histogram per column.
170
171 .. _histogram: https://en.wikipedia.org/wiki/Histogram
172
173 Parameters
174 ----------
175 data : DataFrame
176 The pandas object holding the data.
177 column : str or sequence, optional
178 If passed, will be used to limit data to a subset of columns.
179 by : object, optional
180 If passed, then used to form histograms for separate groups.
181 grid : bool, default True
182 Whether to show axis grid lines.
183 xlabelsize : int, default None
184 If specified changes the x-axis label size.
185 xrot : float, default None
186 Rotation of x axis labels. For example, a value of 90 displays the
187 x labels rotated 90 degrees clockwise.
188 ylabelsize : int, default None
189 If specified changes the y-axis label size.
190 yrot : float, default None
191 Rotation of y axis labels. For example, a value of 90 displays the
192 y labels rotated 90 degrees clockwise.
193 ax : Matplotlib axes object, default None
194 The axes to plot the histogram on.
195 sharex : bool, default True if ax is None else False
196 In case subplots=True, share x axis and set some x axis labels to
197 invisible; defaults to True if ax is None otherwise False if an ax
198 is passed in.
199 Note that passing in both an ax and sharex=True will alter all x axis
200 labels for all subplots in a figure.
201 sharey : bool, default False
202 In case subplots=True, share y axis and set some y axis labels to
203 invisible.
204 figsize : tuple, optional
205 The size in inches of the figure to create. Uses the value in
206 `matplotlib.rcParams` by default.
207 layout : tuple, optional
208 Tuple of (rows, columns) for the layout of the histograms.
209 bins : int or sequence, default 10
210 Number of histogram bins to be used. If an integer is given, bins + 1
211 bin edges are calculated and returned. If bins is a sequence, gives
212 bin edges, including left edge of first bin and right edge of last
213 bin. In this case, bins is returned unmodified.
214
215 backend : str, default None
216 Backend to use instead of the backend specified in the option
217 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
218 specify the ``plotting.backend`` for the whole session, set
219 ``pd.options.plotting.backend``.
220
221 legend : bool, default False
222 Whether to show the legend.
223
224 **kwargs
225 All other plotting keyword arguments to be passed to
226 :meth:`matplotlib.pyplot.hist`.
227
228 Returns
229 -------
230 matplotlib.AxesSubplot or numpy.ndarray of them
231
232 See Also
233 --------
234 matplotlib.pyplot.hist : Plot a histogram using matplotlib.
235
236 Examples
237 --------
238 This example draws a histogram based on the length and width of
239 some animals, displayed in three bins
240
241 .. plot::
242 :context: close-figs
243
244 >>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
245 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
246 >>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
247 >>> df = pd.DataFrame(data, index=index)
248 >>> hist = df.hist(bins=3)
249 """
250 plot_backend = _get_plot_backend(backend)
251 return plot_backend.hist_frame(
252 data,
253 column=column,
254 by=by,
255 grid=grid,
256 xlabelsize=xlabelsize,
257 xrot=xrot,
258 ylabelsize=ylabelsize,
259 yrot=yrot,
260 ax=ax,
261 sharex=sharex,
262 sharey=sharey,
263 figsize=figsize,
264 layout=layout,
265 legend=legend,
266 bins=bins,
267 **kwargs,
268 )
269
270
271_boxplot_doc = """
272Make a box plot from DataFrame columns.
273
274Make a box-and-whisker plot from DataFrame columns, optionally grouped
275by some other columns. A box plot is a method for graphically depicting
276groups of numerical data through their quartiles.
277The box extends from the Q1 to Q3 quartile values of the data,
278with a line at the median (Q2). The whiskers extend from the edges
279of box to show the range of the data. By default, they extend no more than
280`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
281data point within that interval. Outliers are plotted as separate dots.
282
283For further details see
284Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
285
286Parameters
287----------
288%(data)s\
289column : str or list of str, optional
290 Column name or list of names, or vector.
291 Can be any valid input to :meth:`pandas.DataFrame.groupby`.
292by : str or array-like, optional
293 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
294 One box-plot will be done per value of columns in `by`.
295ax : object of class matplotlib.axes.Axes, optional
296 The matplotlib axes to be used by boxplot.
297fontsize : float or str
298 Tick label font size in points or as a string (e.g., `large`).
299rot : float, default 0
300 The rotation angle of labels (in degrees)
301 with respect to the screen coordinate system.
302grid : bool, default True
303 Setting this to True will show the grid.
304figsize : A tuple (width, height) in inches
305 The size of the figure to create in matplotlib.
306layout : tuple (rows, columns), optional
307 For example, (3, 5) will display the subplots
308 using 3 rows and 5 columns, starting from the top-left.
309return_type : {'axes', 'dict', 'both'} or None, default 'axes'
310 The kind of object to return. The default is ``axes``.
311
312 * 'axes' returns the matplotlib axes the boxplot is drawn on.
313 * 'dict' returns a dictionary whose values are the matplotlib
314 Lines of the boxplot.
315 * 'both' returns a namedtuple with the axes and dict.
316 * when grouping with ``by``, a Series mapping columns to
317 ``return_type`` is returned.
318
319 If ``return_type`` is `None`, a NumPy array
320 of axes with the same shape as ``layout`` is returned.
321%(backend)s\
322
323**kwargs
324 All other plotting keyword arguments to be passed to
325 :func:`matplotlib.pyplot.boxplot`.
326
327Returns
328-------
329result
330 See Notes.
331
332See Also
333--------
334pandas.Series.plot.hist: Make a histogram.
335matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
336
337Notes
338-----
339The return type depends on the `return_type` parameter:
340
341* 'axes' : object of class matplotlib.axes.Axes
342* 'dict' : dict of matplotlib.lines.Line2D objects
343* 'both' : a namedtuple with structure (ax, lines)
344
345For data grouped with ``by``, return a Series of the above or a numpy
346array:
347
348* :class:`~pandas.Series`
349* :class:`~numpy.array` (for ``return_type = None``)
350
351Use ``return_type='dict'`` when you want to tweak the appearance
352of the lines after plotting. In this case a dict containing the Lines
353making up the boxes, caps, fliers, medians, and whiskers is returned.
354
355Examples
356--------
357
358Boxplots can be created for every column in the dataframe
359by ``df.boxplot()`` or indicating the columns to be used:
360
361.. plot::
362 :context: close-figs
363
364 >>> np.random.seed(1234)
365 >>> df = pd.DataFrame(np.random.randn(10, 4),
366 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
367 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP
368
369Boxplots of variables distributions grouped by the values of a third
370variable can be created using the option ``by``. For instance:
371
372.. plot::
373 :context: close-figs
374
375 >>> df = pd.DataFrame(np.random.randn(10, 2),
376 ... columns=['Col1', 'Col2'])
377 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
378 ... 'B', 'B', 'B', 'B', 'B'])
379 >>> boxplot = df.boxplot(by='X')
380
381A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
382in order to group the data by combination of the variables in the x-axis:
383
384.. plot::
385 :context: close-figs
386
387 >>> df = pd.DataFrame(np.random.randn(10, 3),
388 ... columns=['Col1', 'Col2', 'Col3'])
389 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
390 ... 'B', 'B', 'B', 'B', 'B'])
391 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
392 ... 'B', 'A', 'B', 'A', 'B'])
393 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
394
395The layout of boxplot can be adjusted giving a tuple to ``layout``:
396
397.. plot::
398 :context: close-figs
399
400 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
401 ... layout=(2, 1))
402
403Additional formatting can be done to the boxplot, like suppressing the grid
404(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
405or changing the fontsize (i.e. ``fontsize=15``):
406
407.. plot::
408 :context: close-figs
409
410 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP
411
412The parameter ``return_type`` can be used to select the type of element
413returned by `boxplot`. When ``return_type='axes'`` is selected,
414the matplotlib axes on which the boxplot is drawn are returned:
415
416 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
417 >>> type(boxplot)
418 <class 'matplotlib.axes._axes.Axes'>
419
420When grouping with ``by``, a Series mapping columns to ``return_type``
421is returned:
422
423 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
424 ... return_type='axes')
425 >>> type(boxplot)
426 <class 'pandas.core.series.Series'>
427
428If ``return_type`` is `None`, a NumPy array of axes with the same shape
429as ``layout`` is returned:
430
431 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
432 ... return_type=None)
433 >>> type(boxplot)
434 <class 'numpy.ndarray'>
435"""
436
437_backend_doc = """\
438backend : str, default None
439 Backend to use instead of the backend specified in the option
440 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
441 specify the ``plotting.backend`` for the whole session, set
442 ``pd.options.plotting.backend``.
443"""
444
445
446_bar_or_line_doc = """
447 Parameters
448 ----------
449 x : label or position, optional
450 Allows plotting of one column versus another. If not specified,
451 the index of the DataFrame is used.
452 y : label or position, optional
453 Allows plotting of one column versus another. If not specified,
454 all numerical columns are used.
455 color : str, array-like, or dict, optional
456 The color for each of the DataFrame's columns. Possible values are:
457
458 - A single color string referred to by name, RGB or RGBA code,
459 for instance 'red' or '#a98d19'.
460
461 - A sequence of color strings referred to by name, RGB or RGBA
462 code, which will be used for each column recursively. For
463 instance ['green','yellow'] each column's %(kind)s will be filled in
464 green or yellow, alternatively. If there is only a single column to
465 be plotted, then only the first color from the color list will be
466 used.
467
468 - A dict of the form {column name : color}, so that each column will be
469 colored accordingly. For example, if your columns are called `a` and
470 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
471 column `a` in green and %(kind)ss for column `b` in red.
472
473 **kwargs
474 Additional keyword arguments are documented in
475 :meth:`DataFrame.plot`.
476
477 Returns
478 -------
479 matplotlib.axes.Axes or np.ndarray of them
480 An ndarray is returned with one :class:`matplotlib.axes.Axes`
481 per column when ``subplots=True``.
482"""
483
484
485@Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")
486@Appender(_boxplot_doc)
487def boxplot(
488 data: DataFrame,
489 column: str | list[str] | None = None,
490 by: str | list[str] | None = None,
491 ax: Axes | None = None,
492 fontsize: float | str | None = None,
493 rot: int = 0,
494 grid: bool = True,
495 figsize: tuple[float, float] | None = None,
496 layout: tuple[int, int] | None = None,
497 return_type: str | None = None,
498 **kwargs,
499):
500 plot_backend = _get_plot_backend("matplotlib")
501 return plot_backend.boxplot(
502 data,
503 column=column,
504 by=by,
505 ax=ax,
506 fontsize=fontsize,
507 rot=rot,
508 grid=grid,
509 figsize=figsize,
510 layout=layout,
511 return_type=return_type,
512 **kwargs,
513 )
514
515
516@Substitution(data="", backend=_backend_doc)
517@Appender(_boxplot_doc)
518def boxplot_frame(
519 self: DataFrame,
520 column=None,
521 by=None,
522 ax=None,
523 fontsize: int | None = None,
524 rot: int = 0,
525 grid: bool = True,
526 figsize: tuple[float, float] | None = None,
527 layout=None,
528 return_type=None,
529 backend=None,
530 **kwargs,
531):
532 plot_backend = _get_plot_backend(backend)
533 return plot_backend.boxplot_frame(
534 self,
535 column=column,
536 by=by,
537 ax=ax,
538 fontsize=fontsize,
539 rot=rot,
540 grid=grid,
541 figsize=figsize,
542 layout=layout,
543 return_type=return_type,
544 **kwargs,
545 )
546
547
548def boxplot_frame_groupby(
549 grouped: DataFrameGroupBy,
550 subplots: bool = True,
551 column=None,
552 fontsize: int | None = None,
553 rot: int = 0,
554 grid: bool = True,
555 ax=None,
556 figsize: tuple[float, float] | None = None,
557 layout=None,
558 sharex: bool = False,
559 sharey: bool = True,
560 backend=None,
561 **kwargs,
562):
563 """
564 Make box plots from DataFrameGroupBy data.
565
566 Parameters
567 ----------
568 grouped : Grouped DataFrame
569 subplots : bool
570 * ``False`` - no subplots will be used
571 * ``True`` - create a subplot for each group.
572
573 column : column name or list of names, or vector
574 Can be any valid input to groupby.
575 fontsize : float or str
576 rot : label rotation angle
577 grid : Setting this to True will show the grid
578 ax : Matplotlib axis object, default None
579 figsize : A tuple (width, height) in inches
580 layout : tuple (optional)
581 The layout of the plot: (rows, columns).
582 sharex : bool, default False
583 Whether x-axes will be shared among subplots.
584 sharey : bool, default True
585 Whether y-axes will be shared among subplots.
586 backend : str, default None
587 Backend to use instead of the backend specified in the option
588 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
589 specify the ``plotting.backend`` for the whole session, set
590 ``pd.options.plotting.backend``.
591 **kwargs
592 All other plotting keyword arguments to be passed to
593 matplotlib's boxplot function.
594
595 Returns
596 -------
597 dict of key/value = group key/DataFrame.boxplot return value
598 or DataFrame.boxplot return value in case subplots=figures=False
599
600 Examples
601 --------
602 You can create boxplots for grouped data and show them as separate subplots:
603
604 .. plot::
605 :context: close-figs
606
607 >>> import itertools
608 >>> tuples = [t for t in itertools.product(range(1000), range(4))]
609 >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
610 >>> data = np.random.randn(len(index), 4)
611 >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
612 >>> grouped = df.groupby(level='lvl1')
613 >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP
614
615 The ``subplots=False`` option shows the boxplots in a single figure.
616
617 .. plot::
618 :context: close-figs
619
620 >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP
621 """
622 plot_backend = _get_plot_backend(backend)
623 return plot_backend.boxplot_frame_groupby(
624 grouped,
625 subplots=subplots,
626 column=column,
627 fontsize=fontsize,
628 rot=rot,
629 grid=grid,
630 ax=ax,
631 figsize=figsize,
632 layout=layout,
633 sharex=sharex,
634 sharey=sharey,
635 **kwargs,
636 )
637
638
639class PlotAccessor(PandasObject):
640 """
641 Make plots of Series or DataFrame.
642
643 Uses the backend specified by the
644 option ``plotting.backend``. By default, matplotlib is used.
645
646 Parameters
647 ----------
648 data : Series or DataFrame
649 The object for which the method is called.
650 x : label or position, default None
651 Only used if data is a DataFrame.
652 y : label, position or list of label, positions, default None
653 Allows plotting of one column versus another. Only used if data is a
654 DataFrame.
655 kind : str
656 The kind of plot to produce:
657
658 - 'line' : line plot (default)
659 - 'bar' : vertical bar plot
660 - 'barh' : horizontal bar plot
661 - 'hist' : histogram
662 - 'box' : boxplot
663 - 'kde' : Kernel Density Estimation plot
664 - 'density' : same as 'kde'
665 - 'area' : area plot
666 - 'pie' : pie plot
667 - 'scatter' : scatter plot (DataFrame only)
668 - 'hexbin' : hexbin plot (DataFrame only)
669 ax : matplotlib axes object, default None
670 An axes of the current figure.
671 subplots : bool or sequence of iterables, default False
672 Whether to group columns into subplots:
673
674 - ``False`` : No subplots will be used
675 - ``True`` : Make separate subplots for each column.
676 - sequence of iterables of column labels: Create a subplot for each
677 group of columns. For example `[('a', 'c'), ('b', 'd')]` will
678 create 2 subplots: one with columns 'a' and 'c', and one
679 with columns 'b' and 'd'. Remaining columns that aren't specified
680 will be plotted in additional subplots (one per column).
681
682 .. versionadded:: 1.5.0
683
684 sharex : bool, default True if ax is None else False
685 In case ``subplots=True``, share x axis and set some x axis labels
686 to invisible; defaults to True if ax is None otherwise False if
687 an ax is passed in; Be aware, that passing in both an ax and
688 ``sharex=True`` will alter all x axis labels for all axis in a figure.
689 sharey : bool, default False
690 In case ``subplots=True``, share y axis and set some y axis labels to invisible.
691 layout : tuple, optional
692 (rows, columns) for the layout of subplots.
693 figsize : a tuple (width, height) in inches
694 Size of a figure object.
695 use_index : bool, default True
696 Use index as ticks for x axis.
697 title : str or list
698 Title to use for the plot. If a string is passed, print the string
699 at the top of the figure. If a list is passed and `subplots` is
700 True, print each item in the list above the corresponding subplot.
701 grid : bool, default None (matlab style default)
702 Axis grid lines.
703 legend : bool or {'reverse'}
704 Place legend on axis subplots.
705 style : list or dict
706 The matplotlib line style per column.
707 logx : bool or 'sym', default False
708 Use log scaling or symlog scaling on x axis.
709
710 logy : bool or 'sym' default False
711 Use log scaling or symlog scaling on y axis.
712
713 loglog : bool or 'sym', default False
714 Use log scaling or symlog scaling on both x and y axes.
715
716 xticks : sequence
717 Values to use for the xticks.
718 yticks : sequence
719 Values to use for the yticks.
720 xlim : 2-tuple/list
721 Set the x limits of the current axes.
722 ylim : 2-tuple/list
723 Set the y limits of the current axes.
724 xlabel : label, optional
725 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
726 x-column name for planar plots.
727
728 .. versionchanged:: 2.0.0
729
730 Now applicable to histograms.
731
732 ylabel : label, optional
733 Name to use for the ylabel on y-axis. Default will show no ylabel, or the
734 y-column name for planar plots.
735
736 .. versionchanged:: 2.0.0
737
738 Now applicable to histograms.
739
740 rot : float, default None
741 Rotation for ticks (xticks for vertical, yticks for horizontal
742 plots).
743 fontsize : float, default None
744 Font size for xticks and yticks.
745 colormap : str or matplotlib colormap object, default None
746 Colormap to select colors from. If string, load colormap with that
747 name from matplotlib.
748 colorbar : bool, optional
749 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
750 plots).
751 position : float
752 Specify relative alignments for bar plot layout.
753 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
754 (center).
755 table : bool, Series or DataFrame, default False
756 If True, draw a table using the data in the DataFrame and the data
757 will be transposed to meet matplotlib's default layout.
758 If a Series or DataFrame is passed, use passed data to draw a
759 table.
760 yerr : DataFrame, Series, array-like, dict and str
761 See :ref:`Plotting with Error Bars <visualization.errorbars>` for
762 detail.
763 xerr : DataFrame, Series, array-like, dict and str
764 Equivalent to yerr.
765 stacked : bool, default False in line and bar plots, and True in area plot
766 If True, create stacked plot.
767 secondary_y : bool or sequence, default False
768 Whether to plot on the secondary y-axis if a list/tuple, which
769 columns to plot on secondary y-axis.
770 mark_right : bool, default True
771 When using a secondary_y axis, automatically mark the column
772 labels with "(right)" in the legend.
773 include_bool : bool, default is False
774 If True, boolean values can be plotted.
775 backend : str, default None
776 Backend to use instead of the backend specified in the option
777 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
778 specify the ``plotting.backend`` for the whole session, set
779 ``pd.options.plotting.backend``.
780 **kwargs
781 Options to pass to matplotlib plotting method.
782
783 Returns
784 -------
785 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
786 If the backend is not the default matplotlib one, the return value
787 will be the object returned by the backend.
788
789 Notes
790 -----
791 - See matplotlib documentation online for more on this subject
792 - If `kind` = 'bar' or 'barh', you can specify relative alignments
793 for bar plot layout by `position` keyword.
794 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
795 (center)
796
797 Examples
798 --------
799 For Series:
800
801 .. plot::
802 :context: close-figs
803
804 >>> ser = pd.Series([1, 2, 3, 3])
805 >>> plot = ser.plot(kind='hist', title="My plot")
806
807 For DataFrame:
808
809 .. plot::
810 :context: close-figs
811
812 >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],
813 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},
814 ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
815 >>> plot = df.plot(title="DataFrame Plot")
816
817 For SeriesGroupBy:
818
819 .. plot::
820 :context: close-figs
821
822 >>> lst = [-1, -2, -3, 1, 2, 3]
823 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
824 >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot")
825
826 For DataFrameGroupBy:
827
828 .. plot::
829 :context: close-figs
830
831 >>> df = pd.DataFrame({"col1" : [1, 2, 3, 4],
832 ... "col2" : ["A", "B", "A", "B"]})
833 >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot")
834 """
835
836 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")
837 _series_kinds = ("pie",)
838 _dataframe_kinds = ("scatter", "hexbin")
839 _kind_aliases = {"density": "kde"}
840 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds
841
842 def __init__(self, data: Series | DataFrame) -> None:
843 self._parent = data
844
845 @staticmethod
846 def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs):
847 """
848 This function makes calls to this accessor `__call__` method compatible
849 with the previous `SeriesPlotMethods.__call__` and
850 `DataFramePlotMethods.__call__`. Those had slightly different
851 signatures, since `DataFramePlotMethods` accepted `x` and `y`
852 parameters.
853 """
854 if isinstance(data, ABCSeries):
855 arg_def = [
856 ("kind", "line"),
857 ("ax", None),
858 ("figsize", None),
859 ("use_index", True),
860 ("title", None),
861 ("grid", None),
862 ("legend", False),
863 ("style", None),
864 ("logx", False),
865 ("logy", False),
866 ("loglog", False),
867 ("xticks", None),
868 ("yticks", None),
869 ("xlim", None),
870 ("ylim", None),
871 ("rot", None),
872 ("fontsize", None),
873 ("colormap", None),
874 ("table", False),
875 ("yerr", None),
876 ("xerr", None),
877 ("label", None),
878 ("secondary_y", False),
879 ("xlabel", None),
880 ("ylabel", None),
881 ]
882 elif isinstance(data, ABCDataFrame):
883 arg_def = [
884 ("x", None),
885 ("y", None),
886 ("kind", "line"),
887 ("ax", None),
888 ("subplots", False),
889 ("sharex", None),
890 ("sharey", False),
891 ("layout", None),
892 ("figsize", None),
893 ("use_index", True),
894 ("title", None),
895 ("grid", None),
896 ("legend", True),
897 ("style", None),
898 ("logx", False),
899 ("logy", False),
900 ("loglog", False),
901 ("xticks", None),
902 ("yticks", None),
903 ("xlim", None),
904 ("ylim", None),
905 ("rot", None),
906 ("fontsize", None),
907 ("colormap", None),
908 ("table", False),
909 ("yerr", None),
910 ("xerr", None),
911 ("secondary_y", False),
912 ("xlabel", None),
913 ("ylabel", None),
914 ]
915 else:
916 raise TypeError(
917 f"Called plot accessor for type {type(data).__name__}, "
918 "expected Series or DataFrame"
919 )
920
921 if args and isinstance(data, ABCSeries):
922 positional_args = str(args)[1:-1]
923 keyword_args = ", ".join(
924 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]
925 )
926 msg = (
927 "`Series.plot()` should not be called with positional "
928 "arguments, only keyword arguments. The order of "
929 "positional arguments will change in the future. "
930 f"Use `Series.plot({keyword_args})` instead of "
931 f"`Series.plot({positional_args})`."
932 )
933 raise TypeError(msg)
934
935 pos_args = {name: value for (name, _), value in zip(arg_def, args)}
936 if backend_name == "pandas.plotting._matplotlib":
937 kwargs = dict(arg_def, **pos_args, **kwargs)
938 else:
939 kwargs = dict(pos_args, **kwargs)
940
941 x = kwargs.pop("x", None)
942 y = kwargs.pop("y", None)
943 kind = kwargs.pop("kind", "line")
944 return x, y, kind, kwargs
945
946 def __call__(self, *args, **kwargs):
947 plot_backend = _get_plot_backend(kwargs.pop("backend", None))
948
949 x, y, kind, kwargs = self._get_call_args(
950 plot_backend.__name__, self._parent, args, kwargs
951 )
952
953 kind = self._kind_aliases.get(kind, kind)
954
955 # when using another backend, get out of the way
956 if plot_backend.__name__ != "pandas.plotting._matplotlib":
957 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)
958
959 if kind not in self._all_kinds:
960 raise ValueError(
961 f"{kind} is not a valid plot kind "
962 f"Valid plot kinds: {self._all_kinds}"
963 )
964
965 # The original data structured can be transformed before passed to the
966 # backend. For example, for DataFrame is common to set the index as the
967 # `x` parameter, and return a Series with the parameter `y` as values.
968 data = self._parent.copy()
969
970 if isinstance(data, ABCSeries):
971 kwargs["reuse_plot"] = True
972
973 if kind in self._dataframe_kinds:
974 if isinstance(data, ABCDataFrame):
975 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
976 else:
977 raise ValueError(f"plot kind {kind} can only be used for data frames")
978 elif kind in self._series_kinds:
979 if isinstance(data, ABCDataFrame):
980 if y is None and kwargs.get("subplots") is False:
981 raise ValueError(
982 f"{kind} requires either y column or 'subplots=True'"
983 )
984 if y is not None:
985 if is_integer(y) and not data.columns._holds_integer():
986 y = data.columns[y]
987 # converted to series actually. copy to not modify
988 data = data[y].copy()
989 data.index.name = y
990 elif isinstance(data, ABCDataFrame):
991 data_cols = data.columns
992 if x is not None:
993 if is_integer(x) and not data.columns._holds_integer():
994 x = data_cols[x]
995 elif not isinstance(data[x], ABCSeries):
996 raise ValueError("x must be a label or position")
997 data = data.set_index(x)
998 if y is not None:
999 # check if we have y as int or list of ints
1000 int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
1001 int_y_arg = is_integer(y) or int_ylist
1002 if int_y_arg and not data.columns._holds_integer():
1003 y = data_cols[y]
1004
1005 label_kw = kwargs["label"] if "label" in kwargs else False
1006 for kw in ["xerr", "yerr"]:
1007 if kw in kwargs and (
1008 isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
1009 ):
1010 try:
1011 kwargs[kw] = data[kwargs[kw]]
1012 except (IndexError, KeyError, TypeError):
1013 pass
1014
1015 # don't overwrite
1016 data = data[y].copy()
1017
1018 if isinstance(data, ABCSeries):
1019 label_name = label_kw or y
1020 data.name = label_name
1021 else:
1022 match = is_list_like(label_kw) and len(label_kw) == len(y)
1023 if label_kw and not match:
1024 raise ValueError(
1025 "label should be list-like and same length as y"
1026 )
1027 label_name = label_kw or data.columns
1028 data.columns = label_name
1029
1030 return plot_backend.plot(data, kind=kind, **kwargs)
1031
1032 __call__.__doc__ = __doc__
1033
1034 @Appender(
1035 """
1036 See Also
1037 --------
1038 matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.
1039
1040 Examples
1041 --------
1042
1043 .. plot::
1044 :context: close-figs
1045
1046 >>> s = pd.Series([1, 3, 2])
1047 >>> s.plot.line() # doctest: +SKIP
1048
1049 .. plot::
1050 :context: close-figs
1051
1052 The following example shows the populations for some animals
1053 over the years.
1054
1055 >>> df = pd.DataFrame({
1056 ... 'pig': [20, 18, 489, 675, 1776],
1057 ... 'horse': [4, 25, 281, 600, 1900]
1058 ... }, index=[1990, 1997, 2003, 2009, 2014])
1059 >>> lines = df.plot.line()
1060
1061 .. plot::
1062 :context: close-figs
1063
1064 An example with subplots, so an array of axes is returned.
1065
1066 >>> axes = df.plot.line(subplots=True)
1067 >>> type(axes)
1068 <class 'numpy.ndarray'>
1069
1070 .. plot::
1071 :context: close-figs
1072
1073 Let's repeat the same example, but specifying colors for
1074 each column (in this case, for each animal).
1075
1076 >>> axes = df.plot.line(
1077 ... subplots=True, color={"pig": "pink", "horse": "#742802"}
1078 ... )
1079
1080 .. plot::
1081 :context: close-figs
1082
1083 The following example shows the relationship between both
1084 populations.
1085
1086 >>> lines = df.plot.line(x='pig', y='horse')
1087 """
1088 )
1089 @Substitution(kind="line")
1090 @Appender(_bar_or_line_doc)
1091 def line(
1092 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1093 ) -> PlotAccessor:
1094 """
1095 Plot Series or DataFrame as lines.
1096
1097 This function is useful to plot lines using DataFrame's values
1098 as coordinates.
1099 """
1100 return self(kind="line", x=x, y=y, **kwargs)
1101
1102 @Appender(
1103 """
1104 See Also
1105 --------
1106 DataFrame.plot.barh : Horizontal bar plot.
1107 DataFrame.plot : Make plots of a DataFrame.
1108 matplotlib.pyplot.bar : Make a bar plot with matplotlib.
1109
1110 Examples
1111 --------
1112 Basic plot.
1113
1114 .. plot::
1115 :context: close-figs
1116
1117 >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
1118 >>> ax = df.plot.bar(x='lab', y='val', rot=0)
1119
1120 Plot a whole dataframe to a bar plot. Each column is assigned a
1121 distinct color, and each row is nested in a group along the
1122 horizontal axis.
1123
1124 .. plot::
1125 :context: close-figs
1126
1127 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1128 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1129 >>> index = ['snail', 'pig', 'elephant',
1130 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1131 >>> df = pd.DataFrame({'speed': speed,
1132 ... 'lifespan': lifespan}, index=index)
1133 >>> ax = df.plot.bar(rot=0)
1134
1135 Plot stacked bar charts for the DataFrame
1136
1137 .. plot::
1138 :context: close-figs
1139
1140 >>> ax = df.plot.bar(stacked=True)
1141
1142 Instead of nesting, the figure can be split by column with
1143 ``subplots=True``. In this case, a :class:`numpy.ndarray` of
1144 :class:`matplotlib.axes.Axes` are returned.
1145
1146 .. plot::
1147 :context: close-figs
1148
1149 >>> axes = df.plot.bar(rot=0, subplots=True)
1150 >>> axes[1].legend(loc=2) # doctest: +SKIP
1151
1152 If you don't like the default colours, you can specify how you'd
1153 like each column to be colored.
1154
1155 .. plot::
1156 :context: close-figs
1157
1158 >>> axes = df.plot.bar(
1159 ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
1160 ... )
1161 >>> axes[1].legend(loc=2) # doctest: +SKIP
1162
1163 Plot a single column.
1164
1165 .. plot::
1166 :context: close-figs
1167
1168 >>> ax = df.plot.bar(y='speed', rot=0)
1169
1170 Plot only selected categories for the DataFrame.
1171
1172 .. plot::
1173 :context: close-figs
1174
1175 >>> ax = df.plot.bar(x='lifespan', rot=0)
1176 """
1177 )
1178 @Substitution(kind="bar")
1179 @Appender(_bar_or_line_doc)
1180 def bar( # pylint: disable=disallowed-name
1181 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1182 ) -> PlotAccessor:
1183 """
1184 Vertical bar plot.
1185
1186 A bar plot is a plot that presents categorical data with
1187 rectangular bars with lengths proportional to the values that they
1188 represent. A bar plot shows comparisons among discrete categories. One
1189 axis of the plot shows the specific categories being compared, and the
1190 other axis represents a measured value.
1191 """
1192 return self(kind="bar", x=x, y=y, **kwargs)
1193
1194 @Appender(
1195 """
1196 See Also
1197 --------
1198 DataFrame.plot.bar: Vertical bar plot.
1199 DataFrame.plot : Make plots of DataFrame using matplotlib.
1200 matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.
1201
1202 Examples
1203 --------
1204 Basic example
1205
1206 .. plot::
1207 :context: close-figs
1208
1209 >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
1210 >>> ax = df.plot.barh(x='lab', y='val')
1211
1212 Plot a whole DataFrame to a horizontal bar plot
1213
1214 .. plot::
1215 :context: close-figs
1216
1217 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1218 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1219 >>> index = ['snail', 'pig', 'elephant',
1220 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1221 >>> df = pd.DataFrame({'speed': speed,
1222 ... 'lifespan': lifespan}, index=index)
1223 >>> ax = df.plot.barh()
1224
1225 Plot stacked barh charts for the DataFrame
1226
1227 .. plot::
1228 :context: close-figs
1229
1230 >>> ax = df.plot.barh(stacked=True)
1231
1232 We can specify colors for each column
1233
1234 .. plot::
1235 :context: close-figs
1236
1237 >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})
1238
1239 Plot a column of the DataFrame to a horizontal bar plot
1240
1241 .. plot::
1242 :context: close-figs
1243
1244 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1245 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1246 >>> index = ['snail', 'pig', 'elephant',
1247 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1248 >>> df = pd.DataFrame({'speed': speed,
1249 ... 'lifespan': lifespan}, index=index)
1250 >>> ax = df.plot.barh(y='speed')
1251
1252 Plot DataFrame versus the desired column
1253
1254 .. plot::
1255 :context: close-figs
1256
1257 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1258 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1259 >>> index = ['snail', 'pig', 'elephant',
1260 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1261 >>> df = pd.DataFrame({'speed': speed,
1262 ... 'lifespan': lifespan}, index=index)
1263 >>> ax = df.plot.barh(x='lifespan')
1264 """
1265 )
1266 @Substitution(kind="bar")
1267 @Appender(_bar_or_line_doc)
1268 def barh(
1269 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1270 ) -> PlotAccessor:
1271 """
1272 Make a horizontal bar plot.
1273
1274 A horizontal bar plot is a plot that presents quantitative data with
1275 rectangular bars with lengths proportional to the values that they
1276 represent. A bar plot shows comparisons among discrete categories. One
1277 axis of the plot shows the specific categories being compared, and the
1278 other axis represents a measured value.
1279 """
1280 return self(kind="barh", x=x, y=y, **kwargs)
1281
1282 def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor:
1283 r"""
1284 Make a box plot of the DataFrame columns.
1285
1286 A box plot is a method for graphically depicting groups of numerical
1287 data through their quartiles.
1288 The box extends from the Q1 to Q3 quartile values of the data,
1289 with a line at the median (Q2). The whiskers extend from the edges
1290 of box to show the range of the data. The position of the whiskers
1291 is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
1292 box. Outlier points are those past the end of the whiskers.
1293
1294 For further details see Wikipedia's
1295 entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.
1296
1297 A consideration when using this chart is that the box and the whiskers
1298 can overlap, which is very common when plotting small sets of data.
1299
1300 Parameters
1301 ----------
1302 by : str or sequence
1303 Column in the DataFrame to group by.
1304
1305 .. versionchanged:: 1.4.0
1306
1307 Previously, `by` is silently ignore and makes no groupings
1308
1309 **kwargs
1310 Additional keywords are documented in
1311 :meth:`DataFrame.plot`.
1312
1313 Returns
1314 -------
1315 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
1316
1317 See Also
1318 --------
1319 DataFrame.boxplot: Another method to draw a box plot.
1320 Series.plot.box: Draw a box plot from a Series object.
1321 matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.
1322
1323 Examples
1324 --------
1325 Draw a box plot from a DataFrame with four columns of randomly
1326 generated data.
1327
1328 .. plot::
1329 :context: close-figs
1330
1331 >>> data = np.random.randn(25, 4)
1332 >>> df = pd.DataFrame(data, columns=list('ABCD'))
1333 >>> ax = df.plot.box()
1334
1335 You can also generate groupings if you specify the `by` parameter (which
1336 can take a column name, or a list or tuple of column names):
1337
1338 .. versionchanged:: 1.4.0
1339
1340 .. plot::
1341 :context: close-figs
1342
1343 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
1344 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
1345 >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
1346 """
1347 return self(kind="box", by=by, **kwargs)
1348
1349 def hist(
1350 self, by: IndexLabel | None = None, bins: int = 10, **kwargs
1351 ) -> PlotAccessor:
1352 """
1353 Draw one histogram of the DataFrame's columns.
1354
1355 A histogram is a representation of the distribution of data.
1356 This function groups the values of all given Series in the DataFrame
1357 into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
1358 This is useful when the DataFrame's Series are in a similar scale.
1359
1360 Parameters
1361 ----------
1362 by : str or sequence, optional
1363 Column in the DataFrame to group by.
1364
1365 .. versionchanged:: 1.4.0
1366
1367 Previously, `by` is silently ignore and makes no groupings
1368
1369 bins : int, default 10
1370 Number of histogram bins to be used.
1371 **kwargs
1372 Additional keyword arguments are documented in
1373 :meth:`DataFrame.plot`.
1374
1375 Returns
1376 -------
1377 class:`matplotlib.AxesSubplot`
1378 Return a histogram plot.
1379
1380 See Also
1381 --------
1382 DataFrame.hist : Draw histograms per DataFrame's Series.
1383 Series.hist : Draw a histogram with Series' data.
1384
1385 Examples
1386 --------
1387 When we roll a die 6000 times, we expect to get each value around 1000
1388 times. But when we roll two dice and sum the result, the distribution
1389 is going to be quite different. A histogram illustrates those
1390 distributions.
1391
1392 .. plot::
1393 :context: close-figs
1394
1395 >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
1396 >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
1397 >>> ax = df.plot.hist(bins=12, alpha=0.5)
1398
1399 A grouped histogram can be generated by providing the parameter `by` (which
1400 can be a column name, or a list of column names):
1401
1402 .. plot::
1403 :context: close-figs
1404
1405 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
1406 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
1407 >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
1408 """
1409 return self(kind="hist", by=by, bins=bins, **kwargs)
1410
1411 def kde(
1412 self,
1413 bw_method: Literal["scott", "silverman"] | float | Callable | None = None,
1414 ind: np.ndarray | int | None = None,
1415 **kwargs,
1416 ) -> PlotAccessor:
1417 """
1418 Generate Kernel Density Estimate plot using Gaussian kernels.
1419
1420 In statistics, `kernel density estimation`_ (KDE) is a non-parametric
1421 way to estimate the probability density function (PDF) of a random
1422 variable. This function uses Gaussian kernels and includes automatic
1423 bandwidth determination.
1424
1425 .. _kernel density estimation:
1426 https://en.wikipedia.org/wiki/Kernel_density_estimation
1427
1428 Parameters
1429 ----------
1430 bw_method : str, scalar or callable, optional
1431 The method used to calculate the estimator bandwidth. This can be
1432 'scott', 'silverman', a scalar constant or a callable.
1433 If None (default), 'scott' is used.
1434 See :class:`scipy.stats.gaussian_kde` for more information.
1435 ind : NumPy array or int, optional
1436 Evaluation points for the estimated PDF. If None (default),
1437 1000 equally spaced points are used. If `ind` is a NumPy array, the
1438 KDE is evaluated at the points passed. If `ind` is an integer,
1439 `ind` number of equally spaced points are used.
1440 **kwargs
1441 Additional keyword arguments are documented in
1442 :meth:`DataFrame.plot`.
1443
1444 Returns
1445 -------
1446 matplotlib.axes.Axes or numpy.ndarray of them
1447
1448 See Also
1449 --------
1450 scipy.stats.gaussian_kde : Representation of a kernel-density
1451 estimate using Gaussian kernels. This is the function used
1452 internally to estimate the PDF.
1453
1454 Examples
1455 --------
1456 Given a Series of points randomly sampled from an unknown
1457 distribution, estimate its PDF using KDE with automatic
1458 bandwidth determination and plot the results, evaluating them at
1459 1000 equally spaced points (default):
1460
1461 .. plot::
1462 :context: close-figs
1463
1464 >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
1465 >>> ax = s.plot.kde()
1466
1467 A scalar bandwidth can be specified. Using a small bandwidth value can
1468 lead to over-fitting, while using a large bandwidth value may result
1469 in under-fitting:
1470
1471 .. plot::
1472 :context: close-figs
1473
1474 >>> ax = s.plot.kde(bw_method=0.3)
1475
1476 .. plot::
1477 :context: close-figs
1478
1479 >>> ax = s.plot.kde(bw_method=3)
1480
1481 Finally, the `ind` parameter determines the evaluation points for the
1482 plot of the estimated PDF:
1483
1484 .. plot::
1485 :context: close-figs
1486
1487 >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
1488
1489 For DataFrame, it works in the same way:
1490
1491 .. plot::
1492 :context: close-figs
1493
1494 >>> df = pd.DataFrame({
1495 ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
1496 ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
1497 ... })
1498 >>> ax = df.plot.kde()
1499
1500 A scalar bandwidth can be specified. Using a small bandwidth value can
1501 lead to over-fitting, while using a large bandwidth value may result
1502 in under-fitting:
1503
1504 .. plot::
1505 :context: close-figs
1506
1507 >>> ax = df.plot.kde(bw_method=0.3)
1508
1509 .. plot::
1510 :context: close-figs
1511
1512 >>> ax = df.plot.kde(bw_method=3)
1513
1514 Finally, the `ind` parameter determines the evaluation points for the
1515 plot of the estimated PDF:
1516
1517 .. plot::
1518 :context: close-figs
1519
1520 >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
1521 """
1522 return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
1523
1524 density = kde
1525
1526 def area(
1527 self,
1528 x: Hashable | None = None,
1529 y: Hashable | None = None,
1530 stacked: bool = True,
1531 **kwargs,
1532 ) -> PlotAccessor:
1533 """
1534 Draw a stacked area plot.
1535
1536 An area plot displays quantitative data visually.
1537 This function wraps the matplotlib area function.
1538
1539 Parameters
1540 ----------
1541 x : label or position, optional
1542 Coordinates for the X axis. By default uses the index.
1543 y : label or position, optional
1544 Column to plot. By default uses all columns.
1545 stacked : bool, default True
1546 Area plots are stacked by default. Set to False to create a
1547 unstacked plot.
1548 **kwargs
1549 Additional keyword arguments are documented in
1550 :meth:`DataFrame.plot`.
1551
1552 Returns
1553 -------
1554 matplotlib.axes.Axes or numpy.ndarray
1555 Area plot, or array of area plots if subplots is True.
1556
1557 See Also
1558 --------
1559 DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.
1560
1561 Examples
1562 --------
1563 Draw an area plot based on basic business metrics:
1564
1565 .. plot::
1566 :context: close-figs
1567
1568 >>> df = pd.DataFrame({
1569 ... 'sales': [3, 2, 3, 9, 10, 6],
1570 ... 'signups': [5, 5, 6, 12, 14, 13],
1571 ... 'visits': [20, 42, 28, 62, 81, 50],
1572 ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
1573 ... freq='ME'))
1574 >>> ax = df.plot.area()
1575
1576 Area plots are stacked by default. To produce an unstacked plot,
1577 pass ``stacked=False``:
1578
1579 .. plot::
1580 :context: close-figs
1581
1582 >>> ax = df.plot.area(stacked=False)
1583
1584 Draw an area plot for a single column:
1585
1586 .. plot::
1587 :context: close-figs
1588
1589 >>> ax = df.plot.area(y='sales')
1590
1591 Draw with a different `x`:
1592
1593 .. plot::
1594 :context: close-figs
1595
1596 >>> df = pd.DataFrame({
1597 ... 'sales': [3, 2, 3],
1598 ... 'visits': [20, 42, 28],
1599 ... 'day': [1, 2, 3],
1600 ... })
1601 >>> ax = df.plot.area(x='day')
1602 """
1603 return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
1604
1605 def pie(self, **kwargs) -> PlotAccessor:
1606 """
1607 Generate a pie plot.
1608
1609 A pie plot is a proportional representation of the numerical data in a
1610 column. This function wraps :meth:`matplotlib.pyplot.pie` for the
1611 specified column. If no column reference is passed and
1612 ``subplots=True`` a pie plot is drawn for each numerical column
1613 independently.
1614
1615 Parameters
1616 ----------
1617 y : int or label, optional
1618 Label or position of the column to plot.
1619 If not provided, ``subplots=True`` argument must be passed.
1620 **kwargs
1621 Keyword arguments to pass on to :meth:`DataFrame.plot`.
1622
1623 Returns
1624 -------
1625 matplotlib.axes.Axes or np.ndarray of them
1626 A NumPy array is returned when `subplots` is True.
1627
1628 See Also
1629 --------
1630 Series.plot.pie : Generate a pie plot for a Series.
1631 DataFrame.plot : Make plots of a DataFrame.
1632
1633 Examples
1634 --------
1635 In the example below we have a DataFrame with the information about
1636 planet's mass and radius. We pass the 'mass' column to the
1637 pie function to get a pie plot.
1638
1639 .. plot::
1640 :context: close-figs
1641
1642 >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
1643 ... 'radius': [2439.7, 6051.8, 6378.1]},
1644 ... index=['Mercury', 'Venus', 'Earth'])
1645 >>> plot = df.plot.pie(y='mass', figsize=(5, 5))
1646
1647 .. plot::
1648 :context: close-figs
1649
1650 >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
1651 """
1652 if (
1653 isinstance(self._parent, ABCDataFrame)
1654 and kwargs.get("y", None) is None
1655 and not kwargs.get("subplots", False)
1656 ):
1657 raise ValueError("pie requires either y column or 'subplots=True'")
1658 return self(kind="pie", **kwargs)
1659
1660 def scatter(
1661 self,
1662 x: Hashable,
1663 y: Hashable,
1664 s: Hashable | Sequence[Hashable] | None = None,
1665 c: Hashable | Sequence[Hashable] | None = None,
1666 **kwargs,
1667 ) -> PlotAccessor:
1668 """
1669 Create a scatter plot with varying marker point size and color.
1670
1671 The coordinates of each point are defined by two dataframe columns and
1672 filled circles are used to represent each point. This kind of plot is
1673 useful to see complex correlations between two variables. Points could
1674 be for instance natural 2D coordinates like longitude and latitude in
1675 a map or, in general, any pair of metrics that can be plotted against
1676 each other.
1677
1678 Parameters
1679 ----------
1680 x : int or str
1681 The column name or column position to be used as horizontal
1682 coordinates for each point.
1683 y : int or str
1684 The column name or column position to be used as vertical
1685 coordinates for each point.
1686 s : str, scalar or array-like, optional
1687 The size of each point. Possible values are:
1688
1689 - A string with the name of the column to be used for marker's size.
1690
1691 - A single scalar so all points have the same size.
1692
1693 - A sequence of scalars, which will be used for each point's size
1694 recursively. For instance, when passing [2,14] all points size
1695 will be either 2 or 14, alternatively.
1696
1697 c : str, int or array-like, optional
1698 The color of each point. Possible values are:
1699
1700 - A single color string referred to by name, RGB or RGBA code,
1701 for instance 'red' or '#a98d19'.
1702
1703 - A sequence of color strings referred to by name, RGB or RGBA
1704 code, which will be used for each point's color recursively. For
1705 instance ['green','yellow'] all points will be filled in green or
1706 yellow, alternatively.
1707
1708 - A column name or position whose values will be used to color the
1709 marker points according to a colormap.
1710
1711 **kwargs
1712 Keyword arguments to pass on to :meth:`DataFrame.plot`.
1713
1714 Returns
1715 -------
1716 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
1717
1718 See Also
1719 --------
1720 matplotlib.pyplot.scatter : Scatter plot using multiple input data
1721 formats.
1722
1723 Examples
1724 --------
1725 Let's see how to draw a scatter plot using coordinates from the values
1726 in a DataFrame's columns.
1727
1728 .. plot::
1729 :context: close-figs
1730
1731 >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
1732 ... [6.4, 3.2, 1], [5.9, 3.0, 2]],
1733 ... columns=['length', 'width', 'species'])
1734 >>> ax1 = df.plot.scatter(x='length',
1735 ... y='width',
1736 ... c='DarkBlue')
1737
1738 And now with the color determined by a column as well.
1739
1740 .. plot::
1741 :context: close-figs
1742
1743 >>> ax2 = df.plot.scatter(x='length',
1744 ... y='width',
1745 ... c='species',
1746 ... colormap='viridis')
1747 """
1748 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1749
1750 def hexbin(
1751 self,
1752 x: Hashable,
1753 y: Hashable,
1754 C: Hashable | None = None,
1755 reduce_C_function: Callable | None = None,
1756 gridsize: int | tuple[int, int] | None = None,
1757 **kwargs,
1758 ) -> PlotAccessor:
1759 """
1760 Generate a hexagonal binning plot.
1761
1762 Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
1763 (the default), this is a histogram of the number of occurrences
1764 of the observations at ``(x[i], y[i])``.
1765
1766 If `C` is specified, specifies values at given coordinates
1767 ``(x[i], y[i])``. These values are accumulated for each hexagonal
1768 bin and then reduced according to `reduce_C_function`,
1769 having as default the NumPy's mean function (:meth:`numpy.mean`).
1770 (If `C` is specified, it must also be a 1-D sequence
1771 of the same length as `x` and `y`, or a column label.)
1772
1773 Parameters
1774 ----------
1775 x : int or str
1776 The column label or position for x points.
1777 y : int or str
1778 The column label or position for y points.
1779 C : int or str, optional
1780 The column label or position for the value of `(x, y)` point.
1781 reduce_C_function : callable, default `np.mean`
1782 Function of one argument that reduces all the values in a bin to
1783 a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
1784 gridsize : int or tuple of (int, int), default 100
1785 The number of hexagons in the x-direction.
1786 The corresponding number of hexagons in the y-direction is
1787 chosen in a way that the hexagons are approximately regular.
1788 Alternatively, gridsize can be a tuple with two elements
1789 specifying the number of hexagons in the x-direction and the
1790 y-direction.
1791 **kwargs
1792 Additional keyword arguments are documented in
1793 :meth:`DataFrame.plot`.
1794
1795 Returns
1796 -------
1797 matplotlib.AxesSubplot
1798 The matplotlib ``Axes`` on which the hexbin is plotted.
1799
1800 See Also
1801 --------
1802 DataFrame.plot : Make plots of a DataFrame.
1803 matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
1804 the matplotlib function that is used under the hood.
1805
1806 Examples
1807 --------
1808 The following examples are generated with random data from
1809 a normal distribution.
1810
1811 .. plot::
1812 :context: close-figs
1813
1814 >>> n = 10000
1815 >>> df = pd.DataFrame({'x': np.random.randn(n),
1816 ... 'y': np.random.randn(n)})
1817 >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)
1818
1819 The next example uses `C` and `np.sum` as `reduce_C_function`.
1820 Note that `'observations'` values ranges from 1 to 5 but the result
1821 plot shows values up to more than 25. This is because of the
1822 `reduce_C_function`.
1823
1824 .. plot::
1825 :context: close-figs
1826
1827 >>> n = 500
1828 >>> df = pd.DataFrame({
1829 ... 'coord_x': np.random.uniform(-3, 3, size=n),
1830 ... 'coord_y': np.random.uniform(30, 50, size=n),
1831 ... 'observations': np.random.randint(1,5, size=n)
1832 ... })
1833 >>> ax = df.plot.hexbin(x='coord_x',
1834 ... y='coord_y',
1835 ... C='observations',
1836 ... reduce_C_function=np.sum,
1837 ... gridsize=10,
1838 ... cmap="viridis")
1839 """
1840 if reduce_C_function is not None:
1841 kwargs["reduce_C_function"] = reduce_C_function
1842 if gridsize is not None:
1843 kwargs["gridsize"] = gridsize
1844
1845 return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
1846
1847
1848_backends: dict[str, types.ModuleType] = {}
1849
1850
1851def _load_backend(backend: str) -> types.ModuleType:
1852 """
1853 Load a pandas plotting backend.
1854
1855 Parameters
1856 ----------
1857 backend : str
1858 The identifier for the backend. Either an entrypoint item registered
1859 with importlib.metadata, "matplotlib", or a module name.
1860
1861 Returns
1862 -------
1863 types.ModuleType
1864 The imported backend.
1865 """
1866 from importlib.metadata import entry_points
1867
1868 if backend == "matplotlib":
1869 # Because matplotlib is an optional dependency and first-party backend,
1870 # we need to attempt an import here to raise an ImportError if needed.
1871 try:
1872 module = importlib.import_module("pandas.plotting._matplotlib")
1873 except ImportError:
1874 raise ImportError(
1875 "matplotlib is required for plotting when the "
1876 'default backend "matplotlib" is selected.'
1877 ) from None
1878 return module
1879
1880 found_backend = False
1881
1882 eps = entry_points()
1883 key = "pandas_plotting_backends"
1884 # entry_points lost dict API ~ PY 3.10
1885 # https://github.com/python/importlib_metadata/issues/298
1886 if hasattr(eps, "select"):
1887 entry = eps.select(group=key)
1888 else:
1889 # Argument 2 to "get" of "dict" has incompatible type "Tuple[]";
1890 # expected "EntryPoints" [arg-type]
1891 entry = eps.get(key, ()) # type: ignore[arg-type]
1892 for entry_point in entry:
1893 found_backend = entry_point.name == backend
1894 if found_backend:
1895 module = entry_point.load()
1896 break
1897
1898 if not found_backend:
1899 # Fall back to unregistered, module name approach.
1900 try:
1901 module = importlib.import_module(backend)
1902 found_backend = True
1903 except ImportError:
1904 # We re-raise later on.
1905 pass
1906
1907 if found_backend:
1908 if hasattr(module, "plot"):
1909 # Validate that the interface is implemented when the option is set,
1910 # rather than at plot time.
1911 return module
1912
1913 raise ValueError(
1914 f"Could not find plotting backend '{backend}'. Ensure that you've "
1915 f"installed the package providing the '{backend}' entrypoint, or that "
1916 "the package has a top-level `.plot` method."
1917 )
1918
1919
1920def _get_plot_backend(backend: str | None = None):
1921 """
1922 Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).
1923
1924 The plotting system of pandas uses matplotlib by default, but the idea here
1925 is that it can also work with other third-party backends. This function
1926 returns the module which provides a top-level `.plot` method that will
1927 actually do the plotting. The backend is specified from a string, which
1928 either comes from the keyword argument `backend`, or, if not specified, from
1929 the option `pandas.options.plotting.backend`. All the rest of the code in
1930 this file uses the backend specified there for the plotting.
1931
1932 The backend is imported lazily, as matplotlib is a soft dependency, and
1933 pandas can be used without it being installed.
1934
1935 Notes
1936 -----
1937 Modifies `_backends` with imported backend as a side effect.
1938 """
1939 backend_str: str = backend or get_option("plotting.backend")
1940
1941 if backend_str in _backends:
1942 return _backends[backend_str]
1943
1944 module = _load_backend(backend_str)
1945 _backends[backend_str] = module
1946 return module