Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/plotting/_core.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import importlib
4from typing import (
5 TYPE_CHECKING,
6 Callable,
7 Literal,
8)
10from pandas._config import get_option
12from pandas.util._decorators import (
13 Appender,
14 Substitution,
15)
17from pandas.core.dtypes.common import (
18 is_integer,
19 is_list_like,
20)
21from pandas.core.dtypes.generic import (
22 ABCDataFrame,
23 ABCSeries,
24)
26from pandas.core.base import PandasObject
28if TYPE_CHECKING:
29 from collections.abc import (
30 Hashable,
31 Sequence,
32 )
33 import types
35 from matplotlib.axes import Axes
36 import numpy as np
38 from pandas._typing import IndexLabel
40 from pandas import (
41 DataFrame,
42 Series,
43 )
44 from pandas.core.groupby.generic import DataFrameGroupBy
47def hist_series(
48 self: Series,
49 by=None,
50 ax=None,
51 grid: bool = True,
52 xlabelsize: int | None = None,
53 xrot: float | None = None,
54 ylabelsize: int | None = None,
55 yrot: float | None = None,
56 figsize: tuple[int, int] | None = None,
57 bins: int | Sequence[int] = 10,
58 backend: str | None = None,
59 legend: bool = False,
60 **kwargs,
61):
62 """
63 Draw histogram of the input series using matplotlib.
65 Parameters
66 ----------
67 by : object, optional
68 If passed, then used to form histograms for separate groups.
69 ax : matplotlib axis object
70 If not passed, uses gca().
71 grid : bool, default True
72 Whether to show axis grid lines.
73 xlabelsize : int, default None
74 If specified changes the x-axis label size.
75 xrot : float, default None
76 Rotation of x axis labels.
77 ylabelsize : int, default None
78 If specified changes the y-axis label size.
79 yrot : float, default None
80 Rotation of y axis labels.
81 figsize : tuple, default None
82 Figure size in inches by default.
83 bins : int or sequence, default 10
84 Number of histogram bins to be used. If an integer is given, bins + 1
85 bin edges are calculated and returned. If bins is a sequence, gives
86 bin edges, including left edge of first bin and right edge of last
87 bin. In this case, bins is returned unmodified.
88 backend : str, default None
89 Backend to use instead of the backend specified in the option
90 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
91 specify the ``plotting.backend`` for the whole session, set
92 ``pd.options.plotting.backend``.
93 legend : bool, default False
94 Whether to show the legend.
96 **kwargs
97 To be passed to the actual plotting function.
99 Returns
100 -------
101 matplotlib.AxesSubplot
102 A histogram plot.
104 See Also
105 --------
106 matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
108 Examples
109 --------
110 For Series:
112 .. plot::
113 :context: close-figs
115 >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
116 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
117 >>> hist = ser.hist()
119 For Groupby:
121 .. plot::
122 :context: close-figs
124 >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
125 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
126 >>> hist = ser.groupby(level=0).hist()
127 """
128 plot_backend = _get_plot_backend(backend)
129 return plot_backend.hist_series(
130 self,
131 by=by,
132 ax=ax,
133 grid=grid,
134 xlabelsize=xlabelsize,
135 xrot=xrot,
136 ylabelsize=ylabelsize,
137 yrot=yrot,
138 figsize=figsize,
139 bins=bins,
140 legend=legend,
141 **kwargs,
142 )
145def hist_frame(
146 data: DataFrame,
147 column: IndexLabel | None = None,
148 by=None,
149 grid: bool = True,
150 xlabelsize: int | None = None,
151 xrot: float | None = None,
152 ylabelsize: int | None = None,
153 yrot: float | None = None,
154 ax=None,
155 sharex: bool = False,
156 sharey: bool = False,
157 figsize: tuple[int, int] | None = None,
158 layout: tuple[int, int] | None = None,
159 bins: int | Sequence[int] = 10,
160 backend: str | None = None,
161 legend: bool = False,
162 **kwargs,
163):
164 """
165 Make a histogram of the DataFrame's columns.
167 A `histogram`_ is a representation of the distribution of data.
168 This function calls :meth:`matplotlib.pyplot.hist`, on each series in
169 the DataFrame, resulting in one histogram per column.
171 .. _histogram: https://en.wikipedia.org/wiki/Histogram
173 Parameters
174 ----------
175 data : DataFrame
176 The pandas object holding the data.
177 column : str or sequence, optional
178 If passed, will be used to limit data to a subset of columns.
179 by : object, optional
180 If passed, then used to form histograms for separate groups.
181 grid : bool, default True
182 Whether to show axis grid lines.
183 xlabelsize : int, default None
184 If specified changes the x-axis label size.
185 xrot : float, default None
186 Rotation of x axis labels. For example, a value of 90 displays the
187 x labels rotated 90 degrees clockwise.
188 ylabelsize : int, default None
189 If specified changes the y-axis label size.
190 yrot : float, default None
191 Rotation of y axis labels. For example, a value of 90 displays the
192 y labels rotated 90 degrees clockwise.
193 ax : Matplotlib axes object, default None
194 The axes to plot the histogram on.
195 sharex : bool, default True if ax is None else False
196 In case subplots=True, share x axis and set some x axis labels to
197 invisible; defaults to True if ax is None otherwise False if an ax
198 is passed in.
199 Note that passing in both an ax and sharex=True will alter all x axis
200 labels for all subplots in a figure.
201 sharey : bool, default False
202 In case subplots=True, share y axis and set some y axis labels to
203 invisible.
204 figsize : tuple, optional
205 The size in inches of the figure to create. Uses the value in
206 `matplotlib.rcParams` by default.
207 layout : tuple, optional
208 Tuple of (rows, columns) for the layout of the histograms.
209 bins : int or sequence, default 10
210 Number of histogram bins to be used. If an integer is given, bins + 1
211 bin edges are calculated and returned. If bins is a sequence, gives
212 bin edges, including left edge of first bin and right edge of last
213 bin. In this case, bins is returned unmodified.
215 backend : str, default None
216 Backend to use instead of the backend specified in the option
217 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
218 specify the ``plotting.backend`` for the whole session, set
219 ``pd.options.plotting.backend``.
221 legend : bool, default False
222 Whether to show the legend.
224 **kwargs
225 All other plotting keyword arguments to be passed to
226 :meth:`matplotlib.pyplot.hist`.
228 Returns
229 -------
230 matplotlib.AxesSubplot or numpy.ndarray of them
232 See Also
233 --------
234 matplotlib.pyplot.hist : Plot a histogram using matplotlib.
236 Examples
237 --------
238 This example draws a histogram based on the length and width of
239 some animals, displayed in three bins
241 .. plot::
242 :context: close-figs
244 >>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3],
245 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}
246 >>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse']
247 >>> df = pd.DataFrame(data, index=index)
248 >>> hist = df.hist(bins=3)
249 """
250 plot_backend = _get_plot_backend(backend)
251 return plot_backend.hist_frame(
252 data,
253 column=column,
254 by=by,
255 grid=grid,
256 xlabelsize=xlabelsize,
257 xrot=xrot,
258 ylabelsize=ylabelsize,
259 yrot=yrot,
260 ax=ax,
261 sharex=sharex,
262 sharey=sharey,
263 figsize=figsize,
264 layout=layout,
265 legend=legend,
266 bins=bins,
267 **kwargs,
268 )
271_boxplot_doc = """
272Make a box plot from DataFrame columns.
274Make a box-and-whisker plot from DataFrame columns, optionally grouped
275by some other columns. A box plot is a method for graphically depicting
276groups of numerical data through their quartiles.
277The box extends from the Q1 to Q3 quartile values of the data,
278with a line at the median (Q2). The whiskers extend from the edges
279of box to show the range of the data. By default, they extend no more than
280`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
281data point within that interval. Outliers are plotted as separate dots.
283For further details see
284Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
286Parameters
287----------
288%(data)s\
289column : str or list of str, optional
290 Column name or list of names, or vector.
291 Can be any valid input to :meth:`pandas.DataFrame.groupby`.
292by : str or array-like, optional
293 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
294 One box-plot will be done per value of columns in `by`.
295ax : object of class matplotlib.axes.Axes, optional
296 The matplotlib axes to be used by boxplot.
297fontsize : float or str
298 Tick label font size in points or as a string (e.g., `large`).
299rot : float, default 0
300 The rotation angle of labels (in degrees)
301 with respect to the screen coordinate system.
302grid : bool, default True
303 Setting this to True will show the grid.
304figsize : A tuple (width, height) in inches
305 The size of the figure to create in matplotlib.
306layout : tuple (rows, columns), optional
307 For example, (3, 5) will display the subplots
308 using 3 rows and 5 columns, starting from the top-left.
309return_type : {'axes', 'dict', 'both'} or None, default 'axes'
310 The kind of object to return. The default is ``axes``.
312 * 'axes' returns the matplotlib axes the boxplot is drawn on.
313 * 'dict' returns a dictionary whose values are the matplotlib
314 Lines of the boxplot.
315 * 'both' returns a namedtuple with the axes and dict.
316 * when grouping with ``by``, a Series mapping columns to
317 ``return_type`` is returned.
319 If ``return_type`` is `None`, a NumPy array
320 of axes with the same shape as ``layout`` is returned.
321%(backend)s\
323**kwargs
324 All other plotting keyword arguments to be passed to
325 :func:`matplotlib.pyplot.boxplot`.
327Returns
328-------
329result
330 See Notes.
332See Also
333--------
334pandas.Series.plot.hist: Make a histogram.
335matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
337Notes
338-----
339The return type depends on the `return_type` parameter:
341* 'axes' : object of class matplotlib.axes.Axes
342* 'dict' : dict of matplotlib.lines.Line2D objects
343* 'both' : a namedtuple with structure (ax, lines)
345For data grouped with ``by``, return a Series of the above or a numpy
346array:
348* :class:`~pandas.Series`
349* :class:`~numpy.array` (for ``return_type = None``)
351Use ``return_type='dict'`` when you want to tweak the appearance
352of the lines after plotting. In this case a dict containing the Lines
353making up the boxes, caps, fliers, medians, and whiskers is returned.
355Examples
356--------
358Boxplots can be created for every column in the dataframe
359by ``df.boxplot()`` or indicating the columns to be used:
361.. plot::
362 :context: close-figs
364 >>> np.random.seed(1234)
365 >>> df = pd.DataFrame(np.random.randn(10, 4),
366 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
367 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP
369Boxplots of variables distributions grouped by the values of a third
370variable can be created using the option ``by``. For instance:
372.. plot::
373 :context: close-figs
375 >>> df = pd.DataFrame(np.random.randn(10, 2),
376 ... columns=['Col1', 'Col2'])
377 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
378 ... 'B', 'B', 'B', 'B', 'B'])
379 >>> boxplot = df.boxplot(by='X')
381A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
382in order to group the data by combination of the variables in the x-axis:
384.. plot::
385 :context: close-figs
387 >>> df = pd.DataFrame(np.random.randn(10, 3),
388 ... columns=['Col1', 'Col2', 'Col3'])
389 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
390 ... 'B', 'B', 'B', 'B', 'B'])
391 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
392 ... 'B', 'A', 'B', 'A', 'B'])
393 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
395The layout of boxplot can be adjusted giving a tuple to ``layout``:
397.. plot::
398 :context: close-figs
400 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
401 ... layout=(2, 1))
403Additional formatting can be done to the boxplot, like suppressing the grid
404(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
405or changing the fontsize (i.e. ``fontsize=15``):
407.. plot::
408 :context: close-figs
410 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP
412The parameter ``return_type`` can be used to select the type of element
413returned by `boxplot`. When ``return_type='axes'`` is selected,
414the matplotlib axes on which the boxplot is drawn are returned:
416 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
417 >>> type(boxplot)
418 <class 'matplotlib.axes._axes.Axes'>
420When grouping with ``by``, a Series mapping columns to ``return_type``
421is returned:
423 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
424 ... return_type='axes')
425 >>> type(boxplot)
426 <class 'pandas.core.series.Series'>
428If ``return_type`` is `None`, a NumPy array of axes with the same shape
429as ``layout`` is returned:
431 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
432 ... return_type=None)
433 >>> type(boxplot)
434 <class 'numpy.ndarray'>
435"""
437_backend_doc = """\
438backend : str, default None
439 Backend to use instead of the backend specified in the option
440 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
441 specify the ``plotting.backend`` for the whole session, set
442 ``pd.options.plotting.backend``.
443"""
446_bar_or_line_doc = """
447 Parameters
448 ----------
449 x : label or position, optional
450 Allows plotting of one column versus another. If not specified,
451 the index of the DataFrame is used.
452 y : label or position, optional
453 Allows plotting of one column versus another. If not specified,
454 all numerical columns are used.
455 color : str, array-like, or dict, optional
456 The color for each of the DataFrame's columns. Possible values are:
458 - A single color string referred to by name, RGB or RGBA code,
459 for instance 'red' or '#a98d19'.
461 - A sequence of color strings referred to by name, RGB or RGBA
462 code, which will be used for each column recursively. For
463 instance ['green','yellow'] each column's %(kind)s will be filled in
464 green or yellow, alternatively. If there is only a single column to
465 be plotted, then only the first color from the color list will be
466 used.
468 - A dict of the form {column name : color}, so that each column will be
469 colored accordingly. For example, if your columns are called `a` and
470 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
471 column `a` in green and %(kind)ss for column `b` in red.
473 **kwargs
474 Additional keyword arguments are documented in
475 :meth:`DataFrame.plot`.
477 Returns
478 -------
479 matplotlib.axes.Axes or np.ndarray of them
480 An ndarray is returned with one :class:`matplotlib.axes.Axes`
481 per column when ``subplots=True``.
482"""
485@Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")
486@Appender(_boxplot_doc)
487def boxplot(
488 data: DataFrame,
489 column: str | list[str] | None = None,
490 by: str | list[str] | None = None,
491 ax: Axes | None = None,
492 fontsize: float | str | None = None,
493 rot: int = 0,
494 grid: bool = True,
495 figsize: tuple[float, float] | None = None,
496 layout: tuple[int, int] | None = None,
497 return_type: str | None = None,
498 **kwargs,
499):
500 plot_backend = _get_plot_backend("matplotlib")
501 return plot_backend.boxplot(
502 data,
503 column=column,
504 by=by,
505 ax=ax,
506 fontsize=fontsize,
507 rot=rot,
508 grid=grid,
509 figsize=figsize,
510 layout=layout,
511 return_type=return_type,
512 **kwargs,
513 )
516@Substitution(data="", backend=_backend_doc)
517@Appender(_boxplot_doc)
518def boxplot_frame(
519 self: DataFrame,
520 column=None,
521 by=None,
522 ax=None,
523 fontsize: int | None = None,
524 rot: int = 0,
525 grid: bool = True,
526 figsize: tuple[float, float] | None = None,
527 layout=None,
528 return_type=None,
529 backend=None,
530 **kwargs,
531):
532 plot_backend = _get_plot_backend(backend)
533 return plot_backend.boxplot_frame(
534 self,
535 column=column,
536 by=by,
537 ax=ax,
538 fontsize=fontsize,
539 rot=rot,
540 grid=grid,
541 figsize=figsize,
542 layout=layout,
543 return_type=return_type,
544 **kwargs,
545 )
548def boxplot_frame_groupby(
549 grouped: DataFrameGroupBy,
550 subplots: bool = True,
551 column=None,
552 fontsize: int | None = None,
553 rot: int = 0,
554 grid: bool = True,
555 ax=None,
556 figsize: tuple[float, float] | None = None,
557 layout=None,
558 sharex: bool = False,
559 sharey: bool = True,
560 backend=None,
561 **kwargs,
562):
563 """
564 Make box plots from DataFrameGroupBy data.
566 Parameters
567 ----------
568 grouped : Grouped DataFrame
569 subplots : bool
570 * ``False`` - no subplots will be used
571 * ``True`` - create a subplot for each group.
573 column : column name or list of names, or vector
574 Can be any valid input to groupby.
575 fontsize : float or str
576 rot : label rotation angle
577 grid : Setting this to True will show the grid
578 ax : Matplotlib axis object, default None
579 figsize : A tuple (width, height) in inches
580 layout : tuple (optional)
581 The layout of the plot: (rows, columns).
582 sharex : bool, default False
583 Whether x-axes will be shared among subplots.
584 sharey : bool, default True
585 Whether y-axes will be shared among subplots.
586 backend : str, default None
587 Backend to use instead of the backend specified in the option
588 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
589 specify the ``plotting.backend`` for the whole session, set
590 ``pd.options.plotting.backend``.
591 **kwargs
592 All other plotting keyword arguments to be passed to
593 matplotlib's boxplot function.
595 Returns
596 -------
597 dict of key/value = group key/DataFrame.boxplot return value
598 or DataFrame.boxplot return value in case subplots=figures=False
600 Examples
601 --------
602 You can create boxplots for grouped data and show them as separate subplots:
604 .. plot::
605 :context: close-figs
607 >>> import itertools
608 >>> tuples = [t for t in itertools.product(range(1000), range(4))]
609 >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
610 >>> data = np.random.randn(len(index), 4)
611 >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
612 >>> grouped = df.groupby(level='lvl1')
613 >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP
615 The ``subplots=False`` option shows the boxplots in a single figure.
617 .. plot::
618 :context: close-figs
620 >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP
621 """
622 plot_backend = _get_plot_backend(backend)
623 return plot_backend.boxplot_frame_groupby(
624 grouped,
625 subplots=subplots,
626 column=column,
627 fontsize=fontsize,
628 rot=rot,
629 grid=grid,
630 ax=ax,
631 figsize=figsize,
632 layout=layout,
633 sharex=sharex,
634 sharey=sharey,
635 **kwargs,
636 )
639class PlotAccessor(PandasObject):
640 """
641 Make plots of Series or DataFrame.
643 Uses the backend specified by the
644 option ``plotting.backend``. By default, matplotlib is used.
646 Parameters
647 ----------
648 data : Series or DataFrame
649 The object for which the method is called.
650 x : label or position, default None
651 Only used if data is a DataFrame.
652 y : label, position or list of label, positions, default None
653 Allows plotting of one column versus another. Only used if data is a
654 DataFrame.
655 kind : str
656 The kind of plot to produce:
658 - 'line' : line plot (default)
659 - 'bar' : vertical bar plot
660 - 'barh' : horizontal bar plot
661 - 'hist' : histogram
662 - 'box' : boxplot
663 - 'kde' : Kernel Density Estimation plot
664 - 'density' : same as 'kde'
665 - 'area' : area plot
666 - 'pie' : pie plot
667 - 'scatter' : scatter plot (DataFrame only)
668 - 'hexbin' : hexbin plot (DataFrame only)
669 ax : matplotlib axes object, default None
670 An axes of the current figure.
671 subplots : bool or sequence of iterables, default False
672 Whether to group columns into subplots:
674 - ``False`` : No subplots will be used
675 - ``True`` : Make separate subplots for each column.
676 - sequence of iterables of column labels: Create a subplot for each
677 group of columns. For example `[('a', 'c'), ('b', 'd')]` will
678 create 2 subplots: one with columns 'a' and 'c', and one
679 with columns 'b' and 'd'. Remaining columns that aren't specified
680 will be plotted in additional subplots (one per column).
682 .. versionadded:: 1.5.0
684 sharex : bool, default True if ax is None else False
685 In case ``subplots=True``, share x axis and set some x axis labels
686 to invisible; defaults to True if ax is None otherwise False if
687 an ax is passed in; Be aware, that passing in both an ax and
688 ``sharex=True`` will alter all x axis labels for all axis in a figure.
689 sharey : bool, default False
690 In case ``subplots=True``, share y axis and set some y axis labels to invisible.
691 layout : tuple, optional
692 (rows, columns) for the layout of subplots.
693 figsize : a tuple (width, height) in inches
694 Size of a figure object.
695 use_index : bool, default True
696 Use index as ticks for x axis.
697 title : str or list
698 Title to use for the plot. If a string is passed, print the string
699 at the top of the figure. If a list is passed and `subplots` is
700 True, print each item in the list above the corresponding subplot.
701 grid : bool, default None (matlab style default)
702 Axis grid lines.
703 legend : bool or {'reverse'}
704 Place legend on axis subplots.
705 style : list or dict
706 The matplotlib line style per column.
707 logx : bool or 'sym', default False
708 Use log scaling or symlog scaling on x axis.
710 logy : bool or 'sym' default False
711 Use log scaling or symlog scaling on y axis.
713 loglog : bool or 'sym', default False
714 Use log scaling or symlog scaling on both x and y axes.
716 xticks : sequence
717 Values to use for the xticks.
718 yticks : sequence
719 Values to use for the yticks.
720 xlim : 2-tuple/list
721 Set the x limits of the current axes.
722 ylim : 2-tuple/list
723 Set the y limits of the current axes.
724 xlabel : label, optional
725 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
726 x-column name for planar plots.
728 .. versionchanged:: 2.0.0
730 Now applicable to histograms.
732 ylabel : label, optional
733 Name to use for the ylabel on y-axis. Default will show no ylabel, or the
734 y-column name for planar plots.
736 .. versionchanged:: 2.0.0
738 Now applicable to histograms.
740 rot : float, default None
741 Rotation for ticks (xticks for vertical, yticks for horizontal
742 plots).
743 fontsize : float, default None
744 Font size for xticks and yticks.
745 colormap : str or matplotlib colormap object, default None
746 Colormap to select colors from. If string, load colormap with that
747 name from matplotlib.
748 colorbar : bool, optional
749 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
750 plots).
751 position : float
752 Specify relative alignments for bar plot layout.
753 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
754 (center).
755 table : bool, Series or DataFrame, default False
756 If True, draw a table using the data in the DataFrame and the data
757 will be transposed to meet matplotlib's default layout.
758 If a Series or DataFrame is passed, use passed data to draw a
759 table.
760 yerr : DataFrame, Series, array-like, dict and str
761 See :ref:`Plotting with Error Bars <visualization.errorbars>` for
762 detail.
763 xerr : DataFrame, Series, array-like, dict and str
764 Equivalent to yerr.
765 stacked : bool, default False in line and bar plots, and True in area plot
766 If True, create stacked plot.
767 secondary_y : bool or sequence, default False
768 Whether to plot on the secondary y-axis if a list/tuple, which
769 columns to plot on secondary y-axis.
770 mark_right : bool, default True
771 When using a secondary_y axis, automatically mark the column
772 labels with "(right)" in the legend.
773 include_bool : bool, default is False
774 If True, boolean values can be plotted.
775 backend : str, default None
776 Backend to use instead of the backend specified in the option
777 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
778 specify the ``plotting.backend`` for the whole session, set
779 ``pd.options.plotting.backend``.
780 **kwargs
781 Options to pass to matplotlib plotting method.
783 Returns
784 -------
785 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
786 If the backend is not the default matplotlib one, the return value
787 will be the object returned by the backend.
789 Notes
790 -----
791 - See matplotlib documentation online for more on this subject
792 - If `kind` = 'bar' or 'barh', you can specify relative alignments
793 for bar plot layout by `position` keyword.
794 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
795 (center)
797 Examples
798 --------
799 For Series:
801 .. plot::
802 :context: close-figs
804 >>> ser = pd.Series([1, 2, 3, 3])
805 >>> plot = ser.plot(kind='hist', title="My plot")
807 For DataFrame:
809 .. plot::
810 :context: close-figs
812 >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],
813 ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},
814 ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
815 >>> plot = df.plot(title="DataFrame Plot")
817 For SeriesGroupBy:
819 .. plot::
820 :context: close-figs
822 >>> lst = [-1, -2, -3, 1, 2, 3]
823 >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst)
824 >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot")
826 For DataFrameGroupBy:
828 .. plot::
829 :context: close-figs
831 >>> df = pd.DataFrame({"col1" : [1, 2, 3, 4],
832 ... "col2" : ["A", "B", "A", "B"]})
833 >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot")
834 """
836 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")
837 _series_kinds = ("pie",)
838 _dataframe_kinds = ("scatter", "hexbin")
839 _kind_aliases = {"density": "kde"}
840 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds
842 def __init__(self, data: Series | DataFrame) -> None:
843 self._parent = data
845 @staticmethod
846 def _get_call_args(backend_name: str, data: Series | DataFrame, args, kwargs):
847 """
848 This function makes calls to this accessor `__call__` method compatible
849 with the previous `SeriesPlotMethods.__call__` and
850 `DataFramePlotMethods.__call__`. Those had slightly different
851 signatures, since `DataFramePlotMethods` accepted `x` and `y`
852 parameters.
853 """
854 if isinstance(data, ABCSeries):
855 arg_def = [
856 ("kind", "line"),
857 ("ax", None),
858 ("figsize", None),
859 ("use_index", True),
860 ("title", None),
861 ("grid", None),
862 ("legend", False),
863 ("style", None),
864 ("logx", False),
865 ("logy", False),
866 ("loglog", False),
867 ("xticks", None),
868 ("yticks", None),
869 ("xlim", None),
870 ("ylim", None),
871 ("rot", None),
872 ("fontsize", None),
873 ("colormap", None),
874 ("table", False),
875 ("yerr", None),
876 ("xerr", None),
877 ("label", None),
878 ("secondary_y", False),
879 ("xlabel", None),
880 ("ylabel", None),
881 ]
882 elif isinstance(data, ABCDataFrame):
883 arg_def = [
884 ("x", None),
885 ("y", None),
886 ("kind", "line"),
887 ("ax", None),
888 ("subplots", False),
889 ("sharex", None),
890 ("sharey", False),
891 ("layout", None),
892 ("figsize", None),
893 ("use_index", True),
894 ("title", None),
895 ("grid", None),
896 ("legend", True),
897 ("style", None),
898 ("logx", False),
899 ("logy", False),
900 ("loglog", False),
901 ("xticks", None),
902 ("yticks", None),
903 ("xlim", None),
904 ("ylim", None),
905 ("rot", None),
906 ("fontsize", None),
907 ("colormap", None),
908 ("table", False),
909 ("yerr", None),
910 ("xerr", None),
911 ("secondary_y", False),
912 ("xlabel", None),
913 ("ylabel", None),
914 ]
915 else:
916 raise TypeError(
917 f"Called plot accessor for type {type(data).__name__}, "
918 "expected Series or DataFrame"
919 )
921 if args and isinstance(data, ABCSeries):
922 positional_args = str(args)[1:-1]
923 keyword_args = ", ".join(
924 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]
925 )
926 msg = (
927 "`Series.plot()` should not be called with positional "
928 "arguments, only keyword arguments. The order of "
929 "positional arguments will change in the future. "
930 f"Use `Series.plot({keyword_args})` instead of "
931 f"`Series.plot({positional_args})`."
932 )
933 raise TypeError(msg)
935 pos_args = {name: value for (name, _), value in zip(arg_def, args)}
936 if backend_name == "pandas.plotting._matplotlib":
937 kwargs = dict(arg_def, **pos_args, **kwargs)
938 else:
939 kwargs = dict(pos_args, **kwargs)
941 x = kwargs.pop("x", None)
942 y = kwargs.pop("y", None)
943 kind = kwargs.pop("kind", "line")
944 return x, y, kind, kwargs
946 def __call__(self, *args, **kwargs):
947 plot_backend = _get_plot_backend(kwargs.pop("backend", None))
949 x, y, kind, kwargs = self._get_call_args(
950 plot_backend.__name__, self._parent, args, kwargs
951 )
953 kind = self._kind_aliases.get(kind, kind)
955 # when using another backend, get out of the way
956 if plot_backend.__name__ != "pandas.plotting._matplotlib":
957 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)
959 if kind not in self._all_kinds:
960 raise ValueError(
961 f"{kind} is not a valid plot kind "
962 f"Valid plot kinds: {self._all_kinds}"
963 )
965 # The original data structured can be transformed before passed to the
966 # backend. For example, for DataFrame is common to set the index as the
967 # `x` parameter, and return a Series with the parameter `y` as values.
968 data = self._parent.copy()
970 if isinstance(data, ABCSeries):
971 kwargs["reuse_plot"] = True
973 if kind in self._dataframe_kinds:
974 if isinstance(data, ABCDataFrame):
975 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
976 else:
977 raise ValueError(f"plot kind {kind} can only be used for data frames")
978 elif kind in self._series_kinds:
979 if isinstance(data, ABCDataFrame):
980 if y is None and kwargs.get("subplots") is False:
981 raise ValueError(
982 f"{kind} requires either y column or 'subplots=True'"
983 )
984 if y is not None:
985 if is_integer(y) and not data.columns._holds_integer():
986 y = data.columns[y]
987 # converted to series actually. copy to not modify
988 data = data[y].copy()
989 data.index.name = y
990 elif isinstance(data, ABCDataFrame):
991 data_cols = data.columns
992 if x is not None:
993 if is_integer(x) and not data.columns._holds_integer():
994 x = data_cols[x]
995 elif not isinstance(data[x], ABCSeries):
996 raise ValueError("x must be a label or position")
997 data = data.set_index(x)
998 if y is not None:
999 # check if we have y as int or list of ints
1000 int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
1001 int_y_arg = is_integer(y) or int_ylist
1002 if int_y_arg and not data.columns._holds_integer():
1003 y = data_cols[y]
1005 label_kw = kwargs["label"] if "label" in kwargs else False
1006 for kw in ["xerr", "yerr"]:
1007 if kw in kwargs and (
1008 isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
1009 ):
1010 try:
1011 kwargs[kw] = data[kwargs[kw]]
1012 except (IndexError, KeyError, TypeError):
1013 pass
1015 # don't overwrite
1016 data = data[y].copy()
1018 if isinstance(data, ABCSeries):
1019 label_name = label_kw or y
1020 data.name = label_name
1021 else:
1022 match = is_list_like(label_kw) and len(label_kw) == len(y)
1023 if label_kw and not match:
1024 raise ValueError(
1025 "label should be list-like and same length as y"
1026 )
1027 label_name = label_kw or data.columns
1028 data.columns = label_name
1030 return plot_backend.plot(data, kind=kind, **kwargs)
1032 __call__.__doc__ = __doc__
1034 @Appender(
1035 """
1036 See Also
1037 --------
1038 matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.
1040 Examples
1041 --------
1043 .. plot::
1044 :context: close-figs
1046 >>> s = pd.Series([1, 3, 2])
1047 >>> s.plot.line() # doctest: +SKIP
1049 .. plot::
1050 :context: close-figs
1052 The following example shows the populations for some animals
1053 over the years.
1055 >>> df = pd.DataFrame({
1056 ... 'pig': [20, 18, 489, 675, 1776],
1057 ... 'horse': [4, 25, 281, 600, 1900]
1058 ... }, index=[1990, 1997, 2003, 2009, 2014])
1059 >>> lines = df.plot.line()
1061 .. plot::
1062 :context: close-figs
1064 An example with subplots, so an array of axes is returned.
1066 >>> axes = df.plot.line(subplots=True)
1067 >>> type(axes)
1068 <class 'numpy.ndarray'>
1070 .. plot::
1071 :context: close-figs
1073 Let's repeat the same example, but specifying colors for
1074 each column (in this case, for each animal).
1076 >>> axes = df.plot.line(
1077 ... subplots=True, color={"pig": "pink", "horse": "#742802"}
1078 ... )
1080 .. plot::
1081 :context: close-figs
1083 The following example shows the relationship between both
1084 populations.
1086 >>> lines = df.plot.line(x='pig', y='horse')
1087 """
1088 )
1089 @Substitution(kind="line")
1090 @Appender(_bar_or_line_doc)
1091 def line(
1092 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1093 ) -> PlotAccessor:
1094 """
1095 Plot Series or DataFrame as lines.
1097 This function is useful to plot lines using DataFrame's values
1098 as coordinates.
1099 """
1100 return self(kind="line", x=x, y=y, **kwargs)
1102 @Appender(
1103 """
1104 See Also
1105 --------
1106 DataFrame.plot.barh : Horizontal bar plot.
1107 DataFrame.plot : Make plots of a DataFrame.
1108 matplotlib.pyplot.bar : Make a bar plot with matplotlib.
1110 Examples
1111 --------
1112 Basic plot.
1114 .. plot::
1115 :context: close-figs
1117 >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
1118 >>> ax = df.plot.bar(x='lab', y='val', rot=0)
1120 Plot a whole dataframe to a bar plot. Each column is assigned a
1121 distinct color, and each row is nested in a group along the
1122 horizontal axis.
1124 .. plot::
1125 :context: close-figs
1127 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1128 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1129 >>> index = ['snail', 'pig', 'elephant',
1130 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1131 >>> df = pd.DataFrame({'speed': speed,
1132 ... 'lifespan': lifespan}, index=index)
1133 >>> ax = df.plot.bar(rot=0)
1135 Plot stacked bar charts for the DataFrame
1137 .. plot::
1138 :context: close-figs
1140 >>> ax = df.plot.bar(stacked=True)
1142 Instead of nesting, the figure can be split by column with
1143 ``subplots=True``. In this case, a :class:`numpy.ndarray` of
1144 :class:`matplotlib.axes.Axes` are returned.
1146 .. plot::
1147 :context: close-figs
1149 >>> axes = df.plot.bar(rot=0, subplots=True)
1150 >>> axes[1].legend(loc=2) # doctest: +SKIP
1152 If you don't like the default colours, you can specify how you'd
1153 like each column to be colored.
1155 .. plot::
1156 :context: close-figs
1158 >>> axes = df.plot.bar(
1159 ... rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
1160 ... )
1161 >>> axes[1].legend(loc=2) # doctest: +SKIP
1163 Plot a single column.
1165 .. plot::
1166 :context: close-figs
1168 >>> ax = df.plot.bar(y='speed', rot=0)
1170 Plot only selected categories for the DataFrame.
1172 .. plot::
1173 :context: close-figs
1175 >>> ax = df.plot.bar(x='lifespan', rot=0)
1176 """
1177 )
1178 @Substitution(kind="bar")
1179 @Appender(_bar_or_line_doc)
1180 def bar( # pylint: disable=disallowed-name
1181 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1182 ) -> PlotAccessor:
1183 """
1184 Vertical bar plot.
1186 A bar plot is a plot that presents categorical data with
1187 rectangular bars with lengths proportional to the values that they
1188 represent. A bar plot shows comparisons among discrete categories. One
1189 axis of the plot shows the specific categories being compared, and the
1190 other axis represents a measured value.
1191 """
1192 return self(kind="bar", x=x, y=y, **kwargs)
1194 @Appender(
1195 """
1196 See Also
1197 --------
1198 DataFrame.plot.bar: Vertical bar plot.
1199 DataFrame.plot : Make plots of DataFrame using matplotlib.
1200 matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.
1202 Examples
1203 --------
1204 Basic example
1206 .. plot::
1207 :context: close-figs
1209 >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
1210 >>> ax = df.plot.barh(x='lab', y='val')
1212 Plot a whole DataFrame to a horizontal bar plot
1214 .. plot::
1215 :context: close-figs
1217 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1218 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1219 >>> index = ['snail', 'pig', 'elephant',
1220 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1221 >>> df = pd.DataFrame({'speed': speed,
1222 ... 'lifespan': lifespan}, index=index)
1223 >>> ax = df.plot.barh()
1225 Plot stacked barh charts for the DataFrame
1227 .. plot::
1228 :context: close-figs
1230 >>> ax = df.plot.barh(stacked=True)
1232 We can specify colors for each column
1234 .. plot::
1235 :context: close-figs
1237 >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})
1239 Plot a column of the DataFrame to a horizontal bar plot
1241 .. plot::
1242 :context: close-figs
1244 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1245 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1246 >>> index = ['snail', 'pig', 'elephant',
1247 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1248 >>> df = pd.DataFrame({'speed': speed,
1249 ... 'lifespan': lifespan}, index=index)
1250 >>> ax = df.plot.barh(y='speed')
1252 Plot DataFrame versus the desired column
1254 .. plot::
1255 :context: close-figs
1257 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
1258 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
1259 >>> index = ['snail', 'pig', 'elephant',
1260 ... 'rabbit', 'giraffe', 'coyote', 'horse']
1261 >>> df = pd.DataFrame({'speed': speed,
1262 ... 'lifespan': lifespan}, index=index)
1263 >>> ax = df.plot.barh(x='lifespan')
1264 """
1265 )
1266 @Substitution(kind="bar")
1267 @Appender(_bar_or_line_doc)
1268 def barh(
1269 self, x: Hashable | None = None, y: Hashable | None = None, **kwargs
1270 ) -> PlotAccessor:
1271 """
1272 Make a horizontal bar plot.
1274 A horizontal bar plot is a plot that presents quantitative data with
1275 rectangular bars with lengths proportional to the values that they
1276 represent. A bar plot shows comparisons among discrete categories. One
1277 axis of the plot shows the specific categories being compared, and the
1278 other axis represents a measured value.
1279 """
1280 return self(kind="barh", x=x, y=y, **kwargs)
1282 def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor:
1283 r"""
1284 Make a box plot of the DataFrame columns.
1286 A box plot is a method for graphically depicting groups of numerical
1287 data through their quartiles.
1288 The box extends from the Q1 to Q3 quartile values of the data,
1289 with a line at the median (Q2). The whiskers extend from the edges
1290 of box to show the range of the data. The position of the whiskers
1291 is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
1292 box. Outlier points are those past the end of the whiskers.
1294 For further details see Wikipedia's
1295 entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.
1297 A consideration when using this chart is that the box and the whiskers
1298 can overlap, which is very common when plotting small sets of data.
1300 Parameters
1301 ----------
1302 by : str or sequence
1303 Column in the DataFrame to group by.
1305 .. versionchanged:: 1.4.0
1307 Previously, `by` is silently ignore and makes no groupings
1309 **kwargs
1310 Additional keywords are documented in
1311 :meth:`DataFrame.plot`.
1313 Returns
1314 -------
1315 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
1317 See Also
1318 --------
1319 DataFrame.boxplot: Another method to draw a box plot.
1320 Series.plot.box: Draw a box plot from a Series object.
1321 matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.
1323 Examples
1324 --------
1325 Draw a box plot from a DataFrame with four columns of randomly
1326 generated data.
1328 .. plot::
1329 :context: close-figs
1331 >>> data = np.random.randn(25, 4)
1332 >>> df = pd.DataFrame(data, columns=list('ABCD'))
1333 >>> ax = df.plot.box()
1335 You can also generate groupings if you specify the `by` parameter (which
1336 can take a column name, or a list or tuple of column names):
1338 .. versionchanged:: 1.4.0
1340 .. plot::
1341 :context: close-figs
1343 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
1344 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
1345 >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
1346 """
1347 return self(kind="box", by=by, **kwargs)
1349 def hist(
1350 self, by: IndexLabel | None = None, bins: int = 10, **kwargs
1351 ) -> PlotAccessor:
1352 """
1353 Draw one histogram of the DataFrame's columns.
1355 A histogram is a representation of the distribution of data.
1356 This function groups the values of all given Series in the DataFrame
1357 into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
1358 This is useful when the DataFrame's Series are in a similar scale.
1360 Parameters
1361 ----------
1362 by : str or sequence, optional
1363 Column in the DataFrame to group by.
1365 .. versionchanged:: 1.4.0
1367 Previously, `by` is silently ignore and makes no groupings
1369 bins : int, default 10
1370 Number of histogram bins to be used.
1371 **kwargs
1372 Additional keyword arguments are documented in
1373 :meth:`DataFrame.plot`.
1375 Returns
1376 -------
1377 class:`matplotlib.AxesSubplot`
1378 Return a histogram plot.
1380 See Also
1381 --------
1382 DataFrame.hist : Draw histograms per DataFrame's Series.
1383 Series.hist : Draw a histogram with Series' data.
1385 Examples
1386 --------
1387 When we roll a die 6000 times, we expect to get each value around 1000
1388 times. But when we roll two dice and sum the result, the distribution
1389 is going to be quite different. A histogram illustrates those
1390 distributions.
1392 .. plot::
1393 :context: close-figs
1395 >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
1396 >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
1397 >>> ax = df.plot.hist(bins=12, alpha=0.5)
1399 A grouped histogram can be generated by providing the parameter `by` (which
1400 can be a column name, or a list of column names):
1402 .. plot::
1403 :context: close-figs
1405 >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
1406 >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
1407 >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
1408 """
1409 return self(kind="hist", by=by, bins=bins, **kwargs)
1411 def kde(
1412 self,
1413 bw_method: Literal["scott", "silverman"] | float | Callable | None = None,
1414 ind: np.ndarray | int | None = None,
1415 **kwargs,
1416 ) -> PlotAccessor:
1417 """
1418 Generate Kernel Density Estimate plot using Gaussian kernels.
1420 In statistics, `kernel density estimation`_ (KDE) is a non-parametric
1421 way to estimate the probability density function (PDF) of a random
1422 variable. This function uses Gaussian kernels and includes automatic
1423 bandwidth determination.
1425 .. _kernel density estimation:
1426 https://en.wikipedia.org/wiki/Kernel_density_estimation
1428 Parameters
1429 ----------
1430 bw_method : str, scalar or callable, optional
1431 The method used to calculate the estimator bandwidth. This can be
1432 'scott', 'silverman', a scalar constant or a callable.
1433 If None (default), 'scott' is used.
1434 See :class:`scipy.stats.gaussian_kde` for more information.
1435 ind : NumPy array or int, optional
1436 Evaluation points for the estimated PDF. If None (default),
1437 1000 equally spaced points are used. If `ind` is a NumPy array, the
1438 KDE is evaluated at the points passed. If `ind` is an integer,
1439 `ind` number of equally spaced points are used.
1440 **kwargs
1441 Additional keyword arguments are documented in
1442 :meth:`DataFrame.plot`.
1444 Returns
1445 -------
1446 matplotlib.axes.Axes or numpy.ndarray of them
1448 See Also
1449 --------
1450 scipy.stats.gaussian_kde : Representation of a kernel-density
1451 estimate using Gaussian kernels. This is the function used
1452 internally to estimate the PDF.
1454 Examples
1455 --------
1456 Given a Series of points randomly sampled from an unknown
1457 distribution, estimate its PDF using KDE with automatic
1458 bandwidth determination and plot the results, evaluating them at
1459 1000 equally spaced points (default):
1461 .. plot::
1462 :context: close-figs
1464 >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
1465 >>> ax = s.plot.kde()
1467 A scalar bandwidth can be specified. Using a small bandwidth value can
1468 lead to over-fitting, while using a large bandwidth value may result
1469 in under-fitting:
1471 .. plot::
1472 :context: close-figs
1474 >>> ax = s.plot.kde(bw_method=0.3)
1476 .. plot::
1477 :context: close-figs
1479 >>> ax = s.plot.kde(bw_method=3)
1481 Finally, the `ind` parameter determines the evaluation points for the
1482 plot of the estimated PDF:
1484 .. plot::
1485 :context: close-figs
1487 >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
1489 For DataFrame, it works in the same way:
1491 .. plot::
1492 :context: close-figs
1494 >>> df = pd.DataFrame({
1495 ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
1496 ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
1497 ... })
1498 >>> ax = df.plot.kde()
1500 A scalar bandwidth can be specified. Using a small bandwidth value can
1501 lead to over-fitting, while using a large bandwidth value may result
1502 in under-fitting:
1504 .. plot::
1505 :context: close-figs
1507 >>> ax = df.plot.kde(bw_method=0.3)
1509 .. plot::
1510 :context: close-figs
1512 >>> ax = df.plot.kde(bw_method=3)
1514 Finally, the `ind` parameter determines the evaluation points for the
1515 plot of the estimated PDF:
1517 .. plot::
1518 :context: close-figs
1520 >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
1521 """
1522 return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
1524 density = kde
1526 def area(
1527 self,
1528 x: Hashable | None = None,
1529 y: Hashable | None = None,
1530 stacked: bool = True,
1531 **kwargs,
1532 ) -> PlotAccessor:
1533 """
1534 Draw a stacked area plot.
1536 An area plot displays quantitative data visually.
1537 This function wraps the matplotlib area function.
1539 Parameters
1540 ----------
1541 x : label or position, optional
1542 Coordinates for the X axis. By default uses the index.
1543 y : label or position, optional
1544 Column to plot. By default uses all columns.
1545 stacked : bool, default True
1546 Area plots are stacked by default. Set to False to create a
1547 unstacked plot.
1548 **kwargs
1549 Additional keyword arguments are documented in
1550 :meth:`DataFrame.plot`.
1552 Returns
1553 -------
1554 matplotlib.axes.Axes or numpy.ndarray
1555 Area plot, or array of area plots if subplots is True.
1557 See Also
1558 --------
1559 DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.
1561 Examples
1562 --------
1563 Draw an area plot based on basic business metrics:
1565 .. plot::
1566 :context: close-figs
1568 >>> df = pd.DataFrame({
1569 ... 'sales': [3, 2, 3, 9, 10, 6],
1570 ... 'signups': [5, 5, 6, 12, 14, 13],
1571 ... 'visits': [20, 42, 28, 62, 81, 50],
1572 ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
1573 ... freq='ME'))
1574 >>> ax = df.plot.area()
1576 Area plots are stacked by default. To produce an unstacked plot,
1577 pass ``stacked=False``:
1579 .. plot::
1580 :context: close-figs
1582 >>> ax = df.plot.area(stacked=False)
1584 Draw an area plot for a single column:
1586 .. plot::
1587 :context: close-figs
1589 >>> ax = df.plot.area(y='sales')
1591 Draw with a different `x`:
1593 .. plot::
1594 :context: close-figs
1596 >>> df = pd.DataFrame({
1597 ... 'sales': [3, 2, 3],
1598 ... 'visits': [20, 42, 28],
1599 ... 'day': [1, 2, 3],
1600 ... })
1601 >>> ax = df.plot.area(x='day')
1602 """
1603 return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
1605 def pie(self, **kwargs) -> PlotAccessor:
1606 """
1607 Generate a pie plot.
1609 A pie plot is a proportional representation of the numerical data in a
1610 column. This function wraps :meth:`matplotlib.pyplot.pie` for the
1611 specified column. If no column reference is passed and
1612 ``subplots=True`` a pie plot is drawn for each numerical column
1613 independently.
1615 Parameters
1616 ----------
1617 y : int or label, optional
1618 Label or position of the column to plot.
1619 If not provided, ``subplots=True`` argument must be passed.
1620 **kwargs
1621 Keyword arguments to pass on to :meth:`DataFrame.plot`.
1623 Returns
1624 -------
1625 matplotlib.axes.Axes or np.ndarray of them
1626 A NumPy array is returned when `subplots` is True.
1628 See Also
1629 --------
1630 Series.plot.pie : Generate a pie plot for a Series.
1631 DataFrame.plot : Make plots of a DataFrame.
1633 Examples
1634 --------
1635 In the example below we have a DataFrame with the information about
1636 planet's mass and radius. We pass the 'mass' column to the
1637 pie function to get a pie plot.
1639 .. plot::
1640 :context: close-figs
1642 >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
1643 ... 'radius': [2439.7, 6051.8, 6378.1]},
1644 ... index=['Mercury', 'Venus', 'Earth'])
1645 >>> plot = df.plot.pie(y='mass', figsize=(5, 5))
1647 .. plot::
1648 :context: close-figs
1650 >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
1651 """
1652 if (
1653 isinstance(self._parent, ABCDataFrame)
1654 and kwargs.get("y", None) is None
1655 and not kwargs.get("subplots", False)
1656 ):
1657 raise ValueError("pie requires either y column or 'subplots=True'")
1658 return self(kind="pie", **kwargs)
1660 def scatter(
1661 self,
1662 x: Hashable,
1663 y: Hashable,
1664 s: Hashable | Sequence[Hashable] | None = None,
1665 c: Hashable | Sequence[Hashable] | None = None,
1666 **kwargs,
1667 ) -> PlotAccessor:
1668 """
1669 Create a scatter plot with varying marker point size and color.
1671 The coordinates of each point are defined by two dataframe columns and
1672 filled circles are used to represent each point. This kind of plot is
1673 useful to see complex correlations between two variables. Points could
1674 be for instance natural 2D coordinates like longitude and latitude in
1675 a map or, in general, any pair of metrics that can be plotted against
1676 each other.
1678 Parameters
1679 ----------
1680 x : int or str
1681 The column name or column position to be used as horizontal
1682 coordinates for each point.
1683 y : int or str
1684 The column name or column position to be used as vertical
1685 coordinates for each point.
1686 s : str, scalar or array-like, optional
1687 The size of each point. Possible values are:
1689 - A string with the name of the column to be used for marker's size.
1691 - A single scalar so all points have the same size.
1693 - A sequence of scalars, which will be used for each point's size
1694 recursively. For instance, when passing [2,14] all points size
1695 will be either 2 or 14, alternatively.
1697 c : str, int or array-like, optional
1698 The color of each point. Possible values are:
1700 - A single color string referred to by name, RGB or RGBA code,
1701 for instance 'red' or '#a98d19'.
1703 - A sequence of color strings referred to by name, RGB or RGBA
1704 code, which will be used for each point's color recursively. For
1705 instance ['green','yellow'] all points will be filled in green or
1706 yellow, alternatively.
1708 - A column name or position whose values will be used to color the
1709 marker points according to a colormap.
1711 **kwargs
1712 Keyword arguments to pass on to :meth:`DataFrame.plot`.
1714 Returns
1715 -------
1716 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
1718 See Also
1719 --------
1720 matplotlib.pyplot.scatter : Scatter plot using multiple input data
1721 formats.
1723 Examples
1724 --------
1725 Let's see how to draw a scatter plot using coordinates from the values
1726 in a DataFrame's columns.
1728 .. plot::
1729 :context: close-figs
1731 >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
1732 ... [6.4, 3.2, 1], [5.9, 3.0, 2]],
1733 ... columns=['length', 'width', 'species'])
1734 >>> ax1 = df.plot.scatter(x='length',
1735 ... y='width',
1736 ... c='DarkBlue')
1738 And now with the color determined by a column as well.
1740 .. plot::
1741 :context: close-figs
1743 >>> ax2 = df.plot.scatter(x='length',
1744 ... y='width',
1745 ... c='species',
1746 ... colormap='viridis')
1747 """
1748 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1750 def hexbin(
1751 self,
1752 x: Hashable,
1753 y: Hashable,
1754 C: Hashable | None = None,
1755 reduce_C_function: Callable | None = None,
1756 gridsize: int | tuple[int, int] | None = None,
1757 **kwargs,
1758 ) -> PlotAccessor:
1759 """
1760 Generate a hexagonal binning plot.
1762 Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
1763 (the default), this is a histogram of the number of occurrences
1764 of the observations at ``(x[i], y[i])``.
1766 If `C` is specified, specifies values at given coordinates
1767 ``(x[i], y[i])``. These values are accumulated for each hexagonal
1768 bin and then reduced according to `reduce_C_function`,
1769 having as default the NumPy's mean function (:meth:`numpy.mean`).
1770 (If `C` is specified, it must also be a 1-D sequence
1771 of the same length as `x` and `y`, or a column label.)
1773 Parameters
1774 ----------
1775 x : int or str
1776 The column label or position for x points.
1777 y : int or str
1778 The column label or position for y points.
1779 C : int or str, optional
1780 The column label or position for the value of `(x, y)` point.
1781 reduce_C_function : callable, default `np.mean`
1782 Function of one argument that reduces all the values in a bin to
1783 a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
1784 gridsize : int or tuple of (int, int), default 100
1785 The number of hexagons in the x-direction.
1786 The corresponding number of hexagons in the y-direction is
1787 chosen in a way that the hexagons are approximately regular.
1788 Alternatively, gridsize can be a tuple with two elements
1789 specifying the number of hexagons in the x-direction and the
1790 y-direction.
1791 **kwargs
1792 Additional keyword arguments are documented in
1793 :meth:`DataFrame.plot`.
1795 Returns
1796 -------
1797 matplotlib.AxesSubplot
1798 The matplotlib ``Axes`` on which the hexbin is plotted.
1800 See Also
1801 --------
1802 DataFrame.plot : Make plots of a DataFrame.
1803 matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
1804 the matplotlib function that is used under the hood.
1806 Examples
1807 --------
1808 The following examples are generated with random data from
1809 a normal distribution.
1811 .. plot::
1812 :context: close-figs
1814 >>> n = 10000
1815 >>> df = pd.DataFrame({'x': np.random.randn(n),
1816 ... 'y': np.random.randn(n)})
1817 >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)
1819 The next example uses `C` and `np.sum` as `reduce_C_function`.
1820 Note that `'observations'` values ranges from 1 to 5 but the result
1821 plot shows values up to more than 25. This is because of the
1822 `reduce_C_function`.
1824 .. plot::
1825 :context: close-figs
1827 >>> n = 500
1828 >>> df = pd.DataFrame({
1829 ... 'coord_x': np.random.uniform(-3, 3, size=n),
1830 ... 'coord_y': np.random.uniform(30, 50, size=n),
1831 ... 'observations': np.random.randint(1,5, size=n)
1832 ... })
1833 >>> ax = df.plot.hexbin(x='coord_x',
1834 ... y='coord_y',
1835 ... C='observations',
1836 ... reduce_C_function=np.sum,
1837 ... gridsize=10,
1838 ... cmap="viridis")
1839 """
1840 if reduce_C_function is not None:
1841 kwargs["reduce_C_function"] = reduce_C_function
1842 if gridsize is not None:
1843 kwargs["gridsize"] = gridsize
1845 return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
1848_backends: dict[str, types.ModuleType] = {}
1851def _load_backend(backend: str) -> types.ModuleType:
1852 """
1853 Load a pandas plotting backend.
1855 Parameters
1856 ----------
1857 backend : str
1858 The identifier for the backend. Either an entrypoint item registered
1859 with importlib.metadata, "matplotlib", or a module name.
1861 Returns
1862 -------
1863 types.ModuleType
1864 The imported backend.
1865 """
1866 from importlib.metadata import entry_points
1868 if backend == "matplotlib":
1869 # Because matplotlib is an optional dependency and first-party backend,
1870 # we need to attempt an import here to raise an ImportError if needed.
1871 try:
1872 module = importlib.import_module("pandas.plotting._matplotlib")
1873 except ImportError:
1874 raise ImportError(
1875 "matplotlib is required for plotting when the "
1876 'default backend "matplotlib" is selected.'
1877 ) from None
1878 return module
1880 found_backend = False
1882 eps = entry_points()
1883 key = "pandas_plotting_backends"
1884 # entry_points lost dict API ~ PY 3.10
1885 # https://github.com/python/importlib_metadata/issues/298
1886 if hasattr(eps, "select"):
1887 entry = eps.select(group=key)
1888 else:
1889 # Argument 2 to "get" of "dict" has incompatible type "Tuple[]";
1890 # expected "EntryPoints" [arg-type]
1891 entry = eps.get(key, ()) # type: ignore[arg-type]
1892 for entry_point in entry:
1893 found_backend = entry_point.name == backend
1894 if found_backend:
1895 module = entry_point.load()
1896 break
1898 if not found_backend:
1899 # Fall back to unregistered, module name approach.
1900 try:
1901 module = importlib.import_module(backend)
1902 found_backend = True
1903 except ImportError:
1904 # We re-raise later on.
1905 pass
1907 if found_backend:
1908 if hasattr(module, "plot"):
1909 # Validate that the interface is implemented when the option is set,
1910 # rather than at plot time.
1911 return module
1913 raise ValueError(
1914 f"Could not find plotting backend '{backend}'. Ensure that you've "
1915 f"installed the package providing the '{backend}' entrypoint, or that "
1916 "the package has a top-level `.plot` method."
1917 )
1920def _get_plot_backend(backend: str | None = None):
1921 """
1922 Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).
1924 The plotting system of pandas uses matplotlib by default, but the idea here
1925 is that it can also work with other third-party backends. This function
1926 returns the module which provides a top-level `.plot` method that will
1927 actually do the plotting. The backend is specified from a string, which
1928 either comes from the keyword argument `backend`, or, if not specified, from
1929 the option `pandas.options.plotting.backend`. All the rest of the code in
1930 this file uses the backend specified there for the plotting.
1932 The backend is imported lazily, as matplotlib is a soft dependency, and
1933 pandas can be used without it being installed.
1935 Notes
1936 -----
1937 Modifies `_backends` with imported backend as a side effect.
1938 """
1939 backend_str: str = backend or get_option("plotting.backend")
1941 if backend_str in _backends:
1942 return _backends[backend_str]
1944 module = _load_backend(backend_str)
1945 _backends[backend_str] = module
1946 return module