1from __future__ import annotations
2
3from contextlib import contextmanager
4from typing import (
5 TYPE_CHECKING,
6 Any,
7)
8
9from pandas.plotting._core import _get_plot_backend
10
11if TYPE_CHECKING:
12 from collections.abc import (
13 Generator,
14 Mapping,
15 )
16
17 from matplotlib.axes import Axes
18 from matplotlib.colors import Colormap
19 from matplotlib.figure import Figure
20 from matplotlib.table import Table
21 import numpy as np
22
23 from pandas import (
24 DataFrame,
25 Series,
26 )
27
28
def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table:
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
        The axes object forwarded to the matplotlib backend.
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> import matplotlib.pyplot as plt
        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> fig, ax = plt.subplots()
        >>> ax.axis('off')
        (0.0, 1.0, 0.0, 1.0)
        >>> table = pd.plotting.table(ax, df, loc='center',
        ...                           cellLoc='center', colWidths=list([.2, .2]))
    """
    # Supply the ``None`` defaults through ``kwargs`` instead of as explicit
    # keywords: passing ``rowLabels=None`` next to ``**kwargs`` raises
    # "TypeError: got multiple values for keyword argument" as soon as the
    # caller provides their own ``rowLabels``/``colLabels``.
    kwargs.setdefault("rowLabels", None)
    kwargs.setdefault("colLabels", None)

    plot_backend = _get_plot_backend("matplotlib")
    return plot_backend.table(ax=ax, data=data, **kwargs)
65
66
def register() -> None:
    """
    Register pandas formatters and converters with matplotlib.

    Calling this function changes the global ``matplotlib.units.registry``
    dictionary. The custom converters pandas adds cover the types

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.

    Examples
    --------
    .. plot::
        :context: close-figs

        pandas runs the following line automatically so that
        the plot can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the converters are switched off manually, plotting raises an error:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").register()
110
111
def deregister() -> None:
    """
    Remove pandas formatters and converters.

    Undoes the work of :func:`register` by removing the custom converters
    it added. The aim is to put the registry back into the state it had
    before pandas registered its own units. Converters for pandas' own
    types like Timestamp and Period are removed completely, while
    converters for types pandas overwrites, like ``datetime.datetime``,
    are restored to their original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.

    Examples
    --------
    .. plot::
        :context: close-figs

        pandas runs the following line automatically so that
        the plot can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the converters are switched off manually, plotting raises an error:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").deregister()
153
154
def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds: Mapping[str, Any] | None = None,
    hist_kwds: Mapping[str, Any] | None = None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float,float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Choose what is drawn on the diagonal: 'kde' for a Kernel Density
        Estimation plot or 'hist' for a Histogram plot.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<Axes: xlabel='A', ylabel='A'>, <Axes: xlabel='B', ylabel='A'>,
                <Axes: xlabel='C', ylabel='A'>, <Axes: xlabel='D', ylabel='A'>],
               [<Axes: xlabel='A', ylabel='B'>, <Axes: xlabel='B', ylabel='B'>,
                <Axes: xlabel='C', ylabel='B'>, <Axes: xlabel='D', ylabel='B'>],
               [<Axes: xlabel='A', ylabel='C'>, <Axes: xlabel='B', ylabel='C'>,
                <Axes: xlabel='C', ylabel='C'>, <Axes: xlabel='D', ylabel='C'>],
               [<Axes: xlabel='A', ylabel='D'>, <Axes: xlabel='B', ylabel='D'>,
                <Axes: xlabel='C', ylabel='D'>, <Axes: xlabel='D', ylabel='D'>]],
              dtype=object)
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the explicitly named parameters; extra scatter options travel
    # separately in ``kwargs``.
    named_options = {
        "frame": frame,
        "alpha": alpha,
        "figsize": figsize,
        "ax": ax,
        "grid": grid,
        "diagonal": diagonal,
        "marker": marker,
        "density_kwds": density_kwds,
        "hist_kwds": hist_kwds,
        "range_padding": range_padding,
    }
    return backend.scatter_matrix(**named_options, **kwargs)
233
234
def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwds,
) -> Axes:
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; scatter options stay in ``kwds``.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "ax": ax,
        "color": color,
        "colormap": colormap,
    }
    return backend.radviz(**named_options, **kwds)
320
321
def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwargs,
) -> Axes:
    """
    Generate a matplotlib plot for visualizing clusters of multivariate data.

    Andrews curves have the functional form:

    .. math::
        f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
        x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots

    The :math:`x` coefficients are the values of each dimension, and
    :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
    Every row of frame therefore yields one curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column containing class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        Number of points to plot in each curve.
    color : str, list[str] or tuple[str], optional
        Colors to use for the different classes. Colors can be strings
        or 3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If a string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; plotting options stay in ``kwargs``.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "ax": ax,
        "samples": samples,
        "color": color,
        "colormap": colormap,
    }
    return backend.andrews_curves(**named_options, **kwargs)
389
390
def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Bootstrap plot on mean, median and mid-range statistics.

    A bootstrap plot estimates the uncertainty of a statistic by relying on
    random sampling with replacement [1]_. This function generates
    bootstrapping plots for the mean, median and mid-range statistics for
    the given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in \
            https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------
    This example draws a basic bootstrap plot for a Series.

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
        <Figure size 640x480 with 6 Axes>
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; plotting options stay in ``kwds``.
    named_options = {
        "series": series,
        "fig": fig,
        "size": size,
        "samples": samples,
    }
    return backend.bootstrap_plot(**named_options, **kwds)
449
450
def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap: Colormap | str | None = None,
    axvlines: bool = True,
    axvlines_kwds: Mapping[str, Any] | None = None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; plotting options stay in ``kwargs``.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "cols": cols,
        "ax": ax,
        "color": color,
        "use_columns": use_columns,
        "xticks": xticks,
        "colormap": colormap,
        "axvlines": axvlines,
        "axvlines_kwds": axvlines_kwds,
        "sort_labels": sort_labels,
    }
    return backend.parallel_coordinates(**named_options, **kwargs)
527
528
def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length of the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The most common use of lag plots is looking for patterns in time series
    data.

    Given the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()  # doctest: +SKIP

    A lag plot with ``lag=1`` returns

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <Axes: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; scatter options stay in ``kwds``.
    named_options = {"series": series, "lag": lag, "ax": ax}
    return backend.lag_plot(**named_options, **kwds)
572
573
def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The horizontal lines in the plot correspond to 95% and 99% confidence bands.

    The dashed line is 99% confidence band.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters are gathered first; plotting options stay in ``kwargs``.
    named_options = {"series": series, "ax": ax}
    return backend.autocorrelation_plot(**named_options, **kwargs)
606
607
608class _Options(dict):
609 """
610 Stores pandas plotting options.
611
612 Allows for parameter aliasing so you can just use parameter names that are
613 the same as the plot function parameters, but is stored in a canonical
614 format that makes it easy to breakdown into groups later.
615
616 Examples
617 --------
618
619 .. plot::
620 :context: close-figs
621
622 >>> np.random.seed(42)
623 >>> df = pd.DataFrame({'A': np.random.randn(10),
624 ... 'B': np.random.randn(10)},
625 ... index=pd.date_range("1/1/2000",
626 ... freq='4MS', periods=10))
627 >>> with pd.plotting.plot_params.use("x_compat", True):
628 ... _ = df["A"].plot(color="r")
629 ... _ = df["B"].plot(color="g")
630 """
631
632 # alias so the names are same as plotting method parameter names
633 _ALIASES = {"x_compat": "xaxis.compat"}
634 _DEFAULT_KEYS = ["xaxis.compat"]
635
636 def __init__(self, deprecated: bool = False) -> None:
637 self._deprecated = deprecated
638 super().__setitem__("xaxis.compat", False)
639
640 def __getitem__(self, key):
641 key = self._get_canonical_key(key)
642 if key not in self:
643 raise ValueError(f"{key} is not a valid pandas plotting option")
644 return super().__getitem__(key)
645
646 def __setitem__(self, key, value) -> None:
647 key = self._get_canonical_key(key)
648 super().__setitem__(key, value)
649
650 def __delitem__(self, key) -> None:
651 key = self._get_canonical_key(key)
652 if key in self._DEFAULT_KEYS:
653 raise ValueError(f"Cannot remove default parameter {key}")
654 super().__delitem__(key)
655
656 def __contains__(self, key) -> bool:
657 key = self._get_canonical_key(key)
658 return super().__contains__(key)
659
660 def reset(self) -> None:
661 """
662 Reset the option store to its initial state
663
664 Returns
665 -------
666 None
667 """
668 # error: Cannot access "__init__" directly
669 self.__init__() # type: ignore[misc]
670
671 def _get_canonical_key(self, key):
672 return self._ALIASES.get(key, key)
673
674 @contextmanager
675 def use(self, key, value) -> Generator[_Options, None, None]:
676 """
677 Temporarily set a parameter value using the with statement.
678 Aliasing allowed.
679 """
680 old_value = self[key]
681 try:
682 self[key] = value
683 yield self
684 finally:
685 self[key] = old_value
686
687
688plot_params = _Options()