1from __future__ import annotations
2
3from contextlib import contextmanager
4from typing import (
5 TYPE_CHECKING,
6 Generator,
7)
8
9from pandas.plotting._core import _get_plot_backend
10
11if TYPE_CHECKING:
12 from matplotlib.axes import Axes
13 from matplotlib.figure import Figure
14 import numpy as np
15
16 from pandas import (
17 DataFrame,
18 Series,
19 )
20
21
def table(ax, data, **kwargs):
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
        Axes passed through to the plotting backend.
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object
    """
    plot_backend = _get_plot_backend("matplotlib")
    # ``rowLabels`` / ``colLabels`` default to None, but a value supplied by
    # the caller must take precedence.  Spelling them out next to ``**kwargs``
    # raised "got multiple values for keyword argument" as soon as a caller
    # provided either label, contradicting the documented contract.
    kwargs.setdefault("rowLabels", None)
    kwargs.setdefault("colLabels", None)
    return plot_backend.table(ax=ax, data=data, **kwargs)
44
45
def register() -> None:
    """
    Register pandas formatters and converters with matplotlib.

    Calling this modifies the global ``matplotlib.units.registry``
    dictionary. pandas installs custom converters for the following types:

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.
    """
    # Resolve the matplotlib backend and delegate the registration to it.
    _get_plot_backend("matplotlib").register()
66
67
def deregister() -> None:
    """
    Remove pandas formatters and converters.

    Undoes :func:`register` by removing the custom converters it added,
    attempting to put the registry back into the state it had before pandas
    registered its own units. Converters for pandas' own types like
    Timestamp and Period are removed completely. Converters for types
    pandas overwrites, like ``datetime.datetime``, are restored to their
    original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.
    """
    # Resolve the matplotlib backend and delegate the removal to it.
    _get_plot_backend("matplotlib").deregister()
86
87
def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds=None,
    hist_kwds=None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
        Data to plot.
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float, float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
        Axes forwarded to the plotting backend.
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Pick between 'kde' and 'hist' for either Kernel Density Estimation or
        Histogram plot in the diagonal.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<AxesSubplot: xlabel='A', ylabel='A'>,
            <AxesSubplot: xlabel='B', ylabel='A'>,
            <AxesSubplot: xlabel='C', ylabel='A'>,
            <AxesSubplot: xlabel='D', ylabel='A'>],
           [<AxesSubplot: xlabel='A', ylabel='B'>,
            <AxesSubplot: xlabel='B', ylabel='B'>,
            <AxesSubplot: xlabel='C', ylabel='B'>,
            <AxesSubplot: xlabel='D', ylabel='B'>],
           [<AxesSubplot: xlabel='A', ylabel='C'>,
            <AxesSubplot: xlabel='B', ylabel='C'>,
            <AxesSubplot: xlabel='C', ylabel='C'>,
            <AxesSubplot: xlabel='D', ylabel='C'>],
           [<AxesSubplot: xlabel='A', ylabel='D'>,
            <AxesSubplot: xlabel='B', ylabel='D'>,
            <AxesSubplot: xlabel='C', ylabel='D'>,
            <AxesSubplot: xlabel='D', ylabel='D'>]], dtype=object)
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters can never collide with **kwargs, so unpacking both
    # mappings is equivalent to spelling every keyword out in the call.
    named_options = {
        "frame": frame,
        "alpha": alpha,
        "figsize": figsize,
        "ax": ax,
        "grid": grid,
        "diagonal": diagonal,
        "marker": marker,
        "density_kwds": density_kwds,
        "hist_kwds": hist_kwds,
        "range_padding": range_padding,
    }
    return backend.scatter_matrix(**named_options, **kwargs)
173
174
def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwds,
) -> Axes:
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    """
    # Look up the matplotlib backend and hand every argument over unchanged.
    return _get_plot_backend("matplotlib").radviz(
        frame=frame,
        class_column=class_column,
        ax=ax,
        color=color,
        colormap=colormap,
        **kwds,
    )
261
262
def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwargs,
) -> Axes:
    r"""
    Generate a matplotlib plot for visualising clusters of multivariate data.

    Andrews curves have the functional form:

    .. math::
        f(t) = \frac{x_1}{\sqrt{2}} + x_2 \sin(t) + x_3 \cos(t) +
        x_4 \sin(2t) + x_5 \cos(2t) + \cdots

    Where :math:`x` coefficients correspond to the values of each dimension
    and :math:`t` is linearly spaced between :math:`-\pi` and :math:`+\pi`.
    Each row of frame then corresponds to a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column containing class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        Number of points to plot in each curve.
    color : str, list[str] or tuple[str], optional
        Colors to use for the different classes. Colors can be strings
        or 3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If a string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters cannot reappear inside **kwargs, so merging the two
    # mappings at the call site forwards exactly the same keywords.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "ax": ax,
        "samples": samples,
        "color": color,
        "colormap": colormap,
    }
    return backend.andrews_curves(**named_options, **kwargs)
331
332
def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    Parameters
    ----------
    series : pandas.Series
        Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    References
    ----------
    .. [1] "Bootstrapping (statistics)" in
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Examples
    --------
    This example draws a basic bootstrap plot for a Series.

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)
        <Figure size 640x480 with 6 Axes>
    """
    # Look up the matplotlib backend and forward all arguments as keywords.
    return _get_plot_backend("matplotlib").bootstrap_plot(
        series=series,
        fig=fig,
        size=size,
        samples=samples,
        **kwds,
    )
391
392
def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap=None,
    axvlines: bool = True,
    axvlines_kwds=None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
        Data to plot.
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Named parameters cannot reappear inside **kwargs, so merging the two
    # mappings at the call site forwards exactly the same keywords.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "cols": cols,
        "ax": ax,
        "color": color,
        "use_columns": use_columns,
        "xticks": xticks,
        "colormap": colormap,
        "axvlines": axvlines,
        "axvlines_kwds": axvlines_kwds,
        "sort_labels": sort_labels,
    }
    return backend.parallel_coordinates(**named_options, **kwargs)
470
471
def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length of the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    Lag plots are most commonly used to look for patterns in time series data.

    Given the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()  # doctest: +SKIP

    A lag plot with ``lag=1`` returns

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Look up the matplotlib backend and forward all arguments as keywords.
    return _get_plot_backend("matplotlib").lag_plot(
        series=series,
        lag=lag,
        ax=ax,
        **kwds,
    )
516
517
def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The horizontal lines in the plot correspond to 95% and 99% confidence bands.

    The dashed line is the 99% confidence band.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)  # doctest: +SKIP
    """
    # Look up the matplotlib backend and forward all arguments as keywords.
    return _get_plot_backend("matplotlib").autocorrelation_plot(
        series=series,
        ax=ax,
        **kwargs,
    )
551
552
class _Options(dict):
    """
    Stores pandas plotting options.

    Allows for parameter aliasing so you can just use parameter names that are
    the same as the plot function parameters, but is stored in a canonical
    format that makes it easy to breakdown into groups later.

    Only the item protocol (``[]``, ``del``, ``in``) is overridden here, so
    aliases are resolved for those operations only.
    """

    # alias so the names are same as plotting method parameter names
    _ALIASES = {"x_compat": "xaxis.compat"}
    # canonical keys that must always be present and therefore cannot be deleted
    _DEFAULT_KEYS = ["xaxis.compat"]

    def __init__(self, deprecated: bool = False) -> None:
        self._deprecated = deprecated
        # The key is already canonical, so skip the aliasing ``__setitem__``.
        super().__setitem__("xaxis.compat", False)

    def __getitem__(self, key):
        """Return the option stored under ``key`` (alias or canonical name).

        Raises
        ------
        ValueError
            If ``key`` is not a stored option.
        """
        key = self._get_canonical_key(key)
        if key not in self:
            raise ValueError(f"{key} is not a valid pandas plotting option")
        return super().__getitem__(key)

    def __setitem__(self, key, value) -> None:
        """Store ``value`` under the canonical form of ``key``."""
        key = self._get_canonical_key(key)
        super().__setitem__(key, value)

    def __delitem__(self, key) -> None:
        """Delete an option; default options cannot be removed.

        Raises
        ------
        ValueError
            If ``key`` resolves to one of the default keys.
        """
        key = self._get_canonical_key(key)
        if key in self._DEFAULT_KEYS:
            raise ValueError(f"Cannot remove default parameter {key}")
        super().__delitem__(key)

    def __contains__(self, key) -> bool:
        """Return True if ``key`` (alias or canonical name) is stored."""
        key = self._get_canonical_key(key)
        return super().__contains__(key)

    def reset(self) -> None:
        """
        Reset the option store to its initial state

        Every option added after construction is dropped and the default
        options are restored to their default values. The ``deprecated``
        flag given at construction time is preserved.

        Returns
        -------
        None
        """
        deprecated = self._deprecated
        # Re-running __init__ alone would only overwrite the defaults and leave
        # any extra keys behind, so empty the mapping first.
        super().clear()
        # error: Cannot access "__init__" directly
        self.__init__(deprecated)  # type: ignore[misc]

    def _get_canonical_key(self, key):
        """Translate an alias to its canonical key; other keys pass through."""
        return self._ALIASES.get(key, key)

    @contextmanager
    def use(self, key, value) -> Generator[_Options, None, None]:
        """
        Temporarily set a parameter value using the with statement.
        Aliasing allowed.

        The previous value is restored when the block exits, including when
        it exits through an exception. ``key`` must already be a stored
        option, otherwise the lookup of the old value raises ``ValueError``.
        """
        old_value = self[key]
        try:
            self[key] = value
            yield self
        finally:
            self[key] = old_value


# Module-level option store, created with the default options.
plot_params = _Options()