Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/plotting/_misc.py: 41%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3from contextlib import contextmanager
4from typing import (
5 TYPE_CHECKING,
6 Generator,
7)
9from pandas.plotting._core import _get_plot_backend
11if TYPE_CHECKING:
12 from matplotlib.axes import Axes
13 from matplotlib.figure import Figure
14 import numpy as np
16 from pandas import (
17 DataFrame,
18 Series,
19 )
def table(ax, data, **kwargs):
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object
    """
    plot_backend = _get_plot_backend("matplotlib")
    # Pull the labels out of kwargs so a caller-supplied value is forwarded
    # once instead of colliding with an explicit keyword (which raised
    # "got multiple values for keyword argument"). When absent, None is
    # passed exactly as before.
    row_labels = kwargs.pop("rowLabels", None)
    col_labels = kwargs.pop("colLabels", None)
    return plot_backend.table(
        ax=ax, data=data, rowLabels=row_labels, colLabels=col_labels, **kwargs
    )
def register() -> None:
    """
    Register pandas formatters and converters with matplotlib.

    Calling this modifies the global ``matplotlib.units.registry``
    dictionary. The custom converters pandas adds cover

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.
    """
    _get_plot_backend("matplotlib").register()
def deregister() -> None:
    """
    Remove pandas formatters and converters.

    Undoes what :func:`register` did by removing the custom converters it
    added, attempting to put the registry back into the state it had before
    pandas registered its own units. Converters for pandas' own types, such
    as Timestamp and Period, are removed completely, while converters for
    types pandas overwrites, such as ``datetime.datetime``, are restored to
    their original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.
    """
    _get_plot_backend("matplotlib").deregister()
def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds=None,
    hist_kwds=None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float,float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Which plot to draw on the diagonal: 'kde' for Kernel Density
        Estimation or 'hist' for a Histogram.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<AxesSubplot: xlabel='A', ylabel='A'>,
            <AxesSubplot: xlabel='B', ylabel='A'>,
            <AxesSubplot: xlabel='C', ylabel='A'>,
            <AxesSubplot: xlabel='D', ylabel='A'>],
           [<AxesSubplot: xlabel='A', ylabel='B'>,
            <AxesSubplot: xlabel='B', ylabel='B'>,
            <AxesSubplot: xlabel='C', ylabel='B'>,
            <AxesSubplot: xlabel='D', ylabel='B'>],
           [<AxesSubplot: xlabel='A', ylabel='C'>,
            <AxesSubplot: xlabel='B', ylabel='C'>,
            <AxesSubplot: xlabel='C', ylabel='C'>,
            <AxesSubplot: xlabel='D', ylabel='C'>],
           [<AxesSubplot: xlabel='A', ylabel='D'>,
            <AxesSubplot: xlabel='B', ylabel='D'>,
            <AxesSubplot: xlabel='C', ylabel='D'>,
            <AxesSubplot: xlabel='D', ylabel='D'>]], dtype=object)
    """
    backend = _get_plot_backend("matplotlib")
    # Every argument is forwarded by keyword, unchanged, to the backend.
    axes_matrix = backend.scatter_matrix(
        frame=frame,
        alpha=alpha,
        figsize=figsize,
        ax=ax,
        grid=grid,
        diagonal=diagonal,
        marker=marker,
        density_kwds=density_kwds,
        hist_kwds=hist_kwds,
        range_padding=range_padding,
        **kwargs,
    )
    return axes_matrix
def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwds,
) -> Axes:
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    backend = _get_plot_backend("matplotlib")
    # Arguments are handed to the backend by keyword, unmodified.
    result_axes = backend.radviz(
        frame=frame,
        class_column=class_column,
        ax=ax,
        color=color,
        colormap=colormap,
        **kwds,
    )
    return result_axes
def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwargs,
) -> Axes:
    """
    Generate a matplotlib plot for visualising clusters of multivariate data.

    Andrews curves have the functional form:

    .. math::
        f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
        x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots

    Where :math:`x` coefficients correspond to the values of each dimension
    and :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
    Each row of frame then corresponds to a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column containing class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        Number of points to plot in each curve.
    color : str, list[str] or tuple[str], optional
        Colors to use for the different classes. Colors can be strings
        or 3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If a string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')
        <AxesSubplot: title={'center': 'width'}>
    """
    backend = _get_plot_backend("matplotlib")
    # Straight keyword pass-through to the backend implementation.
    curves_axes = backend.andrews_curves(
        frame=frame,
        class_column=class_column,
        ax=ax,
        samples=samples,
        color=color,
        colormap=colormap,
        **kwargs,
    )
    return curves_axes
def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in \
            https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------
    This example draws a basic bootstrap plot for a Series.

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)
        <Figure size 640x480 with 6 Axes>
    """
    backend = _get_plot_backend("matplotlib")
    # All parameters are passed on by keyword without modification.
    figure = backend.bootstrap_plot(
        series=series,
        fig=fig,
        size=size,
        samples=samples,
        **kwds,
    )
    return figure
def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap=None,
    axvlines: bool = True,
    axvlines_kwds=None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    backend = _get_plot_backend("matplotlib")
    # Keyword-for-keyword delegation to the backend.
    coords_axes = backend.parallel_coordinates(
        frame=frame,
        class_column=class_column,
        cols=cols,
        ax=ax,
        color=color,
        use_columns=use_columns,
        xticks=xticks,
        colormap=colormap,
        axvlines=axvlines,
        axvlines_kwds=axvlines_kwds,
        sort_labels=sort_labels,
        **kwargs,
    )
    return coords_axes
def lag_plot(
    series: Series,
    lag: int = 1,
    ax: Axes | None = None,
    **kwds,
) -> Axes:
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length of the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    Lag plots are most commonly used to look for patterns in time series data.

    Given the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()
        <AxesSubplot: xlabel='Midrange'>

    A lag plot with ``lag=1`` returns

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    backend = _get_plot_backend("matplotlib")
    # Forward the arguments by keyword to the backend.
    lag_axes = backend.lag_plot(
        series=series,
        lag=lag,
        ax=ax,
        **kwds,
    )
    return lag_axes
def autocorrelation_plot(
    series: Series,
    ax: Axes | None = None,
    **kwargs,
) -> Axes:
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The horizontal lines in the plot correspond to 95% and 99% confidence bands.

    The dashed line is 99% confidence band.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)
        <AxesSubplot: title={'center': 'width'}, xlabel='Lag', ylabel='Autocorrelation'>
    """
    backend = _get_plot_backend("matplotlib")
    # Forward the arguments by keyword to the backend.
    acf_axes = backend.autocorrelation_plot(
        series=series,
        ax=ax,
        **kwargs,
    )
    return acf_axes
class _Options(dict):
    """
    Stores pandas plotting options.

    Allows for parameter aliasing so you can just use parameter names that are
    the same as the plot function parameters, but is stored in a canonical
    format that makes it easy to breakdown into groups later.

    Every item access (get, set, delete, membership) first maps the key
    through ``_ALIASES`` so that an alias and its canonical name refer to the
    same entry.
    """

    # alias so the names are same as plotting method parameter names
    _ALIASES = {"x_compat": "xaxis.compat"}
    # canonical keys that must always be present and therefore cannot be deleted
    _DEFAULT_KEYS = ["xaxis.compat"]

    def __init__(self, deprecated: bool = False) -> None:
        """Create the store holding only the default ``xaxis.compat = False``."""
        self._deprecated = deprecated
        super().__setitem__("xaxis.compat", False)

    def __getitem__(self, key):
        """
        Return the value stored for ``key`` (aliases allowed).

        Raises
        ------
        ValueError
            If the canonical key is not present in the store.
        """
        key = self._get_canonical_key(key)
        if key not in self:
            raise ValueError(f"{key} is not a valid pandas plotting option")
        return super().__getitem__(key)

    def __setitem__(self, key, value) -> None:
        """Store ``value`` under the canonical form of ``key``."""
        key = self._get_canonical_key(key)
        super().__setitem__(key, value)

    def __delitem__(self, key) -> None:
        """
        Remove an option (aliases allowed).

        Raises
        ------
        ValueError
            If ``key`` resolves to one of the default keys.
        """
        key = self._get_canonical_key(key)
        if key in self._DEFAULT_KEYS:
            raise ValueError(f"Cannot remove default parameter {key}")
        super().__delitem__(key)

    def __contains__(self, key) -> bool:
        """Return whether the canonical form of ``key`` is stored."""
        key = self._get_canonical_key(key)
        return super().__contains__(key)

    def reset(self) -> None:
        """
        Reset the option store to its initial state

        Any option added after construction is discarded and the default
        keys are restored to their default values.

        Returns
        -------
        None
        """
        # Re-running __init__ alone only re-adds the defaults; entries added
        # later would survive. dict.clear() does not go through the
        # overridden __delitem__, so the default-key guard is not triggered.
        self.clear()
        # error: Cannot access "__init__" directly
        self.__init__()  # type: ignore[misc]

    def _get_canonical_key(self, key):
        """Translate an alias to its canonical key; other keys pass through."""
        return self._ALIASES.get(key, key)

    @contextmanager
    def use(self, key, value) -> Generator[_Options, None, None]:
        """
        Temporarily set a parameter value using the with statement.
        Aliasing allowed.

        The previous value is restored on exit, including when the body of
        the ``with`` block raises. ``key`` must already exist in the store,
        otherwise the initial lookup raises ``ValueError``.
        """
        old_value = self[key]
        try:
            self[key] = value
            yield self
        finally:
            self[key] = old_value


# Module-level option store instance.
plot_params = _Options()