Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/plotting/_misc.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

76 statements  

1from __future__ import annotations 

2 

3from contextlib import contextmanager 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7) 

8 

9from pandas.plotting._core import _get_plot_backend 

10 

11if TYPE_CHECKING: 

12 from collections.abc import ( 

13 Generator, 

14 Mapping, 

15 ) 

16 

17 from matplotlib.axes import Axes 

18 from matplotlib.colors import Colormap 

19 from matplotlib.figure import Figure 

20 from matplotlib.table import Table 

21 import numpy as np 

22 

23 from pandas import ( 

24 DataFrame, 

25 Series, 

26 ) 

27 

28 

def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table:
    """
    Render a DataFrame or Series as a matplotlib table on the given axes.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes on which the table is drawn.
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments forwarded to the matplotlib backend's ``table``
        function. ``rowLabels`` and ``colLabels`` may be supplied here; when
        either one is omitted, ``None`` is forwarded for it, in which case
        the data index or column names are used as labels.

    Returns
    -------
    matplotlib.table.Table
        The table object returned by the plotting backend.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> import matplotlib.pyplot as plt
        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> fig, ax = plt.subplots()
        >>> ax.axis('off')
        (0.0, 1.0, 0.0, 1.0)
        >>> table = pd.plotting.table(ax, df, loc='center',
        ...                           cellLoc='center', colWidths=[.2, .2])
    """
    plot_backend = _get_plot_backend("matplotlib")
    # Merge the label defaults into the keyword mapping instead of spelling
    # them out next to ``**kwargs``: passing ``rowLabels=None`` explicitly
    # while the caller also supplies ``rowLabels`` raises
    # "got multiple values for keyword argument".
    options = {"rowLabels": None, "colLabels": None, **kwargs}
    return plot_backend.table(ax=ax, data=data, **options)

65 

66 

def register() -> None:
    """
    Install the pandas formatters and converters into matplotlib.

    Calling this function changes the global ``matplotlib.units.registry``
    dictionary. Custom converters are added by pandas for the following
    types:

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.

    Examples
    --------
    .. plot::
        :context: close-figs

        pandas performs the following call automatically so that the plot
        can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the registration is switched off manually, plotting raises an error:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").register()

110 

111 

def deregister() -> None:
    """
    Uninstall the pandas formatters and converters from matplotlib.

    This undoes what :func:`register` added and tries to bring the registry
    back to the state it had before pandas registered its own units.
    Converters for types owned by pandas, such as Timestamp and Period, are
    dropped entirely. Converters for types that pandas overrides, such as
    ``datetime.datetime``, get their original value back.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.

    Examples
    --------
    .. plot::
        :context: close-figs

        pandas performs the following call automatically so that the plot
        can be rendered:

        >>> pd.plotting.register_matplotlib_converters()

        >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'),
        ...                    'y': [1, 2]
        ...                    })
        >>> plot = df.plot.line(x='ts', y='y')

    If the registration is switched off manually, plotting raises an error:

    >>> pd.set_option("plotting.matplotlib.register_converters",
    ...               False)  # doctest: +SKIP
    >>> df.plot.line(x='ts', y='y')  # doctest: +SKIP
    Traceback (most recent call last):
    TypeError: float() argument must be a string or a real number, not 'Period'
    """
    # Delegate straight to the matplotlib backend module.
    _get_plot_backend("matplotlib").deregister()

153 

154 

def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds: Mapping[str, Any] | None = None,
    hist_kwds: Mapping[str, Any] | None = None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Plot every pair of columns against each other as a grid of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Transparency level to apply.
    figsize : (float,float), optional
        Figure size as a (width, height) tuple, in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Show the grid when True.
    diagonal : {'hist', 'kde'}
        What to draw on the diagonal: 'hist' for a histogram, 'kde' for a
        Kernel Density Estimation plot.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments handed to the kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments handed to the hist function.
    range_padding : float, default 0.05
        How far the x and y axis ranges are extended, relative to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments handed to the scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<Axes: xlabel='A', ylabel='A'>, <Axes: xlabel='B', ylabel='A'>,
                <Axes: xlabel='C', ylabel='A'>, <Axes: xlabel='D', ylabel='A'>],
               [<Axes: xlabel='A', ylabel='B'>, <Axes: xlabel='B', ylabel='B'>,
                <Axes: xlabel='C', ylabel='B'>, <Axes: xlabel='D', ylabel='B'>],
               [<Axes: xlabel='A', ylabel='C'>, <Axes: xlabel='B', ylabel='C'>,
                <Axes: xlabel='C', ylabel='C'>, <Axes: xlabel='D', ylabel='C'>],
               [<Axes: xlabel='A', ylabel='D'>, <Axes: xlabel='B', ylabel='D'>,
                <Axes: xlabel='C', ylabel='D'>, <Axes: xlabel='D', ylabel='D'>]],
              dtype=object)
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the named parameters once; extra keywords are appended as-is.
    named_options = {
        "frame": frame,
        "alpha": alpha,
        "figsize": figsize,
        "ax": ax,
        "grid": grid,
        "diagonal": diagonal,
        "marker": marker,
        "density_kwds": density_kwds,
        "hist_kwds": hist_kwds,
        "range_padding": range_padding,
    }
    return backend.scatter_matrix(**named_options, **kwargs)

233 

234 

def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwds,
) -> Axes:
    """
    Project a multidimensional dataset onto a 2D plane.

    Every Series of the DataFrame is laid out as an evenly spaced slice on
    a circle, and each data point is drawn inside the circle according to
    its value on each Series. Highly correlated `Series` in the `DataFrame`
    end up closer together on the unit circle.

    RadViz projects an N-dimensional data set into a 2D space in which the
    influence of each dimension can be read as a balance between the
    influence of all dimensions.

    See the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz for more information.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Name of the column that holds the category of each data point.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which the information is added.
    color : list[str] or tuple[str], optional
        One color per category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap from which colors are selected. A string is taken as the
        name of a matplotlib colormap to load.
    **kwds
        Options handed to the matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the named parameters once; extra keywords are appended as-is.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "ax": ax,
        "color": color,
        "colormap": colormap,
    }
    return backend.radviz(**named_options, **kwds)

320 

321 

def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap: Colormap | str | None = None,
    **kwargs,
) -> Axes:
    """
    Draw a matplotlib plot that visualizes clusters of multivariate data.

    The functional form of Andrews curves is:

    .. math::
        f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
        x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots

    The :math:`x` coefficients are the values of each dimension, and
    :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
    Every row of frame therefore yields one curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column that holds the class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        How many points each curve is plotted with.
    color : str, list[str] or tuple[str], optional
        Colors for the different classes, given either as strings or as
        3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap from which colors are selected. A string is taken as the
        name of a matplotlib colormap to load.
    **kwargs
        Options handed to the matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the named parameters once; extra keywords are appended as-is.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "ax": ax,
        "samples": samples,
        "color": color,
        "colormap": colormap,
    }
    return backend.andrews_curves(**named_options, **kwargs)

389 

390 

def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Draw bootstrap plots of the mean, median and mid-range statistics.

    A bootstrap plot estimates the uncertainty of a statistic through
    random sampling with replacement [1]_. For the requested number of
    samples of the requested size, this function produces bootstrapping
    plots for the mean, median and mid-range statistics.

    .. [1] "Bootstrapping (statistics)" in
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Series from which the bootstrapping samples are drawn.
    fig : matplotlib.figure.Figure, default None
        When given, this `fig` is used for plotting rather than a newly
        created figure with default parameters.
    size : int, default 50
        Number of data points taken in each sampling. It must be less than
        or equal to the length of the `series`.
    samples : int, default 500
        How many times the bootstrap procedure is repeated.
    **kwds
        Options handed to the matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------
    A basic bootstrap plot for a Series:

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
        <Figure size 640x480 with 6 Axes>
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the named parameters once; extra keywords are appended as-is.
    named_options = {
        "series": series,
        "fig": fig,
        "size": size,
        "samples": samples,
    }
    return backend.bootstrap_plot(**named_options, **kwds)

449 

450 

def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap: Colormap | str | None = None,
    axvlines: bool = True,
    axvlines_kwds: Mapping[str, Any] | None = None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Draw a parallel coordinates plot.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Name of the column that holds the class names.
    cols : list, optional
        Column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors for the different classes.
    use_columns : bool, optional
        When true, the columns are used as xticks.
    xticks : list or tuple, optional
        Values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap used for the line colors.
    axvlines : bool, optional
        When true, a vertical line is added at each xtick.
    axvlines_kwds : keywords, optional
        Options handed to the axvline method for the vertical lines.
    sort_labels : bool, default False
        Sort the class_column labels, which helps when assigning colors.
    **kwargs
        Options handed to the matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )  # doctest: +SKIP
    """
    backend = _get_plot_backend("matplotlib")
    # Collect the named parameters once; extra keywords are appended as-is.
    named_options = {
        "frame": frame,
        "class_column": class_column,
        "cols": cols,
        "ax": ax,
        "color": color,
        "use_columns": use_columns,
        "xticks": xticks,
        "colormap": colormap,
        "axvlines": axvlines,
        "axvlines_kwds": axvlines_kwds,
        "sort_labels": sort_labels,
    }
    return backend.parallel_coordinates(**named_options, **kwargs)

527 

528 

def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
    """
    Draw a lag plot of a time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length used for the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to draw on.
    **kwds
        Keyword arguments for the Matplotlib scatter method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The most common use of a lag plot is spotting patterns in time series
    data.

    Take the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()  # doctest: +SKIP

    Its lag plot with ``lag=1`` is

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <Axes: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Look up the backend implementation, then forward every argument to it.
    draw_lag_plot = _get_plot_backend("matplotlib").lag_plot
    return draw_lag_plot(series=series, lag=lag, ax=ax, **kwds)

572 

573 

def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
    """
    Draw an autocorrelation plot of a time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to draw on.
    **kwargs
        Options handed to the matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The plot's horizontal lines mark the 95% and 99% confidence bands.

    The 99% confidence band is the dashed line.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)  # doctest: +SKIP
    """
    # Look up the backend implementation, then forward every argument to it.
    draw_autocorrelation = _get_plot_backend("matplotlib").autocorrelation_plot
    return draw_autocorrelation(series=series, ax=ax, **kwargs)

606 

607 

608class _Options(dict): 

609 """ 

610 Stores pandas plotting options. 

611 

612 Allows for parameter aliasing so you can just use parameter names that are 

613 the same as the plot function parameters, but is stored in a canonical 

614 format that makes it easy to breakdown into groups later. 

615 

616 Examples 

617 -------- 

618 

619 .. plot:: 

620 :context: close-figs 

621 

622 >>> np.random.seed(42) 

623 >>> df = pd.DataFrame({'A': np.random.randn(10), 

624 ... 'B': np.random.randn(10)}, 

625 ... index=pd.date_range("1/1/2000", 

626 ... freq='4MS', periods=10)) 

627 >>> with pd.plotting.plot_params.use("x_compat", True): 

628 ... _ = df["A"].plot(color="r") 

629 ... _ = df["B"].plot(color="g") 

630 """ 

631 

632 # alias so the names are same as plotting method parameter names 

633 _ALIASES = {"x_compat": "xaxis.compat"} 

634 _DEFAULT_KEYS = ["xaxis.compat"] 

635 

636 def __init__(self, deprecated: bool = False) -> None: 

637 self._deprecated = deprecated 

638 super().__setitem__("xaxis.compat", False) 

639 

640 def __getitem__(self, key): 

641 key = self._get_canonical_key(key) 

642 if key not in self: 

643 raise ValueError(f"{key} is not a valid pandas plotting option") 

644 return super().__getitem__(key) 

645 

646 def __setitem__(self, key, value) -> None: 

647 key = self._get_canonical_key(key) 

648 super().__setitem__(key, value) 

649 

650 def __delitem__(self, key) -> None: 

651 key = self._get_canonical_key(key) 

652 if key in self._DEFAULT_KEYS: 

653 raise ValueError(f"Cannot remove default parameter {key}") 

654 super().__delitem__(key) 

655 

656 def __contains__(self, key) -> bool: 

657 key = self._get_canonical_key(key) 

658 return super().__contains__(key) 

659 

660 def reset(self) -> None: 

661 """ 

662 Reset the option store to its initial state 

663 

664 Returns 

665 ------- 

666 None 

667 """ 

668 # error: Cannot access "__init__" directly 

669 self.__init__() # type: ignore[misc] 

670 

671 def _get_canonical_key(self, key): 

672 return self._ALIASES.get(key, key) 

673 

674 @contextmanager 

675 def use(self, key, value) -> Generator[_Options, None, None]: 

676 """ 

677 Temporarily set a parameter value using the with statement. 

678 Aliasing allowed. 

679 """ 

680 old_value = self[key] 

681 try: 

682 self[key] = value 

683 yield self 

684 finally: 

685 self[key] = old_value 

686 

687 

# Module-level options store instance; the _Options docstring example uses it
# as ``pd.plotting.plot_params``.
plot_params = _Options()