Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/plotting/_misc.py: 41%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

73 statements  

1from __future__ import annotations 

2 

3from contextlib import contextmanager 

4from typing import ( 

5 TYPE_CHECKING, 

6 Generator, 

7) 

8 

9from pandas.plotting._core import _get_plot_backend 

10 

11if TYPE_CHECKING: 

12 from matplotlib.axes import Axes 

13 from matplotlib.figure import Figure 

14 import numpy as np 

15 

16 from pandas import ( 

17 DataFrame, 

18 Series, 

19 ) 

20 

21 

def table(ax, data, **kwargs):
    """
    Helper function to convert DataFrame and Series to matplotlib.table.

    Parameters
    ----------
    ax : Matplotlib axes object
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.

    Returns
    -------
    matplotlib table object
    """
    # Use None only as a fallback. Passing ``rowLabels=None`` explicitly next
    # to ``**kwargs`` made any caller-supplied ``rowLabels``/``colLabels``
    # fail with "got multiple values for keyword argument".
    kwargs.setdefault("rowLabels", None)
    kwargs.setdefault("colLabels", None)
    plot_backend = _get_plot_backend("matplotlib")
    return plot_backend.table(ax=ax, data=data, **kwargs)

44 

45 

def register() -> None:
    """
    Register pandas formatters and converters with matplotlib.

    Calling this modifies the global ``matplotlib.units.registry``
    dictionary. pandas installs custom converters for the following types:

    * pd.Timestamp
    * pd.Period
    * np.datetime64
    * datetime.datetime
    * datetime.date
    * datetime.time

    See Also
    --------
    deregister_matplotlib_converters : Remove pandas formatters and converters.
    """
    # The actual registration is delegated to the matplotlib backend.
    _get_plot_backend("matplotlib").register()

66 

67 

def deregister() -> None:
    """
    Remove pandas formatters and converters.

    Undoes the work of :func:`register` by removing the custom converters it
    added. This attempts to put the registry back into the state it had
    before pandas registered its own units. Converters for pandas' own types
    like Timestamp and Period are removed completely. Converters for types
    pandas overwrites, like ``datetime.datetime``, are restored to their
    original value.

    See Also
    --------
    register_matplotlib_converters : Register pandas formatters and converters
        with matplotlib.
    """
    # The actual removal is delegated to the matplotlib backend.
    _get_plot_backend("matplotlib").deregister()

86 

87 

def scatter_matrix(
    frame: DataFrame,
    alpha: float = 0.5,
    figsize: tuple[float, float] | None = None,
    ax: Axes | None = None,
    grid: bool = False,
    diagonal: str = "hist",
    marker: str = ".",
    density_kwds=None,
    hist_kwds=None,
    range_padding: float = 0.05,
    **kwargs,
) -> np.ndarray:
    """
    Draw a matrix of scatter plots.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        Amount of transparency applied.
    figsize : (float,float), optional
        A tuple (width, height) in inches.
    ax : Matplotlib axis object, optional
    grid : bool, optional
        Setting this to True will show the grid.
    diagonal : {'hist', 'kde'}
        Pick between 'kde' and 'hist' for either Kernel Density Estimation or
        Histogram plot in the diagonal.
    marker : str, optional
        Matplotlib marker type, default '.'.
    density_kwds : keywords
        Keyword arguments to be passed to kernel density estimate plot.
    hist_kwds : keywords
        Keyword arguments to be passed to hist function.
    range_padding : float, default 0.05
        Relative extension of axis range in x and y with respect to
        (x_max - x_min) or (y_max - y_min).
    **kwargs
        Keyword arguments to be passed to scatter function.

    Returns
    -------
    numpy.ndarray
        A matrix of scatter plots.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
        >>> pd.plotting.scatter_matrix(df, alpha=0.2)
        array([[<AxesSubplot: xlabel='A', ylabel='A'>,
                <AxesSubplot: xlabel='B', ylabel='A'>,
                <AxesSubplot: xlabel='C', ylabel='A'>,
                <AxesSubplot: xlabel='D', ylabel='A'>],
               [<AxesSubplot: xlabel='A', ylabel='B'>,
                <AxesSubplot: xlabel='B', ylabel='B'>,
                <AxesSubplot: xlabel='C', ylabel='B'>,
                <AxesSubplot: xlabel='D', ylabel='B'>],
               [<AxesSubplot: xlabel='A', ylabel='C'>,
                <AxesSubplot: xlabel='B', ylabel='C'>,
                <AxesSubplot: xlabel='C', ylabel='C'>,
                <AxesSubplot: xlabel='D', ylabel='C'>],
               [<AxesSubplot: xlabel='A', ylabel='D'>,
                <AxesSubplot: xlabel='B', ylabel='D'>,
                <AxesSubplot: xlabel='C', ylabel='D'>,
                <AxesSubplot: xlabel='D', ylabel='D'>]], dtype=object)
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    axes_matrix = backend.scatter_matrix(
        frame=frame,
        alpha=alpha,
        figsize=figsize,
        ax=ax,
        grid=grid,
        diagonal=diagonal,
        marker=marker,
        density_kwds=density_kwds,
        hist_kwds=hist_kwds,
        range_padding=range_padding,
        **kwargs,
    )
    return axes_matrix

173 

174 

def radviz(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwds,
) -> Axes:
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where
    the influence of each dimension can be interpreted as a balance between
    the influence of all dimensions.

    More info available at the `original article
    <https://doi.org/10.1145/331770.331775>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    **kwds
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     {
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
        ...         'Category': [
        ...             'virginica',
        ...             'virginica',
        ...             'setosa',
        ...             'virginica',
        ...             'virginica',
        ...             'versicolor',
        ...             'versicolor',
        ...             'setosa',
        ...             'virginica',
        ...             'setosa'
        ...         ]
        ...     }
        ... )
        >>> pd.plotting.radviz(df, 'Category')
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    result_axes = backend.radviz(
        frame=frame,
        class_column=class_column,
        ax=ax,
        color=color,
        colormap=colormap,
        **kwds,
    )
    return result_axes

261 

262 

def andrews_curves(
    frame: DataFrame,
    class_column: str,
    ax: Axes | None = None,
    samples: int = 200,
    color: list[str] | tuple[str, ...] | None = None,
    colormap=None,
    **kwargs,
) -> Axes:
    """
    Generate a matplotlib plot for visualising clusters of multivariate data.

    Andrews curves have the functional form:

    .. math::
        f(t) = \\frac{x_1}{\\sqrt{2}} + x_2 \\sin(t) + x_3 \\cos(t) +
        x_4 \\sin(2t) + x_5 \\cos(2t) + \\cdots

    Where :math:`x` coefficients correspond to the values of each dimension
    and :math:`t` is linearly spaced between :math:`-\\pi` and :math:`+\\pi`.
    Each row of frame then corresponds to a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0).
    class_column : label
        Name of the column containing class names.
    ax : axes object, default None
        Axes to use.
    samples : int
        Number of points to plot in each curve.
    color : str, list[str] or tuple[str], optional
        Colors to use for the different classes. Colors can be strings
        or 3-element floating point RGB values.
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If a string, load colormap with that
        name from matplotlib.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    :class:`matplotlib.axes.Axes`

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.andrews_curves(df, 'Name')
        <AxesSubplot: title={'center': 'width'}>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    result_axes = backend.andrews_curves(
        frame=frame,
        class_column=class_column,
        ax=ax,
        samples=samples,
        color=color,
        colormap=colormap,
        **kwargs,
    )
    return result_axes

331 

332 

def bootstrap_plot(
    series: Series,
    fig: Figure | None = None,
    size: int = 50,
    samples: int = 500,
    **kwds,
) -> Figure:
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling with replacement [1]_. This function will
    generate bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    .. [1] "Bootstrapping (statistics)" in \
            https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.figure.Figure
        Matplotlib figure.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------
    This example draws a basic bootstrap plot for a Series.

    .. plot::
        :context: close-figs

        >>> s = pd.Series(np.random.uniform(size=100))
        >>> pd.plotting.bootstrap_plot(s)
        <Figure size 640x480 with 6 Axes>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    figure = backend.bootstrap_plot(
        series=series,
        fig=fig,
        size=size,
        samples=samples,
        **kwds,
    )
    return figure

391 

392 

def parallel_coordinates(
    frame: DataFrame,
    class_column: str,
    cols: list[str] | None = None,
    ax: Axes | None = None,
    color: list[str] | tuple[str, ...] | None = None,
    use_columns: bool = False,
    xticks: list | tuple | None = None,
    colormap=None,
    axvlines: bool = True,
    axvlines_kwds=None,
    sort_labels: bool = False,
    **kwargs,
) -> Axes:
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names.
    cols : list, optional
        A list of column names to use.
    ax : matplotlib.axis, optional
        Matplotlib axis object.
    color : list or tuple, optional
        Colors to use for the different classes.
    use_columns : bool, optional
        If true, columns will be used as xticks.
    xticks : list or tuple, optional
        A list of values to use for xticks.
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick.
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines.
    sort_labels : bool, default False
        Sort class_column labels, useful when assigning colors.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> df = pd.read_csv(
        ...     'https://raw.githubusercontent.com/pandas-dev/'
        ...     'pandas/main/pandas/tests/io/data/csv/iris.csv'
        ... )
        >>> pd.plotting.parallel_coordinates(
        ...     df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
        ... )
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    result_axes = backend.parallel_coordinates(
        frame=frame,
        class_column=class_column,
        cols=cols,
        ax=ax,
        color=color,
        use_columns=use_columns,
        xticks=xticks,
        colormap=colormap,
        axvlines=axvlines,
        axvlines_kwds=axvlines_kwds,
        sort_labels=sort_labels,
        **kwargs,
    )
    return result_axes

470 

471 

def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Axes:
    """
    Lag plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    lag : int, default 1
        Lag length of the scatter plot.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwds
        Matplotlib scatter method keyword arguments.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    Lag plots are most commonly used to look for patterns in time series data.

    Given the following time series

    .. plot::
        :context: close-figs

        >>> np.random.seed(5)
        >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
        >>> s = pd.Series(x)
        >>> s.plot()
        <AxesSubplot: xlabel='Midrange'>

    A lag plot with ``lag=1`` returns

    .. plot::
        :context: close-figs

        >>> pd.plotting.lag_plot(s, lag=1)
        <AxesSubplot: xlabel='y(t)', ylabel='y(t + 1)'>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    result_axes = backend.lag_plot(
        series=series,
        lag=lag,
        ax=ax,
        **kwds,
    )
    return result_axes

516 

517 

def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Axes:
    """
    Autocorrelation plot for time series.

    Parameters
    ----------
    series : Series
        The time series to visualize.
    ax : Matplotlib axis object, optional
        The matplotlib axis object to use.
    **kwargs
        Options to pass to matplotlib plotting method.

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    The horizontal lines in the plot correspond to 95% and 99% confidence bands.

    The dashed line is 99% confidence band.

    .. plot::
        :context: close-figs

        >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
        >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
        >>> pd.plotting.autocorrelation_plot(s)
        <AxesSubplot: title={'center': 'width'}, xlabel='Lag', ylabel='Autocorrelation'>
    """
    # Every argument is forwarded unchanged to the matplotlib backend.
    backend = _get_plot_backend("matplotlib")
    result_axes = backend.autocorrelation_plot(
        series=series,
        ax=ax,
        **kwargs,
    )
    return result_axes

551 

552 

class _Options(dict):
    """
    Stores pandas plotting options.

    Allows for parameter aliasing so you can just use parameter names that are
    the same as the plot function parameters, but is stored in a canonical
    format that makes it easy to breakdown into groups later.

    Only item access (``[]``, ``del``, ``in``) and :meth:`use` resolve
    aliases; the methods inherited from ``dict`` are not overridden here.
    """

    # alias so the names are same as plotting method parameter names
    _ALIASES = {"x_compat": "xaxis.compat"}
    # canonical keys that must always be present and cannot be deleted
    _DEFAULT_KEYS = ["xaxis.compat"]

    def __init__(self, deprecated: bool = False) -> None:
        self._deprecated = deprecated
        # Bypass our own __setitem__; the key is already canonical.
        super().__setitem__("xaxis.compat", False)

    def __getitem__(self, key):
        """
        Return the value stored for ``key`` (alias or canonical name).

        Raises
        ------
        ValueError
            If the key is not a known plotting option.
        """
        key = self._get_canonical_key(key)
        if key not in self:
            raise ValueError(f"{key} is not a valid pandas plotting option")
        return super().__getitem__(key)

    def __setitem__(self, key, value) -> None:
        """Store ``value`` under the canonical form of ``key``."""
        key = self._get_canonical_key(key)
        super().__setitem__(key, value)

    def __delitem__(self, key) -> None:
        """
        Remove ``key`` (alias or canonical name) from the store.

        Raises
        ------
        ValueError
            If the key is one of the default parameters.
        """
        key = self._get_canonical_key(key)
        if key in self._DEFAULT_KEYS:
            raise ValueError(f"Cannot remove default parameter {key}")
        super().__delitem__(key)

    def __contains__(self, key) -> bool:
        """Return whether ``key`` (alias or canonical name) is stored."""
        key = self._get_canonical_key(key)
        return super().__contains__(key)

    def reset(self) -> None:
        """
        Reset the option store to its initial state

        Every stored key is removed and the defaults are re-created, so
        options added after construction do not survive a reset.

        Returns
        -------
        None
        """
        # __init__ alone only overwrites the default keys; clear first so
        # user-added options are dropped as well.
        self.clear()
        # error: Cannot access "__init__" directly
        self.__init__()  # type: ignore[misc]

    def _get_canonical_key(self, key):
        """Translate an alias to its canonical key; other keys pass through."""
        return self._ALIASES.get(key, key)

    @contextmanager
    def use(self, key, value) -> Generator[_Options, None, None]:
        """
        Temporarily set a parameter value using the with statement.
        Aliasing allowed.

        The previous value is restored when the block exits, including when
        it exits with an exception.
        """
        # Looked up outside the try block: an unknown key raises ValueError
        # before anything has been modified.
        old_value = self[key]
        try:
            self[key] = value
            yield self
        finally:
            self[key] = old_value


# Module-level instance of the plotting option store, created with defaults.
plot_params = _Options()