Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/generic.py: 26%


2254 statements  

1# pyright: reportPropertyTypeMismatch=false 

2from __future__ import annotations 

3 

4import collections 

5import datetime as dt 

6from functools import partial 

7import gc 

8from json import loads 

9import operator 

10import pickle 

11import re 

12from typing import ( 

13 TYPE_CHECKING, 

14 Any, 

15 Callable, 

16 ClassVar, 

17 Hashable, 

18 Iterator, 

19 Literal, 

20 Mapping, 

21 NoReturn, 

22 Sequence, 

23 Type, 

24 cast, 

25 final, 

26 overload, 

27) 

28import warnings 

29import weakref 

30 

31import numpy as np 

32 

33from pandas._config import ( 

34 config, 

35 using_copy_on_write, 

36) 

37 

38from pandas._libs import lib 

39from pandas._libs.lib import is_range_indexer 

40from pandas._libs.tslibs import ( 

41 Period, 

42 Tick, 

43 Timestamp, 

44 to_offset, 

45) 

46from pandas._typing import ( 

47 AlignJoin, 

48 AnyArrayLike, 

49 ArrayLike, 

50 Axis, 

51 AxisInt, 

52 CompressionOptions, 

53 Dtype, 

54 DtypeArg, 

55 DtypeBackend, 

56 DtypeObj, 

57 FilePath, 

58 FillnaOptions, 

59 FloatFormatType, 

60 FormattersType, 

61 Frequency, 

62 IgnoreRaise, 

63 IndexKeyFunc, 

64 IndexLabel, 

65 IntervalClosedType, 

66 JSONSerializable, 

67 Level, 

68 Manager, 

69 NaPosition, 

70 NDFrameT, 

71 RandomState, 

72 Renamer, 

73 Scalar, 

74 SortKind, 

75 StorageOptions, 

76 Suffixes, 

77 T, 

78 TimeAmbiguous, 

79 TimedeltaConvertibleTypes, 

80 TimeNonexistent, 

81 TimestampConvertibleTypes, 

82 ValueKeyFunc, 

83 WriteBuffer, 

84 npt, 

85) 

86from pandas.compat._optional import import_optional_dependency 

87from pandas.compat.numpy import function as nv 

88from pandas.errors import ( 

89 AbstractMethodError, 

90 InvalidIndexError, 

91 SettingWithCopyError, 

92 SettingWithCopyWarning, 

93) 

94from pandas.util._decorators import doc 

95from pandas.util._exceptions import find_stack_level 

96from pandas.util._validators import ( 

97 check_dtype_backend, 

98 validate_ascending, 

99 validate_bool_kwarg, 

100 validate_fillna_kwargs, 

101 validate_inclusive, 

102) 

103 

104from pandas.core.dtypes.astype import astype_is_view 

105from pandas.core.dtypes.common import ( 

106 ensure_object, 

107 ensure_platform_int, 

108 ensure_str, 

109 is_bool, 

110 is_bool_dtype, 

111 is_datetime64_any_dtype, 

112 is_datetime64tz_dtype, 

113 is_dict_like, 

114 is_dtype_equal, 

115 is_extension_array_dtype, 

116 is_float, 

117 is_list_like, 

118 is_number, 

119 is_numeric_dtype, 

120 is_re_compilable, 

121 is_scalar, 

122 is_timedelta64_dtype, 

123 pandas_dtype, 

124) 

125from pandas.core.dtypes.generic import ( 

126 ABCDataFrame, 

127 ABCSeries, 

128) 

129from pandas.core.dtypes.inference import ( 

130 is_hashable, 

131 is_nested_list_like, 

132) 

133from pandas.core.dtypes.missing import ( 

134 isna, 

135 notna, 

136) 

137 

138from pandas.core import ( 

139 algorithms as algos, 

140 arraylike, 

141 common, 

142 indexing, 

143 nanops, 

144 sample, 

145) 

146from pandas.core.array_algos.replace import should_use_regex 

147from pandas.core.arrays import ExtensionArray 

148from pandas.core.base import PandasObject 

149from pandas.core.construction import extract_array 

150from pandas.core.flags import Flags 

151from pandas.core.indexes.api import ( 

152 DatetimeIndex, 

153 Index, 

154 MultiIndex, 

155 PeriodIndex, 

156 RangeIndex, 

157 default_index, 

158 ensure_index, 

159) 

160from pandas.core.internals import ( 

161 ArrayManager, 

162 BlockManager, 

163 SingleArrayManager, 

164) 

165from pandas.core.internals.construction import ( 

166 mgr_to_mgr, 

167 ndarray_to_mgr, 

168) 

169from pandas.core.methods.describe import describe_ndframe 

170from pandas.core.missing import ( 

171 clean_fill_method, 

172 clean_reindex_fill_method, 

173 find_valid_index, 

174) 

175from pandas.core.ops import align_method_FRAME 

176from pandas.core.reshape.concat import concat 

177from pandas.core.shared_docs import _shared_docs 

178from pandas.core.sorting import get_indexer_indexer 

179from pandas.core.window import ( 

180 Expanding, 

181 ExponentialMovingWindow, 

182 Rolling, 

183 Window, 

184) 

185 

186from pandas.io.formats.format import ( 

187 DataFrameFormatter, 

188 DataFrameRenderer, 

189) 

190from pandas.io.formats.printing import pprint_thing 

191 

192if TYPE_CHECKING: 

193 from pandas._libs.tslibs import BaseOffset 

194 

195 from pandas.core.frame import DataFrame 

196 from pandas.core.indexers.objects import BaseIndexer 

197 from pandas.core.resample import Resampler 

198 from pandas.core.series import Series 

199 

200 from pandas.io.pytables import HDFStore 

201 

202 

203# goal is to be able to define the docs close to function, while still being 

204# able to share 

205_shared_docs = {**_shared_docs} 

206_shared_doc_kwargs = { 

207 "axes": "keywords for axes", 

208 "klass": "Series/DataFrame", 

209 "axes_single_arg": "int or labels for object", 

210 "args_transpose": "axes to permute (int or label for object)", 

211 "inplace": """ 

212 inplace : bool, default False 

213 If True, performs operation inplace and returns None.""", 

214 "optional_by": """ 

215 by : str or list of str 

216 Name or list of names to sort by""", 

217 "replace_iloc": """ 

218 This differs from updating with ``.loc`` or ``.iloc``, which require 

219 you to specify a location to update with some value.""", 

220} 

221 

222 

223bool_t = bool # Need alias because NDFrame has def bool: 

224 

225 

226class NDFrame(PandasObject, indexing.IndexingMixin): 

227 """ 

228 N-dimensional analogue of DataFrame. Store multi-dimensional data in a

229 size-mutable, labeled data structure.

230 

231 Parameters 

232 ---------- 

233 data : BlockManager 

234 axes : list 

235 copy : bool, default False 

236 """ 

237 

238 _internal_names: list[str] = [ 

239 "_mgr", 

240 "_cacher", 

241 "_item_cache", 

242 "_cache", 

243 "_is_copy", 

244 "_subtyp", 

245 "_name", 

246 "_default_kind", 

247 "_default_fill_value", 

248 "_metadata", 

249 "__array_struct__", 

250 "__array_interface__", 

251 "_flags", 

252 ] 

253 _internal_names_set: set[str] = set(_internal_names) 

254 _accessors: set[str] = set() 

255 _hidden_attrs: frozenset[str] = frozenset([]) 

256 _metadata: list[str] = [] 

257 _is_copy: weakref.ReferenceType[NDFrame] | None = None 

258 _mgr: Manager 

259 _attrs: dict[Hashable, Any] 

260 _typ: str 

261 

262 # ---------------------------------------------------------------------- 

263 # Constructors 

264 

265 def __init__( 

266 self, 

267 data: Manager, 

268 copy: bool_t = False, 

269 attrs: Mapping[Hashable, Any] | None = None, 

270 ) -> None: 

271 # the copy kwarg is retained for mypy compatibility; it is not used

272 

273 object.__setattr__(self, "_is_copy", None) 

274 object.__setattr__(self, "_mgr", data) 

275 object.__setattr__(self, "_item_cache", {}) 

276 if attrs is None: 

277 attrs = {} 

278 else: 

279 attrs = dict(attrs) 

280 object.__setattr__(self, "_attrs", attrs) 

281 object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) 

282 

283 @classmethod 

284 def _init_mgr( 

285 cls, 

286 mgr: Manager, 

287 axes, 

288 dtype: Dtype | None = None, 

289 copy: bool_t = False, 

290 ) -> Manager: 

291 """passed a manager and a axes dict""" 

292 for a, axe in axes.items(): 

293 if axe is not None: 

294 axe = ensure_index(axe) 

295 bm_axis = cls._get_block_manager_axis(a) 

296 mgr = mgr.reindex_axis(axe, axis=bm_axis) 

297 

298 # make a copy if explicitly requested 

299 if copy: 

300 mgr = mgr.copy() 

301 if dtype is not None: 

302 # avoid further copies if we can 

303 if ( 

304 isinstance(mgr, BlockManager) 

305 and len(mgr.blocks) == 1 

306 and is_dtype_equal(mgr.blocks[0].values.dtype, dtype) 

307 ): 

308 pass 

309 else: 

310 mgr = mgr.astype(dtype=dtype) 

311 return mgr 

312 

313 def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT: 

314 """ 

315 Private helper function to create a DataFrame with specific manager. 

316 

317 Parameters 

318 ---------- 

319 typ : {"block", "array"} 

320 copy : bool, default True 

321 Only controls whether the conversion from Block->ArrayManager 

322 copies the 1D arrays (to ensure proper/contiguous memory layout). 

323 

324 Returns 

325 ------- 

326 DataFrame 

327 New DataFrame using the specified manager type. May or may not

328 be a copy of the input.

329 """ 

330 new_mgr: Manager 

331 new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy) 

332 # fastpath of passing a manager doesn't check the option/manager class 

333 return self._constructor(new_mgr).__finalize__(self) 

334 

335 # ---------------------------------------------------------------------- 

336 # attrs and flags 

337 

338 @property 

339 def attrs(self) -> dict[Hashable, Any]: 

340 """ 

341 Dictionary of global attributes of this dataset. 

342 

343 .. warning:: 

344 

345 attrs is experimental and may change without warning. 

346 

347 See Also 

348 -------- 

349 DataFrame.flags : Global flags applying to this object. 

350 """ 

351 if self._attrs is None: 

352 self._attrs = {} 

353 return self._attrs 

354 

355 @attrs.setter 

356 def attrs(self, value: Mapping[Hashable, Any]) -> None: 

357 self._attrs = dict(value) 

358 

359 @final 

360 @property 

361 def flags(self) -> Flags: 

362 """ 

363 Get the properties associated with this pandas object. 

364 

365 The available flags are 

366 

367 * :attr:`Flags.allows_duplicate_labels` 

368 

369 See Also 

370 -------- 

371 Flags : Flags that apply to pandas objects. 

372 DataFrame.attrs : Global metadata applying to this dataset. 

373 

374 Notes 

375 ----- 

376 "Flags" differ from "metadata". Flags reflect properties of the 

377 pandas object (the Series or DataFrame). Metadata refer to properties 

378 of the dataset, and should be stored in :attr:`DataFrame.attrs`. 

379 

380 Examples 

381 -------- 

382 >>> df = pd.DataFrame({"A": [1, 2]}) 

383 >>> df.flags 

384 <Flags(allows_duplicate_labels=True)> 

385 

386 Flags can be read or set using attribute access (``.``)

387 

388 >>> df.flags.allows_duplicate_labels 

389 True 

390 >>> df.flags.allows_duplicate_labels = False 

391 

392 Or by item access with a key

393 

394 >>> df.flags["allows_duplicate_labels"] 

395 False 

396 >>> df.flags["allows_duplicate_labels"] = True 

397 """ 

398 return self._flags 

399 

400 @final 

401 def set_flags( 

402 self: NDFrameT, 

403 *, 

404 copy: bool_t = False, 

405 allows_duplicate_labels: bool_t | None = None, 

406 ) -> NDFrameT: 

407 """ 

408 Return a new object with updated flags. 

409 

410 Parameters 

411 ---------- 

412 copy : bool, default False 

413 Specify if a copy of the object should be made. 

414 allows_duplicate_labels : bool, optional 

415 Whether the returned object allows duplicate labels. 

416 

417 Returns 

418 ------- 

419 Series or DataFrame 

420 The same type as the caller. 

421 

422 See Also 

423 -------- 

424 DataFrame.attrs : Global metadata applying to this dataset. 

425 DataFrame.flags : Global flags applying to this object. 

426 

427 Notes 

428 ----- 

429 This method returns a new object that's a view on the same data 

430 as the input. Mutating the input or the output values will be reflected 

431 in the other. 

432 

433 This method is intended to be used in method chains. 

434 

435 "Flags" differ from "metadata". Flags reflect properties of the 

436 pandas object (the Series or DataFrame). Metadata refer to properties 

437 of the dataset, and should be stored in :attr:`DataFrame.attrs`. 

438 

439 Examples 

440 -------- 

441 >>> df = pd.DataFrame({"A": [1, 2]}) 

442 >>> df.flags.allows_duplicate_labels 

443 True 

444 >>> df2 = df.set_flags(allows_duplicate_labels=False) 

445 >>> df2.flags.allows_duplicate_labels 

446 False 

447 """ 

448 df = self.copy(deep=copy and not using_copy_on_write()) 

449 if allows_duplicate_labels is not None: 

450 df.flags["allows_duplicate_labels"] = allows_duplicate_labels 

451 return df 

452 

453 @final 

454 @classmethod 

455 def _validate_dtype(cls, dtype) -> DtypeObj | None: 

456 """validate the passed dtype""" 

457 if dtype is not None: 

458 dtype = pandas_dtype(dtype) 

459 

460 # a compound dtype 

461 if dtype.kind == "V": 

462 raise NotImplementedError( 

463 "compound dtypes are not implemented " 

464 f"in the {cls.__name__} constructor" 

465 ) 

466 

467 return dtype 
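
# Hedged sketch of what the "compound dtype" check above rejects: a
# structured NumPy dtype, whose kind is "V" (void).
#
# >>> import numpy as np
# >>> np.dtype([("a", "i4"), ("b", "f8")]).kind
# 'V'
# >>> pd.Series([1, 2], dtype=[("a", "i4"), ("b", "f8")])  # doctest: +SKIP
# NotImplementedError: compound dtypes are not implemented in the Series constructor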

468 

469 # ---------------------------------------------------------------------- 

470 # Construction 

471 

472 @property 

473 def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]: 

474 """ 

475 Used when a manipulation result has the same dimensions as the 

476 original. 

477 """ 

478 raise AbstractMethodError(self) 

479 

480 # ---------------------------------------------------------------------- 

481 # Internals 

482 

483 @final 

484 @property 

485 def _data(self): 

486 # GH#33054 retained because some downstream packages use this,

487 # e.g. fastparquet 

488 return self._mgr 

489 

490 # ---------------------------------------------------------------------- 

491 # Axis 

492 _stat_axis_number = 0 

493 _stat_axis_name = "index" 

494 _AXIS_ORDERS: list[Literal["index", "columns"]] 

495 _AXIS_TO_AXIS_NUMBER: dict[Axis, AxisInt] = {0: 0, "index": 0, "rows": 0} 

496 _info_axis_number: int 

497 _info_axis_name: Literal["index", "columns"] 

498 _AXIS_LEN: int 

499 

500 @final 

501 def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs): 

502 """Return an axes dictionary for myself.""" 

503 d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} 

504 # error: Argument 1 to "update" of "MutableMapping" has incompatible type 

505 # "Dict[str, Any]"; expected "SupportsKeysAndGetItem[Union[int, str], Any]" 

506 d.update(kwargs) # type: ignore[arg-type] 

507 return d 

508 

509 @final 

510 @classmethod 

511 def _get_axis_number(cls, axis: Axis) -> AxisInt: 

512 try: 

513 return cls._AXIS_TO_AXIS_NUMBER[axis] 

514 except KeyError: 

515 raise ValueError(f"No axis named {axis} for object type {cls.__name__}") 

516 

517 @final 

518 @classmethod 

519 def _get_axis_name(cls, axis: Axis) -> Literal["index", "columns"]: 

520 axis_number = cls._get_axis_number(axis) 

521 return cls._AXIS_ORDERS[axis_number] 

522 

523 @final 

524 def _get_axis(self, axis: Axis) -> Index: 

525 axis_number = self._get_axis_number(axis) 

526 assert axis_number in {0, 1} 

527 return self.index if axis_number == 0 else self.columns 

528 

529 @final 

530 @classmethod 

531 def _get_block_manager_axis(cls, axis: Axis) -> AxisInt: 

532 """Map the axis to the block_manager axis.""" 

533 axis = cls._get_axis_number(axis) 

534 ndim = cls._AXIS_LEN 

535 if ndim == 2: 

536 # i.e. DataFrame 

537 return 1 - axis 

538 return axis 
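
# Hedged sketch: for a 2-D DataFrame the internal BlockManager stores data
# transposed, so the user-facing axes map to the reversed manager axes.
#
# >>> pd.DataFrame._get_block_manager_axis(0)          # rows -> BM axis 1
# 1
# >>> pd.DataFrame._get_block_manager_axis("columns")  # columns -> BM axis 0
# 0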

539 

540 @final 

541 def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]: 

542 # index or columns 

543 axis_index = getattr(self, axis) 

544 d = {} 

545 prefix = axis[0] 

546 

547 for i, name in enumerate(axis_index.names): 

548 if name is not None: 

549 key = level = name 

550 else: 

551 # prefix with 'i' or 'c' depending on the input axis 

552 # e.g., you must do ilevel_0 for the 0th level of an unnamed 

553 # multiindex

554 key = f"{prefix}level_{i}" 

555 level = i 

556 

557 level_values = axis_index.get_level_values(level) 

558 s = level_values.to_series() 

559 s.index = axis_index 

560 d[key] = s 

561 

562 # put the index/columns itself in the dict 

563 if isinstance(axis_index, MultiIndex): 

564 dindex = axis_index 

565 else: 

566 dindex = axis_index.to_series() 

567 

568 d[axis] = dindex 

569 return d 
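
# Hedged sketch of how these resolvers surface publicly: unnamed index
# levels become ``ilevel_<n>`` names in ``DataFrame.query``/``eval``.
#
# >>> mi = pd.MultiIndex.from_tuples([(0, "x"), (1, "y"), (2, "z")])
# >>> df = pd.DataFrame({"A": [10, 20, 30]}, index=mi)
# >>> df.query("ilevel_0 >= 1")["A"].tolist()
# [20, 30]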

570 

571 @final 

572 def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]: 

573 from pandas.core.computation.parsing import clean_column_name 

574 

575 d: dict[str, Series | MultiIndex] = {} 

576 for axis_name in self._AXIS_ORDERS: 

577 d.update(self._get_axis_resolvers(axis_name)) 

578 

579 return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} 

580 

581 @final 

582 def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: 

583 """ 

584 Return the special character free column resolvers of a dataframe. 

585 

586 Column names with special characters are 'cleaned up' so that they can 

587 be referred to by backtick quoting. 

588 Used in :meth:`DataFrame.eval`. 

589 """ 

590 from pandas.core.computation.parsing import clean_column_name 

591 

592 if isinstance(self, ABCSeries): 

593 return {clean_column_name(self.name): self} 

594 

595 return { 

596 clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) 

597 } 
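
# Hedged sketch of the backtick quoting this cleaning enables in
# ``DataFrame.eval``:
#
# >>> df = pd.DataFrame({"a b": [1, 2], "c": [10, 20]})
# >>> df.eval("`a b` + c").tolist()
# [11, 22]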

598 

599 @property 

600 def _info_axis(self) -> Index: 

601 return getattr(self, self._info_axis_name) 

602 

603 @property 

604 def _stat_axis(self) -> Index: 

605 return getattr(self, self._stat_axis_name) 

606 

607 @property 

608 def shape(self) -> tuple[int, ...]: 

609 """ 

610 Return a tuple of axis dimensions 

611 """ 

612 return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) 

613 

614 @property 

615 def axes(self) -> list[Index]: 

616 """ 

617 Return index label(s) of the internal NDFrame 

618 """ 

619 # we do it this way because if we have reversed axes, then 

620 # the block manager shows them reversed

621 return [self._get_axis(a) for a in self._AXIS_ORDERS] 

622 

623 @property 

624 def ndim(self) -> int: 

625 """ 

626 Return an int representing the number of axes / array dimensions. 

627 

628 Return 1 if Series. Otherwise return 2 if DataFrame. 

629 

630 See Also 

631 -------- 

632 ndarray.ndim : Number of array dimensions. 

633 

634 Examples 

635 -------- 

636 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

637 >>> s.ndim 

638 1 

639 

640 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

641 >>> df.ndim 

642 2 

643 """ 

644 return self._mgr.ndim 

645 

646 @property 

647 def size(self) -> int: 

648 """ 

649 Return an int representing the number of elements in this object. 

650 

651 Return the number of rows if Series. Otherwise return the number of 

652 rows times number of columns if DataFrame. 

653 

654 See Also 

655 -------- 

656 ndarray.size : Number of elements in the array. 

657 

658 Examples 

659 -------- 

660 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

661 >>> s.size 

662 3 

663 

664 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

665 >>> df.size 

666 4 

667 """ 

668 # error: Incompatible return value type (got "signedinteger[_64Bit]", 

669 # expected "int") [return-value] 

670 return np.prod(self.shape) # type: ignore[return-value] 

671 

672 def set_axis( 

673 self: NDFrameT, 

674 labels, 

675 *, 

676 axis: Axis = 0, 

677 copy: bool_t | None = None, 

678 ) -> NDFrameT: 

679 """ 

680 Assign desired index to given axis. 

681 

682 Indexes for%(extended_summary_sub)s row labels can be changed by assigning 

683 a list-like or Index. 

684 

685 Parameters 

686 ---------- 

687 labels : list-like, Index 

688 The values for the new index. 

689 

690 axis : %(axes_single_arg)s, default 0 

691 The axis to update. The value 0 identifies the rows. For `Series` 

692 this parameter is unused and defaults to 0. 

693 

694 copy : bool, default True 

695 Whether to make a copy of the underlying data. 

696 

697 .. versionadded:: 1.5.0 

698 

699 Returns 

700 ------- 

701 %(klass)s 

702 An object of type %(klass)s. 

703 

704 See Also 

705 -------- 

706 %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. 

707 """ 

708 return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy) 
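
# Hedged usage sketch for ``set_axis``:
#
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> df.set_axis(["x", "y"], axis="index")
#    A
# x  1
# y  2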

709 

710 @final 

711 def _set_axis_nocheck( 

712 self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None 

713 ): 

714 if inplace: 

715 setattr(self, self._get_axis_name(axis), labels) 

716 else: 

717 # With copy=False, we create a new object but don't copy the 

718 # underlying data. 

719 obj = self.copy(deep=copy and not using_copy_on_write()) 

720 setattr(obj, obj._get_axis_name(axis), labels) 

721 return obj 

722 

723 @final 

724 def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: 

725 """ 

726 This is called from the cython code when we set the `index` attribute 

727 directly, e.g. `series.index = [1, 2, 3]`. 

728 """ 

729 labels = ensure_index(labels) 

730 self._mgr.set_axis(axis, labels) 

731 self._clear_item_cache() 

732 

733 @final 

734 def swapaxes( 

735 self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t | None = None 

736 ) -> NDFrameT: 

737 """ 

738 Interchange axes and swap values accordingly.

739 

740 Returns 

741 ------- 

742 same as input 

743 """ 

744 i = self._get_axis_number(axis1) 

745 j = self._get_axis_number(axis2) 

746 

747 if i == j: 

748 return self.copy(deep=copy and not using_copy_on_write()) 

749 

750 mapping = {i: j, j: i} 

751 

752 new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)] 

753 new_values = self._values.swapaxes(i, j) # type: ignore[union-attr] 

754 if ( 

755 using_copy_on_write() 

756 and self._mgr.is_single_block 

757 and isinstance(self._mgr, BlockManager) 

758 ): 

759 # This should only get hit in case of having a single block, otherwise a 

760 # copy is made, we don't have to set up references. 

761 new_mgr = ndarray_to_mgr( 

762 new_values, 

763 new_axes[0], 

764 new_axes[1], 

765 dtype=None, 

766 copy=False, 

767 typ="block", 

768 ) 

769 assert isinstance(new_mgr, BlockManager) 

770 assert isinstance(self._mgr, BlockManager) 

771 new_mgr.blocks[0].refs = self._mgr.blocks[0].refs 

772 new_mgr.blocks[0].refs.add_reference( 

773 new_mgr.blocks[0] # type: ignore[arg-type] 

774 ) 

775 return self._constructor(new_mgr).__finalize__(self, method="swapaxes") 

776 

777 elif (copy or copy is None) and self._mgr.is_single_block: 

778 new_values = new_values.copy() 

779 

780 return self._constructor( 

781 new_values, 

782 *new_axes, 

783 # The no-copy case for CoW is handled above 

784 copy=False, 

785 ).__finalize__(self, method="swapaxes") 
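
# Hedged sketch (assuming the pandas version this source is from, ~2.0,
# where ``swapaxes`` is still available): for a DataFrame, swapping the two
# axes gives the same result as transposing.
#
# >>> df = pd.DataFrame({"a": [1, 2]})
# >>> df.swapaxes(0, 1).equals(df.T)
# True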

786 

787 @final 

788 @doc(klass=_shared_doc_kwargs["klass"]) 

789 def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: 

790 """ 

791 Return {klass} with requested index / column level(s) removed. 

792 

793 Parameters 

794 ---------- 

795 level : int, str, or list-like 

796 If a string is given, must be the name of a level 

797 If list-like, elements must be names or positional indexes 

798 of levels. 

799 

800 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

801 Axis along which the level(s) is removed: 

802 

803 * 0 or 'index': remove level(s) from the index.

804 * 1 or 'columns': remove level(s) from the columns.

805 

806 For `Series` this parameter is unused and defaults to 0. 

807 

808 Returns 

809 ------- 

810 {klass} 

811 {klass} with requested index / column level(s) removed. 

812 

813 Examples 

814 -------- 

815 >>> df = pd.DataFrame([ 

816 ... [1, 2, 3, 4], 

817 ... [5, 6, 7, 8], 

818 ... [9, 10, 11, 12] 

819 ... ]).set_index([0, 1]).rename_axis(['a', 'b']) 

820 

821 >>> df.columns = pd.MultiIndex.from_tuples([ 

822 ... ('c', 'e'), ('d', 'f') 

823 ... ], names=['level_1', 'level_2']) 

824 

825 >>> df 

826 level_1 c d 

827 level_2 e f 

828 a b 

829 1 2 3 4 

830 5 6 7 8 

831 9 10 11 12 

832 

833 >>> df.droplevel('a') 

834 level_1 c d 

835 level_2 e f 

836 b 

837 2 3 4 

838 6 7 8 

839 10 11 12 

840 

841 >>> df.droplevel('level_2', axis=1) 

842 level_1 c d 

843 a b 

844 1 2 3 4 

845 5 6 7 8 

846 9 10 11 12 

847 """ 

848 labels = self._get_axis(axis) 

849 new_labels = labels.droplevel(level) 

850 return self.set_axis(new_labels, axis=axis, copy=None) 

851 

852 def pop(self, item: Hashable) -> Series | Any: 

853 result = self[item] 

854 del self[item] 

855 

856 return result 
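
# Hedged usage sketch for ``pop``: removes a column in place and returns it.
#
# >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# >>> df.pop("b").tolist()
# [3, 4]
# >>> list(df.columns)
# ['a']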

857 

858 @final 

859 def squeeze(self, axis: Axis | None = None): 

860 """ 

861 Squeeze 1 dimensional axis objects into scalars. 

862 

863 Series or DataFrames with a single element are squeezed to a scalar. 

864 DataFrames with a single column or a single row are squeezed to a 

865 Series. Otherwise the object is unchanged. 

866 

867 This method is most useful when you don't know if your 

868 object is a Series or DataFrame, but you do know it has just a single 

869 column. In that case you can safely call `squeeze` to ensure you have a 

870 Series. 

871 

872 Parameters 

873 ---------- 

874 axis : {0 or 'index', 1 or 'columns', None}, default None 

875 A specific axis to squeeze. By default, all length-1 axes are 

876 squeezed. For `Series` this parameter is unused and defaults to `None`. 

877 

878 Returns 

879 ------- 

880 DataFrame, Series, or scalar 

881 The projection after squeezing `axis` or all the axes. 

882 

883 See Also 

884 -------- 

885 Series.iloc : Integer-location based indexing for selecting scalars. 

886 DataFrame.iloc : Integer-location based indexing for selecting Series. 

887 Series.to_frame : Inverse of DataFrame.squeeze for a 

888 single-column DataFrame. 

889 

890 Examples 

891 -------- 

892 >>> primes = pd.Series([2, 3, 5, 7]) 

893 

894 Slicing might produce a Series with a single value: 

895 

896 >>> even_primes = primes[primes % 2 == 0] 

897 >>> even_primes 

898 0 2 

899 dtype: int64 

900 

901 >>> even_primes.squeeze() 

902 2 

903 

904 Squeezing objects with more than one value in every axis does nothing: 

905 

906 >>> odd_primes = primes[primes % 2 == 1] 

907 >>> odd_primes 

908 1 3 

909 2 5 

910 3 7 

911 dtype: int64 

912 

913 >>> odd_primes.squeeze() 

914 1 3 

915 2 5 

916 3 7 

917 dtype: int64 

918 

919 Squeezing is even more effective when used with DataFrames. 

920 

921 >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) 

922 >>> df 

923 a b 

924 0 1 2 

925 1 3 4 

926 

927 Slicing a single column will produce a DataFrame with the columns 

928 having only one value: 

929 

930 >>> df_a = df[['a']] 

931 >>> df_a 

932 a 

933 0 1 

934 1 3 

935 

936 So the columns can be squeezed down, resulting in a Series: 

937 

938 >>> df_a.squeeze('columns') 

939 0 1 

940 1 3 

941 Name: a, dtype: int64 

942 

943 Slicing a single row from a single column will produce a single 

944 scalar DataFrame: 

945 

946 >>> df_0a = df.loc[df.index < 1, ['a']] 

947 >>> df_0a 

948 a 

949 0 1 

950 

951 Squeezing the rows produces a single scalar Series: 

952 

953 >>> df_0a.squeeze('rows') 

954 a 1 

955 Name: 0, dtype: int64 

956 

957 Squeezing all axes will project directly into a scalar: 

958 

959 >>> df_0a.squeeze() 

960 1 

961 """ 

962 axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) 

963 return self.iloc[ 

964 tuple( 

965 0 if i in axes and len(a) == 1 else slice(None) 

966 for i, a in enumerate(self.axes) 

967 ) 

968 ] 

969 

970 # ---------------------------------------------------------------------- 

971 # Rename 

972 

973 def _rename( 

974 self: NDFrameT, 

975 mapper: Renamer | None = None, 

976 *, 

977 index: Renamer | None = None, 

978 columns: Renamer | None = None, 

979 axis: Axis | None = None, 

980 copy: bool_t | None = None, 

981 inplace: bool_t = False, 

982 level: Level | None = None, 

983 errors: str = "ignore", 

984 ) -> NDFrameT | None: 

985 # called by Series.rename and DataFrame.rename 

986 

987 if mapper is None and index is None and columns is None: 

988 raise TypeError("must pass an index to rename") 

989 

990 if index is not None or columns is not None: 

991 if axis is not None: 

992 raise TypeError( 

993 "Cannot specify both 'axis' and any of 'index' or 'columns'" 

994 ) 

995 if mapper is not None: 

996 raise TypeError( 

997 "Cannot specify both 'mapper' and any of 'index' or 'columns'" 

998 ) 

999 else: 

1000 # use the mapper argument 

1001 if axis and self._get_axis_number(axis) == 1: 

1002 columns = mapper 

1003 else: 

1004 index = mapper 

1005 

1006 self._check_inplace_and_allows_duplicate_labels(inplace) 

1007 result = self if inplace else self.copy(deep=copy and not using_copy_on_write()) 

1008 

1009 for axis_no, replacements in enumerate((index, columns)): 

1010 if replacements is None: 

1011 continue 

1012 

1013 ax = self._get_axis(axis_no) 

1014 f = common.get_rename_function(replacements) 

1015 

1016 if level is not None: 

1017 level = ax._get_level_number(level) 

1018 

1019 # GH 13473 

1020 if not callable(replacements): 

1021 if ax._is_multi and level is not None: 

1022 indexer = ax.get_level_values(level).get_indexer_for(replacements) 

1023 else: 

1024 indexer = ax.get_indexer_for(replacements) 

1025 

1026 if errors == "raise" and len(indexer[indexer == -1]): 

1027 missing_labels = [ 

1028 label 

1029 for index, label in enumerate(replacements) 

1030 if indexer[index] == -1 

1031 ] 

1032 raise KeyError(f"{missing_labels} not found in axis") 

1033 

1034 new_index = ax._transform_index(f, level=level) 

1035 result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) 

1036 result._clear_item_cache() 

1037 

1038 if inplace: 

1039 self._update_inplace(result) 

1040 return None 

1041 else: 

1042 return result.__finalize__(self, method="rename") 

1043 

1044 @overload 

1045 def rename_axis( 

1046 self: NDFrameT, 

1047 mapper: IndexLabel | lib.NoDefault = ..., 

1048 *, 

1049 index=..., 

1050 columns=..., 

1051 axis: Axis = ..., 

1052 copy: bool_t | None = ..., 

1053 inplace: Literal[False] = ..., 

1054 ) -> NDFrameT: 

1055 ... 

1056 

1057 @overload 

1058 def rename_axis( 

1059 self, 

1060 mapper: IndexLabel | lib.NoDefault = ..., 

1061 *, 

1062 index=..., 

1063 columns=..., 

1064 axis: Axis = ..., 

1065 copy: bool_t | None = ..., 

1066 inplace: Literal[True], 

1067 ) -> None: 

1068 ... 

1069 

1070 @overload 

1071 def rename_axis( 

1072 self: NDFrameT, 

1073 mapper: IndexLabel | lib.NoDefault = ..., 

1074 *, 

1075 index=..., 

1076 columns=..., 

1077 axis: Axis = ..., 

1078 copy: bool_t | None = ..., 

1079 inplace: bool_t = ..., 

1080 ) -> NDFrameT | None: 

1081 ... 

1082 

1083 def rename_axis( 

1084 self: NDFrameT, 

1085 mapper: IndexLabel | lib.NoDefault = lib.no_default, 

1086 *, 

1087 index=lib.no_default, 

1088 columns=lib.no_default, 

1089 axis: Axis = 0, 

1090 copy: bool_t | None = None, 

1091 inplace: bool_t = False, 

1092 ) -> NDFrameT | None: 

1093 """ 

1094 Set the name of the axis for the index or columns. 

1095 

1096 Parameters 

1097 ---------- 

1098 mapper : scalar, list-like, optional 

1099 Value to set the axis name attribute. 

1100 index, columns : scalar, list-like, dict-like or function, optional 

1101 A scalar, list-like, dict-like or function transformation to

1102 apply to that axis' values. 

1103 Note that the ``columns`` parameter is not allowed if the 

1104 object is a Series. This parameter only applies to DataFrame

1105 objects.

1106 

1107 Use either ``mapper`` and ``axis`` to 

1108 specify the axis to target with ``mapper``, or ``index`` 

1109 and/or ``columns``. 

1110 axis : {0 or 'index', 1 or 'columns'}, default 0 

1111 The axis to rename. For `Series` this parameter is unused and defaults to 0. 

1112 copy : bool, default None 

1113 Also copy underlying data. 

1114 inplace : bool, default False 

1115 Modifies the object directly, instead of creating a new Series 

1116 or DataFrame. 

1117 

1118 Returns 

1119 ------- 

1120 Series, DataFrame, or None 

1121 The same type as the caller or None if ``inplace=True``. 

1122 

1123 See Also 

1124 -------- 

1125 Series.rename : Alter Series index labels or name. 

1126 DataFrame.rename : Alter DataFrame index labels or name. 

1127 Index.rename : Set new names on index. 

1128 

1129 Notes 

1130 ----- 

1131 ``DataFrame.rename_axis`` supports two calling conventions 

1132 

1133 * ``(index=index_mapper, columns=columns_mapper, ...)`` 

1134 * ``(mapper, axis={'index', 'columns'}, ...)`` 

1135 

1136 The first calling convention will only modify the names of 

1137 the index and/or the names of the Index object that is the columns. 

1138 In this case, the parameter ``copy`` is ignored. 

1139 

1140 The second calling convention will modify the names of the 

1141 corresponding index if mapper is a list or a scalar. 

1142 However, if mapper is dict-like or a function, it will use the 

1143 deprecated behavior of modifying the axis *labels*. 

1144 

1145 We *highly* recommend using keyword arguments to clarify your 

1146 intent. 

1147 

1148 Examples 

1149 -------- 

1150 **Series** 

1151 

1152 >>> s = pd.Series(["dog", "cat", "monkey"]) 

1153 >>> s 

1154 0 dog 

1155 1 cat 

1156 2 monkey 

1157 dtype: object 

1158 >>> s.rename_axis("animal") 

1159 animal 

1160 0 dog 

1161 1 cat 

1162 2 monkey 

1163 dtype: object 

1164 

1165 **DataFrame** 

1166 

1167 >>> df = pd.DataFrame({"num_legs": [4, 4, 2], 

1168 ... "num_arms": [0, 0, 2]}, 

1169 ... ["dog", "cat", "monkey"]) 

1170 >>> df 

1171 num_legs num_arms 

1172 dog 4 0 

1173 cat 4 0 

1174 monkey 2 2 

1175 >>> df = df.rename_axis("animal") 

1176 >>> df 

1177 num_legs num_arms 

1178 animal 

1179 dog 4 0 

1180 cat 4 0 

1181 monkey 2 2 

1182 >>> df = df.rename_axis("limbs", axis="columns") 

1183 >>> df 

1184 limbs num_legs num_arms 

1185 animal 

1186 dog 4 0 

1187 cat 4 0 

1188 monkey 2 2 

1189 

1190 **MultiIndex** 

1191 

1192 >>> df.index = pd.MultiIndex.from_product([['mammal'], 

1193 ... ['dog', 'cat', 'monkey']], 

1194 ... names=['type', 'name']) 

1195 >>> df 

1196 limbs num_legs num_arms 

1197 type name 

1198 mammal dog 4 0 

1199 cat 4 0 

1200 monkey 2 2 

1201 

1202 >>> df.rename_axis(index={'type': 'class'}) 

1203 limbs num_legs num_arms 

1204 class name 

1205 mammal dog 4 0 

1206 cat 4 0 

1207 monkey 2 2 

1208 

1209 >>> df.rename_axis(columns=str.upper) 

1210 LIMBS num_legs num_arms 

1211 type name 

1212 mammal dog 4 0 

1213 cat 4 0 

1214 monkey 2 2 

1215 """ 

1216 axes = {"index": index, "columns": columns} 

1217 

1218 if axis is not None: 

1219 axis = self._get_axis_number(axis) 

1220 

1221 inplace = validate_bool_kwarg(inplace, "inplace") 

1222 

1223 if copy and using_copy_on_write(): 

1224 copy = False 

1225 

1226 if mapper is not lib.no_default: 

1227 # Use v0.23 behavior if a scalar or list 

1228 non_mapper = is_scalar(mapper) or ( 

1229 is_list_like(mapper) and not is_dict_like(mapper) 

1230 ) 

1231 if non_mapper: 

1232 return self._set_axis_name( 

1233 mapper, axis=axis, inplace=inplace, copy=copy 

1234 ) 

1235 else: 

1236 raise ValueError("Use `.rename` to alter labels with a mapper.") 

1237 else: 

1238 # Use new behavior. Means that index and/or columns 

1239 # is specified 

1240 result = self if inplace else self.copy(deep=copy) 

1241 

1242 for axis in range(self._AXIS_LEN): 

1243 v = axes.get(self._get_axis_name(axis)) 

1244 if v is lib.no_default: 

1245 continue 

1246 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) 

1247 if non_mapper: 

1248 newnames = v 

1249 else: 

1250 f = common.get_rename_function(v) 

1251 curnames = self._get_axis(axis).names 

1252 newnames = [f(name) for name in curnames] 

1253 result._set_axis_name(newnames, axis=axis, inplace=True, copy=copy) 

1254 if not inplace: 

1255 return result 

1256 return None 

1257 

1258 @final 

1259 def _set_axis_name( 

1260 self, name, axis: Axis = 0, inplace: bool_t = False, copy: bool_t | None = True 

1261 ): 

1262 """ 

1263 Set the name(s) of the axis. 

1264 

1265 Parameters 

1266 ---------- 

1267 name : str or list of str 

1268 Name(s) to set. 

1269 axis : {0 or 'index', 1 or 'columns'}, default 0 

1270 The axis to set the label. The value 0 or 'index' specifies index, 

1271 and the value 1 or 'columns' specifies columns. 

1272 inplace : bool, default False 

1273 If `True`, do operation inplace and return None. 

1274 copy : bool, default True

1275 Whether to make a copy of the result.

1276 

1277 Returns 

1278 ------- 

1279 Series, DataFrame, or None 

1280 The same type as the caller or `None` if `inplace` is `True`. 

1281 

1282 See Also 

1283 -------- 

1284 DataFrame.rename : Alter the axis labels of :class:`DataFrame`. 

1285 Series.rename : Alter the index labels or set the index name 

1286 of :class:`Series`. 

1287 Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. 

1288 

1289 Examples 

1290 -------- 

1291 >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, 

1292 ... ["dog", "cat", "monkey"]) 

1293 >>> df 

1294 num_legs 

1295 dog 4 

1296 cat 4 

1297 monkey 2 

1298 >>> df._set_axis_name("animal") 

1299 num_legs 

1300 animal 

1301 dog 4 

1302 cat 4 

1303 monkey 2 

1304 >>> df.index = pd.MultiIndex.from_product( 

1305 ... [["mammal"], ['dog', 'cat', 'monkey']]) 

1306 >>> df._set_axis_name(["type", "name"]) 

1307 num_legs 

1308 type name 

1309 mammal dog 4 

1310 cat 4 

1311 monkey 2 

1312 """ 

1313 axis = self._get_axis_number(axis) 

1314 idx = self._get_axis(axis).set_names(name) 

1315 

1316 inplace = validate_bool_kwarg(inplace, "inplace") 

1317 renamed = self if inplace else self.copy(deep=copy) 

1318 if axis == 0: 

1319 renamed.index = idx 

1320 else: 

1321 renamed.columns = idx 

1322 

1323 if not inplace: 

1324 return renamed 

1325 

1326 # ---------------------------------------------------------------------- 

1327 # Comparison Methods 

1328 

1329 @final 

1330 def _indexed_same(self, other) -> bool_t: 

1331 return all( 

1332 self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS 

1333 ) 

1334 

1335 @final 

1336 def equals(self, other: object) -> bool_t: 

1337 """ 

1338 Test whether two objects contain the same elements. 

1339 

1340 This function allows two Series or DataFrames to be compared against 

1341 each other to see if they have the same shape and elements. NaNs in 

1342 the same location are considered equal. 

1343 

1344 The row/column index does not need to have the same type, as long

1345 as the values are considered equal. Corresponding columns must be of 

1346 the same dtype. 

1347 

1348 Parameters 

1349 ---------- 

1350 other : Series or DataFrame 

1351 The other Series or DataFrame to be compared with the first. 

1352 

1353 Returns 

1354 ------- 

1355 bool 

1356 True if all elements are the same in both objects, False 

1357 otherwise. 

1358 

1359 See Also 

1360 -------- 

1361 Series.eq : Compare two Series objects of the same length 

1362 and return a Series where each element is True if the element 

1363 in each Series is equal, False otherwise. 

1364 DataFrame.eq : Compare two DataFrame objects of the same shape and 

1365 return a DataFrame where each element is True if the respective 

1366 element in each DataFrame is equal, False otherwise. 

1367 testing.assert_series_equal : Raises an AssertionError if left and 

1368 right are not equal. Provides an easy interface to ignore 

1369 inequality in dtypes, indexes and precision among others. 

1370 testing.assert_frame_equal : Like assert_series_equal, but targets 

1371 DataFrames. 

1372 numpy.array_equal : Return True if two arrays have the same shape 

1373 and elements, False otherwise. 

1374 

1375 Examples 

1376 -------- 

1377 >>> df = pd.DataFrame({1: [10], 2: [20]}) 

1378 >>> df 

1379 1 2 

1380 0 10 20 

1381 

1382 DataFrames df and exactly_equal have the same types and values for 

1383 their elements and column labels, which will return True. 

1384 

1385 >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) 

1386 >>> exactly_equal 

1387 1 2 

1388 0 10 20 

1389 >>> df.equals(exactly_equal) 

1390 True 

1391 

1392 DataFrames df and different_column_type have the same element 

1393 types and values, but have different types for the column labels, 

1394 which will still return True. 

1395 

1396 >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) 

1397 >>> different_column_type 

1398 1.0 2.0 

1399 0 10 20 

1400 >>> df.equals(different_column_type) 

1401 True 

1402 

1403 DataFrames df and different_data_type have different types for the 

1404 same values for their elements, and will return False even though 

1405 their column labels are the same values and types. 

1406 

1407 >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) 

1408 >>> different_data_type 

1409 1 2 

1410 0 10.0 20.0 

1411 >>> df.equals(different_data_type) 

1412 False 

1413 """ 

1414 if not (isinstance(other, type(self)) or isinstance(self, type(other))): 

1415 return False 

1416 other = cast(NDFrame, other) 

1417 return self._mgr.equals(other._mgr) 

1418 

1419 # ------------------------------------------------------------------------- 

1420 # Unary Methods 

1421 

1422 @final 

1423 def __neg__(self: NDFrameT) -> NDFrameT: 

1424 def blk_func(values: ArrayLike): 

1425 if is_bool_dtype(values.dtype): 

1426 # error: Argument 1 to "inv" has incompatible type "Union 

1427 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1428 # "_SupportsInversion[ndarray[Any, dtype[bool_]]]" 

1429 return operator.inv(values) # type: ignore[arg-type] 

1430 else: 

1431 # error: Argument 1 to "neg" has incompatible type "Union 

1432 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1433 # "_SupportsNeg[ndarray[Any, dtype[Any]]]" 

1434 return operator.neg(values) # type: ignore[arg-type] 

1435 

1436 new_data = self._mgr.apply(blk_func) 

1437 res = self._constructor(new_data) 

1438 return res.__finalize__(self, method="__neg__") 
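
# Hedged sketch: the bool special-case above means unary minus inverts
# boolean data instead of raising (NumPy rejects ``-`` on bool arrays).
#
# >>> (-pd.Series([True, False])).tolist()
# [False, True]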

1439 

1440 @final 

1441 def __pos__(self: NDFrameT) -> NDFrameT: 

1442 def blk_func(values: ArrayLike): 

1443 if is_bool_dtype(values.dtype): 

1444 return values.copy() 

1445 else: 

1446 # error: Argument 1 to "pos" has incompatible type "Union 

1447 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1448 # "_SupportsPos[ndarray[Any, dtype[Any]]]" 

1449 return operator.pos(values) # type: ignore[arg-type] 

1450 

1451 new_data = self._mgr.apply(blk_func) 

1452 res = self._constructor(new_data) 

1453 return res.__finalize__(self, method="__pos__") 

1454 

1455 @final 

1456 def __invert__(self: NDFrameT) -> NDFrameT: 

1457 if not self.size: 

1458 # inv fails with 0 len 

1459 return self.copy(deep=False) 

1460 

1461 new_data = self._mgr.apply(operator.invert) 

1462 return self._constructor(new_data).__finalize__(self, method="__invert__") 

1463 

1464 @final 

1465 def __nonzero__(self) -> NoReturn: 

1466 raise ValueError( 

1467 f"The truth value of a {type(self).__name__} is ambiguous. " 

1468 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

1469 ) 

1470 

1471 __bool__ = __nonzero__ 

1472 

1473 @final 

1474 def bool(self) -> bool_t: 

1475 """ 

1476 Return the bool of a single element Series or DataFrame. 

1477 

1478 This must be a boolean scalar value, either True or False. It will raise a 

1479 ValueError if the Series or DataFrame does not have exactly 1 element, or that 

1480 element is not boolean (integer values 0 and 1 will also raise an exception). 

1481 

1482 Returns 

1483 ------- 

1484 bool 

1485 The value in the Series or DataFrame. 

1486 

1487 See Also 

1488 -------- 

1489 Series.astype : Change the data type of a Series, including to boolean. 

1490 DataFrame.astype : Change the data type of a DataFrame, including to boolean. 

1491 numpy.bool_ : NumPy boolean data type, used by pandas for boolean values. 

1492 

1493 Examples 

1494 -------- 

1495 The method will only work for single element objects with a boolean value: 

1496 

1497 >>> pd.Series([True]).bool() 

1498 True 

1499 >>> pd.Series([False]).bool() 

1500 False 

1501 

1502 >>> pd.DataFrame({'col': [True]}).bool() 

1503 True 

1504 >>> pd.DataFrame({'col': [False]}).bool() 

1505 False 

1506 """ 

1507 v = self.squeeze() 

1508 if isinstance(v, (bool, np.bool_)): 

1509 return bool(v) 

1510 elif is_scalar(v): 

1511 raise ValueError( 

1512 "bool cannot act on a non-boolean single element " 

1513 f"{type(self).__name__}" 

1514 ) 

1515 

1516 self.__nonzero__() 

1517 # for mypy (__nonzero__ raises) 

1518 return True 

1519 

1520 @final 

1521 def abs(self: NDFrameT) -> NDFrameT: 

1522 """ 

1523 Return a Series/DataFrame with absolute numeric value of each element. 

1524 

1525 This function only applies to elements that are all numeric. 

1526 

1527 Returns 

1528 ------- 

1529 abs 

1530 Series/DataFrame containing the absolute value of each element. 

1531 

1532 See Also 

1533 -------- 

1534 numpy.absolute : Calculate the absolute value element-wise. 

1535 

1536 Notes 

1537 ----- 

1538 For ``complex`` inputs, ``1.2 + 1j``, the absolute value is 

1539 :math:`\\sqrt{ a^2 + b^2 }`. 

1540 

1541 Examples 

1542 -------- 

1543 Absolute numeric values in a Series. 

1544 

1545 >>> s = pd.Series([-1.10, 2, -3.33, 4]) 

1546 >>> s.abs() 

1547 0 1.10 

1548 1 2.00 

1549 2 3.33 

1550 3 4.00 

1551 dtype: float64 

1552 

1553 Absolute numeric values in a Series with complex numbers. 

1554 

1555 >>> s = pd.Series([1.2 + 1j]) 

1556 >>> s.abs() 

1557 0 1.56205 

1558 dtype: float64 

1559 

1560 Absolute numeric values in a Series with a Timedelta element. 

1561 

1562 >>> s = pd.Series([pd.Timedelta('1 days')]) 

1563 >>> s.abs() 

1564 0 1 days 

1565 dtype: timedelta64[ns] 

1566 

1567 Select rows with data closest to certain value using argsort (from 

1568 `StackOverflow <https://stackoverflow.com/a/17758115>`__). 

1569 

1570 >>> df = pd.DataFrame({ 

1571 ... 'a': [4, 5, 6, 7], 

1572 ... 'b': [10, 20, 30, 40], 

1573 ... 'c': [100, 50, -30, -50] 

1574 ... }) 

1575 >>> df 

1576 a b c 

1577 0 4 10 100 

1578 1 5 20 50 

1579 2 6 30 -30 

1580 3 7 40 -50 

1581 >>> df.loc[(df.c - 43).abs().argsort()] 

1582 a b c 

1583 1 5 20 50 

1584 0 4 10 100 

1585 2 6 30 -30 

1586 3 7 40 -50 

1587 """ 

1588 res_mgr = self._mgr.apply(np.abs) 

1589 return self._constructor(res_mgr).__finalize__(self, name="abs") 

1590 

1591 @final 

1592 def __abs__(self: NDFrameT) -> NDFrameT: 

1593 return self.abs() 

1594 

1595 @final 

1596 def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT: 

1597 return self.round(decimals).__finalize__(self, method="__round__") 

1598 

1599 # ------------------------------------------------------------------------- 

1600 # Label or Level Combination Helpers 

1601 # 

1602 # A collection of helper methods for DataFrame/Series operations that 

1603 # accept a combination of column/index labels and levels. All such 

1604 # operations should utilize/extend these methods when possible so that we 

1605 # have consistent precedence and validation logic throughout the library. 

1606 

1607 @final 

1608 def _is_level_reference(self, key: Level, axis: Axis = 0) -> bool_t: 

1609 """ 

1610 Test whether a key is a level reference for a given axis. 

1611 

1612 To be considered a level reference, `key` must be a string that: 

1613 - (axis=0): Matches the name of an index level and does NOT match 

1614 a column label. 

1615 - (axis=1): Matches the name of a column level and does NOT match 

1616 an index label. 

1617 

1618 Parameters 

1619 ---------- 

1620 key : Hashable 

1621 Potential level name for the given axis 

1622 axis : int, default 0 

1623 Axis that levels are associated with (0 for index, 1 for columns) 

1624 

1625 Returns 

1626 ------- 

1627 is_level : bool 

1628 """ 

1629 axis_int = self._get_axis_number(axis) 

1630 

1631 return ( 

1632 key is not None 

1633 and is_hashable(key) 

1634 and key in self.axes[axis_int].names 

1635 and not self._is_label_reference(key, axis=axis_int) 

1636 ) 

1637 

1638 @final 

1639 def _is_label_reference(self, key: Level, axis: Axis = 0) -> bool_t: 

1640 """ 

1641 Test whether a key is a label reference for a given axis. 

1642 

1643 To be considered a label reference, `key` must be a string that: 

1644 - (axis=0): Matches a column label 

1645 - (axis=1): Matches an index label 

1646 

1647 Parameters 

1648 ---------- 

1649 key : Hashable 

1650 Potential label name, i.e. Index entry. 

1651 axis : int, default 0 

1652 Axis perpendicular to the axis that labels are associated with 

1653 (0 means search for column labels, 1 means search for index labels) 

1654 

1655 Returns 

1656 ------- 

1657 is_label: bool 

1658 """ 

1659 axis_int = self._get_axis_number(axis) 

1660 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int) 

1661 

1662 return ( 

1663 key is not None 

1664 and is_hashable(key) 

1665 and any(key in self.axes[ax] for ax in other_axes) 

1666 ) 

1667 

1668 @final 

1669 def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t: 

1670 """ 

1671 Test whether a key is a label or level reference for a given axis. 

1672 

1673 To be considered either a label or a level reference, `key` must be a 

1674 string that: 

1675 - (axis=0): Matches a column label or an index level 

1676 - (axis=1): Matches an index label or a column level 

1677 

1678 Parameters 

1679 ---------- 

1680 key : Hashable 

1681 Potential label or level name 

1682 axis : int, default 0 

1683 Axis that levels are associated with (0 for index, 1 for columns) 

1684 

1685 Returns 

1686 ------- 

1687 bool 

1688 """ 

1689 return self._is_level_reference(key, axis=axis) or self._is_label_reference( 

1690 key, axis=axis 

1691 ) 

1692 

1693 @final 

1694 def _check_label_or_level_ambiguity(self, key: Level, axis: Axis = 0) -> None: 

1695 """ 

1696 Check whether `key` is ambiguous. 

1697 

1698 By ambiguous, we mean that it matches both a level of the input 

1699 `axis` and a label of the other axis. 

1700 

1701 Parameters 

1702 ---------- 

1703 key : Hashable 

1704 Label or level name. 

1705 axis : int, default 0 

1706 Axis that levels are associated with (0 for index, 1 for columns). 

1707 

1708 Raises 

1709 ------ 

1710 ValueError: `key` is ambiguous 

1711 """ 

1712 

1713 axis_int = self._get_axis_number(axis) 

1714 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int) 

1715 

1716 if ( 

1717 key is not None 

1718 and is_hashable(key) 

1719 and key in self.axes[axis_int].names 

1720 and any(key in self.axes[ax] for ax in other_axes) 

1721 ): 

1722 # Build an informative and grammatical warning 

1723 level_article, level_type = ( 

1724 ("an", "index") if axis_int == 0 else ("a", "column") 

1725 ) 

1726 

1727 label_article, label_type = ( 

1728 ("a", "column") if axis_int == 0 else ("an", "index") 

1729 ) 

1730 

1731 msg = ( 

1732 f"'{key}' is both {level_article} {level_type} level and " 

1733 f"{label_article} {label_type} label, which is ambiguous." 

1734 ) 

1735 raise ValueError(msg) 
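
# Hedged sketch of the ambiguity this guards against: a name that is both
# an index level and a column label.
#
# >>> df = pd.DataFrame({"a": [1, 2]}, index=pd.Index([3, 4], name="a"))
# >>> df.groupby("a")  # doctest: +SKIP
# ValueError: 'a' is both an index level and a column label, which is ambiguous.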

1736 

1737 @final 

1738 def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike: 

1739 """ 

1740 Return a 1-D array of values associated with `key`, a label or level 

1741 from the given `axis`. 

1742 

1743 Retrieval logic: 

1744 - (axis=0): Return column values if `key` matches a column label. 

1745 Otherwise return index level values if `key` matches an index 

1746 level. 

1747 - (axis=1): Return row values if `key` matches an index label. 

1748 Otherwise return column level values if 'key' matches a column 

1749 level 

1750 

1751 Parameters 

1752 ---------- 

1753 key : Hashable 

1754 Label or level name. 

1755 axis : int, default 0 

1756 Axis that levels are associated with (0 for index, 1 for columns) 

1757 

1758 Returns 

1759 ------- 

1760 np.ndarray or ExtensionArray 

1761 

1762 Raises 

1763 ------ 

1764 KeyError 

1765 if `key` matches neither a label nor a level 

1766 ValueError 

1767 if `key` matches multiple labels 

1768 """ 

1769 axis = self._get_axis_number(axis) 

1770 other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] 

1771 

1772 if self._is_label_reference(key, axis=axis): 

1773 self._check_label_or_level_ambiguity(key, axis=axis) 

1774 values = self.xs(key, axis=other_axes[0])._values 

1775 elif self._is_level_reference(key, axis=axis): 

1776 values = self.axes[axis].get_level_values(key)._values 

1777 else: 

1778 raise KeyError(key) 

1779 

1780 # Check for duplicates 

1781 if values.ndim > 1: 

1782 if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): 

1783 multi_message = ( 

1784 "\n" 

1785 "For a multi-index, the label must be a " 

1786 "tuple with elements corresponding to each level." 

1787 ) 

1788 else: 

1789 multi_message = "" 

1790 

1791 label_axis_name = "column" if axis == 0 else "index" 

1792 raise ValueError( 

1793 f"The {label_axis_name} label '{key}' is not unique.{multi_message}" 

1794 ) 

1795 

1796 return values 

1797 

1798 @final 

1799 def _drop_labels_or_levels(self, keys, axis: AxisInt = 0): 

1800 """ 

1801 Drop labels and/or levels for the given `axis`. 

1802 

1803 For each key in `keys`: 

1804 - (axis=0): If key matches a column label then drop the column. 

1805 Otherwise if key matches an index level then drop the level. 

1806 - (axis=1): If key matches an index label then drop the row. 

1807 Otherwise if key matches a column level then drop the level. 

1808 

1809 Parameters 

1810 ---------- 

1811 keys : str or list of str 

1812 labels or levels to drop 

1813 axis : int, default 0 

1814 Axis that levels are associated with (0 for index, 1 for columns) 

1815 

1816 Returns 

1817 ------- 

1818 dropped: DataFrame 

1819 

1820 Raises 

1821 ------ 

1822 ValueError 

1823 if any `keys` match neither a label nor a level 

1824 """ 

1825 axis = self._get_axis_number(axis) 

1826 

1827 # Validate keys 

1828 keys = common.maybe_make_list(keys) 

1829 invalid_keys = [ 

1830 k for k in keys if not self._is_label_or_level_reference(k, axis=axis) 

1831 ] 

1832 

1833 if invalid_keys: 

1834 raise ValueError( 

1835 "The following keys are not valid labels or " 

1836 f"levels for axis {axis}: {invalid_keys}" 

1837 ) 

1838 

1839 # Compute levels and labels to drop 

1840 levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] 

1841 

1842 labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] 

1843 

1844 # Perform copy upfront and then use inplace operations below. 

1845 # This ensures that we always perform exactly one copy. 

1846 # ``copy`` and/or ``inplace`` options could be added in the future. 

1847 dropped = self.copy(deep=False) 

1848 

1849 if axis == 0: 

1850 # Handle dropping index levels 

1851 if levels_to_drop: 

1852 dropped.reset_index(levels_to_drop, drop=True, inplace=True) 

1853 

1854 # Handle dropping columns labels 

1855 if labels_to_drop: 

1856 dropped.drop(labels_to_drop, axis=1, inplace=True) 

1857 else: 

1858 # Handle dropping column levels 

1859 if levels_to_drop: 

1860 if isinstance(dropped.columns, MultiIndex): 

1861 # Drop the specified levels from the MultiIndex 

1862 dropped.columns = dropped.columns.droplevel(levels_to_drop) 

1863 else: 

1864 # Drop the last level of Index by replacing with 

1865 # a RangeIndex 

1866 dropped.columns = RangeIndex(dropped.columns.size) 

1867 

1868 # Handle dropping index labels 

1869 if labels_to_drop: 

1870 dropped.drop(labels_to_drop, axis=0, inplace=True) 

1871 

1872 return dropped 
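
# Hedged sketch of this internal helper: a column label is dropped as a
# column, while an index level name is dropped from the index.
#
# >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}).set_index("a")
# >>> df._drop_labels_or_levels("b").columns.tolist()  # "b" is a column label
# []
# >>> df._drop_labels_or_levels("a").index             # "a" is an index level
# RangeIndex(start=0, stop=2, step=1)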

1873 

1874 # ---------------------------------------------------------------------- 

1875 # Iteration 

1876 

1877 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

1878 # Incompatible types in assignment (expression has type "None", base class 

1879 # "object" defined the type as "Callable[[object], int]") 

1880 __hash__: ClassVar[None] # type: ignore[assignment] 

1881 

1882 def __iter__(self) -> Iterator: 

1883 """ 

1884 Iterate over info axis. 

1885 

1886 Returns 

1887 ------- 

1888 iterator 

1889 Info axis as iterator. 

1890 """ 

1891 return iter(self._info_axis) 

1892 

1893 # can we get a better explanation of this? 

1894 def keys(self) -> Index: 

1895 """ 

1896 Get the 'info axis' (see Indexing for more). 

1897 

1898 This is index for Series, columns for DataFrame. 

1899 

1900 Returns 

1901 ------- 

1902 Index 

1903 Info axis. 

1904 """ 

1905 return self._info_axis 

1906 

1907 def items(self): 

1908 """ 

1909 Iterate over (label, values) pairs on the info axis.

1910 

1911 This is index for Series and columns for DataFrame. 

1912 

1913 Returns 

1914 ------- 

1915 Generator 

1916 """ 

1917 for h in self._info_axis: 

1918 yield h, self[h] 

1919 
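# A short sketch of ``items`` for both container types (assuming the usual
# ``import pandas as pd``):
#
# >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# >>> [label for label, column in df.items()]  # info axis: columns
# ['a', 'b']
# >>> s = pd.Series([10, 20], index=["x", "y"])
# >>> list(s.items())  # info axis: index
# [('x', 10), ('y', 20)]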

1920 def __len__(self) -> int: 

1921 """Returns length of info axis""" 

1922 return len(self._info_axis) 

1923 

1924 @final 

1925 def __contains__(self, key) -> bool_t: 

1926 """True if the key is in the info axis""" 

1927 return key in self._info_axis 

1928 

1929 @property 

1930 def empty(self) -> bool_t: 

1931 """ 

1932 Indicator whether Series/DataFrame is empty. 

1933 

1934 True if Series/DataFrame is entirely empty (no items), meaning any of the 

1935 axes are of length 0. 

1936 

1937 Returns 

1938 ------- 

1939 bool 

1940 If Series/DataFrame is empty, return True; if not, return False.

1941 

1942 See Also 

1943 -------- 

1944 Series.dropna : Return series without null values. 

1945 DataFrame.dropna : Return DataFrame with labels on given axis omitted 

1946 where (all or any) data are missing. 

1947 

1948 Notes 

1949 ----- 

1950 If Series/DataFrame contains only NaNs, it is still not considered empty. See 

1951 the example below. 

1952 

1953 Examples 

1954 -------- 

1955 An example of an actual empty DataFrame. Notice the index is empty: 

1956 

1957 >>> df_empty = pd.DataFrame({'A' : []}) 

1958 >>> df_empty 

1959 Empty DataFrame 

1960 Columns: [A] 

1961 Index: [] 

1962 >>> df_empty.empty 

1963 True 

1964 

1965 If we only have NaNs in our DataFrame, it is not considered empty! We 

1966 will need to drop the NaNs to make the DataFrame empty: 

1967 

1968 >>> df = pd.DataFrame({'A' : [np.nan]}) 

1969 >>> df 

1970 A 

1971 0 NaN 

1972 >>> df.empty 

1973 False 

1974 >>> df.dropna().empty 

1975 True 

1976 

1977 >>> ser_empty = pd.Series({'A' : []}) 

1978 >>> ser_empty 

1979 A [] 

1980 dtype: object 

1981 >>> ser_empty.empty 

1982 False 

1983 >>> ser_empty = pd.Series() 

1984 >>> ser_empty.empty 

1985 True 

1986 """ 

1987 return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) 

1988 

1989 # ---------------------------------------------------------------------- 

1990 # Array Interface 

1991 

1992 # This is also set in IndexOpsMixin 

1993 # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented 

1994 __array_priority__: int = 1000 

1995 

1996 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: 

1997 values = self._values 

1998 arr = np.asarray(values, dtype=dtype) 

1999 if ( 

2000 astype_is_view(values.dtype, arr.dtype) 

2001 and using_copy_on_write() 

2002 and self._mgr.is_single_block 

2003 ): 

2004 # Check if both conversions can be done without a copy 

2005 if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view( 

2006 values.dtype, arr.dtype 

2007 ): 

2008 arr = arr.view() 

2009 arr.flags.writeable = False 

2010 return arr 

2011 
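# Sketch of the Copy-on-Write branch above: when both conversions are
# zero-copy and the data sits in a single block, the returned ndarray is
# a read-only view (assumes a pandas version with the experimental
# ``mode.copy_on_write`` option):
#
# >>> pd.set_option("mode.copy_on_write", True)
# >>> df = pd.DataFrame({"a": [1, 2, 3]})
# >>> arr = np.asarray(df)
# >>> arr.flags.writeable
# False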

2012 @final 

2013 def __array_ufunc__( 

2014 self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any 

2015 ): 

2016 return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs) 

2017 

2018 # ---------------------------------------------------------------------- 

2019 # Picklability 

2020 

2021 @final 

2022 def __getstate__(self) -> dict[str, Any]: 

2023 meta = {k: getattr(self, k, None) for k in self._metadata} 

2024 return { 

2025 "_mgr": self._mgr, 

2026 "_typ": self._typ, 

2027 "_metadata": self._metadata, 

2028 "attrs": self.attrs, 

2029 "_flags": {k: self.flags[k] for k in self.flags._keys}, 

2030 **meta, 

2031 } 

2032 

2033 @final 

2034 def __setstate__(self, state) -> None: 

2035 if isinstance(state, BlockManager): 

2036 self._mgr = state 

2037 elif isinstance(state, dict): 

2038 if "_data" in state and "_mgr" not in state: 

2039 # compat for older pickles 

2040 state["_mgr"] = state.pop("_data") 

2041 typ = state.get("_typ") 

2042 if typ is not None: 

2043 attrs = state.get("_attrs", {}) 

2044 object.__setattr__(self, "_attrs", attrs) 

2045 flags = state.get("_flags", {"allows_duplicate_labels": True}) 

2046 object.__setattr__(self, "_flags", Flags(self, **flags)) 

2047 

2048 # set in the order of internal names 

2049 # to avoid definitional recursion 

2050 # e.g. say fill_value needing _mgr to be 

2051 # defined 

2052 meta = set(self._internal_names + self._metadata) 

2053 for k in list(meta): 

2054 if k in state and k != "_flags": 

2055 v = state[k] 

2056 object.__setattr__(self, k, v) 

2057 

2058 for k, v in state.items(): 

2059 if k not in meta: 

2060 object.__setattr__(self, k, v) 

2061 

2062 else: 

2063 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2064 elif len(state) == 2: 

2065 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2066 

2067 self._item_cache: dict[Hashable, Series] = {} 

2068 
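# Round-trip sketch for the pickling hooks above: ``attrs`` and ``flags``
# travel through ``__getstate__``/``__setstate__`` together with the data:
#
# >>> import pickle
# >>> df = pd.DataFrame({"a": [1]})
# >>> df.attrs["source"] = "demo"
# >>> df.flags.allows_duplicate_labels = False
# >>> df2 = pickle.loads(pickle.dumps(df))
# >>> df2.attrs, df2.flags.allows_duplicate_labels
# ({'source': 'demo'}, False)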

2069 # ---------------------------------------------------------------------- 

2070 # Rendering Methods 

2071 

2072 def __repr__(self) -> str: 

2073 # string representation based upon iterating over self 

2074 # (since, by definition, `PandasContainers` are iterable) 

2075 prepr = f"[{','.join(map(pprint_thing, self))}]" 

2076 return f"{type(self).__name__}({prepr})" 

2077 

2078 @final 

2079 def _repr_latex_(self): 

2080 """ 

2081 Returns a LaTeX representation for a particular object. 

2082 Mainly for use with nbconvert (jupyter notebook conversion to pdf). 

2083 """ 

2084 if config.get_option("styler.render.repr") == "latex": 

2085 return self.to_latex() 

2086 else: 

2087 return None 

2088 

2089 @final 

2090 def _repr_data_resource_(self): 

2091 """ 

2092 Not a real Jupyter special repr method, but we use the same 

2093 naming convention. 

2094 """ 

2095 if config.get_option("display.html.table_schema"): 

2096 data = self.head(config.get_option("display.max_rows")) 

2097 

2098 as_json = data.to_json(orient="table") 

2099 as_json = cast(str, as_json) 

2100 return loads(as_json, object_pairs_hook=collections.OrderedDict) 

2101 

2102 # ---------------------------------------------------------------------- 

2103 # I/O Methods 

2104 

2105 @final 

2106 @doc( 

2107 klass="object", 

2108 storage_options=_shared_docs["storage_options"], 

2109 storage_options_versionadded="1.2.0", 

2110 ) 

2111 def to_excel( 

2112 self, 

2113 excel_writer, 

2114 sheet_name: str = "Sheet1", 

2115 na_rep: str = "", 

2116 float_format: str | None = None, 

2117 columns: Sequence[Hashable] | None = None, 

2118 header: Sequence[Hashable] | bool_t = True, 

2119 index: bool_t = True, 

2120 index_label: IndexLabel = None, 

2121 startrow: int = 0, 

2122 startcol: int = 0, 

2123 engine: str | None = None, 

2124 merge_cells: bool_t = True, 

2125 inf_rep: str = "inf", 

2126 freeze_panes: tuple[int, int] | None = None, 

2127 storage_options: StorageOptions = None, 

2128 ) -> None: 

2129 """ 

2130 Write {klass} to an Excel sheet. 

2131 

2132 To write a single {klass} to an Excel .xlsx file it is only necessary to 

2133 specify a target file name. To write to multiple sheets it is necessary to 

2134 create an `ExcelWriter` object with a target file name, and specify a sheet 

2135 in the file to write to. 

2136 

2137 Multiple sheets may be written to by specifying unique `sheet_name`. 

2138 With all data written to the file it is necessary to save the changes. 

2139 Note that creating an `ExcelWriter` object with a file name that already 

2140 exists will result in the contents of the existing file being erased. 

2141 

2142 Parameters 

2143 ---------- 

2144 excel_writer : path-like, file-like, or ExcelWriter object 

2145 File path or existing ExcelWriter. 

2146 sheet_name : str, default 'Sheet1' 

2147 Name of sheet which will contain DataFrame. 

2148 na_rep : str, default '' 

2149 Missing data representation. 

2150 float_format : str, optional 

2151 Format string for floating point numbers. For example 

2152 ``float_format="%.2f"`` will format 0.1234 to 0.12. 

2153 columns : sequence or list of str, optional 

2154 Columns to write. 

2155 header : bool or list of str, default True 

2156 Write out the column names. If a list of string is given it is 

2157 assumed to be aliases for the column names. 

2158 index : bool, default True 

2159 Write row names (index). 

2160 index_label : str or sequence, optional 

2161 Column label for index column(s) if desired. If not specified, and 

2162 `header` and `index` are True, then the index names are used. A 

2163 sequence should be given if the DataFrame uses MultiIndex. 

2164 startrow : int, default 0 

2165 Upper left cell row to dump data frame. 

2166 startcol : int, default 0 

2167 Upper left cell column to dump data frame. 

2168 engine : str, optional 

2169 Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this 

2170 via the options ``io.excel.xlsx.writer`` or 

2171 ``io.excel.xlsm.writer``. 

2172 

2173 merge_cells : bool, default True 

2174 Write MultiIndex and Hierarchical Rows as merged cells. 

2175 inf_rep : str, default 'inf' 

2176 Representation for infinity (there is no native representation for 

2177 infinity in Excel). 

2178 freeze_panes : tuple of int (length 2), optional 

2179 Specifies the one-based bottommost row and rightmost column that 

2180 is to be frozen. 

2181 {storage_options} 

2182 

2183 .. versionadded:: {storage_options_versionadded} 

2184 

2185 See Also 

2186 -------- 

2187 to_csv : Write DataFrame to a comma-separated values (csv) file. 

2188 ExcelWriter : Class for writing DataFrame objects into excel sheets. 

2189 read_excel : Read an Excel file into a pandas DataFrame. 

2190 read_csv : Read a comma-separated values (csv) file into DataFrame. 

2191 io.formats.style.Styler.to_excel : Add styles to Excel sheet. 

2192 

2193 Notes 

2194 ----- 

2195 For compatibility with :meth:`~DataFrame.to_csv`, 

2196 to_excel serializes lists and dicts to strings before writing. 

2197 

2198 Once a workbook has been saved it is not possible to write further 

2199 data without rewriting the whole workbook. 

2200 

2201 Examples 

2202 -------- 

2203 

2204 Create, write to and save a workbook: 

2205 

2206 >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], 

2207 ... index=['row 1', 'row 2'], 

2208 ... columns=['col 1', 'col 2']) 

2209 >>> df1.to_excel("output.xlsx") # doctest: +SKIP 

2210 

2211 To specify the sheet name: 

2212 

2213 >>> df1.to_excel("output.xlsx", 

2214 ... sheet_name='Sheet_name_1') # doctest: +SKIP 

2215 

2216 If you wish to write to more than one sheet in the workbook, it is 

2217 necessary to specify an ExcelWriter object: 

2218 

2219 >>> df2 = df1.copy() 

2220 >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP 

2221 ... df1.to_excel(writer, sheet_name='Sheet_name_1') 

2222 ... df2.to_excel(writer, sheet_name='Sheet_name_2') 

2223 

2224 ExcelWriter can also be used to append to an existing Excel file: 

2225 

2226 >>> with pd.ExcelWriter('output.xlsx', 

2227 ... mode='a') as writer: # doctest: +SKIP 

2228 ... df.to_excel(writer, sheet_name='Sheet_name_3') 

2229 

2230 To set the library that is used to write the Excel file, 

2231 you can pass the `engine` keyword (the default engine is 

2232 automatically chosen depending on the file extension): 

2233 

2234 >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP 

2235 """ 

2236 

2237 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

2238 

2239 from pandas.io.formats.excel import ExcelFormatter 

2240 

2241 formatter = ExcelFormatter( 

2242 df, 

2243 na_rep=na_rep, 

2244 cols=columns, 

2245 header=header, 

2246 float_format=float_format, 

2247 index=index, 

2248 index_label=index_label, 

2249 merge_cells=merge_cells, 

2250 inf_rep=inf_rep, 

2251 ) 

2252 formatter.write( 

2253 excel_writer, 

2254 sheet_name=sheet_name, 

2255 startrow=startrow, 

2256 startcol=startcol, 

2257 freeze_panes=freeze_panes, 

2258 engine=engine, 

2259 storage_options=storage_options, 

2260 ) 

2261 

2262 @final 

2263 @doc( 

2264 storage_options=_shared_docs["storage_options"], 

2265 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

2266 ) 

2267 def to_json( 

2268 self, 

2269 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

2270 orient: str | None = None, 

2271 date_format: str | None = None, 

2272 double_precision: int = 10, 

2273 force_ascii: bool_t = True, 

2274 date_unit: str = "ms", 

2275 default_handler: Callable[[Any], JSONSerializable] | None = None, 

2276 lines: bool_t = False, 

2277 compression: CompressionOptions = "infer", 

2278 index: bool_t = True, 

2279 indent: int | None = None, 

2280 storage_options: StorageOptions = None, 

2281 mode: Literal["a", "w"] = "w", 

2282 ) -> str | None: 

2283 """ 

2284 Convert the object to a JSON string. 

2285 

2286 Note that NaN values and None will be converted to null, and datetime

2287 objects will be converted to UNIX timestamps.

2288 

2289 Parameters 

2290 ---------- 

2291 path_or_buf : str, path object, file-like object, or None, default None 

2292 String, path object (implementing os.PathLike[str]), or file-like 

2293 object implementing a write() function. If None, the result is 

2294 returned as a string. 

2295 orient : str 

2296 Indication of expected JSON string format. 

2297 

2298 * Series: 

2299 

2300 - default is 'index' 

2301 - allowed values are: {{'split', 'records', 'index', 'table'}}. 

2302 

2303 * DataFrame: 

2304 

2305 - default is 'columns' 

2306 - allowed values are: {{'split', 'records', 'index', 'columns', 

2307 'values', 'table'}}. 

2308 

2309 * The format of the JSON string: 

2310 

2311 - 'split' : dict like {{'index' -> [index], 'columns' -> [columns], 

2312 'data' -> [values]}} 

2313 - 'records' : list like [{{column -> value}}, ... , {{column -> value}}] 

2314 - 'index' : dict like {{index -> {{column -> value}}}} 

2315 - 'columns' : dict like {{column -> {{index -> value}}}} 

2316 - 'values' : just the values array 

2317 - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}} 

2318 

2319 Describing the data, where data component is like ``orient='records'``. 

2320 

2321 date_format : {{None, 'epoch', 'iso'}} 

2322 Type of date conversion. 'epoch' = epoch milliseconds, 

2323 'iso' = ISO8601. The default depends on the `orient`. For 

2324 ``orient='table'``, the default is 'iso'. For all other orients, 

2325 the default is 'epoch'. 

2326 double_precision : int, default 10 

2327 The number of decimal places to use when encoding 

2328 floating point values. 

2329 force_ascii : bool, default True 

2330 Force encoded string to be ASCII. 

2331 date_unit : str, default 'ms' (milliseconds) 

2332 The time unit to encode to, governs timestamp and ISO8601 

2333 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, 

2334 microsecond, and nanosecond respectively. 

2335 default_handler : callable, default None 

2336 Handler to call if object cannot otherwise be converted to a 

2337 suitable format for JSON. Should receive a single argument which is 

2338 the object to convert and return a serialisable object. 

2339 lines : bool, default False 

2340 If 'orient' is 'records', write out line-delimited JSON format.

2341 Raises ValueError for any other 'orient', since the other formats

2342 are not list-like.

2343 {compression_options} 

2344 

2345 .. versionchanged:: 1.4.0 Zstandard support. 

2346 

2347 index : bool, default True 

2348 Whether to include the index values in the JSON string. Not 

2349 including the index (``index=False``) is only supported when 

2350 orient is 'split' or 'table'. 

2351 indent : int, optional 

2352 Length of whitespace used to indent each record. 

2353 

2354 {storage_options} 

2355 

2356 .. versionadded:: 1.2.0 

2357 

2358 mode : str, default 'w' (writing) 

2359 Specify the IO mode for output when supplying a path_or_buf. 

2360 Accepted args are 'w' (writing) and 'a' (append) only. 

2361 mode='a' is only supported when lines is True and orient is 'records'. 

2362 

2363 Returns 

2364 ------- 

2365 None or str 

2366 If path_or_buf is None, returns the resulting json format as a 

2367 string. Otherwise returns None. 

2368 

2369 See Also 

2370 -------- 

2371 read_json : Convert a JSON string to pandas object. 

2372 

2373 Notes 

2374 ----- 

2375 The behavior of ``indent=0`` varies from the stdlib, which does not 

2376 indent the output but does insert newlines. Currently, ``indent=0`` 

2377 and the default ``indent=None`` are equivalent in pandas, though this 

2378 may change in a future release. 

2379 

2380 ``orient='table'`` contains a 'pandas_version' field under 'schema'. 

2381 This stores the version of `pandas` used in the latest revision of the 

2382 schema. 

2383 

2384 Examples 

2385 -------- 

2386 >>> from json import loads, dumps 

2387 >>> df = pd.DataFrame( 

2388 ... [["a", "b"], ["c", "d"]], 

2389 ... index=["row 1", "row 2"], 

2390 ... columns=["col 1", "col 2"], 

2391 ... ) 

2392 

2393 >>> result = df.to_json(orient="split") 

2394 >>> parsed = loads(result) 

2395 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2396 {{ 

2397 "columns": [ 

2398 "col 1", 

2399 "col 2" 

2400 ], 

2401 "index": [ 

2402 "row 1", 

2403 "row 2" 

2404 ], 

2405 "data": [ 

2406 [ 

2407 "a", 

2408 "b" 

2409 ], 

2410 [ 

2411 "c", 

2412 "d" 

2413 ] 

2414 ] 

2415 }} 

2416 

2417 Encoding/decoding a DataFrame using ``'records'`` formatted JSON.

2418 Note that index labels are not preserved with this encoding. 

2419 

2420 >>> result = df.to_json(orient="records") 

2421 >>> parsed = loads(result) 

2422 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2423 [ 

2424 {{ 

2425 "col 1": "a", 

2426 "col 2": "b" 

2427 }}, 

2428 {{ 

2429 "col 1": "c", 

2430 "col 2": "d" 

2431 }} 

2432 ] 

2433 

2434 Encoding/decoding a DataFrame using ``'index'`` formatted JSON:

2435 

2436 >>> result = df.to_json(orient="index") 

2437 >>> parsed = loads(result) 

2438 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2439 {{ 

2440 "row 1": {{ 

2441 "col 1": "a", 

2442 "col 2": "b" 

2443 }}, 

2444 "row 2": {{ 

2445 "col 1": "c", 

2446 "col 2": "d" 

2447 }} 

2448 }} 

2449 

2450 Encoding/decoding a DataFrame using ``'columns'`` formatted JSON:

2451 

2452 >>> result = df.to_json(orient="columns") 

2453 >>> parsed = loads(result) 

2454 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2455 {{ 

2456 "col 1": {{ 

2457 "row 1": "a", 

2458 "row 2": "c" 

2459 }}, 

2460 "col 2": {{ 

2461 "row 1": "b", 

2462 "row 2": "d" 

2463 }} 

2464 }} 

2465 

2466 Encoding/decoding a DataFrame using ``'values'`` formatted JSON:

2467 

2468 >>> result = df.to_json(orient="values") 

2469 >>> parsed = loads(result) 

2470 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2471 [ 

2472 [ 

2473 "a", 

2474 "b" 

2475 ], 

2476 [ 

2477 "c", 

2478 "d" 

2479 ] 

2480 ] 

2481 

2482 Encoding with Table Schema: 

2483 

2484 >>> result = df.to_json(orient="table") 

2485 >>> parsed = loads(result) 

2486 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2487 {{ 

2488 "schema": {{ 

2489 "fields": [ 

2490 {{ 

2491 "name": "index", 

2492 "type": "string" 

2493 }}, 

2494 {{ 

2495 "name": "col 1", 

2496 "type": "string" 

2497 }}, 

2498 {{ 

2499 "name": "col 2", 

2500 "type": "string" 

2501 }} 

2502 ], 

2503 "primaryKey": [ 

2504 "index" 

2505 ], 

2506 "pandas_version": "1.4.0" 

2507 }}, 

2508 "data": [ 

2509 {{ 

2510 "index": "row 1", 

2511 "col 1": "a", 

2512 "col 2": "b" 

2513 }}, 

2514 {{ 

2515 "index": "row 2", 

2516 "col 1": "c", 

2517 "col 2": "d" 

2518 }} 

2519 ] 

2520 }} 

2521 """ 

2522 from pandas.io import json 

2523 

2524 if date_format is None and orient == "table": 

2525 date_format = "iso" 

2526 elif date_format is None: 

2527 date_format = "epoch" 

2528 

2529 config.is_nonnegative_int(indent) 

2530 indent = indent or 0 

2531 

2532 return json.to_json( 

2533 path_or_buf=path_or_buf, 

2534 obj=self, 

2535 orient=orient, 

2536 date_format=date_format, 

2537 double_precision=double_precision, 

2538 force_ascii=force_ascii, 

2539 date_unit=date_unit, 

2540 default_handler=default_handler, 

2541 lines=lines, 

2542 compression=compression, 

2543 index=index, 

2544 indent=indent, 

2545 storage_options=storage_options, 

2546 mode=mode, 

2547 ) 

2548 
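# Sketch of the append mode wired through above: ``mode="a"`` is accepted
# only together with ``lines=True`` and ``orient="records"`` (the file
# name here is illustrative):
#
# >>> df = pd.DataFrame({"col 1": ["a"], "col 2": ["b"]})
# >>> df.to_json("data.jsonl", orient="records", lines=True)  # doctest: +SKIP
# >>> df.to_json("data.jsonl", orient="records", lines=True,
# ...            mode="a")  # doctest: +SKIP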

2549 @final 

2550 def to_hdf( 

2551 self, 

2552 path_or_buf: FilePath | HDFStore, 

2553 key: str, 

2554 mode: str = "a", 

2555 complevel: int | None = None, 

2556 complib: str | None = None, 

2557 append: bool_t = False, 

2558 format: str | None = None, 

2559 index: bool_t = True, 

2560 min_itemsize: int | dict[str, int] | None = None, 

2561 nan_rep=None, 

2562 dropna: bool_t | None = None, 

2563 data_columns: Literal[True] | list[str] | None = None, 

2564 errors: str = "strict", 

2565 encoding: str = "UTF-8", 

2566 ) -> None: 

2567 """ 

2568 Write the contained data to an HDF5 file using HDFStore. 

2569 

2570 Hierarchical Data Format (HDF) is self-describing, allowing an 

2571 application to interpret the structure and contents of a file with 

2572 no outside information. One HDF file can hold a mix of related objects 

2573 which can be accessed as a group or as individual objects. 

2574 

2575 In order to add another DataFrame or Series to an existing HDF file 

2576 please use append mode and a different key.

2577 

2578 .. warning:: 

2579 

2580 One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, 

2581 but the type of the subclass is lost upon storing. 

2582 

2583 For more information see the :ref:`user guide <io.hdf5>`. 

2584 

2585 Parameters 

2586 ---------- 

2587 path_or_buf : str or pandas.HDFStore 

2588 File path or HDFStore object. 

2589 key : str 

2590 Identifier for the group in the store. 

2591 mode : {'a', 'w', 'r+'}, default 'a' 

2592 Mode to open file: 

2593 

2594 - 'w': write, a new file is created (an existing file with 

2595 the same name would be deleted). 

2596 - 'a': append, an existing file is opened for reading and 

2597 writing, and if the file does not exist it is created. 

2598 - 'r+': similar to 'a', but the file must already exist. 

2599 complevel : {0-9}, default None 

2600 Specifies a compression level for data. 

2601 A value of 0 or None disables compression. 

2602 complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' 

2603 Specifies the compression library to be used. 

2604 As of v0.20.2 these additional compressors for Blosc are supported 

2605 (default if no compressor specified: 'blosc:blosclz'): 

2606 {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 

2607 'blosc:zlib', 'blosc:zstd'}. 

2608 Specifying a compression library which is not available issues 

2609 a ValueError. 

2610 append : bool, default False 

2611 For Table formats, append the input data to the existing table.

2612 format : {'fixed', 'table', None}, default 'fixed' 

2613 Possible values: 

2614 

2615 - 'fixed': Fixed format. Fast writing/reading. Not-appendable, 

2616 nor searchable. 

2617 - 'table': Table format. Write as a PyTables Table structure 

2618 which may perform worse but allow more flexible operations 

2619 like searching / selecting subsets of the data. 

2620 - If None, pd.get_option('io.hdf.default_format') is checked, 

2621 followed by fallback to "fixed". 

2622 index : bool, default True 

2623 Write DataFrame index as a column. 

2624 min_itemsize : dict or int, optional 

2625 Map column names to minimum string sizes for columns. 

2626 nan_rep : Any, optional 

2627 How to represent null values as str. 

2628 Not allowed with append=True. 

2629 dropna : bool, default False

2630 Remove missing values. 

2631 data_columns : list of columns or True, optional 

2632 List of columns to create as indexed data columns for on-disk 

2633 queries, or True to use all columns. By default only the axes 

2634 of the object are indexed. See 

2635 :ref:`Query via data columns<io.hdf5-query-data-columns>` for

2636 more information. 

2637 Applicable only to format='table'. 

2638 errors : str, default 'strict' 

2639 Specifies how encoding and decoding errors are to be handled. 

2640 See the errors argument for :func:`open` for a full list 

2641 of options. 

2642 encoding : str, default "UTF-8" 

2643 Character encoding used when storing strings.

2644 See Also 

2645 -------- 

2646 read_hdf : Read from HDF file. 

2647 DataFrame.to_orc : Write a DataFrame to the binary orc format. 

2648 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2649 DataFrame.to_sql : Write to a SQL table. 

2650 DataFrame.to_feather : Write out feather-format for DataFrames. 

2651 DataFrame.to_csv : Write out to a csv file. 

2652 

2653 Examples 

2654 -------- 

2655 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, 

2656 ... index=['a', 'b', 'c']) # doctest: +SKIP 

2657 >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP 

2658 

2659 We can add another object to the same file: 

2660 

2661 >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP 

2662 >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP 

2663 

2664 Reading from HDF file: 

2665 

2666 >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP 

2667 A B 

2668 a 1 4 

2669 b 2 5 

2670 c 3 6 

2671 >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP 

2672 0 1 

2673 1 2 

2674 2 3 

2675 3 4 

2676 dtype: int64 

2677 """ 

2678 from pandas.io import pytables 

2679 

2680 # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected 

2681 # "Union[DataFrame, Series]" [arg-type] 

2682 pytables.to_hdf( 

2683 path_or_buf, 

2684 key, 

2685 self, # type: ignore[arg-type] 

2686 mode=mode, 

2687 complevel=complevel, 

2688 complib=complib, 

2689 append=append, 

2690 format=format, 

2691 index=index, 

2692 min_itemsize=min_itemsize, 

2693 nan_rep=nan_rep, 

2694 dropna=dropna, 

2695 data_columns=data_columns, 

2696 errors=errors, 

2697 encoding=encoding, 

2698 ) 

2699 
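# Sketch of the appendable 'table' format described above; unlike 'fixed'
# it supports ``append=True`` and on-disk queries (paths illustrative):
#
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> df.to_hdf("store.h5", key="df", format="table",
# ...           data_columns=["A"])  # doctest: +SKIP
# >>> pd.DataFrame({"A": [3]}).to_hdf("store.h5", key="df",
# ...                                 format="table", append=True)  # doctest: +SKIP
# >>> pd.read_hdf("store.h5", "df", where="A > 1")  # doctest: +SKIP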

2700 @final 

2701 def to_sql( 

2702 self, 

2703 name: str, 

2704 con, 

2705 schema: str | None = None, 

2706 if_exists: Literal["fail", "replace", "append"] = "fail", 

2707 index: bool_t = True, 

2708 index_label: IndexLabel = None, 

2709 chunksize: int | None = None, 

2710 dtype: DtypeArg | None = None, 

2711 method: str | None = None, 

2712 ) -> int | None: 

2713 """ 

2714 Write records stored in a DataFrame to a SQL database. 

2715 

2716 Databases supported by SQLAlchemy [1]_ are supported. Tables can be 

2717 newly created, appended to, or overwritten. 

2718 

2719 Parameters 

2720 ---------- 

2721 name : str 

2722 Name of SQL table. 

2723 con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection 

2724 Using SQLAlchemy makes it possible to use any DB supported by that 

2725 library. Legacy support is provided for sqlite3.Connection objects. The user 

2726 is responsible for engine disposal and connection closure for the SQLAlchemy 

2727 connectable. See `here \ 

2728 <https://docs.sqlalchemy.org/en/20/core/connections.html>`_. 

2729 If passing a sqlalchemy.engine.Connection which is already in a transaction, 

2730 the transaction will not be committed. If passing a sqlite3.Connection, 

2731 it will not be possible to roll back the record insertion. 

2732 

2733 schema : str, optional 

2734 Specify the schema (if database flavor supports this). If None, use 

2735 default schema. 

2736 if_exists : {'fail', 'replace', 'append'}, default 'fail' 

2737 How to behave if the table already exists. 

2738 

2739 * fail: Raise a ValueError. 

2740 * replace: Drop the table before inserting new values. 

2741 * append: Insert new values to the existing table. 

2742 

2743 index : bool, default True 

2744 Write DataFrame index as a column. Uses `index_label` as the column 

2745 name in the table. 

2746 index_label : str or sequence, default None 

2747 Column label for index column(s). If None is given (default) and 

2748 `index` is True, then the index names are used. 

2749 A sequence should be given if the DataFrame uses MultiIndex. 

2750 chunksize : int, optional 

2751 Specify the number of rows in each batch to be written at a time. 

2752 By default, all rows will be written at once. 

2753 dtype : dict or scalar, optional 

2754 Specifying the datatype for columns. If a dictionary is used, the 

2755 keys should be the column names and the values should be the 

2756 SQLAlchemy types or strings for the sqlite3 legacy mode. If a 

2757 scalar is provided, it will be applied to all columns. 

2758 method : {None, 'multi', callable}, optional 

2759 Controls the SQL insertion clause used: 

2760 

2761 * None : Uses standard SQL ``INSERT`` clause (one per row). 

2762 * 'multi': Pass multiple values in a single ``INSERT`` clause. 

2763 * callable with signature ``(pd_table, conn, keys, data_iter)``. 

2764 

2765 Details and a sample callable implementation can be found in the 

2766 section :ref:`insert method <io.sql.method>`. 

2767 

2768 Returns 

2769 ------- 

2770 None or int 

2771 Number of rows affected by to_sql. None is returned if the callable 

2772 passed into ``method`` does not return an integer number of rows. 

2773 

2774 The number of rows affected is the sum of the ``rowcount``

2775 attribute of ``sqlite3.Cursor`` or the SQLAlchemy connectable, which may

2776 not reflect the exact number of written rows as documented for

2777 `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or

2778 `SQLAlchemy <https://docs.sqlalchemy.org/en/20/core/connections.html#sqlalchemy.engine.CursorResult.rowcount>`__.

2779 

2780 .. versionadded:: 1.4.0 

2781 

2782 Raises 

2783 ------ 

2784 ValueError 

2785 When the table already exists and `if_exists` is 'fail' (the 

2786 default). 

2787 

2788 See Also 

2789 -------- 

2790 read_sql : Read a DataFrame from a table. 

2791 

2792 Notes 

2793 ----- 

2794 Timezone aware datetime columns will be written as 

2795 ``Timestamp with timezone`` type with SQLAlchemy if supported by the 

2796 database. Otherwise, the datetimes will be stored as timezone unaware 

2797 timestamps local to the original timezone. 

2798 

2799 References 

2800 ---------- 

2801 .. [1] https://docs.sqlalchemy.org 

2802 .. [2] https://www.python.org/dev/peps/pep-0249/ 

2803 

2804 Examples 

2805 -------- 

2806 Create an in-memory SQLite database. 

2807 

2808 >>> from sqlalchemy import create_engine 

2809 >>> engine = create_engine('sqlite://', echo=False) 

2810 

2811 Create a table from scratch with 3 rows. 

2812 

2813 >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) 

2814 >>> df 

2815 name 

2816 0 User 1 

2817 1 User 2 

2818 2 User 3 

2819 

2820 >>> df.to_sql('users', con=engine) 

2821 3 

2822 >>> from sqlalchemy import text 

2823 >>> with engine.connect() as conn: 

2824 ... conn.execute(text("SELECT * FROM users")).fetchall() 

2825 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] 

2826 

2827 An `sqlalchemy.engine.Connection` can also be passed to `con`: 

2828 

2829 >>> with engine.begin() as connection: 

2830 ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) 

2831 ... df1.to_sql('users', con=connection, if_exists='append') 

2832 2 

2833 

2834 This is allowed to support operations that require that the same 

2835 DBAPI connection is used for the entire operation. 

2836 

2837 >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) 

2838 >>> df2.to_sql('users', con=engine, if_exists='append') 

2839 2 

2840 >>> with engine.connect() as conn: 

2841 ... conn.execute(text("SELECT * FROM users")).fetchall() 

2842 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), 

2843 (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), 

2844 (1, 'User 7')] 

2845 

2846 Overwrite the table with just ``df2``. 

2847 

2848 >>> df2.to_sql('users', con=engine, if_exists='replace', 

2849 ... index_label='id') 

2850 2 

2851 >>> with engine.connect() as conn: 

2852 ... conn.execute(text("SELECT * FROM users")).fetchall() 

2853 [(0, 'User 6'), (1, 'User 7')] 

2854 

2855 Specify the dtype (especially useful for integers with missing values). 

2856 Notice that while pandas is forced to store the data as floating point, 

2857 the database supports nullable integers. When fetching the data with 

2858 Python, we get back integer scalars. 

2859 

2860 >>> df = pd.DataFrame({"A": [1, None, 2]}) 

2861 >>> df 

2862 A 

2863 0 1.0 

2864 1 NaN 

2865 2 2.0 

2866 

2867 >>> from sqlalchemy.types import Integer 

2868 >>> df.to_sql('integers', con=engine, index=False, 

2869 ... dtype={"A": Integer()}) 

2870 3 

2871 

2872 >>> with engine.connect() as conn: 

2873 ... conn.execute(text("SELECT * FROM integers")).fetchall() 

2874 [(1,), (None,), (2,)] 

2875 """ # noqa:E501 

2876 from pandas.io import sql 

2877 

2878 return sql.to_sql( 

2879 self, 

2880 name, 

2881 con, 

2882 schema=schema, 

2883 if_exists=if_exists, 

2884 index=index, 

2885 index_label=index_label, 

2886 chunksize=chunksize, 

2887 dtype=dtype, 

2888 method=method, 

2889 ) 

2890 
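# A minimal sketch of an insertion callable for the ``method`` parameter
# above, following the documented ``(pd_table, conn, keys, data_iter)``
# signature. The qmark placeholders assume an SQLite DBAPI driver; this is
# illustrative, not the implementation referenced in the user guide:
#
# >>> def insert_rows(pd_table, conn, keys, data_iter):
# ...     rows = list(data_iter)
# ...     cols = ", ".join(keys)
# ...     marks = ", ".join(["?"] * len(keys))
# ...     stmt = f"INSERT INTO {pd_table.name} ({cols}) VALUES ({marks})"
# ...     cur = conn.connection.cursor()  # raw DBAPI cursor
# ...     cur.executemany(stmt, rows)
# ...     return len(rows)
# >>> df.to_sql("users", con=engine, if_exists="append",
# ...           method=insert_rows)  # doctest: +SKIP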

2891 @final 

2892 @doc( 

2893 storage_options=_shared_docs["storage_options"], 

2894 compression_options=_shared_docs["compression_options"] % "path", 

2895 ) 

2896 def to_pickle( 

2897 self, 

2898 path: FilePath | WriteBuffer[bytes], 

2899 compression: CompressionOptions = "infer", 

2900 protocol: int = pickle.HIGHEST_PROTOCOL, 

2901 storage_options: StorageOptions = None, 

2902 ) -> None: 

2903 """ 

2904 Pickle (serialize) object to file. 

2905 

2906 Parameters 

2907 ---------- 

2908 path : str, path object, or file-like object 

2909 String, path object (implementing ``os.PathLike[str]``), or file-like 

2910 object implementing a binary ``write()`` function. File path where 

2911 the pickled object will be stored. 

2912 {compression_options} 

2913 protocol : int 

2914 Int which indicates which protocol should be used by the pickler, 

2915 default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible 

2916 values are 0, 1, 2, 3, 4, 5. A negative value for the protocol 

2917 parameter is equivalent to setting its value to HIGHEST_PROTOCOL. 

2918 

2919 .. [1] https://docs.python.org/3/library/pickle.html. 

2920 

2921 {storage_options} 

2922 

2923 .. versionadded:: 1.2.0 

2924 

2925 See Also 

2926 -------- 

2927 read_pickle : Load pickled pandas object (or any object) from file. 

2928 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

2929 DataFrame.to_sql : Write DataFrame to a SQL database. 

2930 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2931 

2932 Examples 

2933 -------- 

2934 >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP 

2935 >>> original_df # doctest: +SKIP 

2936 foo bar 

2937 0 0 5 

2938 1 1 6 

2939 2 2 7 

2940 3 3 8 

2941 4 4 9 

2942 >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP 

2943 

2944 >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP 

2945 >>> unpickled_df # doctest: +SKIP 

2946 foo bar 

2947 0 0 5 

2948 1 1 6 

2949 2 2 7 

2950 3 3 8 

2951 4 4 9 

2952 """ # noqa: E501 

2953 from pandas.io.pickle import to_pickle 

2954 

2955 to_pickle( 

2956 self, 

2957 path, 

2958 compression=compression, 

2959 protocol=protocol, 

2960 storage_options=storage_options, 

2961 ) 

2962 

2963 @final 

2964 def to_clipboard( 

2965 self, excel: bool_t = True, sep: str | None = None, **kwargs 

2966 ) -> None: 

2967 r""" 

2968 Copy object to the system clipboard. 

2969 

2970 Write a text representation of object to the system clipboard. 

2971 This can be pasted into Excel, for example. 

2972 

2973 Parameters 

2974 ---------- 

2975 excel : bool, default True 

2976 Produce output in a csv format for easy pasting into excel. 

2977 

2978 - True, use the provided separator for csv pasting. 

2979 - False, write a string representation of the object to the clipboard. 

2980 

2981 sep : str, default ``'\t'`` 

2982 Field delimiter. 

2983 **kwargs 

2984 These parameters will be passed to DataFrame.to_csv. 

2985 

2986 See Also 

2987 -------- 

2988 DataFrame.to_csv : Write a DataFrame to a comma-separated values 

2989 (csv) file. 

2990 read_clipboard : Read text from clipboard and pass to read_csv. 

2991 

2992 Notes 

2993 ----- 

2994 Requirements for your platform:

2995 

2996 - Linux : `xclip`, or `xsel` (with `PyQt4` modules) 

2997 - Windows : none 

2998 - macOS : none 

2999 

3000 This method uses the clipboard mechanisms of the `pyperclip` package. A

3001 solution for rendering any output string format is given in the examples.

3002 

3003 Examples 

3004 -------- 

3005 Copy the contents of a DataFrame to the clipboard. 

3006 

3007 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) 

3008 

3009 >>> df.to_clipboard(sep=',') # doctest: +SKIP 

3010 ... # Wrote the following to the system clipboard: 

3011 ... # ,A,B,C 

3012 ... # 0,1,2,3 

3013 ... # 1,4,5,6 

3014 

3015 We can omit the index by passing the keyword `index` and setting 

3016 it to ``False``.

3017 

3018 >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP 

3019 ... # Wrote the following to the system clipboard: 

3020 ... # A,B,C 

3021 ... # 1,2,3 

3022 ... # 4,5,6 

3023 

3024 Using the original `pyperclip` package for any string output format. 

3025 

3026 .. code-block:: python 

3027 

3028 import pyperclip 

3029 html = df.style.to_html() 

3030 pyperclip.copy(html) 

3031 """ 

3032 from pandas.io import clipboards 

3033 

3034 clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) 

3035 

3036 @final 

3037 def to_xarray(self): 

3038 """ 

3039 Return an xarray object from the pandas object. 

3040 

3041 Returns 

3042 ------- 

3043 xarray.DataArray or xarray.Dataset 

3044 Data in the pandas structure converted to Dataset if the object is 

3045 a DataFrame, or a DataArray if the object is a Series. 

3046 

3047 See Also 

3048 -------- 

3049 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

3050 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

3051 

3052 Notes 

3053 ----- 

3054 See the `xarray docs <https://xarray.pydata.org/en/stable/>`__ 

3055 

3056 Examples 

3057 -------- 

3058 >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), 

3059 ... ('parrot', 'bird', 24.0, 2), 

3060 ... ('lion', 'mammal', 80.5, 4), 

3061 ... ('monkey', 'mammal', np.nan, 4)], 

3062 ... columns=['name', 'class', 'max_speed', 

3063 ... 'num_legs']) 

3064 >>> df 

3065 name class max_speed num_legs 

3066 0 falcon bird 389.0 2 

3067 1 parrot bird 24.0 2 

3068 2 lion mammal 80.5 4 

3069 3 monkey mammal NaN 4 

3070 

3071 >>> df.to_xarray() 

3072 <xarray.Dataset> 

3073 Dimensions: (index: 4) 

3074 Coordinates: 

3075 * index (index) int64 0 1 2 3 

3076 Data variables: 

3077 name (index) object 'falcon' 'parrot' 'lion' 'monkey' 

3078 class (index) object 'bird' 'bird' 'mammal' 'mammal' 

3079 max_speed (index) float64 389.0 24.0 80.5 nan 

3080 num_legs (index) int64 2 2 4 4 

3081 

3082 >>> df['max_speed'].to_xarray() 

3083 <xarray.DataArray 'max_speed' (index: 4)> 

3084 array([389. , 24. , 80.5, nan]) 

3085 Coordinates: 

3086 * index (index) int64 0 1 2 3 

3087 

3088 >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', 

3089 ... '2018-01-02', '2018-01-02']) 

3090 >>> df_multiindex = pd.DataFrame({'date': dates, 

3091 ... 'animal': ['falcon', 'parrot', 

3092 ... 'falcon', 'parrot'], 

3093 ... 'speed': [350, 18, 361, 15]}) 

3094 >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) 

3095 

3096 >>> df_multiindex 

3097 speed 

3098 date animal 

3099 2018-01-01 falcon 350 

3100 parrot 18 

3101 2018-01-02 falcon 361 

3102 parrot 15 

3103 

3104 >>> df_multiindex.to_xarray() 

3105 <xarray.Dataset> 

3106 Dimensions: (date: 2, animal: 2) 

3107 Coordinates: 

3108 * date (date) datetime64[ns] 2018-01-01 2018-01-02 

3109 * animal (animal) object 'falcon' 'parrot' 

3110 Data variables: 

3111 speed (date, animal) int64 350 18 361 15 

3112 """ 

3113 xarray = import_optional_dependency("xarray") 

3114 

3115 if self.ndim == 1: 

3116 return xarray.DataArray.from_series(self) 

3117 else: 

3118 return xarray.Dataset.from_dataframe(self) 

3119 

3120 @overload 

3121 def to_latex( 

3122 self, 

3123 buf: None = ..., 

3124 columns: Sequence[Hashable] | None = ..., 

3125 header: bool_t | Sequence[str] = ..., 

3126 index: bool_t = ..., 

3127 na_rep: str = ..., 

3128 formatters: FormattersType | None = ..., 

3129 float_format: FloatFormatType | None = ..., 

3130 sparsify: bool_t | None = ..., 

3131 index_names: bool_t = ..., 

3132 bold_rows: bool_t = ..., 

3133 column_format: str | None = ..., 

3134 longtable: bool_t | None = ..., 

3135 escape: bool_t | None = ..., 

3136 encoding: str | None = ..., 

3137 decimal: str = ..., 

3138 multicolumn: bool_t | None = ..., 

3139 multicolumn_format: str | None = ..., 

3140 multirow: bool_t | None = ..., 

3141 caption: str | tuple[str, str] | None = ..., 

3142 label: str | None = ..., 

3143 position: str | None = ..., 

3144 ) -> str: 

3145 ... 

3146 

3147 @overload 

3148 def to_latex( 

3149 self, 

3150 buf: FilePath | WriteBuffer[str], 

3151 columns: Sequence[Hashable] | None = ..., 

3152 header: bool_t | Sequence[str] = ..., 

3153 index: bool_t = ..., 

3154 na_rep: str = ..., 

3155 formatters: FormattersType | None = ..., 

3156 float_format: FloatFormatType | None = ..., 

3157 sparsify: bool_t | None = ..., 

3158 index_names: bool_t = ..., 

3159 bold_rows: bool_t = ..., 

3160 column_format: str | None = ..., 

3161 longtable: bool_t | None = ..., 

3162 escape: bool_t | None = ..., 

3163 encoding: str | None = ..., 

3164 decimal: str = ..., 

3165 multicolumn: bool_t | None = ..., 

3166 multicolumn_format: str | None = ..., 

3167 multirow: bool_t | None = ..., 

3168 caption: str | tuple[str, str] | None = ..., 

3169 label: str | None = ..., 

3170 position: str | None = ..., 

3171 ) -> None: 

3172 ... 

3173 

3174 @final 

3175 def to_latex( 

3176 self, 

3177 buf: FilePath | WriteBuffer[str] | None = None, 

3178 columns: Sequence[Hashable] | None = None, 

3179 header: bool_t | Sequence[str] = True, 

3180 index: bool_t = True, 

3181 na_rep: str = "NaN", 

3182 formatters: FormattersType | None = None, 

3183 float_format: FloatFormatType | None = None, 

3184 sparsify: bool_t | None = None, 

3185 index_names: bool_t = True, 

3186 bold_rows: bool_t = False, 

3187 column_format: str | None = None, 

3188 longtable: bool_t | None = None, 

3189 escape: bool_t | None = None, 

3190 encoding: str | None = None, 

3191 decimal: str = ".", 

3192 multicolumn: bool_t | None = None, 

3193 multicolumn_format: str | None = None, 

3194 multirow: bool_t | None = None, 

3195 caption: str | tuple[str, str] | None = None, 

3196 label: str | None = None, 

3197 position: str | None = None, 

3198 ) -> str | None: 

3199 r""" 

3200 Render object to a LaTeX tabular, longtable, or nested table. 

3201 

3202 Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted 

3203 into a main LaTeX document or read from an external file 

3204 with ``\input{{table.tex}}``. 

3205 

3206 .. versionchanged:: 1.2.0 

3207 Added position argument, changed meaning of caption argument. 

3208 

3209 .. versionchanged:: 2.0.0 

3210 Refactored to use the Styler implementation via jinja2 templating. 

3211 

3212 Parameters 

3213 ---------- 

3214 buf : str, Path or StringIO-like, optional, default None 

3215 Buffer to write to. If None, the output is returned as a string. 

3216 columns : list of label, optional 

3217 The subset of columns to write. Writes all columns by default. 

3218 header : bool or list of str, default True 

3219 Write out the column names. If a list of strings is given, 

3220 it is assumed to be aliases for the column names. 

3221 index : bool, default True 

3222 Write row names (index). 

3223 na_rep : str, default 'NaN' 

3224 Missing data representation. 

3225 formatters : list of functions or dict of {{str: function}}, optional 

3226 Formatter functions to apply to columns' elements by position or 

3227 name. The result of each function must be a unicode string. 

3228 List must be of length equal to the number of columns. 

3229 float_format : one-parameter function or str, optional, default None 

3230 Formatter for floating point numbers. For example 

3231 ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will 

3232 both result in 0.1234 being formatted as 0.12. 

3233 sparsify : bool, optional 

3234 Set to False for a DataFrame with a hierarchical index to print 

3235 every multiindex key at each row. By default, the value will be 

3236 read from the config module. 

3237 index_names : bool, default True 

3238 Prints the names of the indexes. 

3239 bold_rows : bool, default False 

3240 Make the row labels bold in the output. 

3241 column_format : str, optional 

3242 The columns format as specified in `LaTeX table format 

3243 <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 

3244 columns. By default, 'l' will be used for all columns except 

3245 columns of numbers, which default to 'r'. 

3246 longtable : bool, optional 

3247 Use a longtable environment instead of tabular. Requires 

3248 adding a \usepackage{{longtable}} to your LaTeX preamble. 

3249 By default, the value will be read from the pandas config 

3250 module, and set to `True` if the option ``styler.latex.environment`` is 

3251 `"longtable"`. 

3252 

3253 .. versionchanged:: 2.0.0 

3254 The pandas option affecting this argument has changed. 

3255 escape : bool, optional 

3256 By default, the value will be read from the pandas config 

3257 module and set to `True` if the option ``styler.format.escape`` is 

3258 `"latex"`. When set to False, LaTeX special characters in column

3259 names are not escaped.

3260 

3261 .. versionchanged:: 2.0.0 

3262 The pandas option affecting this argument has changed, as has the 

3263 default value to `False`. 

3264 encoding : str, optional 

3265 A string representing the encoding to use in the output file, 

3266 defaults to 'utf-8'. 

3267 decimal : str, default '.' 

3268 Character recognized as decimal separator, e.g. ',' in Europe. 

3269 multicolumn : bool, default True 

3270 Use \multicolumn to enhance MultiIndex columns. 

3271 The default will be read from the config module, and is set 

3272 as the option ``styler.sparse.columns``. 

3273 

3274 .. versionchanged:: 2.0.0 

3275 The pandas option affecting this argument has changed. 

3276 multicolumn_format : str, default 'r' 

3277 The alignment for multicolumns, similar to `column_format`.

3278 The default will be read from the config module, and is set as the option 

3279 ``styler.latex.multicol_align``. 

3280 

3281 .. versionchanged:: 2.0.0 

3282 The pandas option affecting this argument has changed, as has the 

3283 default value to "r". 

3284 multirow : bool, default True 

3285 Use \multirow to enhance MultiIndex rows. Requires adding a 

3286 \usepackage{{multirow}} to your LaTeX preamble. Will print 

3287 centered labels (instead of top-aligned) across the contained 

3288 rows, separating groups via clines. The default will be read 

3289 from the pandas config module, and is set as the option 

3290 ``styler.sparse.index``. 

3291 

3292 .. versionchanged:: 2.0.0 

3293 The pandas option affecting this argument has changed, as has the 

3294 default value to `True`. 

3295 caption : str or tuple, optional 

3296 Tuple (full_caption, short_caption), 

3297 which results in ``\caption[short_caption]{{full_caption}}``; 

3298 if a single string is passed, no short caption will be set. 

3299 

3300 .. versionchanged:: 1.2.0 

3301 Optionally allow caption to be a tuple ``(full_caption, short_caption)``. 

3302 

3303 label : str, optional 

3304 The LaTeX label to be placed inside ``\label{{}}`` in the output. 

3305 This is used with ``\ref{{}}`` in the main ``.tex`` file. 

3306 

3307 position : str, optional 

3308 The LaTeX positional argument for tables, to be placed after 

3309 ``\begin{{}}`` in the output. 

3310 

3311 .. versionadded:: 1.2.0 

3312 

3313 Returns 

3314 ------- 

3315 str or None 

3316 If buf is None, returns the result as a string. Otherwise returns None. 

3317 

3318 See Also 

3319 -------- 

3320 io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX 

3321 with conditional formatting. 

3322 DataFrame.to_string : Render a DataFrame to a console-friendly 

3323 tabular output. 

3324 DataFrame.to_html : Render a DataFrame as an HTML table. 

3325 

3326 Notes 

3327 ----- 

3328 As of v2.0.0 this method has changed to use the Styler implementation as 

3329 part of :meth:`.Styler.to_latex` via ``jinja2`` templating. This means 

3330 that ``jinja2`` is a requirement, and needs to be installed, for this method 

3331 to function. It is advised that users switch to using Styler, since that 

3332 implementation is more frequently updated and contains much more 

3333 flexibility with the output. 

3334 

3335 Examples 

3336 -------- 

3337 Convert a general DataFrame to LaTeX with formatting: 

3338 

3339 >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'], 

3340 ... age=[26, 45], 

3341 ... height=[181.23, 177.65])) 

3342 >>> print(df.to_latex(index=False, 

3343 ... formatters={"name": str.upper}, 

3344 ... float_format="{:.1f}".format, 

3345 ... )) # doctest: +SKIP 

3346 \begin{tabular}{lrr} 

3347 \toprule 

3348 name & age & height \\ 

3349 \midrule 

3350 RAPHAEL & 26 & 181.2 \\ 

3351 DONATELLO & 45 & 177.7 \\ 

3352 \bottomrule 

3353 \end{tabular} 

3354 """ 

3355 # Get defaults from the pandas config 

3356 if self.ndim == 1: 

3357 self = self.to_frame() 

3358 if longtable is None: 

3359 longtable = config.get_option("styler.latex.environment") == "longtable" 

3360 if escape is None: 

3361 escape = config.get_option("styler.format.escape") == "latex" 

3362 if multicolumn is None: 

3363 multicolumn = config.get_option("styler.sparse.columns") 

3364 if multicolumn_format is None: 

3365 multicolumn_format = config.get_option("styler.latex.multicol_align") 

3366 if multirow is None: 

3367 multirow = config.get_option("styler.sparse.index") 

3368 

3369 if column_format is not None and not isinstance(column_format, str): 

3370 raise ValueError("`column_format` must be str or unicode") 

3371 length = len(self.columns) if columns is None else len(columns) 

3372 if isinstance(header, (list, tuple)) and len(header) != length: 

3373 raise ValueError(f"Writing {length} cols but got {len(header)} aliases") 

3374 

3375 # Refactor formatters/float_format/decimal/na_rep/escape to Styler structure 

3376 base_format_ = { 

3377 "na_rep": na_rep, 

3378 "escape": "latex" if escape else None, 

3379 "decimal": decimal, 

3380 } 

3381 index_format_: dict[str, Any] = {"axis": 0, **base_format_} 

3382 column_format_: dict[str, Any] = {"axis": 1, **base_format_} 

3383 

3384 if isinstance(float_format, str): 

3385 float_format_: Callable | None = lambda x: float_format % x 

3386 else: 

3387 float_format_ = float_format 

3388 

3389 def _wrap(x, alt_format_): 

3390 if isinstance(x, (float, complex)) and float_format_ is not None: 

3391 return float_format_(x) 

3392 else: 

3393 return alt_format_(x) 

3394 

3395 formatters_: list | tuple | dict | Callable | None = None 

3396 if isinstance(formatters, list): 

3397 formatters_ = { 

3398 c: partial(_wrap, alt_format_=formatters[i]) 

3399 for i, c in enumerate(self.columns) 

3400 } 

3401 elif isinstance(formatters, dict): 

3402 index_formatter = formatters.pop("__index__", None) 

3403 column_formatter = formatters.pop("__columns__", None) 

3404 if index_formatter is not None: 

3405 index_format_.update({"formatter": index_formatter}) 

3406 if column_formatter is not None: 

3407 column_format_.update({"formatter": column_formatter}) 

3408 

3409 formatters_ = formatters 

3410 float_columns = self.select_dtypes(include="float").columns 

3411 for col in float_columns: 

3412 if col not in formatters.keys(): 

3413 formatters_.update({col: float_format_}) 

3414 elif formatters is None and float_format is not None: 

3415 formatters_ = partial(_wrap, alt_format_=lambda v: v) 

3416 format_index_ = [index_format_, column_format_] 

3417 

3418 # Deal with hiding indexes and relabelling column names 

3419 hide_: list[dict] = [] 

3420 relabel_index_: list[dict] = [] 

3421 if columns: 

3422 hide_.append( 

3423 { 

3424 "subset": [c for c in self.columns if c not in columns], 

3425 "axis": "columns", 

3426 } 

3427 ) 

3428 if header is False: 

3429 hide_.append({"axis": "columns"}) 

3430 elif isinstance(header, (list, tuple)): 

3431 relabel_index_.append({"labels": header, "axis": "columns"}) 

3432 format_index_ = [index_format_] # column_format is overwritten 

3433 

3434 if index is False: 

3435 hide_.append({"axis": "index"}) 

3436 if index_names is False: 

3437 hide_.append({"names": True, "axis": "index"}) 

3438 

3439 render_kwargs_ = { 

3440 "hrules": True, 

3441 "sparse_index": sparsify, 

3442 "sparse_columns": sparsify, 

3443 "environment": "longtable" if longtable else None, 

3444 "multicol_align": multicolumn_format 

3445 if multicolumn 

3446 else f"naive-{multicolumn_format}", 

3447 "multirow_align": "t" if multirow else "naive", 

3448 "encoding": encoding, 

3449 "caption": caption, 

3450 "label": label, 

3451 "position": position, 

3452 "column_format": column_format, 

3453 "clines": "skip-last;data" 

3454 if (multirow and isinstance(self.index, MultiIndex)) 

3455 else None, 

3456 "bold_rows": bold_rows, 

3457 } 

3458 

3459 return self._to_latex_via_styler( 

3460 buf, 

3461 hide=hide_, 

3462 relabel_index=relabel_index_, 

3463 format={"formatter": formatters_, **base_format_}, 

3464 format_index=format_index_, 

3465 render_kwargs=render_kwargs_, 

3466 ) 

3467 

3468 def _to_latex_via_styler( 

3469 self, 

3470 buf=None, 

3471 *, 

3472 hide: dict | list[dict] | None = None, 

3473 relabel_index: dict | list[dict] | None = None, 

3474 format: dict | list[dict] | None = None, 

3475 format_index: dict | list[dict] | None = None, 

3476 render_kwargs: dict | None = None, 

3477 ): 

3478 """ 

3479 Render object to a LaTeX tabular, longtable, or nested table. 

3480 

3481 Uses the ``Styler`` implementation with the following, ordered, method chaining: 

3482 

3483 .. code-block:: python 

3484 styler = Styler(DataFrame) 

3485 styler.hide(**hide) 

3486 styler.relabel_index(**relabel_index) 

3487 styler.format(**format) 

3488 styler.format_index(**format_index) 

3489 styler.to_latex(buf=buf, **render_kwargs) 

3490 

3491 Parameters 

3492 ---------- 

3493 buf : str, Path or StringIO-like, optional, default None 

3494 Buffer to write to. If None, the output is returned as a string. 

3495 hide : dict, list of dict 

3496 Keyword args to pass to the method call of ``Styler.hide``. If a list,

3497 the method is called once per entry.

3498 relabel_index : dict, list of dict 

3499 Keyword args to pass to the method call of ``Styler.relabel_index``. If a

3500 list, the method is called once per entry.

3501 format : dict, list of dict 

3502 Keyword args to pass to the method call of ``Styler.format``. If a list,

3503 the method is called once per entry.

3504 format_index : dict, list of dict 

3505 Keyword args to pass to the method call of ``Styler.format_index``. If a

3506 list, the method is called once per entry.

3507 render_kwargs : dict 

3508 Keyword args to pass to the method call of ``Styler.to_latex``. 

3509 

3510 Returns 

3511 ------- 

3512 str or None 

3513 If buf is None, returns the result as a string. Otherwise returns None. 

3514 """ 

3515 from pandas.io.formats.style import Styler 

3516 

3517 self = cast("DataFrame", self) 

3518 styler = Styler(self, uuid="") 

3519 

3520 for kw_name in ["hide", "relabel_index", "format", "format_index"]: 

3521 kw = vars()[kw_name] 

3522 if isinstance(kw, dict): 

3523 getattr(styler, kw_name)(**kw) 

3524 elif isinstance(kw, list): 

3525 for sub_kw in kw: 

3526 getattr(styler, kw_name)(**sub_kw) 

3527 

3528 # bold_rows is not a direct kwarg of Styler.to_latex 

3529 render_kwargs = {} if render_kwargs is None else render_kwargs 

3530 if render_kwargs.pop("bold_rows"): 

3531 styler.applymap_index(lambda v: "textbf:--rwrap;") 

3532 

3533 return styler.to_latex(buf=buf, **render_kwargs) 
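# Illustrative sketch (names and values here are hypothetical, not part of the
# source) of how the kwargs above expand into chained Styler calls:
#
#   from pandas import DataFrame
#   from pandas.io.formats.style import Styler
#   df = DataFrame({"a": [1.0, 2.0]})
#   styler = Styler(df, uuid="")
#   styler.hide(axis="index")                    # a single dict -> one call
#   for sub_kw in [{"formatter": None}, {"escape": "latex"}]:
#       styler.format(**sub_kw)                  # a list -> one call per dict
#   latex = styler.to_latex(hrules=True)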

3534 

3535 @overload 

3536 def to_csv( 

3537 self, 

3538 path_or_buf: None = ..., 

3539 sep: str = ..., 

3540 na_rep: str = ..., 

3541 float_format: str | Callable | None = ..., 

3542 columns: Sequence[Hashable] | None = ..., 

3543 header: bool_t | list[str] = ..., 

3544 index: bool_t = ..., 

3545 index_label: IndexLabel | None = ..., 

3546 mode: str = ..., 

3547 encoding: str | None = ..., 

3548 compression: CompressionOptions = ..., 

3549 quoting: int | None = ..., 

3550 quotechar: str = ..., 

3551 lineterminator: str | None = ..., 

3552 chunksize: int | None = ..., 

3553 date_format: str | None = ..., 

3554 doublequote: bool_t = ..., 

3555 escapechar: str | None = ..., 

3556 decimal: str = ..., 

3557 errors: str = ..., 

3558 storage_options: StorageOptions = ..., 

3559 ) -> str: 

3560 ... 

3561 

3562 @overload 

3563 def to_csv( 

3564 self, 

3565 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], 

3566 sep: str = ..., 

3567 na_rep: str = ..., 

3568 float_format: str | Callable | None = ..., 

3569 columns: Sequence[Hashable] | None = ..., 

3570 header: bool_t | list[str] = ..., 

3571 index: bool_t = ..., 

3572 index_label: IndexLabel | None = ..., 

3573 mode: str = ..., 

3574 encoding: str | None = ..., 

3575 compression: CompressionOptions = ..., 

3576 quoting: int | None = ..., 

3577 quotechar: str = ..., 

3578 lineterminator: str | None = ..., 

3579 chunksize: int | None = ..., 

3580 date_format: str | None = ..., 

3581 doublequote: bool_t = ..., 

3582 escapechar: str | None = ..., 

3583 decimal: str = ..., 

3584 errors: str = ..., 

3585 storage_options: StorageOptions = ..., 

3586 ) -> None: 

3587 ... 

3588 

3589 @final 

3590 @doc( 

3591 storage_options=_shared_docs["storage_options"], 

3592 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

3593 ) 

3594 def to_csv( 

3595 self, 

3596 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

3597 sep: str = ",", 

3598 na_rep: str = "", 

3599 float_format: str | Callable | None = None, 

3600 columns: Sequence[Hashable] | None = None, 

3601 header: bool_t | list[str] = True, 

3602 index: bool_t = True, 

3603 index_label: IndexLabel | None = None, 

3604 mode: str = "w", 

3605 encoding: str | None = None, 

3606 compression: CompressionOptions = "infer", 

3607 quoting: int | None = None, 

3608 quotechar: str = '"', 

3609 lineterminator: str | None = None, 

3610 chunksize: int | None = None, 

3611 date_format: str | None = None, 

3612 doublequote: bool_t = True, 

3613 escapechar: str | None = None, 

3614 decimal: str = ".", 

3615 errors: str = "strict", 

3616 storage_options: StorageOptions = None, 

3617 ) -> str | None: 

3618 r""" 

3619 Write object to a comma-separated values (csv) file. 

3620 

3621 Parameters 

3622 ---------- 

3623 path_or_buf : str, path object, file-like object, or None, default None 

3624 String, path object (implementing os.PathLike[str]), or file-like 

3625 object implementing a write() function. If None, the result is 

3626 returned as a string. If a non-binary file object is passed, it should 

3627 be opened with `newline=''`, disabling universal newlines. If a binary 

3628 file object is passed, `mode` might need to contain a `'b'`. 

3629 

3630 .. versionchanged:: 1.2.0 

3631 

3632 Support for binary file objects was introduced. 

3633 

3634 sep : str, default ',' 

3635 String of length 1. Field delimiter for the output file. 

3636 na_rep : str, default '' 

3637 Missing data representation. 

3638 float_format : str, Callable, default None 

3639 Format string for floating point numbers. If a Callable is given, it takes 

3640 precedence over other numeric formatting parameters, like decimal. 

3641 columns : sequence, optional 

3642 Columns to write. 

3643 header : bool or list of str, default True 

3644 Write out the column names. If a list of strings is given it is 

3645 assumed to be aliases for the column names. 

3646 index : bool, default True 

3647 Write row names (index). 

3648 index_label : str or sequence, or False, default None 

3649 Column label for index column(s) if desired. If None is given, and 

3650 `header` and `index` are True, then the index names are used. A 

3651 sequence should be given if the object uses MultiIndex. If 

3652 False, do not print fields for index names. Use index_label=False 

3653 for easier importing in R. 

3654 mode : str, default 'w' 

3655 Python write mode. The available write modes are the same as 

3656 :py:func:`open`. 

3657 encoding : str, optional 

3658 A string representing the encoding to use in the output file, 

3659 defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` 

3660 is a non-binary file object. 

3661 {compression_options} 

3662 

3663 .. versionchanged:: 1.0.0 

3664 

3665 May now be a dict with key 'method' as compression mode 

3666 and other entries as additional compression options if 

3667 compression mode is 'zip'. 

3668 

3669 .. versionchanged:: 1.1.0 

3670 

3671 Passing compression options as keys in dict is 

3672 supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'. 

3673 

3674 .. versionchanged:: 1.2.0 

3675 

3676 Compression is supported for binary file objects. 

3677 

3678 .. versionchanged:: 1.2.0 

3679 

3680 Previous versions forwarded dict entries for 'gzip' to 

3681 `gzip.open` instead of `gzip.GzipFile` which prevented 

3682 setting `mtime`. 

3683 

3684 quoting : optional constant from csv module 

3685 Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` 

3686 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC 

3687 will treat them as non-numeric. 

3688 quotechar : str, default '\"' 

3689 String of length 1. Character used to quote fields. 

3690 lineterminator : str, optional 

3691 The newline character or character sequence to use in the output 

3692 file. Defaults to `os.linesep`, which depends on the OS in which 

3693 this method is called (e.g. '\\n' for Linux, '\\r\\n' for Windows). 

3694 

3695 .. versionchanged:: 1.5.0 

3696 

3697 Previously was line_terminator, changed for consistency with 

3698 read_csv and the standard library 'csv' module. 

3699 

3700 chunksize : int or None 

3701 Rows to write at a time. 

3702 date_format : str, default None 

3703 Format string for datetime objects. 

3704 doublequote : bool, default True 

3705 Control quoting of `quotechar` inside a field. 

3706 escapechar : str, default None 

3707 String of length 1. Character used to escape `sep` and `quotechar` 

3708 when appropriate. 

3709 decimal : str, default '.' 

3710 Character recognized as decimal separator. E.g. use ',' for 

3711 European data. 

3712 errors : str, default 'strict' 

3713 Specifies how encoding and decoding errors are to be handled. 

3714 See the errors argument for :func:`open` for a full list 

3715 of options. 

3716 

3717 .. versionadded:: 1.1.0 

3718 

3719 {storage_options} 

3720 

3721 .. versionadded:: 1.2.0 

3722 

3723 Returns 

3724 ------- 

3725 None or str 

3726 If path_or_buf is None, returns the resulting csv format as a 

3727 string. Otherwise returns None. 

3728 

3729 See Also 

3730 -------- 

3731 read_csv : Load a CSV file into a DataFrame. 

3732 to_excel : Write DataFrame to an Excel file. 

3733 

3734 Examples 

3735 -------- 

3736 >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], 

3737 ... 'mask': ['red', 'purple'], 

3738 ... 'weapon': ['sai', 'bo staff']}}) 

3739 >>> df.to_csv(index=False) 

3740 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' 

3741 

3742 Create 'out.zip' containing 'out.csv' 

3743 

3744 >>> compression_opts = dict(method='zip', 

3745 ... archive_name='out.csv') # doctest: +SKIP 

3746 >>> df.to_csv('out.zip', index=False, 

3747 ... compression=compression_opts) # doctest: +SKIP 

3748 

3749 To write a csv file to a new folder or nested folder you will first 

3750 need to create it using either Pathlib or os: 

3751 

3752 >>> from pathlib import Path # doctest: +SKIP 

3753 >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP 

3754 >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP 

3755 >>> df.to_csv(filepath) # doctest: +SKIP 

3756 

3757 >>> import os # doctest: +SKIP 

3758 >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP 

3759 >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP 

3760 """ 

3761 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

3762 

3763 formatter = DataFrameFormatter( 

3764 frame=df, 

3765 header=header, 

3766 index=index, 

3767 na_rep=na_rep, 

3768 float_format=float_format, 

3769 decimal=decimal, 

3770 ) 

3771 

3772 return DataFrameRenderer(formatter).to_csv( 

3773 path_or_buf, 

3774 lineterminator=lineterminator, 

3775 sep=sep, 

3776 encoding=encoding, 

3777 errors=errors, 

3778 compression=compression, 

3779 quoting=quoting, 

3780 columns=columns, 

3781 index_label=index_label, 

3782 mode=mode, 

3783 chunksize=chunksize, 

3784 quotechar=quotechar, 

3785 date_format=date_format, 

3786 doublequote=doublequote, 

3787 escapechar=escapechar, 

3788 storage_options=storage_options, 

3789 ) 

3790 

3791 # ---------------------------------------------------------------------- 

3792 # Lookup Caching 

3793 

3794 def _reset_cacher(self) -> None: 

3795 """ 

3796 Reset the cacher. 

3797 """ 

3798 raise AbstractMethodError(self) 

3799 

3800 def _maybe_update_cacher( 

3801 self, 

3802 clear: bool_t = False, 

3803 verify_is_copy: bool_t = True, 

3804 inplace: bool_t = False, 

3805 ) -> None: 

3806 """ 

3807 See if we need to update our parent cacher if clear, then clear our 

3808 cache. 

3809 

3810 Parameters 

3811 ---------- 

3812 clear : bool, default False 

3813 Clear the item cache. 

3814 verify_is_copy : bool, default True 

3815 Provide is_copy checks. 

3816 """ 

3817 if using_copy_on_write(): 

3818 return 

3819 

3820 if verify_is_copy: 

3821 self._check_setitem_copy(t="referent") 

3822 

3823 if clear: 

3824 self._clear_item_cache() 
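# Under copy-on-write the parent/child cacher mechanism is disabled entirely,
# so the early return above makes both the is_copy check and the cache clear
# no-ops in that mode.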

3825 

3826 def _clear_item_cache(self) -> None: 

3827 raise AbstractMethodError(self) 

3828 

3829 # ---------------------------------------------------------------------- 

3830 # Indexing Methods 

3831 

3832 def take(self: NDFrameT, indices, axis: Axis = 0, **kwargs) -> NDFrameT: 

3833 """ 

3834 Return the elements in the given *positional* indices along an axis. 

3835 

3836 This means that we are not indexing according to actual values in 

3837 the index attribute of the object. We are indexing according to the 

3838 actual position of the element in the object. 

3839 

3840 Parameters 

3841 ---------- 

3842 indices : array-like 

3843 An array of ints indicating which positions to take. 

3844 axis : {0 or 'index', 1 or 'columns', None}, default 0 

3845 The axis on which to select elements. ``0`` means that we are 

3846 selecting rows, ``1`` means that we are selecting columns. 

3847 For `Series` this parameter is unused and defaults to 0. 

3848 **kwargs 

3849 For compatibility with :meth:`numpy.take`. Has no effect on the 

3850 output. 

3851 

3852 Returns 

3853 ------- 

3854 same type as caller 

3855 An array-like containing the elements taken from the object. 

3856 

3857 See Also 

3858 -------- 

3859 DataFrame.loc : Select a subset of a DataFrame by labels. 

3860 DataFrame.iloc : Select a subset of a DataFrame by positions. 

3861 numpy.take : Take elements from an array along an axis. 

3862 

3863 Examples 

3864 -------- 

3865 >>> df = pd.DataFrame([('falcon', 'bird', 389.0), 

3866 ... ('parrot', 'bird', 24.0), 

3867 ... ('lion', 'mammal', 80.5), 

3868 ... ('monkey', 'mammal', np.nan)], 

3869 ... columns=['name', 'class', 'max_speed'], 

3870 ... index=[0, 2, 3, 1]) 

3871 >>> df 

3872 name class max_speed 

3873 0 falcon bird 389.0 

3874 2 parrot bird 24.0 

3875 3 lion mammal 80.5 

3876 1 monkey mammal NaN 

3877 

3878 Take elements at positions 0 and 3 along the axis 0 (default). 

3879 

3880 Note how the actual indices selected (0 and 1) do not correspond to 

3881 our selected indices 0 and 3. That's because we are selecting the 0th 

3882 and 3rd rows, not rows whose indices equal 0 and 3. 

3883 

3884 >>> df.take([0, 3]) 

3885 name class max_speed 

3886 0 falcon bird 389.0 

3887 1 monkey mammal NaN 

3888 

3889 Take elements at indices 1 and 2 along the axis 1 (column selection). 

3890 

3891 >>> df.take([1, 2], axis=1) 

3892 class max_speed 

3893 0 bird 389.0 

3894 2 bird 24.0 

3895 3 mammal 80.5 

3896 1 mammal NaN 

3897 

3898 We may take elements using negative integers for positive indices, 

3899 starting from the end of the object, just like with Python lists. 

3900 

3901 >>> df.take([-1, -2]) 

3902 name class max_speed 

3903 1 monkey mammal NaN 

3904 3 lion mammal 80.5 

3905 """ 

3906 

3907 nv.validate_take((), kwargs) 

3908 

3909 return self._take(indices, axis) 

3910 

3911 def _take( 

3912 self: NDFrameT, 

3913 indices, 

3914 axis: Axis = 0, 

3915 convert_indices: bool_t = True, 

3916 ) -> NDFrameT: 

3917 """ 

3918 Internal version of the `take` method, allowing specification of additional args. 

3919 

3920 See the docstring of `take` for full explanation of the parameters. 

3921 """ 

3922 if not isinstance(indices, slice): 

3923 indices = np.asarray(indices, dtype=np.intp) 

3924 if ( 

3925 axis == 0 

3926 and indices.ndim == 1 

3927 and using_copy_on_write() 

3928 and is_range_indexer(indices, len(self)) 

3929 ): 

3930 return self.copy(deep=None) 

3931 

3932 new_data = self._mgr.take( 

3933 indices, 

3934 axis=self._get_block_manager_axis(axis), 

3935 verify=True, 

3936 convert_indices=convert_indices, 

3937 ) 

3938 return self._constructor(new_data).__finalize__(self, method="take") 
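# Fast-path note: with copy-on-write enabled, a 1-D row indexer that is exactly
# range(len(self)) short-circuits above to self.copy(deep=None), a lazy copy
# that avoids routing through BlockManager.take at all.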

3939 

3940 def _take_with_is_copy(self: NDFrameT, indices, axis: Axis = 0) -> NDFrameT: 

3941 """ 

3942 Internal version of the `take` method that sets the `_is_copy` 

3943 attribute to keep track of the parent dataframe (used in indexing 

3944 for the SettingWithCopyWarning). 

3945 

3946 See the docstring of `take` for full explanation of the parameters. 

3947 """ 

3948 result = self._take(indices=indices, axis=axis) 

3949 # Only mark as a copy if the take actually changed the axis labels. 

3950 if not result._get_axis(axis).equals(self._get_axis(axis)): 

3951 result._set_is_copy(self) 

3952 return result 

3953 

3954 @final 

3955 def xs( 

3956 self: NDFrameT, 

3957 key: IndexLabel, 

3958 axis: Axis = 0, 

3959 level: IndexLabel = None, 

3960 drop_level: bool_t = True, 

3961 ) -> NDFrameT: 

3962 """ 

3963 Return cross-section from the Series/DataFrame. 

3964 

3965 This method takes a `key` argument to select data at a particular 

3966 level of a MultiIndex. 

3967 

3968 Parameters 

3969 ---------- 

3970 key : label or tuple of label 

3971 Label contained in the index, or partially in a MultiIndex. 

3972 axis : {0 or 'index', 1 or 'columns'}, default 0 

3973 Axis to retrieve cross-section on. 

3974 level : object, defaults to first n levels (n=1 or len(key)) 

3975 In case of a key partially contained in a MultiIndex, indicate 

3976 which levels are used. Levels can be referred by label or position. 

3977 drop_level : bool, default True 

3978 If False, returns object with same levels as self. 

3979 

3980 Returns 

3981 ------- 

3982 Series or DataFrame 

3983 Cross-section from the original Series or DataFrame 

3984 corresponding to the selected index levels. 

3985 

3986 See Also 

3987 -------- 

3988 DataFrame.loc : Access a group of rows and columns 

3989 by label(s) or a boolean array. 

3990 DataFrame.iloc : Purely integer-location based indexing 

3991 for selection by position. 

3992 

3993 Notes 

3994 ----- 

3995 `xs` cannot be used to set values. 

3996 

3997 MultiIndex Slicers is a generic way to get/set values on 

3998 any level or levels. 

3999 It is a superset of `xs` functionality, see 

4000 :ref:`MultiIndex Slicers <advanced.mi_slicers>`. 

4001 

4002 Examples 

4003 -------- 

4004 >>> d = {'num_legs': [4, 4, 2, 2], 

4005 ... 'num_wings': [0, 0, 2, 2], 

4006 ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], 

4007 ... 'animal': ['cat', 'dog', 'bat', 'penguin'], 

4008 ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} 

4009 >>> df = pd.DataFrame(data=d) 

4010 >>> df = df.set_index(['class', 'animal', 'locomotion']) 

4011 >>> df 

4012 num_legs num_wings 

4013 class animal locomotion 

4014 mammal cat walks 4 0 

4015 dog walks 4 0 

4016 bat flies 2 2 

4017 bird penguin walks 2 2 

4018 

4019 Get values at specified index 

4020 

4021 >>> df.xs('mammal') 

4022 num_legs num_wings 

4023 animal locomotion 

4024 cat walks 4 0 

4025 dog walks 4 0 

4026 bat flies 2 2 

4027 

4028 Get values at several indexes 

4029 

4030 >>> df.xs(('mammal', 'dog', 'walks')) 

4031 num_legs 4 

4032 num_wings 0 

4033 Name: (mammal, dog, walks), dtype: int64 

4034 

4035 Get values at specified index and level 

4036 

4037 >>> df.xs('cat', level=1) 

4038 num_legs num_wings 

4039 class locomotion 

4040 mammal walks 4 0 

4041 

4042 Get values at several indexes and levels 

4043 

4044 >>> df.xs(('bird', 'walks'), 

4045 ... level=[0, 'locomotion']) 

4046 num_legs num_wings 

4047 animal 

4048 penguin 2 2 

4049 

4050 Get values at specified column and axis 

4051 

4052 >>> df.xs('num_wings', axis=1) 

4053 class animal locomotion 

4054 mammal cat walks 0 

4055 dog walks 0 

4056 bat flies 2 

4057 bird penguin walks 2 

4058 Name: num_wings, dtype: int64 

4059 """ 

4060 axis = self._get_axis_number(axis) 

4061 labels = self._get_axis(axis) 

4062 

4063 if isinstance(key, list): 

4064 raise TypeError("list keys are not supported in xs, pass a tuple instead") 

4065 

4066 if level is not None: 

4067 if not isinstance(labels, MultiIndex): 

4068 raise TypeError("Index must be a MultiIndex") 

4069 loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) 

4070 

4071 # create the tuple of the indexer 

4072 _indexer = [slice(None)] * self.ndim 

4073 _indexer[axis] = loc 

4074 indexer = tuple(_indexer) 

4075 

4076 result = self.iloc[indexer] 

4077 setattr(result, result._get_axis_name(axis), new_ax) 

4078 return result 

4079 

4080 if axis == 1: 

4081 if drop_level: 

4082 return self[key] 

4083 index = self.columns 

4084 else: 

4085 index = self.index 

4086 

4087 if isinstance(index, MultiIndex): 

4088 loc, new_index = index._get_loc_level(key, level=0) 

4089 if not drop_level: 

4090 if lib.is_integer(loc): 

4091 new_index = index[loc : loc + 1] 

4092 else: 

4093 new_index = index[loc] 

4094 else: 

4095 loc = index.get_loc(key) 

4096 

4097 if isinstance(loc, np.ndarray): 

4098 if loc.dtype == np.bool_: 

4099 (inds,) = loc.nonzero() 

4100 return self._take_with_is_copy(inds, axis=axis) 

4101 else: 

4102 return self._take_with_is_copy(loc, axis=axis) 

4103 

4104 if not is_scalar(loc): 

4105 new_index = index[loc] 

4106 

4107 if is_scalar(loc) and axis == 0: 

4108 # In this case loc should be an integer 

4109 if self.ndim == 1: 

4110 # if we encounter an array-like and we only have 1 dim 

4111 # that means there are lists/ndarrays inside the Series! 

4112 # so just return them (GH 6394) 

4113 return self._values[loc] 

4114 

4115 new_mgr = self._mgr.fast_xs(loc) 

4116 

4117 result = self._constructor_sliced( 

4118 new_mgr, name=self.index[loc] 

4119 ).__finalize__(self) 

4120 elif is_scalar(loc): 

4121 result = self.iloc[:, slice(loc, loc + 1)] 

4122 elif axis == 1: 

4123 result = self.iloc[:, loc] 

4124 else: 

4125 result = self.iloc[loc] 

4126 result.index = new_index 

4127 

4128 # this could be a view 

4129 # but only in a single-dtyped view sliceable case 

4130 result._set_is_copy(self, copy=not result._is_view) 

4131 return result 

4132 

4133 def __getitem__(self, item): 

4134 raise AbstractMethodError(self) 

4135 

4136 def _slice(self: NDFrameT, slobj: slice, axis: Axis = 0) -> NDFrameT: 

4137 """ 

4138 Construct a slice of this container. 

4139 

4140 Slicing with this method is *always* positional. 

4141 """ 

4142 assert isinstance(slobj, slice), type(slobj) 

4143 axis = self._get_block_manager_axis(axis) 

4144 result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) 

4145 result = result.__finalize__(self) 

4146 

4147 # this could be a view 

4148 # but only in a single-dtyped view sliceable case 

4149 is_copy = axis != 0 or result._is_view 

4150 result._set_is_copy(self, copy=is_copy) 

4151 return result 
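# is_copy above is True when the slice runs along a non-zero block-manager
# axis or when the result still views self's data; in either case the weakref
# back to self enables the later SettingWithCopy checks.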

4152 

4153 @final 

4154 def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: 

4155 if not copy: 

4156 self._is_copy = None 

4157 else: 

4158 assert ref is not None 

4159 self._is_copy = weakref.ref(ref) 

4160 

4161 def _check_is_chained_assignment_possible(self) -> bool_t: 

4162 """ 

4163 Check if we are a view, have a cacher, and are of mixed type. 

4164 If so, then force a setitem_copy check. 

4165 

4166 Should be called just prior to setting a value. 

4167 

4168 Will return a boolean: True if we are a view and are cached as a 

4169 single dtype, meaning that the cacher should be updated following 

4170 setting. 

4171 """ 

4172 if self._is_copy: 

4173 self._check_setitem_copy(t="referent") 

4174 return False 

4175 

4176 @final 

4177 def _check_setitem_copy(self, t: str = "setting", force: bool_t = False): 

4178 """ 

4179 

4180 Parameters 

4181 ---------- 

4182 t : str, the type of setting error 

4183 force : bool, default False 

4184 If True, then force showing an error. 

4185 

4186 Validate if we are doing a setitem on a chained copy. 

4187 

4188 It is technically possible to figure out that we are setting on 

4189 a copy even WITH a multi-dtyped pandas object. In other words, some 

4190 blocks may be views while others are not. Currently _is_view will ALWAYS 

4191 return False for multi-blocks to avoid having to handle this case. 

4192 

4193 df = DataFrame(np.arange(0,9), columns=['count']) 

4194 df['group'] = 'b' 

4195 

4196 # This technically need not raise SettingWithCopy if both are views 

4197 # (which is not generally guaranteed but is usually True). However, 

4198 # this is in general not a good practice and we recommend using .loc. 

4199 df.iloc[0:5]['group'] = 'a' 

4200 

4201 """ 

4202 if using_copy_on_write(): 

4203 return 

4204 

4205 # return early if the check is not needed 

4206 if not (force or self._is_copy): 

4207 return 

4208 

4209 value = config.get_option("mode.chained_assignment") 

4210 if value is None: 

4211 return 

4212 

4213 # see if the copy is not actually referenced; if so, then dissolve 

4214 # the copy weakref 

4215 if self._is_copy is not None and not isinstance(self._is_copy, str): 

4216 r = self._is_copy() 

4217 if not gc.get_referents(r) or (r is not None and r.shape == self.shape): 

4218 self._is_copy = None 

4219 return 

4220 

4221 # a custom message 

4222 if isinstance(self._is_copy, str): 

4223 t = self._is_copy 

4224 

4225 elif t == "referent": 

4226 t = ( 

4227 "\n" 

4228 "A value is trying to be set on a copy of a slice from a " 

4229 "DataFrame\n\n" 

4230 "See the caveats in the documentation: " 

4231 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4232 "indexing.html#returning-a-view-versus-a-copy" 

4233 ) 

4234 

4235 else: 

4236 t = ( 

4237 "\n" 

4238 "A value is trying to be set on a copy of a slice from a " 

4239 "DataFrame.\n" 

4240 "Try using .loc[row_indexer,col_indexer] = value " 

4241 "instead\n\nSee the caveats in the documentation: " 

4242 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4243 "indexing.html#returning-a-view-versus-a-copy" 

4244 ) 

4245 

4246 if value == "raise": 

4247 raise SettingWithCopyError(t) 

4248 if value == "warn": 

4249 warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) 
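# A minimal sketch of how the "mode.chained_assignment" option drives the
# branches above (the values shown are the only supported ones):
#
#   import pandas as pd
#   pd.set_option("mode.chained_assignment", "warn")   # default: emit a warning
#   pd.set_option("mode.chained_assignment", "raise")  # raise SettingWithCopyError
#   pd.set_option("mode.chained_assignment", None)     # disable the check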

4250 

4251 def __delitem__(self, key) -> None: 

4252 """ 

4253 Delete item 

4254 """ 

4255 deleted = False 

4256 

4257 maybe_shortcut = False 

4258 if self.ndim == 2 and isinstance(self.columns, MultiIndex): 

4259 try: 

4260 # By using engine's __contains__ we effectively 

4261 # restrict to same-length tuples 

4262 maybe_shortcut = key not in self.columns._engine 

4263 except TypeError: 

4264 pass 

4265 

4266 if maybe_shortcut: 

4267 # Allow shorthand to delete all columns whose first len(key) 

4268 # elements match key: 

4269 if not isinstance(key, tuple): 

4270 key = (key,) 

4271 for col in self.columns: 

4272 if isinstance(col, tuple) and col[: len(key)] == key: 

4273 del self[col] 

4274 deleted = True 

4275 if not deleted: 

4276 # If the above loop ran and didn't delete anything because 

4277 # there was no match, this call should raise the appropriate 

4278 # exception: 

4279 loc = self.axes[-1].get_loc(key) 

4280 self._mgr = self._mgr.idelete(loc) 

4281 

4282 # delete from the caches 

4283 try: 

4284 del self._item_cache[key] 

4285 except KeyError: 

4286 pass 
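# Sketch of the MultiIndex shorthand handled above: deleting a partial key
# drops every column whose leading levels match it.
#
#   >>> import pandas as pd
#   >>> cols = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")])
#   >>> df = pd.DataFrame([[1, 2, 3]], columns=cols)
#   >>> del df["a"]  # removes ("a", "x") and ("a", "y")
#   >>> list(df.columns)
#   [('b', 'x')]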

4287 

4288 # ---------------------------------------------------------------------- 

4289 # Unsorted 

4290 

4291 @final 

4292 def _check_inplace_and_allows_duplicate_labels(self, inplace): 

4293 if inplace and not self.flags.allows_duplicate_labels: 

4294 raise ValueError( 

4295 "Cannot specify 'inplace=True' when " 

4296 "'self.flags.allows_duplicate_labels' is False." 

4297 ) 

4298 

4299 @final 

4300 def get(self, key, default=None): 

4301 """ 

4302 Get item from object for given key (ex: DataFrame column). 

4303 

4304 Returns default value if not found. 

4305 

4306 Parameters 

4307 ---------- 

4308 key : object 

4309 

4310 Returns 

4311 ------- 

4312 same type as items contained in object 

4313 

4314 Examples 

4315 -------- 

4316 >>> df = pd.DataFrame( 

4317 ... [ 

4318 ... [24.3, 75.7, "high"], 

4319 ... [31, 87.8, "high"], 

4320 ... [22, 71.6, "medium"], 

4321 ... [35, 95, "medium"], 

4322 ... ], 

4323 ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], 

4324 ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), 

4325 ... ) 

4326 

4327 >>> df 

4328 temp_celsius temp_fahrenheit windspeed 

4329 2014-02-12 24.3 75.7 high 

4330 2014-02-13 31.0 87.8 high 

4331 2014-02-14 22.0 71.6 medium 

4332 2014-02-15 35.0 95.0 medium 

4333 

4334 >>> df.get(["temp_celsius", "windspeed"]) 

4335 temp_celsius windspeed 

4336 2014-02-12 24.3 high 

4337 2014-02-13 31.0 high 

4338 2014-02-14 22.0 medium 

4339 2014-02-15 35.0 medium 

4340 

4341 >>> ser = df['windspeed'] 

4342 >>> ser.get('2014-02-13') 

4343 'high' 

4344 

4345 If the key isn't found, the default value will be used. 

4346 

4347 >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") 

4348 'default_value' 

4349 

4350 >>> ser.get('2014-02-10', '[unknown]') 

4351 '[unknown]' 

4352 """ 

4353 try: 

4354 return self[key] 

4355 except (KeyError, ValueError, IndexError): 

4356 return default 

4357 

4358 @final 

4359 @property 

4360 def _is_view(self) -> bool_t: 

4361 """Return boolean indicating if self is view of another array""" 

4362 return self._mgr.is_view 

4363 

4364 @final 

4365 def reindex_like( 

4366 self: NDFrameT, 

4367 other, 

4368 method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, 

4369 copy: bool_t | None = None, 

4370 limit=None, 

4371 tolerance=None, 

4372 ) -> NDFrameT: 

4373 """ 

4374 Return an object with indices matching those of the other object. 

4375 

4376 Conform the object to the same index on all axes. Optional 

4377 filling logic, placing NaN in locations having no value 

4378 in the previous index. A new object is produced unless the 

4379 new index is equivalent to the current one and copy=False. 

4380 

4381 Parameters 

4382 ---------- 

4383 other : Object of the same data type 

4384 Its row and column indices are used to define the new indices 

4385 of this object. 

4386 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} 

4387 Method to use for filling holes in reindexed DataFrame. 

4388 Please note: this is only applicable to DataFrames/Series with a 

4389 monotonically increasing/decreasing index. 

4390 

4391 * None (default): don't fill gaps 

4392 * pad / ffill: propagate last valid observation forward to next 

4393 valid 

4394 * backfill / bfill: use next valid observation to fill gap 

4395 * nearest: use nearest valid observations to fill gap. 

4396 

4397 copy : bool, default True 

4398 Return a new object, even if the passed indexes are the same. 

4399 limit : int, default None 

4400 Maximum number of consecutive labels to fill for inexact matches. 

4401 tolerance : optional 

4402 Maximum distance between original and new labels for inexact 

4403 matches. The values of the index at the matching locations must 

4404 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

4405 

4406 Tolerance may be a scalar value, which applies the same tolerance 

4407 to all values, or list-like, which applies variable tolerance per 

4408 element. List-like includes list, tuple, array, Series, and must be 

4409 the same size as the index and its dtype must exactly match the 

4410 index's type. 

4411 

4412 Returns 

4413 ------- 

4414 Series or DataFrame 

4415 Same type as caller, but with changed indices on each axis. 

4416 

4417 See Also 

4418 -------- 

4419 DataFrame.set_index : Set row labels. 

4420 DataFrame.reset_index : Remove row labels or move them to new columns. 

4421 DataFrame.reindex : Change to new indices or expand indices. 

4422 

4423 Notes 

4424 ----- 

4425 Same as calling 

4426 ``.reindex(index=other.index, columns=other.columns,...)``. 

4427 

4428 Examples 

4429 -------- 

4430 >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], 

4431 ... [31, 87.8, 'high'], 

4432 ... [22, 71.6, 'medium'], 

4433 ... [35, 95, 'medium']], 

4434 ... columns=['temp_celsius', 'temp_fahrenheit', 

4435 ... 'windspeed'], 

4436 ... index=pd.date_range(start='2014-02-12', 

4437 ... end='2014-02-15', freq='D')) 

4438 

4439 >>> df1 

4440 temp_celsius temp_fahrenheit windspeed 

4441 2014-02-12 24.3 75.7 high 

4442 2014-02-13 31.0 87.8 high 

4443 2014-02-14 22.0 71.6 medium 

4444 2014-02-15 35.0 95.0 medium 

4445 

4446 >>> df2 = pd.DataFrame([[28, 'low'], 

4447 ... [30, 'low'], 

4448 ... [35.1, 'medium']], 

4449 ... columns=['temp_celsius', 'windspeed'], 

4450 ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', 

4451 ... '2014-02-15'])) 

4452 

4453 >>> df2 

4454 temp_celsius windspeed 

4455 2014-02-12 28.0 low 

4456 2014-02-13 30.0 low 

4457 2014-02-15 35.1 medium 

4458 

4459 >>> df2.reindex_like(df1) 

4460 temp_celsius temp_fahrenheit windspeed 

4461 2014-02-12 28.0 NaN low 

4462 2014-02-13 30.0 NaN low 

4463 2014-02-14 NaN NaN NaN 

4464 2014-02-15 35.1 NaN medium 

4465 """ 

4466 d = other._construct_axes_dict( 

4467 axes=self._AXIS_ORDERS, 

4468 method=method, 

4469 copy=copy, 

4470 limit=limit, 

4471 tolerance=tolerance, 

4472 ) 

4473 

4474 return self.reindex(**d) 

4475 

4476 @overload 

4477 def drop( 

4478 self, 

4479 labels: IndexLabel = ..., 

4480 *, 

4481 axis: Axis = ..., 

4482 index: IndexLabel = ..., 

4483 columns: IndexLabel = ..., 

4484 level: Level | None = ..., 

4485 inplace: Literal[True], 

4486 errors: IgnoreRaise = ..., 

4487 ) -> None: 

4488 ... 

4489 

4490 @overload 

4491 def drop( 

4492 self: NDFrameT, 

4493 labels: IndexLabel = ..., 

4494 *, 

4495 axis: Axis = ..., 

4496 index: IndexLabel = ..., 

4497 columns: IndexLabel = ..., 

4498 level: Level | None = ..., 

4499 inplace: Literal[False] = ..., 

4500 errors: IgnoreRaise = ..., 

4501 ) -> NDFrameT: 

4502 ... 

4503 

4504 @overload 

4505 def drop( 

4506 self: NDFrameT, 

4507 labels: IndexLabel = ..., 

4508 *, 

4509 axis: Axis = ..., 

4510 index: IndexLabel = ..., 

4511 columns: IndexLabel = ..., 

4512 level: Level | None = ..., 

4513 inplace: bool_t = ..., 

4514 errors: IgnoreRaise = ..., 

4515 ) -> NDFrameT | None: 

4516 ... 

4517 

4518 def drop( 

4519 self: NDFrameT, 

4520 labels: IndexLabel = None, 

4521 *, 

4522 axis: Axis = 0, 

4523 index: IndexLabel = None, 

4524 columns: IndexLabel = None, 

4525 level: Level | None = None, 

4526 inplace: bool_t = False, 

4527 errors: IgnoreRaise = "raise", 

4528 ) -> NDFrameT | None: 

4529 inplace = validate_bool_kwarg(inplace, "inplace") 

4530 

4531 if labels is not None: 

4532 if index is not None or columns is not None: 

4533 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") 

4534 axis_name = self._get_axis_name(axis) 

4535 axes = {axis_name: labels} 

4536 elif index is not None or columns is not None: 

4537 axes = {"index": index} 

4538 if self.ndim == 2: 

4539 axes["columns"] = columns 

4540 else: 

4541 raise ValueError( 

4542 "Need to specify at least one of 'labels', 'index' or 'columns'" 

4543 ) 

4544 

4545 obj = self 

4546 

4547 for axis, labels in axes.items(): 

4548 if labels is not None: 

4549 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 

4550 

4551 if inplace: 

4552 self._update_inplace(obj) 

4553 return None 

4554 else: 

4555 return obj 
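# Dispatch sketch: df.drop("a", axis=1) and df.drop(columns="a") build the same
# axes mapping here, so both spellings reduce to one _drop_axis call per axis
# that was actually supplied.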

4556 

4557 @final 

4558 def _drop_axis( 

4559 self: NDFrameT, 

4560 labels, 

4561 axis, 

4562 level=None, 

4563 errors: IgnoreRaise = "raise", 

4564 only_slice: bool_t = False, 

4565 ) -> NDFrameT: 

4566 """ 

4567 Drop labels from specified axis. Used in the ``drop`` method 

4568 internally. 

4569 

4570 Parameters 

4571 ---------- 

4572 labels : single label or list-like 

4573 axis : int or axis name 

4574 level : int or level name, default None 

4575 For MultiIndex 

4576 errors : {'ignore', 'raise'}, default 'raise' 

4577 If 'ignore', suppress error and existing labels are dropped. 

4578 only_slice : bool, default False 

4579 Whether indexing along columns should be view-only. 

4580 

4581 """ 

4582 axis_num = self._get_axis_number(axis) 

4583 axis = self._get_axis(axis) 

4584 

4585 if axis.is_unique: 

4586 if level is not None: 

4587 if not isinstance(axis, MultiIndex): 

4588 raise AssertionError("axis must be a MultiIndex") 

4589 new_axis = axis.drop(labels, level=level, errors=errors) 

4590 else: 

4591 new_axis = axis.drop(labels, errors=errors) 

4592 indexer = axis.get_indexer(new_axis) 

4593 

4594 # Case for non-unique axis 

4595 else: 

4596 is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) 

4597 labels = ensure_object(common.index_labels_to_array(labels)) 

4598 if level is not None: 

4599 if not isinstance(axis, MultiIndex): 

4600 raise AssertionError("axis must be a MultiIndex") 

4601 mask = ~axis.get_level_values(level).isin(labels) 

4602 

4603 # GH 18561 MultiIndex.drop should raise if label is absent 

4604 if errors == "raise" and mask.all(): 

4605 raise KeyError(f"{labels} not found in axis") 

4606 elif ( 

4607 isinstance(axis, MultiIndex) 

4608 and labels.dtype == "object" 

4609 and not is_tuple_labels 

4610 ): 

4611 # Set level to zero in case of MultiIndex and label is string, 

4612 # because isin can't handle strings for MultiIndexes GH#36293 

4613 # In case of tuples we get dtype object but have to use isin GH#42771 

4614 mask = ~axis.get_level_values(0).isin(labels) 

4615 else: 

4616 mask = ~axis.isin(labels) 

4617 # Check if label doesn't exist along axis 

4618 labels_missing = (axis.get_indexer_for(labels) == -1).any() 

4619 if errors == "raise" and labels_missing: 

4620 raise KeyError(f"{labels} not found in axis") 

4621 

4622 if is_extension_array_dtype(mask.dtype): 

4623 # GH#45860 

4624 mask = mask.to_numpy(dtype=bool) 

4625 

4626 indexer = mask.nonzero()[0] 

4627 new_axis = axis.take(indexer) 

4628 

4629 bm_axis = self.ndim - axis_num - 1 

4630 new_mgr = self._mgr.reindex_indexer( 

4631 new_axis, 

4632 indexer, 

4633 axis=bm_axis, 

4634 allow_dups=True, 

4635 copy=None, 

4636 only_slice=only_slice, 

4637 ) 

4638 result = self._constructor(new_mgr) 

4639 if self.ndim == 1: 

4640 result.name = self.name 

4641 

4642 return result.__finalize__(self) 
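# Two strategies above: a unique axis can use Index.drop plus get_indexer
# directly, while a non-unique axis needs a boolean mask so that every
# occurrence of a dropped label, duplicates included, is removed.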

4643 

4644 @final 

4645 def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: 

4646 """ 

4647 Replace self internals with result. 

4648 

4649 Parameters 

4650 ---------- 

4651 result : same type as self 

4652 verify_is_copy : bool, default True 

4653 Provide is_copy checks. 

4654 """ 

4655 # NOTE: This does *not* call __finalize__ and that's an explicit 

4656 # decision that we may revisit in the future. 

4657 self._reset_cache() 

4658 self._clear_item_cache() 

4659 self._mgr = result._mgr 

4660 self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) 

4661 

4662 @final 

4663 def add_prefix(self: NDFrameT, prefix: str, axis: Axis | None = None) -> NDFrameT: 

4664 """ 

4665 Prefix labels with string `prefix`. 

4666 

4667 For Series, the row labels are prefixed. 

4668 For DataFrame, the column labels are prefixed. 

4669 

4670 Parameters 

4671 ---------- 

4672 prefix : str 

4673 The string to add before each label. 

4674 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

4675 Axis to add prefix on. 

4676 

4677 .. versionadded:: 2.0.0 

4678 

4679 Returns 

4680 ------- 

4681 Series or DataFrame 

4682 New Series or DataFrame with updated labels. 

4683 

4684 See Also 

4685 -------- 

4686 Series.add_suffix: Suffix row labels with string `suffix`. 

4687 DataFrame.add_suffix: Suffix column labels with string `suffix`. 

4688 

4689 Examples 

4690 -------- 

4691 >>> s = pd.Series([1, 2, 3, 4]) 

4692 >>> s 

4693 0 1 

4694 1 2 

4695 2 3 

4696 3 4 

4697 dtype: int64 

4698 

4699 >>> s.add_prefix('item_') 

4700 item_0 1 

4701 item_1 2 

4702 item_2 3 

4703 item_3 4 

4704 dtype: int64 

4705 

4706 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4707 >>> df 

4708 A B 

4709 0 1 3 

4710 1 2 4 

4711 2 3 5 

4712 3 4 6 

4713 

4714 >>> df.add_prefix('col_') 

4715 col_A col_B 

4716 0 1 3 

4717 1 2 4 

4718 2 3 5 

4719 3 4 6 

4720 """ 

4721 f = lambda x: f"{prefix}{x}" 

4722 

4723 axis_name = self._info_axis_name 

4724 if axis is not None: 

4725 axis_name = self._get_axis_name(axis) 

4726 

4727 mapper = {axis_name: f} 

4728 

4729 # error: Incompatible return value type (got "Optional[NDFrameT]", 

4730 # expected "NDFrameT") 

4731 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

4732 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

4733 # error: Keywords must be strings 

4734 return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] 

4735 

4736 @final 

4737 def add_suffix(self: NDFrameT, suffix: str, axis: Axis | None = None) -> NDFrameT: 

4738 """ 

4739 Suffix labels with string `suffix`. 

4740 

4741 For Series, the row labels are suffixed. 

4742 For DataFrame, the column labels are suffixed. 

4743 

4744 Parameters 

4745 ---------- 

4746 suffix : str 

4747 The string to add after each label. 

4748 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

4749 Axis to add suffix on. 

4750 

4751 .. versionadded:: 2.0.0 

4752 

4753 Returns 

4754 ------- 

4755 Series or DataFrame 

4756 New Series or DataFrame with updated labels. 

4757 

4758 See Also 

4759 -------- 

4760 Series.add_prefix: Prefix row labels with string `prefix`. 

4761 DataFrame.add_prefix: Prefix column labels with string `prefix`. 

4762 

4763 Examples 

4764 -------- 

4765 >>> s = pd.Series([1, 2, 3, 4]) 

4766 >>> s 

4767 0 1 

4768 1 2 

4769 2 3 

4770 3 4 

4771 dtype: int64 

4772 

4773 >>> s.add_suffix('_item') 

4774 0_item 1 

4775 1_item 2 

4776 2_item 3 

4777 3_item 4 

4778 dtype: int64 

4779 

4780 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4781 >>> df 

4782 A B 

4783 0 1 3 

4784 1 2 4 

4785 2 3 5 

4786 3 4 6 

4787 

4788 >>> df.add_suffix('_col') 

4789 A_col B_col 

4790 0 1 3 

4791 1 2 4 

4792 2 3 5 

4793 3 4 6 

4794 """ 

4795 f = lambda x: f"{x}{suffix}" 

4796 

4797 axis_name = self._info_axis_name 

4798 if axis is not None: 

4799 axis_name = self._get_axis_name(axis) 

4800 

4801 mapper = {axis_name: f} 

4802 # error: Incompatible return value type (got "Optional[NDFrameT]", 

4803 # expected "NDFrameT") 

4804 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

4805 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

4806 # error: Keywords must be strings 

4807 return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] 

4808 

4809 @overload 

4810 def sort_values( 

4811 self: NDFrameT, 

4812 *, 

4813 axis: Axis = ..., 

4814 ascending: bool_t | Sequence[bool_t] = ..., 

4815 inplace: Literal[False] = ..., 

4816 kind: str = ..., 

4817 na_position: str = ..., 

4818 ignore_index: bool_t = ..., 

4819 key: ValueKeyFunc = ..., 

4820 ) -> NDFrameT: 

4821 ... 

4822 

4823 @overload 

4824 def sort_values( 

4825 self, 

4826 *, 

4827 axis: Axis = ..., 

4828 ascending: bool_t | Sequence[bool_t] = ..., 

4829 inplace: Literal[True], 

4830 kind: str = ..., 

4831 na_position: str = ..., 

4832 ignore_index: bool_t = ..., 

4833 key: ValueKeyFunc = ..., 

4834 ) -> None: 

4835 ... 

4836 

4837 @overload 

4838 def sort_values( 

4839 self: NDFrameT, 

4840 *, 

4841 axis: Axis = ..., 

4842 ascending: bool_t | Sequence[bool_t] = ..., 

4843 inplace: bool_t = ..., 

4844 kind: str = ..., 

4845 na_position: str = ..., 

4846 ignore_index: bool_t = ..., 

4847 key: ValueKeyFunc = ..., 

4848 ) -> NDFrameT | None: 

4849 ... 

4850 

4851 def sort_values( 

4852 self: NDFrameT, 

4853 *, 

4854 axis: Axis = 0, 

4855 ascending: bool_t | Sequence[bool_t] = True, 

4856 inplace: bool_t = False, 

4857 kind: str = "quicksort", 

4858 na_position: str = "last", 

4859 ignore_index: bool_t = False, 

4860 key: ValueKeyFunc = None, 

4861 ) -> NDFrameT | None: 

4862 """ 

4863 Sort by the values along either axis. 

4864 

4865 Parameters 

4866 ----------%(optional_by)s 

4867 axis : %(axes_single_arg)s, default 0 

4868 Axis to be sorted. 

4869 ascending : bool or list of bool, default True 

4870 Sort ascending vs. descending. Specify list for multiple sort 

4871 orders. If this is a list of bools, it must match the length of 

4872 ``by``. 

4873 inplace : bool, default False 

4874 If True, perform operation in-place. 

4875 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' 

4876 Choice of sorting algorithm. See also :func:`numpy.sort` for more 

4877 information. `mergesort` and `stable` are the only stable algorithms. For 

4878 DataFrames, this option is only applied when sorting on a single 

4879 column or label. 

4880 na_position : {'first', 'last'}, default 'last' 

4881 Puts NaNs at the beginning if `first`; `last` puts NaNs at the 

4882 end. 

4883 ignore_index : bool, default False 

4884 If True, the resulting axis will be labeled 0, 1, …, n - 1. 

4885 key : callable, optional 

4886 Apply the key function to the values 

4887 before sorting. This is similar to the `key` argument in the 

4888 builtin :meth:`sorted` function, with the notable difference that 

4889 this `key` function should be *vectorized*. It should expect a 

4890 ``Series`` and return a Series with the same shape as the input. 

4891 It will be applied to each column in `by` independently. 

4892 

4893 .. versionadded:: 1.1.0 

4894 

4895 Returns 

4896 ------- 

4897 DataFrame or None 

4898 DataFrame with sorted values or None if ``inplace=True``. 

4899 

4900 See Also 

4901 -------- 

4902 DataFrame.sort_index : Sort a DataFrame by the index. 

4903 Series.sort_values : Similar method for a Series. 

4904 

4905 Examples 

4906 -------- 

4907 >>> df = pd.DataFrame({ 

4908 ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], 

4909 ... 'col2': [2, 1, 9, 8, 7, 4], 

4910 ... 'col3': [0, 1, 9, 4, 2, 3], 

4911 ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] 

4912 ... }) 

4913 >>> df 

4914 col1 col2 col3 col4 

4915 0 A 2 0 a 

4916 1 A 1 1 B 

4917 2 B 9 9 c 

4918 3 NaN 8 4 D 

4919 4 D 7 2 e 

4920 5 C 4 3 F 

4921 

4922 Sort by col1 

4923 

4924 >>> df.sort_values(by=['col1']) 

4925 col1 col2 col3 col4 

4926 0 A 2 0 a 

4927 1 A 1 1 B 

4928 2 B 9 9 c 

4929 5 C 4 3 F 

4930 4 D 7 2 e 

4931 3 NaN 8 4 D 

4932 

4933 Sort by multiple columns 

4934 

4935 >>> df.sort_values(by=['col1', 'col2']) 

4936 col1 col2 col3 col4 

4937 1 A 1 1 B 

4938 0 A 2 0 a 

4939 2 B 9 9 c 

4940 5 C 4 3 F 

4941 4 D 7 2 e 

4942 3 NaN 8 4 D 

4943 

4944 Sort Descending 

4945 

4946 >>> df.sort_values(by='col1', ascending=False) 

4947 col1 col2 col3 col4 

4948 4 D 7 2 e 

4949 5 C 4 3 F 

4950 2 B 9 9 c 

4951 0 A 2 0 a 

4952 1 A 1 1 B 

4953 3 NaN 8 4 D 

4954 

4955 Putting NAs first 

4956 

4957 >>> df.sort_values(by='col1', ascending=False, na_position='first') 

4958 col1 col2 col3 col4 

4959 3 NaN 8 4 D 

4960 4 D 7 2 e 

4961 5 C 4 3 F 

4962 2 B 9 9 c 

4963 0 A 2 0 a 

4964 1 A 1 1 B 

4965 

4966 Sorting with a key function 

4967 

4968 >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) 

4969 col1 col2 col3 col4 

4970 0 A 2 0 a 

4971 1 A 1 1 B 

4972 2 B 9 9 c 

4973 3 NaN 8 4 D 

4974 4 D 7 2 e 

4975 5 C 4 3 F 

4976 

4977 Natural sort with the key argument, 

4978 using the `natsort <https://github.com/SethMMorton/natsort>`__ package. 

4979 

4980 >>> df = pd.DataFrame({ 

4981 ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], 

4982 ... "value": [10, 20, 30, 40, 50] 

4983 ... }) 

4984 >>> df 

4985 time value 

4986 0 0hr 10 

4987 1 128hr 20 

4988 2 72hr 30 

4989 3 48hr 40 

4990 4 96hr 50 

4991 >>> from natsort import index_natsorted 

4992 >>> df.sort_values( 

4993 ... by="time", 

4994 ... key=lambda x: np.argsort(index_natsorted(df["time"])) 

4995 ... ) 

4996 time value 

4997 0 0hr 10 

4998 3 48hr 40 

4999 2 72hr 30 

5000 4 96hr 50 

5001 1 128hr 20 

5002 """ 

5003 raise AbstractMethodError(self) 

5004 

5005 @overload 

5006 def sort_index( 

5007 self, 

5008 *, 

5009 axis: Axis = ..., 

5010 level: IndexLabel = ..., 

5011 ascending: bool_t | Sequence[bool_t] = ..., 

5012 inplace: Literal[True], 

5013 kind: SortKind = ..., 

5014 na_position: NaPosition = ..., 

5015 sort_remaining: bool_t = ..., 

5016 ignore_index: bool_t = ..., 

5017 key: IndexKeyFunc = ..., 

5018 ) -> None: 

5019 ... 

5020 

5021 @overload 

5022 def sort_index( 

5023 self: NDFrameT, 

5024 *, 

5025 axis: Axis = ..., 

5026 level: IndexLabel = ..., 

5027 ascending: bool_t | Sequence[bool_t] = ..., 

5028 inplace: Literal[False] = ..., 

5029 kind: SortKind = ..., 

5030 na_position: NaPosition = ..., 

5031 sort_remaining: bool_t = ..., 

5032 ignore_index: bool_t = ..., 

5033 key: IndexKeyFunc = ..., 

5034 ) -> NDFrameT: 

5035 ... 

5036 

5037 @overload 

5038 def sort_index( 

5039 self: NDFrameT, 

5040 *, 

5041 axis: Axis = ..., 

5042 level: IndexLabel = ..., 

5043 ascending: bool_t | Sequence[bool_t] = ..., 

5044 inplace: bool_t = ..., 

5045 kind: SortKind = ..., 

5046 na_position: NaPosition = ..., 

5047 sort_remaining: bool_t = ..., 

5048 ignore_index: bool_t = ..., 

5049 key: IndexKeyFunc = ..., 

5050 ) -> NDFrameT | None: 

5051 ... 

5052 

5053 def sort_index( 

5054 self: NDFrameT, 

5055 *, 

5056 axis: Axis = 0, 

5057 level: IndexLabel = None, 

5058 ascending: bool_t | Sequence[bool_t] = True, 

5059 inplace: bool_t = False, 

5060 kind: SortKind = "quicksort", 

5061 na_position: NaPosition = "last", 

5062 sort_remaining: bool_t = True, 

5063 ignore_index: bool_t = False, 

5064 key: IndexKeyFunc = None, 

5065 ) -> NDFrameT | None: 

5066 inplace = validate_bool_kwarg(inplace, "inplace") 

5067 axis = self._get_axis_number(axis) 

5068 ascending = validate_ascending(ascending) 

5069 

5070 target = self._get_axis(axis) 

5071 

5072 indexer = get_indexer_indexer( 

5073 target, level, ascending, kind, na_position, sort_remaining, key 

5074 ) 

5075 

5076 if indexer is None: 

5077 if inplace: 

5078 result = self 

5079 else: 

5080 result = self.copy(deep=None) 

5081 

5082 if ignore_index: 

5083 result.index = default_index(len(self)) 

5084 if inplace: 

5085 return None 

5086 else: 

5087 return result 

5088 

5089 baxis = self._get_block_manager_axis(axis) 

5090 new_data = self._mgr.take(indexer, axis=baxis, verify=False) 

5091 

5092 # reconstruct axis if needed 

5093 new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic()) 

5094 

5095 if ignore_index: 

5096 axis = 1 if isinstance(self, ABCDataFrame) else 0 

5097 new_data.set_axis(axis, default_index(len(indexer))) 

5098 

5099 result = self._constructor(new_data) 

5100 

5101 if inplace: 

5102 return self._update_inplace(result) 

5103 else: 

5104 return result.__finalize__(self, method="sort_index") 
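# When get_indexer_indexer returns None the axis is already in the requested
# order, so the early path above hands back self (for inplace) or a copy,
# optionally relabelling with default_index when ignore_index is True.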

5105 

5106 @doc( 

5107 klass=_shared_doc_kwargs["klass"], 

5108 optional_reindex="", 

5109 ) 

5110 def reindex( 

5111 self: NDFrameT, 

5112 labels=None, 

5113 index=None, 

5114 columns=None, 

5115 axis: Axis | None = None, 

5116 method: str | None = None, 

5117 copy: bool_t | None = None, 

5118 level: Level | None = None, 

5119 fill_value: Scalar | None = np.nan, 

5120 limit: int | None = None, 

5121 tolerance=None, 

5122 ) -> NDFrameT: 

5123 """ 

5124 Conform {klass} to new index with optional filling logic. 

5125 

5126 Places NA/NaN in locations having no value in the previous index. A new object 

5127 is produced unless the new index is equivalent to the current one and 

5128 ``copy=False``. 

5129 

5130 Parameters 

5131 ---------- 

5132 {optional_reindex} 

5133 method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} 

5134 Method to use for filling holes in reindexed DataFrame. 

5135 Please note: this is only applicable to DataFrames/Series with a 

5136 monotonically increasing/decreasing index. 

5137 

5138 * None (default): don't fill gaps 

5139 * pad / ffill: Propagate last valid observation forward to next 

5140 valid. 

5141 * backfill / bfill: Use next valid observation to fill gap. 

5142 * nearest: Use nearest valid observations to fill gap. 

5143 

5144 copy : bool, default True 

5145 Return a new object, even if the passed indexes are the same. 

5146 level : int or name 

5147 Broadcast across a level, matching Index values on the 

5148 passed MultiIndex level. 

5149 fill_value : scalar, default np.NaN 

5150 Value to use for missing values. Defaults to NaN, but can be any 

5151 "compatible" value. 

5152 limit : int, default None 

5153 Maximum number of consecutive elements to forward or backward fill. 

5154 tolerance : optional 

5155 Maximum distance between original and new labels for inexact 

5156 matches. The values of the index at the matching locations must 

5157 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

5158 

5159 Tolerance may be a scalar value, which applies the same tolerance 

5160 to all values, or list-like, which applies variable tolerance per 

5161 element. List-like includes list, tuple, array, Series, and must be 

5162 the same size as the index and its dtype must exactly match the 

5163 index's type. 

5164 

5165 Returns 

5166 ------- 

5167 {klass} with changed index. 

5168 

5169 See Also 

5170 -------- 

5171 DataFrame.set_index : Set row labels. 

5172 DataFrame.reset_index : Remove row labels or move them to new columns. 

5173 DataFrame.reindex_like : Change to same indices as other DataFrame. 

5174 

5175 Examples 

5176 -------- 

5177 ``DataFrame.reindex`` supports two calling conventions 

5178 

5179 * ``(index=index_labels, columns=column_labels, ...)`` 

5180 * ``(labels, axis={{'index', 'columns'}}, ...)`` 

5181 

5182 We *highly* recommend using keyword arguments to clarify your 

5183 intent. 

5184 

5185 Create a dataframe with some fictional data. 

5186 

5187 >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] 

5188 >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], 

5189 ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, 

5190 ... index=index) 

5191 >>> df 

5192 http_status response_time 

5193 Firefox 200 0.04 

5194 Chrome 200 0.02 

5195 Safari 404 0.07 

5196 IE10 404 0.08 

5197 Konqueror 301 1.00 

5198 

5199 Create a new index and reindex the dataframe. By default 

5200 values in the new index that do not have corresponding 

5201 records in the dataframe are assigned ``NaN``. 

5202 

5203 >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 

5204 ... 'Chrome'] 

5205 >>> df.reindex(new_index) 

5206 http_status response_time 

5207 Safari 404.0 0.07 

5208 Iceweasel NaN NaN 

5209 Comodo Dragon NaN NaN 

5210 IE10 404.0 0.08 

5211 Chrome 200.0 0.02 

5212 

5213 We can fill in the missing values by passing a value to 

5214 the keyword ``fill_value``. Because the index is not monotonically 

5215 increasing or decreasing, we cannot use arguments to the keyword 

5216 ``method`` to fill the ``NaN`` values. 

5217 

5218 >>> df.reindex(new_index, fill_value=0) 

5219 http_status response_time 

5220 Safari 404 0.07 

5221 Iceweasel 0 0.00 

5222 Comodo Dragon 0 0.00 

5223 IE10 404 0.08 

5224 Chrome 200 0.02 

5225 

5226 >>> df.reindex(new_index, fill_value='missing') 

5227 http_status response_time 

5228 Safari 404 0.07 

5229 Iceweasel missing missing 

5230 Comodo Dragon missing missing 

5231 IE10 404 0.08 

5232 Chrome 200 0.02 

5233 

5234 We can also reindex the columns. 

5235 

5236 >>> df.reindex(columns=['http_status', 'user_agent']) 

5237 http_status user_agent 

5238 Firefox 200 NaN 

5239 Chrome 200 NaN 

5240 Safari 404 NaN 

5241 IE10 404 NaN 

5242 Konqueror 301 NaN 

5243 

5244 Or we can use "axis-style" keyword arguments 

5245 

5246 >>> df.reindex(['http_status', 'user_agent'], axis="columns") 

5247 http_status user_agent 

5248 Firefox 200 NaN 

5249 Chrome 200 NaN 

5250 Safari 404 NaN 

5251 IE10 404 NaN 

5252 Konqueror 301 NaN 

5253 

5254 To further illustrate the filling functionality in 

5255 ``reindex``, we will create a dataframe with a 

5256 monotonically increasing index (for example, a sequence 

5257 of dates). 

5258 

5259 >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') 

5260 >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, 

5261 ... index=date_index) 

5262 >>> df2 

5263 prices 

5264 2010-01-01 100.0 

5265 2010-01-02 101.0 

5266 2010-01-03 NaN 

5267 2010-01-04 100.0 

5268 2010-01-05 89.0 

5269 2010-01-06 88.0 

5270 

5271 Suppose we decide to expand the dataframe to cover a wider 

5272 date range. 

5273 

5274 >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') 

5275 >>> df2.reindex(date_index2) 

5276 prices 

5277 2009-12-29 NaN 

5278 2009-12-30 NaN 

5279 2009-12-31 NaN 

5280 2010-01-01 100.0 

5281 2010-01-02 101.0 

5282 2010-01-03 NaN 

5283 2010-01-04 100.0 

5284 2010-01-05 89.0 

5285 2010-01-06 88.0 

5286 2010-01-07 NaN 

5287 

5288 The index entries that did not have a value in the original data frame 

5289 (for example, '2009-12-29') are by default filled with ``NaN``. 

5290 If desired, we can fill in the missing values using one of several 

5291 options. 

5292 

5293 For example, to back-propagate the last valid value to fill the ``NaN`` 

5294 values, pass ``bfill`` as an argument to the ``method`` keyword. 

5295 

5296 >>> df2.reindex(date_index2, method='bfill') 

5297 prices 

5298 2009-12-29 100.0 

5299 2009-12-30 100.0 

5300 2009-12-31 100.0 

5301 2010-01-01 100.0 

5302 2010-01-02 101.0 

5303 2010-01-03 NaN 

5304 2010-01-04 100.0 

5305 2010-01-05 89.0 

5306 2010-01-06 88.0 

5307 2010-01-07 NaN 

5308 

5309 Please note that the ``NaN`` value present in the original dataframe 

5310 (at index value 2010-01-03) will not be filled by any of the 

5311 value propagation schemes. This is because filling while reindexing 

5312 does not look at dataframe values, but only compares the original and 

5313 desired indexes. If you do want to fill in the ``NaN`` values present 

5314 in the original dataframe, use the ``fillna()`` method. 

5315 

5316 See the :ref:`user guide <basics.reindexing>` for more. 

5317 """ 

5318 # TODO: Decide if we care about having different examples for different 

5319 # kinds 

5320 

5321 if index is not None and columns is not None and labels is not None: 

5322 raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") 

5323 elif index is not None or columns is not None: 

5324 if axis is not None: 

5325 raise TypeError( 

5326 "Cannot specify both 'axis' and any of 'index' or 'columns'" 

5327 ) 

5328 if labels is not None: 

5329 if index is not None: 

5330 columns = labels 

5331 else: 

5332 index = labels 

5333 else: 

5334 if axis and self._get_axis_number(axis) == 1: 

5335 columns = labels 

5336 else: 

5337 index = labels 

5338 axes: dict[Literal["index", "columns"], Any] = { 

5339 "index": index, 

5340 "columns": columns, 

5341 } 

5342 method = clean_reindex_fill_method(method) 

5343 

5344 # if all axes that are requested to reindex are equal, then only copy 

5345 # if indicated must have index names equal here as well as values 

5346 if copy and using_copy_on_write(): 

5347 copy = False 

5348 if all( 

5349 self._get_axis(axis_name).identical(ax) 

5350 for axis_name, ax in axes.items() 

5351 if ax is not None 

5352 ): 

5353 return self.copy(deep=copy) 

5354 

5355 # check if we are a multi reindex 

5356 if self._needs_reindex_multi(axes, method, level): 

5357 return self._reindex_multi(axes, copy, fill_value) 

5358 

5359 # perform the reindex on the axes 

5360 return self._reindex_axes( 

5361 axes, level, limit, tolerance, method, fill_value, copy 

5362 ).__finalize__(self, method="reindex") 
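# Illustrative sketch (not part of the pandas source): per the axis-resolution
# branch above, axis-style and explicit-keyword calls are equivalent.
# ``df`` and ``new_cols`` below are hypothetical example data.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
new_cols = ["a", "c"]
assert df.reindex(columns=new_cols).equals(df.reindex(new_cols, axis="columns"))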

5363 

5364 def _reindex_axes( 

5365 self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy 

5366 ) -> NDFrameT: 

5367 """Perform the reindex for all the axes.""" 

5368 obj = self 

5369 for a in self._AXIS_ORDERS: 

5370 labels = axes[a] 

5371 if labels is None: 

5372 continue 

5373 

5374 ax = self._get_axis(a) 

5375 new_index, indexer = ax.reindex( 

5376 labels, level=level, limit=limit, tolerance=tolerance, method=method 

5377 ) 

5378 

5379 axis = self._get_axis_number(a) 

5380 obj = obj._reindex_with_indexers( 

5381 {axis: [new_index, indexer]}, 

5382 fill_value=fill_value, 

5383 copy=copy, 

5384 allow_dups=False, 

5385 ) 

5386 # If we've made a copy once, no need to make another one 

5387 copy = False 

5388 

5389 return obj 
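# Illustrative sketch (not part of the pandas source): _reindex_axes walks the
# axes one at a time, so a combined reindex behaves like two chained
# single-axis reindexes. Example data below is hypothetical.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["x", "y"])
combined = df.reindex(index=["x", "z"], columns=["a", "c"])
chained = df.reindex(index=["x", "z"]).reindex(columns=["a", "c"])
assert combined.equals(chained)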

5390 

5391 def _needs_reindex_multi(self, axes, method, level) -> bool_t: 

5392 """Check if we do need a multi reindex.""" 

5393 return ( 

5394 (common.count_not_none(*axes.values()) == self._AXIS_LEN) 

5395 and method is None 

5396 and level is None 

5397 and not self._is_mixed_type 

5398 and not ( 

5399 self.ndim == 2 

5400 and len(self.dtypes) == 1 

5401 and is_extension_array_dtype(self.dtypes.iloc[0]) 

5402 ) 

5403 ) 

5404 

5405 def _reindex_multi(self, axes, copy, fill_value): 

5406 raise AbstractMethodError(self) 

5407 

5408 @final 

5409 def _reindex_with_indexers( 

5410 self: NDFrameT, 

5411 reindexers, 

5412 fill_value=None, 

5413 copy: bool_t | None = False, 

5414 allow_dups: bool_t = False, 

5415 ) -> NDFrameT: 

5416 """allow_dups indicates an internal call here""" 

5417 # reindex doing multiple operations on different axes if indicated 

5418 new_data = self._mgr 

5419 for axis in sorted(reindexers.keys()): 

5420 index, indexer = reindexers[axis] 

5421 baxis = self._get_block_manager_axis(axis) 

5422 

5423 if index is None: 

5424 continue 

5425 

5426 index = ensure_index(index) 

5427 if indexer is not None: 

5428 indexer = ensure_platform_int(indexer) 

5429 

5430 # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi) 

5431 new_data = new_data.reindex_indexer( 

5432 index, 

5433 indexer, 

5434 axis=baxis, 

5435 fill_value=fill_value, 

5436 allow_dups=allow_dups, 

5437 copy=copy, 

5438 ) 

5439 # If we've made a copy once, no need to make another one 

5440 copy = False 

5441 

5442 if ( 

5443 (copy or copy is None) 

5444 and new_data is self._mgr 

5445 and not using_copy_on_write() 

5446 ): 

5447 new_data = new_data.copy(deep=copy) 

5448 elif using_copy_on_write() and new_data is self._mgr: 

5449 new_data = new_data.copy(deep=False) 

5450 

5451 return self._constructor(new_data).__finalize__(self) 

5452 

5453 def filter( 

5454 self: NDFrameT, 

5455 items=None, 

5456 like: str | None = None, 

5457 regex: str | None = None, 

5458 axis: Axis | None = None, 

5459 ) -> NDFrameT: 

5460 """ 

5461 Subset the dataframe rows or columns according to the specified index labels. 

5462 

5463 Note that this routine does not filter a dataframe on its 

5464 contents. The filter is applied to the labels of the index. 

5465 

5466 Parameters 

5467 ---------- 

5468 items : list-like 

5469 Keep labels from axis which are in items. 

5470 like : str 

5471 Keep labels from axis for which "like in label == True". 

5472 regex : str (regular expression) 

5473 Keep labels from axis for which re.search(regex, label) == True. 

5474 axis : {0 or 'index', 1 or 'columns', None}, default None 

5475 The axis to filter on, expressed either as an index (int) 

5476 or axis name (str). By default this is the info axis, 'columns' for 

5477 DataFrame. For `Series` this parameter is unused and defaults to `None`. 

5478 

5479 Returns 

5480 ------- 

5481 same type as input object 

5482 

5483 See Also 

5484 -------- 

5485 DataFrame.loc : Access a group of rows and columns 

5486 by label(s) or a boolean array. 

5487 

5488 Notes 

5489 ----- 

5490 The ``items``, ``like``, and ``regex`` parameters are 

5491 enforced to be mutually exclusive. 

5492 

5493 ``axis`` defaults to the info axis that is used when indexing 

5494 with ``[]``. 

5495 

5496 Examples 

5497 -------- 

5498 >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), 

5499 ... index=['mouse', 'rabbit'], 

5500 ... columns=['one', 'two', 'three']) 

5501 >>> df 

5502 one two three 

5503 mouse 1 2 3 

5504 rabbit 4 5 6 

5505 

5506 >>> # select columns by name 

5507 >>> df.filter(items=['one', 'three']) 

5508 one three 

5509 mouse 1 3 

5510 rabbit 4 6 

5511 

5512 >>> # select columns by regular expression 

5513 >>> df.filter(regex='e$', axis=1) 

5514 one three 

5515 mouse 1 3 

5516 rabbit 4 6 

5517 

5518 >>> # select rows containing 'bbi' 

5519 >>> df.filter(like='bbi', axis=0) 

5520 one two three 

5521 rabbit 4 5 6 

5522 """ 

5523 nkw = common.count_not_none(items, like, regex) 

5524 if nkw > 1: 

5525 raise TypeError( 

5526 "Keyword arguments `items`, `like`, or `regex` " 

5527 "are mutually exclusive" 

5528 ) 

5529 

5530 if axis is None: 

5531 axis = self._info_axis_name 

5532 labels = self._get_axis(axis) 

5533 

5534 if items is not None: 

5535 name = self._get_axis_name(axis) 

5536 # error: Keywords must be strings 

5537 return self.reindex( # type: ignore[misc] 

5538 **{name: [r for r in items if r in labels]} # type: ignore[arg-type] 

5539 ) 

5540 elif like: 

5541 

5542 def f(x) -> bool_t: 

5543 assert like is not None # needed for mypy 

5544 return like in ensure_str(x) 

5545 

5546 values = labels.map(f) 

5547 return self.loc(axis=axis)[values] 

5548 elif regex: 

5549 

5550 def f(x) -> bool_t: 

5551 return matcher.search(ensure_str(x)) is not None 

5552 

5553 matcher = re.compile(regex) 

5554 values = labels.map(f) 

5555 return self.loc(axis=axis)[values] 

5556 else: 

5557 raise TypeError("Must pass either `items`, `like`, or `regex`") 
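# Illustrative sketch (not part of the pandas source): per the branch above,
# filter(like=...) reduces to a substring test over the axis labels.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(6).reshape(2, 3),
                  index=["mouse", "rabbit"],
                  columns=["one", "two", "three"])
via_filter = df.filter(like="bbi", axis=0)
via_mask = df.loc[["bbi" in str(label) for label in df.index]]
assert via_filter.equals(via_mask)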

5558 

5559 @final 

5560 def head(self: NDFrameT, n: int = 5) -> NDFrameT: 

5561 """ 

5562 Return the first `n` rows. 

5563 

5564 This function returns the first `n` rows for the object based 

5565 on position. It is useful for quickly testing if your object 

5566 has the right type of data in it. 

5567 

5568 For negative values of `n`, this function returns all rows except 

5569 the last `|n|` rows, equivalent to ``df[:n]``. 

5570 

5571 If n is larger than the number of rows, this function returns all rows. 

5572 

5573 Parameters 

5574 ---------- 

5575 n : int, default 5 

5576 Number of rows to select. 

5577 

5578 Returns 

5579 ------- 

5580 same type as caller 

5581 The first `n` rows of the caller object. 

5582 

5583 See Also 

5584 -------- 

5585 DataFrame.tail: Returns the last `n` rows. 

5586 

5587 Examples 

5588 -------- 

5589 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5590 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5591 >>> df 

5592 animal 

5593 0 alligator 

5594 1 bee 

5595 2 falcon 

5596 3 lion 

5597 4 monkey 

5598 5 parrot 

5599 6 shark 

5600 7 whale 

5601 8 zebra 

5602 

5603 Viewing the first 5 lines 

5604 

5605 >>> df.head() 

5606 animal 

5607 0 alligator 

5608 1 bee 

5609 2 falcon 

5610 3 lion 

5611 4 monkey 

5612 

5613 Viewing the first `n` lines (three in this case) 

5614 

5615 >>> df.head(3) 

5616 animal 

5617 0 alligator 

5618 1 bee 

5619 2 falcon 

5620 

5621 For negative values of `n` 

5622 

5623 >>> df.head(-3) 

5624 animal 

5625 0 alligator 

5626 1 bee 

5627 2 falcon 

5628 3 lion 

5629 4 monkey 

5630 5 parrot 

5631 """ 

5632 return self.iloc[:n] 

5633 

5634 @final 

5635 def tail(self: NDFrameT, n: int = 5) -> NDFrameT: 

5636 """ 

5637 Return the last `n` rows. 

5638 

5639 This function returns last `n` rows from the object based on 

5640 position. It is useful for quickly verifying data, for example, 

5641 after sorting or appending rows. 

5642 

5643 For negative values of `n`, this function returns all rows except 

5644 the first `|n|` rows, equivalent to ``df[|n|:]``. 

5645 

5646 If n is larger than the number of rows, this function returns all rows. 

5647 

5648 Parameters 

5649 ---------- 

5650 n : int, default 5 

5651 Number of rows to select. 

5652 

5653 Returns 

5654 ------- 

5655 type of caller 

5656 The last `n` rows of the caller object. 

5657 

5658 See Also 

5659 -------- 

5660 DataFrame.head : The first `n` rows of the caller object. 

5661 

5662 Examples 

5663 -------- 

5664 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5665 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5666 >>> df 

5667 animal 

5668 0 alligator 

5669 1 bee 

5670 2 falcon 

5671 3 lion 

5672 4 monkey 

5673 5 parrot 

5674 6 shark 

5675 7 whale 

5676 8 zebra 

5677 

5678 Viewing the last 5 lines 

5679 

5680 >>> df.tail() 

5681 animal 

5682 4 monkey 

5683 5 parrot 

5684 6 shark 

5685 7 whale 

5686 8 zebra 

5687 

5688 Viewing the last `n` lines (three in this case) 

5689 

5690 >>> df.tail(3) 

5691 animal 

5692 6 shark 

5693 7 whale 

5694 8 zebra 

5695 

5696 For negative values of `n` 

5697 

5698 >>> df.tail(-3) 

5699 animal 

5700 3 lion 

5701 4 monkey 

5702 5 parrot 

5703 6 shark 

5704 7 whale 

5705 8 zebra 

5706 """ 

5707 if n == 0: 

5708 return self.iloc[0:0] 

5709 return self.iloc[-n:] 
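# Illustrative sketch (not part of the pandas source): the ``n == 0`` guard
# above is needed because ``iloc[-0:]`` equals ``iloc[0:]`` and would return
# every row instead of none.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
assert len(df.tail(0)) == 0    # guarded: empty result
assert len(df.iloc[-0:]) == 3  # the naive slice would keep all rows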

5710 

5711 @final 

5712 def sample( 

5713 self: NDFrameT, 

5714 n: int | None = None, 

5715 frac: float | None = None, 

5716 replace: bool_t = False, 

5717 weights=None, 

5718 random_state: RandomState | None = None, 

5719 axis: Axis | None = None, 

5720 ignore_index: bool_t = False, 

5721 ) -> NDFrameT: 

5722 """ 

5723 Return a random sample of items from an axis of object. 

5724 

5725 You can use `random_state` for reproducibility. 

5726 

5727 Parameters 

5728 ---------- 

5729 n : int, optional 

5730 Number of items from axis to return. Cannot be used with `frac`. 

5731 Default = 1 if `frac` = None. 

5732 frac : float, optional 

5733 Fraction of axis items to return. Cannot be used with `n`. 

5734 replace : bool, default False 

5735 Allow or disallow sampling of the same row more than once. 

5736 weights : str or ndarray-like, optional 

5737 Default 'None' results in equal probability weighting. 

5738 If passed a Series, will align with target object on index. Index 

5739 values in weights not found in sampled object will be ignored and 

5740 index values in sampled object not in weights will be assigned 

5741 weights of zero. 

5742 If called on a DataFrame, will accept the name of a column 

5743 when axis = 0. 

5744 Unless weights are a Series, weights must be same length as axis 

5745 being sampled. 

5746 If weights do not sum to 1, they will be normalized to sum to 1. 

5747 Missing values in the weights column will be treated as zero. 

5748 Infinite values not allowed. 

5749 random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional 

5750 If int, array-like, or BitGenerator, seed for random number generator. 

5751 If np.random.RandomState or np.random.Generator, use as given. 

5752 

5753 .. versionchanged:: 1.1.0 

5754 

5755 array-like and BitGenerator object now passed to np.random.RandomState() 

5756 as seed 

5757 

5758 .. versionchanged:: 1.4.0 

5759 

5760 np.random.Generator objects now accepted 

5761 

5762 axis : {0 or 'index', 1 or 'columns', None}, default None 

5763 Axis to sample. Accepts axis number or name. Default is stat axis 

5764 for given data type. For `Series` this parameter is unused and defaults to `None`. 

5765 ignore_index : bool, default False 

5766 If True, the resulting index will be labeled 0, 1, …, n - 1. 

5767 

5768 .. versionadded:: 1.3.0 

5769 

5770 Returns 

5771 ------- 

5772 Series or DataFrame 

5773 A new object of same type as caller containing `n` items randomly 

5774 sampled from the caller object. 

5775 

5776 See Also 

5777 -------- 

5778 DataFrameGroupBy.sample: Generates random samples from each group of a 

5779 DataFrame object. 

5780 SeriesGroupBy.sample: Generates random samples from each group of a 

5781 Series object. 

5782 numpy.random.choice: Generates a random sample from a given 1-D numpy 

5783 array. 

5784 

5785 Notes 

5786 ----- 

5787 If `frac` > 1, `replace` should be set to `True`. 

5788 

5789 Examples 

5790 -------- 

5791 >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], 

5792 ... 'num_wings': [2, 0, 0, 0], 

5793 ... 'num_specimen_seen': [10, 2, 1, 8]}, 

5794 ... index=['falcon', 'dog', 'spider', 'fish']) 

5795 >>> df 

5796 num_legs num_wings num_specimen_seen 

5797 falcon 2 2 10 

5798 dog 4 0 2 

5799 spider 8 0 1 

5800 fish 0 0 8 

5801 

5802 Extract 3 random elements from the ``Series`` ``df['num_legs']``. 

5803 Note that we use `random_state` to ensure the reproducibility of 

5804 the examples. 

5805 

5806 >>> df['num_legs'].sample(n=3, random_state=1) 

5807 fish 0 

5808 spider 8 

5809 falcon 2 

5810 Name: num_legs, dtype: int64 

5811 

5812 A random 50% sample of the ``DataFrame`` with replacement: 

5813 

5814 >>> df.sample(frac=0.5, replace=True, random_state=1) 

5815 num_legs num_wings num_specimen_seen 

5816 dog 4 0 2 

5817 fish 0 0 8 

5818 

5819 An upsampled sample of the ``DataFrame`` with replacement: 

5820 Note that the `replace` parameter has to be `True` for `frac` > 1. 

5821 

5822 >>> df.sample(frac=2, replace=True, random_state=1) 

5823 num_legs num_wings num_specimen_seen 

5824 dog 4 0 2 

5825 fish 0 0 8 

5826 falcon 2 2 10 

5827 falcon 2 2 10 

5828 fish 0 0 8 

5829 dog 4 0 2 

5830 fish 0 0 8 

5831 dog 4 0 2 

5832 

5833 Using a DataFrame column as weights. Rows with larger value in the 

5834 `num_specimen_seen` column are more likely to be sampled. 

5835 

5836 >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) 

5837 num_legs num_wings num_specimen_seen 

5838 falcon 2 2 10 

5839 fish 0 0 8 

5840 """ # noqa:E501 

5841 if axis is None: 

5842 axis = self._stat_axis_number 

5843 

5844 axis = self._get_axis_number(axis) 

5845 obj_len = self.shape[axis] 

5846 

5847 # Process random_state argument 

5848 rs = common.random_state(random_state) 

5849 

5850 size = sample.process_sampling_size(n, frac, replace) 

5851 if size is None: 

5852 assert frac is not None 

5853 size = round(frac * obj_len) 

5854 

5855 if weights is not None: 

5856 weights = sample.preprocess_weights(self, weights, axis) 

5857 

5858 sampled_indices = sample.sample(obj_len, size, replace, weights, rs) 

5859 result = self.take(sampled_indices, axis=axis) 

5860 

5861 if ignore_index: 

5862 result.index = default_index(len(result)) 

5863 

5864 return result 
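# Illustrative sketch (not part of the pandas source): with ``frac``, the
# sample size is ``round(frac * obj_len)`` exactly as computed above.
import pandas as pd

df = pd.DataFrame({"a": range(5)})
assert len(df.sample(frac=0.5, random_state=0)) == round(0.5 * len(df))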

5865 

5866 @final 

5867 @doc(klass=_shared_doc_kwargs["klass"]) 

5868 def pipe( 

5869 self, 

5870 func: Callable[..., T] | tuple[Callable[..., T], str], 

5871 *args, 

5872 **kwargs, 

5873 ) -> T: 

5874 r""" 

5875 Apply chainable functions that expect Series or DataFrames. 

5876 

5877 Parameters 

5878 ---------- 

5879 func : function 

5880 Function to apply to the {klass}. 

5881 ``args``, and ``kwargs`` are passed into ``func``. 

5882 Alternatively a ``(callable, data_keyword)`` tuple where 

5883 ``data_keyword`` is a string indicating the keyword of 

5884 ``callable`` that expects the {klass}. 

5885 args : iterable, optional 

5886 Positional arguments passed into ``func``. 

5887 kwargs : mapping, optional 

5888 A dictionary of keyword arguments passed into ``func``. 

5889 

5890 Returns 

5891 ------- 

5892 the return type of ``func``. 

5893 

5894 See Also 

5895 -------- 

5896 DataFrame.apply : Apply a function along input axis of DataFrame. 

5897 DataFrame.applymap : Apply a function elementwise on a whole DataFrame. 

5898 Series.map : Apply a mapping correspondence on a 

5899 :class:`~pandas.Series`. 

5900 

5901 Notes 

5902 ----- 

5903 Use ``.pipe`` when chaining together functions that expect 

5904 Series, DataFrames or GroupBy objects. Instead of writing 

5905 

5906 >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP 

5907 

5908 You can write 

5909 

5910 >>> (df.pipe(h) 

5911 ... .pipe(g, arg1=a) 

5912 ... .pipe(func, arg2=b, arg3=c) 

5913 ... ) # doctest: +SKIP 

5914 

5915 If you have a function that takes the data as (say) the second 

5916 argument, pass a tuple indicating which keyword expects the 

5917 data. For example, suppose ``func`` takes its data as ``arg2``: 

5918 

5919 >>> (df.pipe(h) 

5920 ... .pipe(g, arg1=a) 

5921 ... .pipe((func, 'arg2'), arg1=a, arg3=c) 

5922 ... ) # doctest: +SKIP 

5923 """ 

5924 if using_copy_on_write(): 

5925 return common.pipe(self.copy(deep=None), func, *args, **kwargs) 

5926 return common.pipe(self, func, *args, **kwargs) 
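# Illustrative sketch (not part of the pandas source): a ``(callable,
# data_keyword)`` tuple routes the object into that keyword, as documented
# above. ``subtract`` is a hypothetical helper.
import pandas as pd

def subtract(amount, data):
    return data - amount

s = pd.Series([10, 20])
result = s.pipe((subtract, "data"), amount=5)  # s is passed as ``data``
assert result.tolist() == [5, 15]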

5927 

5928 # ---------------------------------------------------------------------- 

5929 # Attribute access 

5930 

5931 @final 

5932 def __finalize__( 

5933 self: NDFrameT, other, method: str | None = None, **kwargs 

5934 ) -> NDFrameT: 

5935 """ 

5936 Propagate metadata from other to self. 

5937 

5938 Parameters 

5939 ---------- 

5940 other : the object from which to get the attributes that we are going 

5941 to propagate 

5942 method : str, optional 

5943 A passed method name providing context on where ``__finalize__`` 

5944 was called. 

5945 

5946 .. warning:: 

5947 

5948 The value passed as `method` is not currently considered 

5949 stable across pandas releases. 

5950 """ 

5951 if isinstance(other, NDFrame): 

5952 for name in other.attrs: 

5953 self.attrs[name] = other.attrs[name] 

5954 

5955 self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels 

5956 # For subclasses using _metadata. 

5957 for name in set(self._metadata) & set(other._metadata): 

5958 assert isinstance(name, str) 

5959 object.__setattr__(self, name, getattr(other, name, None)) 

5960 

5961 if method == "concat": 

5962 attrs = other.objs[0].attrs 

5963 check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) 

5964 if check_attrs: 

5965 for name in attrs: 

5966 self.attrs[name] = attrs[name] 

5967 

5968 allows_duplicate_labels = all( 

5969 x.flags.allows_duplicate_labels for x in other.objs 

5970 ) 

5971 self.flags.allows_duplicate_labels = allows_duplicate_labels 

5972 

5973 return self 
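# Illustrative sketch (not part of the pandas source): __finalize__ copies
# ``attrs`` from the source object, so user metadata survives derived results.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
df.attrs["source"] = "sensor-1"
assert df.head(2).attrs["source"] == "sensor-1"  # propagated via __finalize__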

5974 

5975 def __getattr__(self, name: str): 

5976 """ 

5977 After regular attribute access, try looking up the name 

5978 This allows simpler access to columns for interactive use. 

5979 """ 

5980 # Note: obj.x will always call obj.__getattribute__('x') prior to 

5981 # calling obj.__getattr__('x'). 

5982 if ( 

5983 name not in self._internal_names_set 

5984 and name not in self._metadata 

5985 and name not in self._accessors 

5986 and self._info_axis._can_hold_identifiers_and_holds_name(name) 

5987 ): 

5988 return self[name] 

5989 return object.__getattribute__(self, name) 

5990 

5991 def __setattr__(self, name: str, value) -> None: 

5992 """ 

5993 After regular attribute access, try setting the name 

5994 This allows simpler access to columns for interactive use. 

5995 """ 

5996 # first try regular attribute access via __getattribute__, so that 

5997 # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify 

5998 # the same attribute. 

5999 

6000 try: 

6001 object.__getattribute__(self, name) 

6002 return object.__setattr__(self, name, value) 

6003 except AttributeError: 

6004 pass 

6005 

6006 # if this fails, go on to more involved attribute setting 

6007 # (note that this matches __getattr__, above). 

6008 if name in self._internal_names_set: 

6009 object.__setattr__(self, name, value) 

6010 elif name in self._metadata: 

6011 object.__setattr__(self, name, value) 

6012 else: 

6013 try: 

6014 existing = getattr(self, name) 

6015 if isinstance(existing, Index): 

6016 object.__setattr__(self, name, value) 

6017 elif name in self._info_axis: 

6018 self[name] = value 

6019 else: 

6020 object.__setattr__(self, name, value) 

6021 except (AttributeError, TypeError): 

6022 if isinstance(self, ABCDataFrame) and (is_list_like(value)): 

6023 warnings.warn( 

6024 "Pandas doesn't allow columns to be " 

6025 "created via a new attribute name - see " 

6026 "https://pandas.pydata.org/pandas-docs/" 

6027 "stable/indexing.html#attribute-access", 

6028 stacklevel=find_stack_level(), 

6029 ) 

6030 object.__setattr__(self, name, value) 
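# Illustrative sketch (not part of the pandas source): per the fallback above,
# assigning a list-like to a brand-new attribute warns and does *not* create
# a column; item assignment does.
import warnings
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    df.b = [3, 4]              # stored as a plain instance attribute
assert "b" not in df.columns and len(caught) == 1
df["b"] = [3, 4]               # this is how a new column is created
assert "b" in df.columns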

6031 

6032 @final 

6033 def _dir_additions(self) -> set[str]: 

6034 """ 

6035 add the string-like attributes from the info_axis. 

6036 If info_axis is a MultiIndex, its first level values are used. 

6037 """ 

6038 additions = super()._dir_additions() 

6039 if self._info_axis._can_hold_strings: 

6040 additions.update(self._info_axis._dir_additions_for_owner) 

6041 return additions 

6042 

6043 # ---------------------------------------------------------------------- 

6044 # Consolidation of internals 

6045 

6046 @final 

6047 def _protect_consolidate(self, f): 

6048 """ 

6049 Consolidate _mgr -- if the blocks have changed, then clear the 

6050 cache 

6051 """ 

6052 if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): 

6053 return f() 

6054 blocks_before = len(self._mgr.blocks) 

6055 result = f() 

6056 if len(self._mgr.blocks) != blocks_before: 

6057 self._clear_item_cache() 

6058 return result 

6059 

6060 @final 

6061 def _consolidate_inplace(self) -> None: 

6062 """Consolidate data in place and return None""" 

6063 

6064 def f() -> None: 

6065 self._mgr = self._mgr.consolidate() 

6066 

6067 self._protect_consolidate(f) 

6068 

6069 @final 

6070 def _consolidate(self): 

6071 """ 

6072 Compute NDFrame with "consolidated" internals (data of each dtype 

6073 grouped together in a single ndarray). 

6074 

6075 Returns 

6076 ------- 

6077 consolidated : same type as caller 

6078 """ 

6079 f = lambda: self._mgr.consolidate() 

6080 cons_data = self._protect_consolidate(f) 

6081 return self._constructor(cons_data).__finalize__(self) 

6082 

6083 @property 

6084 def _is_mixed_type(self) -> bool_t: 

6085 if self._mgr.is_single_block: 

6086 return False 

6087 

6088 if self._mgr.any_extension_types: 

6089 # Even if they have the same dtype, we can't consolidate them, 

6090 # so we pretend this is "mixed" 

6091 return True 

6092 

6093 return self.dtypes.nunique() > 1 

6094 

6095 @final 

6096 def _check_inplace_setting(self, value) -> bool_t: 

6097 """check whether we allow in-place setting with this type of value""" 

6098 if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: 

6099 # allow an actual np.nan through 

6100 if (is_float(value) and np.isnan(value)) or value is lib.no_default: 

6101 return True 

6102 

6103 raise TypeError( 

6104 "Cannot do inplace boolean setting on " 

6105 "mixed-types with a non np.nan value" 

6106 ) 

6107 

6108 return True 

6109 

6110 @final 

6111 def _get_numeric_data(self: NDFrameT) -> NDFrameT: 

6112 return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) 

6113 

6114 @final 

6115 def _get_bool_data(self): 

6116 return self._constructor(self._mgr.get_bool_data()).__finalize__(self) 

6117 

6118 # ---------------------------------------------------------------------- 

6119 # Internal Interface Methods 

6120 

6121 @property 

6122 def values(self): 

6123 raise AbstractMethodError(self) 

6124 

6125 @property 

6126 def _values(self) -> ArrayLike: 

6127 """internal implementation""" 

6128 raise AbstractMethodError(self) 

6129 

6130 @property 

6131 def dtypes(self): 

6132 """ 

6133 Return the dtypes in the DataFrame. 

6134 

6135 This returns a Series with the data type of each column. 

6136 The result's index is the original DataFrame's columns. Columns 

6137 with mixed types are stored with the ``object`` dtype. See 

6138 :ref:`the User Guide <basics.dtypes>` for more. 

6139 

6140 Returns 

6141 ------- 

6142 pandas.Series 

6143 The data type of each column. 

6144 

6145 Examples 

6146 -------- 

6147 >>> df = pd.DataFrame({'float': [1.0], 

6148 ... 'int': [1], 

6149 ... 'datetime': [pd.Timestamp('20180310')], 

6150 ... 'string': ['foo']}) 

6151 >>> df.dtypes 

6152 float float64 

6153 int int64 

6154 datetime datetime64[ns] 

6155 string object 

6156 dtype: object 

6157 """ 

6158 data = self._mgr.get_dtypes() 

6159 return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_) 

6160 

6161 def astype( 

6162 self: NDFrameT, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise" 

6163 ) -> NDFrameT: 

6164 """ 

6165 Cast a pandas object to a specified dtype ``dtype``. 

6166 

6167 Parameters 

6168 ---------- 

6169 dtype : str, data type, Series or Mapping of column name -> data type 

6170 Use a str, numpy.dtype, pandas.ExtensionDtype or Python type to 

6171 cast entire pandas object to the same type. Alternatively, use a 

6172 mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is 

6173 a numpy.dtype or Python type to cast one or more of the DataFrame's 

6174 columns to column-specific types. 

6175 copy : bool, default True 

6176 Return a copy when ``copy=True`` (be very careful setting 

6177 ``copy=False`` as changes to values then may propagate to other 

6178 pandas objects). 

6179 errors : {'raise', 'ignore'}, default 'raise' 

6180 Control raising of exceptions on invalid data for provided dtype. 

6181 

6182 - ``raise`` : allow exceptions to be raised 

6183 - ``ignore`` : suppress exceptions. On error return original object. 

6184 

6185 Returns 

6186 ------- 

6187 same type as caller 

6188 

6189 See Also 

6190 -------- 

6191 to_datetime : Convert argument to datetime. 

6192 to_timedelta : Convert argument to timedelta. 

6193 to_numeric : Convert argument to a numeric type. 

6194 numpy.ndarray.astype : Cast a numpy array to a specified type. 

6195 

6196 Notes 

6197 ----- 

6198 .. versionchanged:: 2.0.0 

6199 

6200 Using ``astype`` to convert from timezone-naive dtype to 

6201 timezone-aware dtype will raise an exception. 

6202 Use :meth:`Series.dt.tz_localize` instead. 

6203 

6204 Examples 

6205 -------- 

6206 Create a DataFrame: 

6207 

6208 >>> d = {'col1': [1, 2], 'col2': [3, 4]} 

6209 >>> df = pd.DataFrame(data=d) 

6210 >>> df.dtypes 

6211 col1 int64 

6212 col2 int64 

6213 dtype: object 

6214 

6215 Cast all columns to int32: 

6216 

6217 >>> df.astype('int32').dtypes 

6218 col1 int32 

6219 col2 int32 

6220 dtype: object 

6221 

6222 Cast col1 to int32 using a dictionary: 

6223 

6224 >>> df.astype({'col1': 'int32'}).dtypes 

6225 col1 int32 

6226 col2 int64 

6227 dtype: object 

6228 

6229 Create a series: 

6230 

6231 >>> ser = pd.Series([1, 2], dtype='int32') 

6232 >>> ser 

6233 0 1 

6234 1 2 

6235 dtype: int32 

6236 >>> ser.astype('int64') 

6237 0 1 

6238 1 2 

6239 dtype: int64 

6240 

6241 Convert to categorical type: 

6242 

6243 >>> ser.astype('category') 

6244 0 1 

6245 1 2 

6246 dtype: category 

6247 Categories (2, int32): [1, 2] 

6248 

6249 Convert to ordered categorical type with custom ordering: 

6250 

6251 >>> from pandas.api.types import CategoricalDtype 

6252 >>> cat_dtype = CategoricalDtype( 

6253 ... categories=[2, 1], ordered=True) 

6254 >>> ser.astype(cat_dtype) 

6255 0 1 

6256 1 2 

6257 dtype: category 

6258 Categories (2, int64): [2 < 1] 

6259 

6260 Create a series of dates: 

6261 

6262 >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) 

6263 >>> ser_date 

6264 0 2020-01-01 

6265 1 2020-01-02 

6266 2 2020-01-03 

6267 dtype: datetime64[ns] 

6268 """ 

6269 if copy and using_copy_on_write(): 

6270 copy = False 

6271 

6272 if is_dict_like(dtype): 

6273 if self.ndim == 1: # i.e. Series 

6274 if len(dtype) > 1 or self.name not in dtype: 

6275 raise KeyError( 

6276 "Only the Series name can be used for " 

6277 "the key in Series dtype mappings." 

6278 ) 

6279 new_type = dtype[self.name] 

6280 return self.astype(new_type, copy, errors) 

6281 

6282 # GH#44417 cast to Series so we can use .iat below, which will be 

6283 # robust in case we have duplicate column names 

6284 from pandas import Series 

6285 

6286 dtype_ser = Series(dtype, dtype=object) 

6287 

6288 for col_name in dtype_ser.index: 

6289 if col_name not in self: 

6290 raise KeyError( 

6291 "Only a column name can be used for the " 

6292 "key in a dtype mappings argument. " 

6293 f"'{col_name}' not found in columns." 

6294 ) 

6295 

6296 dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False) 

6297 

6298 results = [] 

6299 for i, (col_name, col) in enumerate(self.items()): 

6300 cdt = dtype_ser.iat[i] 

6301 if isna(cdt): 

6302 res_col = col.copy(deep=copy) 

6303 else: 

6304 try: 

6305 res_col = col.astype(dtype=cdt, copy=copy, errors=errors) 

6306 except ValueError as ex: 

6307 ex.args = ( 

6308 f"{ex}: Error while type casting for column '{col_name}'", 

6309 ) 

6310 raise 

6311 results.append(res_col) 

6312 

6313 elif is_extension_array_dtype(dtype) and self.ndim > 1: 

6314 # GH 18099/22869: columnwise conversion to extension dtype 

6315 # GH 24704: use iloc to handle duplicate column names 

6316 # TODO(EA2D): special case not needed with 2D EAs 

6317 results = [ 

6318 self.iloc[:, i].astype(dtype, copy=copy) 

6319 for i in range(len(self.columns)) 

6320 ] 

6321 

6322 else: 

6323 # else, only a single dtype is given 

6324 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 

6325 return self._constructor(new_data).__finalize__(self, method="astype") 

6326 

6327 # GH 33113: handle empty frame or series 

6328 if not results: 

6329 return self.copy(deep=None) 

6330 

6331 # GH 19920: retain column metadata after concat 

6332 result = concat(results, axis=1, copy=False) 

6333 # GH#40810 retain subclass 

6334 # error: Incompatible types in assignment 

6335 # (expression has type "NDFrameT", variable has type "DataFrame") 

6336 result = self._constructor(result) # type: ignore[assignment] 

6337 result.columns = self.columns 

6338 result = result.__finalize__(self, method="astype") 

6339 # https://github.com/python/mypy/issues/8354 

6340 return cast(NDFrameT, result) 
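# Illustrative sketch (not part of the pandas source): the per-column loop
# above rewraps a ValueError so the failing column is named in the message.
import pandas as pd

df = pd.DataFrame({"good": [1, 2], "bad": ["1", "x"]})
try:
    df.astype({"good": "int64", "bad": "int64"})
except ValueError as err:
    assert "column 'bad'" in str(err)
else:
    raise AssertionError("expected a ValueError")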

6341 

6342 @final 

6343 def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: 

6344 """ 

6345 Make a copy of this object's indices and data. 

6346 

6347 When ``deep=True`` (default), a new object will be created with a 

6348 copy of the calling object's data and indices. Modifications to 

6349 the data or indices of the copy will not be reflected in the 

6350 original object (see notes below). 

6351 

6352 When ``deep=False``, a new object will be created without copying 

6353 the calling object's data or index (only references to the data 

6354 and index are copied). Any changes to the data of the original 

6355 will be reflected in the shallow copy (and vice versa). 

6356 

6357 Parameters 

6358 ---------- 

6359 deep : bool, default True 

6360 Make a deep copy, including a copy of the data and the indices. 

6361 With ``deep=False`` neither the indices nor the data are copied. 

6362 

6363 Returns 

6364 ------- 

6365 Series or DataFrame 

6366 Object type matches caller. 

6367 

6368 Notes 

6369 ----- 

6370 When ``deep=True``, data is copied but actual Python objects 

6371 will not be copied recursively, only the reference to the object. 

6372 This is in contrast to `copy.deepcopy` in the Standard Library, 

6373 which recursively copies object data (see examples below). 

6374 

6375 While ``Index`` objects are copied when ``deep=True``, the underlying 

6376 numpy array is not copied for performance reasons. Since ``Index`` is 

6377 immutable, the underlying data can be safely shared and a copy 

6378 is not needed. 

6379 

6380 Since pandas is not thread safe, see the 

6381 :ref:`gotchas <gotchas.thread-safety>` when copying in a threading 

6382 environment. 

6383 

6384 Examples 

6385 -------- 

6386 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6387 >>> s 

6388 a 1 

6389 b 2 

6390 dtype: int64 

6391 

6392 >>> s_copy = s.copy() 

6393 >>> s_copy 

6394 a 1 

6395 b 2 

6396 dtype: int64 

6397 

6398 **Shallow copy versus default (deep) copy:** 

6399 

6400 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6401 >>> deep = s.copy() 

6402 >>> shallow = s.copy(deep=False) 

6403 

6404 Shallow copy shares data and index with original. 

6405 

6406 >>> s is shallow 

6407 False 

6408 >>> s.values is shallow.values and s.index is shallow.index 

6409 True 

6410 

6411 Deep copy has own copy of data and index. 

6412 

6413 >>> s is deep 

6414 False 

6415 >>> s.values is deep.values or s.index is deep.index 

6416 False 

6417 

6418 Updates to the data shared by shallow copy and original are reflected 

6419 in both; deep copy remains unchanged. 

6420 

6421 >>> s[0] = 3 

6422 >>> shallow[1] = 4 

6423 >>> s 

6424 a 3 

6425 b 4 

6426 dtype: int64 

6427 >>> shallow 

6428 a 3 

6429 b 4 

6430 dtype: int64 

6431 >>> deep 

6432 a 1 

6433 b 2 

6434 dtype: int64 

6435 

6436 Note that when copying an object containing Python objects, a deep copy 

6437 will copy the data, but will not do so recursively. Updating a nested 

6438 data object will be reflected in the deep copy. 

6439 

6440 >>> s = pd.Series([[1, 2], [3, 4]]) 

6441 >>> deep = s.copy() 

6442 >>> s[0][0] = 10 

6443 >>> s 

6444 0 [10, 2] 

6445 1 [3, 4] 

6446 dtype: object 

6447 >>> deep 

6448 0 [10, 2] 

6449 1 [3, 4] 

6450 dtype: object 

6451 """ 

6452 data = self._mgr.copy(deep=deep) 

6453 self._clear_item_cache() 

6454 return self._constructor(data).__finalize__(self, method="copy") 

6455 

6456 @final 

6457 def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT: 

6458 return self.copy(deep=deep) 

6459 

6460 @final 

6461 def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: 

6462 """ 

6463 Parameters 

6464 ---------- 

6465 memo, default None 

6466 Standard signature. Unused 

6467 """ 

6468 return self.copy(deep=True) 

6469 

6470 @final 

6471 def infer_objects(self: NDFrameT, copy: bool_t | None = None) -> NDFrameT: 

6472 """ 

6473 Attempt to infer better dtypes for object columns. 

6474 

6475 Attempts soft conversion of object-dtyped 

6476 columns, leaving non-object and unconvertible 

6477 columns unchanged. The inference rules are the 

6478 same as during normal Series/DataFrame construction. 

6479 

6480 Parameters 

6481 ---------- 

6482 copy : bool, default True 

6483 Whether to make a copy for non-object or non-inferrable columns 

6484 or Series. 

6485 

6486 Returns 

6487 ------- 

6488 same type as input object 

6489 

6490 See Also 

6491 -------- 

6492 to_datetime : Convert argument to datetime. 

6493 to_timedelta : Convert argument to timedelta. 

6494 to_numeric : Convert argument to numeric type. 

6495 convert_dtypes : Convert argument to best possible dtype. 

6496 

6497 Examples 

6498 -------- 

6499 >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) 

6500 >>> df = df.iloc[1:] 

6501 >>> df 

6502 A 

6503 1 1 

6504 2 2 

6505 3 3 

6506 

6507 >>> df.dtypes 

6508 A object 

6509 dtype: object 

6510 

6511 >>> df.infer_objects().dtypes 

6512 A int64 

6513 dtype: object 

6514 """ 

6515 new_mgr = self._mgr.convert(copy=copy) 

6516 return self._constructor(new_mgr).__finalize__(self, method="infer_objects") 

6517 

6518 @final 

6519 def convert_dtypes( 

6520 self: NDFrameT, 

6521 infer_objects: bool_t = True, 

6522 convert_string: bool_t = True, 

6523 convert_integer: bool_t = True, 

6524 convert_boolean: bool_t = True, 

6525 convert_floating: bool_t = True, 

6526 dtype_backend: DtypeBackend = "numpy_nullable", 

6527 ) -> NDFrameT: 

6528 """ 

6529 Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``. 

6530 

6531 Parameters 

6532 ---------- 

6533 infer_objects : bool, default True 

6534 Whether object dtypes should be converted to the best possible types. 

6535 convert_string : bool, default True 

6536 Whether object dtypes should be converted to ``StringDtype()``. 

6537 convert_integer : bool, default True 

6538 Whether, if possible, conversion can be done to integer extension types. 

6539 convert_boolean : bool, default True 

6540 Whether object dtypes should be converted to ``BooleanDtypes()``. 

6541 convert_floating : bool, default True 

6542 Whether, if possible, conversion can be done to floating extension types. 

6543 If `convert_integer` is also True, preference will be given to integer 

6544 dtypes if the floats can be faithfully cast to integers. 

6545 

6546 .. versionadded:: 1.2.0 

6547 dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable" 

6548 Which dtype backend to use: when "numpy_nullable" is set, a DataFrame 

6549 uses nullable dtypes for all dtypes that have a nullable 

6550 implementation; when "pyarrow" is set, pyarrow-backed 

6551 dtypes are used for all dtypes. 

6552 

6553 The dtype backends are still experimental. 

6554 

6555 .. versionadded:: 2.0 

6556 

6557 Returns 

6558 ------- 

6559 Series or DataFrame 

6560 Copy of input object with new dtype. 

6561 

6562 See Also 

6563 -------- 

6564 infer_objects : Infer dtypes of objects. 

6565 to_datetime : Convert argument to datetime. 

6566 to_timedelta : Convert argument to timedelta. 

6567 to_numeric : Convert argument to a numeric type. 

6568 

6569 Notes 

6570 ----- 

6571 By default, ``convert_dtypes`` will attempt to convert a Series (or each 

6572 Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options 

6573 ``convert_string``, ``convert_integer``, ``convert_boolean`` and 

6574 ``convert_floating``, it is possible to turn off individual conversions 

6575 to ``StringDtype``, the integer extension types, ``BooleanDtype`` 

6576 or floating extension types, respectively. 

6577 

6578 For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference 

6579 rules as during normal Series/DataFrame construction. Then, if possible, 

6580 convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer 

6581 or floating extension type, otherwise leave as ``object``. 

6582 

6583 If the dtype is integer, convert to an appropriate integer extension type. 

6584 

6585 If the dtype is numeric, and consists of all integers, convert to an 

6586 appropriate integer extension type. Otherwise, convert to an 

6587 appropriate floating extension type. 

6588 

6589 .. versionchanged:: 1.2 

6590 Starting with pandas 1.2, this method also converts float columns 

6591 to the nullable floating extension type. 

6592 

6593 In the future, as new dtypes are added that support ``pd.NA``, the results 

6594 of this method will change to support those new dtypes. 

6595 

6596 Examples 

6597 -------- 

6598 >>> df = pd.DataFrame( 

6599 ... { 

6600 ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), 

6601 ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), 

6602 ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")), 

6603 ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")), 

6604 ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")), 

6605 ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), 

6606 ... } 

6607 ... ) 

6608 

6609 Start with a DataFrame with default dtypes. 

6610 

6611 >>> df 

6612 a b c d e f 

6613 0 1 x True h 10.0 NaN 

6614 1 2 y False i NaN 100.5 

6615 2 3 z NaN NaN 20.0 200.0 

6616 

6617 >>> df.dtypes 

6618 a int32 

6619 b object 

6620 c object 

6621 d object 

6622 e float64 

6623 f float64 

6624 dtype: object 

6625 

6626 Convert the DataFrame to use best possible dtypes. 

6627 

6628 >>> dfn = df.convert_dtypes() 

6629 >>> dfn 

6630 a b c d e f 

6631 0 1 x True h 10 <NA> 

6632 1 2 y False i <NA> 100.5 

6633 2 3 z <NA> <NA> 20 200.0 

6634 

6635 >>> dfn.dtypes 

6636 a Int32 

6637 b string[python] 

6638 c boolean 

6639 d string[python] 

6640 e Int64 

6641 f Float64 

6642 dtype: object 

6643 

6644 Start with a Series of strings and missing data represented by ``np.nan``. 

6645 

6646 >>> s = pd.Series(["a", "b", np.nan]) 

6647 >>> s 

6648 0 a 

6649 1 b 

6650 2 NaN 

6651 dtype: object 

6652 

6653 Obtain a Series with dtype ``StringDtype``. 

6654 

6655 >>> s.convert_dtypes() 

6656 0 a 

6657 1 b 

6658 2 <NA> 

6659 dtype: string 

6660 """ 

6661 check_dtype_backend(dtype_backend) 

6662 if self.ndim == 1: 

6663 return self._convert_dtypes( 

6664 infer_objects, 

6665 convert_string, 

6666 convert_integer, 

6667 convert_boolean, 

6668 convert_floating, 

6669 dtype_backend=dtype_backend, 

6670 ) 

6671 else: 

6672 results = [ 

6673 col._convert_dtypes( 

6674 infer_objects, 

6675 convert_string, 

6676 convert_integer, 

6677 convert_boolean, 

6678 convert_floating, 

6679 dtype_backend=dtype_backend, 

6680 ) 

6681 for col_name, col in self.items() 

6682 ] 

6683 if len(results) > 0: 

6684 result = concat(results, axis=1, copy=False, keys=self.columns) 

6685 cons = cast(Type["DataFrame"], self._constructor) 

6686 result = cons(result) 

6687 result = result.__finalize__(self, method="convert_dtypes") 

6688 # https://github.com/python/mypy/issues/8354 

6689 return cast(NDFrameT, result) 

6690 else: 

6691 return self.copy(deep=None) 
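# Illustrative sketch (not part of the pandas source): the ``convert_*`` flags
# above switch off individual conversions.
import pandas as pd

df = pd.DataFrame({"n": [1, 2], "s": ["x", "y"]})
out = df.convert_dtypes(convert_integer=False)
assert str(out.dtypes["n"]) == "int64"   # integer conversion suppressed
assert str(out.dtypes["s"]) == "string"  # strings still become StringDtype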

6692 

6693 # ---------------------------------------------------------------------- 

6694 # Filling NA's 

6695 

6696 @overload 

6697 def fillna( 

6698 self: NDFrameT, 

6699 value: Hashable | Mapping | Series | DataFrame = ..., 

6700 *, 

6701 method: FillnaOptions | None = ..., 

6702 axis: Axis | None = ..., 

6703 inplace: Literal[False] = ..., 

6704 limit: int | None = ..., 

6705 downcast: dict | None = ..., 

6706 ) -> NDFrameT: 

6707 ... 

6708 

6709 @overload 

6710 def fillna( 

6711 self, 

6712 value: Hashable | Mapping | Series | DataFrame = ..., 

6713 *, 

6714 method: FillnaOptions | None = ..., 

6715 axis: Axis | None = ..., 

6716 inplace: Literal[True], 

6717 limit: int | None = ..., 

6718 downcast: dict | None = ..., 

6719 ) -> None: 

6720 ... 

6721 

6722 @overload 

6723 def fillna( 

6724 self: NDFrameT, 

6725 value: Hashable | Mapping | Series | DataFrame = ..., 

6726 *, 

6727 method: FillnaOptions | None = ..., 

6728 axis: Axis | None = ..., 

6729 inplace: bool_t = ..., 

6730 limit: int | None = ..., 

6731 downcast: dict | None = ..., 

6732 ) -> NDFrameT | None: 

6733 ... 

6734 

6735 @doc(**_shared_doc_kwargs) 

6736 def fillna( 

6737 self: NDFrameT, 

6738 value: Hashable | Mapping | Series | DataFrame = None, 

6739 *, 

6740 method: FillnaOptions | None = None, 

6741 axis: Axis | None = None, 

6742 inplace: bool_t = False, 

6743 limit: int | None = None, 

6744 downcast: dict | None = None, 

6745 ) -> NDFrameT | None: 

6746 """ 

6747 Fill NA/NaN values using the specified method. 

6748 

6749 Parameters 

6750 ---------- 

6751 value : scalar, dict, Series, or DataFrame 

6752 Value to use to fill holes (e.g. 0), alternately a 

6753 dict/Series/DataFrame of values specifying which value to use for 

6754 each index (for a Series) or column (for a DataFrame). Values not 

6755 in the dict/Series/DataFrame will not be filled. This value cannot 

6756 be a list. 

6757 method : {{'backfill', 'bfill', 'ffill', None}}, default None 

6758 Method to use for filling holes in reindexed Series: 

6759 

6760 * ffill: propagate last valid observation forward to next valid. 

6761 * backfill / bfill: use next valid observation to fill gap. 

6762 

6763 axis : {axes_single_arg} 

6764 Axis along which to fill missing values. For `Series` 

6765 this parameter is unused and defaults to 0. 

6766 inplace : bool, default False 

6767 If True, fill in-place. Note: this will modify any 

6768 other views on this object (e.g., a no-copy slice for a column in a 

6769 DataFrame). 

6770 limit : int, default None 

6771 If method is specified, this is the maximum number of consecutive 

6772 NaN values to forward/backward fill. In other words, if there is 

6773 a gap with more than this number of consecutive NaNs, it will only 

6774 be partially filled. If method is not specified, this is the 

6775 maximum number of entries along the entire axis where NaNs will be 

6776 filled. Must be greater than 0 if not None. 

6777 downcast : dict, default is None 

6778 A dict of item->dtype of what to downcast if possible, 

6779 or the string 'infer' which will try to downcast to an appropriate 

6780 equal type (e.g. float64 to int64 if possible). 

6781 

6782 Returns 

6783 ------- 

6784 {klass} or None 

6785 Object with missing values filled or None if ``inplace=True``. 

6786 

6787 See Also 

6788 -------- 

6789 interpolate : Fill NaN values using interpolation. 

6790 reindex : Conform object to new index. 

6791 asfreq : Convert TimeSeries to specified frequency. 

6792 

6793 Examples 

6794 -------- 

6795 >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], 

6796 ... [3, 4, np.nan, 1], 

6797 ... [np.nan, np.nan, np.nan, np.nan], 

6798 ... [np.nan, 3, np.nan, 4]], 

6799 ... columns=list("ABCD")) 

6800 >>> df 

6801 A B C D 

6802 0 NaN 2.0 NaN 0.0 

6803 1 3.0 4.0 NaN 1.0 

6804 2 NaN NaN NaN NaN 

6805 3 NaN 3.0 NaN 4.0 

6806 

6807 Replace all NaN elements with 0s. 

6808 

6809 >>> df.fillna(0) 

6810 A B C D 

6811 0 0.0 2.0 0.0 0.0 

6812 1 3.0 4.0 0.0 1.0 

6813 2 0.0 0.0 0.0 0.0 

6814 3 0.0 3.0 0.0 4.0 

6815 

6816 We can also propagate non-null values forward or backward. 

6817 

6818 >>> df.fillna(method="ffill") 

6819 A B C D 

6820 0 NaN 2.0 NaN 0.0 

6821 1 3.0 4.0 NaN 1.0 

6822 2 3.0 4.0 NaN 1.0 

6823 3 3.0 3.0 NaN 4.0 

6824 

6825 Replace all NaN elements in columns 'A', 'B', 'C', and 'D' with 0, 1, 

6826 2, and 3 respectively. 

6827 

6828 >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}} 

6829 >>> df.fillna(value=values) 

6830 A B C D 

6831 0 0.0 2.0 2.0 0.0 

6832 1 3.0 4.0 2.0 1.0 

6833 2 0.0 1.0 2.0 3.0 

6834 3 0.0 3.0 2.0 4.0 

6835 

6836 Only replace the first NaN element. 

6837 

6838 >>> df.fillna(value=values, limit=1) 

6839 A B C D 

6840 0 0.0 2.0 2.0 0.0 

6841 1 3.0 4.0 NaN 1.0 

6842 2 NaN 1.0 NaN 3.0 

6843 3 NaN 3.0 NaN 4.0 

6844 

6845 When filling using a DataFrame, replacement happens along 

6846 the same column names and same indices. 

6847 

6848 >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE")) 

6849 >>> df.fillna(df2) 

6850 A B C D 

6851 0 0.0 2.0 0.0 0.0 

6852 1 3.0 4.0 0.0 1.0 

6853 2 0.0 0.0 0.0 NaN 

6854 3 0.0 3.0 0.0 4.0 

6855 

6856 Note that column D is not affected since it is not present in df2. 

6857 """ 

6858 inplace = validate_bool_kwarg(inplace, "inplace") 

6859 value, method = validate_fillna_kwargs(value, method) 

6860 

6861 # set the default here, so functions examining the signature 

6862 # can detect if something was set (e.g. in groupby) (GH9221) 

6863 if axis is None: 

6864 axis = 0 

6865 axis = self._get_axis_number(axis) 

6866 

6867 if value is None: 

6868 if not self._mgr.is_single_block and axis == 1: 

6869 if inplace: 

6870 raise NotImplementedError() 

6871 result = self.T.fillna(method=method, limit=limit).T 

6872 

6873 return result 

6874 

6875 new_data = self._mgr.interpolate( 

6876 method=method, 

6877 axis=axis, 

6878 limit=limit, 

6879 inplace=inplace, 

6880 downcast=downcast, 

6881 ) 

6882 else: 

6883 if self.ndim == 1: 

6884 if isinstance(value, (dict, ABCSeries)): 

6885 if not len(value): 

6886 # test_fillna_nonscalar 

6887 if inplace: 

6888 return None 

6889 return self.copy(deep=None) 

6890 from pandas import Series 

6891 

6892 value = Series(value) 

6893 value = value.reindex(self.index, copy=False) 

6894 value = value._values 

6895 elif not is_list_like(value): 

6896 pass 

6897 else: 

6898 raise TypeError( 

6899 '"value" parameter must be a scalar, dict ' 

6900 "or Series, but you passed a " 

6901 f'"{type(value).__name__}"' 

6902 ) 

6903 

6904 new_data = self._mgr.fillna( 

6905 value=value, limit=limit, inplace=inplace, downcast=downcast 

6906 ) 

6907 

6908 elif isinstance(value, (dict, ABCSeries)): 

6909 if axis == 1: 

6910 raise NotImplementedError( 

6911 "Currently only can fill " 

6912 "with dict/Series column " 

6913 "by column" 

6914 ) 

6915 if using_copy_on_write(): 

6916 result = self.copy(deep=None) 

6917 else: 

6918 result = self if inplace else self.copy() 

6919 is_dict = isinstance(downcast, dict) 

6920 for k, v in value.items(): 

6921 if k not in result: 

6922 continue 

6923 

6924 # error: Item "None" of "Optional[Dict[Any, Any]]" has no 

6925 # attribute "get" 

6926 downcast_k = ( 

6927 downcast 

6928 if not is_dict 

6929 else downcast.get(k) # type: ignore[union-attr] 

6930 ) 

6931 

6932 res_k = result[k].fillna(v, limit=limit, downcast=downcast_k) 

6933 

6934 if not inplace: 

6935 result[k] = res_k 

6936 else: 

6937 # We can write into our existing column(s) iff dtype 

6938 # was preserved. 

6939 if isinstance(res_k, ABCSeries): 

6940 # i.e. 'k' only shows up once in self.columns 

6941 if res_k.dtype == result[k].dtype: 

6942 result.loc[:, k] = res_k 

6943 else: 

6944 # Different dtype -> no way to do inplace. 

6945 result[k] = res_k 

6946 else: 

6947 # see test_fillna_dict_inplace_nonunique_columns 

6948 locs = result.columns.get_loc(k) 

6949 if isinstance(locs, slice): 

6950 locs = np.arange(self.shape[1])[locs] 

6951 elif ( 

6952 isinstance(locs, np.ndarray) and locs.dtype.kind == "b" 

6953 ): 

6954 locs = locs.nonzero()[0] 

6955 elif not ( 

6956 isinstance(locs, np.ndarray) and locs.dtype.kind == "i" 

6957 ): 

6958 # Should never be reached, but let's cover our bases 

6959 raise NotImplementedError( 

6960 "Unexpected get_loc result, please report a bug at " 

6961 "https://github.com/pandas-dev/pandas" 

6962 ) 

6963 

6964 for i, loc in enumerate(locs): 

6965 res_loc = res_k.iloc[:, i] 

6966 target = self.iloc[:, loc] 

6967 

6968 if res_loc.dtype == target.dtype: 

6969 result.iloc[:, loc] = res_loc 

6970 else: 

6971 result.isetitem(loc, res_loc) 

6972 if inplace: 

6973 return self._update_inplace(result) 

6974 else: 

6975 return result 

6976 

6977 elif not is_list_like(value): 

6978 if axis == 1: 

6979 result = self.T.fillna(value=value, limit=limit).T 

6980 

6981 new_data = result 

6982 else: 

6983 new_data = self._mgr.fillna( 

6984 value=value, limit=limit, inplace=inplace, downcast=downcast 

6985 ) 

6986 elif isinstance(value, ABCDataFrame) and self.ndim == 2: 

6987 new_data = self.where(self.notna(), value)._mgr 

6988 else: 

6989 raise ValueError(f"invalid fill value with a {type(value)}") 

6990 

6991 result = self._constructor(new_data) 

6992 if inplace: 

6993 return self._update_inplace(result) 

6994 else: 

6995 return result.__finalize__(self, method="fillna") 
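# Illustrative sketch (not part of the pandas source): a Series ``value`` is
# reindexed to the caller's index (see the ndim == 1 branch above), so only
# aligned labels are filled.
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.nan], index=["a", "b", "c"])
out = s.fillna(pd.Series({"b": 9.0}))  # no fill value for "c"
assert out["b"] == 9.0 and np.isnan(out["c"])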

6996 

6997 @overload 

6998 def ffill( 

6999 self: NDFrameT, 

7000 *, 

7001 axis: None | Axis = ..., 

7002 inplace: Literal[False] = ..., 

7003 limit: None | int = ..., 

7004 downcast: dict | None = ..., 

7005 ) -> NDFrameT: 

7006 ... 

7007 

7008 @overload 

7009 def ffill( 

7010 self, 

7011 *, 

7012 axis: None | Axis = ..., 

7013 inplace: Literal[True], 

7014 limit: None | int = ..., 

7015 downcast: dict | None = ..., 

7016 ) -> None: 

7017 ... 

7018 

7019 @overload 

7020 def ffill( 

7021 self: NDFrameT, 

7022 *, 

7023 axis: None | Axis = ..., 

7024 inplace: bool_t = ..., 

7025 limit: None | int = ..., 

7026 downcast: dict | None = ..., 

7027 ) -> NDFrameT | None: 

7028 ... 

7029 

7030 @doc(klass=_shared_doc_kwargs["klass"]) 

7031 def ffill( 

7032 self: NDFrameT, 

7033 *, 

7034 axis: None | Axis = None, 

7035 inplace: bool_t = False, 

7036 limit: None | int = None, 

7037 downcast: dict | None = None, 

7038 ) -> NDFrameT | None: 

7039 """ 

7040 Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. 

7041 

7042 Returns 

7043 ------- 

7044 {klass} or None 

7045 Object with missing values filled or None if ``inplace=True``. 

7046 """ 

7047 return self.fillna( 

7048 method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

7049 ) 

7050 

7051 @doc(klass=_shared_doc_kwargs["klass"]) 

7052 def pad( 

7053 self: NDFrameT, 

7054 *, 

7055 axis: None | Axis = None, 

7056 inplace: bool_t = False, 

7057 limit: None | int = None, 

7058 downcast: dict | None = None, 

7059 ) -> NDFrameT | None: 

7060 """ 

7061 Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. 

7062 

7063 .. deprecated:: 2.0 

7064 

7065 {klass}.pad is deprecated. Use {klass}.ffill instead. 

7066 

7067 Returns 

7068 ------- 

7069 {klass} or None 

7070 Object with missing values filled or None if ``inplace=True``. 

7071 """ 

7072 warnings.warn( 

7073 "DataFrame.pad/Series.pad is deprecated. Use " 

7074 "DataFrame.ffill/Series.ffill instead", 

7075 FutureWarning, 

7076 stacklevel=find_stack_level(), 

7077 ) 

7078 return self.ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) 

7079 

7080 @overload 

7081 def bfill( 

7082 self: NDFrameT, 

7083 *, 

7084 axis: None | Axis = ..., 

7085 inplace: Literal[False] = ..., 

7086 limit: None | int = ..., 

7087 downcast: dict | None = ..., 

7088 ) -> NDFrameT: 

7089 ... 

7090 

7091 @overload 

7092 def bfill( 

7093 self, 

7094 *, 

7095 axis: None | Axis = ..., 

7096 inplace: Literal[True], 

7097 limit: None | int = ..., 

7098 downcast: dict | None = ..., 

7099 ) -> None: 

7100 ... 

7101 

7102 @overload 

7103 def bfill( 

7104 self: NDFrameT, 

7105 *, 

7106 axis: None | Axis = ..., 

7107 inplace: bool_t = ..., 

7108 limit: None | int = ..., 

7109 downcast: dict | None = ..., 

7110 ) -> NDFrameT | None: 

7111 ... 

7112 

7113 @doc(klass=_shared_doc_kwargs["klass"]) 

7114 def bfill( 

7115 self: NDFrameT, 

7116 *, 

7117 axis: None | Axis = None, 

7118 inplace: bool_t = False, 

7119 limit: None | int = None, 

7120 downcast: dict | None = None, 

7121 ) -> NDFrameT | None: 

7122 """ 

7123 Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. 

7124 

7125 Returns 

7126 ------- 

7127 {klass} or None 

7128 Object with missing values filled or None if ``inplace=True``. 

7129 """ 

7130 return self.fillna( 

7131 method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

7132 ) 
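# Illustrative sketch (not part of the pandas source): ffill/bfill simply
# forward to fillna with the matching method, so these pairs agree.
import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, 2.0])
assert s.ffill().equals(s.fillna(method="ffill"))
assert s.bfill().equals(s.fillna(method="bfill"))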

7133 

7134 @doc(klass=_shared_doc_kwargs["klass"]) 

7135 def backfill( 

7136 self: NDFrameT, 

7137 *, 

7138 axis: None | Axis = None, 

7139 inplace: bool_t = False, 

7140 limit: None | int = None, 

7141 downcast: dict | None = None, 

7142 ) -> NDFrameT | None: 

7143 """ 

7144 Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. 

7145 

7146 .. deprecated:: 2.0 

7147 

7148 {klass}.backfill is deprecated. Use {klass}.bfill instead. 

7149 

7150 Returns 

7151 ------- 

7152 {klass} or None 

7153 Object with missing values filled or None if ``inplace=True``. 

7154 """ 

7155 warnings.warn( 

7156 "DataFrame.backfill/Series.backfill is deprecated. Use " 

7157 "DataFrame.bfill/Series.bfill instead", 

7158 FutureWarning, 

7159 stacklevel=find_stack_level(), 

7160 ) 

7161 return self.bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) 

7162 

7163 @overload 

7164 def replace( 

7165 self: NDFrameT, 

7166 to_replace=..., 

7167 value=..., 

7168 *, 

7169 inplace: Literal[False] = ..., 

7170 limit: int | None = ..., 

7171 regex: bool_t = ..., 

7172 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7173 ) -> NDFrameT: 

7174 ... 

7175 

7176 @overload 

7177 def replace( 

7178 self, 

7179 to_replace=..., 

7180 value=..., 

7181 *, 

7182 inplace: Literal[True], 

7183 limit: int | None = ..., 

7184 regex: bool_t = ..., 

7185 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7186 ) -> None: 

7187 ... 

7188 

7189 @overload 

7190 def replace( 

7191 self: NDFrameT, 

7192 to_replace=..., 

7193 value=..., 

7194 *, 

7195 inplace: bool_t = ..., 

7196 limit: int | None = ..., 

7197 regex: bool_t = ..., 

7198 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7199 ) -> NDFrameT | None: 

7200 ... 

7201 

7202 @doc( 

7203 _shared_docs["replace"], 

7204 klass=_shared_doc_kwargs["klass"], 

7205 inplace=_shared_doc_kwargs["inplace"], 

7206 replace_iloc=_shared_doc_kwargs["replace_iloc"], 

7207 ) 

7208 def replace( 

7209 self: NDFrameT, 

7210 to_replace=None, 

7211 value=lib.no_default, 

7212 *, 

7213 inplace: bool_t = False, 

7214 limit: int | None = None, 

7215 regex: bool_t = False, 

7216 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, 

7217 ) -> NDFrameT | None: 

7218 if not ( 

7219 is_scalar(to_replace) 

7220 or is_re_compilable(to_replace) 

7221 or is_list_like(to_replace) 

7222 ): 

7223 raise TypeError( 

7224 "Expecting 'to_replace' to be either a scalar, array-like, " 

7225 "dict or None, got invalid type " 

7226 f"{repr(type(to_replace).__name__)}" 

7227 ) 

7228 

7229 inplace = validate_bool_kwarg(inplace, "inplace") 

7230 if not is_bool(regex) and to_replace is not None: 

7231 raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") 

7232 

7233 if value is lib.no_default or method is not lib.no_default: 

7234 # GH#36984 if the user explicitly passes value=None we want to 

7235 # respect that. We have the corner case where the user explicitly 

7236 # passes value=None *and* a method, which we interpret as meaning 

7237 # they want the (documented) default behavior. 
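# Illustrative note (added; hypothetical example, not an original
# comment): with s = pd.Series([0, 1, 2]), s.replace(1) pads the
# replaced position from the previous value, giving [0, 0, 2].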

7238 if method is lib.no_default: 

7239 # TODO: get this to show up as the default in the docs? 

7240 method = "pad" 

7241 

7242 # passing a single scalar-like value 

7243 # when value is None (GH5319), for compat 

7244 if not is_dict_like(to_replace) and not is_dict_like(regex): 

7245 to_replace = [to_replace] 

7246 

7247 if isinstance(to_replace, (tuple, list)): 

7248 # TODO: Consider copy-on-write for non-replaced columns here 

7249 if isinstance(self, ABCDataFrame): 

7250 from pandas import Series 

7251 

7252 result = self.apply( 

7253 Series._replace_single, 

7254 args=(to_replace, method, inplace, limit), 

7255 ) 

7256 if inplace: 

7257 return None 

7258 return result 

7259 return self._replace_single(to_replace, method, inplace, limit) 

7260 

7261 if not is_dict_like(to_replace): 

7262 if not is_dict_like(regex): 

7263 raise TypeError( 

7264 'If "to_replace" and "value" are both None ' 

7265 'and "to_replace" is not a list, then ' 

7266 "regex must be a mapping" 

7267 ) 

7268 to_replace = regex 

7269 regex = True 

7270 

7271 items = list(to_replace.items()) 

7272 if items: 

7273 keys, values = zip(*items) 

7274 else: 

7275 keys, values = ([], []) 

7276 

7277 are_mappings = [is_dict_like(v) for v in values] 

7278 

7279 if any(are_mappings): 

7280 if not all(are_mappings): 

7281 raise TypeError( 

7282 "If a nested mapping is passed, all values " 

7283 "of the top level mapping must be mappings" 

7284 ) 

7285 # passed a nested dict/Series 

7286 to_rep_dict = {} 

7287 value_dict = {} 

7288 

7289 for k, v in items: 

7290 keys, values = list(zip(*v.items())) or ([], []) 

7291 

7292 to_rep_dict[k] = list(keys) 

7293 value_dict[k] = list(values) 

7294 

7295 to_replace, value = to_rep_dict, value_dict 

7296 else: 

7297 to_replace, value = keys, values 

7298 

7299 return self.replace( 

7300 to_replace, value, inplace=inplace, limit=limit, regex=regex 

7301 ) 

7302 else: 

7303 # need a non-zero len on all axes 

7304 if not self.size: 

7305 if inplace: 

7306 return None 

7307 return self.copy(deep=None) 

7308 

7309 if is_dict_like(to_replace): 

7310 if is_dict_like(value): # {'A' : NA} -> {'A' : 0} 

7311 # Note: Checking below for `in foo.keys()` instead of 

7312 # `in foo` is needed for when we have a Series and not dict 

7313 mapping = { 

7314 col: (to_replace[col], value[col]) 

7315 for col in to_replace.keys() 

7316 if col in value.keys() and col in self 

7317 } 

7318 return self._replace_columnwise(mapping, inplace, regex) 

7319 

7320 # {'A': NA} -> 0 

7321 elif not is_list_like(value): 

7322 # Operate column-wise 

7323 if self.ndim == 1: 

7324 raise ValueError( 

7325 "Series.replace cannot use dict-like to_replace " 

7326 "and non-None value" 

7327 ) 

7328 mapping = { 

7329 col: (to_rep, value) for col, to_rep in to_replace.items() 

7330 } 

7331 return self._replace_columnwise(mapping, inplace, regex) 

7332 else: 

7333 raise TypeError("value argument must be scalar, dict, or Series") 

7334 

7335 elif is_list_like(to_replace): 

7336 if not is_list_like(value): 

7337 # e.g. to_replace = [NA, ''] and value is 0, 

7338 # so we replace NA with 0 and then replace '' with 0 

7339 value = [value] * len(to_replace) 

7340 

7341 # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] 

7342 if len(to_replace) != len(value): 

7343 raise ValueError( 

7344 f"Replacement lists must match in length. " 

7345 f"Expecting {len(to_replace)} got {len(value)} " 

7346 ) 

7347 new_data = self._mgr.replace_list( 

7348 src_list=to_replace, 

7349 dest_list=value, 

7350 inplace=inplace, 

7351 regex=regex, 

7352 ) 

7353 

7354 elif to_replace is None: 

7355 if not ( 

7356 is_re_compilable(regex) 

7357 or is_list_like(regex) 

7358 or is_dict_like(regex) 

7359 ): 

7360 raise TypeError( 

7361 f"'regex' must be a string or a compiled regular expression " 

7362 f"or a list or dict of strings or regular expressions, " 

7363 f"you passed a {repr(type(regex).__name__)}" 

7364 ) 

7365 return self.replace( 

7366 regex, value, inplace=inplace, limit=limit, regex=True 

7367 ) 

7368 else: 

7369 # dest iterable dict-like 

7370 if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} 

7371 # Operate column-wise 

7372 if self.ndim == 1: 

7373 raise ValueError( 

7374 "Series.replace cannot use dict-value and " 

7375 "non-None to_replace" 

7376 ) 

7377 mapping = {col: (to_replace, val) for col, val in value.items()} 

7378 return self._replace_columnwise(mapping, inplace, regex) 

7379 

7380 elif not is_list_like(value): # NA -> 0 

7381 regex = should_use_regex(regex, to_replace) 

7382 if regex: 

7383 new_data = self._mgr.replace_regex( 

7384 to_replace=to_replace, 

7385 value=value, 

7386 inplace=inplace, 

7387 ) 

7388 else: 

7389 new_data = self._mgr.replace( 

7390 to_replace=to_replace, value=value, inplace=inplace 

7391 ) 

7392 else: 

7393 raise TypeError( 

7394 f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' 

7395 ) 

7396 

7397 result = self._constructor(new_data) 

7398 if inplace: 

7399 return self._update_inplace(result) 

7400 else: 

7401 return result.__finalize__(self, method="replace") 

7402 

7403 def interpolate( 

7404 self: NDFrameT, 

7405 method: str = "linear", 

7406 *, 

7407 axis: Axis = 0, 

7408 limit: int | None = None, 

7409 inplace: bool_t = False, 

7410 limit_direction: str | None = None, 

7411 limit_area: str | None = None, 

7412 downcast: str | None = None, 

7413 **kwargs, 

7414 ) -> NDFrameT | None: 

7415 """ 

7416 Fill NaN values using an interpolation method. 

7417 

7418 Please note that only ``method='linear'`` is supported for 

7419 DataFrame/Series with a MultiIndex. 

7420 

7421 Parameters 

7422 ---------- 

7423 method : str, default 'linear' 

7424 Interpolation technique to use. One of: 

7425 

7426 * 'linear': Ignore the index and treat the values as equally 

7427 spaced. This is the only method supported on MultiIndexes. 

7428 * 'time': Works on daily and higher resolution data to interpolate 

7429 over a given length of interval. 

7430 * 'index', 'values': Use the actual numerical values of the index. 

7431 * 'pad': Fill in NaNs using existing values. 

7432 * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 

7433 'barycentric', 'polynomial': Passed to 

7434 `scipy.interpolate.interp1d`, whereas 'spline' is passed to 

7435 `scipy.interpolate.UnivariateSpline`. These methods use the numerical 

7436 values of the index. Both 'polynomial' and 'spline' require that 

7437 you also specify an `order` (int), e.g. 

7438 ``df.interpolate(method='polynomial', order=5)``. Note that the 

7439 `slinear` method in pandas refers to the SciPy first-order spline 

7440 rather than the pandas first-order spline. 

7441 * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 

7442 'cubicspline': Wrappers around the SciPy interpolation methods of 

7443 similar names. See `Notes`. 

7444 * 'from_derivatives': Refers to 

7445 `scipy.interpolate.BPoly.from_derivatives` which 

7446 replaces 'piecewise_polynomial' interpolation method in 

7447 scipy 0.18. 

7448 

7449 axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

7450 Axis to interpolate along. For `Series` this parameter is unused 

7451 and defaults to 0. 

7452 limit : int, optional 

7453 Maximum number of consecutive NaNs to fill. Must be greater than 

7454 0. 

7455 inplace : bool, default False 

7456 Update the data in place if possible. 

7457 limit_direction : {{'forward', 'backward', 'both'}}, optional 

7458 Consecutive NaNs will be filled in this direction. 

7459 

7460 If limit is specified: 

7461 * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 

7462 * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 

7463 'backward'. 

7464 

7465 If 'limit' is not specified: 

7466 * If 'method' is 'backfill' or 'bfill', the default is 'backward' 

7467 * else the default is 'forward' 

7468 

7469 .. versionchanged:: 1.1.0 

7470 raises ValueError if `limit_direction` is 'forward' or 'both' and 

7471 method is 'backfill' or 'bfill'. 

7472 raises ValueError if `limit_direction` is 'backward' or 'both' and 

7473 method is 'pad' or 'ffill'. 

7474 

7475 limit_area : {{`None`, 'inside', 'outside'}}, default None 

7476 If limit is specified, consecutive NaNs will be filled with this 

7477 restriction. 

7478 

7479 * ``None``: No fill restriction. 

7480 * 'inside': Only fill NaNs surrounded by valid values 

7481 (interpolate). 

7482 * 'outside': Only fill NaNs outside valid values (extrapolate). 

7483 

7484 downcast : 'infer' or None, default None 

7485 Downcast dtypes if possible. 

7486 ``**kwargs`` : optional 

7487 Keyword arguments to pass on to the interpolating function. 

7488 

7489 Returns 

7490 ------- 

7491 Series or DataFrame or None 

7492 Returns the same object type as the caller, interpolated at 

7493 some or all ``NaN`` values or None if ``inplace=True``. 

7494 

7495 See Also 

7496 -------- 

7497 fillna : Fill missing values using different methods. 

7498 scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials 

7499 (Akima interpolator). 

7500 scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the 

7501 Bernstein basis. 

7502 scipy.interpolate.interp1d : Interpolate a 1-D function. 

7503 scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh 

7504 interpolator). 

7505 scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic 

7506 interpolation. 

7507 scipy.interpolate.CubicSpline : Cubic spline data interpolator. 

7508 

7509 Notes 

7510 ----- 

7511 The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' 

7512 methods are wrappers around the respective SciPy implementations of 

7513 similar names. These use the actual numerical values of the index. 

7514 For more information on their behavior, see the 

7515 `SciPy documentation 

7516 <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__. 

7517 

7518 Examples 

7519 -------- 

7520 Filling in ``NaN`` in a :class:`~pandas.Series` via linear 

7521 interpolation. 

7522 

7523 >>> s = pd.Series([0, 1, np.nan, 3]) 

7524 >>> s 

7525 0 0.0 

7526 1 1.0 

7527 2 NaN 

7528 3 3.0 

7529 dtype: float64 

7530 >>> s.interpolate() 

7531 0 0.0 

7532 1 1.0 

7533 2 2.0 

7534 3 3.0 

7535 dtype: float64 

7536 

7537 Filling in ``NaN`` in a Series by padding, but filling at most two 

7538 consecutive ``NaN`` at a time. 

7539 

7540 >>> s = pd.Series([np.nan, "single_one", np.nan, 

7541 ... "fill_two_more", np.nan, np.nan, np.nan, 

7542 ... 4.71, np.nan]) 

7543 >>> s 

7544 0 NaN 

7545 1 single_one 

7546 2 NaN 

7547 3 fill_two_more 

7548 4 NaN 

7549 5 NaN 

7550 6 NaN 

7551 7 4.71 

7552 8 NaN 

7553 dtype: object 

7554 >>> s.interpolate(method='pad', limit=2) 

7555 0 NaN 

7556 1 single_one 

7557 2 single_one 

7558 3 fill_two_more 

7559 4 fill_two_more 

7560 5 fill_two_more 

7561 6 NaN 

7562 7 4.71 

7563 8 4.71 

7564 dtype: object 

7565 

7566 Filling in ``NaN`` in a Series via polynomial interpolation or splines: 

7567 Both 'polynomial' and 'spline' methods require that you also specify 

7568 an ``order`` (int). 

7569 

7570 >>> s = pd.Series([0, 2, np.nan, 8]) 

7571 >>> s.interpolate(method='polynomial', order=2) 

7572 0 0.000000 

7573 1 2.000000 

7574 2 4.666667 

7575 3 8.000000 

7576 dtype: float64 

7577 

7578 Fill the DataFrame forward (that is, going down) along each column 

7579 using linear interpolation. 

7580 

7581 Note how the last entry in column 'a' is interpolated differently, 

7582 because there is no entry after it to use for interpolation. 

7583 Note how the first entry in column 'b' remains ``NaN``, because there 

7584 is no entry before it to use for interpolation. 

7585 

7586 >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), 

7587 ... (np.nan, 2.0, np.nan, np.nan), 

7588 ... (2.0, 3.0, np.nan, 9.0), 

7589 ... (np.nan, 4.0, -4.0, 16.0)], 

7590 ... columns=list('abcd')) 

7591 >>> df 

7592 a b c d 

7593 0 0.0 NaN -1.0 1.0 

7594 1 NaN 2.0 NaN NaN 

7595 2 2.0 3.0 NaN 9.0 

7596 3 NaN 4.0 -4.0 16.0 

7597 >>> df.interpolate(method='linear', limit_direction='forward', axis=0) 

7598 a b c d 

7599 0 0.0 NaN -1.0 1.0 

7600 1 1.0 2.0 -2.0 5.0 

7601 2 2.0 3.0 -3.0 9.0 

7602 3 2.0 4.0 -4.0 16.0 

7603 

7604 Using polynomial interpolation. 

7605 

7606 >>> df['d'].interpolate(method='polynomial', order=2) 

7607 0 1.0 

7608 1 4.0 

7609 2 9.0 

7610 3 16.0 

7611 Name: d, dtype: float64 

7612 """ 

7613 inplace = validate_bool_kwarg(inplace, "inplace") 

7614 

7615 axis = self._get_axis_number(axis) 

7616 

7617 fillna_methods = ["ffill", "bfill", "pad", "backfill"] 

7618 should_transpose = axis == 1 and method not in fillna_methods 

7619 

7620 obj = self.T if should_transpose else self 

7621 

7622 if obj.empty: 

7623 return self.copy() 

7624 

7625 if method not in fillna_methods: 

7626 axis = self._info_axis_number 

7627 

7628 if isinstance(obj.index, MultiIndex) and method != "linear": 

7629 raise ValueError( 

7630 "Only `method=linear` interpolation is supported on MultiIndexes." 

7631 ) 

7632 

7633 # Set `limit_direction` depending on `method` 
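# Illustrative (added comment): interpolate(method="bfill") defaults
# to limit_direction="backward"; every other method defaults to
# "forward".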

7634 if limit_direction is None: 

7635 limit_direction = ( 

7636 "backward" if method in ("backfill", "bfill") else "forward" 

7637 ) 

7638 else: 

7639 if method in ("pad", "ffill") and limit_direction != "forward": 

7640 raise ValueError( 

7641 f"`limit_direction` must be 'forward' for method `{method}`" 

7642 ) 

7643 if method in ("backfill", "bfill") and limit_direction != "backward": 

7644 raise ValueError( 

7645 f"`limit_direction` must be 'backward' for method `{method}`" 

7646 ) 

7647 

7648 if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")): 

7649 raise TypeError( 

7650 "Cannot interpolate with all object-dtype columns " 

7651 "in the DataFrame. Try setting at least one " 

7652 "column to a numeric dtype." 

7653 ) 

7654 

7655 # create/use the index 

7656 if method == "linear": 

7657 # prior default 
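# Illustrative (added comment): for method="linear" the original
# index is ignored and values are treated as equally spaced at
# positions 0..n-1.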

7658 index = Index(np.arange(len(obj.index))) 

7659 else: 

7660 index = obj.index 

7661 methods = {"index", "values", "nearest", "time"} 

7662 is_numeric_or_datetime = ( 

7663 is_numeric_dtype(index.dtype) 

7664 or is_datetime64_any_dtype(index.dtype) 

7665 or is_timedelta64_dtype(index.dtype) 

7666 ) 

7667 if method not in methods and not is_numeric_or_datetime: 

7668 raise ValueError( 

7669 "Index column must be numeric or datetime type when " 

7670 f"using {method} method other than linear. " 

7671 "Try setting a numeric or datetime index column before " 

7672 "interpolating." 

7673 ) 

7674 

7675 if isna(index).any(): 

7676 raise NotImplementedError( 

7677 "Interpolation with NaNs in the index " 

7678 "has not been implemented. Try filling " 

7679 "those NaNs before interpolating." 

7680 ) 

7681 new_data = obj._mgr.interpolate( 

7682 method=method, 

7683 axis=axis, 

7684 index=index, 

7685 limit=limit, 

7686 limit_direction=limit_direction, 

7687 limit_area=limit_area, 

7688 inplace=inplace, 

7689 downcast=downcast, 

7690 **kwargs, 

7691 ) 

7692 

7693 result = self._constructor(new_data) 

7694 if should_transpose: 

7695 result = result.T 

7696 if inplace: 

7697 return self._update_inplace(result) 

7698 else: 

7699 return result.__finalize__(self, method="interpolate") 

7700 

7701 # ---------------------------------------------------------------------- 

7702 # Timeseries methods 

7703 

7704 @final 

7705 def asof(self, where, subset=None): 

7706 """ 

7707 Return the last row(s) without any NaNs before `where`. 

7708 

7709 The last row (for each element in `where`, if list) without any 

7710 NaN is taken. 

7711 In the case of a :class:`~pandas.DataFrame`, the last row without NaN 

7712 is taken, considering only the subset of columns (if not `None`). 

7713 

7714 If there is no good value, NaN is returned for a Series, or 

7715 a Series of NaN values for a DataFrame. 

7716 

7717 Parameters 

7718 ---------- 

7719 where : date or array-like of dates 

7720 Date(s) before which the last row(s) are returned. 

7721 subset : str or array-like of str, default `None` 

7722 For DataFrame, if not `None`, only use these columns to 

7723 check for NaNs. 

7724 

7725 Returns 

7726 ------- 

7727 scalar, Series, or DataFrame 

7728 

7729 The return can be: 

7730 

7731 * scalar : when `self` is a Series and `where` is a scalar 

7732 * Series: when `self` is a Series and `where` is an array-like, 

7733 or when `self` is a DataFrame and `where` is a scalar 

7734 * DataFrame : when `self` is a DataFrame and `where` is an 

7735 array-like 

7736 

7737 Return scalar, Series, or DataFrame. 

7738 

7739 See Also 

7740 -------- 

7741 merge_asof : Perform an asof merge. Similar to left join. 

7742 

7743 Notes 

7744 ----- 

7745 Dates are assumed to be sorted. Raises if this is not the case. 

7746 

7747 Examples 

7748 -------- 

7749 A Series and a scalar `where`. 

7750 

7751 >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) 

7752 >>> s 

7753 10 1.0 

7754 20 2.0 

7755 30 NaN 

7756 40 4.0 

7757 dtype: float64 

7758 

7759 >>> s.asof(20) 

7760 2.0 

7761 

7762 For a sequence `where`, a Series is returned. The first value is 

7763 NaN, because the first element of `where` is before the first 

7764 index value. 

7765 

7766 >>> s.asof([5, 20]) 

7767 5 NaN 

7768 20 2.0 

7769 dtype: float64 

7770 

7771 Missing values are not considered. The following is ``2.0``, not 

7772 NaN, even though NaN is at the index location for ``30``. 

7773 

7774 >>> s.asof(30) 

7775 2.0 

7776 

7777 Take all columns into consideration 

7778 

7779 >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], 

7780 ... 'b': [None, None, None, None, 500]}, 

7781 ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', 

7782 ... '2018-02-27 09:02:00', 

7783 ... '2018-02-27 09:03:00', 

7784 ... '2018-02-27 09:04:00', 

7785 ... '2018-02-27 09:05:00'])) 

7786 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7787 ... '2018-02-27 09:04:30'])) 

7788 a b 

7789 2018-02-27 09:03:30 NaN NaN 

7790 2018-02-27 09:04:30 NaN NaN 

7791 

7792 Take a single column into consideration 

7793 

7794 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7795 ... '2018-02-27 09:04:30']), 

7796 ... subset=['a']) 

7797 a b 

7798 2018-02-27 09:03:30 30 NaN 

7799 2018-02-27 09:04:30 40 NaN 

7800 """ 

7801 if isinstance(where, str): 

7802 where = Timestamp(where) 

7803 

7804 if not self.index.is_monotonic_increasing: 

7805 raise ValueError("asof requires a sorted index") 

7806 

7807 is_series = isinstance(self, ABCSeries) 

7808 if is_series: 

7809 if subset is not None: 

7810 raise ValueError("subset is not valid for Series") 

7811 else: 

7812 if subset is None: 

7813 subset = self.columns 

7814 if not is_list_like(subset): 

7815 subset = [subset] 

7816 

7817 is_list = is_list_like(where) 

7818 if not is_list: 

7819 start = self.index[0] 

7820 if isinstance(self.index, PeriodIndex): 

7821 where = Period(where, freq=self.index.freq) 

7822 

7823 if where < start: 

7824 if not is_series: 

7825 return self._constructor_sliced( 

7826 index=self.columns, name=where, dtype=np.float64 

7827 ) 

7828 return np.nan 

7829 

7830 # It's always much faster to use a *while* loop here for 

7831 # Series than pre-computing all the NAs. However a 

7832 # *while* loop is extremely expensive for DataFrame 

7833 # so we later pre-compute all the NAs and use the same 

7834 # code path whether *where* is a scalar or list. 

7835 # See PR: https://github.com/pandas-dev/pandas/pull/14476 

7836 if is_series: 

7837 loc = self.index.searchsorted(where, side="right") 

7838 if loc > 0: 

7839 loc -= 1 

7840 

7841 values = self._values 

7842 while loc > 0 and isna(values[loc]): 

7843 loc -= 1 

7844 return values[loc] 

7845 

7846 if not isinstance(where, Index): 

7847 where = Index(where) if is_list else Index([where]) 

7848 

7849 nulls = self.isna() if is_series else self[subset].isna().any(axis=1) 

7850 if nulls.all(): 

7851 if is_series: 

7852 self = cast("Series", self) 

7853 return self._constructor(np.nan, index=where, name=self.name) 

7854 elif is_list: 

7855 self = cast("DataFrame", self) 

7856 return self._constructor(np.nan, index=where, columns=self.columns) 

7857 else: 

7858 self = cast("DataFrame", self) 

7859 return self._constructor_sliced( 

7860 np.nan, index=self.columns, name=where[0] 

7861 ) 

7862 

7863 locs = self.index.asof_locs(where, ~(nulls._values)) 

7864 

7865 # mask the missing 

7866 missing = locs == -1 

7867 data = self.take(locs) 

7868 data.index = where 

7869 if missing.any(): 

7870 # GH#16063 only do this setting when necessary, otherwise 

7871 # we'd cast e.g. bools to floats 

7872 data.loc[missing] = np.nan 

7873 return data if is_list else data.iloc[-1] 

7874 

7875 # ---------------------------------------------------------------------- 

7876 # Action Methods 

7877 

7878 @doc(klass=_shared_doc_kwargs["klass"]) 

7879 def isna(self: NDFrameT) -> NDFrameT: 

7880 """ 

7881 Detect missing values. 

7882 

7883 Return a boolean same-sized object indicating if the values are NA. 

7884 NA values, such as None or :attr:`numpy.NaN`, get mapped to True 

7885 values. 

7886 Everything else gets mapped to False values. Values such as empty 

7887 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7888 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7889 

7890 Returns 

7891 ------- 

7892 {klass} 

7893 Mask of bool values for each element in {klass} that 

7894 indicates whether an element is an NA value. 

7895 

7896 See Also 

7897 -------- 

7898 {klass}.isnull : Alias of isna. 

7899 {klass}.notna : Boolean inverse of isna. 

7900 {klass}.dropna : Omit axes labels with missing values. 

7901 isna : Top-level isna. 

7902 

7903 Examples 

7904 -------- 

7905 Show which entries in a DataFrame are NA. 

7906 

7907 >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], 

7908 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

7909 ... pd.Timestamp('1940-04-25')], 

7910 ... name=['Alfred', 'Batman', ''], 

7911 ... toy=[None, 'Batmobile', 'Joker'])) 

7912 >>> df 

7913 age born name toy 

7914 0 5.0 NaT Alfred None 

7915 1 6.0 1939-05-27 Batman Batmobile 

7916 2 NaN 1940-04-25 Joker 

7917 

7918 >>> df.isna() 

7919 age born name toy 

7920 0 False True False True 

7921 1 False False False False 

7922 2 True False False False 

7923 

7924 Show which entries in a Series are NA. 

7925 

7926 >>> ser = pd.Series([5, 6, np.NaN]) 

7927 >>> ser 

7928 0 5.0 

7929 1 6.0 

7930 2 NaN 

7931 dtype: float64 

7932 

7933 >>> ser.isna() 

7934 0 False 

7935 1 False 

7936 2 True 

7937 dtype: bool 

7938 """ 

7939 return isna(self).__finalize__(self, method="isna") 

7940 

7941 @doc(isna, klass=_shared_doc_kwargs["klass"]) 

7942 def isnull(self: NDFrameT) -> NDFrameT: 

7943 return isna(self).__finalize__(self, method="isnull") 

7944 

7945 @doc(klass=_shared_doc_kwargs["klass"]) 

7946 def notna(self: NDFrameT) -> NDFrameT: 

7947 """ 

7948 Detect existing (non-missing) values. 

7949 

7950 Return a boolean same-sized object indicating if the values are not NA. 

7951 Non-missing values get mapped to True. Values such as empty 

7952 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7953 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7954 NA values, such as None or :attr:`numpy.NaN`, get mapped to False 

7955 values. 

7956 

7957 Returns 

7958 ------- 

7959 {klass} 

7960 Mask of bool values for each element in {klass} that 

7961 indicates whether an element is not an NA value. 

7962 

7963 See Also 

7964 -------- 

7965 {klass}.notnull : Alias of notna. 

7966 {klass}.isna : Boolean inverse of notna. 

7967 {klass}.dropna : Omit axes labels with missing values. 

7968 notna : Top-level notna. 

7969 

7970 Examples 

7971 -------- 

7972 Show which entries in a DataFrame are not NA. 

7973 

7974 >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], 

7975 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

7976 ... pd.Timestamp('1940-04-25')], 

7977 ... name=['Alfred', 'Batman', ''], 

7978 ... toy=[None, 'Batmobile', 'Joker'])) 

7979 >>> df 

7980 age born name toy 

7981 0 5.0 NaT Alfred None 

7982 1 6.0 1939-05-27 Batman Batmobile 

7983 2 NaN 1940-04-25 Joker 

7984 

7985 >>> df.notna() 

7986 age born name toy 

7987 0 True False True False 

7988 1 True True True True 

7989 2 False True True True 

7990 

7991 Show which entries in a Series are not NA. 

7992 

7993 >>> ser = pd.Series([5, 6, np.NaN]) 

7994 >>> ser 

7995 0 5.0 

7996 1 6.0 

7997 2 NaN 

7998 dtype: float64 

7999 

8000 >>> ser.notna() 

8001 0 True 

8002 1 True 

8003 2 False 

8004 dtype: bool 

8005 """ 

8006 return notna(self).__finalize__(self, method="notna") 

8007 

8008 @doc(notna, klass=_shared_doc_kwargs["klass"]) 

8009 def notnull(self: NDFrameT) -> NDFrameT: 

8010 return notna(self).__finalize__(self, method="notnull") 

8011 

8012 @final 

8013 def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): 
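# Illustrative (added comment): fast path taken when both bounds are
# numeric scalars, e.g. df.clip(-4, 6); NaNs in the data are
# preserved rather than clipped.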

8014 if (lower is not None and np.any(isna(lower))) or ( 

8015 upper is not None and np.any(isna(upper)) 

8016 ): 

8017 raise ValueError("Cannot use an NA value as a clip threshold") 

8018 

8019 result = self 

8020 mask = isna(self._values) 

8021 

8022 with np.errstate(all="ignore"): 

8023 if upper is not None: 

8024 subset = self <= upper 

8025 result = result.where(subset, upper, axis=None, inplace=False) 

8026 if lower is not None: 

8027 subset = self >= lower 

8028 result = result.where(subset, lower, axis=None, inplace=False) 

8029 

8030 if np.any(mask): 

8031 result[mask] = np.nan 

8032 

8033 if inplace: 

8034 return self._update_inplace(result) 

8035 else: 

8036 return result 

8037 

8038 @final 

8039 def _clip_with_one_bound(self, threshold, method, axis, inplace): 

8040 if axis is not None: 

8041 axis = self._get_axis_number(axis) 

8042 

8043 # method is self.le for upper bound and self.ge for lower bound 

8044 if is_scalar(threshold) and is_number(threshold): 

8045 if method.__name__ == "le": 

8046 return self._clip_with_scalar(None, threshold, inplace=inplace) 

8047 return self._clip_with_scalar(threshold, None, inplace=inplace) 

8048 

8049 # GH #15390 

8050 # In order for where method to work, the threshold must 

8051 # be transformed to NDFrame from other array like structure. 

8052 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): 

8053 if isinstance(self, ABCSeries): 

8054 threshold = self._constructor(threshold, index=self.index) 

8055 else: 

8056 threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] 

8057 

8058 # GH 40420 

8059 # Treat missing thresholds as no bounds, not clipping the values 
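# Illustrative (added comment): for a lower bound pd.Series([1, np.nan]),
# the NaN becomes -inf below, so the second position is not clipped.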

8060 if is_list_like(threshold): 

8061 fill_value = np.inf if method.__name__ == "le" else -np.inf 

8062 threshold_inf = threshold.fillna(fill_value) 

8063 else: 

8064 threshold_inf = threshold 

8065 

8066 subset = method(threshold_inf, axis=axis) | isna(self) 

8067 

8068 # GH 40420 

8069 return self.where(subset, threshold, axis=axis, inplace=inplace) 

8070 

8071 def clip( 

8072 self: NDFrameT, 

8073 lower=None, 

8074 upper=None, 

8075 *, 

8076 axis: Axis | None = None, 

8077 inplace: bool_t = False, 

8078 **kwargs, 

8079 ) -> NDFrameT | None: 

8080 """ 

8081 Trim values at input threshold(s). 

8082 

8083 Assigns values outside boundary to boundary values. Thresholds 

8084 can be singular values or array like, and in the latter case 

8085 the clipping is performed element-wise in the specified axis. 

8086 

8087 Parameters 

8088 ---------- 

8089 lower : float or array-like, default None 

8090 Minimum threshold value. All values below this 

8091 threshold will be set to it. A missing 

8092 threshold (e.g. `NA`) will not clip the value. 

8093 upper : float or array-like, default None 

8094 Maximum threshold value. All values above this 

8095 threshold will be set to it. A missing 

8096 threshold (e.g. `NA`) will not clip the value. 

8097 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

8098 Align object with lower and upper along the given axis. 

8099 For `Series` this parameter is unused and defaults to `None`. 

8100 inplace : bool, default False 

8101 Whether to perform the operation in place on the data. 

8102 **kwargs 

8103 Additional keywords have no effect but might be accepted 

8104 for compatibility with numpy. 

8105 

8106 Returns 

8107 ------- 

8108 Series or DataFrame or None 

8109 Same type as calling object with the values outside the 

8110 clip boundaries replaced or None if ``inplace=True``. 

8111 

8112 See Also 

8113 -------- 

8114 Series.clip : Trim values at input threshold in series. 

8115 DataFrame.clip : Trim values at input threshold in dataframe. 

8116 numpy.clip : Clip (limit) the values in an array. 

8117 

8118 Examples 

8119 -------- 

8120 >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} 

8121 >>> df = pd.DataFrame(data) 

8122 >>> df 

8123 col_0 col_1 

8124 0 9 -2 

8125 1 -3 -7 

8126 2 0 6 

8127 3 -1 8 

8128 4 5 -5 

8129 

8130 Clips per column using lower and upper thresholds: 

8131 

8132 >>> df.clip(-4, 6) 

8133 col_0 col_1 

8134 0 6 -2 

8135 1 -3 -4 

8136 2 0 6 

8137 3 -1 6 

8138 4 5 -4 

8139 

8140 Clips using specific lower and upper thresholds per column element: 

8141 

8142 >>> t = pd.Series([2, -4, -1, 6, 3]) 

8143 >>> t 

8144 0 2 

8145 1 -4 

8146 2 -1 

8147 3 6 

8148 4 3 

8149 dtype: int64 

8150 

8151 >>> df.clip(t, t + 4, axis=0) 

8152 col_0 col_1 

8153 0 6 2 

8154 1 -3 -4 

8155 2 0 3 

8156 3 6 8 

8157 4 5 3 

8158 

8159 Clips using specific lower threshold per column element, with missing values: 

8160 

8161 >>> t = pd.Series([2, -4, np.NaN, 6, 3]) 

8162 >>> t 

8163 0 2.0 

8164 1 -4.0 

8165 2 NaN 

8166 3 6.0 

8167 4 3.0 

8168 dtype: float64 

8169 

8170 >>> df.clip(t, axis=0) 

8171 col_0 col_1 

8172 0 9 2 

8173 1 -3 -4 

8174 2 0 6 

8175 3 6 8 

8176 4 5 3 

8177 """ 

8178 inplace = validate_bool_kwarg(inplace, "inplace") 

8179 

8180 axis = nv.validate_clip_with_axis(axis, (), kwargs) 

8181 if axis is not None: 

8182 axis = self._get_axis_number(axis) 

8183 

8184 # GH 17276 

8185 # numpy doesn't like NaN as a clip value 

8186 # so ignore 

8187 # GH 19992 

8188 # numpy doesn't drop a list-like bound containing NaN 

8189 isna_lower = isna(lower) 

8190 if not is_list_like(lower): 

8191 if np.any(isna_lower): 

8192 lower = None 

8193 elif np.all(isna_lower): 

8194 lower = None 

8195 isna_upper = isna(upper) 

8196 if not is_list_like(upper): 

8197 if np.any(isna_upper): 

8198 upper = None 

8199 elif np.all(isna_upper): 

8200 upper = None 

8201 

8202 # GH 2747 (arguments were reversed) 

8203 if ( 

8204 lower is not None 

8205 and upper is not None 

8206 and is_scalar(lower) 

8207 and is_scalar(upper) 

8208 ): 

8209 lower, upper = min(lower, upper), max(lower, upper) 
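# Illustrative (added comment): df.clip(5, 1) is treated the same as
# df.clip(1, 5).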

8210 

8211 # fast-path for scalars 

8212 if (lower is None or (is_scalar(lower) and is_number(lower))) and ( 

8213 upper is None or (is_scalar(upper) and is_number(upper)) 

8214 ): 

8215 return self._clip_with_scalar(lower, upper, inplace=inplace) 

8216 

8217 result = self 

8218 if lower is not None: 

8219 result = result._clip_with_one_bound( 

8220 lower, method=self.ge, axis=axis, inplace=inplace 

8221 ) 

8222 if upper is not None: 

8223 if inplace: 

8224 result = self 

8225 result = result._clip_with_one_bound( 

8226 upper, method=self.le, axis=axis, inplace=inplace 

8227 ) 

8228 

8229 return result 

8230 

8231 @doc(**_shared_doc_kwargs) 

8232 def asfreq( 

8233 self: NDFrameT, 

8234 freq: Frequency, 

8235 method: FillnaOptions | None = None, 

8236 how: str | None = None, 

8237 normalize: bool_t = False, 

8238 fill_value: Hashable = None, 

8239 ) -> NDFrameT: 

8240 """ 

8241 Convert time series to specified frequency. 

8242 

8243 Returns the original data conformed to a new index with the specified 

8244 frequency. 

8245 

8246 If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index 

8247 is the result of transforming the original index with 

8248 :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index 

8249 will map one-to-one to the new index). 

8250 

8251 Otherwise, the new index will be equivalent to ``pd.date_range(start, end, 

8252 freq=freq)`` where ``start`` and ``end`` are, respectively, the first and 

8253 last entries in the original index (see :func:`pandas.date_range`). The 

8254 values corresponding to any timesteps in the new index which were not present 

8255 in the original index will be null (``NaN``), unless a method for filling 

8256 such unknowns is provided (see the ``method`` parameter below). 

8257 

8258 The :meth:`resample` method is more appropriate if an operation on each group of 

8259 timesteps (such as an aggregate) is necessary to represent the data at the new 

8260 frequency. 

8261 

8262 Parameters 

8263 ---------- 

8264 freq : DateOffset or str 

8265 Frequency DateOffset or string. 

8266 method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None 

8267 Method to use for filling holes in reindexed Series (note this 

8268 does not fill NaNs that already were present): 

8269 

8270 * 'pad' / 'ffill': propagate last valid observation forward to next 

8271 valid 

8272 * 'backfill' / 'bfill': use NEXT valid observation to fill. 

8273 how : {{'start', 'end'}}, default 'end' 

8274 For PeriodIndex only (see PeriodIndex.asfreq). 

8275 normalize : bool, default False 

8276 Whether to reset output index to midnight. 

8277 fill_value : scalar, optional 

8278 Value to use for missing values, applied during upsampling (note 

8279 this does not fill NaNs that already were present). 

8280 

8281 Returns 

8282 ------- 

8283 {klass} 

8284 {klass} object reindexed to the specified frequency. 

8285 

8286 See Also 

8287 -------- 

8288 reindex : Conform DataFrame to new index with optional filling logic. 

8289 

8290 Notes 

8291 ----- 

8292 To learn more about the frequency strings, please see `this link 

8293 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

8294 

8295 Examples 

8296 -------- 

8297 Start by creating a series with 4 one-minute timestamps. 

8298 

8299 >>> index = pd.date_range('1/1/2000', periods=4, freq='T') 

8300 >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) 

8301 >>> df = pd.DataFrame({{'s': series}}) 

8302 >>> df 

8303 s 

8304 2000-01-01 00:00:00 0.0 

8305 2000-01-01 00:01:00 NaN 

8306 2000-01-01 00:02:00 2.0 

8307 2000-01-01 00:03:00 3.0 

8308 

8309 Upsample the series into 30 second bins. 

8310 

8311 >>> df.asfreq(freq='30S') 

8312 s 

8313 2000-01-01 00:00:00 0.0 

8314 2000-01-01 00:00:30 NaN 

8315 2000-01-01 00:01:00 NaN 

8316 2000-01-01 00:01:30 NaN 

8317 2000-01-01 00:02:00 2.0 

8318 2000-01-01 00:02:30 NaN 

8319 2000-01-01 00:03:00 3.0 

8320 

8321 Upsample again, providing a ``fill_value``. 

8322 

8323 >>> df.asfreq(freq='30S', fill_value=9.0) 

8324 s 

8325 2000-01-01 00:00:00 0.0 

8326 2000-01-01 00:00:30 9.0 

8327 2000-01-01 00:01:00 NaN 

8328 2000-01-01 00:01:30 9.0 

8329 2000-01-01 00:02:00 2.0 

8330 2000-01-01 00:02:30 9.0 

8331 2000-01-01 00:03:00 3.0 

8332 

8333 Upsample again, providing a ``method``. 

8334 

8335 >>> df.asfreq(freq='30S', method='bfill') 

8336 s 

8337 2000-01-01 00:00:00 0.0 

8338 2000-01-01 00:00:30 NaN 

8339 2000-01-01 00:01:00 NaN 

8340 2000-01-01 00:01:30 2.0 

8341 2000-01-01 00:02:00 2.0 

8342 2000-01-01 00:02:30 3.0 

8343 2000-01-01 00:03:00 3.0 

8344 """ 

8345 from pandas.core.resample import asfreq 

8346 

8347 return asfreq( 

8348 self, 

8349 freq, 

8350 method=method, 

8351 how=how, 

8352 normalize=normalize, 

8353 fill_value=fill_value, 

8354 ) 

8355 

8356 @final 

8357 def at_time( 

8358 self: NDFrameT, time, asof: bool_t = False, axis: Axis | None = None 

8359 ) -> NDFrameT: 

8360 """ 

8361 Select values at a particular time of day (e.g., 9:30 AM). 

8362 

8363 Parameters 

8364 ---------- 

8365 time : datetime.time or str 

8366 The values to select. 

8367 axis : {0 or 'index', 1 or 'columns'}, default 0 

8368 For `Series` this parameter is unused and defaults to 0. 

8369 

8370 Returns 

8371 ------- 

8372 Series or DataFrame 

8373 

8374 Raises 

8375 ------ 

8376 TypeError 

8377 If the index is not a :class:`DatetimeIndex` 

8378 

8379 See Also 

8380 -------- 

8381 between_time : Select values between particular times of the day. 

8382 first : Select initial periods of time series based on a date offset. 

8383 last : Select final periods of time series based on a date offset. 

8384 DatetimeIndex.indexer_at_time : Get just the index locations for 

8385 values at particular time of the day. 

8386 

8387 Examples 

8388 -------- 

8389 >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') 

8390 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8391 >>> ts 

8392 A 

8393 2018-04-09 00:00:00 1 

8394 2018-04-09 12:00:00 2 

8395 2018-04-10 00:00:00 3 

8396 2018-04-10 12:00:00 4 

8397 

8398 >>> ts.at_time('12:00') 

8399 A 

8400 2018-04-09 12:00:00 2 

8401 2018-04-10 12:00:00 4 

8402 """ 

8403 if axis is None: 

8404 axis = self._stat_axis_number 

8405 axis = self._get_axis_number(axis) 

8406 

8407 index = self._get_axis(axis) 

8408 

8409 if not isinstance(index, DatetimeIndex): 

8410 raise TypeError("Index must be DatetimeIndex") 

8411 

8412 indexer = index.indexer_at_time(time, asof=asof) 

8413 return self._take_with_is_copy(indexer, axis=axis) 

8414 

8415 @final 

8416 def between_time( 

8417 self: NDFrameT, 

8418 start_time, 

8419 end_time, 

8420 inclusive: IntervalClosedType = "both", 

8421 axis: Axis | None = None, 

8422 ) -> NDFrameT: 

8423 """ 

8424 Select values between particular times of the day (e.g., 9:00-9:30 AM). 

8425 

8426 By setting ``start_time`` to be later than ``end_time``, 

8427 you can get the times that are *not* between the two times. 

8428 

8429 Parameters 

8430 ---------- 

8431 start_time : datetime.time or str 

8432 Initial time as a time filter limit. 

8433 end_time : datetime.time or str 

8434 End time as a time filter limit. 

8435 inclusive : {"both", "neither", "left", "right"}, default "both" 

8436 Include boundaries; whether to set each bound as closed or open. 

8437 axis : {0 or 'index', 1 or 'columns'}, default 0 

8438 Determine the time range on index or column values. 

8439 For `Series` this parameter is unused and defaults to 0. 

8440 

8441 Returns 

8442 ------- 

8443 Series or DataFrame 

8444 Data from the original object filtered to the specified date range. 

8445 

8446 Raises 

8447 ------ 

8448 TypeError 

8449 If the index is not a :class:`DatetimeIndex` 

8450 

8451 See Also 

8452 -------- 

8453 at_time : Select values at a particular time of the day. 

8454 first : Select initial periods of time series based on a date offset. 

8455 last : Select final periods of time series based on a date offset. 

8456 DatetimeIndex.indexer_between_time : Get just the index locations for 

8457 values between particular times of the day. 

8458 

8459 Examples 

8460 -------- 

8461 >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') 

8462 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8463 >>> ts 

8464 A 

8465 2018-04-09 00:00:00 1 

8466 2018-04-10 00:20:00 2 

8467 2018-04-11 00:40:00 3 

8468 2018-04-12 01:00:00 4 

8469 

8470 >>> ts.between_time('0:15', '0:45') 

8471 A 

8472 2018-04-10 00:20:00 2 

8473 2018-04-11 00:40:00 3 

8474 

8475 You get the times that are *not* between two times by setting 

8476 ``start_time`` later than ``end_time``: 

8477 

8478 >>> ts.between_time('0:45', '0:15') 

8479 A 

8480 2018-04-09 00:00:00 1 

8481 2018-04-12 01:00:00 4 

8482 """ 

8483 if axis is None: 

8484 axis = self._stat_axis_number 

8485 axis = self._get_axis_number(axis) 

8486 

8487 index = self._get_axis(axis) 

8488 if not isinstance(index, DatetimeIndex): 

8489 raise TypeError("Index must be DatetimeIndex") 

8490 

8491 left_inclusive, right_inclusive = validate_inclusive(inclusive) 

8492 indexer = index.indexer_between_time( 

8493 start_time, 

8494 end_time, 

8495 include_start=left_inclusive, 

8496 include_end=right_inclusive, 

8497 ) 

8498 return self._take_with_is_copy(indexer, axis=axis) 

8499 

8500 @doc(**_shared_doc_kwargs) 

8501 def resample( 

8502 self, 

8503 rule, 

8504 axis: Axis = 0, 

8505 closed: str | None = None, 

8506 label: str | None = None, 

8507 convention: str = "start", 

8508 kind: str | None = None, 

8509 on: Level = None, 

8510 level: Level = None, 

8511 origin: str | TimestampConvertibleTypes = "start_day", 

8512 offset: TimedeltaConvertibleTypes | None = None, 

8513 group_keys: bool_t = False, 

8514 ) -> Resampler: 

8515 """ 

8516 Resample time-series data. 

8517 

8518 Convenience method for frequency conversion and resampling of time series. 

8519 The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`, 

8520 or `TimedeltaIndex`), or the caller must pass the label of a datetime-like 

8521 series/index to the ``on``/``level`` keyword parameter. 

8522 

8523 Parameters 

8524 ---------- 

8525 rule : DateOffset, Timedelta or str 

8526 The offset string or object representing target conversion. 

8527 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

8528 Which axis to use for up- or down-sampling. For `Series` this parameter 

8529 is unused and defaults to 0. The axis being resampled must be a 

8530 `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. 

8531 closed : {{'right', 'left'}}, default None 

8532 Which side of bin interval is closed. The default is 'left' 

8533 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

8534 'BA', 'BQ', and 'W' which all have a default of 'right'. 

8535 label : {{'right', 'left'}}, default None 

8536 Which bin edge label to label bucket with. The default is 'left' 

8537 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

8538 'BA', 'BQ', and 'W' which all have a default of 'right'. 

8539 convention : {{'start', 'end', 's', 'e'}}, default 'start' 

8540 For `PeriodIndex` only, controls whether to use the start or 

8541 end of `rule`. 

8542 kind : {{'timestamp', 'period'}}, optional, default None 

8543 Pass 'timestamp' to convert the resulting index to a 

8544 `DatetimeIndex` or 'period' to convert it to a `PeriodIndex`. 

8545 By default the input representation is retained. 

8546 

8547 on : str, optional 

8548 For a DataFrame, column to use instead of index for resampling. 

8549 Column must be datetime-like. 

8550 level : str or int, optional 

8551 For a MultiIndex, level (name or number) to use for 

8552 resampling. `level` must be datetime-like. 

8553 origin : Timestamp or str, default 'start_day' 

8554 The timestamp on which to adjust the grouping. The timezone of origin 

8555 must match the timezone of the index. 

8556 If string, must be one of the following: 

8557 

8558 - 'epoch': `origin` is 1970-01-01 

8559 - 'start': `origin` is the first value of the timeseries 

8560 - 'start_day': `origin` is the first day at midnight of the timeseries 

8561 

8562 .. versionadded:: 1.1.0 

8563 

8564 - 'end': `origin` is the last value of the timeseries 

8565 - 'end_day': `origin` is the ceiling midnight of the last day 

8566 

8567 .. versionadded:: 1.3.0 

8568 

8569 offset : Timedelta or str, default is None 

8570 An offset timedelta added to the origin. 

8571 

8572 .. versionadded:: 1.1.0 

8573 

8574 group_keys : bool, default False 

8575 Whether to include the group keys in the result index when using 

8576 ``.apply()`` on the resampled object. 

8577 

8578 .. versionadded:: 1.5.0 

8579 

8580 Not specifying ``group_keys`` will retain values-dependent behavior 

8581 from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes 

8582 <whatsnew_150.enhancements.resample_group_keys>` for examples). 

8583 

8584 .. versionchanged:: 2.0.0 

8585 

8586 ``group_keys`` now defaults to ``False``. 

8587 

8588 Returns 

8589 ------- 

8590 pandas.core.Resampler 

8591 :class:`~pandas.core.Resampler` object. 

8592 

8593 See Also 

8594 -------- 

8595 Series.resample : Resample a Series. 

8596 DataFrame.resample : Resample a DataFrame. 

8597 groupby : Group {klass} by mapping, function, label, or list of labels. 

8598 asfreq : Reindex a {klass} with the given frequency without grouping. 

8599 

8600 Notes 

8601 ----- 

8602 See the `user guide 

8603 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__ 

8604 for more. 

8605 

8606 To learn more about the offset strings, please see `this link 

8607 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__. 

8608 

8609 Examples 

8610 -------- 

8611 Start by creating a series with 9 one-minute timestamps. 

8612 

8613 >>> index = pd.date_range('1/1/2000', periods=9, freq='T') 

8614 >>> series = pd.Series(range(9), index=index) 

8615 >>> series 

8616 2000-01-01 00:00:00 0 

8617 2000-01-01 00:01:00 1 

8618 2000-01-01 00:02:00 2 

8619 2000-01-01 00:03:00 3 

8620 2000-01-01 00:04:00 4 

8621 2000-01-01 00:05:00 5 

8622 2000-01-01 00:06:00 6 

8623 2000-01-01 00:07:00 7 

8624 2000-01-01 00:08:00 8 

8625 Freq: T, dtype: int64 

8626 

8627 Downsample the series into 3 minute bins and sum the values 

8628 of the timestamps falling into a bin. 

8629 

8630 >>> series.resample('3T').sum() 

8631 2000-01-01 00:00:00 3 

8632 2000-01-01 00:03:00 12 

8633 2000-01-01 00:06:00 21 

8634 Freq: 3T, dtype: int64 

8635 

8636 Downsample the series into 3 minute bins as above, but label each 

8637 bin using the right edge instead of the left. Please note that the 

8638 value in the bucket used as the label is not included in the bucket 

8639 that it labels. For example, in the original series the 

8640 bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed 

8641 value in the resampled bucket with the label ``2000-01-01 00:03:00`` 

8642 does not include 3 (if it did, the summed value would be 6, not 3). 

8643 To include this value close the right side of the bin interval as 

8644 illustrated in the example below this one. 

8645 

8646 >>> series.resample('3T', label='right').sum() 

8647 2000-01-01 00:03:00 3 

8648 2000-01-01 00:06:00 12 

8649 2000-01-01 00:09:00 21 

8650 Freq: 3T, dtype: int64 

8651 

8652 Downsample the series into 3 minute bins as above, but close the right 

8653 side of the bin interval. 

8654 

8655 >>> series.resample('3T', label='right', closed='right').sum() 

8656 2000-01-01 00:00:00 0 

8657 2000-01-01 00:03:00 6 

8658 2000-01-01 00:06:00 15 

8659 2000-01-01 00:09:00 15 

8660 Freq: 3T, dtype: int64 

8661 

8662 Upsample the series into 30 second bins. 

8663 

8664 >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows 

8665 2000-01-01 00:00:00 0.0 

8666 2000-01-01 00:00:30 NaN 

8667 2000-01-01 00:01:00 1.0 

8668 2000-01-01 00:01:30 NaN 

8669 2000-01-01 00:02:00 2.0 

8670 Freq: 30S, dtype: float64 

8671 

8672 Upsample the series into 30 second bins and fill the ``NaN`` 

8673 values using the ``ffill`` method. 

8674 

8675 >>> series.resample('30S').ffill()[0:5] 

8676 2000-01-01 00:00:00 0 

8677 2000-01-01 00:00:30 0 

8678 2000-01-01 00:01:00 1 

8679 2000-01-01 00:01:30 1 

8680 2000-01-01 00:02:00 2 

8681 Freq: 30S, dtype: int64 

8682 

8683 Upsample the series into 30 second bins and fill the 

8684 ``NaN`` values using the ``bfill`` method. 

8685 

8686 >>> series.resample('30S').bfill()[0:5] 

8687 2000-01-01 00:00:00 0 

8688 2000-01-01 00:00:30 1 

8689 2000-01-01 00:01:00 1 

8690 2000-01-01 00:01:30 2 

8691 2000-01-01 00:02:00 2 

8692 Freq: 30S, dtype: int64 

8693 

8694 Pass a custom function via ``apply`` 

8695 

8696 >>> def custom_resampler(arraylike): 

8697 ... return np.sum(arraylike) + 5 

8698 ... 

8699 >>> series.resample('3T').apply(custom_resampler) 

8700 2000-01-01 00:00:00 8 

8701 2000-01-01 00:03:00 17 

8702 2000-01-01 00:06:00 26 

8703 Freq: 3T, dtype: int64 

8704 

8705 For a Series with a PeriodIndex, the keyword `convention` can be 

8706 used to control whether to use the start or end of `rule`. 

8707 

8708 Resample a year by quarter using 'start' `convention`. Values are 

8709 assigned to the first quarter of the period. 

8710 

8711 >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', 

8712 ... freq='A', 

8713 ... periods=2)) 

8714 >>> s 

8715 2012 1 

8716 2013 2 

8717 Freq: A-DEC, dtype: int64 

8718 >>> s.resample('Q', convention='start').asfreq() 

8719 2012Q1 1.0 

8720 2012Q2 NaN 

8721 2012Q3 NaN 

8722 2012Q4 NaN 

8723 2013Q1 2.0 

8724 2013Q2 NaN 

8725 2013Q3 NaN 

8726 2013Q4 NaN 

8727 Freq: Q-DEC, dtype: float64 

8728 

8729 Resample quarters by month using 'end' `convention`. Values are 

8730 assigned to the last month of the period. 

8731 

8732 >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', 

8733 ... freq='Q', 

8734 ... periods=4)) 

8735 >>> q 

8736 2018Q1 1 

8737 2018Q2 2 

8738 2018Q3 3 

8739 2018Q4 4 

8740 Freq: Q-DEC, dtype: int64 

8741 >>> q.resample('M', convention='end').asfreq() 

8742 2018-03 1.0 

8743 2018-04 NaN 

8744 2018-05 NaN 

8745 2018-06 2.0 

8746 2018-07 NaN 

8747 2018-08 NaN 

8748 2018-09 3.0 

8749 2018-10 NaN 

8750 2018-11 NaN 

8751 2018-12 4.0 

8752 Freq: M, dtype: float64 

8753 

8754 For DataFrame objects, the keyword `on` can be used to specify the 

8755 column instead of the index for resampling. 

8756 

8757 >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8758 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

8759 >>> df = pd.DataFrame(d) 

8760 >>> df['week_starting'] = pd.date_range('01/01/2018', 

8761 ... periods=8, 

8762 ... freq='W') 

8763 >>> df 

8764 price volume week_starting 

8765 0 10 50 2018-01-07 

8766 1 11 60 2018-01-14 

8767 2 9 40 2018-01-21 

8768 3 13 100 2018-01-28 

8769 4 14 50 2018-02-04 

8770 5 18 100 2018-02-11 

8771 6 17 40 2018-02-18 

8772 7 19 50 2018-02-25 

8773 >>> df.resample('M', on='week_starting').mean() 

8774 price volume 

8775 week_starting 

8776 2018-01-31 10.75 62.5 

8777 2018-02-28 17.00 60.0 

8778 

8779 For a DataFrame with MultiIndex, the keyword `level` can be used to 

8780 specify on which level the resampling needs to take place. 

8781 

8782 >>> days = pd.date_range('1/1/2000', periods=4, freq='D') 

8783 >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8784 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

8785 >>> df2 = pd.DataFrame( 

8786 ... d2, 

8787 ... index=pd.MultiIndex.from_product( 

8788 ... [days, ['morning', 'afternoon']] 

8789 ... ) 

8790 ... ) 

8791 >>> df2 

8792 price volume 

8793 2000-01-01 morning 10 50 

8794 afternoon 11 60 

8795 2000-01-02 morning 9 40 

8796 afternoon 13 100 

8797 2000-01-03 morning 14 50 

8798 afternoon 18 100 

8799 2000-01-04 morning 17 40 

8800 afternoon 19 50 

8801 >>> df2.resample('D', level=0).sum() 

8802 price volume 

8803 2000-01-01 21 110 

8804 2000-01-02 22 140 

8805 2000-01-03 32 150 

8806 2000-01-04 36 90 

8807 

8808 If you want to adjust the start of the bins based on a fixed timestamp: 

8809 

8810 >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' 

8811 >>> rng = pd.date_range(start, end, freq='7min') 

8812 >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) 

8813 >>> ts 

8814 2000-10-01 23:30:00 0 

8815 2000-10-01 23:37:00 3 

8816 2000-10-01 23:44:00 6 

8817 2000-10-01 23:51:00 9 

8818 2000-10-01 23:58:00 12 

8819 2000-10-02 00:05:00 15 

8820 2000-10-02 00:12:00 18 

8821 2000-10-02 00:19:00 21 

8822 2000-10-02 00:26:00 24 

8823 Freq: 7T, dtype: int64 

8824 

8825 >>> ts.resample('17min').sum() 

8826 2000-10-01 23:14:00 0 

8827 2000-10-01 23:31:00 9 

8828 2000-10-01 23:48:00 21 

8829 2000-10-02 00:05:00 54 

8830 2000-10-02 00:22:00 24 

8831 Freq: 17T, dtype: int64 

8832 

8833 >>> ts.resample('17min', origin='epoch').sum() 

8834 2000-10-01 23:18:00 0 

8835 2000-10-01 23:35:00 18 

8836 2000-10-01 23:52:00 27 

8837 2000-10-02 00:09:00 39 

8838 2000-10-02 00:26:00 24 

8839 Freq: 17T, dtype: int64 

8840 

8841 >>> ts.resample('17min', origin='2000-01-01').sum() 

8842 2000-10-01 23:24:00 3 

8843 2000-10-01 23:41:00 15 

8844 2000-10-01 23:58:00 45 

8845 2000-10-02 00:15:00 45 

8846 Freq: 17T, dtype: int64 

8847 

8848 If you want to adjust the start of the bins with an `offset` Timedelta, the two 

8849 following lines are equivalent: 

8850 

8851 >>> ts.resample('17min', origin='start').sum() 

8852 2000-10-01 23:30:00 9 

8853 2000-10-01 23:47:00 21 

8854 2000-10-02 00:04:00 54 

8855 2000-10-02 00:21:00 24 

8856 Freq: 17T, dtype: int64 

8857 

8858 >>> ts.resample('17min', offset='23h30min').sum() 

8859 2000-10-01 23:30:00 9 

8860 2000-10-01 23:47:00 21 

8861 2000-10-02 00:04:00 54 

8862 2000-10-02 00:21:00 24 

8863 Freq: 17T, dtype: int64 

8864 

8865 If you want to take the largest Timestamp as the end of the bins: 

8866 

8867 >>> ts.resample('17min', origin='end').sum() 

8868 2000-10-01 23:35:00 0 

8869 2000-10-01 23:52:00 18 

8870 2000-10-02 00:09:00 27 

8871 2000-10-02 00:26:00 63 

8872 Freq: 17T, dtype: int64 

8873 

8874 In contrast with `start_day`, you can use `end_day` to take the ceiling 

8875 midnight of the largest Timestamp as the end of the bins and drop the bins 

8876 not containing data: 

8877 

8878 >>> ts.resample('17min', origin='end_day').sum() 

8879 2000-10-01 23:38:00 3 

8880 2000-10-01 23:55:00 15 

8881 2000-10-02 00:12:00 45 

8882 2000-10-02 00:29:00 45 

8883 Freq: 17T, dtype: int64 

8884 """ 

8885 from pandas.core.resample import get_resampler 

8886 

8887 axis = self._get_axis_number(axis) 

8888 return get_resampler( 

8889 cast("Series | DataFrame", self), 

8890 freq=rule, 

8891 label=label, 

8892 closed=closed, 

8893 axis=axis, 

8894 kind=kind, 

8895 convention=convention, 

8896 key=on, 

8897 level=level, 

8898 origin=origin, 

8899 offset=offset, 

8900 group_keys=group_keys, 

8901 ) 

8902 

8903 @final 

8904 def first(self: NDFrameT, offset) -> NDFrameT: 

8905 """ 

8906 Select initial periods of time series data based on a date offset. 

8907 

8908 For a DataFrame with a sorted DatetimeIndex, this function can 

8909 select the first few rows based on a date offset. 

8910 

8911 Parameters 

8912 ---------- 

8913 offset : str, DateOffset or dateutil.relativedelta 

8914 The offset length of the data that will be selected. For instance, 

8915 '1M' will select all the rows having their index within the first month. 

8916 

8917 Returns 

8918 ------- 

8919 Series or DataFrame 

8920 A subset of the caller. 

8921 

8922 Raises 

8923 ------ 

8924 TypeError 

8925 If the index is not a :class:`DatetimeIndex` 

8926 

8927 See Also 

8928 -------- 

8929 last : Select final periods of time series based on a date offset. 

8930 at_time : Select values at a particular time of the day. 

8931 between_time : Select values between particular times of the day. 

8932 

8933 Examples 

8934 -------- 

8935 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

8936 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8937 >>> ts 

8938 A 

8939 2018-04-09 1 

8940 2018-04-11 2 

8941 2018-04-13 3 

8942 2018-04-15 4 

8943 

8944 Get the rows for the first 3 days: 

8945 

8946 >>> ts.first('3D') 

8947 A 

8948 2018-04-09 1 

8949 2018-04-11 2 

8950 

8951 Notice that the data for the first 3 calendar days were returned, not the 

8952 first 3 days observed in the dataset, and therefore data for 2018-04-13 

8953 was not returned. 
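
A calendar-based offset behaves the same way: '1M' keeps every row whose

index falls on or before the first month boundary. A sketch (output

indicative, not verified here):

>>> ts.first('1M')  # doctest: +SKIP

A

2018-04-09 1

2018-04-11 2

2018-04-13 3

2018-04-15 4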

8954 """ 

8955 if not isinstance(self.index, DatetimeIndex): 

8956 raise TypeError("'first' only supports a DatetimeIndex index") 

8957 

8958 if len(self.index) == 0: 

8959 return self.copy(deep=False) 

8960 

8961 offset = to_offset(offset) 

8962 if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): 

8963 # GH#29623 if first value is end of period, remove offset with n = 1 

8964 # before adding the real offset 

8965 end_date = end = self.index[0] - offset.base + offset 

8966 else: 

8967 end_date = end = self.index[0] + offset 

8968 

8969 # Tick-like, e.g. 3 weeks 

8970 if isinstance(offset, Tick) and end_date in self.index: 

8971 end = self.index.searchsorted(end_date, side="left") 

8972 return self.iloc[:end] 

8973 

8974 return self.loc[:end] 

8975 

8976 @final 

8977 def last(self: NDFrameT, offset) -> NDFrameT: 

8978 """ 

8979 Select final periods of time series data based on a date offset. 

8980 

8981 For a DataFrame with a sorted DatetimeIndex, this function 

8982 selects the last few rows based on a date offset. 

8983 

8984 Parameters 

8985 ---------- 

8986 offset : str, DateOffset, dateutil.relativedelta 

8987 The offset length of the data that will be selected. For instance, 

8988 '3D' will select all the rows having their index within the last 3 days. 

8989 

8990 Returns 

8991 ------- 

8992 Series or DataFrame 

8993 A subset of the caller. 

8994 

8995 Raises 

8996 ------ 

8997 TypeError 

8998 If the index is not a :class:`DatetimeIndex` 

8999 

9000 See Also 

9001 -------- 

9002 first : Select initial periods of time series based on a date offset. 

9003 at_time : Select values at a particular time of the day. 

9004 between_time : Select values between particular times of the day. 

9005 

9006 Examples 

9007 -------- 

9008 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

9009 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

9010 >>> ts 

9011 A 

9012 2018-04-09 1 

9013 2018-04-11 2 

9014 2018-04-13 3 

9015 2018-04-15 4 

9016 

9017 Get the rows for the last 3 days: 

9018 

9019 >>> ts.last('3D') 

9020 A 

9021 2018-04-13 3 

9022 2018-04-15 4 

9023 

9024 Notice that the data for the last 3 calendar days were returned, not the 

9025 last 3 observed days in the dataset, and therefore data for 2018-04-11 

9026 was not returned. 
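
Note that the left edge of the window is exclusive: with '2D' only rows

strictly after ``index[-1] - 2 days`` are kept. A sketch (output

indicative, not verified here):

>>> ts.last('2D')  # doctest: +SKIP

A

2018-04-15 4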

9027 """ 

9028 if not isinstance(self.index, DatetimeIndex): 

9029 raise TypeError("'last' only supports a DatetimeIndex index") 

9030 

9031 if len(self.index) == 0: 

9032 return self.copy(deep=False) 

9033 

9034 offset = to_offset(offset) 

9035 

9036 start_date = self.index[-1] - offset 

9037 start = self.index.searchsorted(start_date, side="right") 

9038 return self.iloc[start:] 

9039 

9040 @final 

9041 def rank( 

9042 self: NDFrameT, 

9043 axis: Axis = 0, 

9044 method: str = "average", 

9045 numeric_only: bool_t = False, 

9046 na_option: str = "keep", 

9047 ascending: bool_t = True, 

9048 pct: bool_t = False, 

9049 ) -> NDFrameT: 

9050 """ 

9051 Compute numerical data ranks (1 through n) along axis. 

9052 

9053 By default, equal values are assigned a rank that is the average of the 

9054 ranks of those values. 

9055 

9056 Parameters 

9057 ---------- 

9058 axis : {0 or 'index', 1 or 'columns'}, default 0 

9059 Axis along which to rank. 

9060 For `Series` this parameter is unused and defaults to 0. 

9061 method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' 

9062 How to rank the group of records that have the same value (i.e. ties): 

9063 

9064 * average: average rank of the group 

9065 * min: lowest rank in the group 

9066 * max: highest rank in the group 

9067 * first: ranks assigned in order they appear in the array 

9068 * dense: like 'min', but rank always increases by 1 between groups. 

9069 

9070 numeric_only : bool, default False 

9071 For DataFrame objects, rank only numeric columns if set to True. 

9072 

9073 .. versionchanged:: 2.0.0 

9074 The default value of ``numeric_only`` is now ``False``. 

9075 

9076 na_option : {'keep', 'top', 'bottom'}, default 'keep' 

9077 How to rank NaN values: 

9078 

9079 * keep: assign NaN rank to NaN values 

9080 * top: assign lowest rank to NaN values 

9081 * bottom: assign highest rank to NaN values 

9082 

9083 ascending : bool, default True 

9084 Whether or not the elements should be ranked in ascending order. 

9085 pct : bool, default False 

9086 Whether or not to display the returned rankings in percentile 

9087 form. 

9088 

9089 Returns 

9090 ------- 

9091 same type as caller 

9092 Return a Series or DataFrame with data ranks as values. 

9093 

9094 See Also 

9095 -------- 

9096 core.groupby.DataFrameGroupBy.rank : Rank of values within each group. 

9097 core.groupby.SeriesGroupBy.rank : Rank of values within each group. 

9098 

9099 Examples 

9100 -------- 

9101 >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', 

9102 ... 'spider', 'snake'], 

9103 ... 'Number_legs': [4, 2, 4, 8, np.nan]}) 

9104 >>> df 

9105 Animal Number_legs 

9106 0 cat 4.0 

9107 1 penguin 2.0 

9108 2 dog 4.0 

9109 3 spider 8.0 

9110 4 snake NaN 

9111 

9112 Ties are assigned the mean of the ranks (by default) for the group. 

9113 

9114 >>> s = pd.Series(range(5), index=list("abcde")) 

9115 >>> s["d"] = s["b"] 

9116 >>> s.rank() 

9117 a 1.0 

9118 b 2.5 

9119 c 4.0 

9120 d 2.5 

9121 e 5.0 

9122 dtype: float64 

9123 

9124 The following example shows how the method behaves with the above 

9125 parameters: 

9126 

9127 * default_rank: this is the default behaviour obtained without using 

9128 any parameter. 

9129 * max_rank: with ``method = 'max'``, records that have the same 

9130 value are all assigned the highest rank of the group (e.g. since 

9131 'cat' and 'dog' occupy the 2nd and 3rd positions, rank 3 is assigned). 

9132 * NA_bottom: choosing ``na_option = 'bottom'``, if there are records 

9133 with NaN values they are placed at the bottom of the ranking. 

9134 * pct_rank: when setting ``pct = True``, the ranking is expressed as 

9135 percentile rank. 

9136 

9137 >>> df['default_rank'] = df['Number_legs'].rank() 

9138 >>> df['max_rank'] = df['Number_legs'].rank(method='max') 

9139 >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') 

9140 >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) 

9141 >>> df 

9142 Animal Number_legs default_rank max_rank NA_bottom pct_rank 

9143 0 cat 4.0 2.5 3.0 2.5 0.625 

9144 1 penguin 2.0 1.0 1.0 1.0 0.250 

9145 2 dog 4.0 2.5 3.0 2.5 0.625 

9146 3 spider 8.0 4.0 4.0 4.0 1.000 

9147 4 snake NaN NaN NaN 5.0 NaN 
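
The 'dense' method never leaves gaps between rank groups; a sketch using

the tied series from above (output indicative, not verified here):

>>> s.rank(method='dense')  # doctest: +SKIP

a 1.0

b 2.0

c 3.0

d 2.0

e 4.0

dtype: float64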

9148 """ 

9149 axis_int = self._get_axis_number(axis) 

9150 

9151 if na_option not in {"keep", "top", "bottom"}: 

9152 msg = "na_option must be one of 'keep', 'top', or 'bottom'" 

9153 raise ValueError(msg) 

9154 

9155 def ranker(data): 

9156 if data.ndim == 2: 

9157 # i.e. DataFrame, we cast to ndarray 

9158 values = data.values 

9159 else: 

9160 # i.e. Series, can dispatch to EA 

9161 values = data._values 

9162 

9163 if isinstance(values, ExtensionArray): 

9164 ranks = values._rank( 

9165 axis=axis_int, 

9166 method=method, 

9167 ascending=ascending, 

9168 na_option=na_option, 

9169 pct=pct, 

9170 ) 

9171 else: 

9172 ranks = algos.rank( 

9173 values, 

9174 axis=axis_int, 

9175 method=method, 

9176 ascending=ascending, 

9177 na_option=na_option, 

9178 pct=pct, 

9179 ) 

9180 

9181 ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) 

9182 return ranks_obj.__finalize__(self, method="rank") 

9183 

9184 if numeric_only: 

9185 if self.ndim == 1 and not is_numeric_dtype(self.dtype): 

9186 # GH#47500 

9187 raise TypeError( 

9188 "Series.rank does not allow numeric_only=True with " 

9189 "non-numeric dtype." 

9190 ) 

9191 data = self._get_numeric_data() 

9192 else: 

9193 data = self 

9194 

9195 return ranker(data) 

9196 

9197 @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) 

9198 def compare( 

9199 self, 

9200 other, 

9201 align_axis: Axis = 1, 

9202 keep_shape: bool_t = False, 

9203 keep_equal: bool_t = False, 

9204 result_names: Suffixes = ("self", "other"), 

9205 ): 

9206 if type(self) is not type(other): 

9207 cls_self, cls_other = type(self).__name__, type(other).__name__ 

9208 raise TypeError( 

9209 f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" 

9210 ) 

9211 

9212 mask = ~((self == other) | (self.isna() & other.isna())) 
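
# mask is True where the two objects differ; positions where both sides

# are NA compare as equal, and any NA left in the comparison result is

# treated as a difference by the fillna below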

9213 mask.fillna(True, inplace=True) 

9214 

9215 if not keep_equal: 

9216 self = self.where(mask) 

9217 other = other.where(mask) 

9218 

9219 if not keep_shape: 

9220 if isinstance(self, ABCDataFrame): 

9221 cmask = mask.any() 

9222 rmask = mask.any(axis=1) 

9223 self = self.loc[rmask, cmask] 

9224 other = other.loc[rmask, cmask] 

9225 else: 

9226 self = self[mask] 

9227 other = other[mask] 

9228 if not isinstance(result_names, tuple): 

9229 raise TypeError( 

9230 f"Passing 'result_names' as a {type(result_names)} is not " 

9231 "supported. Provide 'result_names' as a tuple instead." 

9232 ) 

9233 

9234 if align_axis in (1, "columns"): # This is needed for Series 

9235 axis = 1 

9236 else: 

9237 axis = self._get_axis_number(align_axis) 

9238 

9239 diff = concat([self, other], axis=axis, keys=result_names) 

9240 

9241 if axis >= self.ndim: 

9242 # No need to reorganize data if stacking on new axis 

9243 # This currently applies for stacking two Series on columns 

9244 return diff 

9245 

9246 ax = diff._get_axis(axis) 

9247 ax_names = np.array(ax.names) 

9248 

9249 # set index names to positions to avoid confusion 

9250 ax.names = np.arange(len(ax_names)) 

9251 

9252 # bring self-other to inner level 

9253 order = list(range(1, ax.nlevels)) + [0] 

9254 if isinstance(diff, ABCDataFrame): 

9255 diff = diff.reorder_levels(order, axis=axis) 

9256 else: 

9257 diff = diff.reorder_levels(order) 

9258 

9259 # restore the index names in order 

9260 diff._get_axis(axis=axis).names = ax_names[order] 

9261 

9262 # reorder axis to keep things organized 

9263 indices = ( 

9264 np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() 

9265 ) 

9266 diff = diff.take(indices, axis=axis) 

9267 

9268 return diff 

9269 

9270 @doc(**_shared_doc_kwargs) 

9271 def align( 

9272 self: NDFrameT, 

9273 other: NDFrameT, 

9274 join: AlignJoin = "outer", 

9275 axis: Axis | None = None, 

9276 level: Level = None, 

9277 copy: bool_t | None = None, 

9278 fill_value: Hashable = None, 

9279 method: FillnaOptions | None = None, 

9280 limit: int | None = None, 

9281 fill_axis: Axis = 0, 

9282 broadcast_axis: Axis | None = None, 

9283 ) -> NDFrameT: 

9284 """ 

9285 Align two objects on their axes with the specified join method. 

9286 

9287 Join method is specified for each axis Index. 

9288 

9289 Parameters 

9290 ---------- 

9291 other : DataFrame or Series 

9292 join : {{'outer', 'inner', 'left', 'right'}}, default 'outer' 

9293 axis : allowed axis of the other object, default None 

9294 Align on index (0), columns (1), or both (None). 

9295 level : int or level name, default None 

9296 Broadcast across a level, matching Index values on the 

9297 passed MultiIndex level. 

9298 copy : bool, default True 

9299 Returns new objects by default. If copy=False and no reindexing is 

9300 required, the original objects are returned. 

9301 fill_value : scalar, default np.NaN 

9302 Value to use for missing values. Defaults to NaN, but can be any 

9303 "compatible" value. 

9304 method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None 

9305 Method to use for filling holes in reindexed Series: 

9306 

9307 - pad / ffill: propagate last valid observation forward to next valid. 

9308 - backfill / bfill: use NEXT valid observation to fill gap. 

9309 

9310 limit : int, default None 

9311 If method is specified, this is the maximum number of consecutive 

9312 NaN values to forward/backward fill. In other words, if there is 

9313 a gap with more than this number of consecutive NaNs, it will only 

9314 be partially filled. If method is not specified, this is the 

9315 maximum number of entries along the entire axis where NaNs will be 

9316 filled. Must be greater than 0 if not None. 

9317 fill_axis : {axes_single_arg}, default 0 

9318 Axis along which to fill, used together with `method` and `limit`. 

9319 broadcast_axis : {axes_single_arg}, default None 

9320 Broadcast values along this axis, if aligning two objects of 

9321 different dimensions. 

9322 

9323 Returns 

9324 ------- 

9325 tuple of ({klass}, type of other) 

9326 Aligned objects. 

9327 

9328 Examples 

9329 -------- 

9330 >>> df = pd.DataFrame( 

9331 ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2] 

9332 ... ) 

9333 >>> other = pd.DataFrame( 

9334 ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]], 

9335 ... columns=["A", "B", "C", "D"], 

9336 ... index=[2, 3, 4], 

9337 ... ) 

9338 >>> df 

9339 D B E A 

9340 1 1 2 3 4 

9341 2 6 7 8 9 

9342 >>> other 

9343 A B C D 

9344 2 10 20 30 40 

9345 3 60 70 80 90 

9346 4 600 700 800 900 

9347 

9348 Align on columns: 

9349 

9350 >>> left, right = df.align(other, join="outer", axis=1) 

9351 >>> left 

9352 A B C D E 

9353 1 4 2 NaN 1 3 

9354 2 9 7 NaN 6 8 

9355 >>> right 

9356 A B C D E 

9357 2 10 20 30 40 NaN 

9358 3 60 70 80 90 NaN 

9359 4 600 700 800 900 NaN 

9360 

9361 We can also align on the index: 

9362 

9363 >>> left, right = df.align(other, join="outer", axis=0) 

9364 >>> left 

9365 D B E A 

9366 1 1.0 2.0 3.0 4.0 

9367 2 6.0 7.0 8.0 9.0 

9368 3 NaN NaN NaN NaN 

9369 4 NaN NaN NaN NaN 

9370 >>> right 

9371 A B C D 

9372 1 NaN NaN NaN NaN 

9373 2 10.0 20.0 30.0 40.0 

9374 3 60.0 70.0 80.0 90.0 

9375 4 600.0 700.0 800.0 900.0 

9376 

9377 Finally, the default `axis=None` will align on both index and columns: 

9378 

9379 >>> left, right = df.align(other, join="outer", axis=None) 

9380 >>> left 

9381 A B C D E 

9382 1 4.0 2.0 NaN 1.0 3.0 

9383 2 9.0 7.0 NaN 6.0 8.0 

9384 3 NaN NaN NaN NaN NaN 

9385 4 NaN NaN NaN NaN NaN 

9386 >>> right 

9387 A B C D E 

9388 1 NaN NaN NaN NaN NaN 

9389 2 10.0 20.0 30.0 40.0 NaN 

9390 3 60.0 70.0 80.0 90.0 NaN 

9391 4 600.0 700.0 800.0 900.0 NaN 
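
If ``fill_value`` is passed, newly introduced positions are filled with it

instead of NaN. A sketch (output elided, not verified here):

>>> left, right = df.align(other, join="outer", axis=None, fill_value=0)  # doctest: +SKIP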

9392 """ 

9393 

9394 method = clean_fill_method(method) 

9395 

9396 if broadcast_axis == 1 and self.ndim != other.ndim: 

9397 if isinstance(self, ABCSeries): 

9398 # this means other is a DataFrame, and we need to broadcast 

9399 # self 

9400 cons = self._constructor_expanddim 

9401 df = cons( 

9402 {c: self for c in other.columns}, **other._construct_axes_dict() 

9403 ) 

9404 return df._align_frame( 

9405 other, 

9406 join=join, 

9407 axis=axis, 

9408 level=level, 

9409 copy=copy, 

9410 fill_value=fill_value, 

9411 method=method, 

9412 limit=limit, 

9413 fill_axis=fill_axis, 

9414 ) 

9415 elif isinstance(other, ABCSeries): 

9416 # this means self is a DataFrame, and we need to broadcast 

9417 # other 

9418 cons = other._constructor_expanddim 

9419 df = cons( 

9420 {c: other for c in self.columns}, **self._construct_axes_dict() 

9421 ) 

9422 return self._align_frame( 

9423 df, 

9424 join=join, 

9425 axis=axis, 

9426 level=level, 

9427 copy=copy, 

9428 fill_value=fill_value, 

9429 method=method, 

9430 limit=limit, 

9431 fill_axis=fill_axis, 

9432 ) 

9433 

9434 if axis is not None: 

9435 axis = self._get_axis_number(axis) 

9436 if isinstance(other, ABCDataFrame): 

9437 return self._align_frame( 

9438 other, 

9439 join=join, 

9440 axis=axis, 

9441 level=level, 

9442 copy=copy, 

9443 fill_value=fill_value, 

9444 method=method, 

9445 limit=limit, 

9446 fill_axis=fill_axis, 

9447 ) 

9448 elif isinstance(other, ABCSeries): 

9449 return self._align_series( 

9450 other, 

9451 join=join, 

9452 axis=axis, 

9453 level=level, 

9454 copy=copy, 

9455 fill_value=fill_value, 

9456 method=method, 

9457 limit=limit, 

9458 fill_axis=fill_axis, 

9459 ) 

9460 else: # pragma: no cover 

9461 raise TypeError(f"unsupported type: {type(other)}") 

9462 

9463 @final 

9464 def _align_frame( 

9465 self, 

9466 other, 

9467 join: AlignJoin = "outer", 

9468 axis: Axis | None = None, 

9469 level=None, 

9470 copy: bool_t | None = None, 

9471 fill_value=None, 

9472 method=None, 

9473 limit=None, 

9474 fill_axis: Axis = 0, 

9475 ): 

9476 # defaults 

9477 join_index, join_columns = None, None 

9478 ilidx, iridx = None, None 

9479 clidx, cridx = None, None 

9480 

9481 is_series = isinstance(self, ABCSeries) 

9482 

9483 if (axis is None or axis == 0) and not self.index.equals(other.index): 

9484 join_index, ilidx, iridx = self.index.join( 

9485 other.index, how=join, level=level, return_indexers=True 

9486 ) 

9487 

9488 if ( 

9489 (axis is None or axis == 1) 

9490 and not is_series 

9491 and not self.columns.equals(other.columns) 

9492 ): 

9493 join_columns, clidx, cridx = self.columns.join( 

9494 other.columns, how=join, level=level, return_indexers=True 

9495 ) 

9496 

9497 if is_series: 

9498 reindexers = {0: [join_index, ilidx]} 

9499 else: 

9500 reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} 

9501 

9502 left = self._reindex_with_indexers( 

9503 reindexers, copy=copy, fill_value=fill_value, allow_dups=True 

9504 ) 

9505 # other must be always DataFrame 

9506 right = other._reindex_with_indexers( 

9507 {0: [join_index, iridx], 1: [join_columns, cridx]}, 

9508 copy=copy, 

9509 fill_value=fill_value, 

9510 allow_dups=True, 

9511 ) 

9512 

9513 if method is not None: 

9514 _left = left.fillna(method=method, axis=fill_axis, limit=limit) 

9515 assert _left is not None # needed for mypy 

9516 left = _left 

9517 right = right.fillna(method=method, axis=fill_axis, limit=limit) 

9518 

9519 # if DatetimeIndex have different tz, convert to UTC 

9520 left, right = _align_as_utc(left, right, join_index) 

9521 

9522 return ( 

9523 left.__finalize__(self), 

9524 right.__finalize__(other), 

9525 ) 

9526 

9527 @final 

9528 def _align_series( 

9529 self, 

9530 other, 

9531 join: AlignJoin = "outer", 

9532 axis: Axis | None = None, 

9533 level=None, 

9534 copy: bool_t | None = None, 

9535 fill_value=None, 

9536 method=None, 

9537 limit=None, 

9538 fill_axis: Axis = 0, 

9539 ): 

9540 is_series = isinstance(self, ABCSeries) 

9541 if copy and using_copy_on_write(): 

9542 copy = False 

9543 

9544 if (not is_series and axis is None) or axis not in [None, 0, 1]: 

9545 raise ValueError("Must specify axis=0 or 1") 

9546 

9547 if is_series and axis == 1: 

9548 raise ValueError("cannot align series to a series other than axis 0") 

9549 

9550 # series/series compat, other must always be a Series 

9551 if not axis: 

9552 # equal 

9553 if self.index.equals(other.index): 

9554 join_index, lidx, ridx = None, None, None 

9555 else: 

9556 join_index, lidx, ridx = self.index.join( 

9557 other.index, how=join, level=level, return_indexers=True 

9558 ) 

9559 

9560 if is_series: 

9561 left = self._reindex_indexer(join_index, lidx, copy) 

9562 elif lidx is None or join_index is None: 

9563 left = self.copy(deep=copy) 

9564 else: 

9565 left = self._constructor( 

9566 self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) 

9567 ) 

9568 

9569 right = other._reindex_indexer(join_index, ridx, copy) 

9570 

9571 else: 

9572 # one has > 1 ndim 

9573 fdata = self._mgr 

9574 join_index = self.axes[1] 
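
# self is a DataFrame in this branch, so axes[1] is its columns,

# which get aligned against the Series' index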

9575 lidx, ridx = None, None 

9576 if not join_index.equals(other.index): 

9577 join_index, lidx, ridx = join_index.join( 

9578 other.index, how=join, level=level, return_indexers=True 

9579 ) 

9580 

9581 if lidx is not None: 

9582 bm_axis = self._get_block_manager_axis(1) 

9583 fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) 

9584 

9585 if copy and fdata is self._mgr: 

9586 fdata = fdata.copy() 

9587 

9588 left = self._constructor(fdata) 

9589 

9590 if ridx is None: 

9591 right = other.copy(deep=copy) 

9592 else: 

9593 right = other.reindex(join_index, level=level) 

9594 

9595 # fill 

9596 fill_na = notna(fill_value) or (method is not None) 

9597 if fill_na: 

9598 left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) 

9599 right = right.fillna(fill_value, method=method, limit=limit) 

9600 

9601 # if DatetimeIndex have different tz, convert to UTC 

9602 if is_series or (not is_series and axis == 0): 

9603 left, right = _align_as_utc(left, right, join_index) 

9604 

9605 return ( 

9606 left.__finalize__(self), 

9607 right.__finalize__(other), 

9608 ) 

9609 

9610 @final 

9611 def _where( 

9612 self, 

9613 cond, 

9614 other=lib.no_default, 

9615 inplace: bool_t = False, 

9616 axis: Axis | None = None, 

9617 level=None, 

9618 ): 

9619 """ 

9620 Equivalent to public method `where`, except that `other` is not 

9621 applied as a function even if callable. Used in __setitem__. 

9622 """ 

9623 inplace = validate_bool_kwarg(inplace, "inplace") 

9624 

9625 if axis is not None: 

9626 axis = self._get_axis_number(axis) 

9627 

9628 # align the cond to same shape as myself 

9629 cond = common.apply_if_callable(cond, self) 

9630 if isinstance(cond, NDFrame): 

9631 # CoW: Make sure reference is not kept alive 

9632 cond = cond.align(self, join="right", broadcast_axis=1, copy=False)[0] 

9633 else: 

9634 if not hasattr(cond, "shape"): 

9635 cond = np.asanyarray(cond) 

9636 if cond.shape != self.shape: 

9637 raise ValueError("Array conditional must be same shape as self") 

9638 cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) 

9639 

9640 # make sure we are boolean 

9641 fill_value = bool(inplace) 

9642 cond = cond.fillna(fill_value) 

9643 

9644 msg = "Boolean array expected for the condition, not {dtype}" 

9645 

9646 if not cond.empty: 

9647 if not isinstance(cond, ABCDataFrame): 

9648 # This is a single-dimensional object. 

9649 if not is_bool_dtype(cond): 

9650 raise ValueError(msg.format(dtype=cond.dtype)) 

9651 else: 

9652 for _dt in cond.dtypes: 

9653 if not is_bool_dtype(_dt): 

9654 raise ValueError(msg.format(dtype=_dt)) 

9655 else: 

9656 # GH#21947 we have an empty DataFrame/Series, could be object-dtype 

9657 cond = cond.astype(bool) 

9658 

9659 cond = -cond if inplace else cond 
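
# note: unary minus above inverts the boolean cond (bool dtypes are

# dispatched to operator.inv in NDFrame.__neg__)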

9660 cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False) 

9661 

9662 # try to align with other 

9663 if isinstance(other, NDFrame): 

9664 # align with me 

9665 if other.ndim <= self.ndim: 

9666 # CoW: Make sure reference is not kept alive 

9667 other = self.align( 

9668 other, 

9669 join="left", 

9670 axis=axis, 

9671 level=level, 

9672 fill_value=None, 

9673 copy=False, 

9674 )[1] 

9675 

9676 # if we are NOT aligned, raise as we cannot apply `where` across misaligned indexes 

9677 if axis is None and not other._indexed_same(self): 

9678 raise InvalidIndexError 

9679 

9680 if other.ndim < self.ndim: 

9681 # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 

9682 other = other._values 

9683 if axis == 0: 

9684 other = np.reshape(other, (-1, 1)) 

9685 elif axis == 1: 

9686 other = np.reshape(other, (1, -1)) 

9687 

9688 other = np.broadcast_to(other, self.shape) 
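
# the lower-dimensional `other` was oriented along `axis` above and is

# broadcast here to self's full shape so the masking ops can consume it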

9689 

9690 # slice me out of the other 

9691 else: 

9692 raise NotImplementedError( 

9693 "cannot align with a higher dimensional NDFrame" 

9694 ) 

9695 

9696 elif not isinstance(other, (MultiIndex, NDFrame)): 

9697 # mainly just catching Index here 

9698 other = extract_array(other, extract_numpy=True) 

9699 

9700 if isinstance(other, (np.ndarray, ExtensionArray)): 

9701 if other.shape != self.shape: 

9702 if self.ndim != 1: 

9703 # In the ndim == 1 case we may have 

9704 # other length 1, which we treat as scalar (GH#2745, GH#4192) 

9705 # or len(other) == icond.sum(), which we treat like 

9706 # __setitem__ (GH#3235) 

9707 raise ValueError( 

9708 "other must be the same shape as self when an ndarray" 

9709 ) 

9710 

9711 # we are the same shape, so create an actual object for alignment 

9712 else: 

9713 other = self._constructor( 

9714 other, **self._construct_axes_dict(), copy=False 

9715 ) 

9716 

9717 if axis is None: 

9718 axis = 0 

9719 

9720 if self.ndim == getattr(other, "ndim", 0): 

9721 align = True 

9722 else: 

9723 align = self._get_axis_number(axis) == 1 

9724 

9725 if inplace: 

9726 # we may have different type blocks come out of putmask, so 

9727 # reconstruct the block manager 

9728 

9729 self._check_inplace_setting(other) 

9730 new_data = self._mgr.putmask(mask=cond, new=other, align=align) 

9731 result = self._constructor(new_data) 

9732 return self._update_inplace(result) 

9733 

9734 else: 

9735 new_data = self._mgr.where( 

9736 other=other, 

9737 cond=cond, 

9738 align=align, 

9739 ) 

9740 result = self._constructor(new_data) 

9741 return result.__finalize__(self) 

9742 

9743 @overload 

9744 def where( 

9745 self: NDFrameT, 

9746 cond, 

9747 other=..., 

9748 *, 

9749 inplace: Literal[False] = ..., 

9750 axis: Axis | None = ..., 

9751 level: Level = ..., 

9752 ) -> NDFrameT: 

9753 ... 

9754 

9755 @overload 

9756 def where( 

9757 self, 

9758 cond, 

9759 other=..., 

9760 *, 

9761 inplace: Literal[True], 

9762 axis: Axis | None = ..., 

9763 level: Level = ..., 

9764 ) -> None: 

9765 ... 

9766 

9767 @overload 

9768 def where( 

9769 self: NDFrameT, 

9770 cond, 

9771 other=..., 

9772 *, 

9773 inplace: bool_t = ..., 

9774 axis: Axis | None = ..., 

9775 level: Level = ..., 

9776 ) -> NDFrameT | None: 

9777 ... 

9778 

9779 @doc( 

9780 klass=_shared_doc_kwargs["klass"], 

9781 cond="True", 

9782 cond_rev="False", 

9783 name="where", 

9784 name_other="mask", 

9785 ) 

9786 def where( 

9787 self: NDFrameT, 

9788 cond, 

9789 other=np.nan, 

9790 *, 

9791 inplace: bool_t = False, 

9792 axis: Axis | None = None, 

9793 level: Level = None, 

9794 ) -> NDFrameT | None: 

9795 """ 

9796 Replace values where the condition is {cond_rev}. 

9797 

9798 Parameters 

9799 ---------- 

9800 cond : bool {klass}, array-like, or callable 

9801 Where `cond` is {cond}, keep the original value. Where 

9802 {cond_rev}, replace with corresponding value from `other`. 

9803 If `cond` is callable, it is computed on the {klass} and 

9804 should return boolean {klass} or array. The callable must 

9805 not change the input {klass} (though pandas doesn't check this). 

9806 other : scalar, {klass}, or callable 

9807 Entries where `cond` is {cond_rev} are replaced with 

9808 corresponding value from `other`. 

9809 If other is callable, it is computed on the {klass} and 

9810 should return scalar or {klass}. The callable must not 

9811 change the input {klass} (though pandas doesn't check this). 

9812 If not specified, entries will be filled with the corresponding 

9813 NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension 

9814 dtypes). 

9815 inplace : bool, default False 

9816 Whether to perform the operation in place on the data. 

9817 axis : int, default None 

9818 Alignment axis if needed. For `Series` this parameter is 

9819 unused and defaults to 0. 

9820 level : int, default None 

9821 Alignment level if needed. 

9822 

9823 Returns 

9824 ------- 

9825 Same type as caller or None if ``inplace=True``. 

9826 

9827 See Also 

9828 -------- 

9829 :func:`DataFrame.{name_other}` : Return an object of same shape as 

9830 self. 

9831 

9832 Notes 

9833 ----- 

9834 The {name} method is an application of the if-then idiom. For each 

9835 element in the calling DataFrame, if ``cond`` is ``{cond}`` the 

9836 element is used; otherwise the corresponding element from the DataFrame 

9837 ``other`` is used. If the axis of ``other`` does not align with axis of 

9838 ``cond`` {klass}, the misaligned index positions will be filled with 

9839 {cond_rev}. 

9840 

9841 The signature for :func:`DataFrame.where` differs from 

9842 :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to 

9843 ``np.where(m, df1, df2)``. 

9844 

9845 For further details and examples see the ``{name}`` documentation in 

9846 :ref:`indexing <indexing.where_mask>`. 

9847 

9848 The dtype of the object takes precedence. The fill value is cast to 

9849 the object's dtype, if this can be done losslessly. 

9850 

9851 Examples 

9852 -------- 

9853 >>> s = pd.Series(range(5)) 

9854 >>> s.where(s > 0) 

9855 0 NaN 

9856 1 1.0 

9857 2 2.0 

9858 3 3.0 

9859 4 4.0 

9860 dtype: float64 

9861 >>> s.mask(s > 0) 

9862 0 0.0 

9863 1 NaN 

9864 2 NaN 

9865 3 NaN 

9866 4 NaN 

9867 dtype: float64 

9868 

9869 >>> s = pd.Series(range(5)) 

9870 >>> t = pd.Series([True, False]) 

9871 >>> s.where(t, 99) 

9872 0 0 

9873 1 99 

9874 2 99 

9875 3 99 

9876 4 99 

9877 dtype: int64 

9878 >>> s.mask(t, 99) 

9879 0 99 

9880 1 1 

9881 2 99 

9882 3 99 

9883 4 99 

9884 dtype: int64 

9885 

9886 >>> s.where(s > 1, 10) 

9887 0 10 

9888 1 10 

9889 2 2 

9890 3 3 

9891 4 4 

9892 dtype: int64 

9893 >>> s.mask(s > 1, 10) 

9894 0 0 

9895 1 1 

9896 2 10 

9897 3 10 

9898 4 10 

9899 dtype: int64 

9900 

9901 >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) 

9902 >>> df 

9903 A B 

9904 0 0 1 

9905 1 2 3 

9906 2 4 5 

9907 3 6 7 

9908 4 8 9 

9909 >>> m = df % 3 == 0 

9910 >>> df.where(m, -df) 

9911 A B 

9912 0 0 -1 

9913 1 -2 3 

9914 2 -4 -5 

9915 3 6 -7 

9916 4 -8 9 

9917 >>> df.where(m, -df) == np.where(m, df, -df) 

9918 A B 

9919 0 True True 

9920 1 True True 

9921 2 True True 

9922 3 True True 

9923 4 True True 

9924 >>> df.where(m, -df) == df.mask(~m, -df) 

9925 A B 

9926 0 True True 

9927 1 True True 

9928 2 True True 

9929 3 True True 

9930 4 True True 

9931 """ 

9932 other = common.apply_if_callable(other, self) 

9933 return self._where(cond, other, inplace, axis, level) 

9934 

9935 @overload 

9936 def mask( 

9937 self: NDFrameT, 

9938 cond, 

9939 other=..., 

9940 *, 

9941 inplace: Literal[False] = ..., 

9942 axis: Axis | None = ..., 

9943 level: Level = ..., 

9944 ) -> NDFrameT: 

9945 ... 

9946 

9947 @overload 

9948 def mask( 

9949 self, 

9950 cond, 

9951 other=..., 

9952 *, 

9953 inplace: Literal[True], 

9954 axis: Axis | None = ..., 

9955 level: Level = ..., 

9956 ) -> None: 

9957 ... 

9958 

9959 @overload 

9960 def mask( 

9961 self: NDFrameT, 

9962 cond, 

9963 other=..., 

9964 *, 

9965 inplace: bool_t = ..., 

9966 axis: Axis | None = ..., 

9967 level: Level = ..., 

9968 ) -> NDFrameT | None: 

9969 ... 

9970 

9971 @doc( 

9972 where, 

9973 klass=_shared_doc_kwargs["klass"], 

9974 cond="False", 

9975 cond_rev="True", 

9976 name="mask", 

9977 name_other="where", 

9978 ) 

9979 def mask( 

9980 self: NDFrameT, 

9981 cond, 

9982 other=lib.no_default, 

9983 *, 

9984 inplace: bool_t = False, 

9985 axis: Axis | None = None, 

9986 level: Level = None, 

9987 ) -> NDFrameT | None: 

9988 inplace = validate_bool_kwarg(inplace, "inplace") 

9989 cond = common.apply_if_callable(cond, self) 

9990 

9991 # see gh-21891 

9992 if not hasattr(cond, "__invert__"): 

9993 cond = np.array(cond) 
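
# plain sequences (e.g. lists) have no __invert__, so coerce to

# ndarray before applying ~ below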

9994 

9995 return self.where( 

9996 ~cond, 

9997 other=other, 

9998 inplace=inplace, 

9999 axis=axis, 

10000 level=level, 

10001 ) 

10002 

10003 @doc(klass=_shared_doc_kwargs["klass"]) 

10004 def shift( 

10005 self: NDFrameT, 

10006 periods: int = 1, 

10007 freq=None, 

10008 axis: Axis = 0, 

10009 fill_value: Hashable = None, 

10010 ) -> NDFrameT: 

10011 """ 

10012 Shift index by desired number of periods with an optional time `freq`. 

10013 

10014 When `freq` is not passed, the data is shifted and the index is left 

10015 unchanged. If `freq` is passed, the index is shifted by `periods` 

10016 increments of `freq` while the data stays in place (the index must be 

10017 date- or datetime-like, or a `NotImplementedError` is raised). `freq` 

10018 can be inferred when specified as "infer" as long as either the freq or 

10019 inferred_freq attribute is set on the index. 

10020 

10021 Parameters 

10022 ---------- 

10023 periods : int 

10024 Number of periods to shift. Can be positive or negative. 

10025 freq : DateOffset, tseries.offsets, timedelta, or str, optional 

10026 Offset to use from the tseries module or time rule (e.g. 'EOM'). 

10027 If `freq` is specified then the index values are shifted but the 

10028 data is not realigned. That is, use `freq` if you would like to 

10029 extend the index when shifting and preserve the original data. 

10030 If `freq` is specified as "infer" then it will be inferred from 

10031 the freq or inferred_freq attributes of the index. If neither of 

10032 those attributes exist, a ValueError is thrown. 

10033 axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

10034 Shift direction. For `Series` this parameter is unused and defaults to 0. 

10035 fill_value : object, optional 

10036 The scalar value to use for newly introduced missing values. 

10037 The default depends on the dtype of `self`. 

10038 For numeric data, ``np.nan`` is used. 

10039 For datetime, timedelta, or period data, etc. :attr:`NaT` is used. 

10040 For extension dtypes, ``self.dtype.na_value`` is used. 

10041 

10042 .. versionchanged:: 1.1.0 

10043 

10044 Returns 

10045 ------- 

10046 {klass} 

10047 Copy of input object, shifted. 

10048 

10049 See Also 

10050 -------- 

10051 Index.shift : Shift values of Index. 

10052 DatetimeIndex.shift : Shift values of DatetimeIndex. 

10053 PeriodIndex.shift : Shift values of PeriodIndex. 

10054 

10055 Examples 

10056 -------- 

10057 >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], 

10058 ... "Col2": [13, 23, 18, 33, 48], 

10059 ... "Col3": [17, 27, 22, 37, 52]}}, 

10060 ... index=pd.date_range("2020-01-01", "2020-01-05")) 

10061 >>> df 

10062 Col1 Col2 Col3 

10063 2020-01-01 10 13 17 

10064 2020-01-02 20 23 27 

10065 2020-01-03 15 18 22 

10066 2020-01-04 30 33 37 

10067 2020-01-05 45 48 52 

10068 

10069 >>> df.shift(periods=3) 

10070 Col1 Col2 Col3 

10071 2020-01-01 NaN NaN NaN 

10072 2020-01-02 NaN NaN NaN 

10073 2020-01-03 NaN NaN NaN 

10074 2020-01-04 10.0 13.0 17.0 

10075 2020-01-05 20.0 23.0 27.0 

10076 

10077 >>> df.shift(periods=1, axis="columns") 

10078 Col1 Col2 Col3 

10079 2020-01-01 NaN 10 13 

10080 2020-01-02 NaN 20 23 

10081 2020-01-03 NaN 15 18 

10082 2020-01-04 NaN 30 33 

10083 2020-01-05 NaN 45 48 

10084 

10085 >>> df.shift(periods=3, fill_value=0) 

10086 Col1 Col2 Col3 

10087 2020-01-01 0 0 0 

10088 2020-01-02 0 0 0 

10089 2020-01-03 0 0 0 

10090 2020-01-04 10 13 17 

10091 2020-01-05 20 23 27 

10092 
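
A negative ``periods`` shifts the data the other way. A sketch (output

indicative, not verified here):

>>> df.shift(periods=-2)  # doctest: +SKIP

Col1 Col2 Col3

2020-01-01 15.0 18.0 22.0

2020-01-02 30.0 33.0 37.0

2020-01-03 45.0 48.0 52.0

2020-01-04 NaN NaN NaN

2020-01-05 NaN NaN NaN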

10093 >>> df.shift(periods=3, freq="D") 

10094 Col1 Col2 Col3 

10095 2020-01-04 10 13 17 

10096 2020-01-05 20 23 27 

10097 2020-01-06 15 18 22 

10098 2020-01-07 30 33 37 

10099 2020-01-08 45 48 52 

10100 

10101 >>> df.shift(periods=3, freq="infer") 

10102 Col1 Col2 Col3 

10103 2020-01-04 10 13 17 

10104 2020-01-05 20 23 27 

10105 2020-01-06 15 18 22 

10106 2020-01-07 30 33 37 

10107 2020-01-08 45 48 52 

10108 """ 

10109 if periods == 0: 

10110 return self.copy(deep=None) 

10111 

10112 if freq is None: 

10113 # when freq is None, data is shifted, index is not 

10114 axis = self._get_axis_number(axis) 

10115 new_data = self._mgr.shift( 

10116 periods=periods, axis=axis, fill_value=fill_value 

10117 ) 

10118 return self._constructor(new_data).__finalize__(self, method="shift") 

10119 

10120 # when freq is given, index is shifted, data is not 

10121 index = self._get_axis(axis) 

10122 

10123 if freq == "infer": 

10124 freq = getattr(index, "freq", None) 

10125 

10126 if freq is None: 

10127 freq = getattr(index, "inferred_freq", None) 

10128 

10129 if freq is None: 

10130 msg = "Freq was not set in the index hence cannot be inferred" 

10131 raise ValueError(msg) 

10132 

10133 elif isinstance(freq, str): 

10134 freq = to_offset(freq) 

10135 

10136 if isinstance(index, PeriodIndex): 

10137 orig_freq = to_offset(index.freq) 

10138 if freq != orig_freq: 

10139 assert orig_freq is not None # for mypy 

10140 raise ValueError( 

10141 f"Given freq {freq.rule_code} does not match " 

10142 f"PeriodIndex freq {orig_freq.rule_code}" 

10143 ) 

10144 new_ax = index.shift(periods) 

10145 else: 

10146 new_ax = index.shift(periods, freq) 

10147 

10148 result = self.set_axis(new_ax, axis=axis) 

10149 return result.__finalize__(self, method="shift") 

10150 

10151 def truncate( 

10152 self: NDFrameT, 

10153 before=None, 

10154 after=None, 

10155 axis: Axis | None = None, 

10156 copy: bool_t | None = None, 

10157 ) -> NDFrameT: 

10158 """ 

10159 Truncate a Series or DataFrame before and after some index value. 

10160 

10161 This is a useful shorthand for boolean indexing based on index 

10162 values above or below certain thresholds. 

10163 

10164 Parameters 

10165 ---------- 

10166 before : date, str, int 

10167 Truncate all rows before this index value. 

10168 after : date, str, int 

10169 Truncate all rows after this index value. 

10170 axis : {0 or 'index', 1 or 'columns'}, optional 

10171 Axis to truncate. Truncates the index (rows) by default. 

10172 For `Series` this parameter is unused and defaults to 0. 

10173 copy : bool, default True 

10174 Return a copy of the truncated section. 

10175 

10176 Returns 

10177 ------- 

10178 type of caller 

10179 The truncated Series or DataFrame. 

10180 

10181 See Also 

10182 -------- 

10183 DataFrame.loc : Select a subset of a DataFrame by label. 

10184 DataFrame.iloc : Select a subset of a DataFrame by position. 

10185 

10186 Notes 

10187 ----- 

10188 If the index being truncated contains only datetime values, 

10189 `before` and `after` may be specified as strings instead of 

10190 Timestamps. 

10191 

10192 Examples 

10193 -------- 

10194 >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], 

10195 ... 'B': ['f', 'g', 'h', 'i', 'j'], 

10196 ... 'C': ['k', 'l', 'm', 'n', 'o']}, 

10197 ... index=[1, 2, 3, 4, 5]) 

10198 >>> df 

10199 A B C 

10200 1 a f k 

10201 2 b g l 

10202 3 c h m 

10203 4 d i n 

10204 5 e j o 

10205 

10206 >>> df.truncate(before=2, after=4) 

10207 A B C 

10208 2 b g l 

10209 3 c h m 

10210 4 d i n 

10211 

10212 The columns of a DataFrame can be truncated. 

10213 

10214 >>> df.truncate(before="A", after="B", axis="columns") 

10215 A B 

10216 1 a f 

10217 2 b g 

10218 3 c h 

10219 4 d i 

10220 5 e j 

10221 

10222 For Series, only rows can be truncated. 

10223 

10224 >>> df['A'].truncate(before=2, after=4) 

10225 2 b 

10226 3 c 

10227 4 d 

10228 Name: A, dtype: object 

10229 

10230 The index values in ``truncate`` can be datetimes or string 

10231 dates. 

10232 

10233 >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') 

10234 >>> df = pd.DataFrame(index=dates, data={'A': 1}) 

10235 >>> df.tail() 

10236 A 

10237 2016-01-31 23:59:56 1 

10238 2016-01-31 23:59:57 1 

10239 2016-01-31 23:59:58 1 

10240 2016-01-31 23:59:59 1 

10241 2016-02-01 00:00:00 1 

10242 

10243 >>> df.truncate(before=pd.Timestamp('2016-01-05'), 

10244 ... after=pd.Timestamp('2016-01-10')).tail() 

10245 A 

10246 2016-01-09 23:59:56 1 

10247 2016-01-09 23:59:57 1 

10248 2016-01-09 23:59:58 1 

10249 2016-01-09 23:59:59 1 

10250 2016-01-10 00:00:00 1 

10251 

10252 Because the index is a DatetimeIndex containing only dates, we can 

10253 specify `before` and `after` as strings. They will be coerced to 

10254 Timestamps before truncation. 

10255 

10256 >>> df.truncate('2016-01-05', '2016-01-10').tail() 

10257 A 

10258 2016-01-09 23:59:56 1 

10259 2016-01-09 23:59:57 1 

10260 2016-01-09 23:59:58 1 

10261 2016-01-09 23:59:59 1 

10262 2016-01-10 00:00:00 1 

10263 

10264 Note that ``truncate`` assumes a 0 value for any unspecified time 

10265 component (midnight). This differs from partial string slicing, which 

10266 returns any partially matching dates. 

10267 

10268 >>> df.loc['2016-01-05':'2016-01-10', :].tail() 

10269 A 

10270 2016-01-10 23:59:55 1 

10271 2016-01-10 23:59:56 1 

10272 2016-01-10 23:59:57 1 

10273 2016-01-10 23:59:58 1 

10274 2016-01-10 23:59:59 1 

10275 """ 

10276 if axis is None: 

10277 axis = self._stat_axis_number 

10278 axis = self._get_axis_number(axis) 

10279 ax = self._get_axis(axis) 

10280 

10281 # GH 17935 

10282 # Check that index is sorted 

10283 if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: 

10284 raise ValueError("truncate requires a sorted index") 

10285 

10286 # if we have a date index, convert to dates, otherwise 

10287 # treat like a slice 

10288 if ax._is_all_dates: 

10289 from pandas.core.tools.datetimes import to_datetime 

10290 

10291 before = to_datetime(before) 

10292 after = to_datetime(after) 

10293 

10294 if before is not None and after is not None and before > after: 

10295 raise ValueError(f"Truncate: {after} must be after {before}") 

10296 

10297 if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1: 

10298 before, after = after, before 
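
# labels were given in display order; for a decreasing index swap them

# so the label-based slice below still selects the intended rows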

10299 

10300 slicer = [slice(None, None)] * self._AXIS_LEN 

10301 slicer[axis] = slice(before, after) 

10302 result = self.loc[tuple(slicer)] 

10303 

10304 if isinstance(ax, MultiIndex): 

10305 setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) 

10306 

10307 result = result.copy(deep=copy and not using_copy_on_write()) 

10308 

10309 return result 

10310 

10311 @final 

10312 @doc(klass=_shared_doc_kwargs["klass"]) 

10313 def tz_convert( 

10314 self: NDFrameT, tz, axis: Axis = 0, level=None, copy: bool_t | None = None 

10315 ) -> NDFrameT: 

10316 """ 

10317 Convert tz-aware axis to target time zone. 

10318 

10319 Parameters 

10320 ---------- 

10321 tz : str or tzinfo object or None 

10322 Target time zone. Passing ``None`` will convert to 

10323 UTC and remove the timezone information. 

10324 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

10325 The axis to convert. 

10326 level : int, str, default None 

10327 If axis is a MultiIndex, convert a specific level. Otherwise 

10328 must be None. 

10329 copy : bool, default True 

10330 Also make a copy of the underlying data. 

10331 

10332 Returns 

10333 ------- 

10334 {klass} 

10335 Object with time zone converted axis. 

10336 

10337 Raises 

10338 ------ 

10339 TypeError 

10340 If the axis is tz-naive. 

10341 

10342 Examples 

10343 -------- 

10344 Change to another time zone: 

10345 

10346 >>> s = pd.Series( 

10347 ... [1], 

10348 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']), 

10349 ... ) 

10350 >>> s.tz_convert('Asia/Shanghai') 

10351 2018-09-15 07:30:00+08:00 1 

10352 dtype: int64 

10353 

10354 Pass None to convert to UTC and get a tz-naive index: 

10355 

10356 >>> s = pd.Series([1], 

10357 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) 

10358 >>> s.tz_convert(None) 

10359 2018-09-14 23:30:00 1 

10360 dtype: int64 

10361 """ 

10362 axis = self._get_axis_number(axis) 

10363 ax = self._get_axis(axis) 

10364 

10365 def _tz_convert(ax, tz): 

10366 if not hasattr(ax, "tz_convert"): 

10367 if len(ax) > 0: 

10368 ax_name = self._get_axis_name(axis) 

10369 raise TypeError( 

10370 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

10371 ) 

10372 ax = DatetimeIndex([], tz=tz) 

10373 else: 

10374 ax = ax.tz_convert(tz) 

10375 return ax 

10376 

10377 # if a level is given it must be a MultiIndex level or 

10378 # equivalent to the axis name 

10379 if isinstance(ax, MultiIndex): 

10380 level = ax._get_level_number(level) 

10381 new_level = _tz_convert(ax.levels[level], tz) 

10382 ax = ax.set_levels(new_level, level=level) 

10383 else: 

10384 if level not in (None, 0, ax.name): 

10385 raise ValueError(f"The level {level} is not valid") 

10386 ax = _tz_convert(ax, tz) 

10387 

10388 result = self.copy(deep=copy and not using_copy_on_write()) 

10389 result = result.set_axis(ax, axis=axis, copy=False) 

10390 return result.__finalize__(self, method="tz_convert") 

10391 

10392 @final 

10393 @doc(klass=_shared_doc_kwargs["klass"]) 

10394 def tz_localize( 

10395 self: NDFrameT, 

10396 tz, 

10397 axis: Axis = 0, 

10398 level=None, 

10399 copy: bool_t | None = None, 

10400 ambiguous: TimeAmbiguous = "raise", 

10401 nonexistent: TimeNonexistent = "raise", 

10402 ) -> NDFrameT: 

10403 """ 

10404 Localize tz-naive index of a Series or DataFrame to target time zone. 

10405 

10406 This operation localizes the Index. To localize the values in a 

10407 timezone-naive Series, use :meth:`Series.dt.tz_localize`. 

10408 

10409 Parameters 

10410 ---------- 

10411 tz : str or tzinfo or None 

10412 Time zone to localize. Passing ``None`` will remove the 

10413 time zone information and preserve local time. 

10414 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

10415 The axis to localize. 

10416 level : int, str, default None 

10417 If axis is a MultiIndex, localize a specific level. Otherwise 

10418 must be None. 

10419 copy : bool, default True 

10420 Also make a copy of the underlying data. 

10421 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' 

10422 When clocks moved backward due to DST, ambiguous times may arise. 

10423 For example in Central European Time (UTC+01), when going from 

10424 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 

10425 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the 

10426 `ambiguous` parameter dictates how ambiguous times should be 

10427 handled. 

10428 

10429 - 'infer' will attempt to infer fall dst-transition hours based on 

10430 order 

10431 - bool-ndarray where True signifies a DST time, False designates 

10432 a non-DST time (note that this flag is only applicable for 

10433 ambiguous times) 

10434 - 'NaT' will return NaT where there are ambiguous times 

10435 - 'raise' will raise an AmbiguousTimeError if there are ambiguous 

10436 times. 

10437 nonexistent : str, default 'raise' 

10438 A nonexistent time does not exist in a particular timezone 

10439 where clocks moved forward due to DST. Valid values are: 

10440 

10441 - 'shift_forward' will shift the nonexistent time forward to the 

10442 closest existing time 

10443 - 'shift_backward' will shift the nonexistent time backward to the 

10444 closest existing time 

10445 - 'NaT' will return NaT where there are nonexistent times 

10446 - timedelta objects will shift nonexistent times by the timedelta 

10447 - 'raise' will raise a NonExistentTimeError if there are 

10448 nonexistent times. 

10449 

10450 Returns 

10451 ------- 

10452 {klass} 

10453 Same type as the input. 

10454 

10455 Raises 

10456 ------ 

10457 TypeError 

10458 If the TimeSeries is tz-aware and tz is not None. 

10459 

10460 Examples 

10461 -------- 

10462 Localize local times: 

10463 

10464 >>> s = pd.Series( 

10465 ... [1], 

10466 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']), 

10467 ... ) 

10468 >>> s.tz_localize('CET') 

10469 2018-09-15 01:30:00+02:00 1 

10470 dtype: int64 

10471 

10472 Pass None to convert to tz-naive index and preserve local time: 

10473 

10474 >>> s = pd.Series([1], 

10475 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) 

10476 >>> s.tz_localize(None) 

10477 2018-09-15 01:30:00 1 

10478 dtype: int64 

10479 

10480 Be careful with DST changes. When there is sequential data, pandas 

10481 can infer the DST time: 

10482 

10483 >>> s = pd.Series(range(7), 

10484 ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', 

10485 ... '2018-10-28 02:00:00', 

10486 ... '2018-10-28 02:30:00', 

10487 ... '2018-10-28 02:00:00', 

10488 ... '2018-10-28 02:30:00', 

10489 ... '2018-10-28 03:00:00', 

10490 ... '2018-10-28 03:30:00'])) 

10491 >>> s.tz_localize('CET', ambiguous='infer') 

10492 2018-10-28 01:30:00+02:00 0 

10493 2018-10-28 02:00:00+02:00 1 

10494 2018-10-28 02:30:00+02:00 2 

10495 2018-10-28 02:00:00+01:00 3 

10496 2018-10-28 02:30:00+01:00 4 

10497 2018-10-28 03:00:00+01:00 5 

10498 2018-10-28 03:30:00+01:00 6 

10499 dtype: int64 

10500 

10501 In some cases, inferring the DST is impossible. In such cases, you can 

10502 pass an ndarray to the `ambiguous` parameter to set the DST explicitly. 

10503 

10504 >>> s = pd.Series(range(3), 

10505 ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', 

10506 ... '2018-10-28 02:36:00', 

10507 ... '2018-10-28 03:46:00'])) 

10508 >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) 

10509 2018-10-28 01:20:00+02:00 0 

10510 2018-10-28 02:36:00+02:00 1 

10511 2018-10-28 03:46:00+01:00 2 

10512 dtype: int64 

10513 

10514 If the DST transition causes nonexistent times, you can shift these 

10515 dates forward or backward with a timedelta object or `'shift_forward'` 

10516 or `'shift_backward'`. 

10517 

10518 >>> s = pd.Series(range(2), 

10519 ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', 

10520 ... '2015-03-29 03:30:00'])) 

10521 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 

10522 2015-03-29 03:00:00+02:00 0 

10523 2015-03-29 03:30:00+02:00 1 

10524 dtype: int64 

10525 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 

10526 2015-03-29 01:59:59.999999999+01:00 0 

10527 2015-03-29 03:30:00+02:00 1 

10528 dtype: int64 

10529 >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 

10530 2015-03-29 03:30:00+02:00 0 

10531 2015-03-29 03:30:00+02:00 1 

10532 dtype: int64 

10533 """ 

10534 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") 

10535 if nonexistent not in nonexistent_options and not isinstance( 

10536 nonexistent, dt.timedelta 

10537 ): 

10538 raise ValueError( 

10539 "The nonexistent argument must be one of 'raise', " 

10540 "'NaT', 'shift_forward', 'shift_backward' or " 

10541 "a timedelta object" 

10542 ) 

10543 

10544 axis = self._get_axis_number(axis) 

10545 ax = self._get_axis(axis) 

10546 

10547 def _tz_localize(ax, tz, ambiguous, nonexistent): 

10548 if not hasattr(ax, "tz_localize"): 

10549 if len(ax) > 0: 

10550 ax_name = self._get_axis_name(axis) 

10551 raise TypeError( 

10552 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

10553 ) 

10554 ax = DatetimeIndex([], tz=tz) 

10555 else: 

10556 ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) 

10557 return ax 

10558 

10559 # if a level is given it must be a MultiIndex level or 

10560 # equivalent to the axis name 

10561 if isinstance(ax, MultiIndex): 

10562 level = ax._get_level_number(level) 

10563 new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) 

10564 ax = ax.set_levels(new_level, level=level) 

10565 else: 

10566 if level not in (None, 0, ax.name): 

10567 raise ValueError(f"The level {level} is not valid") 

10568 ax = _tz_localize(ax, tz, ambiguous, nonexistent) 

10569 

10570 result = self.copy(deep=copy and not using_copy_on_write()) 

10571 result = result.set_axis(ax, axis=axis, copy=False) 

10572 return result.__finalize__(self, method="tz_localize") 

10573 

10574 # ---------------------------------------------------------------------- 

10575 # Numeric Methods 

10576 

10577 @final 

10578 def describe( 

10579 self: NDFrameT, 

10580 percentiles=None, 

10581 include=None, 

10582 exclude=None, 

10583 ) -> NDFrameT: 

10584 """ 

10585 Generate descriptive statistics. 

10586 

10587 Descriptive statistics include those that summarize the central 

10588 tendency, dispersion and shape of a 

10589 dataset's distribution, excluding ``NaN`` values. 

10590 

10591 Analyzes both numeric and object series, as well 

10592 as ``DataFrame`` column sets of mixed data types. The output 

10593 will vary depending on what is provided. Refer to the notes 

10594 below for more detail. 

10595 

10596 Parameters 

10597 ---------- 

10598 percentiles : list-like of numbers, optional 

10599 The percentiles to include in the output. All should 

10600 fall between 0 and 1. The default is 

10601 ``[.25, .5, .75]``, which returns the 25th, 50th, and 

10602 75th percentiles. 

10603 include : 'all', list-like of dtypes or None (default), optional 

10604 A whitelist of data types to include in the result. Ignored 

10605 for ``Series``. Here are the options: 

10606 

10607 - 'all' : All columns of the input will be included in the output. 

10608 - A list-like of dtypes : Limits the results to the 

10609 provided data types. 

10610 To limit the result to numeric types submit 

10611 ``numpy.number``. To limit it instead to object columns submit 

10612 the ``numpy.object`` data type. Strings 

10613 can also be used in the style of 

10614 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 

10615 select pandas categorical columns, use ``'category'`` 

10616 - None (default) : The result will include all numeric columns. 

10617 exclude : list-like of dtypes or None (default), optional 

10618 A blacklist of data types to omit from the result. Ignored 

10619 for ``Series``. Here are the options: 

10620 

10621 - A list-like of dtypes : Excludes the provided data types 

10622 from the result. To exclude numeric types submit 

10623 ``numpy.number``. To exclude object columns submit the data 

10624 type ``numpy.object``. Strings can also be used in the style of 

10625 ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To 

10626 exclude pandas categorical columns, use ``'category'`` 

10627 - None (default) : The result will exclude nothing. 

10628 

10629 Returns 

10630 ------- 

10631 Series or DataFrame 

10632 Summary statistics of the Series or Dataframe provided. 

10633 

10634 See Also 

10635 -------- 

10636 DataFrame.count: Count number of non-NA/null observations. 

10637 DataFrame.max: Maximum of the values in the object. 

10638 DataFrame.min: Minimum of the values in the object. 

10639 DataFrame.mean: Mean of the values. 

10640 DataFrame.std: Standard deviation of the observations. 

10641 DataFrame.select_dtypes: Subset of a DataFrame including/excluding 

10642 columns based on their dtype. 

10643 

10644 Notes 

10645 ----- 

10646 For numeric data, the result's index will include ``count``, 

10647 ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and 

10648 upper percentiles. By default the lower percentile is ``25`` and the 

10649 upper percentile is ``75``. The ``50`` percentile is the 

10650 same as the median. 

10651 

10652 For object data (e.g. strings or timestamps), the result's index 

10653 will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` 

10654 is the most common value. The ``freq`` is the most common value's 

10655 frequency. Timestamps also include the ``first`` and ``last`` items. 

10656 

10657 If multiple object values have the highest count, then the 

10658 ``count`` and ``top`` results will be arbitrarily chosen from 

10659 among those with the highest count. 

10660 

10661 For mixed data types provided via a ``DataFrame``, the default is to 

10662 return only an analysis of numeric columns. If the dataframe consists 

10663 only of object and categorical data without any numeric columns, the 

10664 default is to return an analysis of both the object and categorical 

10665 columns. If ``include='all'`` is provided as an option, the result 

10666 will include a union of attributes of each type. 

10667 

10668 The `include` and `exclude` parameters can be used to limit 

10669 which columns in a ``DataFrame`` are analyzed for the output. 

10670 The parameters are ignored when analyzing a ``Series``. 

10671 

10672 Examples 

10673 -------- 

10674 Describing a numeric ``Series``. 

10675 

10676 >>> s = pd.Series([1, 2, 3]) 

10677 >>> s.describe() 

10678 count 3.0 

10679 mean 2.0 

10680 std 1.0 

10681 min 1.0 

10682 25% 1.5 

10683 50% 2.0 

10684 75% 2.5 

10685 max 3.0 

10686 dtype: float64 

10687 

10688 Describing a categorical ``Series``. 

10689 

10690 >>> s = pd.Series(['a', 'a', 'b', 'c']) 

10691 >>> s.describe() 

10692 count 4 

10693 unique 3 

10694 top a 

10695 freq 2 

10696 dtype: object 

10697 

10698 Describing a timestamp ``Series``. 

10699 

10700 >>> s = pd.Series([ 

10701 ... np.datetime64("2000-01-01"), 

10702 ... np.datetime64("2010-01-01"), 

10703 ... np.datetime64("2010-01-01") 

10704 ... ]) 

10705 >>> s.describe() 

10706 count 3 

10707 mean 2006-09-01 08:00:00 

10708 min 2000-01-01 00:00:00 

10709 25% 2004-12-31 12:00:00 

10710 50% 2010-01-01 00:00:00 

10711 75% 2010-01-01 00:00:00 

10712 max 2010-01-01 00:00:00 

10713 dtype: object 

10714 

10715 Describing a ``DataFrame``. By default only numeric fields 

10716 are returned. 

10717 

10718 >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), 

10719 ... 'numeric': [1, 2, 3], 

10720 ... 'object': ['a', 'b', 'c'] 

10721 ... }) 

10722 >>> df.describe() 

10723 numeric 

10724 count 3.0 

10725 mean 2.0 

10726 std 1.0 

10727 min 1.0 

10728 25% 1.5 

10729 50% 2.0 

10730 75% 2.5 

10731 max 3.0 

10732 

10733 Describing all columns of a ``DataFrame`` regardless of data type. 

10734 

10735 >>> df.describe(include='all') # doctest: +SKIP 

10736 categorical numeric object 

10737 count 3 3.0 3 

10738 unique 3 NaN 3 

10739 top f NaN a 

10740 freq 1 NaN 1 

10741 mean NaN 2.0 NaN 

10742 std NaN 1.0 NaN 

10743 min NaN 1.0 NaN 

10744 25% NaN 1.5 NaN 

10745 50% NaN 2.0 NaN 

10746 75% NaN 2.5 NaN 

10747 max NaN 3.0 NaN 

10748 

10749 Describing a column from a ``DataFrame`` by accessing it as 

10750 an attribute. 

10751 

10752 >>> df.numeric.describe() 

10753 count 3.0 

10754 mean 2.0 

10755 std 1.0 

10756 min 1.0 

10757 25% 1.5 

10758 50% 2.0 

10759 75% 2.5 

10760 max 3.0 

10761 Name: numeric, dtype: float64 

10762 

10763 Including only numeric columns in a ``DataFrame`` description. 

10764 

10765 >>> df.describe(include=[np.number]) 

10766 numeric 

10767 count 3.0 

10768 mean 2.0 

10769 std 1.0 

10770 min 1.0 

10771 25% 1.5 

10772 50% 2.0 

10773 75% 2.5 

10774 max 3.0 

10775 

10776 Including only string columns in a ``DataFrame`` description. 

10777 

10778 >>> df.describe(include=[object]) # doctest: +SKIP 

10779 object 

10780 count 3 

10781 unique 3 

10782 top a 

10783 freq 1 

10784 

10785 Including only categorical columns from a ``DataFrame`` description. 

10786 

10787 >>> df.describe(include=['category']) 

10788 categorical 

10789 count 3 

10790 unique 3 

10791 top d 

10792 freq 1 

10793 

10794 Excluding numeric columns from a ``DataFrame`` description. 

10795 

10796 >>> df.describe(exclude=[np.number]) # doctest: +SKIP 

10797 categorical object 

10798 count 3 3 

10799 unique 3 3 

10800 top f a 

10801 freq 1 1 

10802 

10803 Excluding object columns from a ``DataFrame`` description. 

10804 

10805 >>> df.describe(exclude=[object]) # doctest: +SKIP 

10806 categorical numeric 

10807 count 3 3.0 

10808 unique 3 NaN 

10809 top f NaN 

10810 freq 1 NaN 

10811 mean NaN 2.0 

10812 std NaN 1.0 

10813 min NaN 1.0 

10814 25% NaN 1.5 

10815 50% NaN 2.0 

10816 75% NaN 2.5 

10817 max NaN 3.0 
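
Describing with a custom list of ``percentiles`` (an illustrative
sketch, not part of the original docstring; the 50th percentile is
always included alongside the requested ones).

>>> df.describe(percentiles=[.1, .9])  # doctest: +SKIP
       numeric
count      3.0
mean       2.0
std        1.0
min        1.0
10%        1.2
50%        2.0
90%        2.8
max        3.0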

10818 """ 

10819 return describe_ndframe( 

10820 obj=self, 

10821 include=include, 

10822 exclude=exclude, 

10823 percentiles=percentiles, 

10824 ) 

10825 

10826 @final 

10827 def pct_change( 

10828 self: NDFrameT, 

10829 periods: int = 1, 

10830 fill_method: Literal["backfill", "bfill", "pad", "ffill"] | None = "pad", 

10831 limit=None, 

10832 freq=None, 

10833 **kwargs, 

10834 ) -> NDFrameT: 

10835 """ 

10836 Percentage change between the current and a prior element. 

10837 

10838 Computes the percentage change from the immediately previous row by 

10839 default. This is useful when comparing the percentage change in a 

10840 time series of elements. 

10841 

10842 Parameters 

10843 ---------- 

10844 periods : int, default 1 

10845 Periods to shift for forming percent change. 

10846 fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' 

10847 How to handle NAs **before** computing percent changes. 

10848 limit : int, default None 

10849 The number of consecutive NAs to fill before stopping. 

10850 freq : DateOffset, timedelta, or str, optional 

10851 Increment to use from time series API (e.g. 'M' or BDay()). 

10852 **kwargs 

10853 Additional keyword arguments are passed into 

10854 `DataFrame.shift` or `Series.shift`. 

10855 

10856 Returns 

10857 ------- 

10858 Series or DataFrame 

10859 The same type as the calling object. 

10860 

10861 See Also 

10862 -------- 

10863 Series.diff : Compute the difference of two elements in a Series. 

10864 DataFrame.diff : Compute the difference of two elements in a DataFrame. 

10865 Series.shift : Shift the index by some number of periods. 

10866 DataFrame.shift : Shift the index by some number of periods. 

10867 

10868 Examples 

10869 -------- 

10870 **Series** 

10871 

10872 >>> s = pd.Series([90, 91, 85]) 

10873 >>> s 

10874 0 90 

10875 1 91 

10876 2 85 

10877 dtype: int64 

10878 

10879 >>> s.pct_change() 

10880 0 NaN 

10881 1 0.011111 

10882 2 -0.065934 

10883 dtype: float64 

10884 

10885 >>> s.pct_change(periods=2) 

10886 0 NaN 

10887 1 NaN 

10888 2 -0.055556 

10889 dtype: float64 

10890 

10891 Percentage change in a Series where NAs are filled by carrying the 

10892 last valid observation forward to the next valid one. 

10893 

10894 >>> s = pd.Series([90, 91, None, 85]) 

10895 >>> s 

10896 0 90.0 

10897 1 91.0 

10898 2 NaN 

10899 3 85.0 

10900 dtype: float64 

10901 

10902 >>> s.pct_change(fill_method='ffill') 

10903 0 NaN 

10904 1 0.011111 

10905 2 0.000000 

10906 3 -0.065934 

10907 dtype: float64 

10908 

10909 **DataFrame** 

10910 

10911 Percentage change in French franc, Deutsche Mark, and Italian lira from 

10912 1980-01-01 to 1980-03-01. 

10913 

10914 >>> df = pd.DataFrame({ 

10915 ... 'FR': [4.0405, 4.0963, 4.3149], 

10916 ... 'GR': [1.7246, 1.7482, 1.8519], 

10917 ... 'IT': [804.74, 810.01, 860.13]}, 

10918 ... index=['1980-01-01', '1980-02-01', '1980-03-01']) 

10919 >>> df 

10920 FR GR IT 

10921 1980-01-01 4.0405 1.7246 804.74 

10922 1980-02-01 4.0963 1.7482 810.01 

10923 1980-03-01 4.3149 1.8519 860.13 

10924 

10925 >>> df.pct_change() 

10926 FR GR IT 

10927 1980-01-01 NaN NaN NaN 

10928 1980-02-01 0.013810 0.013684 0.006549 

10929 1980-03-01 0.053365 0.059318 0.061876 

10930 

10931 Percentage change in GOOG and APPL stock volume. This shows how to 

10932 compute the percentage change between columns. 

10933 

10934 >>> df = pd.DataFrame({ 

10935 ... '2016': [1769950, 30586265], 

10936 ... '2015': [1500923, 40912316], 

10937 ... '2014': [1371819, 41403351]}, 

10938 ... index=['GOOG', 'APPL']) 

10939 >>> df 

10940 2016 2015 2014 

10941 GOOG 1769950 1500923 1371819 

10942 APPL 30586265 40912316 41403351 

10943 

10944 >>> df.pct_change(axis='columns', periods=-1) 

10945 2016 2015 2014 

10946 GOOG 0.179241 0.094112 NaN 

10947 APPL -0.252395 -0.011860 NaN 

10948 """ 

10949 axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) 

10950 if fill_method is None: 

10951 data = self 

10952 else: 

10953 _data = self.fillna(method=fill_method, axis=axis, limit=limit) 

10954 assert _data is not None # needed for mypy 

10955 data = _data 

10956 

10957 shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) 

10958 # Unsupported left operand type for / ("NDFrameT") 

10959 rs = data / shifted - 1 # type: ignore[operator] 

10960 if freq is not None: 

10961 # Shift method is implemented differently when freq is not None 

10962 # We want to restore the original index 

10963 rs = rs.loc[~rs.index.duplicated()] 

10964 rs = rs.reindex_like(data) 

10965 return rs.__finalize__(self, method="pct_change") 

10966 
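# [Editor's sketch, not pandas source] ``pct_change`` reduces, by the code
# above, to ``data / data.shift(periods) - 1`` after the optional NA fill.
# A minimal check of that equivalence, assuming a plain integer Series:
# >>> import pandas as pd
# >>> s = pd.Series([90, 91, 85])
# >>> s.pct_change().equals(s / s.shift(1) - 1)
# True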

10967 @final 

10968 def _logical_func( 

10969 self, 

10970 name: str, 

10971 func, 

10972 axis: Axis = 0, 

10973 bool_only: bool_t = False, 

10974 skipna: bool_t = True, 

10975 **kwargs, 

10976 ) -> Series | bool_t: 

10977 nv.validate_logical_func((), kwargs, fname=name) 

10978 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

10979 

10980 if self.ndim > 1 and axis is None: 

10981 # Reduce along one dimension then the other, to simplify DataFrame._reduce 

10982 res = self._logical_func( 

10983 name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs 

10984 ) 

10985 return res._logical_func(name, func, skipna=skipna, **kwargs) 

10986 

10987 if ( 

10988 self.ndim > 1 

10989 and axis == 1 

10990 and len(self._mgr.arrays) > 1 

10991 # TODO(EA2D): special-case not needed 

10992 and all(x.ndim == 2 for x in self._mgr.arrays) 

10993 and not kwargs 

10994 ): 

10995 # Fastpath avoiding potentially expensive transpose 

10996 obj = self 

10997 if bool_only: 

10998 obj = self._get_bool_data() 

10999 return obj._reduce_axis1(name, func, skipna=skipna) 

11000 

11001 return self._reduce( 

11002 func, 

11003 name=name, 

11004 axis=axis, 

11005 skipna=skipna, 

11006 numeric_only=bool_only, 

11007 filter_type="bool", 

11008 ) 

11009 
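# [Editor's sketch, not pandas source] When ``bool_only=True``, the fastpath
# above first narrows to boolean columns via ``_get_bool_data()``. A minimal
# illustration of the observable behaviour:
# >>> import pandas as pd
# >>> df = pd.DataFrame({"a": [True, False], "b": [1, 0]})
# >>> df.any(bool_only=True)  # only the boolean column "a" is reduced
# a    True
# dtype: bool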

11010 def any( 

11011 self, 

11012 axis: Axis = 0, 

11013 bool_only: bool_t = False, 

11014 skipna: bool_t = True, 

11015 **kwargs, 

11016 ) -> DataFrame | Series | bool_t: 

11017 return self._logical_func( 

11018 "any", nanops.nanany, axis, bool_only, skipna, **kwargs 

11019 ) 

11020 

11021 def all( 

11022 self, 

11023 axis: Axis = 0, 

11024 bool_only: bool_t = False, 

11025 skipna: bool_t = True, 

11026 **kwargs, 

11027 ) -> Series | bool_t: 

11028 return self._logical_func( 

11029 "all", nanops.nanall, axis, bool_only, skipna, **kwargs 

11030 ) 

11031 

11032 @final 

11033 def _accum_func( 

11034 self, 

11035 name: str, 

11036 func, 

11037 axis: Axis | None = None, 

11038 skipna: bool_t = True, 

11039 *args, 

11040 **kwargs, 

11041 ): 

11042 skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) 

11043 if axis is None: 

11044 axis = self._stat_axis_number 

11045 else: 

11046 axis = self._get_axis_number(axis) 

11047 

11048 if axis == 1: 

11049 return self.T._accum_func( 

11050 name, func, axis=0, skipna=skipna, *args, **kwargs # noqa: B026 

11051 ).T 

11052 

11053 def block_accum_func(blk_values): 

11054 values = blk_values.T if hasattr(blk_values, "T") else blk_values 

11055 

11056 result: np.ndarray | ExtensionArray 

11057 if isinstance(values, ExtensionArray): 

11058 result = values._accumulate(name, skipna=skipna, **kwargs) 

11059 else: 

11060 result = nanops.na_accum_func(values, func, skipna=skipna) 

11061 

11062 result = result.T if hasattr(result, "T") else result 

11063 return result 

11064 

11065 result = self._mgr.apply(block_accum_func) 

11066 

11067 return self._constructor(result).__finalize__(self, method=name) 

11068 
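# [Editor's sketch, not pandas source] The ``axis == 1`` branch above works by
# transposing, accumulating along axis 0, and transposing back. Observable
# equivalence, assuming a small all-numeric frame:
# >>> import pandas as pd
# >>> df = pd.DataFrame([[1, 2], [3, 4]])
# >>> df.cumsum(axis=1).equals(df.T.cumsum().T)
# True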

11069 def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11070 return self._accum_func( 

11071 "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs 

11072 ) 

11073 

11074 def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11075 return self._accum_func( 

11076 "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs 

11077 ) 

11078 

11079 def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11080 return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) 

11081 

11082 def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11083 return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) 

11084 

11085 @final 

11086 def _stat_function_ddof( 

11087 self, 

11088 name: str, 

11089 func, 

11090 axis: Axis | None = None, 

11091 skipna: bool_t = True, 

11092 ddof: int = 1, 

11093 numeric_only: bool_t = False, 

11094 **kwargs, 

11095 ) -> Series | float: 

11096 nv.validate_stat_ddof_func((), kwargs, fname=name) 

11097 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11098 if axis is None: 

11099 axis = self._stat_axis_number 

11100 

11101 return self._reduce( 

11102 func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof 

11103 ) 

11104 

11105 def sem( 

11106 self, 

11107 axis: Axis | None = None, 

11108 skipna: bool_t = True, 

11109 ddof: int = 1, 

11110 numeric_only: bool_t = False, 

11111 **kwargs, 

11112 ) -> Series | float: 

11113 return self._stat_function_ddof( 

11114 "sem", nanops.nansem, axis, skipna, ddof, numeric_only, **kwargs 

11115 ) 

11116 

11117 def var( 

11118 self, 

11119 axis: Axis | None = None, 

11120 skipna: bool_t = True, 

11121 ddof: int = 1, 

11122 numeric_only: bool_t = False, 

11123 **kwargs, 

11124 ) -> Series | float: 

11125 return self._stat_function_ddof( 

11126 "var", nanops.nanvar, axis, skipna, ddof, numeric_only, **kwargs 

11127 ) 

11128 

11129 def std( 

11130 self, 

11131 axis: Axis | None = None, 

11132 skipna: bool_t = True, 

11133 ddof: int = 1, 

11134 numeric_only: bool_t = False, 

11135 **kwargs, 

11136 ) -> Series | float: 

11137 return self._stat_function_ddof( 

11138 "std", nanops.nanstd, axis, skipna, ddof, numeric_only, **kwargs 

11139 ) 

11140 
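# [Editor's sketch, not pandas source] ``sem``, ``std`` and ``var`` all route
# through ``_stat_function_ddof`` with the same ``ddof``; numerically,
# sem == std / sqrt(count). A quick check under that assumption:
# >>> import numpy as np, pandas as pd
# >>> s = pd.Series([1, 2, 3, 4])
# >>> bool(np.isclose(s.sem(), s.std() / np.sqrt(s.count())))
# True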

11141 @final 

11142 def _stat_function( 

11143 self, 

11144 name: str, 

11145 func, 

11146 axis: Axis | None = 0, 

11147 skipna: bool_t = True, 

11148 numeric_only: bool_t = False, 

11149 **kwargs, 

11150 ): 

11151 if name == "median": 

11152 nv.validate_median((), kwargs) 

11153 else: 

11154 nv.validate_stat_func((), kwargs, fname=name) 

11155 

11156 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11157 

11158 return self._reduce( 

11159 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only 

11160 ) 

11161 

11162 def min( 

11163 self, 

11164 axis: Axis | None = 0, 

11165 skipna: bool_t = True, 

11166 numeric_only: bool_t = False, 

11167 **kwargs, 

11168 ): 

11169 return self._stat_function( 

11170 "min", 

11171 nanops.nanmin, 

11172 axis, 

11173 skipna, 

11174 numeric_only, 

11175 **kwargs, 

11176 ) 

11177 

11178 def max( 

11179 self, 

11180 axis: Axis | None = 0, 

11181 skipna: bool_t = True, 

11182 numeric_only: bool_t = False, 

11183 **kwargs, 

11184 ): 

11185 return self._stat_function( 

11186 "max", 

11187 nanops.nanmax, 

11188 axis, 

11189 skipna, 

11190 numeric_only, 

11191 **kwargs, 

11192 ) 

11193 

11194 def mean( 

11195 self, 

11196 axis: Axis | None = 0, 

11197 skipna: bool_t = True, 

11198 numeric_only: bool_t = False, 

11199 **kwargs, 

11200 ) -> Series | float: 

11201 return self._stat_function( 

11202 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs 

11203 ) 

11204 

11205 def median( 

11206 self, 

11207 axis: Axis | None = 0, 

11208 skipna: bool_t = True, 

11209 numeric_only: bool_t = False, 

11210 **kwargs, 

11211 ) -> Series | float: 

11212 return self._stat_function( 

11213 "median", nanops.nanmedian, axis, skipna, numeric_only, **kwargs 

11214 ) 

11215 

11216 def skew( 

11217 self, 

11218 axis: Axis | None = 0, 

11219 skipna: bool_t = True, 

11220 numeric_only: bool_t = False, 

11221 **kwargs, 

11222 ) -> Series | float: 

11223 return self._stat_function( 

11224 "skew", nanops.nanskew, axis, skipna, numeric_only, **kwargs 

11225 ) 

11226 

11227 def kurt( 

11228 self, 

11229 axis: Axis | None = 0, 

11230 skipna: bool_t = True, 

11231 numeric_only: bool_t = False, 

11232 **kwargs, 

11233 ) -> Series | float: 

11234 return self._stat_function( 

11235 "kurt", nanops.nankurt, axis, skipna, numeric_only, **kwargs 

11236 ) 

11237 

11238 kurtosis = kurt 

11239 
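# [Editor's sketch, not pandas source] ``skew`` and ``kurt`` use bias-corrected
# estimators (kurtosis per Fisher's definition, so a normal distribution has
# kurtosis 0). For symmetric data the skew is exactly zero:
# >>> import pandas as pd
# >>> s = pd.Series([1, 2, 3, 4])
# >>> s.skew()
# 0.0
# >>> s.kurt()
# -1.2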

11240 @final 

11241 def _min_count_stat_function( 

11242 self, 

11243 name: str, 

11244 func, 

11245 axis: Axis | None = None, 

11246 skipna: bool_t = True, 

11247 numeric_only: bool_t = False, 

11248 min_count: int = 0, 

11249 **kwargs, 

11250 ): 

11251 if name == "sum": 

11252 nv.validate_sum((), kwargs) 

11253 elif name == "prod": 

11254 nv.validate_prod((), kwargs) 

11255 else: 

11256 nv.validate_stat_func((), kwargs, fname=name) 

11257 

11258 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11259 

11260 if axis is None: 

11261 axis = self._stat_axis_number 

11262 

11263 return self._reduce( 

11264 func, 

11265 name=name, 

11266 axis=axis, 

11267 skipna=skipna, 

11268 numeric_only=numeric_only, 

11269 min_count=min_count, 

11270 ) 

11271 

11272 def sum( 

11273 self, 

11274 axis: Axis | None = None, 

11275 skipna: bool_t = True, 

11276 numeric_only: bool_t = False, 

11277 min_count: int = 0, 

11278 **kwargs, 

11279 ): 

11280 return self._min_count_stat_function( 

11281 "sum", nanops.nansum, axis, skipna, numeric_only, min_count, **kwargs 

11282 ) 

11283 

11284 def prod( 

11285 self, 

11286 axis: Axis | None = None, 

11287 skipna: bool_t = True, 

11288 numeric_only: bool_t = False, 

11289 min_count: int = 0, 

11290 **kwargs, 

11291 ): 

11292 return self._min_count_stat_function( 

11293 "prod", 

11294 nanops.nanprod, 

11295 axis, 

11296 skipna, 

11297 numeric_only, 

11298 min_count, 

11299 **kwargs, 

11300 ) 

11301 

11302 product = prod 

11303 
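# [Editor's sketch, not pandas source] ``min_count`` (routed through
# ``_min_count_stat_function`` above) turns the result into NA when too few
# valid values remain after ``skipna`` drops NAs:
# >>> import numpy as np, pandas as pd
# >>> pd.Series([1.0, np.nan]).sum(min_count=2)  # only one valid value
# nan
# >>> pd.Series([1.0, np.nan]).sum(min_count=1)
# 1.0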

11304 @classmethod 

11305 def _add_numeric_operations(cls) -> None: 

11306 """ 

11307 Add the operations to the cls; evaluate the doc strings again 

11308 """ 

11309 axis_descr, name1, name2 = _doc_params(cls) 

11310 

11311 @doc( 

11312 _bool_doc, 

11313 desc=_any_desc, 

11314 name1=name1, 

11315 name2=name2, 

11316 axis_descr=axis_descr, 

11317 see_also=_any_see_also, 

11318 examples=_any_examples, 

11319 empty_value=False, 

11320 ) 

11321 def any( 

11322 self, 

11323 *, 

11324 axis: Axis = 0, 

11325 bool_only=None, 

11326 skipna: bool_t = True, 

11327 **kwargs, 

11328 ): 

11329 return NDFrame.any( 

11330 self, 

11331 axis=axis, 

11332 bool_only=bool_only, 

11333 skipna=skipna, 

11334 **kwargs, 

11335 ) 

11336 

11337 setattr(cls, "any", any) 

11338 

11339 @doc( 

11340 _bool_doc, 

11341 desc=_all_desc, 

11342 name1=name1, 

11343 name2=name2, 

11344 axis_descr=axis_descr, 

11345 see_also=_all_see_also, 

11346 examples=_all_examples, 

11347 empty_value=True, 

11348 ) 

11349 def all( 

11350 self, 

11351 axis: Axis = 0, 

11352 bool_only=None, 

11353 skipna: bool_t = True, 

11354 **kwargs, 

11355 ): 

11356 return NDFrame.all(self, axis, bool_only, skipna, **kwargs) 

11357 

11358 setattr(cls, "all", all) 

11359 

11360 @doc( 

11361 _num_ddof_doc, 

11362 desc="Return unbiased standard error of the mean over requested " 

11363 "axis.\n\nNormalized by N-1 by default. This can be changed " 

11364 "using the ddof argument", 

11365 name1=name1, 

11366 name2=name2, 

11367 axis_descr=axis_descr, 

11368 notes="", 

11369 examples="", 

11370 ) 

11371 def sem( 

11372 self, 

11373 axis: Axis | None = None, 

11374 skipna: bool_t = True, 

11375 ddof: int = 1, 

11376 numeric_only: bool_t = False, 

11377 **kwargs, 

11378 ): 

11379 return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) 

11380 

11381 setattr(cls, "sem", sem) 

11382 

11383 @doc( 

11384 _num_ddof_doc, 

11385 desc="Return unbiased variance over requested axis.\n\nNormalized by " 

11386 "N-1 by default. This can be changed using the ddof argument.", 

11387 name1=name1, 

11388 name2=name2, 

11389 axis_descr=axis_descr, 

11390 notes="", 

11391 examples=_var_examples, 

11392 ) 

11393 def var( 

11394 self, 

11395 axis: Axis | None = None, 

11396 skipna: bool_t = True, 

11397 ddof: int = 1, 

11398 numeric_only: bool_t = False, 

11399 **kwargs, 

11400 ): 

11401 return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) 

11402 

11403 setattr(cls, "var", var) 

11404 

11405 @doc( 

11406 _num_ddof_doc, 

11407 desc="Return sample standard deviation over requested axis." 

11408 "\n\nNormalized by N-1 by default. This can be changed using the " 

11409 "ddof argument.", 

11410 name1=name1, 

11411 name2=name2, 

11412 axis_descr=axis_descr, 

11413 notes=_std_notes, 

11414 examples=_std_examples, 

11415 ) 

11416 def std( 

11417 self, 

11418 axis: Axis | None = None, 

11419 skipna: bool_t = True, 

11420 ddof: int = 1, 

11421 numeric_only: bool_t = False, 

11422 **kwargs, 

11423 ): 

11424 return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) 

11425 

11426 setattr(cls, "std", std) 

11427 

11428 @doc( 

11429 _cnum_doc, 

11430 desc="minimum", 

11431 name1=name1, 

11432 name2=name2, 

11433 axis_descr=axis_descr, 

11434 accum_func_name="min", 

11435 examples=_cummin_examples, 

11436 ) 

11437 def cummin( 

11438 self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs 

11439 ): 

11440 return NDFrame.cummin(self, axis, skipna, *args, **kwargs) 

11441 

11442 setattr(cls, "cummin", cummin) 

11443 

11444 @doc( 

11445 _cnum_doc, 

11446 desc="maximum", 

11447 name1=name1, 

11448 name2=name2, 

11449 axis_descr=axis_descr, 

11450 accum_func_name="max", 

11451 examples=_cummax_examples, 

11452 ) 

11453 def cummax( 

11454 self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs 

11455 ): 

11456 return NDFrame.cummax(self, axis, skipna, *args, **kwargs) 

11457 

11458 setattr(cls, "cummax", cummax) 

11459 

11460 @doc( 

11461 _cnum_doc, 

11462 desc="sum", 

11463 name1=name1, 

11464 name2=name2, 

11465 axis_descr=axis_descr, 

11466 accum_func_name="sum", 

11467 examples=_cumsum_examples, 

11468 ) 

11469 def cumsum( 

11470 self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs 

11471 ): 

11472 return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) 

11473 

11474 setattr(cls, "cumsum", cumsum) 

11475 

11476 @doc( 

11477 _cnum_doc, 

11478 desc="product", 

11479 name1=name1, 

11480 name2=name2, 

11481 axis_descr=axis_descr, 

11482 accum_func_name="prod", 

11483 examples=_cumprod_examples, 

11484 ) 

11485 def cumprod( 

11486 self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs 

11487 ): 

11488 return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) 

11489 

11490 setattr(cls, "cumprod", cumprod) 

11491 

11492 # error: Untyped decorator makes function "sum" untyped 

11493 @doc( # type: ignore[misc] 

11494 _num_doc, 

11495 desc="Return the sum of the values over the requested axis.\n\n" 

11496 "This is equivalent to the method ``numpy.sum``.", 

11497 name1=name1, 

11498 name2=name2, 

11499 axis_descr=axis_descr, 

11500 min_count=_min_count_stub, 

11501 see_also=_stat_func_see_also, 

11502 examples=_sum_examples, 

11503 ) 

11504 def sum( 

11505 self, 

11506 axis: Axis | None = None, 

11507 skipna: bool_t = True, 

11508 numeric_only: bool_t = False, 

11509 min_count: int = 0, 

11510 **kwargs, 

11511 ): 

11512 return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) 

11513 

11514 setattr(cls, "sum", sum) 

11515 

11516 @doc( 

11517 _num_doc, 

11518 desc="Return the product of the values over the requested axis.", 

11519 name1=name1, 

11520 name2=name2, 

11521 axis_descr=axis_descr, 

11522 min_count=_min_count_stub, 

11523 see_also=_stat_func_see_also, 

11524 examples=_prod_examples, 

11525 ) 

11526 def prod( 

11527 self, 

11528 axis: Axis | None = None, 

11529 skipna: bool_t = True, 

11530 numeric_only: bool_t = False, 

11531 min_count: int = 0, 

11532 **kwargs, 

11533 ): 

11534 return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) 

11535 

11536 setattr(cls, "prod", prod) 

11537 cls.product = prod 

11538 

11539 @doc( 

11540 _num_doc, 

11541 desc="Return the mean of the values over the requested axis.", 

11542 name1=name1, 

11543 name2=name2, 

11544 axis_descr=axis_descr, 

11545 min_count="", 

11546 see_also="", 

11547 examples="", 

11548 ) 

11549 def mean( 

11550 self, 

11551 axis: AxisInt | None = 0, 

11552 skipna: bool_t = True, 

11553 numeric_only: bool_t = False, 

11554 **kwargs, 

11555 ): 

11556 return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) 

11557 

11558 setattr(cls, "mean", mean) 

11559 

11560 @doc( 

11561 _num_doc, 

11562 desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", 

11563 name1=name1, 

11564 name2=name2, 

11565 axis_descr=axis_descr, 

11566 min_count="", 

11567 see_also="", 

11568 examples="", 

11569 ) 

11570 def skew( 

11571 self, 

11572 axis: AxisInt | None = 0, 

11573 skipna: bool_t = True, 

11574 numeric_only: bool_t = False, 

11575 **kwargs, 

11576 ): 

11577 return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) 

11578 

11579 setattr(cls, "skew", skew) 

11580 

11581 @doc( 

11582 _num_doc, 

11583 desc="Return unbiased kurtosis over requested axis.\n\n" 

11584 "Kurtosis obtained using Fisher's definition of\n" 

11585 "kurtosis (kurtosis of normal == 0.0). Normalized " 

11586 "by N-1.", 

11587 name1=name1, 

11588 name2=name2, 

11589 axis_descr=axis_descr, 

11590 min_count="", 

11591 see_also="", 

11592 examples="", 

11593 ) 

11594 def kurt( 

11595 self, 

11596 axis: Axis | None = 0, 

11597 skipna: bool_t = True, 

11598 numeric_only: bool_t = False, 

11599 **kwargs, 

11600 ): 

11601 return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) 

11602 

11603 setattr(cls, "kurt", kurt) 

11604 cls.kurtosis = kurt 

11605 

11606 @doc( 

11607 _num_doc, 

11608 desc="Return the median of the values over the requested axis.", 

11609 name1=name1, 

11610 name2=name2, 

11611 axis_descr=axis_descr, 

11612 min_count="", 

11613 see_also="", 

11614 examples="", 

11615 ) 

11616 def median( 

11617 self, 

11618 axis: AxisInt | None = 0, 

11619 skipna: bool_t = True, 

11620 numeric_only: bool_t = False, 

11621 **kwargs, 

11622 ): 

11623 return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) 

11624 

11625 setattr(cls, "median", median) 

11626 

11627 @doc( 

11628 _num_doc, 

11629 desc="Return the maximum of the values over the requested axis.\n\n" 

11630 "If you want the *index* of the maximum, use ``idxmax``. This is " 

11631 "the equivalent of the ``numpy.ndarray`` method ``argmax``.", 

11632 name1=name1, 

11633 name2=name2, 

11634 axis_descr=axis_descr, 

11635 min_count="", 

11636 see_also=_stat_func_see_also, 

11637 examples=_max_examples, 

11638 ) 

11639 def max( 

11640 self, 

11641 axis: AxisInt | None = 0, 

11642 skipna: bool_t = True, 

11643 numeric_only: bool_t = False, 

11644 **kwargs, 

11645 ): 

11646 return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) 

11647 

11648 setattr(cls, "max", max) 

11649 

11650 @doc( 

11651 _num_doc, 

11652 desc="Return the minimum of the values over the requested axis.\n\n" 

11653 "If you want the *index* of the minimum, use ``idxmin``. This is " 

11654 "the equivalent of the ``numpy.ndarray`` method ``argmin``.", 

11655 name1=name1, 

11656 name2=name2, 

11657 axis_descr=axis_descr, 

11658 min_count="", 

11659 see_also=_stat_func_see_also, 

11660 examples=_min_examples, 

11661 ) 

11662 def min( 

11663 self, 

11664 axis: AxisInt | None = 0, 

11665 skipna: bool_t = True, 

11666 numeric_only: bool_t = False, 

11667 **kwargs, 

11668 ): 

11669 return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) 

11670 

11671 setattr(cls, "min", min) 

11672 

11673 @final 

11674 @doc(Rolling) 

11675 def rolling( 

11676 self, 

11677 window: int | dt.timedelta | str | BaseOffset | BaseIndexer, 

11678 min_periods: int | None = None, 

11679 center: bool_t = False, 

11680 win_type: str | None = None, 

11681 on: str | None = None, 

11682 axis: Axis = 0, 

11683 closed: str | None = None, 

11684 step: int | None = None, 

11685 method: str = "single", 

11686 ) -> Window | Rolling: 

11687 axis = self._get_axis_number(axis) 

11688 

11689 if win_type is not None: 

11690 return Window( 

11691 self, 

11692 window=window, 

11693 min_periods=min_periods, 

11694 center=center, 

11695 win_type=win_type, 

11696 on=on, 

11697 axis=axis, 

11698 closed=closed, 

11699 step=step, 

11700 method=method, 

11701 ) 

11702 

11703 return Rolling( 

11704 self, 

11705 window=window, 

11706 min_periods=min_periods, 

11707 center=center, 

11708 win_type=win_type, 

11709 on=on, 

11710 axis=axis, 

11711 closed=closed, 

11712 step=step, 

11713 method=method, 

11714 ) 

11715 
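# [Editor's sketch, not pandas source] With ``win_type=None`` the call above
# returns a plain ``Rolling`` object; each window aggregates the trailing
# ``window`` observations:
# >>> import pandas as pd
# >>> s = pd.Series([1, 2, 3, 4])
# >>> s.rolling(window=2).sum()
# 0    NaN
# 1    3.0
# 2    5.0
# 3    7.0
# dtype: float64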

11716 @final 

11717 @doc(Expanding) 

11718 def expanding( 

11719 self, 

11720 min_periods: int = 1, 

11721 axis: Axis = 0, 

11722 method: str = "single", 

11723 ) -> Expanding: 

11724 axis = self._get_axis_number(axis) 

11725 return Expanding(self, min_periods=min_periods, axis=axis, method=method) 

11726 
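# [Editor's sketch, not pandas source] ``expanding`` windows grow from the
# start of the data; ``min_periods`` controls how soon results appear:
# >>> import pandas as pd
# >>> pd.Series([1, 2, 3, 4]).expanding(min_periods=2).sum()
# 0     NaN
# 1     3.0
# 2     6.0
# 3    10.0
# dtype: float64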

11727 @final 

11728 @doc(ExponentialMovingWindow) 

11729 def ewm( 

11730 self, 

11731 com: float | None = None, 

11732 span: float | None = None, 

11733 halflife: float | TimedeltaConvertibleTypes | None = None, 

11734 alpha: float | None = None, 

11735 min_periods: int | None = 0, 

11736 adjust: bool_t = True, 

11737 ignore_na: bool_t = False, 

11738 axis: Axis = 0, 

11739 times: np.ndarray | DataFrame | Series | None = None, 

11740 method: str = "single", 

11741 ) -> ExponentialMovingWindow: 

11742 axis = self._get_axis_number(axis) 

11743 return ExponentialMovingWindow( 

11744 self, 

11745 com=com, 

11746 span=span, 

11747 halflife=halflife, 

11748 alpha=alpha, 

11749 min_periods=min_periods, 

11750 adjust=adjust, 

11751 ignore_na=ignore_na, 

11752 axis=axis, 

11753 times=times, 

11754 method=method, 

11755 ) 

11756 
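# [Editor's sketch, not pandas source] ``ewm`` weights observations with an
# exponential decay; with ``adjust=True`` (the default) the mean is a
# normalized weighted average of all prior values:
# >>> import pandas as pd
# >>> pd.Series([1, 2, 3]).ewm(alpha=0.5).mean()
# 0    1.000000
# 1    1.666667
# 2    2.428571
# dtype: float64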

11757 # ---------------------------------------------------------------------- 

11758 # Arithmetic Methods 

11759 

11760 @final 

11761 def _inplace_method(self, other, op): 

11762 """ 

11763 Wrap arithmetic method to operate inplace. 

11764 """ 

11765 result = op(self, other) 

11766 

11767 if ( 

11768 self.ndim == 1 

11769 and result._indexed_same(self) 

11770 and is_dtype_equal(result.dtype, self.dtype) 

11771 ): 

11772 # GH#36498 this inplace op can _actually_ be inplace. 

11773 # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, 

11774 # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace" 

11775 self._mgr.setitem_inplace( # type: ignore[union-attr] 

11776 slice(None), result._values 

11777 ) 

11778 return self 

11779 

11780 # Delete cacher 

11781 self._reset_cacher() 

11782 

11783 # this makes sure that we are aligned like the input 

11784 # we are updating inplace so we want to ignore is_copy 

11785 self._update_inplace( 

11786 result.reindex_like(self, copy=False), verify_is_copy=False 

11787 ) 

11788 return self 

11789 
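# [Editor's sketch, not pandas source] When the result keeps the same index
# and dtype, the GH#36498 fastpath above mutates the existing values and
# returns ``self``, so the object's identity is preserved:
# >>> import pandas as pd
# >>> s = pd.Series([1, 2, 3])
# >>> original = id(s)
# >>> s += 1
# >>> id(s) == original
# True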

11790 def __iadd__(self: NDFrameT, other) -> NDFrameT: 

11791 # error: Unsupported left operand type for + ("Type[NDFrame]") 

11792 return self._inplace_method(other, type(self).__add__) # type: ignore[operator] 

11793 

11794 def __isub__(self: NDFrameT, other) -> NDFrameT: 

11795 # error: Unsupported left operand type for - ("Type[NDFrame]") 

11796 return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] 

11797 

11798 def __imul__(self: NDFrameT, other) -> NDFrameT: 

11799 # error: Unsupported left operand type for * ("Type[NDFrame]") 

11800 return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] 

11801 

11802 def __itruediv__(self: NDFrameT, other) -> NDFrameT: 

11803 # error: Unsupported left operand type for / ("Type[NDFrame]") 

11804 return self._inplace_method( 

11805 other, type(self).__truediv__ # type: ignore[operator] 

11806 ) 

11807 

11808 def __ifloordiv__(self: NDFrameT, other) -> NDFrameT: 

11809 # error: Unsupported left operand type for // ("Type[NDFrame]") 

11810 return self._inplace_method( 

11811 other, type(self).__floordiv__ # type: ignore[operator] 

11812 ) 

11813 

11814 def __imod__(self: NDFrameT, other) -> NDFrameT: 

11815 # error: Unsupported left operand type for % ("Type[NDFrame]") 

11816 return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] 

11817 

11818 def __ipow__(self: NDFrameT, other) -> NDFrameT: 

11819 # error: Unsupported left operand type for ** ("Type[NDFrame]") 

11820 return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] 

11821 

11822 def __iand__(self: NDFrameT, other) -> NDFrameT: 

11823 # error: Unsupported left operand type for & ("Type[NDFrame]") 

11824 return self._inplace_method(other, type(self).__and__) # type: ignore[operator] 

11825 

11826 def __ior__(self: NDFrameT, other) -> NDFrameT: 

11827 # error: Unsupported left operand type for | ("Type[NDFrame]") 

11828 return self._inplace_method(other, type(self).__or__) # type: ignore[operator] 

11829 

11830 def __ixor__(self: NDFrameT, other) -> NDFrameT: 

11831 # error: Unsupported left operand type for ^ ("Type[NDFrame]") 

11832 return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] 

11833 

11834 # ---------------------------------------------------------------------- 

11835 # Misc methods 

11836 

11837 @final 

11838 def _find_valid_index(self, *, how: str) -> Hashable | None: 

11839 """ 

11840 Retrieves the index of the first valid value. 

11841 

11842 Parameters 

11843 ---------- 

11844 how : {'first', 'last'} 

11845 Use this parameter to change between the first or last valid index. 

11846 

11847 Returns 

11848 ------- 

11849 idx_first_valid : type of index 

11850 """ 

11851 idxpos = find_valid_index(self._values, how=how, is_valid=~isna(self._values)) 

11852 if idxpos is None: 

11853 return None 

11854 return self.index[idxpos] 

11855 

11856 @final 

11857 @doc(position="first", klass=_shared_doc_kwargs["klass"]) 

11858 def first_valid_index(self) -> Hashable | None: 

11859 """ 

11860 Return index for {position} non-NA value or None, if no non-NA value is found. 

11861 

11862 Returns 

11863 ------- 

11864 type of index 

11865 

11866 Notes 

11867 ----- 

11868 If all elements are NA/null, returns None. 

11869 Also returns None for empty {klass}. 

11870 """ 

11871 return self._find_valid_index(how="first") 

11872 

11873 @final 

11874 @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) 

11875 def last_valid_index(self) -> Hashable | None: 

11876 return self._find_valid_index(how="last") 

11877 
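# [Editor's sketch, not pandas source] Both methods defer to
# ``_find_valid_index``, which scans for the first/last non-NA position:
# >>> import pandas as pd
# >>> s = pd.Series([None, 3.0, None, 4.0])
# >>> s.first_valid_index()
# 1
# >>> s.last_valid_index()
# 3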

11878 

11879def _doc_params(cls): 

11880 """Return a tuple of the doc params.""" 

11881 axis_descr = ( 

11882 f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" 

11883 ) 

11884 name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" 

11885 name2 = cls.__name__ 

11886 return axis_descr, name, name2 

11887 
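# [Editor's note, illustrative only] For DataFrame this yields roughly
# axis_descr == "{index (0), columns (1)}", name == "Series" (the sliced
# constructor's name) and name2 == "DataFrame"; for Series, name falls
# back to "scalar" since _AXIS_LEN == 1.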

11888 

11889_num_doc = """ 

11890{desc} 

11891 

11892Parameters 

11893---------- 

11894axis : {axis_descr} 

11895 Axis for the function to be applied on. 

11896 For `Series` this parameter is unused and defaults to 0. 

11897 

11898 For DataFrames, specifying ``axis=None`` will apply the aggregation 

11899 across both axes. 

11900 

11901 .. versionadded:: 2.0.0 

11902 

11903skipna : bool, default True 

11904 Exclude NA/null values when computing the result. 

11905numeric_only : bool, default False 

11906 Include only float, int, boolean columns. Not implemented for Series. 

11907 

11908{min_count}\ 

11909**kwargs 

11910 Additional keyword arguments to be passed to the function. 

11911 

11912Returns 

11913------- 

11914{name1} or scalar\ 

11915{see_also}\ 

11916{examples} 

11917""" 

11918 

11919_num_ddof_doc = """ 

11920{desc} 

11921 

11922Parameters 

11923---------- 

11924axis : {axis_descr} 

11925 For `Series` this parameter is unused and defaults to 0. 

11926skipna : bool, default True 

11927 Exclude NA/null values. If an entire row/column is NA, the result 

11928 will be NA. 

11929ddof : int, default 1 

11930 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

11931 where N represents the number of elements. 

11932numeric_only : bool, default False 

11933 Include only float, int, boolean columns. Not implemented for Series. 

11934 

11935Returns 

11936------- 

11937{name1} or {name2} (if level specified) \ 

11938{notes}\ 

11939{examples} 

11940""" 

11941 

11942_std_notes = """ 

11943 

11944Notes 

11945----- 

11946To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the 

11947 default `ddof=1`)."""

11948 

11949_std_examples = """ 

11950 

11951Examples 

11952-------- 

11953>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

11954... 'age': [21, 25, 62, 43], 

11955... 'height': [1.61, 1.87, 1.49, 2.01]} 

11956... ).set_index('person_id') 

11957>>> df 

11958 age height 

11959person_id 

119600 21 1.61 

119611 25 1.87 

119622 62 1.49 

119633 43 2.01 

11964 

11965The standard deviation of the columns can be found as follows: 

11966 

11967>>> df.std() 

11968age 18.786076 

11969height 0.237417 

11970dtype: float64 

11971 

11972Alternatively, `ddof=0` can be set to normalize by N instead of N-1: 

11973 

11974>>> df.std(ddof=0) 

11975age 16.269219 

11976height 0.205609 

11977dtype: float64""" 

11978 

11979_var_examples = """ 

11980 

11981Examples 

11982-------- 

11983>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

11984... 'age': [21, 25, 62, 43], 

11985... 'height': [1.61, 1.87, 1.49, 2.01]} 

11986... ).set_index('person_id') 

11987>>> df 

11988 age height 

11989person_id 

119900 21 1.61 

119911 25 1.87 

119922 62 1.49 

119933 43 2.01 

11994 

11995>>> df.var() 

11996age 352.916667 

11997height 0.056367 

11998dtype: float64 

11999 

12000Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: 

12001 

12002>>> df.var(ddof=0) 

12003age 264.687500 

12004height 0.042275 

12005dtype: float64""" 

12006 

12007_bool_doc = """ 

12008{desc} 

12009 

12010Parameters 

12011---------- 

12012axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

12013 Indicate which axis or axes should be reduced. For `Series` this parameter 

12014 is unused and defaults to 0. 

12015 

12016 * 0 / 'index' : reduce the index, return a Series whose index is the 

12017 original column labels. 

12018 * 1 / 'columns' : reduce the columns, return a Series whose index is the 

12019 original index. 

12020 * None : reduce all axes, return a scalar. 

12021 

12022bool_only : bool, default None 

12023 Include only boolean columns. If None, will attempt to use everything, 

12024 then use only boolean data. Not implemented for Series. 

12025skipna : bool, default True 

12026 Exclude NA/null values. If the entire row/column is NA and skipna is 

12027 True, then the result will be {empty_value}, as for an empty row/column. 

12028 If skipna is False, then NAs are treated as True, because they are not 

12029 equal to zero. 

12030**kwargs : any, default None 

12031 Additional keywords have no effect but might be accepted for 

12032 compatibility with NumPy. 

12033 

12034Returns 

12035------- 

12036{name1} or {name2} 

12037 If level is specified, then {name2} is returned; otherwise, {name1} 

12038 is returned. 

12039 

12040{see_also} 

12041{examples}""" 

12042 

12043_all_desc = """\ 

12044Return whether all elements are True, potentially over an axis. 

12045 

12046 Returns True unless there is at least one element within a series or 

12047 along a DataFrame axis that is False or equivalent (e.g. zero or 

12048empty).""" 

12049 

12050_all_examples = """\ 

12051Examples 

12052-------- 

12053**Series** 

12054 

12055>>> pd.Series([True, True]).all() 

12056True 

12057>>> pd.Series([True, False]).all() 

12058False 

12059>>> pd.Series([], dtype="float64").all() 

12060True 

12061>>> pd.Series([np.nan]).all() 

12062True 

12063>>> pd.Series([np.nan]).all(skipna=False) 

12064True 

12065 

12066**DataFrames** 

12067 

12068Create a dataframe from a dictionary. 

12069 

12070>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) 

12071>>> df 

12072 col1 col2 

120730 True True 

120741 True False 

12075 

12076Default behaviour checks if values in each column all return True. 

12077 

12078>>> df.all() 

12079col1 True 

12080col2 False 

12081dtype: bool 

12082 

12083Specify ``axis='columns'`` to check if values in each row all return True. 

12084 

12085>>> df.all(axis='columns') 

120860 True 

120871 False 

12088dtype: bool 

12089 

12090Or ``axis=None`` for whether every value is True. 

12091 

12092>>> df.all(axis=None) 

12093False 

12094""" 

12095 

12096_all_see_also = """\ 

12097See Also 

12098-------- 

12099Series.all : Return True if all elements are True. 

12100DataFrame.any : Return True if one (or more) elements are True. 

12101""" 

12102 

12103_cnum_doc = """ 

12104Return cumulative {desc} over a DataFrame or Series axis. 

12105 

12106Returns a DataFrame or Series of the same size containing the cumulative 

12107{desc}. 

12108 

12109Parameters 

12110---------- 

12111axis : {{0 or 'index', 1 or 'columns'}}, default 0 

12112 The index or the name of the axis. 0 is equivalent to None or 'index'. 

12113 For `Series` this parameter is unused and defaults to 0. 

12114skipna : bool, default True 

12115 Exclude NA/null values. If an entire row/column is NA, the result 

12116 will be NA. 

12117*args, **kwargs 

12118 Additional keywords have no effect but might be accepted for 

12119 compatibility with NumPy. 

12120 

12121Returns 

12122------- 

12123{name1} or {name2} 

12124 Return cumulative {desc} of {name1} or {name2}. 

12125 

12126See Also 

12127-------- 

12128core.window.expanding.Expanding.{accum_func_name} : Similar functionality 

12129 but ignores ``NaN`` values. 

12130{name2}.{accum_func_name} : Return the {desc} over 

12131 {name2} axis. 

12132{name2}.cummax : Return cumulative maximum over {name2} axis. 

12133{name2}.cummin : Return cumulative minimum over {name2} axis. 

12134{name2}.cumsum : Return cumulative sum over {name2} axis. 

12135{name2}.cumprod : Return cumulative product over {name2} axis. 

12136 

12137{examples}""" 

12138 

12139_cummin_examples = """\ 

12140Examples 

12141-------- 

12142**Series** 

12143 

12144>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12145>>> s 

121460 2.0 

121471 NaN 

121482 5.0 

121493 -1.0 

121504 0.0 

12151dtype: float64 

12152 

12153By default, NA values are ignored. 

12154 

12155>>> s.cummin() 

121560 2.0 

121571 NaN 

121582 2.0 

121593 -1.0 

121604 -1.0 

12161dtype: float64 

12162 

12163To include NA values in the operation, use ``skipna=False`` 

12164 

12165>>> s.cummin(skipna=False) 

121660 2.0 

121671 NaN 

121682 NaN 

121693 NaN 

121704 NaN 

12171dtype: float64 

12172 

12173**DataFrame** 

12174 

12175>>> df = pd.DataFrame([[2.0, 1.0], 

12176... [3.0, np.nan], 

12177... [1.0, 0.0]], 

12178... columns=list('AB')) 

12179>>> df 

12180 A B 

121810 2.0 1.0 

121821 3.0 NaN 

121832 1.0 0.0 

12184 

12185By default, iterates over rows and finds the minimum 

12186in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12187 

12188>>> df.cummin() 

12189 A B 

121900 2.0 1.0 

121911 2.0 NaN 

121922 1.0 0.0 

12193 

12194To iterate over columns and find the minimum in each row, 

12195use ``axis=1`` 

12196 

12197>>> df.cummin(axis=1) 

12198 A B 

121990 2.0 1.0 

122001 3.0 NaN 

122012 1.0 0.0 

12202""" 

12203 

12204_cumsum_examples = """\ 

12205Examples 

12206-------- 

12207**Series** 

12208 

12209>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12210>>> s 

122110 2.0 

122121 NaN 

122132 5.0 

122143 -1.0 

122154 0.0 

12216dtype: float64 

12217 

12218By default, NA values are ignored. 

12219 

12220>>> s.cumsum() 

122210 2.0 

122221 NaN 

122232 7.0 

122243 6.0 

122254 6.0 

12226dtype: float64 

12227 

12228To include NA values in the operation, use ``skipna=False`` 

12229 

12230>>> s.cumsum(skipna=False) 

122310 2.0 

122321 NaN 

122332 NaN 

122343 NaN 

122354 NaN 

12236dtype: float64 

12237 

12238**DataFrame** 

12239 

12240>>> df = pd.DataFrame([[2.0, 1.0], 

12241... [3.0, np.nan], 

12242... [1.0, 0.0]], 

12243... columns=list('AB')) 

12244>>> df 

12245 A B 

122460 2.0 1.0 

122471 3.0 NaN 

122482 1.0 0.0 

12249 

12250By default, iterates over rows and finds the sum 

12251in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12252 

12253>>> df.cumsum() 

12254 A B 

122550 2.0 1.0 

122561 5.0 NaN 

122572 6.0 1.0 

12258 

12259To iterate over columns and find the sum in each row, 

12260use ``axis=1`` 

12261 

12262>>> df.cumsum(axis=1) 

12263 A B 

122640 2.0 3.0 

122651 3.0 NaN 

122662 1.0 1.0 

12267""" 

12268 

12269_cumprod_examples = """\ 

12270Examples 

12271-------- 

12272**Series** 

12273 

12274>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12275>>> s 

122760 2.0 

122771 NaN 

122782 5.0 

122793 -1.0 

122804 0.0 

12281dtype: float64 

12282 

12283By default, NA values are ignored. 

12284 

12285>>> s.cumprod() 

122860 2.0 

122871 NaN 

122882 10.0 

122893 -10.0 

122904 -0.0 

12291dtype: float64 

12292 

12293To include NA values in the operation, use ``skipna=False`` 

12294 

12295>>> s.cumprod(skipna=False) 

122960 2.0 

122971 NaN 

122982 NaN 

122993 NaN 

123004 NaN 

12301dtype: float64 

12302 

12303**DataFrame** 

12304 

12305>>> df = pd.DataFrame([[2.0, 1.0], 

12306... [3.0, np.nan], 

12307... [1.0, 0.0]], 

12308... columns=list('AB')) 

12309>>> df 

12310 A B 

123110 2.0 1.0 

123121 3.0 NaN 

123132 1.0 0.0 

12314 

12315By default, iterates over rows and finds the product 

12316in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12317 

12318>>> df.cumprod() 

12319 A B 

123200 2.0 1.0 

123211 6.0 NaN 

123222 6.0 0.0 

12323 

12324To iterate over columns and find the product in each row, 

12325use ``axis=1`` 

12326 

12327>>> df.cumprod(axis=1) 

12328 A B 

123290 2.0 2.0 

123301 3.0 NaN 

123312 1.0 0.0 

12332""" 

12333 

12334_cummax_examples = """\ 

12335Examples 

12336-------- 

12337**Series** 

12338 

12339>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12340>>> s 

123410 2.0 

123421 NaN 

123432 5.0 

123443 -1.0 

123454 0.0 

12346dtype: float64 

12347 

12348By default, NA values are ignored. 

12349 

12350>>> s.cummax() 

123510 2.0 

123521 NaN 

123532 5.0 

123543 5.0 

123554 5.0 

12356dtype: float64 

12357 

12358To include NA values in the operation, use ``skipna=False`` 

12359 

12360>>> s.cummax(skipna=False) 

123610 2.0 

123621 NaN 

123632 NaN 

123643 NaN 

123654 NaN 

12366dtype: float64 

12367 

12368**DataFrame** 

12369 

12370>>> df = pd.DataFrame([[2.0, 1.0], 

12371... [3.0, np.nan], 

12372... [1.0, 0.0]], 

12373... columns=list('AB')) 

12374>>> df 

12375 A B 

123760 2.0 1.0 

123771 3.0 NaN 

123782 1.0 0.0 

12379 

12380By default, iterates over rows and finds the maximum 

12381in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12382 

12383>>> df.cummax() 

12384 A B 

123850 2.0 1.0 

123861 3.0 NaN 

123872 3.0 1.0 

12388 

12389To iterate over columns and find the maximum in each row, 

12390use ``axis=1`` 

12391 

12392>>> df.cummax(axis=1) 

12393 A B 

123940 2.0 2.0 

123951 3.0 NaN 

123962 1.0 1.0 

12397""" 

12398 

12399_any_see_also = """\ 

12400See Also 

12401-------- 

12402 numpy.any : NumPy version of this method. 

12403Series.any : Return whether any element is True. 

12404Series.all : Return whether all elements are True. 

12405DataFrame.any : Return whether any element is True over requested axis. 

12406DataFrame.all : Return whether all elements are True over requested axis. 

12407""" 

12408 

12409_any_desc = """\ 

12410Return whether any element is True, potentially over an axis. 

12411 

12412Returns False unless there is at least one element within a series or 

12413 along a DataFrame axis that is True or equivalent (e.g. non-zero or 

12414non-empty).""" 

12415 

12416_any_examples = """\ 

12417Examples 

12418-------- 

12419**Series** 

12420 

12421For Series input, the output is a scalar indicating whether any element 

12422is True. 

12423 

12424>>> pd.Series([False, False]).any() 

12425False 

12426>>> pd.Series([True, False]).any() 

12427True 

12428>>> pd.Series([], dtype="float64").any() 

12429False 

12430>>> pd.Series([np.nan]).any() 

12431False 

12432>>> pd.Series([np.nan]).any(skipna=False) 

12433True 

12434 

12435**DataFrame** 

12436 

12437Whether each column contains at least one True element (the default). 

12438 

12439>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) 

12440>>> df 

12441 A B C 

124420 1 0 0 

124431 2 2 0 

12444 

12445>>> df.any() 

12446A True 

12447B True 

12448C False 

12449dtype: bool 

12450 

12451Aggregating over the columns. 

12452 

12453>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) 

12454>>> df 

12455 A B 

124560 True 1 

124571 False 2 

12458 

12459>>> df.any(axis='columns') 

124600 True 

124611 True 

12462dtype: bool 

12463 

12464>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) 

12465>>> df 

12466 A B 

124670 True 1 

124681 False 0 

12469 

12470>>> df.any(axis='columns') 

124710 True 

124721 False 

12473dtype: bool 

12474 

12475Aggregating over the entire DataFrame with ``axis=None``. 

12476 

12477>>> df.any(axis=None) 

12478True 

12479 

12480`any` for an empty DataFrame is an empty Series. 

12481 

12482>>> pd.DataFrame([]).any() 

12483Series([], dtype: bool) 

12484""" 

12485 

12486_shared_docs[ 

12487 "stat_func_example" 

12488] = """ 

12489 

12490Examples 

12491-------- 

12492>>> idx = pd.MultiIndex.from_arrays([ 

12493... ['warm', 'warm', 'cold', 'cold'], 

12494... ['dog', 'falcon', 'fish', 'spider']], 

12495... names=['blooded', 'animal']) 

12496>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) 

12497>>> s 

12498blooded animal 

12499warm dog 4 

12500 falcon 2 

12501cold fish 0 

12502 spider 8 

12503Name: legs, dtype: int64 

12504 

12505>>> s.{stat_func}() 

12506{default_output}""" 

12507 

12508_sum_examples = _shared_docs["stat_func_example"].format( 

12509 stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 

12510) 

12511 

12512_sum_examples += """ 

12513 

12514By default, the sum of an empty or all-NA Series is ``0``. 

12515 

12516>>> pd.Series([], dtype="float64").sum() # min_count=0 is the default 

125170.0 

12518 

12519This can be controlled with the ``min_count`` parameter. For example, if 

12520you'd like the sum of an empty series to be NaN, pass ``min_count=1``. 

12521 

12522>>> pd.Series([], dtype="float64").sum(min_count=1) 

12523nan 

12524 

12525Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

12526empty series identically. 

12527 

12528>>> pd.Series([np.nan]).sum() 

125290.0 

12530 

12531>>> pd.Series([np.nan]).sum(min_count=1) 

12532nan""" 

12533 

12534_max_examples: str = _shared_docs["stat_func_example"].format( 

12535 stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 

12536) 

12537 

12538_min_examples: str = _shared_docs["stat_func_example"].format( 

12539 stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 

12540) 

12541 

12542_stat_func_see_also = """ 

12543 

12544See Also 

12545-------- 

12546Series.sum : Return the sum. 

12547Series.min : Return the minimum. 

12548Series.max : Return the maximum. 

12549Series.idxmin : Return the index of the minimum. 

12550Series.idxmax : Return the index of the maximum. 

12551DataFrame.sum : Return the sum over the requested axis. 

12552DataFrame.min : Return the minimum over the requested axis. 

12553DataFrame.max : Return the maximum over the requested axis. 

12554DataFrame.idxmin : Return the index of the minimum over the requested axis. 

12555DataFrame.idxmax : Return the index of the maximum over the requested axis.""" 

12556 

12557_prod_examples = """ 

12558 

12559Examples 

12560-------- 

12561 By default, the product of an empty or all-NA Series is ``1``. 

12562 

12563>>> pd.Series([], dtype="float64").prod() 

125641.0 

12565 

12566 This can be controlled with the ``min_count`` parameter. 

12567 

12568>>> pd.Series([], dtype="float64").prod(min_count=1) 

12569nan 

12570 

12571Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

12572empty series identically. 

12573 

12574>>> pd.Series([np.nan]).prod() 

125751.0 

12576 

12577>>> pd.Series([np.nan]).prod(min_count=1) 

12578nan""" 

12579 

12580_min_count_stub = """\ 

12581min_count : int, default 0 

12582 The required number of valid values to perform the operation. If fewer than 

12583 ``min_count`` non-NA values are present the result will be NA. 

12584""" 

12585 

12586 

12587def _align_as_utc( 

12588 left: NDFrameT, right: NDFrameT, join_index: Index | None 

12589) -> tuple[NDFrameT, NDFrameT]: 

12590 """ 

12591 If we are aligning timezone-aware DatetimeIndexes and the timezones 

12592 do not match, convert both to UTC. 

12593 """ 

12594 if is_datetime64tz_dtype(left.index.dtype): 

12595 if left.index.tz != right.index.tz: 

12596 if join_index is not None: 

12597 # GH#33671 ensure we don't change the index on 

12598 # our original Series (NB: by default deep=False) 

12599 left = left.copy() 

12600 right = right.copy() 

12601 left.index = join_index 

12602 right.index = join_index 

12603 

12604 return left, right
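# [Editor's sketch, not pandas source] Observable effect when aligning two
# tz-aware Series whose zones differ: the joined index comes back in UTC
# (exact repr may vary by pandas/pytz version):
# >>> import pandas as pd
# >>> a = pd.Series([1], index=pd.date_range("2020-01-01", periods=1, tz="US/Eastern"))
# >>> b = pd.Series([2], index=pd.date_range("2020-01-01", periods=1, tz="UTC"))
# >>> left, right = a.align(b)
# >>> str(left.index.tz)  # doctest: +SKIP
# 'UTC'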